This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch docs-iceberg-public-api in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
commit e404e30c704933c20ae86df1f578192e16095de6 Author: Andy Grove <[email protected]> AuthorDate: Wed Jan 28 13:46:15 2026 -0700 Replace @IcebergApi with @CometPublicApi({"Iceberg"}) annotation Adopts a pattern inspired by Hadoop's @InterfaceAudience.LimitedPrivate where the annotation lists known external consumers. This makes it easy to add future consumers without creating new annotation types. Also addresses review feedback: removes redundant assertions in API tests and clarifies doc references. Co-Authored-By: Claude Opus 4.5 <[email protected]> --- .../arrow/c/AbstractCometSchemaImporter.java | 4 ++-- .../comet/{IcebergApi.java => CometPublicApi.java} | 24 +++++++++++-------- .../java/org/apache/comet/CometSchemaImporter.java | 4 ++-- .../src/main/java/org/apache/comet/IcebergApi.java | 27 ++++++---------------- .../apache/comet/parquet/AbstractColumnReader.java | 11 +++++---- .../java/org/apache/comet/parquet/BatchReader.java | 14 +++++------ .../org/apache/comet/parquet/ColumnReader.java | 8 +++---- .../apache/comet/parquet/ConstantColumnReader.java | 8 +++---- .../java/org/apache/comet/parquet/FileReader.java | 14 +++++------ .../apache/comet/parquet/MetadataColumnReader.java | 12 +++++----- .../main/java/org/apache/comet/parquet/Native.java | 8 +++---- .../apache/comet/parquet/ParquetColumnSpec.java | 22 +++++++++--------- .../java/org/apache/comet/parquet/ReadOptions.java | 12 +++++----- .../org/apache/comet/parquet/RowGroupReader.java | 6 ++--- .../java/org/apache/comet/parquet/TypeUtil.java | 4 ++-- .../main/java/org/apache/comet/parquet/Utils.java | 10 ++++---- .../org/apache/comet/parquet/WrappedInputFile.java | 6 ++--- .../java/org/apache/comet/vector/CometVector.java | 16 ++++++------- .../source/contributor-guide/iceberg_public_api.md | 8 ++++--- .../apache/comet/iceberg/api/AbstractApiTest.java | 23 ++++++++++-------- .../iceberg/api/IcebergApiVerificationTest.java | 24 +++++++++++-------- .../api/parquet/AbstractColumnReaderApiTest.java | 15 
++++-------- .../iceberg/api/parquet/BatchReaderApiTest.java | 12 +++++----- 23 files changed, 146 insertions(+), 146 deletions(-) diff --git a/common/src/main/java/org/apache/arrow/c/AbstractCometSchemaImporter.java b/common/src/main/java/org/apache/arrow/c/AbstractCometSchemaImporter.java index 49ce92a40..d56bcbf4f 100644 --- a/common/src/main/java/org/apache/arrow/c/AbstractCometSchemaImporter.java +++ b/common/src/main/java/org/apache/arrow/c/AbstractCometSchemaImporter.java @@ -23,7 +23,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.types.pojo.Field; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** This is a simple wrapper around SchemaImporter to make it accessible from Java Arrow. */ public abstract class AbstractCometSchemaImporter { @@ -69,7 +69,7 @@ public abstract class AbstractCometSchemaImporter { return vector; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public void close() { provider.close(); } diff --git a/common/src/main/java/org/apache/comet/IcebergApi.java b/common/src/main/java/org/apache/comet/CometPublicApi.java similarity index 67% copy from common/src/main/java/org/apache/comet/IcebergApi.java copy to common/src/main/java/org/apache/comet/CometPublicApi.java index 915fd8784..8b07582c3 100644 --- a/common/src/main/java/org/apache/comet/IcebergApi.java +++ b/common/src/main/java/org/apache/comet/CometPublicApi.java @@ -26,19 +26,25 @@ import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** - * Indicates that the annotated element is part of the public API used by Apache Iceberg. + * Indicates that the annotated element is part of the public API used by external projects. * - * <p>This annotation marks classes, methods, constructors, and fields that form the contract - * between Comet and Iceberg. 
Changes to these APIs may break Iceberg's Comet integration, so - * contributors should exercise caution and consider backward compatibility when modifying annotated - * elements. + * <p>The {@link #value()} lists the known consumers of this API element. Changes to annotated + * elements may break those consumers, so contributors should exercise caution and consider backward + * compatibility when modifying them. * - * <p>The Iceberg integration uses Comet's native Parquet reader for accelerated vectorized reads. - * See the contributor guide documentation for details on how Iceberg uses these APIs. + * <p>Inspired by Hadoop's {@code @InterfaceAudience.LimitedPrivate}. * - * @see <a href="https://iceberg.apache.org/">Apache Iceberg</a> + * <p>Example usage: + * + * <pre> + * @CometPublicApi({"Iceberg"}) + * public class FileReader { ... } + * </pre> */ @Documented @Retention(RetentionPolicy.RUNTIME) @Target({ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD}) -public @interface IcebergApi {} +public @interface CometPublicApi { + /** Lists the external projects known to use this API element. */ + String[] value(); +} diff --git a/common/src/main/java/org/apache/comet/CometSchemaImporter.java b/common/src/main/java/org/apache/comet/CometSchemaImporter.java index 4841f16f1..43e9f655f 100644 --- a/common/src/main/java/org/apache/comet/CometSchemaImporter.java +++ b/common/src/main/java/org/apache/comet/CometSchemaImporter.java @@ -23,9 +23,9 @@ import org.apache.arrow.c.*; import org.apache.arrow.memory.BufferAllocator; /** This is a simple wrapper around SchemaImporter to make it accessible from Java Arrow. 
*/ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class CometSchemaImporter extends AbstractCometSchemaImporter { - @IcebergApi + @CometPublicApi({"Iceberg"}) public CometSchemaImporter(BufferAllocator allocator) { super(allocator); } diff --git a/common/src/main/java/org/apache/comet/IcebergApi.java b/common/src/main/java/org/apache/comet/IcebergApi.java index 915fd8784..92a4f008b 100644 --- a/common/src/main/java/org/apache/comet/IcebergApi.java +++ b/common/src/main/java/org/apache/comet/IcebergApi.java @@ -19,26 +19,13 @@ package org.apache.comet; -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - /** - * Indicates that the annotated element is part of the public API used by Apache Iceberg. - * - * <p>This annotation marks classes, methods, constructors, and fields that form the contract - * between Comet and Iceberg. Changes to these APIs may break Iceberg's Comet integration, so - * contributors should exercise caution and consider backward compatibility when modifying annotated - * elements. + * Convenience constant for use with {@link CometPublicApi}. * - * <p>The Iceberg integration uses Comet's native Parquet reader for accelerated vectorized reads. - * See the contributor guide documentation for details on how Iceberg uses these APIs. 
- * - * @see <a href="https://iceberg.apache.org/">Apache Iceberg</a> + * <p>Usage: {@code @CometPublicApi({IcebergApi.ICEBERG})} */ -@Documented -@Retention(RetentionPolicy.RUNTIME) -@Target({ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD}) -public @interface IcebergApi {} +public final class IcebergApi { + public static final String ICEBERG = "Iceberg"; + + private IcebergApi() {} +} diff --git a/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java b/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java index f8385f41b..81b736b7e 100644 --- a/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java +++ b/common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java @@ -28,11 +28,11 @@ import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.TimestampNTZType$; import org.apache.comet.CometConf; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; import org.apache.comet.vector.CometVector; /** Base class for Comet Parquet column reader implementations. */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public abstract class AbstractColumnReader implements AutoCloseable { protected static final Logger LOG = LoggerFactory.getLogger(AbstractColumnReader.class); @@ -63,7 +63,8 @@ public abstract class AbstractColumnReader implements AutoCloseable { protected int batchSize; /** A pointer to the native implementation of ColumnReader. */ - @IcebergApi protected long nativeHandle; + @CometPublicApi({"Iceberg"}) + protected long nativeHandle; AbstractColumnReader( DataType type, @@ -98,7 +99,7 @@ public abstract class AbstractColumnReader implements AutoCloseable { /** * Set the batch size of this reader to be 'batchSize'. Also initializes the native column reader. 
*/ - @IcebergApi + @CometPublicApi({"Iceberg"}) public void setBatchSize(int batchSize) { assert nativeHandle == 0 : "Native column reader shouldn't be initialized before " + "'setBatchSize' is called"; @@ -116,7 +117,7 @@ public abstract class AbstractColumnReader implements AutoCloseable { /** Returns the {@link CometVector} read by this reader. */ public abstract CometVector currentBatch(); - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public void close() { if (nativeHandle != 0) { diff --git a/common/src/main/java/org/apache/comet/parquet/BatchReader.java b/common/src/main/java/org/apache/comet/parquet/BatchReader.java index 857a1de3c..50d037917 100644 --- a/common/src/main/java/org/apache/comet/parquet/BatchReader.java +++ b/common/src/main/java/org/apache/comet/parquet/BatchReader.java @@ -64,8 +64,8 @@ import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.util.AccumulatorV2; import org.apache.comet.CometConf; +import org.apache.comet.CometPublicApi; import org.apache.comet.CometSchemaImporter; -import org.apache.comet.IcebergApi; import org.apache.comet.shims.ShimBatchReader; import org.apache.comet.shims.ShimFileFormat; import org.apache.comet.vector.CometVector; @@ -88,7 +88,7 @@ import org.apache.comet.vector.CometVector; * } * </pre> */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Closeable { private static final Logger LOG = LoggerFactory.getLogger(FileReader.class); protected static final BufferAllocator ALLOCATOR = new RootAllocator(); @@ -191,7 +191,7 @@ public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Cl * @deprecated since 0.10.0, will be removed in 0.11.0. 
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public BatchReader(AbstractColumnReader[] columnReaders) { // Todo: set useDecimal128 and useLazyMaterialization int numColumns = columnReaders.length; @@ -390,7 +390,7 @@ public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Cl * @deprecated since 0.10.0, will be removed in 0.11.0. * @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public void setSparkSchema(StructType schema) { this.sparkSchema = schema; } @@ -399,7 +399,7 @@ public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Cl * @deprecated since 0.10.0, will be removed in 0.11.0. * @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public AbstractColumnReader[] getColumnReaders() { return columnReaders; } @@ -503,7 +503,7 @@ public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Cl return nextBatch(batchSize); } - @IcebergApi + @CometPublicApi({"Iceberg"}) public boolean nextBatch(int batchSize) { long totalDecodeTime = 0, totalLoadTime = 0; for (int i = 0; i < columnReaders.length; i++) { @@ -530,7 +530,7 @@ public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Cl return true; } - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public void close() throws IOException { if (columnReaders != null) { diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java index 9def88e42..43db9898e 100644 --- a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java +++ b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java @@ -43,15 +43,15 @@ import org.apache.parquet.schema.LogicalTypeAnnotation; import 
org.apache.spark.sql.types.DataType; import org.apache.comet.CometConf; +import org.apache.comet.CometPublicApi; import org.apache.comet.CometSchemaImporter; -import org.apache.comet.IcebergApi; import org.apache.comet.vector.CometDecodedVector; import org.apache.comet.vector.CometDictionary; import org.apache.comet.vector.CometDictionaryVector; import org.apache.comet.vector.CometPlainVector; import org.apache.comet.vector.CometVector; -@IcebergApi +@CometPublicApi({"Iceberg"}) public class ColumnReader extends AbstractColumnReader { protected static final Logger LOG = LoggerFactory.getLogger(ColumnReader.class); protected final BufferAllocator ALLOCATOR = new RootAllocator(); @@ -116,7 +116,7 @@ public class ColumnReader extends AbstractColumnReader { * @deprecated since 0.10.0, will be removed in 0.11.0. * @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public void setPageReader(PageReader pageReader) throws IOException { this.pageReader = pageReader; @@ -132,7 +132,7 @@ public class ColumnReader extends AbstractColumnReader { } /** This method is called from Apache Iceberg. 
*/ - @IcebergApi + @CometPublicApi({"Iceberg"}) public void setRowGroupReader(RowGroupReader rowGroupReader, ParquetColumnSpec columnSpec) throws IOException { ColumnDescriptor descriptor = Utils.buildColumnDescriptor(columnSpec); diff --git a/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java index 58f68543b..afc682177 100644 --- a/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java +++ b/common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java @@ -27,13 +27,13 @@ import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.types.UTF8String; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** * A column reader that always return constant vectors. Used for reading partition columns, for * instance. */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class ConstantColumnReader extends MetadataColumnReader { /** Whether all the values in this constant column are nulls */ private boolean isNull; @@ -59,7 +59,7 @@ public class ConstantColumnReader extends MetadataColumnReader { * @deprecated since 0.10.0, will be removed in 0.11.0. 
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public ConstantColumnReader( DataType type, ColumnDescriptor descriptor, Object value, boolean useDecimal128) { super(type, descriptor, useDecimal128, true); @@ -67,7 +67,7 @@ public class ConstantColumnReader extends MetadataColumnReader { } // Used by Iceberg - @IcebergApi + @CometPublicApi({"Iceberg"}) public ConstantColumnReader( DataType type, ParquetColumnSpec spec, Object value, boolean useDecimal128) { super(type, spec, useDecimal128, true); diff --git a/common/src/main/java/org/apache/comet/parquet/FileReader.java b/common/src/main/java/org/apache/comet/parquet/FileReader.java index 80c214fc7..0b076b059 100644 --- a/common/src/main/java/org/apache/comet/parquet/FileReader.java +++ b/common/src/main/java/org/apache/comet/parquet/FileReader.java @@ -90,7 +90,7 @@ import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType; import org.apache.spark.sql.execution.metric.SQLMetric; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; import static org.apache.parquet.hadoop.ParquetFileWriter.EFMAGIC; import static org.apache.parquet.hadoop.ParquetFileWriter.MAGIC; @@ -103,7 +103,7 @@ import static org.apache.comet.parquet.RowGroupFilter.FilterLevel.STATISTICS; * A Parquet file reader. Mostly followed {@code ParquetFileReader} in {@code parquet-mr}, but with * customizations & optimizations for Comet. */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class FileReader implements Closeable { private static final Logger LOG = LoggerFactory.getLogger(FileReader.class); @@ -138,7 +138,7 @@ public class FileReader implements Closeable { } /** This constructor is called from Apache Iceberg. 
*/ - @IcebergApi + @CometPublicApi({"Iceberg"}) public FileReader( WrappedInputFile file, ReadOptions cometOptions, @@ -262,7 +262,7 @@ public class FileReader implements Closeable { } /** This method is called from Apache Iceberg. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public void setRequestedSchemaFromSpecs(List<ParquetColumnSpec> specList) { paths.clear(); for (ParquetColumnSpec colSpec : specList) { @@ -341,7 +341,7 @@ public class FileReader implements Closeable { } /** Skips the next row group. Returns false if there's no row group to skip. Otherwise, true. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public boolean skipNextRowGroup() { return advanceToNextBlock(); } @@ -350,7 +350,7 @@ public class FileReader implements Closeable { * Returns the next row group to read (after applying row group filtering), or null if there's no * more row group. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public RowGroupReader readNextRowGroup() throws IOException { if (currentBlock == blocks.size()) { return null; @@ -871,7 +871,7 @@ public class FileReader implements Closeable { } } - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public void close() throws IOException { try { diff --git a/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java b/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java index f20c450e1..837ccee5b 100644 --- a/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java +++ b/common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java @@ -28,12 +28,12 @@ import org.apache.arrow.vector.FieldVector; import org.apache.parquet.column.ColumnDescriptor; import org.apache.spark.sql.types.DataType; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; import org.apache.comet.vector.CometPlainVector; import org.apache.comet.vector.CometVector; /** A metadata column reader that can be extended by {@link RowIndexColumnReader} etc. 
*/ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class MetadataColumnReader extends AbstractColumnReader { private final BufferAllocator allocator = new RootAllocator(); @@ -48,7 +48,7 @@ public class MetadataColumnReader extends AbstractColumnReader { * @deprecated since 0.10.0, will be made package private in 0.11.0. * @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public MetadataColumnReader( DataType type, ColumnDescriptor descriptor, boolean useDecimal128, boolean isConstant) { // TODO: should we handle legacy dates & timestamps for metadata columns? @@ -58,7 +58,7 @@ public class MetadataColumnReader extends AbstractColumnReader { } // Used by Iceberg - @IcebergApi + @CometPublicApi({"Iceberg"}) public MetadataColumnReader( DataType type, ParquetColumnSpec spec, boolean useDecimal128, boolean isConstant) { // TODO: should we handle legacy dates & timestamps for metadata columns? @@ -73,7 +73,7 @@ public class MetadataColumnReader extends AbstractColumnReader { super.setBatchSize(batchSize); } - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public void readBatch(int total) { if (vector == null) { @@ -95,7 +95,7 @@ public class MetadataColumnReader extends AbstractColumnReader { vector.setNumNulls(total); } - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public CometVector currentBatch() { return vector; diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java index babd0d392..9f57d673e 100644 --- a/common/src/main/java/org/apache/comet/parquet/Native.java +++ b/common/src/main/java/org/apache/comet/parquet/Native.java @@ -22,7 +22,7 @@ package org.apache.comet.parquet; import java.nio.ByteBuffer; import java.util.Map; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; import org.apache.comet.NativeBase; public final class Native extends NativeBase { @@ 
-144,7 +144,7 @@ public final class Native extends NativeBase { * * @param handle the handle to the native Parquet column reader */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static native void resetBatch(long handle); /** @@ -223,14 +223,14 @@ public final class Native extends NativeBase { public static native void setDecimal(long handle, byte[] value); /** Set position of row index vector for Iceberg Metadata Column */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static native void setPosition(long handle, long value, int size); /** Set row index vector for Spark row index metadata column and return vector size */ public static native int setIndices(long handle, long offset, int size, long[] indices); /** Set deleted info for Iceberg Metadata Column */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static native void setIsDeleted(long handle, boolean[] isDeleted); /** diff --git a/common/src/main/java/org/apache/comet/parquet/ParquetColumnSpec.java b/common/src/main/java/org/apache/comet/parquet/ParquetColumnSpec.java index 95fed362d..631d07296 100644 --- a/common/src/main/java/org/apache/comet/parquet/ParquetColumnSpec.java +++ b/common/src/main/java/org/apache/comet/parquet/ParquetColumnSpec.java @@ -21,7 +21,7 @@ package org.apache.comet.parquet; import java.util.Map; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** * Parquet ColumnSpec encapsulates the information within a Parquet ColumnDescriptor. Utility @@ -29,7 +29,7 @@ import org.apache.comet.IcebergApi; * passing of Column descriptors between Comet and Iceberg. This is required because Iceberg shades * Parquet, changing the package of Parquet classes and making them incompatible with Comet. 
*/ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class ParquetColumnSpec { private final int fieldId; @@ -44,7 +44,7 @@ public class ParquetColumnSpec { private String logicalTypeName; private Map<String, String> logicalTypeParams; - @IcebergApi + @CometPublicApi({"Iceberg"}) public ParquetColumnSpec( int fieldId, String[] path, @@ -66,22 +66,22 @@ public class ParquetColumnSpec { this.logicalTypeParams = logicalTypeParams; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public int getFieldId() { return fieldId; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public String[] getPath() { return path; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public String getPhysicalType() { return physicalType; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public int getTypeLength() { return typeLength; } @@ -90,22 +90,22 @@ public class ParquetColumnSpec { return isRepeated; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public int getMaxRepetitionLevel() { return maxRepetitionLevel; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public int getMaxDefinitionLevel() { return maxDefinitionLevel; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public String getLogicalTypeName() { return logicalTypeName; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public Map<String, String> getLogicalTypeParams() { return logicalTypeParams; } diff --git a/common/src/main/java/org/apache/comet/parquet/ReadOptions.java b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java index ec5c16ce8..2d48ea00e 100644 --- a/common/src/main/java/org/apache/comet/parquet/ReadOptions.java +++ b/common/src/main/java/org/apache/comet/parquet/ReadOptions.java @@ -27,14 +27,14 @@ import org.apache.spark.SparkEnv; import org.apache.spark.launcher.SparkLauncher; import org.apache.comet.CometConf; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** * Comet specific Parquet related read options. 
* * <p>TODO: merge this with {@link org.apache.parquet.HadoopReadOptions} once PARQUET-2203 is done. */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class ReadOptions { private static final Logger LOG = LoggerFactory.getLogger(ReadOptions.class); @@ -88,12 +88,12 @@ public class ReadOptions { return adjustReadRangeSkew; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public static Builder builder(Configuration conf) { return new Builder(conf); } - @IcebergApi + @CometPublicApi({"Iceberg"}) public static class Builder { private final Configuration conf; @@ -138,7 +138,7 @@ public class ReadOptions { return this; } - @IcebergApi + @CometPublicApi({"Iceberg"}) public ReadOptions build() { return new ReadOptions( parallelIOEnabled, @@ -148,7 +148,7 @@ public class ReadOptions { adjustReadRangeSkew); } - @IcebergApi + @CometPublicApi({"Iceberg"}) public Builder(Configuration conf) { this.conf = conf; this.parallelIOEnabled = diff --git a/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java b/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java index 0ca7478b7..3024a35b9 100644 --- a/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java +++ b/common/src/main/java/org/apache/comet/parquet/RowGroupReader.java @@ -29,9 +29,9 @@ import org.apache.parquet.column.page.PageReadStore; import org.apache.parquet.column.page.PageReader; import org.apache.parquet.internal.filter2.columnindex.RowRanges; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; -@IcebergApi +@CometPublicApi({"Iceberg"}) public class RowGroupReader implements PageReadStore { private final Map<String, PageReader> readers = new HashMap<>(); private final long rowCount; @@ -50,7 +50,7 @@ public class RowGroupReader implements PageReadStore { this.rowIndexOffset = -1; } - @IcebergApi + @CometPublicApi({"Iceberg"}) @Override public long getRowCount() { return rowCount; diff --git a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java 
b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java index 818c828cf..cb5429861 100644 --- a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java +++ b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java @@ -30,7 +30,7 @@ import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.*; import org.apache.comet.CometConf; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; import static org.apache.comet.parquet.Utils.descriptorToParquetColumnSpec; @@ -42,7 +42,7 @@ public class TypeUtil { * @deprecated since 0.10.0, will be removed in 0.11.0. * @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static ColumnDescriptor convertToParquet(StructField field) { Type.Repetition repetition; int maxDefinitionLevel; diff --git a/common/src/main/java/org/apache/comet/parquet/Utils.java b/common/src/main/java/org/apache/comet/parquet/Utils.java index c1b6fb2c1..340fd4204 100644 --- a/common/src/main/java/org/apache/comet/parquet/Utils.java +++ b/common/src/main/java/org/apache/comet/parquet/Utils.java @@ -29,13 +29,13 @@ import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; import org.apache.spark.sql.types.*; +import org.apache.comet.CometPublicApi; import org.apache.comet.CometSchemaImporter; -import org.apache.comet.IcebergApi; public class Utils { /** This method is called from Apache Iceberg. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static ColumnReader getColumnReader( DataType type, ParquetColumnSpec columnSpec, @@ -63,7 +63,7 @@ public class Utils { * instead. 
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a> */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public static ColumnReader getColumnReader( DataType type, ColumnDescriptor descriptor, @@ -296,7 +296,7 @@ public class Utils { } } - @IcebergApi + @CometPublicApi({"Iceberg"}) public static ColumnDescriptor buildColumnDescriptor(ParquetColumnSpec columnSpec) { PrimitiveType.PrimitiveTypeName primType = PrimitiveType.PrimitiveTypeName.valueOf(columnSpec.getPhysicalType()); @@ -462,7 +462,7 @@ public class Utils { } } - @IcebergApi + @CometPublicApi({"Iceberg"}) public static ParquetColumnSpec descriptorToParquetColumnSpec(ColumnDescriptor descriptor) { String[] path = descriptor.getPath(); diff --git a/common/src/main/java/org/apache/comet/parquet/WrappedInputFile.java b/common/src/main/java/org/apache/comet/parquet/WrappedInputFile.java index 9b5e50ddb..207e52120 100644 --- a/common/src/main/java/org/apache/comet/parquet/WrappedInputFile.java +++ b/common/src/main/java/org/apache/comet/parquet/WrappedInputFile.java @@ -26,17 +26,17 @@ import java.lang.reflect.Method; import org.apache.parquet.io.InputFile; import org.apache.parquet.io.SeekableInputStream; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** * Wraps an Object that possibly implements the methods of a Parquet InputFile (but is not a Parquet * InputFile). 
Such an object exists, for instance, in Iceberg's InputFile */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public class WrappedInputFile implements InputFile { Object wrapped; - @IcebergApi + @CometPublicApi({"Iceberg"}) public WrappedInputFile(Object inputFile) { this.wrapped = inputFile; } diff --git a/common/src/main/java/org/apache/comet/vector/CometVector.java b/common/src/main/java/org/apache/comet/vector/CometVector.java index 6dda765d5..cf9ada3f0 100644 --- a/common/src/main/java/org/apache/comet/vector/CometVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometVector.java @@ -39,10 +39,10 @@ import org.apache.spark.sql.vectorized.ColumnarMap; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.types.UTF8String; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** Base class for all Comet column vector implementations. */ -@IcebergApi +@CometPublicApi({"Iceberg"}) public abstract class CometVector extends ColumnVector { private static final int DECIMAL_BYTE_WIDTH = 16; private final byte[] DECIMAL_BYTES = new byte[DECIMAL_BYTE_WIDTH]; @@ -61,7 +61,7 @@ public abstract class CometVector extends ColumnVector { } } - @IcebergApi + @CometPublicApi({"Iceberg"}) public CometVector(DataType type, boolean useDecimal128) { super(type); this.useDecimal128 = useDecimal128; @@ -71,18 +71,18 @@ public abstract class CometVector extends ColumnVector { * Sets the number of nulls in this vector to be 'numNulls'. This is used when the vector is * reused across batches. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public abstract void setNumNulls(int numNulls); /** * Sets the number of values (including both nulls and non-nulls) in this vector to be * 'numValues'. This is used when the vector is reused across batches. */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public abstract void setNumValues(int numValues); /** Returns the number of values in this vector. 
*/ - @IcebergApi + @CometPublicApi({"Iceberg"}) public abstract int numValues(); /** Whether the elements of this vector are of fixed length. */ @@ -222,7 +222,7 @@ public abstract class CometVector extends ColumnVector { throw new UnsupportedOperationException("Not implemented"); } - @IcebergApi + @CometPublicApi({"Iceberg"}) public abstract ValueVector getValueVector(); /** @@ -232,7 +232,7 @@ public abstract class CometVector extends ColumnVector { * @param length the length of the new vector * @return the new vector */ - @IcebergApi + @CometPublicApi({"Iceberg"}) public abstract CometVector slice(int offset, int length); /** diff --git a/docs/source/contributor-guide/iceberg_public_api.md b/docs/source/contributor-guide/iceberg_public_api.md index 051b22459..54823f92b 100644 --- a/docs/source/contributor-guide/iceberg_public_api.md +++ b/docs/source/contributor-guide/iceberg_public_api.md @@ -26,8 +26,10 @@ native Parquet reader for vectorized reads in Spark. **Important**: Changes to these APIs may break Iceberg's Comet integration. Contributors should exercise caution when modifying these classes and consider backward compatibility. -All classes and methods documented here are marked with the `@IcebergApi` annotation -(`org.apache.comet.IcebergApi`) to make them easily identifiable in the source code. +All classes and methods documented here are marked with the `@CometPublicApi({"Iceberg"})` +annotation (`org.apache.comet.CometPublicApi`) to make them easily identifiable in the source code. +This follows a pattern similar to Hadoop's `@InterfaceAudience.LimitedPrivate`, where the +annotation lists the known external consumers of the API. ## Overview @@ -317,7 +319,7 @@ Arrow's base vector interface (shaded). Used as return type in `CometVector.getV 1. Creates `CometSchemaImporter` with a `RootAllocator` 2. Uses `Utils.getColumnReader()` to create appropriate column readers -3. Calls `Native.resetBatch()` and `setPageReader()` for each row group +3. 
Calls `Native.resetBatch()` and `ColumnReader.setPageReader()` for each row group 4. Uses `BatchReader` to coordinate reading batches across all columns 5. Retrieves results via `delegate().currentBatch()` diff --git a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/AbstractApiTest.java b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/AbstractApiTest.java index 4ba7b2b09..533586d52 100644 --- a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/AbstractApiTest.java +++ b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/AbstractApiTest.java @@ -27,12 +27,13 @@ import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.Comparator; import org.junit.After; import org.junit.Before; -import org.apache.comet.IcebergApi; +import org.apache.comet.CometPublicApi; /** * Base class for Iceberg API tests. Provides common utilities for testing annotated API elements. @@ -55,24 +56,28 @@ public abstract class AbstractApiTest { } } - /** Checks if a class has the @IcebergApi annotation. */ + /** Checks if a class has the @CometPublicApi annotation listing "Iceberg". */ protected static boolean hasIcebergApiAnnotation(Class<?> clazz) { - return clazz.isAnnotationPresent(IcebergApi.class); + return hasCometPublicApiForIceberg(clazz.getAnnotation(CometPublicApi.class)); } - /** Checks if a method has the @IcebergApi annotation. */ + /** Checks if a method has the @CometPublicApi annotation listing "Iceberg". */ protected static boolean hasIcebergApiAnnotation(Method method) { - return method.isAnnotationPresent(IcebergApi.class); + return hasCometPublicApiForIceberg(method.getAnnotation(CometPublicApi.class)); } - /** Checks if a constructor has the @IcebergApi annotation. */ + /** Checks if a constructor has the @CometPublicApi annotation listing "Iceberg". 
*/ protected static boolean hasIcebergApiAnnotation(Constructor<?> constructor) { - return constructor.isAnnotationPresent(IcebergApi.class); + return hasCometPublicApiForIceberg(constructor.getAnnotation(CometPublicApi.class)); } - /** Checks if a field has the @IcebergApi annotation. */ + /** Checks if a field has the @CometPublicApi annotation listing "Iceberg". */ protected static boolean hasIcebergApiAnnotation(Field field) { - return field.isAnnotationPresent(IcebergApi.class); + return hasCometPublicApiForIceberg(field.getAnnotation(CometPublicApi.class)); + } + + private static boolean hasCometPublicApiForIceberg(CometPublicApi annotation) { + return annotation != null && Arrays.asList(annotation.value()).contains("Iceberg"); } /** Checks if a class is public. */ diff --git a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/IcebergApiVerificationTest.java b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/IcebergApiVerificationTest.java index be761e387..33e5328e3 100644 --- a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/IcebergApiVerificationTest.java +++ b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/IcebergApiVerificationTest.java @@ -33,20 +33,20 @@ import org.junit.Test; import org.apache.arrow.c.AbstractCometSchemaImporter; +import org.apache.comet.CometPublicApi; import org.apache.comet.CometSchemaImporter; -import org.apache.comet.IcebergApi; import org.apache.comet.parquet.*; import org.apache.comet.vector.CometVector; import static org.assertj.core.api.Assertions.assertThat; /** - * Verifies that all @IcebergApi annotated elements are properly accessible. This test uses + * Verifies that all @CometPublicApi annotated elements are properly accessible. This test uses * reflection to scan for all annotated elements and ensures they meet the public API contract. 
*/ public class IcebergApiVerificationTest extends AbstractApiTest { - /** List of all classes that should have @IcebergApi annotations. */ + /** List of all classes that should have @CometPublicApi annotations. */ private static final List<Class<?>> ICEBERG_API_CLASSES = Arrays.asList( // Parquet classes @@ -71,8 +71,8 @@ public class IcebergApiVerificationTest extends AbstractApiTest { AbstractCometSchemaImporter.class); @Test - public void testIcebergApiAnnotationIsRetainedAtRuntime() { - Retention retention = IcebergApi.class.getAnnotation(Retention.class); + public void testCometPublicApiAnnotationIsRetainedAtRuntime() { + Retention retention = CometPublicApi.class.getAnnotation(Retention.class); assertThat(retention).isNotNull(); assertThat(retention.value()).isEqualTo(RetentionPolicy.RUNTIME); } @@ -117,7 +117,7 @@ public class IcebergApiVerificationTest extends AbstractApiTest { } } - assertThat(missingAnnotations).as("Classes without @IcebergApi annotation").isEmpty(); + assertThat(missingAnnotations).as("Classes without @CometPublicApi annotation").isEmpty(); } @Test @@ -130,7 +130,9 @@ public class IcebergApiVerificationTest extends AbstractApiTest { } } - assertThat(nonPublicClasses).as("@IcebergApi annotated classes that are not public").isEmpty(); + assertThat(nonPublicClasses) + .as("@CometPublicApi annotated classes that are not public") + .isEmpty(); } @Test @@ -145,7 +147,9 @@ public class IcebergApiVerificationTest extends AbstractApiTest { } } - assertThat(nonPublicMethods).as("@IcebergApi annotated methods that are not public").isEmpty(); + assertThat(nonPublicMethods) + .as("@CometPublicApi annotated methods that are not public") + .isEmpty(); } @Test @@ -162,7 +166,7 @@ public class IcebergApiVerificationTest extends AbstractApiTest { } assertThat(nonPublicConstructors) - .as("@IcebergApi annotated constructors that are not public") + .as("@CometPublicApi annotated constructors that are not public") .isEmpty(); } @@ -179,7 +183,7 @@ public 
class IcebergApiVerificationTest extends AbstractApiTest { } assertThat(inaccessibleFields) - .as("@IcebergApi annotated fields that are not accessible") + .as("@CometPublicApi annotated fields that are not accessible") .isEmpty(); } diff --git a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/AbstractColumnReaderApiTest.java b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/AbstractColumnReaderApiTest.java index 6e111c710..698aaaae4 100644 --- a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/AbstractColumnReaderApiTest.java +++ b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/AbstractColumnReaderApiTest.java @@ -54,40 +54,35 @@ public class AbstractColumnReaderApiTest extends AbstractApiTest { } @Test - public void testSetBatchSizeMethodExists() throws NoSuchMethodException { + public void testSetBatchSizeMethod() throws NoSuchMethodException { Method method = AbstractColumnReader.class.getMethod("setBatchSize", int.class); assertThat(hasIcebergApiAnnotation(method)).isTrue(); } @Test - public void testCloseMethodExists() throws NoSuchMethodException { + public void testCloseMethod() throws NoSuchMethodException { Method method = AbstractColumnReader.class.getMethod("close"); assertThat(hasIcebergApiAnnotation(method)).isTrue(); } @Test - public void testReadBatchMethodExists() throws NoSuchMethodException { + public void testReadBatchMethod() throws NoSuchMethodException { Method method = AbstractColumnReader.class.getMethod("readBatch", int.class); assertThat(Modifier.isAbstract(method.getModifiers())).isTrue(); } @Test - public void testCurrentBatchMethodExists() throws NoSuchMethodException { + public void testCurrentBatchMethod() throws NoSuchMethodException { Method method = AbstractColumnReader.class.getMethod("currentBatch"); assertThat(Modifier.isAbstract(method.getModifiers())).isTrue(); assertThat(method.getReturnType().getSimpleName()).isEqualTo("CometVector"); } 
@Test - public void testNativeHandleFieldExists() throws NoSuchFieldException { + public void testNativeHandleField() throws NoSuchFieldException { Field field = AbstractColumnReader.class.getDeclaredField("nativeHandle"); assertThat(hasIcebergApiAnnotation(field)).isTrue(); assertThat(Modifier.isProtected(field.getModifiers())).isTrue(); - } - - @Test - public void testNativeHandleFieldType() throws NoSuchFieldException { - Field field = AbstractColumnReader.class.getDeclaredField("nativeHandle"); assertThat(field.getType()).isEqualTo(long.class); } } diff --git a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/BatchReaderApiTest.java b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/BatchReaderApiTest.java index fc673b7d2..ef5eb3080 100644 --- a/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/BatchReaderApiTest.java +++ b/iceberg-public-api/src/test/java/org/apache/comet/iceberg/api/parquet/BatchReaderApiTest.java @@ -46,39 +46,39 @@ public class BatchReaderApiTest extends AbstractApiTest { } @Test - public void testConstructorWithColumnReadersExists() throws NoSuchMethodException { + public void testConstructorWithColumnReaders() throws NoSuchMethodException { Constructor<?> constructor = BatchReader.class.getConstructor(AbstractColumnReader[].class); assertThat(hasIcebergApiAnnotation(constructor)).isTrue(); } @Test - public void testSetSparkSchemaMethodExists() throws NoSuchMethodException { + public void testSetSparkSchemaMethod() throws NoSuchMethodException { Method method = BatchReader.class.getMethod("setSparkSchema", StructType.class); assertThat(hasIcebergApiAnnotation(method)).isTrue(); } @Test - public void testGetColumnReadersMethodExists() throws NoSuchMethodException { + public void testGetColumnReadersMethod() throws NoSuchMethodException { Method method = BatchReader.class.getMethod("getColumnReaders"); assertThat(hasIcebergApiAnnotation(method)).isTrue(); 
assertThat(method.getReturnType()).isEqualTo(AbstractColumnReader[].class); } @Test - public void testNextBatchWithSizeMethodExists() throws NoSuchMethodException { + public void testNextBatchWithSizeMethod() throws NoSuchMethodException { Method method = BatchReader.class.getMethod("nextBatch", int.class); assertThat(hasIcebergApiAnnotation(method)).isTrue(); assertThat(method.getReturnType()).isEqualTo(boolean.class); } @Test - public void testCurrentBatchMethodExists() throws NoSuchMethodException { + public void testCurrentBatchMethod() throws NoSuchMethodException { Method method = BatchReader.class.getMethod("currentBatch"); assertThat(method.getReturnType().getSimpleName()).isEqualTo("ColumnarBatch"); } @Test - public void testCloseMethodExists() throws NoSuchMethodException { + public void testCloseMethod() throws NoSuchMethodException { Method method = BatchReader.class.getMethod("close"); assertThat(hasIcebergApiAnnotation(method)).isTrue(); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
