This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 43ef03bf88 Parquet: Remove deprecated VectorizedReader.setRowGroupInfo
and ParquetValueReader.setPageSource (#12321)
43ef03bf88 is described below
commit 43ef03bf88a3015a8efaa251b9bed35e47e01679
Author: Yuya Ebihara <[email protected]>
AuthorDate: Thu Feb 20 16:19:36 2025 +0900
Parquet: Remove deprecated VectorizedReader.setRowGroupInfo and
ParquetValueReader.setPageSource (#12321)
---
.palantir/revapi.yml | 35 ++++++++++++++++++++++
.../iceberg/arrow/vectorized/BaseBatchReader.java | 6 ----
.../arrow/vectorized/VectorizedArrowReader.java | 24 ---------------
.../iceberg/flink/data/FlinkParquetReaders.java | 13 ++------
.../iceberg/flink/data/FlinkParquetReaders.java | 13 ++------
.../iceberg/data/parquet/BaseParquetReaders.java | 6 +---
.../iceberg/data/parquet/BaseParquetWriter.java | 6 +---
.../apache/iceberg/parquet/ParquetValueReader.java | 9 ------
.../iceberg/parquet/ParquetValueReaders.java | 8 -----
.../apache/iceberg/parquet/VectorizedReader.java | 13 --------
.../iceberg/spark/data/SparkParquetReaders.java | 17 +++--------
.../spark/data/vectorized/ColumnarBatchReader.java | 6 ----
.../iceberg/spark/data/SparkParquetReaders.java | 17 +++--------
.../spark/data/vectorized/ColumnarBatchReader.java | 6 ----
.../spark/data/vectorized/CometColumnReader.java | 10 -------
.../data/vectorized/CometColumnarBatchReader.java | 6 ----
.../spark/data/vectorized/ColumnarBatchReader.java | 6 ----
.../spark/data/vectorized/CometColumnReader.java | 10 -------
.../data/vectorized/CometColumnarBatchReader.java | 6 ----
19 files changed, 51 insertions(+), 166 deletions(-)
diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 18c63fbe7b..9fc79c53cc 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1171,6 +1171,41 @@ acceptedBreaks:
\ java.util.function.Function<T, java.lang.Long>,
org.apache.iceberg.io.CloseableIterable<java.lang.Long>,\
\ java.util.function.Consumer<T>)"
justification: "Removing deprecated code"
+ "1.8.0":
+ org.apache.iceberg:iceberg-parquet:
+ - code: "java.class.visibilityReduced"
+ old: "class org.apache.iceberg.data.parquet.BaseParquetReaders<T extends
java.lang.Object>"
+ new: "class org.apache.iceberg.data.parquet.BaseParquetReaders<T extends
java.lang.Object>"
+ justification: "Changing deprecated code"
+ - code: "java.class.visibilityReduced"
+ old: "class org.apache.iceberg.data.parquet.BaseParquetWriter<T extends
java.lang.Object>"
+ new: "class org.apache.iceberg.data.parquet.BaseParquetWriter<T extends
java.lang.Object>"
+ justification: "Changing deprecated code"
+ - code: "java.element.noLongerDeprecated"
+ old: "class org.apache.iceberg.data.parquet.BaseParquetReaders<T extends
java.lang.Object>"
+ new: "class org.apache.iceberg.data.parquet.BaseParquetReaders<T extends
java.lang.Object>"
+ justification: "Changing deprecated code"
+ - code: "java.element.noLongerDeprecated"
+ old: "class org.apache.iceberg.data.parquet.BaseParquetWriter<T extends
java.lang.Object>"
+ new: "class org.apache.iceberg.data.parquet.BaseParquetWriter<T extends
java.lang.Object>"
+ justification: "Changing deprecated code"
+ - code: "java.method.removed"
+ old: "method void
org.apache.iceberg.parquet.ParquetValueReader<T>::setPageSource(org.apache.parquet.column.page.PageReadStore,\
+ \ long)"
+ justification: "Removing deprecated code"
+ - code: "java.method.removed"
+ old: "method void
org.apache.iceberg.parquet.ParquetValueReaders.StructReader<T,\
+ \ I>::<init>(java.util.List<org.apache.parquet.schema.Type>,
java.util.List<org.apache.iceberg.parquet.ParquetValueReader<?>>)"
+ justification: "Removing deprecated code"
+ - code: "java.method.removed"
+ old: "method void
org.apache.iceberg.parquet.VectorizedReader<T>::setRowGroupInfo(org.apache.parquet.column.page.PageReadStore,\
+ \ java.util.Map<org.apache.parquet.hadoop.metadata.ColumnPath,
org.apache.parquet.hadoop.metadata.ColumnChunkMetaData>,\
+ \ long)"
+ justification: "Removing deprecated code"
+ - code: "java.method.visibilityReduced"
+ old: "method void
org.apache.iceberg.data.parquet.BaseParquetWriter<T>::<init>()"
+ new: "method void
org.apache.iceberg.data.parquet.BaseParquetWriter<T>::<init>()"
+ justification: "Changing deprecated code"
apache-iceberg-0.14.0:
org.apache.iceberg:iceberg-api:
- code: "java.class.defaultSerializationChanged"
diff --git
a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/BaseBatchReader.java
b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/BaseBatchReader.java
index daa116f292..3222afeb53 100644
---
a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/BaseBatchReader.java
+++
b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/BaseBatchReader.java
@@ -39,12 +39,6 @@ public abstract class BaseBatchReader<T> implements
VectorizedReader<T> {
this.vectorHolders = new VectorHolder[readers.size()];
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {
diff --git
a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java
b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java
index ef33660dac..f65774fec2 100644
---
a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java
+++
b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java
@@ -427,12 +427,6 @@ public class VectorizedArrowReader implements
VectorizedReader<VectorHolder> {
}
}
- @Override
- public void setRowGroupInfo(
- PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata,
long rowPosition) {
- setRowGroupInfo(source, metadata);
- }
-
@Override
public void setRowGroupInfo(PageReadStore source, Map<ColumnPath,
ColumnChunkMetaData> metadata) {
ColumnChunkMetaData chunkMetaData =
metadata.get(ColumnPath.get(columnDescriptor.getPath()));
@@ -474,10 +468,6 @@ public class VectorizedArrowReader implements
VectorizedReader<VectorHolder> {
return VectorHolder.dummyHolder(numValsToRead);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata,
long rowPosition) {}
-
@Override
public void setRowGroupInfo(
PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata) {}
@@ -545,12 +535,6 @@ public class VectorizedArrowReader implements
VectorizedReader<VectorHolder> {
return nullabilityHolder;
}
- @Override
- public void setRowGroupInfo(
- PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata,
long rowPosition) {
- setRowGroupInfo(source, metadata);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata) {
@@ -601,10 +585,6 @@ public class VectorizedArrowReader implements
VectorizedReader<VectorHolder> {
return VectorHolder.constantHolder(icebergField(), numValsToRead, value);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata,
long rowPosition) {}
-
@Override
public void setRowGroupInfo(
PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata) {}
@@ -632,10 +612,6 @@ public class VectorizedArrowReader implements
VectorizedReader<VectorHolder> {
return VectorHolder.deletedVectorHolder(numValsToRead);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata,
long rowPosition) {}
-
@Override
public void setRowGroupInfo(
PageReadStore source, Map<ColumnPath, ColumnChunkMetaData> metadata) {}
diff --git
a/flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
b/flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
index a23fb2d6ee..fc407fe2a1 100644
---
a/flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
+++
b/flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
@@ -116,7 +116,6 @@ public class FlinkParquetReaders {
expected != null ? expected.fields() : ImmutableList.of();
List<ParquetValueReader<?>> reorderedFields =
Lists.newArrayListWithExpectedSize(expectedFields.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(expectedFields.size());
// Defaulting to parent max definition level
int defaultMaxDefinitionLevel =
type.getMaxDefinitionLevel(currentPath());
for (Types.NestedField field : expectedFields) {
@@ -128,32 +127,26 @@ public class FlinkParquetReaders {
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel);
reorderedFields.add(
ParquetValueReaders.constant(idToConstant.get(id),
fieldMaxDefinitionLevel));
- types.add(null);
} else if (id == MetadataColumns.ROW_POSITION.fieldId()) {
reorderedFields.add(ParquetValueReaders.position());
- types.add(null);
} else if (id == MetadataColumns.IS_DELETED.fieldId()) {
reorderedFields.add(ParquetValueReaders.constant(false));
- types.add(null);
} else if (reader != null) {
reorderedFields.add(reader);
- types.add(typesById.get(id));
} else if (field.initialDefault() != null) {
reorderedFields.add(
ParquetValueReaders.constant(
RowDataUtil.convertConstant(field.type(),
field.initialDefault()),
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel)));
- types.add(typesById.get(id));
} else if (field.isOptional()) {
reorderedFields.add(ParquetValueReaders.nulls());
- types.add(null);
} else {
throw new IllegalArgumentException(
String.format("Missing required field: %s", field.name()));
}
}
- return new RowDataReader(types, reorderedFields);
+ return new RowDataReader(reorderedFields);
}
@Override
@@ -662,8 +655,8 @@ public class FlinkParquetReaders {
extends ParquetValueReaders.StructReader<RowData, GenericRowData> {
private final int numFields;
- RowDataReader(List<Type> types, List<ParquetValueReader<?>> readers) {
- super(types, readers);
+ RowDataReader(List<ParquetValueReader<?>> readers) {
+ super(readers);
this.numFields = readers.size();
}
diff --git
a/flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
b/flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
index a23fb2d6ee..fc407fe2a1 100644
---
a/flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
+++
b/flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
@@ -116,7 +116,6 @@ public class FlinkParquetReaders {
expected != null ? expected.fields() : ImmutableList.of();
List<ParquetValueReader<?>> reorderedFields =
Lists.newArrayListWithExpectedSize(expectedFields.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(expectedFields.size());
// Defaulting to parent max definition level
int defaultMaxDefinitionLevel =
type.getMaxDefinitionLevel(currentPath());
for (Types.NestedField field : expectedFields) {
@@ -128,32 +127,26 @@ public class FlinkParquetReaders {
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel);
reorderedFields.add(
ParquetValueReaders.constant(idToConstant.get(id),
fieldMaxDefinitionLevel));
- types.add(null);
} else if (id == MetadataColumns.ROW_POSITION.fieldId()) {
reorderedFields.add(ParquetValueReaders.position());
- types.add(null);
} else if (id == MetadataColumns.IS_DELETED.fieldId()) {
reorderedFields.add(ParquetValueReaders.constant(false));
- types.add(null);
} else if (reader != null) {
reorderedFields.add(reader);
- types.add(typesById.get(id));
} else if (field.initialDefault() != null) {
reorderedFields.add(
ParquetValueReaders.constant(
RowDataUtil.convertConstant(field.type(),
field.initialDefault()),
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel)));
- types.add(typesById.get(id));
} else if (field.isOptional()) {
reorderedFields.add(ParquetValueReaders.nulls());
- types.add(null);
} else {
throw new IllegalArgumentException(
String.format("Missing required field: %s", field.name()));
}
}
- return new RowDataReader(types, reorderedFields);
+ return new RowDataReader(reorderedFields);
}
@Override
@@ -662,8 +655,8 @@ public class FlinkParquetReaders {
extends ParquetValueReaders.StructReader<RowData, GenericRowData> {
private final int numFields;
- RowDataReader(List<Type> types, List<ParquetValueReader<?>> readers) {
- super(types, readers);
+ RowDataReader(List<ParquetValueReader<?>> readers) {
+ super(readers);
this.numFields = readers.size();
}
diff --git
a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java
b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java
index 7e972e6ced..43f46794df 100644
---
a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java
+++
b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java
@@ -53,11 +53,7 @@ import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
-/**
- * @deprecated since 1.8.0, will be made package-private in 1.9.0
- */
-@Deprecated
-public abstract class BaseParquetReaders<T> {
+abstract class BaseParquetReaders<T> {
protected BaseParquetReaders() {}
protected ParquetValueReader<T> createReader(Schema expectedSchema,
MessageType fileSchema) {
diff --git
a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java
b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java
index 1bfa90b263..13ae65d10a 100644
---
a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java
+++
b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java
@@ -32,11 +32,7 @@ import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
-/**
- * @deprecated since 1.8.0, will be made package-private in 1.9.0
- */
-@Deprecated
-public abstract class BaseParquetWriter<T> {
+abstract class BaseParquetWriter<T> {
@SuppressWarnings("unchecked")
protected ParquetValueWriter<T> createWriter(MessageType type) {
diff --git
a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java
b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java
index 01d3e15bb4..14e5071b4d 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java
@@ -28,15 +28,6 @@ public interface ParquetValueReader<T> {
List<TripleIterator<?>> columns();
- /**
- * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link
#setPageSource(PageReadStore)}
- * instead.
- */
- @Deprecated
- default void setPageSource(PageReadStore pageStore, long rowPosition) {
- setPageSource(pageStore);
- }
-
default void setPageSource(PageReadStore pageStore) {
throw new UnsupportedOperationException(
this.getClass().getName() + " doesn't implement
setPageSource(PageReadStore)");
diff --git
a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java
b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java
index bb89300859..0d26708bdf 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java
@@ -900,14 +900,6 @@ public class ParquetValueReaders {
private final TripleIterator<?> column;
private final List<TripleIterator<?>> children;
- /**
- * @deprecated will be removed in 1.9.0; use {@link #StructReader(List)}
instead.
- */
- @Deprecated
- protected StructReader(List<Type> types, List<ParquetValueReader<?>>
readers) {
- this(readers);
- }
-
protected StructReader(List<ParquetValueReader<?>> readers) {
this.readers =
(ParquetValueReader<?>[])
Array.newInstance(ParquetValueReader.class, readers.size());
diff --git
a/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java
b/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java
index caf2b6ff22..23be5d5f50 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java
@@ -37,19 +37,6 @@ public interface VectorizedReader<T> {
void setBatchSize(int batchSize);
- /**
- * Sets the row group information to be used with this reader
- *
- * @param pages row group information for all the columns
- * @param metadata map of {@link ColumnPath} -> {@link
ColumnChunkMetaData} for the row group
- * @param rowPosition the row group's row offset in the parquet file
- * @deprecated since 1.8.0, will be removed in 1.9.0; use {@link
#setRowGroupInfo(PageReadStore,
- * Map)} instead.
- */
- @Deprecated
- void setRowGroupInfo(
- PageReadStore pages, Map<ColumnPath, ColumnChunkMetaData> metadata, long
rowPosition);
-
/**
* Sets the row group information to be used with this reader
*
diff --git
a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index 3ce54d2d9f..223d7a8995 100644
---
a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++
b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -108,16 +108,14 @@ public class SparkParquetReaders {
// the expected struct is ignored because nested fields are never found
when the
List<ParquetValueReader<?>> newFields =
Lists.newArrayListWithExpectedSize(fieldReaders.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(fieldReaders.size());
List<Type> fields = struct.getFields();
for (int i = 0; i < fields.size(); i += 1) {
Type fieldType = fields.get(i);
int fieldD = type().getMaxDefinitionLevel(path(fieldType.getName())) -
1;
newFields.add(ParquetValueReaders.option(fieldType, fieldD,
fieldReaders.get(i)));
- types.add(fieldType);
}
- return new InternalRowReader(types, newFields);
+ return new InternalRowReader(newFields);
}
}
@@ -161,7 +159,6 @@ public class SparkParquetReaders {
expected != null ? expected.fields() : ImmutableList.of();
List<ParquetValueReader<?>> reorderedFields =
Lists.newArrayListWithExpectedSize(expectedFields.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(expectedFields.size());
// Defaulting to parent max definition level
int defaultMaxDefinitionLevel =
type.getMaxDefinitionLevel(currentPath());
for (Types.NestedField field : expectedFields) {
@@ -173,32 +170,26 @@ public class SparkParquetReaders {
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel);
reorderedFields.add(
ParquetValueReaders.constant(idToConstant.get(id),
fieldMaxDefinitionLevel));
- types.add(null);
} else if (id == MetadataColumns.ROW_POSITION.fieldId()) {
reorderedFields.add(ParquetValueReaders.position());
- types.add(null);
} else if (id == MetadataColumns.IS_DELETED.fieldId()) {
reorderedFields.add(ParquetValueReaders.constant(false));
- types.add(null);
} else if (reader != null) {
reorderedFields.add(reader);
- types.add(typesById.get(id));
} else if (field.initialDefault() != null) {
reorderedFields.add(
ParquetValueReaders.constant(
SparkUtil.internalToSpark(field.type(),
field.initialDefault()),
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel)));
- types.add(typesById.get(id));
} else if (field.isOptional()) {
reorderedFields.add(ParquetValueReaders.nulls());
- types.add(null);
} else {
throw new IllegalArgumentException(
String.format("Missing required field: %s", field.name()));
}
}
- return new InternalRowReader(types, reorderedFields);
+ return new InternalRowReader(reorderedFields);
}
@Override
@@ -555,8 +546,8 @@ public class SparkParquetReaders {
private static class InternalRowReader extends StructReader<InternalRow,
GenericInternalRow> {
private final int numFields;
- InternalRowReader(List<Type> types, List<ParquetValueReader<?>> readers) {
- super(types, readers);
+ InternalRowReader(List<ParquetValueReader<?>> readers) {
+ super(readers);
this.numFields = readers.size();
}
diff --git
a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
index 77cb2ff771..6581dc580a 100644
---
a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
+++
b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
@@ -52,12 +52,6 @@ public class ColumnarBatchReader extends
BaseBatchReader<ColumnarBatch> {
readers.stream().anyMatch(reader -> reader instanceof
DeletedVectorReader);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {
diff --git
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index 3ce54d2d9f..223d7a8995 100644
---
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -108,16 +108,14 @@ public class SparkParquetReaders {
// the expected struct is ignored because nested fields are never found
when the
List<ParquetValueReader<?>> newFields =
Lists.newArrayListWithExpectedSize(fieldReaders.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(fieldReaders.size());
List<Type> fields = struct.getFields();
for (int i = 0; i < fields.size(); i += 1) {
Type fieldType = fields.get(i);
int fieldD = type().getMaxDefinitionLevel(path(fieldType.getName())) -
1;
newFields.add(ParquetValueReaders.option(fieldType, fieldD,
fieldReaders.get(i)));
- types.add(fieldType);
}
- return new InternalRowReader(types, newFields);
+ return new InternalRowReader(newFields);
}
}
@@ -161,7 +159,6 @@ public class SparkParquetReaders {
expected != null ? expected.fields() : ImmutableList.of();
List<ParquetValueReader<?>> reorderedFields =
Lists.newArrayListWithExpectedSize(expectedFields.size());
- List<Type> types =
Lists.newArrayListWithExpectedSize(expectedFields.size());
// Defaulting to parent max definition level
int defaultMaxDefinitionLevel =
type.getMaxDefinitionLevel(currentPath());
for (Types.NestedField field : expectedFields) {
@@ -173,32 +170,26 @@ public class SparkParquetReaders {
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel);
reorderedFields.add(
ParquetValueReaders.constant(idToConstant.get(id),
fieldMaxDefinitionLevel));
- types.add(null);
} else if (id == MetadataColumns.ROW_POSITION.fieldId()) {
reorderedFields.add(ParquetValueReaders.position());
- types.add(null);
} else if (id == MetadataColumns.IS_DELETED.fieldId()) {
reorderedFields.add(ParquetValueReaders.constant(false));
- types.add(null);
} else if (reader != null) {
reorderedFields.add(reader);
- types.add(typesById.get(id));
} else if (field.initialDefault() != null) {
reorderedFields.add(
ParquetValueReaders.constant(
SparkUtil.internalToSpark(field.type(),
field.initialDefault()),
maxDefinitionLevelsById.getOrDefault(id,
defaultMaxDefinitionLevel)));
- types.add(typesById.get(id));
} else if (field.isOptional()) {
reorderedFields.add(ParquetValueReaders.nulls());
- types.add(null);
} else {
throw new IllegalArgumentException(
String.format("Missing required field: %s", field.name()));
}
}
- return new InternalRowReader(types, reorderedFields);
+ return new InternalRowReader(reorderedFields);
}
@Override
@@ -555,8 +546,8 @@ public class SparkParquetReaders {
private static class InternalRowReader extends StructReader<InternalRow,
GenericInternalRow> {
private final int numFields;
- InternalRowReader(List<Type> types, List<ParquetValueReader<?>> readers) {
- super(types, readers);
+ InternalRowReader(List<ParquetValueReader<?>> readers) {
+ super(readers);
this.numFields = readers.size();
}
diff --git
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
index 2123939399..a0670c2bbe 100644
---
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
+++
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
@@ -50,12 +50,6 @@ public class ColumnarBatchReader extends
BaseBatchReader<ColumnarBatch> {
readers.stream().anyMatch(reader -> reader instanceof
DeletedVectorReader);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {
diff --git
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
index 4794863ab1..16159dcbdf 100644
---
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
+++
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
@@ -19,7 +19,6 @@
package org.apache.iceberg.spark.data.vectorized;
import java.io.IOException;
-import java.util.Map;
import org.apache.comet.parquet.AbstractColumnReader;
import org.apache.comet.parquet.ColumnReader;
import org.apache.comet.parquet.TypeUtil;
@@ -31,10 +30,7 @@ import
org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.types.Types;
import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.column.page.PageReader;
-import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
-import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
@@ -137,12 +133,6 @@ class CometColumnReader implements
VectorizedReader<ColumnVector> {
this.batchSize = size;
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageReadStore, Map<ColumnPath, ColumnChunkMetaData> map,
long size) {
- throw new UnsupportedOperationException("Not supported");
- }
-
@Override
public ColumnVector read(ColumnVector reuse, int numRowsToRead) {
throw new UnsupportedOperationException("Not supported");
diff --git
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
index 1440e5d1d3..04ac69476a 100644
---
a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
+++
b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
@@ -70,12 +70,6 @@ class CometColumnarBatchReader implements
VectorizedReader<ColumnarBatch> {
delegate.setSparkSchema(SparkSchemaUtil.convert(schema));
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {
diff --git
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
index 2123939399..a0670c2bbe 100644
---
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
+++
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnarBatchReader.java
@@ -50,12 +50,6 @@ public class ColumnarBatchReader extends
BaseBatchReader<ColumnarBatch> {
readers.stream().anyMatch(reader -> reader instanceof
DeletedVectorReader);
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {
diff --git
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
index 4794863ab1..16159dcbdf 100644
---
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
+++
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnReader.java
@@ -19,7 +19,6 @@
package org.apache.iceberg.spark.data.vectorized;
import java.io.IOException;
-import java.util.Map;
import org.apache.comet.parquet.AbstractColumnReader;
import org.apache.comet.parquet.ColumnReader;
import org.apache.comet.parquet.TypeUtil;
@@ -31,10 +30,7 @@ import
org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.types.Types;
import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.column.page.PageReader;
-import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
-import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
@@ -137,12 +133,6 @@ class CometColumnReader implements
VectorizedReader<ColumnVector> {
this.batchSize = size;
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageReadStore, Map<ColumnPath, ColumnChunkMetaData> map,
long size) {
- throw new UnsupportedOperationException("Not supported");
- }
-
@Override
public ColumnVector read(ColumnVector reuse, int numRowsToRead) {
throw new UnsupportedOperationException("Not supported");
diff --git
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
index 1440e5d1d3..04ac69476a 100644
---
a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
+++
b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/CometColumnarBatchReader.java
@@ -70,12 +70,6 @@ class CometColumnarBatchReader implements
VectorizedReader<ColumnarBatch> {
delegate.setSparkSchema(SparkSchemaUtil.convert(schema));
}
- @Override
- public void setRowGroupInfo(
- PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData,
long rowPosition) {
- setRowGroupInfo(pageStore, metaData);
- }
-
@Override
public void setRowGroupInfo(
PageReadStore pageStore, Map<ColumnPath, ColumnChunkMetaData> metaData) {