This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 406016bf2a Data, Orc, Parquet: Throw exception when non-vectorized
reader set recordsPerBatch (#15701)
406016bf2a is described below
commit 406016bf2a60e25db121ee4895f1008037664dc7
Author: GuoYu <[email protected]>
AuthorDate: Sat Mar 21 19:58:36 2026 +0800
Data, Orc, Parquet: Throw exception when non-vectorized reader set
recordsPerBatch (#15701)
* Core: Throw exception when non-vectorized set recordsPerBatch
* Remove useless assume
* Remove FEATURE_RECORDS_PER_BATCH
---
.../apache/iceberg/data/BaseFormatModelTests.java | 29 +++++++++++++++++++---
.../org/apache/iceberg/orc/ORCFormatModel.java | 5 ++++
.../apache/iceberg/parquet/ParquetFormatModel.java | 5 ++++
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git
a/data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java
b/data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java
index c9967ebcb1..e295b5fbc1 100644
--- a/data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java
+++ b/data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java
@@ -24,6 +24,7 @@ import static org.apache.iceberg.TestBase.SCHEMA;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assumptions.assumeThat;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
import java.io.IOException;
import java.util.Arrays;
@@ -72,6 +73,10 @@ public abstract class BaseFormatModelTests<T> {
protected abstract void assertEquals(Schema schema, List<T> expected, List<T> actual);
+ protected boolean supportsBatchReads() {
+ return false;
+ }
+
private static final FileFormat[] FILE_FORMATS =
new FileFormat[] {FileFormat.AVRO, FileFormat.PARQUET, FileFormat.ORC};
@@ -85,16 +90,13 @@ public abstract class BaseFormatModelTests<T> {
static final String FEATURE_FILTER = "filter";
static final String FEATURE_CASE_SENSITIVE = "caseSensitive";
- static final String FEATURE_RECORDS_PER_BATCH = "recordsPerBatch";
static final String FEATURE_SPLIT = "split";
static final String FEATURE_REUSE_CONTAINERS = "reuseContainers";
private static final Map<FileFormat, String[]> MISSING_FEATURES =
Map.of(
FileFormat.AVRO,
- new String[] {
-   FEATURE_FILTER, FEATURE_CASE_SENSITIVE, FEATURE_RECORDS_PER_BATCH, FEATURE_SPLIT
- },
+ new String[] {FEATURE_FILTER, FEATURE_CASE_SENSITIVE, FEATURE_SPLIT},
FileFormat.ORC,
new String[] {FEATURE_REUSE_CONTAINERS});
@@ -607,6 +609,25 @@ public abstract class BaseFormatModelTests<T> {
reuseRecords.forEach(r -> assertThat(r).isSameAs(reuseRecords.get(0)));
}
+ @ParameterizedTest
+ @FieldSource("FILE_FORMATS")
+ void testReaderBuilderRecordsPerBatchNotSupported(FileFormat fileFormat)
throws IOException {
+ assumeFalse(supportsBatchReads(), engineType().getSimpleName() + "
supports batch reads");
+
+ DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
+ Schema schema = dataGenerator.schema();
+ List<Record> genericRecords = dataGenerator.generateRecords();
+ writeGenericRecords(fileFormat, schema, genericRecords);
+
+ InputFile inputFile = encryptedFile.encryptingOutputFile().toInputFile();
+ assertThatThrownBy(
+ () ->
+ FormatModelRegistry.readBuilder(fileFormat, engineType(), inputFile)
+ .recordsPerBatch(100))
+ .hasMessageContaining("Batch reading is not supported")
+ .isInstanceOf(UnsupportedOperationException.class);
+ }
+
private void readAndAssertGenericRecords(
    FileFormat fileFormat, Schema schema, List<Record> expected) throws IOException {
InputFile inputFile = encryptedFile.encryptingOutputFile().toInputFile();
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORCFormatModel.java
b/orc/src/main/java/org/apache/iceberg/orc/ORCFormatModel.java
index ed5d734ef9..e9d83cb27b 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ORCFormatModel.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ORCFormatModel.java
@@ -269,6 +269,11 @@ public class ORCFormatModel<D, S, R>
@Override
public ReadBuilder<D, S> recordsPerBatch(int numRowsPerBatch) {
+ if (!isBatchReader) {
+ throw new UnsupportedOperationException(
+ "Batch reading is not supported in non-vectorized reader");
+ }
+
internal.recordsPerBatch(numRowsPerBatch);
return this;
}
diff --git
a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetFormatModel.java
b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetFormatModel.java
index 90d6e3ef41..fbd7a6e97f 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetFormatModel.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetFormatModel.java
@@ -279,6 +279,11 @@ public class ParquetFormatModel<D, S, R>
@Override
public ReadBuilder<D, S> recordsPerBatch(int numRowsPerBatch) {
+ if (!isBatchReader) {
+ throw new UnsupportedOperationException(
+ "Batch reading is not supported in non-vectorized reader");
+ }
+
internal.recordsPerBatch(numRowsPerBatch);
return this;
}