rdblue commented on code in PR #12298: URL: https://github.com/apache/iceberg/pull/12298#discussion_r2373044552
########## core/src/main/java/org/apache/iceberg/avro/Avro.java: ########## @@ -693,59 +858,184 @@ public ReadBuilder createReaderFunc( */ @Override public ReadBuilder split(long newStart, long newLength) { - this.start = newStart; - this.length = newLength; + impl.split(newStart, newLength); return this; } @Override public ReadBuilder project(org.apache.iceberg.Schema projectedSchema) { - this.schema = projectedSchema; + impl.project(projectedSchema); return this; } @Override public ReadBuilder reuseContainers() { - this.reuseContainers = true; + impl.reuseContainers(); return this; } public ReadBuilder reuseContainers(boolean shouldReuse) { - this.reuseContainers = shouldReuse; + impl.reuseContainers = shouldReuse; return this; } public ReadBuilder rename(String fullName, String newName) { - renames.put(fullName, newName); + impl.renames.put(fullName, newName); return this; } @Override public InternalData.ReadBuilder setRootType(Class<? extends StructLike> rootClass) { - this.rootType = rootClass; + impl.setRootType(rootClass); return this; } @Override public InternalData.ReadBuilder setCustomType( int fieldId, Class<? extends StructLike> structClass) { - typeMap.put(fieldId, structClass); + impl.setCustomType(fieldId, structClass); return this; } public ReadBuilder withNameMapping(NameMapping newNameMapping) { - this.nameMapping = newNameMapping; + impl.nameMapping(newNameMapping); return this; } public ReadBuilder classLoader(ClassLoader classLoader) { - this.loader = classLoader; + impl.loader = classLoader; return this; } @Override @SuppressWarnings("unchecked") public <D> AvroIterable<D> build() { + return (AvroIterable<D>) impl.build(); + } + } + + static class ReadBuilderImpl<D, S> + implements InternalData.ReadBuilder, org.apache.iceberg.io.ReadBuilder<D, S> { + private final InputFile file; + private final Map<String, String> renames = Maps.newLinkedHashMap(); + private final Map<Integer, Class<? extends StructLike>> typeMap = Maps.newHashMap(); + private Long start = null; + private Long length = null; + private Class<? extends StructLike> rootType = null; + private ClassLoader loader = Thread.currentThread().getContextClassLoader(); + private NameMapping nameMapping; + private boolean reuseContainers = false; + private org.apache.iceberg.Schema schema = null; + private Function<Schema, DatumReader<?>> createReaderFunc = null; + private BiFunction<org.apache.iceberg.Schema, Schema, DatumReader<?>> createReaderBiFunc = null; + private Function<org.apache.iceberg.Schema, DatumReader<?>> createResolvingReaderFunc = null; + private BiFunction<org.apache.iceberg.Schema, Map<Integer, ?>, DatumReader<?>> readerFunction; + private Map<Integer, ?> constantFieldAccessors = ImmutableMap.of(); + + @SuppressWarnings("UnnecessaryLambda") + private final Function<org.apache.iceberg.Schema, DatumReader<D>> defaultCreateReaderFunc = + readSchema -> { + GenericAvroReader<D> reader = GenericAvroReader.create(readSchema); + reader.setClassLoader(loader); + return reader; + }; + + ReadBuilderImpl(InputFile file) { + Preconditions.checkNotNull(file, "Input file cannot be null"); + this.file = file; + } + + ReadBuilderImpl<D, S> readerFunction( + BiFunction<org.apache.iceberg.Schema, Map<Integer, ?>, DatumReader<?>> newReaderFunction) { + Preconditions.checkState( + createReaderBiFunc == null + && createReaderFunc == null + && createResolvingReaderFunc == null, + "Cannot set multiple read builder functions"); + this.readerFunction = newReaderFunction; + return this; + } + + @Override + public ReadBuilderImpl<D, S> split(long newStart, long newLength) { + this.start = newStart; + this.length = newLength; + return this; + } + + @Override + public ReadBuilderImpl<D, S> project(org.apache.iceberg.Schema projectedSchema) { + this.schema = projectedSchema; + return this; + } + + @Override + public ReadBuilderImpl<D, S> set(String key, String value) { + // Configuration is not used for Avro reader creation + return this; + } + + @Override + public ReadBuilderImpl<D, S> reuseContainers() { + this.reuseContainers = true; + return this; + } + + @Override + public ReadBuilderImpl<D, S> recordsPerBatch(int numRowsPerBatch) { + throw new UnsupportedOperationException("Batch reading is not supported in Avro reader"); + } + + @Override + public ReadBuilderImpl<D, S> constantValues(Map<Integer, ?> newConstantFieldAccessors) { + this.constantFieldAccessors = newConstantFieldAccessors; + return this; + } + + @Override + public ReadBuilderImpl<D, S> setRootType(Class<? extends StructLike> rootClass) { + this.rootType = rootClass; + return this; + } + + @Override + public ReadBuilderImpl<D, S> setCustomType( + int fieldId, Class<? extends StructLike> structClass) { + typeMap.put(fieldId, structClass); + return this; + } + + @Override + public ReadBuilderImpl<D, S> nameMapping(NameMapping newNameMapping) { + this.nameMapping = newNameMapping; + return this; + } + + @Override + public ReadBuilderImpl<D, S> caseSensitive(boolean newCaseSensitive) { + // Filtering is not supported in Avro reader + return this; + } + + @Override + public ReadBuilderImpl<D, S> filter(Expression newFilter) { + // Filtering is not supported in Avro reader + return this; Review Comment: Even if filtering is not supported, this could use a comment to justify ignoring the filter that is passed in rather than throwing an exception. The justification would have to be that filtering is best-effort and optional. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org