jackye1995 commented on a change in pull request #1820:
URL: https://github.com/apache/iceberg/pull/1820#discussion_r555379148
##########
File path: core/src/main/java/org/apache/iceberg/ManifestReader.java
##########
@@ -52,8 +52,9 @@
public class ManifestReader<F extends ContentFile<F>>
extends CloseableGroup implements CloseableIterable<F> {
static final ImmutableList<String> ALL_COLUMNS = ImmutableList.of("*");
- static final Set<String> STATS_COLUMNS = Sets.newHashSet(
- "value_counts", "null_value_counts", "nan_value_counts", "lower_bounds",
"upper_bounds");
+
+ private static final Set<String> STATS_COLUMNS = Sets.newHashSet(
Review comment:
nit: use immutable set
##########
File path: core/src/test/java/org/apache/iceberg/TestManifestReaderStats.java
##########
@@ -100,47 +110,79 @@ public void
testReadEntriesWithFilterAndSelectIncludesFullStats() throws IOExcep
public void testReadIteratorWithFilterAndSelectDropsStats() throws
IOException {
ManifestFile manifest = writeManifest(1000L, FILE);
try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest,
FILE_IO)
- .select(ImmutableSet.of("record_count"))
+ .select(ImmutableList.of("file_path"))
.filterRows(Expressions.equal("id", 3))) {
DataFile entry = reader.iterator().next();
assertStatsDropped(entry);
}
}
+
+ @Test
+ public void testReadIteratorWithFilterAndSelectRecordCountDropsStats()
throws IOException {
+ ManifestFile manifest = writeManifest(1000L, FILE);
+ try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest,
FILE_IO)
+ .select(ImmutableList.of("file_path", "record_count"))
+ .filterRows(Expressions.equal("id", 3))) {
+ DataFile entry = reader.iterator().next();
+ assertStatsDropped(entry);
+ }
+ }
+
@Test
public void testReadIteratorWithFilterAndSelectStatsIncludesFullStats()
throws IOException {
ManifestFile manifest = writeManifest(1000L, FILE);
try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest,
FILE_IO)
- .select(ImmutableSet.of("record_count", "value_counts"))
+ .select(ImmutableList.of("file_path", "value_counts"))
.filterRows(Expressions.equal("id", 3))) {
DataFile entry = reader.iterator().next();
assertFullStats(entry);
+
+ // explicitly call copyWithoutStats and ensure record count will not be
dropped
+ assertStatsDropped(entry.copyWithoutStats());
}
}
@Test
- public void testReadEntriesWithSelectNotIncludeFullStats() throws
IOException {
+ public void testReadEntriesWithSelectNotProjectStats() throws IOException {
ManifestFile manifest = writeManifest(1000L, FILE);
try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest,
FILE_IO)
- .select(ImmutableSet.of("record_count"))) {
+ .select(ImmutableList.of("file_path"))) {
CloseableIterable<ManifestEntry<DataFile>> entries = reader.entries();
ManifestEntry<DataFile> entry = entries.iterator().next();
- assertStatsDropped(entry.file());
+ DataFile dataFile = entry.file();
+
+ // selected field is populated
+ Assert.assertEquals(FILE_PATH, dataFile.path());
+
+ // not selected fields are all null and not projected
+ Assert.assertNull(dataFile.columnSizes());
+ Assert.assertNull(dataFile.valueCounts());
+ Assert.assertNull(dataFile.nullValueCounts());
+ Assert.assertNull(dataFile.nanValueCounts());
+ Assert.assertNull(dataFile.lowerBounds());
+ Assert.assertNull(dataFile.upperBounds());
+ assertNullRecordCount(dataFile);
}
}
+
@Test
- public void testReadEntriesWithSelectCertainStatNotIncludeFullStats() throws
IOException {
+ public void testReadEntriesWithSelectCertainStatNotProjectStats() throws
IOException {
ManifestFile manifest = writeManifest(1000L, FILE);
try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest,
FILE_IO)
- .select(ImmutableSet.of("record_count", "value_counts"))) {
+ .select(ImmutableList.of("file_path", "value_counts"))) {
DataFile dataFile = reader.iterator().next();
- Assert.assertEquals(3, dataFile.recordCount());
- Assert.assertNull(dataFile.columnSizes());
+ // selected fields are populated
Assert.assertEquals(VALUE_COUNT, dataFile.valueCounts());
+ Assert.assertEquals(FILE_PATH, dataFile.path());
+
+ // not selected fields are all null and not projected
+ Assert.assertNull(dataFile.columnSizes());
Assert.assertNull(dataFile.nullValueCounts());
+ Assert.assertNull(dataFile.nanValueCounts());
Assert.assertNull(dataFile.lowerBounds());
Assert.assertNull(dataFile.upperBounds());
- Assert.assertNull(dataFile.nanValueCounts());
Review comment:
nit: to minimize changes, no need to move line for this. Same comment
for L206
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]