rdblue commented on a change in pull request #3400:
URL: https://github.com/apache/iceberg/pull/3400#discussion_r744289709



##########
File path: spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/source/SparkBatchQueryScan.java
##########
@@ -74,60 +101,176 @@
       throw new IllegalArgumentException("Cannot only specify option 
end-snapshot-id to do incremental scan");
     }
 
-    // look for split behavior overrides in options
-    this.splitSize = Spark3Util.propertyAsLong(options, 
SparkReadOptions.SPLIT_SIZE, null);
-    this.splitLookback = Spark3Util.propertyAsInt(options, 
SparkReadOptions.LOOKBACK, null);
-    this.splitOpenFileCost = Spark3Util.propertyAsLong(options, 
SparkReadOptions.FILE_OPEN_COST, null);
+    this.splitSize = readConf.splitSizeOption();
+    this.splitLookback = readConf.splitLookbackOption();
+    this.splitOpenFileCost = readConf.splitOpenFileCostOption();
+    this.runtimeFilterExpressions = Lists.newArrayList();
   }
 
-  @Override
-  protected List<CombinedScanTask> tasks() {
-    if (tasks == null) {
-      TableScan scan = table()
-          .newScan()
-          .caseSensitive(caseSensitive())
-          .project(expectedSchema());
+  private TableScan scan() {
+    if (scan == null) {
+      this.scan = buildScan();
+    }
 
-      if (snapshotId != null) {
-        scan = scan.useSnapshot(snapshotId);
-      }
+    return scan;
+  }
 
-      if (asOfTimestamp != null) {
-        scan = scan.asOfTime(asOfTimestamp);
+  private TableScan buildScan() {
+    TableScan tableScan = table()
+        .newScan()
+        .caseSensitive(caseSensitive())
+        .project(expectedSchema());
+
+    if (snapshotId != null) {
+      tableScan = tableScan.useSnapshot(snapshotId);
+    }
+
+    if (asOfTimestamp != null) {
+      tableScan = tableScan.asOfTime(asOfTimestamp);
+    }
+
+    if (startSnapshotId != null) {
+      if (endSnapshotId != null) {
+        tableScan = tableScan.appendsBetween(startSnapshotId, endSnapshotId);
+      } else {
+        tableScan = tableScan.appendsAfter(startSnapshotId);
       }
+    }
 
-      if (startSnapshotId != null) {
-        if (endSnapshotId != null) {
-          scan = scan.appendsBetween(startSnapshotId, endSnapshotId);
-        } else {
-          scan = scan.appendsAfter(startSnapshotId);
-        }
+    if (splitSize != null) {
+      tableScan = tableScan.option(TableProperties.SPLIT_SIZE, 
String.valueOf(splitSize));
+    }
+
+    if (splitLookback != null) {
+      tableScan = tableScan.option(TableProperties.SPLIT_LOOKBACK, 
String.valueOf(splitLookback));
+    }
+
+    if (splitOpenFileCost != null) {
+      tableScan = tableScan.option(TableProperties.SPLIT_OPEN_FILE_COST, 
String.valueOf(splitOpenFileCost));
+    }
+
+    for (Expression filter : filterExpressions()) {
+      tableScan = tableScan.filter(filter);
+    }
+
+    return tableScan;
+  }
+
+  private Set<Integer> specIds() {
+    if (specIds == null) {
+      Set<Integer> specIdSet = Sets.newHashSet();
+      for (FileScanTask file : files()) {
+        specIdSet.add(file.spec().specId());
       }
+      this.specIds = specIdSet;
+    }
 
-      if (splitSize != null) {
-        scan = scan.option(TableProperties.SPLIT_SIZE, splitSize.toString());
+    return specIds;
+  }
+
+  private List<FileScanTask> files() {
+    if (files == null) {
+      try (CloseableIterable<FileScanTask> filesIterable = scan().planFiles()) 
{
+        this.files = Lists.newArrayList(filesIterable);
+      } catch (IOException e) {
+        throw new UncheckedIOException("Failed to close table scan: " + scan, 
e);
       }
+    }
 
-      if (splitLookback != null) {
-        scan = scan.option(TableProperties.SPLIT_LOOKBACK, 
splitLookback.toString());
+    return files;
+  }
+
+  @Override
+  protected List<CombinedScanTask> tasks() {
+    if (tasks == null) {
+      CloseableIterable<FileScanTask> splitFiles = TableScanUtil.splitFiles(
+          CloseableIterable.withNoopClose(files()),
+          scan().targetSplitSize());
+      CloseableIterable<CombinedScanTask> scanTasks = TableScanUtil.planTasks(
+          splitFiles, scan().targetSplitSize(),
+          scan().splitLookback(), scan().splitOpenFileCost());

Review comment:
       Why access the private instance variable through a method call?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to