amogh-jahagirdar commented on code in PR #14660:
URL: https://github.com/apache/iceberg/pull/14660#discussion_r2556752990
##########
core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java:
##########
@@ -763,22 +774,45 @@ static void clearPlanningState() {
* @param tableScan the table scan to plan
* @param planId the unique identifier for this plan
* @param tasksPerPlanTask number of file scan tasks to group per plan task
+ * @return the initial file scan tasks and the first plan task key
*/
- private static void planFilesFor(TableScan tableScan, String planId, int
tasksPerPlanTask) {
+ private static Pair<List<FileScanTask>, String> planFilesFor(
+ TableScan tableScan, String planId, int tasksPerPlanTask) {
Iterable<List<FileScanTask>> taskGroupings =
Iterables.partition(tableScan.planFiles(), tasksPerPlanTask);
+ Iterator<List<FileScanTask>> taskIterator = taskGroupings.iterator();
+ String planTaskKeyPrefix = planId + "-" + tableScan.table().uuid() + "-";
+
+ if (!taskIterator.hasNext()) {
+ // Handle empty table scans
+ String emptyPlanTaskKey = planTaskKeyPrefix + 0;
+ IN_MEMORY_PLANNING_STATE.addPlanTask(emptyPlanTaskKey,
Collections.emptyList());
Review Comment:
Do we even need to update the planning state if it's empty? We should just
be able to return the empty pair. Then we can keep all the plan task key logic
as is.
##########
core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java:
##########
@@ -763,22 +774,45 @@ static void clearPlanningState() {
* @param tableScan the table scan to plan
* @param planId the unique identifier for this plan
* @param tasksPerPlanTask number of file scan tasks to group per plan task
+ * @return the initial file scan tasks and the first plan task key
*/
- private static void planFilesFor(TableScan tableScan, String planId, int
tasksPerPlanTask) {
+ private static Pair<List<FileScanTask>, String> planFilesFor(
+ TableScan tableScan, String planId, int tasksPerPlanTask) {
Iterable<List<FileScanTask>> taskGroupings =
Iterables.partition(tableScan.planFiles(), tasksPerPlanTask);
+ Iterator<List<FileScanTask>> taskIterator = taskGroupings.iterator();
+ String planTaskKeyPrefix = planId + "-" + tableScan.table().uuid() + "-";
+
+ if (!taskIterator.hasNext()) {
+ // Handle empty table scans
+ String emptyPlanTaskKey = planTaskKeyPrefix + 0;
+ IN_MEMORY_PLANNING_STATE.addPlanTask(emptyPlanTaskKey,
Collections.emptyList());
+ return Pair.of(Collections.emptyList(), null);
+ }
+
int planTaskSequence = 0;
String previousPlanTask = null;
- for (List<FileScanTask> taskGrouping : taskGroupings) {
- String planTaskKey =
- String.format("%s-%s-%s", planId, tableScan.table().uuid(),
planTaskSequence++);
+ String firstPlanTaskKey = null;
+ List<FileScanTask> initialFileScanTasks = null;
+
+ do {
Review Comment:
Just a preference, but I tend to prefer straightforward for loops where
possible rather than have to peek further down in a `do while` to see what
we're iterating over. I'd keep the for loop as is.
##########
core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java:
##########
@@ -763,22 +774,45 @@ static void clearPlanningState() {
* @param tableScan the table scan to plan
* @param planId the unique identifier for this plan
* @param tasksPerPlanTask number of file scan tasks to group per plan task
+ * @return the initial file scan tasks and the first plan task key
*/
- private static void planFilesFor(TableScan tableScan, String planId, int
tasksPerPlanTask) {
+ private static Pair<List<FileScanTask>, String> planFilesFor(
+ TableScan tableScan, String planId, int tasksPerPlanTask) {
Iterable<List<FileScanTask>> taskGroupings =
Iterables.partition(tableScan.planFiles(), tasksPerPlanTask);
+ Iterator<List<FileScanTask>> taskIterator = taskGroupings.iterator();
+ String planTaskKeyPrefix = planId + "-" + tableScan.table().uuid() + "-";
+
+ if (!taskIterator.hasNext()) {
+ // Handle empty table scans
+ String emptyPlanTaskKey = planTaskKeyPrefix + 0;
+ IN_MEMORY_PLANNING_STATE.addPlanTask(emptyPlanTaskKey,
Collections.emptyList());
+ return Pair.of(Collections.emptyList(), null);
+ }
+
int planTaskSequence = 0;
String previousPlanTask = null;
- for (List<FileScanTask> taskGrouping : taskGroupings) {
- String planTaskKey =
- String.format("%s-%s-%s", planId, tableScan.table().uuid(),
planTaskSequence++);
+ String firstPlanTaskKey = null;
+ List<FileScanTask> initialFileScanTasks = null;
+
+ do {
+ List<FileScanTask> taskGrouping = taskIterator.next();
+ String planTaskKey = planTaskKeyPrefix + planTaskSequence++;
IN_MEMORY_PLANNING_STATE.addPlanTask(planTaskKey, taskGrouping);
+
+ if (firstPlanTaskKey == null) {
Review Comment:
This could also be an else after the if (previousPlanTask != null), because
the case where it's the first task is the same as where there is no previous
task. Not really opinionated on this, just something I observed as another way
of doing it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]