deniskuzZ commented on code in PR #6029: URL: https://github.com/apache/hive/pull/6029#discussion_r2279500498
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java: ########## @@ -78,33 +70,14 @@ public String[] getLocations() { // getLocations() won't be accurate when called on worker nodes and will always return "*" if (locations == null && conf != null) { boolean localityPreferred = conf.getBoolean(InputFormatConfig.LOCALITY, false); - locations = localityPreferred ? blockLocations(taskGroup, conf) : ANYWHERE; + locations = localityPreferred ? Util.blockLocations(taskGroup, conf) : ANYWHERE; } else { locations = ANYWHERE; } return locations; } - // We should move to Util.blockLocations once the following PR is merged and shipped - // https://github.com/apache/iceberg/pull/11053 - private static String[] blockLocations(ScanTaskGroup<FileScanTask> task, Configuration conf) { - final Set<String> locationSets = Sets.newHashSet(); - task.tasks().forEach(fileScanTask -> { - final Path path = new Path(fileScanTask.file().path().toString()); - try { - final FileSystem fs = path.getFileSystem(conf); - for (BlockLocation location : fs.getFileBlockLocations(path, fileScanTask.start(), fileScanTask.length())) { - locationSets.addAll(Arrays.asList(location.getHosts())); - } - } catch (IOException e) { - LOG.warn("Failed to get block locations for path {}", path, e); - } - }); - - return locationSets.toArray(new String[0]); - } - Review Comment: @okumin shouldn't we upgrade the iceberg version? We are on 1.9.1 ATM https://github.com/apache/hive/blob/master/iceberg/pom.xml#L29 ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java: ########## @@ -78,33 +70,14 @@ public String[] getLocations() { // getLocations() won't be accurate when called on worker nodes and will always return "*" if (locations == null && conf != null) { boolean localityPreferred = conf.getBoolean(InputFormatConfig.LOCALITY, false); - locations = localityPreferred ? blockLocations(taskGroup, conf) : ANYWHERE; + locations = localityPreferred ? Util.blockLocations(taskGroup, conf) : ANYWHERE; } else { locations = ANYWHERE; } return locations; } - // We should move to Util.blockLocations once the following PR is merged and shipped - // https://github.com/apache/iceberg/pull/11053 - private static String[] blockLocations(ScanTaskGroup<FileScanTask> task, Configuration conf) { - final Set<String> locationSets = Sets.newHashSet(); - task.tasks().forEach(fileScanTask -> { - final Path path = new Path(fileScanTask.file().path().toString()); - try { - final FileSystem fs = path.getFileSystem(conf); - for (BlockLocation location : fs.getFileBlockLocations(path, fileScanTask.start(), fileScanTask.length())) { - locationSets.addAll(Arrays.asList(location.getHosts())); - } - } catch (IOException e) { - LOG.warn("Failed to get block locations for path {}", path, e); - } - }); - - return locationSets.toArray(new String[0]); - } - Review Comment: @okumin, shouldn't we upgrade the iceberg version? We are on 1.9.1 ATM https://github.com/apache/hive/blob/master/iceberg/pom.xml#L29 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org