morningman commented on code in PR #17706:
URL: https://github.com/apache/doris/pull/17706#discussion_r1134233541


##########
fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplitter.java:
##########
@@ -152,4 +161,31 @@ public int getTotalPartitionNum() {
     public int getReadPartitionNum() {
         return readPartitionNum;
     }
+
+    // Get splits by using FileSystem API, the splits are blocks in HDFS or S3 
like storage system.
+    public static InputSplit[] getHiveSplits(Path path, InputFormat<?, ?> 
inputFormat,
+                                             JobConf jobConf) throws 
IOException {
+        FileSystem fs = path.getFileSystem(jobConf);
+        boolean splittable = HiveUtil.isSplittable(inputFormat, fs, path);
+        List<InputSplit> splits = Lists.newArrayList();
+        RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = 
fs.listFiles(path, true);
+        if (!locatedFileStatusRemoteIterator.hasNext()) {
+            LOG.debug("File status for path {} is empty.", path);
+            return new InputSplit[0];
+        }
+        if (!splittable) {
+            LOG.debug("Path {} is not splittable.", path);
+            LocatedFileStatus status = locatedFileStatusRemoteIterator.next();
+            splits.add(new FileSplit(status.getPath(), 0, status.getLen(), 
status.getBlockLocations()[0].getHosts()));
+            return splits.toArray(new InputSplit[1]);
+        }
+        while (locatedFileStatusRemoteIterator.hasNext()) {
+            LocatedFileStatus status = locatedFileStatusRemoteIterator.next();
+            for (BlockLocation block : status.getBlockLocations()) {
+                splits.add(new FileSplit(status.getPath(), block.getOffset(), 
block.getLength(), block.getHosts()));

Review Comment:
   This needs to be cached



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to