Github user kunal642 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2869#discussion_r230277069
--- Diff:
hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
---
@@ -88,6 +99,50 @@ public CarbonTable getOrCreateCarbonTable(Configuration
configuration) throws IO
}
}
+ /**
+ * This method will list all the carbondata files in the table path and
treat one carbondata
+ * file as one split.
+ */
+ public List<InputSplit> getAllFileSplits(JobContext job) throws
IOException {
+ List<InputSplit> splits = new ArrayList<>();
+ CarbonTable carbonTable =
getOrCreateCarbonTable(job.getConfiguration());
+ if (null == carbonTable) {
+ throw new IOException("Missing/Corrupt schema file for table.");
+ }
+ for (CarbonFile carbonFile :
getAllCarbonDataFiles(carbonTable.getTablePath())) {
+ CarbonInputSplit split =
+ new CarbonInputSplit("null", new
Path(carbonFile.getAbsolutePath()), 0,
+ carbonFile.getLength(), carbonFile.getLocations(),
FileFormat.COLUMNAR_V3);
+ split.setVersion(ColumnarFormatVersion.V3);
+ BlockletDetailInfo info = new BlockletDetailInfo();
+ split.setDetailInfo(info);
+ info.setBlockSize(carbonFile.getLength());
+ // Read the footer offset and set.
+ FileReader reader = FileFactory
--- End diff --
Moved this logic (reading the footer offset) to CarbonVectorizedRecordReader.initialize().
---