Github user kumarvishal09 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/3029#discussion_r244714375
--- Diff:
processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
---
@@ -400,24 +417,53 @@ private static int
getDimensionDefaultCardinality(CarbonDimension dimension) {
* @param tableLastUpdatedTime
* @return
*/
- public static boolean checkIfAnyRestructuredBlockExists(Map<String,
TaskBlockInfo> segmentMapping,
- Map<String, List<DataFileFooter>> dataFileMetadataSegMapping, long
tableLastUpdatedTime) {
- boolean restructuredBlockExists = false;
- for (Map.Entry<String, TaskBlockInfo> taskMap :
segmentMapping.entrySet()) {
- String segmentId = taskMap.getKey();
+ public static boolean checkIfAnyRestructuredBlockExists(
+ Map<String, TaskBlockInfo> segmentMapping,
+ Map<String, List<DataFileFooter>> dataFileMetadataSegMapping,
+ long tableLastUpdatedTime) {
+
+ for (Map.Entry<String, TaskBlockInfo> segmentEntry :
segmentMapping.entrySet()) {
+ String segmentId = segmentEntry.getKey();
List<DataFileFooter> listMetadata =
dataFileMetadataSegMapping.get(segmentId);
- for (DataFileFooter dataFileFooter : listMetadata) {
- // if schema modified timestamp is greater than footer stored
schema timestamp,
- // it indicates it is a restructured block
- if (tableLastUpdatedTime >
dataFileFooter.getSchemaUpdatedTimeStamp()) {
- restructuredBlockExists = true;
- break;
- }
+
+ if (isRestructured(listMetadata, tableLastUpdatedTime)) {
+ return true;
}
- if (restructuredBlockExists) {
- break;
+ }
+
+ return false;
+ }
+
+ public static boolean isRestructured(List<DataFileFooter> listMetadata,
+ long tableLastUpdatedTime) {
+ /*
+ * TODO: only in case of add and drop this variable should be true
+ */
+ for (DataFileFooter dataFileFooter : listMetadata) {
+ // if schema modified timestamp is greater than footer stored schema
timestamp,
+ // it indicates it is a restructured block
+ if (tableLastUpdatedTime >
dataFileFooter.getSchemaUpdatedTimeStamp()) {
+ return true;
}
}
- return restructuredBlockExists;
+ return false;
}
+
+ public static boolean isSorted(TaskBlockInfo taskBlockInfo) throws
IOException {
+ String filePath =
+
taskBlockInfo.getAllTableBlockInfoList().iterator().next().get(0).getFilePath();
+ long fileSize =
+ FileFactory.getCarbonFile(filePath,
FileFactory.getFileType(filePath)).getSize();
+
+ FileReader fileReader =
FileFactory.getFileHolder(FileFactory.getFileType(filePath));
+ ByteBuffer buffer =
+
fileReader.readByteBuffer(FileFactory.getUpdatedFilePath(filePath), fileSize -
8, 8);
+ fileReader.finish();
+
+ CarbonFooterReaderV3 footerReader = new CarbonFooterReaderV3(filePath,
buffer.getLong());
+ FileFooter3 footer = footerReader.readFooterVersion3();
+
+ return footer.isIs_sort();
--- End diff --
With this change, compaction will read the file footer twice, which will
hurt compaction performance. To avoid this, expose get/set methods in
TableBlockInfo to store the file footer, and in AbstractQueryExecutor.java
add a check so that if the file footer is already present it is not read again.
Please find the code reference at
org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java:217
---