Repository: carbondata Updated Branches: refs/heads/master 077dd58e3 -> 05033f71e
[CARBONDATA-2935] Write is_sort in footer for compaction. carbondata.thrift is modified to add is_sort in footer to indicate whether the file is sorted, which will help compaction to decide whether to use merge sort or not. This closes #2720 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/05033f71 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/05033f71 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/05033f71 Branch: refs/heads/master Commit: 05033f71e028d708f1a7d23b0d01775134be3078 Parents: 077dd58 Author: Jacky Li <[email protected]> Authored: Fri Sep 14 18:28:25 2018 +0800 Committer: ravipesala <[email protected]> Committed: Tue Sep 18 22:13:12 2018 +0530 ---------------------------------------------------------------------- format/src/main/thrift/carbondata.thrift | 1 + .../sql/CarbonGetTableDetailComandTestCase.scala | 6 +++--- .../store/CarbonFactDataHandlerColumnar.java | 2 +- .../store/writer/AbstractFactDataWriter.java | 4 ++-- .../store/writer/CarbonFactDataWriter.java | 2 +- .../writer/v3/CarbonFactDataWriterImplV3.java | 17 +++++++++++++---- 6 files changed, 21 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/format/src/main/thrift/carbondata.thrift ---------------------------------------------------------------------- diff --git a/format/src/main/thrift/carbondata.thrift b/format/src/main/thrift/carbondata.thrift index 7130066..ec51ff7 100644 --- a/format/src/main/thrift/carbondata.thrift +++ b/format/src/main/thrift/carbondata.thrift @@ -205,6 +205,7 @@ struct FileFooter3{ 3: required list<BlockletIndex> blocklet_index_list; // Blocklet index of all blocklets in this file 4: optional list<BlockletInfo3> blocklet_info_list3; // Information about blocklets of all columns in this file for V3 format 5: optional 
dictionary.ColumnDictionaryChunk dictionary; // Blocklet local dictionary + 6: optional bool is_sort; // True if the data is sorted in this file, it is used for compaction to decide whether to use merge sort or not } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala index fcb6110..2669417 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala @@ -42,10 +42,10 @@ class CarbonGetTableDetailCommandTestCase extends QueryTest with BeforeAndAfterA assertResult(2)(result.length) assertResult("table_info1")(result(0).getString(0)) - // 2220 is the size of carbon table. Note that since 1.5.0, we add additional compressor name in metadata - assertResult(2220)(result(0).getLong(1)) + // 2221 is the size of carbon table. 
Note that since 1.5.0, we add additional compressor name in metadata + assertResult(2221)(result(0).getLong(1)) assertResult("table_info2")(result(1).getString(0)) - assertResult(2220)(result(1).getLong(1)) + assertResult(2221)(result(1).getLong(1)) } override def afterAll: Unit = { http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java index c23b071..cf51941 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java @@ -374,7 +374,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { } consumerExecutorService.shutdownNow(); processWriteTaskSubmitList(consumerExecutorServiceTaskList); - this.dataWriter.writeFooterToFile(); + this.dataWriter.writeFooter(); LOGGER.info("All blocklets have been finished writing"); // close all the open stream for both the files this.dataWriter.closeWriter(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 836e2c8..ad0e8e0 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -229,7 +229,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { LOGGER.info("Writing data to file as max file size reached for file: " + activeFile + ". Data block size: " + currentFileSize); // write meta data to end of the existing file - writeBlockletInfoToFile(); + writeFooterToFile(); this.currentFileSize = 0; this.dataChunksOffsets = new ArrayList<>(); this.dataChunksLength = new ArrayList<>(); @@ -324,7 +324,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { /** * This method will write metadata at the end of file file format in thrift format */ - protected abstract void writeBlockletInfoToFile() throws CarbonDataWriterException; + protected abstract void writeFooterToFile() throws CarbonDataWriterException; /** * Below method will be used to fill the vlock info details http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java index 3a2fa1c..e9d413d 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java @@ -35,7 +35,7 @@ public interface CarbonFactDataWriter { * * @throws CarbonDataWriterException */ - void writeFooterToFile() throws CarbonDataWriterException; + void writeFooter() throws CarbonDataWriterException; /** * Below method will be used to initialise the writer 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java index 25204cb..e3cb052 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java @@ -45,6 +45,7 @@ import org.apache.carbondata.processing.store.writer.AbstractFactDataWriter; import static org.apache.carbondata.core.constants.CarbonCommonConstants.TABLE_BLOCKLET_SIZE; import static org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB; import static org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB_DEFAULT_VALUE; +import static org.apache.carbondata.processing.loading.sort.SortScopeOptions.SortScope.NO_SORT; /** * Below class will be used to write the data in V3 format @@ -68,6 +69,11 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter { */ private long blockletSizeThreshold; + /** + * True if this file is sorted + */ + private boolean isSorted; + public CarbonFactDataWriterImplV3(CarbonFactDataHandlerModel model) { super(model); String blockletSize = @@ -83,10 +89,11 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter { LOGGER.info("Blocklet size configure for table is: " + blockletSizeThreshold); } blockletDataHolder = new BlockletDataHolder(fallbackExecutorService, model); + isSorted = model.getSortScope() != NO_SORT; } - @Override protected void writeBlockletInfoToFile() - throws CarbonDataWriterException { + @Override + 
protected void writeFooterToFile() throws CarbonDataWriterException { try { // get the current file position long currentPosition = currentOffsetInFile; @@ -94,6 +101,7 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter { FileFooter3 convertFileMeta = CarbonMetadataUtil .convertFileFooterVersion3(blockletMetadata, blockletIndex, localCardinality, thriftColumnSchemaList.size()); + convertFileMeta.setIs_sort(isSorted); // fill the carbon index details fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), carbonDataFileName, currentPosition); // write the footer @@ -377,9 +385,10 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter { } } - @Override public void writeFooterToFile() throws CarbonDataWriterException { + @Override + public void writeFooter() throws CarbonDataWriterException { if (this.blockletMetadata.size() > 0) { - writeBlockletInfoToFile(); + writeFooterToFile(); } } }
