Repository: carbondata
Updated Branches:
  refs/heads/master 077dd58e3 -> 05033f71e


[CARBONDATA-2935] Write is_sort in footer for compaction

carbondata.thrift is modified to add an optional is_sort field to the file 
footer, indicating whether the file is sorted. Compaction uses this to decide 
whether to use merge sort or not.

This closes #2720


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/05033f71
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/05033f71
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/05033f71

Branch: refs/heads/master
Commit: 05033f71e028d708f1a7d23b0d01775134be3078
Parents: 077dd58
Author: Jacky Li <[email protected]>
Authored: Fri Sep 14 18:28:25 2018 +0800
Committer: ravipesala <[email protected]>
Committed: Tue Sep 18 22:13:12 2018 +0530

----------------------------------------------------------------------
 format/src/main/thrift/carbondata.thrift           |  1 +
 .../sql/CarbonGetTableDetailComandTestCase.scala   |  6 +++---
 .../store/CarbonFactDataHandlerColumnar.java       |  2 +-
 .../store/writer/AbstractFactDataWriter.java       |  4 ++--
 .../store/writer/CarbonFactDataWriter.java         |  2 +-
 .../writer/v3/CarbonFactDataWriterImplV3.java      | 17 +++++++++++++----
 6 files changed, 21 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/format/src/main/thrift/carbondata.thrift
----------------------------------------------------------------------
diff --git a/format/src/main/thrift/carbondata.thrift 
b/format/src/main/thrift/carbondata.thrift
index 7130066..ec51ff7 100644
--- a/format/src/main/thrift/carbondata.thrift
+++ b/format/src/main/thrift/carbondata.thrift
@@ -205,6 +205,7 @@ struct FileFooter3{
     3: required list<BlockletIndex> blocklet_index_list;       // Blocklet 
index of all blocklets in this file
     4: optional list<BlockletInfo3> blocklet_info_list3;       // Information 
about blocklets of all columns in this file for V3 format
     5: optional dictionary.ColumnDictionaryChunk dictionary; // Blocklet local 
dictionary
+    6: optional bool is_sort; // True if the data is sorted in this file, it 
is used for compaction to decide whether to use merge sort or not
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
 
b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
index fcb6110..2669417 100644
--- 
a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
@@ -42,10 +42,10 @@ class CarbonGetTableDetailCommandTestCase extends QueryTest 
with BeforeAndAfterA
 
     assertResult(2)(result.length)
     assertResult("table_info1")(result(0).getString(0))
-    // 2220 is the size of carbon table. Note that since 1.5.0, we add 
additional compressor name in metadata
-    assertResult(2220)(result(0).getLong(1))
+    // 2221 is the size of carbon table. Note that since 1.5.0, we add 
additional compressor name in metadata
+    assertResult(2221)(result(0).getLong(1))
     assertResult("table_info2")(result(1).getString(0))
-    assertResult(2220)(result(1).getLong(1))
+    assertResult(2221)(result(1).getLong(1))
   }
 
   override def afterAll: Unit = {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index c23b071..cf51941 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -374,7 +374,7 @@ public class CarbonFactDataHandlerColumnar implements 
CarbonFactHandler {
       }
       consumerExecutorService.shutdownNow();
       processWriteTaskSubmitList(consumerExecutorServiceTaskList);
-      this.dataWriter.writeFooterToFile();
+      this.dataWriter.writeFooter();
       LOGGER.info("All blocklets have been finished writing");
       // close all the open stream for both the files
       this.dataWriter.closeWriter();

http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
index 836e2c8..ad0e8e0 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
@@ -229,7 +229,7 @@ public abstract class AbstractFactDataWriter implements 
CarbonFactDataWriter {
       LOGGER.info("Writing data to file as max file size reached for file: "
           + activeFile + ". Data block size: " + currentFileSize);
       // write meta data to end of the existing file
-      writeBlockletInfoToFile();
+      writeFooterToFile();
       this.currentFileSize = 0;
       this.dataChunksOffsets = new ArrayList<>();
       this.dataChunksLength = new ArrayList<>();
@@ -324,7 +324,7 @@ public abstract class AbstractFactDataWriter implements 
CarbonFactDataWriter {
   /**
    * This method will write metadata at the end of file file format in thrift 
format
    */
-  protected abstract void writeBlockletInfoToFile() throws 
CarbonDataWriterException;
+  protected abstract void writeFooterToFile() throws CarbonDataWriterException;
 
   /**
    * Below method will be used to fill the vlock info details

http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java
index 3a2fa1c..e9d413d 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java
@@ -35,7 +35,7 @@ public interface CarbonFactDataWriter {
    *
    * @throws CarbonDataWriterException
    */
-  void writeFooterToFile() throws CarbonDataWriterException;
+  void writeFooter() throws CarbonDataWriterException;
 
   /**
    * Below method will be used to initialise the writer

http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
----------------------------------------------------------------------
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
index 25204cb..e3cb052 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java
@@ -45,6 +45,7 @@ import 
org.apache.carbondata.processing.store.writer.AbstractFactDataWriter;
 import static 
org.apache.carbondata.core.constants.CarbonCommonConstants.TABLE_BLOCKLET_SIZE;
 import static 
org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB;
 import static 
org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB_DEFAULT_VALUE;
+import static 
org.apache.carbondata.processing.loading.sort.SortScopeOptions.SortScope.NO_SORT;
 
 /**
  * Below class will be used to write the data in V3 format
@@ -68,6 +69,11 @@ public class CarbonFactDataWriterImplV3 extends 
AbstractFactDataWriter {
    */
   private long blockletSizeThreshold;
 
+  /**
+   * True if this file is sorted
+   */
+  private boolean isSorted;
+
   public CarbonFactDataWriterImplV3(CarbonFactDataHandlerModel model) {
     super(model);
     String blockletSize =
@@ -83,10 +89,11 @@ public class CarbonFactDataWriterImplV3 extends 
AbstractFactDataWriter {
       LOGGER.info("Blocklet size configure for table is: " + 
blockletSizeThreshold);
     }
     blockletDataHolder = new BlockletDataHolder(fallbackExecutorService, 
model);
+    isSorted = model.getSortScope() != NO_SORT;
   }
 
-  @Override protected void writeBlockletInfoToFile()
-      throws CarbonDataWriterException {
+  @Override
+  protected void writeFooterToFile() throws CarbonDataWriterException {
     try {
       // get the current file position
       long currentPosition = currentOffsetInFile;
@@ -94,6 +101,7 @@ public class CarbonFactDataWriterImplV3 extends 
AbstractFactDataWriter {
       FileFooter3 convertFileMeta = CarbonMetadataUtil
           .convertFileFooterVersion3(blockletMetadata, blockletIndex, 
localCardinality,
               thriftColumnSchemaList.size());
+      convertFileMeta.setIs_sort(isSorted);
       // fill the carbon index details
       fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), 
carbonDataFileName, currentPosition);
       // write the footer
@@ -377,9 +385,10 @@ public class CarbonFactDataWriterImplV3 extends 
AbstractFactDataWriter {
     }
   }
 
-  @Override public void writeFooterToFile() throws CarbonDataWriterException {
+  @Override
+  public void writeFooter() throws CarbonDataWriterException {
     if (this.blockletMetadata.size() > 0) {
-      writeBlockletInfoToFile();
+      writeFooterToFile();
     }
   }
 }

Reply via email to