This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 57309d70d08675c31975d2a60692835e7a6c22cf
Author: Manhua <kevin...@qq.com>
AuthorDate: Wed Jul 17 17:39:29 2019 +0800

    [CARBONDATA-3473] Fix data size calculation of the last column in CarbonCli
    
    When updating the data size of the last column chunk, the current code uses 
columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset) 
for every blocklet. This leads to a wrong result when calculating the data size 
of the last column, especially when a carbon data file has multiple blocklets.
    
    In this PR, we fix this problem and modify the calculation by recording 
the end offset of each blocklet.
    
    This closes #3330
---
 .../java/org/apache/carbondata/tool/DataFile.java  | 32 +++++++++++-----------
 .../org/apache/carbondata/tool/CarbonCliTest.java  |  6 ++--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java 
b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
index e553a78..4ed3945 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -121,16 +121,21 @@ class DataFile {
     this.partNo = CarbonTablePath.DataFileUtil.getPartNo(fileName);
 
     // calculate blocklet size and column size
-    // first calculate the header size, it equals the offset of first
-    // column chunk in first blocklet
-    long headerSizeInBytes = 
footer.blocklet_info_list3.get(0).column_data_chunks_offsets.get(0);
-    long previousOffset = headerSizeInBytes;
-    for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) {
+    for (int j = 0; j < footer.getBlocklet_info_list3().size(); j++) {
+      // remark start and end offset of current blocklet for computing 
blocklet size
+      // and chunk data size of the last column
+      BlockletInfo3 blockletInfo3 = footer.blocklet_info_list3.get(j);
+      long blockletEndOffset;
+      if (j != footer.getBlocklet_info_list3().size() - 1) {
+        // use start offset of next blocklet as end offset of current blocklet
+        blockletEndOffset = footer.blocklet_info_list3.get(j + 
1).column_data_chunks_offsets.get(j);
+      } else {
+        // use start offset of footer as end offset of current blocklet if it 
is the last blocklet
+        blockletEndOffset = fileSizeInBytes - footerSizeInBytes;
+      }
       // calculate blocklet size in bytes
-      long blockletOffset = blockletInfo3.column_data_chunks_offsets.get(0);
-      blockletSizeInBytes.add(blockletOffset - previousOffset);
-      previousOffset = blockletOffset;
-
+      this.blockletSizeInBytes.add(
+              blockletEndOffset - 
blockletInfo3.column_data_chunks_offsets.get(0));
       // calculate column size in bytes for each column
       LinkedList<Long> columnDataSize = new LinkedList<>();
       LinkedList<Long> columnMetaSize = new LinkedList<>();
@@ -140,17 +145,12 @@ class DataFile {
         
columnMetaSize.add(blockletInfo3.column_data_chunks_length.get(i).longValue());
         previousChunkOffset = blockletInfo3.column_data_chunks_offsets.get(i);
       }
-      // last column chunk data size
-      columnDataSize.add(fileSizeInBytes - footerSizeInBytes - 
previousChunkOffset);
+      // update chunk data size of the last column
+      columnDataSize.add(blockletEndOffset - previousChunkOffset);
       columnDataSize.removeFirst();
       this.columnDataSizeInBytes.add(columnDataSize);
       this.columnMetaSizeInBytes.add(columnMetaSize);
-
     }
-    // last blocklet size
-    blockletSizeInBytes.add(
-        fileSizeInBytes - footerSizeInBytes - headerSizeInBytes - 
previousOffset);
-    this.blockletSizeInBytes.removeFirst();
 
     assert (blockletSizeInBytes.size() == getNumBlocklets());
   }
diff --git 
a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java 
b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index af8d51d..4d89777 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -234,11 +234,11 @@ public class CarbonCliTest {
 
     expectedOutput = buildLines(
         "BLK  BLKLT  Meta Size  Data Size  LocalDict  DictEntries  DictSize  
AvgPageSize  Min%  Max%   Min  Max      " ,
-        "0    0      3.36KB     5.14MB     false      0            0.0B      
93.76KB      0.0   100.0  0    2999990  " ,
+        "0    0      3.36KB     2.57MB     false      0            0.0B      
93.76KB      0.0   100.0  0    2999990  " ,
         "0    1      3.36KB     2.57MB     false      0            0.0B      
93.76KB      0.0   100.0  1    2999992  " ,
-        "1    0      3.36KB     5.14MB     false      0            0.0B      
93.76KB      0.0   100.0  3    2999994  " ,
+        "1    0      3.36KB     2.57MB     false      0            0.0B      
93.76KB      0.0   100.0  3    2999994  " ,
         "1    1      3.36KB     2.57MB     false      0            0.0B      
93.76KB      0.0   100.0  5    2999996  " ,
-        "2    0      3.36KB     4.06MB     false      0            0.0B      
93.76KB      0.0   100.0  7    2999998  " ,
+        "2    0      3.36KB     2.57MB     false      0            0.0B      
93.76KB      0.0   100.0  7    2999998  " ,
         "2    1      2.04KB     1.49MB     false      0            0.0B      
89.62KB      0.0   100.0  9    2999999  ");
     Assert.assertTrue(output.contains(expectedOutput));
     Assert.assertTrue(output.contains("## version Details"));

Reply via email to