This is an automated email from the ASF dual-hosted git repository.

ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 297b18f  [CARBONDATA-4084] Fixed data corruption issue after fallback 
of Local dictionary
297b18f is described below

commit 297b18f68d0a811bf612187ab94efe15211d2679
Author: kumarvishal09 <[email protected]>
AuthorDate: Fri Dec 18 15:48:58 2020 +0530

    [CARBONDATA-4084] Fixed data corruption issue after fallback of Local 
dictionary
    
    Why is this PR needed?
    Data is getting corrupted when fallback happens for local dictionary 
encoded page.
    
    What changes were proposed in this PR?
    In LVByteBufferColumnPage Length getting added inside put method. In case 
of fallback when reverse lookup is done and encoded page is replaced with 
actual value[Dictionary values]. As actual Dictionary value is already present 
in LV format again one more length is getting added[LLV], so during query it's 
showing extra character for those columns
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    No
    
    This closes #4058
---
 .../datastore/page/DecoderBasedFallbackEncoder.java  | 20 ++++++++++++++++++--
 .../generator/ColumnLocalDictionaryGenerator.java    | 11 +++++++++++
 .../generator/LocalDictionaryGenerator.java          |  6 ++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
index 30d07f7..abb4160 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
@@ -114,8 +114,24 @@ public class DecoderBasedFallbackEncoder implements 
Callable<FallbackEncodedColu
     for (int i = 0; i < pageSize; i++) {
       int index = reverseInvertedIndex[i] * 3;
       int keyArray = (int) keyGenerator.getKeyArray(bytes, index)[0];
-      actualDataColumnPage
-          .putBytes(rowId++, 
localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray));
+      if (actualDataColumnPage instanceof LVByteBufferColumnPage) {
+        //TODO It's a quick fix, we can fallback to old logic adding Length 
outside of
+        // LVByteBufferColumnPage. Update the page data with Length before 
adding to column page.
+        // so it won't be added again when fallback scenario occurs.
+        // Though this is not a bottleneck as after fallback only encoded page 
will be decoded
+        // and System.array copy is very fast. So for few pages it won't 
impact much
+        byte[] dictionaryKeyBasedOnValue =
+            localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray);
+        byte[] output =
+            new byte[dictionaryKeyBasedOnValue.length - 
localDictionaryGenerator.getLVLength()];
+        System
+            .arraycopy(dictionaryKeyBasedOnValue, 
localDictionaryGenerator.getLVLength(), output, 0,
+                output.length);
+        actualDataColumnPage.putBytes(rowId++, output);
+      } else {
+        actualDataColumnPage
+            .putBytes(rowId++, 
localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray));
+      }
     }
 
     // get column spec for existing column page
diff --git 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
index d7df88f..de5f84b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
@@ -34,10 +34,16 @@ public class ColumnLocalDictionaryGenerator implements 
LocalDictionaryGenerator
    */
   private DictionaryStore dictionaryHolder;
 
+  /**
+   * number of bytes used for storing the length of the dictionary value
+   */
+  private int lvLength;
+
   public ColumnLocalDictionaryGenerator(int threshold, int lvLength) {
     // adding 1 to threshold for null value
     int newThreshold = threshold + 1;
     this.dictionaryHolder = new MapBasedDictionaryStore(newThreshold);
+    this.lvLength = lvLength;
     ByteBuffer byteBuffer = ByteBuffer.allocate(
         lvLength + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length);
 
@@ -86,4 +92,9 @@ public class ColumnLocalDictionaryGenerator implements 
LocalDictionaryGenerator
   public byte[] getDictionaryKeyBasedOnValue(int value) {
     return this.dictionaryHolder.getDictionaryKeyBasedOnValue(value);
   }
+
+  @Override
+  public int getLVLength() {
+    return this.lvLength;
+  }
 }
diff --git 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
index 35a9f65..05a9473 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
@@ -46,4 +46,10 @@ public interface LocalDictionaryGenerator {
    * @return dictionary key based on value
    */
   byte[] getDictionaryKeyBasedOnValue(int value);
+
+  /**
+   * @return number of bytes used for storing the length of the dictionary 
value
+   * for String type 2 Bytes and Long String 4 bytes
+   */
+  int getLVLength();
 }

Reply via email to