This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 297b18f [CARBONDATA-4084] Fixed data corruption issue after fallback
of Local dictionary
297b18f is described below
commit 297b18f68d0a811bf612187ab94efe15211d2679
Author: kumarvishal09 <[email protected]>
AuthorDate: Fri Dec 18 15:48:58 2020 +0530
[CARBONDATA-4084] Fixed data corruption issue after fallback of Local
dictionary
Why is this PR needed?
Data is getting corrupted when fallback happens for local dictionary
encoded page.
What changes were proposed in this PR?
In LVByteBufferColumnPage Length getting added inside put method. In case
of fallback when reverse lookup is done and encoded page is replaced with
actual value[Dictionary values]. As actual Dictionary value is already present
in LV format again one more length is getting added[LLV], so during query it's
showing extra character for those columns
Does this PR introduce any user interface change?
No
Is any new testcase added?
No
This closes #4058
---
.../datastore/page/DecoderBasedFallbackEncoder.java | 20 ++++++++++++++++++--
.../generator/ColumnLocalDictionaryGenerator.java | 11 +++++++++++
.../generator/LocalDictionaryGenerator.java | 6 ++++++
3 files changed, 35 insertions(+), 2 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
index 30d07f7..abb4160 100644
---
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
+++
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
@@ -114,8 +114,24 @@ public class DecoderBasedFallbackEncoder implements
Callable<FallbackEncodedColu
for (int i = 0; i < pageSize; i++) {
int index = reverseInvertedIndex[i] * 3;
int keyArray = (int) keyGenerator.getKeyArray(bytes, index)[0];
- actualDataColumnPage
- .putBytes(rowId++,
localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray));
+ if (actualDataColumnPage instanceof LVByteBufferColumnPage) {
+ //TODO It's a quick fix, we can fallback to old logic adding Length
outside of
+ // LVByteBufferColumnPage. Update the page data with Length before
adding to column page.
+ // so it won't be added again when fallback scenario occurs.
+ // Though this is not a bottleneck as after fallback only encoded page
will be decoded
+ // and System.array copy is very fast. So for few pages it won't
impact much
+ byte[] dictionaryKeyBasedOnValue =
+ localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray);
+ byte[] output =
+ new byte[dictionaryKeyBasedOnValue.length -
localDictionaryGenerator.getLVLength()];
+ System
+ .arraycopy(dictionaryKeyBasedOnValue,
localDictionaryGenerator.getLVLength(), output, 0,
+ output.length);
+ actualDataColumnPage.putBytes(rowId++, output);
+ } else {
+ actualDataColumnPage
+ .putBytes(rowId++,
localDictionaryGenerator.getDictionaryKeyBasedOnValue(keyArray));
+ }
}
// get column spec for existing column page
diff --git
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
index d7df88f..de5f84b 100644
---
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
+++
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/ColumnLocalDictionaryGenerator.java
@@ -34,10 +34,16 @@ public class ColumnLocalDictionaryGenerator implements
LocalDictionaryGenerator
*/
private DictionaryStore dictionaryHolder;
+ /**
+ * number of bytes used for storing the length of the dictionary value
+ */
+ private int lvLength;
+
public ColumnLocalDictionaryGenerator(int threshold, int lvLength) {
// adding 1 to threshold for null value
int newThreshold = threshold + 1;
this.dictionaryHolder = new MapBasedDictionaryStore(newThreshold);
+ this.lvLength = lvLength;
ByteBuffer byteBuffer = ByteBuffer.allocate(
lvLength + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length);
@@ -86,4 +92,9 @@ public class ColumnLocalDictionaryGenerator implements
LocalDictionaryGenerator
public byte[] getDictionaryKeyBasedOnValue(int value) {
return this.dictionaryHolder.getDictionaryKeyBasedOnValue(value);
}
+
+ @Override
+ public int getLVLength() {
+ return this.lvLength;
+ }
}
diff --git
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
index 35a9f65..05a9473 100644
---
a/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
+++
b/core/src/main/java/org/apache/carbondata/core/localdictionary/generator/LocalDictionaryGenerator.java
@@ -46,4 +46,10 @@ public interface LocalDictionaryGenerator {
* @return dictionary key based on value
*/
byte[] getDictionaryKeyBasedOnValue(int value);
+
+ /**
+ * @return number of bytes used for storing the length of the dictionary
value
+ * for String type 2 Bytes and Long String 4 bytes
+ */
+ int getLVLength();
}