Repository: carbondata
Updated Branches:
  refs/heads/master c94c8ce56 -> d8dfa4f21


[HOTFIX] Fix NPE in spark, when same vector reads files with local dictionary 
and without local dictionary

Problem: NPE in Spark when the same vector reads files with a local dictionary 
and files without a local dictionary.

Cause: Two carbondata files are present, one with a local dictionary and one 
without. The same vector can be used to read both files [this can happen when 
a task is launched for a group of files]. If the local dictionary file is read 
first, the dictionary is set on that vector, but it was never reset before 
reading the other file.

Solution: Reset the dictionary once a batch is processed, and set it only 
during local dictionary batch processing.

This closes #2895


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8dfa4f2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8dfa4f2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8dfa4f2

Branch: refs/heads/master
Commit: d8dfa4f21bf3527ed3522799c2d65e143cfd787c
Parents: c94c8ce
Author: ajantha-bhat <ajanthab...@gmail.com>
Authored: Mon Nov 5 15:30:27 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Tue Nov 13 20:37:15 2018 +0530

----------------------------------------------------------------------
 .../store/impl/LocalDictDimensionDataChunkStore.java      | 10 ++--------
 .../core/scan/result/vector/CarbonDictionary.java         |  4 ----
 .../scan/result/vector/impl/CarbonDictionaryImpl.java     | 10 ----------
 .../carbondata/hadoop/api/CarbonFileInputFormat.java      |  2 +-
 .../spark/vectorreader/VectorizedCarbonRecordReader.java  |  1 +
 5 files changed, 4 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
index a384743..0eb6d65 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
@@ -61,10 +61,7 @@ public class LocalDictDimensionDataChunkStore implements 
DimensionDataChunkStore
     int columnValueSize = dimensionDataChunkStore.getColumnValueSize();
     int rowsNum = data.length / columnValueSize;
     CarbonColumnVector vector = vectorInfo.vector;
-    if (!dictionary.isDictionaryUsed()) {
-      vector.setDictionary(dictionary);
-      dictionary.setDictionaryUsed();
-    }
+    vector.setDictionary(dictionary);
     BitSet nullBitset = new BitSet();
     CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory
         .getDirectVectorWrapperFactory(vector.getDictionaryVector(), 
invertedIndex, nullBitset,
@@ -91,10 +88,7 @@ public class LocalDictDimensionDataChunkStore implements 
DimensionDataChunkStore
   }
 
   @Override public void fillRow(int rowId, CarbonColumnVector vector, int 
vectorRow) {
-    if (!dictionary.isDictionaryUsed()) {
-      vector.setDictionary(dictionary);
-      dictionary.setDictionaryUsed();
-    }
+    vector.setDictionary(dictionary);
     int surrogate = dimensionDataChunkStore.getSurrogate(rowId);
     if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) {
       vector.putNull(vectorRow);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
index 2147c43..882a365 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
@@ -22,10 +22,6 @@ public interface CarbonDictionary  {
 
   int getDictionarySize();
 
-  boolean isDictionaryUsed();
-
-  void setDictionaryUsed();
-
   byte[] getDictionaryValue(int index);
 
   byte[][] getAllDictionaryValues();

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
index c8fd573..20e6171 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
@@ -24,8 +24,6 @@ public class CarbonDictionaryImpl implements CarbonDictionary 
{
 
   private int actualSize;
 
-  private boolean isDictUsed;
-
   public CarbonDictionaryImpl(byte[][] dictionary, int actualSize) {
     this.dictionary = dictionary;
     this.actualSize = actualSize;
@@ -39,14 +37,6 @@ public class CarbonDictionaryImpl implements 
CarbonDictionary {
     return this.dictionary.length;
   }
 
-  @Override public boolean isDictionaryUsed() {
-    return this.isDictUsed;
-  }
-
-  @Override public void setDictionaryUsed() {
-    this.isDictUsed = true;
-  }
-
   @Override public byte[] getDictionaryValue(int index) {
     return dictionary[index];
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
----------------------------------------------------------------------
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
index 8b43190..dbfa4ec 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java
@@ -199,7 +199,7 @@ public class CarbonFileInputFormat<T> extends 
CarbonInputFormat<T> implements Se
     try {
       carbonFiles = FileFactory.getCarbonFile(tablePath).listFiles(true, new 
CarbonFileFilter() {
         @Override public boolean accept(CarbonFile file) {
-          return file.getName().contains(CarbonTablePath.CARBON_DATA_EXT);
+          return file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT);
         }
       });
     } catch (IOException e) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java
----------------------------------------------------------------------
diff --git 
a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java
 
b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java
index 45686ea..1bde17d 100644
--- 
a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java
+++ 
b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java
@@ -336,6 +336,7 @@ public class VectorizedCarbonRecordReader extends 
AbstractRecordReader<Object> {
       for (int i = 0; i < isNoDictStringField.length; i++) {
         if (isNoDictStringField[i]) {
           vectorProxy.resetDictionaryIds(i);
+          vectorProxy.column(i).setDictionary(null);
         }
       }
     }

Reply via email to