Repository: carbondata Updated Branches: refs/heads/master c94c8ce56 -> d8dfa4f21
[HOTFIX] Fix NPE in spark, when same vector reads files with local dictionary and without local dictionary Problem: NPE in Spark when the same vector reads files with a local dictionary and files without a local dictionary. Cause: When two carbondata files are present, one with a local dictionary and one without, the same vector may be used to read both files [this can happen if a task is launched for a group of files]. If the local dictionary file is read first, the dictionary is set on that vector, but it was never reset before reading the next file. Solution: Reset the dictionary once a batch is processed, and set it only when processing a local dictionary batch. This closes #2895 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8dfa4f2 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8dfa4f2 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8dfa4f2 Branch: refs/heads/master Commit: d8dfa4f21bf3527ed3522799c2d65e143cfd787c Parents: c94c8ce Author: ajantha-bhat <ajanthab...@gmail.com> Authored: Mon Nov 5 15:30:27 2018 +0530 Committer: manishgupta88 <tomanishgupt...@gmail.com> Committed: Tue Nov 13 20:37:15 2018 +0530 ---------------------------------------------------------------------- .../store/impl/LocalDictDimensionDataChunkStore.java | 10 ++-------- .../core/scan/result/vector/CarbonDictionary.java | 4 ---- .../scan/result/vector/impl/CarbonDictionaryImpl.java | 10 ---------- .../carbondata/hadoop/api/CarbonFileInputFormat.java | 2 +- .../spark/vectorreader/VectorizedCarbonRecordReader.java | 1 + 5 files changed, 4 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java index a384743..0eb6d65 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java @@ -61,10 +61,7 @@ public class LocalDictDimensionDataChunkStore implements DimensionDataChunkStore int columnValueSize = dimensionDataChunkStore.getColumnValueSize(); int rowsNum = data.length / columnValueSize; CarbonColumnVector vector = vectorInfo.vector; - if (!dictionary.isDictionaryUsed()) { - vector.setDictionary(dictionary); - dictionary.setDictionaryUsed(); - } + vector.setDictionary(dictionary); BitSet nullBitset = new BitSet(); CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory .getDirectVectorWrapperFactory(vector.getDictionaryVector(), invertedIndex, nullBitset, @@ -91,10 +88,7 @@ public class LocalDictDimensionDataChunkStore implements DimensionDataChunkStore } @Override public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { - if (!dictionary.isDictionaryUsed()) { - vector.setDictionary(dictionary); - dictionary.setDictionaryUsed(); - } + vector.setDictionary(dictionary); int surrogate = dimensionDataChunkStore.getSurrogate(rowId); if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) { vector.putNull(vectorRow); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java index 2147c43..882a365 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java @@ -22,10 +22,6 @@ public interface CarbonDictionary { int getDictionarySize(); - boolean isDictionaryUsed(); - - void setDictionaryUsed(); - byte[] getDictionaryValue(int index); byte[][] getAllDictionaryValues(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java index c8fd573..20e6171 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java @@ -24,8 +24,6 @@ public class CarbonDictionaryImpl implements CarbonDictionary { private int actualSize; - private boolean isDictUsed; - public CarbonDictionaryImpl(byte[][] dictionary, int actualSize) { this.dictionary = dictionary; this.actualSize = actualSize; @@ -39,14 +37,6 @@ public class CarbonDictionaryImpl implements CarbonDictionary { return this.dictionary.length; } - @Override public boolean isDictionaryUsed() { - return this.isDictUsed; - } - - @Override public void setDictionaryUsed() { - this.isDictUsed = true; - } - @Override public byte[] getDictionaryValue(int index) { return dictionary[index]; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java 
---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java index 8b43190..dbfa4ec 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonFileInputFormat.java @@ -199,7 +199,7 @@ public class CarbonFileInputFormat<T> extends CarbonInputFormat<T> implements Se try { carbonFiles = FileFactory.getCarbonFile(tablePath).listFiles(true, new CarbonFileFilter() { @Override public boolean accept(CarbonFile file) { - return file.getName().contains(CarbonTablePath.CARBON_DATA_EXT); + return file.getName().endsWith(CarbonTablePath.CARBON_DATA_EXT); } }); } catch (IOException e) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java ---------------------------------------------------------------------- diff --git a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java index 45686ea..1bde17d 100644 --- a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java +++ b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java @@ -336,6 +336,7 @@ public class VectorizedCarbonRecordReader extends AbstractRecordReader<Object> { for (int i = 0; i < isNoDictStringField.length; i++) { if (isNoDictStringField[i]) { vectorProxy.resetDictionaryIds(i); + vectorProxy.column(i).setDictionary(null); } } }