[17/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
deleted file mode 100644
index 6629d31..000
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.datastore.chunk.impl;
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory.DimensionStoreType;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
-import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
-import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
-
-/**
- * This class is gives access to fixed length dimension data chunk store
- */
-public class FixedLengthDimensionDataChunk extends AbstractDimensionDataChunk {
-
-  /**
-   * Constructor
-   *
-   * @param dataChunkdata chunk
-   * @param invertedIndexinverted index
-   * @param invertedIndexReverse reverse inverted index
-   * @param numberOfRows number of rows
-   * @param columnValueSize  size of each column value
-   */
-  public FixedLengthDimensionDataChunk(byte[] dataChunk, int[] invertedIndex,
-  int[] invertedIndexReverse, int numberOfRows, int columnValueSize) {
-long totalSize = null != invertedIndex ?
-dataChunk.length + (2 * numberOfRows * 
CarbonCommonConstants.INT_SIZE_IN_BYTE) :
-dataChunk.length;
-dataChunkStore = DimensionChunkStoreFactory.INSTANCE
-.getDimensionChunkStore(columnValueSize, null != invertedIndex, 
numberOfRows, totalSize,
-DimensionStoreType.FIXEDLENGTH);
-dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk);
-  }
-
-  /**
-   * Below method will be used to fill the data based on offset and row id
-   *
-   * @param data data to filed
-   * @param offset   offset from which data need to be filed
-   * @param indexrow id of the chunk
-   * @param keyStructureInfo define the structure of the key
-   * @return how many bytes was copied
-   */
-  @Override public int fillChunkData(byte[] data, int offset, int index,
-  KeyStructureInfo keyStructureInfo) {
-dataChunkStore.fillRow(index, data, offset);
-return dataChunkStore.getColumnValueSize();
-  }
-
-  /**
-   * Converts to column dictionary integer value
-   *
-   * @param rowId
-   * @param columnIndex
-   * @param row
-   * @param restructuringInfo
-   * @return
-   */
-  @Override public int fillConvertedChunkData(int rowId, int columnIndex, 
int[] row,
-  KeyStructureInfo restructuringInfo) {
-row[columnIndex] = dataChunkStore.getSurrogate(rowId);
-return columnIndex + 1;
-  }
-
-  /**
-   * Fill the data to vector
-   *
-   * @param vectorInfo
-   * @param column
-   * @param restructuringInfo
-   * @return next column index
-   */
-  @Override public int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, 
int column,
-  KeyStructureInfo restructuringInfo) {
-ColumnVectorInfo columnVectorInfo = vectorInfo[column];
-int offset = columnVectorInfo.offset;
-int vectorOffset = columnVectorInfo.vectorOffset;
-int len = columnVectorInfo.size + offset;
-CarbonColumnVector vector = columnVectorInfo.vector;
-for (int j = offset; j < len; j++) {
-  int dict = dataChunkStore.getSurrogate(j);
-  if (columnVectorInfo.directDictionaryGenerator == null) {
-vector.putInt(vectorOffset++, dict);
-  } else {
-Object 

[17/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92c9f224/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
deleted file mode 100644
index 6629d31..000
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionDataChunk.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.carbondata.core.datastore.chunk.impl;
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory;
-import 
org.apache.carbondata.core.datastore.chunk.store.DimensionChunkStoreFactory.DimensionStoreType;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
-import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
-import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
-
-/**
- * This class is gives access to fixed length dimension data chunk store
- */
-public class FixedLengthDimensionDataChunk extends AbstractDimensionDataChunk {
-
-  /**
-   * Constructor
-   *
-   * @param dataChunkdata chunk
-   * @param invertedIndexinverted index
-   * @param invertedIndexReverse reverse inverted index
-   * @param numberOfRows number of rows
-   * @param columnValueSize  size of each column value
-   */
-  public FixedLengthDimensionDataChunk(byte[] dataChunk, int[] invertedIndex,
-  int[] invertedIndexReverse, int numberOfRows, int columnValueSize) {
-long totalSize = null != invertedIndex ?
-dataChunk.length + (2 * numberOfRows * 
CarbonCommonConstants.INT_SIZE_IN_BYTE) :
-dataChunk.length;
-dataChunkStore = DimensionChunkStoreFactory.INSTANCE
-.getDimensionChunkStore(columnValueSize, null != invertedIndex, 
numberOfRows, totalSize,
-DimensionStoreType.FIXEDLENGTH);
-dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk);
-  }
-
-  /**
-   * Below method will be used to fill the data based on offset and row id
-   *
-   * @param data data to filed
-   * @param offset   offset from which data need to be filed
-   * @param indexrow id of the chunk
-   * @param keyStructureInfo define the structure of the key
-   * @return how many bytes was copied
-   */
-  @Override public int fillChunkData(byte[] data, int offset, int index,
-  KeyStructureInfo keyStructureInfo) {
-dataChunkStore.fillRow(index, data, offset);
-return dataChunkStore.getColumnValueSize();
-  }
-
-  /**
-   * Converts to column dictionary integer value
-   *
-   * @param rowId
-   * @param columnIndex
-   * @param row
-   * @param restructuringInfo
-   * @return
-   */
-  @Override public int fillConvertedChunkData(int rowId, int columnIndex, 
int[] row,
-  KeyStructureInfo restructuringInfo) {
-row[columnIndex] = dataChunkStore.getSurrogate(rowId);
-return columnIndex + 1;
-  }
-
-  /**
-   * Fill the data to vector
-   *
-   * @param vectorInfo
-   * @param column
-   * @param restructuringInfo
-   * @return next column index
-   */
-  @Override public int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, 
int column,
-  KeyStructureInfo restructuringInfo) {
-ColumnVectorInfo columnVectorInfo = vectorInfo[column];
-int offset = columnVectorInfo.offset;
-int vectorOffset = columnVectorInfo.vectorOffset;
-int len = columnVectorInfo.size + offset;
-CarbonColumnVector vector = columnVectorInfo.vector;
-for (int j = offset; j < len; j++) {
-  int dict = dataChunkStore.getSurrogate(j);
-  if (columnVectorInfo.directDictionaryGenerator == null) {
-vector.putInt(vectorOffset++, dict);
-  } else {
-Object 

[17/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-04 Thread jackylk
[CARBONDATA-2099] Refactor query scan process to improve readability

Unified concepts in scan process flow:

1.QueryModel contains all parameter for scan, it is created by API in 
CarbonTable. (In future, CarbonTable will be the entry point for various table 
operations)
2.Use term ColumnChunk to represent one column in one blocklet, and use 
ChunkIndex in reader to read specified column chunk
3.Use term ColumnPage to represent one page in one ColumnChunk
4.QueryColumn => ProjectionColumn, indicating it is for projection

This closes #1874


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/55c4e438
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/55c4e438
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/55c4e438

Branch: refs/heads/carbonstore-rebase5
Commit: 55c4e43828d3918fb8657ca7279a89a3cad4d667
Parents: 03157f9
Author: Jacky Li 
Authored: Tue Jan 30 21:24:04 2018 +0800
Committer: Jacky Li 
Committed: Sun Mar 4 18:15:06 2018 +0800

--
 .../dictionary/AbstractDictionaryCache.java |   3 +-
 .../cache/dictionary/DictionaryCacheLoader.java |   7 +-
 .../dictionary/DictionaryCacheLoaderImpl.java   |  11 +-
 .../core/datastore/BTreeBuilderInfo.java|   6 -
 .../carbondata/core/datastore/DataRefNode.java  |  81 +--
 .../carbondata/core/datastore/FileHolder.java   | 118 
 .../carbondata/core/datastore/FileReader.java   | 114 +++
 .../core/datastore/block/SegmentProperties.java |  50 +-
 .../chunk/DimensionColumnDataChunk.java | 116 ---
 .../datastore/chunk/DimensionColumnPage.java| 111 +++
 .../chunk/impl/AbstractDimensionColumnPage.java |  89 +++
 .../chunk/impl/AbstractDimensionDataChunk.java  |  95 ---
 .../impl/ColumnGroupDimensionColumnPage.java| 194 ++
 .../impl/ColumnGroupDimensionDataChunk.java | 194 --
 .../chunk/impl/DimensionRawColumnChunk.java |  46 +-
 .../impl/FixedLengthDimensionColumnPage.java| 163 +
 .../impl/FixedLengthDimensionDataChunk.java | 163 -
 .../chunk/impl/MeasureRawColumnChunk.java   |  26 +-
 .../impl/VariableLengthDimensionColumnPage.java | 133 
 .../impl/VariableLengthDimensionDataChunk.java  | 140 
 .../reader/DimensionColumnChunkReader.java  |  14 +-
 .../chunk/reader/MeasureColumnChunkReader.java  |  12 +-
 .../AbstractChunkReaderV2V3Format.java  |  34 +-
 ...mpressedDimensionChunkFileBasedReaderV1.java |  38 +-
 ...mpressedDimensionChunkFileBasedReaderV2.java |  30 +-
 ...essedDimChunkFileBasedPageLevelReaderV3.java |  11 +-
 ...mpressedDimensionChunkFileBasedReaderV3.java |  49 +-
 .../AbstractMeasureChunkReaderV2V3Format.java   |  42 +-
 ...CompressedMeasureChunkFileBasedReaderV1.java |  16 +-
 ...CompressedMeasureChunkFileBasedReaderV2.java |  24 +-
 ...CompressedMeasureChunkFileBasedReaderV3.java |  45 +-
 ...essedMsrChunkFileBasedPageLevelReaderV3.java |   8 +-
 .../chunk/store/ColumnPageWrapper.java  |  30 +-
 .../chunk/store/DimensionDataChunkStore.java|   8 +-
 .../SafeFixedLengthDimensionDataChunkStore.java |   6 +-
 ...feVariableLengthDimensionDataChunkStore.java |   8 +-
 ...nsafeFixedLengthDimensionDataChunkStore.java |  10 +-
 ...afeVariableLengthDimesionDataChunkStore.java |  10 +-
 .../datastore/columnar/ColumnGroupModel.java|  26 -
 .../core/datastore/impl/DFSFileHolderImpl.java  | 166 -
 .../core/datastore/impl/DFSFileReaderImpl.java  | 155 
 .../datastore/impl/DefaultFileTypeProvider.java |  16 +-
 .../core/datastore/impl/FileFactory.java|   4 +-
 .../core/datastore/impl/FileHolderImpl.java | 224 --
 .../core/datastore/impl/FileReaderImpl.java | 215 ++
 .../core/datastore/impl/FileTypeInerface.java   |   4 +-
 .../impl/btree/AbstractBTreeLeafNode.java   |  60 +-
 .../impl/btree/BTreeDataRefNodeFinder.java  |   6 +-
 .../datastore/impl/btree/BTreeNonLeafNode.java  |  52 +-
 .../impl/btree/BlockBTreeLeafNode.java  |   6 +-
 .../impl/btree/BlockletBTreeLeafNode.java   |  46 +-
 .../page/encoding/EncodingFactory.java  |   8 +-
 .../server/NonSecureDictionaryServer.java   |   1 -
 .../core/indexstore/BlockletDetailInfo.java |   4 -
 .../blockletindex/BlockletDataRefNode.java  | 228 ++
 .../BlockletDataRefNodeWrapper.java | 241 ---
 .../indexstore/blockletindex/IndexWrapper.java  |   2 +-
 .../blockletindex/SegmentIndexFileStore.java|   7 +-
 .../core/memory/HeapMemoryAllocator.java|   2 +-
 .../core/metadata/blocklet/SegmentInfo.java |  19 -
 .../core/metadata/schema/table/CarbonTable.java | 130 +++-
 .../schema/table/RelationIdentifier.java|  16 -
 .../core/metadata/schema/table/TableInfo.java   |   6 +-
 .../schema/table/column/CarbonColumn.java   |   2 +-
 .../schema/table/column/CarbonDimension.java|  12 -
 

[17/50] [abbrv] carbondata git commit: [CARBONDATA-2099] Refactor query scan process to improve readability

2018-03-02 Thread jackylk
http://git-wip-us.apache.org/repos/asf/carbondata/blob/636eb799/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
index 8c8d08f..a689d8e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeFixedLengthDimensionDataChunkStore.java
@@ -124,22 +124,22 @@ public class UnsafeFixedLengthDimensionDataChunkStore
   /**
* to compare the two byte array
*
-   * @param indexindex of first byte array
+   * @param rowIdindex of first byte array
* @param compareValue value of to be compared
* @return compare result
*/
-  @Override public int compareTo(int index, byte[] compareValue) {
+  @Override public int compareTo(int rowId, byte[] compareValue) {
 // based on index we need to calculate the actual position in memory block
-index = index * columnValueSize;
+rowId = rowId * columnValueSize;
 int compareResult = 0;
 for (int i = 0; i < compareValue.length; i++) {
   compareResult = (CarbonUnsafe.getUnsafe()
-  .getByte(dataPageMemoryBlock.getBaseObject(), 
dataPageMemoryBlock.getBaseOffset() + index)
+  .getByte(dataPageMemoryBlock.getBaseObject(), 
dataPageMemoryBlock.getBaseOffset() + rowId)
   & 0xff) - (compareValue[i] & 0xff);
   if (compareResult != 0) {
 break;
   }
-  index++;
+  rowId++;
 }
 return compareResult;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/636eb799/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
index 36b2bd8..e1eb378 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
@@ -189,11 +189,11 @@ public class UnsafeVariableLengthDimesionDataChunkStore
   /**
* to compare the two byte array
*
-   * @param index index of first byte array
+   * @param rowId index of first byte array
* @param compareValue value of to be compared
* @return compare result
*/
-  @Override public int compareTo(int index, byte[] compareValue) {
+  @Override public int compareTo(int rowId, byte[] compareValue) {
 // now to get the row from memory block we need to do following thing
 // 1. first get the current offset
 // 2. if it's not a last row- get the next row offset
@@ -201,13 +201,13 @@ public class UnsafeVariableLengthDimesionDataChunkStore
 // else subtract the current row offset
 // with complete data length get the offset of set of data
 int currentDataOffset = 
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
-dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((long)index
+dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((long) rowId
 * CarbonCommonConstants.INT_SIZE_IN_BYTE * 1L));
 short length = 0;
 // calculating the length of data
-if (index < numberOfRows - 1) {
+if (rowId < numberOfRows - 1) {
   int OffsetOfNextdata = 
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
-  dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((index + 1)
+  dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + 
((rowId + 1)
   * CarbonCommonConstants.INT_SIZE_IN_BYTE));
   length = (short) (OffsetOfNextdata - (currentDataOffset
   + CarbonCommonConstants.SHORT_SIZE_IN_BYTE));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/636eb799/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/ColumnGroupModel.java
index 74d268a..e2a4161 100644
---