This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new a4bd3df  [CARBONDATA-3995] Support presto querying older complex type stores
a4bd3df is described below

commit a4bd3df8a29bcadd985bdd2b88bc186c9d74f25e
Author: ajantha-bhat <[email protected]>
AuthorDate: Fri Sep 18 12:41:45 2020 +0530

    [CARBONDATA-3995] Support presto querying older complex type stores
    
    Why is this PR needed?
    Before carbon 2.0, the complex child length was stored as SHORT for string,
    varchar, binary, date, and decimal types. From 2.0 onwards it is stored as
    INT, so the Presto complex query code always assumed INT and hit an
    out-of-bounds exception when an older store was queried.
    
    What changes were proposed in this PR?
    If the INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY encoding is present, parse the
    child length as INT, otherwise parse it as SHORT, so that both old and new
    stores can be queried.
    
    This closes #3937
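
    A minimal standalone sketch of the length-parsing idea described above
    (illustrative class and method names only, not part of the CarbonData API;
    the actual change lives in DirectCompressCodec and DecimalConverterFactory
    in the diff below):

        import java.nio.ByteBuffer;

        public final class ChildLengthReaderSketch {
          // Read the child length prefix at 'offset'. Stores written with the
          // INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY encoding use a 4-byte INT prefix;
          // older (pre-2.0) stores use a 2-byte SHORT prefix.
          static int readChildLength(byte[] pageData, int offset, boolean storedAsInt) {
            if (storedAsInt) {
              return ByteBuffer.wrap(pageData, offset, Integer.BYTES).getInt();
            }
            return ByteBuffer.wrap(pageData, offset, Short.BYTES).getShort();
          }

          public static void main(String[] args) {
            // Old-format page: 2-byte length prefix (value 5) followed by 5 data bytes.
            byte[] oldPage = ByteBuffer.allocate(7).putShort((short) 5)
                .put("hello".getBytes()).array();
            // New-format page: 4-byte length prefix (value 5) followed by 5 data bytes.
            byte[] newPage = ByteBuffer.allocate(9).putInt(5)
                .put("hello".getBytes()).array();
            System.out.println(readChildLength(oldPage, 0, false)); // prints 5
            System.out.println(readChildLength(newPage, 0, true));  // prints 5
          }
        }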
---
 .../dimension/v3/DimensionChunkReaderV3.java       |  2 +
 .../encoding/compress/DirectCompressCodec.java     | 44 +++++++++++++------
 .../metadata/datatype/DecimalConverterFactory.java | 50 ++++++++++++++--------
 .../core/scan/result/vector/ColumnVectorInfo.java  |  3 ++
 4 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java
index 2538687..53744db 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/DimensionChunkReaderV3.java
@@ -253,6 +253,8 @@ public class DimensionChunkReaderV3 extends AbstractDimensionChunkReader {
     ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas,
         compressorName, vectorInfo != null);
     if (vectorInfo != null) {
+      // set encodings of current page in the vectorInfo, used for decoding the complex child page
+      vectorInfo.encodings = encodings;
       decoder
           .decodeAndFillVector(pageData.array(), offset, pageMetadata.data_page_length, vectorInfo,
               nullBitSet, isLocalDictEncodedPage, pageMetadata.numberOfRowsInpage,
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
index 27520c9..fbf1d73 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
@@ -50,6 +50,7 @@ import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVec
 import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertibleVector;
 import org.apache.carbondata.core.scan.result.vector.impl.directread.SequentialFill;
 import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.format.Encoding;
 
 /**
@@ -316,6 +317,17 @@ public class DirectCompressCodec implements ColumnPageCodec {
     private void fillPrimitiveType(byte[] pageData, CarbonColumnVector vector,
         DataType vectorDataType, DataType pageDataType, int pageSize, ColumnVectorInfo vectorInfo,
         BitSet nullBits) {
+      int intSizeInBytes = DataTypes.INT.getSizeInBytes();
+      int shortSizeInBytes = DataTypes.SHORT.getSizeInBytes();
+      int lengthStoredInBytes;
+      if (vectorInfo.encodings != null && vectorInfo.encodings.size() > 0 && CarbonUtil
+          .hasEncoding(vectorInfo.encodings, Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY)) {
+        lengthStoredInBytes = intSizeInBytes;
+      } else {
+        // Before carbon 2.0, complex child length was stored as SHORT
+        // for string, varchar, binary, date, and decimal types
+        lengthStoredInBytes = shortSizeInBytes;
+      }
       int rowId = 0;
       if (pageDataType == DataTypes.BOOLEAN || pageDataType == DataTypes.BYTE) {
         if (vectorDataType == DataTypes.SHORT) {
@@ -345,7 +357,6 @@ public class DirectCompressCodec implements ColumnPageCodec {
           }
         }
       } else if (pageDataType == DataTypes.SHORT) {
-        int shortSizeInBytes = DataTypes.SHORT.getSizeInBytes();
         int size = pageSize * shortSizeInBytes;
         if (vectorDataType == DataTypes.SHORT) {
           for (int i = 0; i < size; i += shortSizeInBytes) {
@@ -397,7 +408,6 @@ public class DirectCompressCodec implements ColumnPageCodec {
           }
         }
       } else {
-        int intSizeInBytes = DataTypes.INT.getSizeInBytes();
         if (pageDataType == DataTypes.INT) {
           int size = pageSize * intSizeInBytes;
           if (vectorDataType == DataTypes.INT) {
@@ -441,36 +451,46 @@ public class DirectCompressCodec implements ColumnPageCodec {
               || vectorDataType == DataTypes.VARCHAR) {
             // for complex primitive string, binary, varchar type
             int offset = 0;
+            int length;
             for (int i = 0; i < pageSize; i++) {
-              int len = ByteBuffer.wrap(pageData, offset, intSizeInBytes).getInt();
-              offset += intSizeInBytes;
-              if (vectorDataType == DataTypes.BINARY && len == 0) {
+              if (lengthStoredInBytes == intSizeInBytes) {
+                length = ByteBuffer.wrap(pageData, offset, lengthStoredInBytes).getInt();
+              } else {
+                length = ByteBuffer.wrap(pageData, offset, lengthStoredInBytes).getShort();
+              }
+              offset += lengthStoredInBytes;
+              if (vectorDataType == DataTypes.BINARY && length == 0) {
                 vector.putNull(i);
                 continue;
               }
-              byte[] row = new byte[len];
-              System.arraycopy(pageData, offset, row, 0, len);
+              byte[] row = new byte[length];
+              System.arraycopy(pageData, offset, row, 0, length);
               if (Arrays.equals(row, CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY)) {
                 vector.putNull(i);
               } else {
                 vector.putObject(i, row);
               }
-              offset += len;
+              offset += length;
             }
           } else if (vectorDataType == DataTypes.DATE) {
             // for complex primitive date type
             int offset = 0;
+            int length;
             for (int i = 0; i < pageSize; i++) {
-              int len = ByteBuffer.wrap(pageData, offset, intSizeInBytes).getInt();
-              offset += intSizeInBytes;
+              if (lengthStoredInBytes == intSizeInBytes) {
+                length = ByteBuffer.wrap(pageData, offset, lengthStoredInBytes).getInt();
+              } else {
+                length = ByteBuffer.wrap(pageData, offset, lengthStoredInBytes).getShort();
+              }
+              offset += lengthStoredInBytes;
               int surrogateInternal =
                   ByteUtil.toXorInt(pageData, offset, intSizeInBytes);
-              if (len == 0) {
+              if (length == 0) {
                 vector.putObject(0, null);
               } else {
                 vector.putObject(0, surrogateInternal - DateDirectDictionaryGenerator.cutOffDate);
               }
-              offset += len;
+              offset += length;
             }
           } else if (DataTypes.isDecimal(vectorDataType)) {
             // for complex primitive decimal type
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java
index 7659cba..b848898 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java
@@ -27,7 +27,9 @@ import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
 import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory;
 import org.apache.carbondata.core.util.ByteUtil;
+import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.core.util.DataTypeUtil;
+import org.apache.carbondata.format.Encoding;
 
 /**
  * Decimal converter to keep the data compact.
@@ -110,6 +112,10 @@ public final class DecimalConverterFactory {
     @Override
     public void fillVector(Object valuesToBeConverted, int size,
         ColumnVectorInfo vectorInfo, BitSet nullBitSet, DataType pageType) {
+      if (!(valuesToBeConverted instanceof byte[])) {
+        throw new UnsupportedOperationException("This object type " + valuesToBeConverted.getClass()
+            + " is not supported in this method");
+      }
       // TODO we need to find way to directly set to vector with out conversion. This way is very
       // inefficient.
       CarbonColumnVector vector = getCarbonColumnVector(vectorInfo, nullBitSet);
@@ -124,9 +130,16 @@ public final class DecimalConverterFactory {
         precision = vectorInfo.measure.getMeasure().getPrecision();
         newMeasureScale = vectorInfo.measure.getMeasure().getScale();
       }
-      if (!(valuesToBeConverted instanceof byte[])) {
-        throw new UnsupportedOperationException("This object type " + valuesToBeConverted.getClass()
-            + " is not supported in this method");
+      int shortSizeInBytes = DataTypes.SHORT.getSizeInBytes();
+      int intSizeInBytes = DataTypes.INT.getSizeInBytes();
+      int lengthStoredInBytes;
+      if (vectorInfo.encodings != null && vectorInfo.encodings.size() > 0 && CarbonUtil
+          .hasEncoding(vectorInfo.encodings, Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY)) {
+        lengthStoredInBytes = intSizeInBytes;
+      } else {
+        // before carbon 2.0, complex child length was stored as SHORT
+        // for string, varchar, binary, date, and decimal types
+        lengthStoredInBytes = shortSizeInBytes;
       }
       byte[] data = (byte[]) valuesToBeConverted;
       if (pageType == DataTypes.BYTE) {
@@ -142,7 +155,6 @@ public final class DecimalConverterFactory {
           }
         }
       } else if (pageType == DataTypes.SHORT) {
-        int shortSizeInBytes = DataTypes.SHORT.getSizeInBytes();
         for (int i = 0; i < size; i++) {
           if (nullBitSet.get(i)) {
             vector.putNull(i);
@@ -172,7 +184,6 @@ public final class DecimalConverterFactory {
           }
         }
       } else {
-        int intSizeInBytes = DataTypes.INT.getSizeInBytes();
         if (pageType == DataTypes.INT) {
           for (int i = 0; i < size; i++) {
             if (nullBitSet.get(i)) {
@@ -205,39 +216,44 @@ public final class DecimalConverterFactory {
         } else if (pageType == DataTypes.BYTE_ARRAY) {
           // complex primitive decimal dimension
           int offset = 0;
+          int length;
           for (int j = 0; j < size; j++) {
            // here decimal data will be Length[4 byte], scale[1 byte], value[Length byte]
-            int len = ByteBuffer.wrap(data, offset, intSizeInBytes).getInt();
-            offset += intSizeInBytes;
-            if (len == 0) {
+            if (lengthStoredInBytes == intSizeInBytes) {
+              length = ByteBuffer.wrap(data, offset, lengthStoredInBytes).getInt();
+            } else {
+              length = ByteBuffer.wrap(data, offset, lengthStoredInBytes).getShort();
+            }
+            offset += lengthStoredInBytes;
+            if (length == 0) {
               vector.putNull(j);
               continue;
             }
             // jump the scale offset
             offset += 1;
             // remove scale from the length
-            len -= 1;
-            byte[] row = new byte[len];
-            System.arraycopy(data, offset, row, 0, len);
+            length -= 1;
+            byte[] row = new byte[length];
+            System.arraycopy(data, offset, row, 0, length);
             long val;
-            if (len == 1) {
+            if (length == 1) {
               val = row[0];
-            } else if (len == 2) {
+            } else if (length == 2) {
               val = ByteUtil.toShort(row, 0);
-            } else if (len == 4) {
+            } else if (length == 4) {
               val = ByteUtil.toInt(row, 0);
-            } else if (len == 3) {
+            } else if (length == 3) {
               val = ByteUtil.valueOf3Bytes(row, 0);
             } else {
               // TODO: check if other value can come
-              val = ByteUtil.toLong(row, 0, len);
+              val = ByteUtil.toLong(row, 0, length);
             }
             BigDecimal value = BigDecimal.valueOf(val, scale);
             if (value.scale() < newMeasureScale) {
               value = value.setScale(newMeasureScale);
             }
             vector.putDecimal(j, value, precision);
-            offset += len;
+            offset += length;
           }
         }
       }
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java
index ed8be52..afccd3c 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java
@@ -28,6 +28,7 @@ import org.apache.carbondata.core.scan.filter.GenericQueryType;
 import org.apache.carbondata.core.scan.model.ProjectionDimension;
 import org.apache.carbondata.core.scan.model.ProjectionMeasure;
 import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
+import org.apache.carbondata.format.Encoding;
 
 public class ColumnVectorInfo implements Comparable<ColumnVectorInfo> {
   public int offset;
@@ -45,6 +46,8 @@ public class ColumnVectorInfo implements Comparable<ColumnVectorInfo> {
   public DecimalConverterFactory.DecimalConverter decimalConverter;
   // Vector stack is used in complex column vectorInfo to store all the children vectors.
   public Stack<CarbonColumnVector> vectorStack = new Stack<>();
+  // store the encoding of the column, used while decoding the page for filling the vector
+  public List<Encoding> encodings;
 
   @Override
   public int compareTo(ColumnVectorInfo o) {
