[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/carbondata/pull/2417


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201277468
  
--- Diff: 
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingUnsafeColumnPageForComplexDataType.scala
 ---
@@ -0,0 +1,588 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import java.io.{File, PrintWriter}
+import java.sql.Timestamp
+
+import scala.collection.mutable
+import scala.util.Random
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test class of Adaptive Encoding UnSafe Column Page with Complex Data 
type
+ *
+ */
+
+class TestAdaptiveEncodingUnsafeColumnPageForComplexDataType
+  extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll(): Unit = {
+
+new 
File(CarbonProperties.getInstance().getSystemFolderLocation).delete()
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  test("test INT with struct and array, Encoding INT-->BYTE") {
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201244026
  
--- Diff: 
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingForNullValues.scala
 ---
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test class of Adaptive Encoding UnSafe Column Page with Null values
+ *
+ */
+
+class TestAdaptiveEncodingForNullValues
+  extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  test("test INT with struct and array, Encoding INT-->BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) " +
+  "stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+
+  test("test SMALLINT with struct and array SMALLINT --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+
+  test("test BigInt with struct and array BIGINT --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Double with Struct and Array DOUBLE --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Decimal with Struct") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by " +
+  "'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Timestamp with Struct") {
+sql("Drop table if exists adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, 
"/MM/dd")
+sql(
+  "create table adaptive(roll int, student 
struct) stored by " +
+  "'carbondata'")
+sql("insert into adaptive values(1,'null$abc')")
+checkAnswer(sql("select * from adaptive"),

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201243892
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
--- End diff --

In continuation of the above comment: for the floating codec, LazyColumnPage only does double-based comparison against the min and max values from the page statistics, so the value returned by the lazy page is always a double for the floating codec. The conversion therefore cannot be avoided.

Also, float is currently not implemented in carbon core; float values are always converted to double and saved to disk.
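
(A minimal, self-contained sketch of the behaviour described above, with hypothetical names rather than the actual carbon classes: the lazy page reconstructs the value against the double max from the page statistics and always hands back a double, and the caller narrows it back to float only when the schema type was FLOAT.)

import java.nio.ByteBuffer;

public class FloatingDecodeSketch {

  // stands in for LazyColumnPage.getDouble(rowId) on an adaptive floating page:
  // the stored value is reconstructed against the double max from the statistics
  static double decodeAsDouble(double storedValue, double maxFromStats) {
    return maxFromStats - storedValue;
  }

  // stands in for the narrowing done afterwards when building the byte array
  static byte[] toBytes(double reconstructed, boolean schemaIsFloat) {
    if (schemaIsFloat) {
      // float is stored as double on disk today, so narrow it back here
      return ByteBuffer.allocate(4).putFloat((float) reconstructed).array();
    }
    return ByteBuffer.allocate(8).putDouble(reconstructed).array();
  }

  public static void main(String[] args) {
    double max = 100.5;
    System.out.println(toBytes(decodeAsDouble(2.5, max), true).length);   // 4 bytes for FLOAT
    System.out.println(toBytes(decodeAsDouble(2.5, max), false).length);  // 8 bytes for DOUBLE
  }
}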


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201242475
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
 ---
@@ -359,38 +412,36 @@ public void freeMemory() {
 }
   }
 
-  @Override
-  public void convertValue(ColumnPageValueConverter codec) {
-int pageSize = getPageSize();
+  @Override public void convertValue(ColumnPageValueConverter codec) {
 if (dataType == DataTypes.BYTE) {
-  for (long i = 0; i < pageSize; i++) {
+  for (long i = 0; i < totalLength / ByteUtil.SIZEOF_BYTE; i++) {
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201241294
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
--- End diff --

There are altogether three levels of column page abstraction:
ColumnPageWrapper
LazyColumnPage
Safe / Unsafe / Variable-length / Fixed-length column page

The bottom layer returns data based on the DataType, and the value is converted and handed back to LazyColumnPage. Because the data is adaptive or adaptive-delta encoded, LazyColumnPage decodes it by negating it with the max value from the page statistics. That max value is either a long, a double or a decimal, so the lazy layer has to fetch the data in long, double or decimal form to make it comparable with the max. Therefore getLong, getDouble and getDecimal are implemented on the lazy page to decode the data (negating with the max), and the decoded value is returned in that same form: for the integral codec the returned value is a long, for the floating adaptive codec it is a double, and for decimal it is returned as a decimal.
So getLong is correct here; the data will come back in long format and is later converted back to the respective data type.
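
(For illustration, a compact sketch of this flow with hypothetical names, not the real carbon classes: the lazy layer negates the stored value with the long max from the page statistics and hands back a long; the wrapper then narrows that long to the schema data type.)

public class LazyDecodeSketch {

  // stands in for LazyColumnPage.getLong(rowId) on an adaptive/adaptive-delta page
  static long getLong(long storedValue, long maxFromStats) {
    return maxFromStats - storedValue;
  }

  // stands in for the narrowing done afterwards in the wrapper layer
  static Object narrowToSchemaType(long decoded, String schemaType) {
    if ("BYTE".equals(schemaType)) {
      return (byte) decoded;
    } else if ("SHORT".equals(schemaType)) {
      return (short) decoded;
    } else if ("INT".equals(schemaType)) {
      return (int) decoded;
    }
    return decoded; // LONG and TIMESTAMP stay as long
  }

  public static void main(String[] args) {
    System.out.println(narrowToSchemaType(getLong(993L, 1000L), "SHORT")); // prints 7
  }
}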


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201233403
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
--- End diff --

Added an unsupported exception for the non complex-primitive case.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201229878
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java 
---
@@ -870,4 +873,23 @@ private static void 
getChildDimensionOrdinal(CarbonDimension queryDimensions,
   }
 }
   }
+
+  /**
--- End diff --

Will be refactored in CARBONDATA-2713


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201226831
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
 ---
@@ -93,9 +93,9 @@ public PrimitiveQueryType(String name, String parentname, 
int blockIndex,
 return 1;
   }
 
-  @Override public void parseBlocksAndReturnComplexColumnByteArray(
-  DimensionRawColumnChunk[] rawColumnChunks, int rowNumber,
-  int pageNumber, DataOutputStream dataOutputStream) throws 
IOException {
+  @Override
+  public void 
parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] 
rawColumnChunks,
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201226564
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  // TODO: remove the Sout after testing,
+  // currently added as executor Info logs doesn't come in IDE
+  System.out.println("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  return pageEncoder.encode(page);
+}
   }
 
+  private static ColumnPageEncoder createCodecForDimension(ColumnPage 
inputPage) {
+TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
+if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
+  if (inputPage.getDataType() == DataTypes.BYTE_ARRAY
+  || inputPage.getDataType() == DataTypes.STRING) {
+// use legacy encoder
+return null;
+  } else if ((inputPage.getDataType() == DataTypes.BYTE) || 
(inputPage.getDataType()
+  == DataTypes.SHORT) || (inputPage.getDataType() == 
DataTypes.INT) || (
+  inputPage.getDataType() == DataTypes.LONG)) {
+return 
selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true)
+.createEncoder(null);
+  } else if ((inputPage.getDataType() == DataTypes.FLOAT) || 
(inputPage.getDataType()
+  == DataTypes.DOUBLE)) {
+return 
selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true)
+.createEncoder(null);
+  }
+}
+// use legacy encoder
+return null;
--- End diff --

This will be handled as part of the refactoring in CARBONDATA-2713.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201226314
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
--- End diff --

Looping logic is not required, as only one particular column comes in during execution; there is no need to drill down further into its children.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201225430
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  // TODO: remove the Sout after testing,
+  // currently added as executor Info logs doesn't come in IDE
+  System.out.println("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  return pageEncoder.encode(page);
+}
   }
 
+  private static ColumnPageEncoder createCodecForDimension(ColumnPage 
inputPage) {
+TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
--- End diff --

This refactoring will be done as part of CARBONDATA-2713.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-10 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201225285
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/ComplexColumnPage.java
 ---
@@ -63,25 +66,42 @@ public ComplexColumnPage(List 
complexColumnType) {
* below method will be used to initlize the column page of complex type
* @param columnToDictMap
* dictionary map
-   * @param columnNames
-   * list of columns
* @param pageSize
* number of records
* @throws MemoryException
* if memory is not sufficient
*/
-  public void initialize(Map 
columnToDictMap,
-  List columnNames, int pageSize) throws MemoryException {
+  public void initialize(Map 
columnToDictMap, int pageSize)
+  throws MemoryException {
+DataType dataType;
 for (int i = 0; i < this.columnPages.length; i++) {
-  LocalDictionaryGenerator localDictionaryGenerator = 
columnToDictMap.get(columnNames.get(i));
+  ComplexColumnInfo complexColumnInfo = complexColumnInfoList.get(i);
+  LocalDictionaryGenerator localDictionaryGenerator =
+  columnToDictMap.get(complexColumnInfo.getColumnNames());
   if (null == localDictionaryGenerator) {
-TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
-.newInstance(columnNames.get(i), DataTypes.BYTE_ARRAY, 
complexColumnType.get(i));
-this.columnPages[i] = ColumnPage.newPage(spec, 
DataTypes.BYTE_ARRAY, pageSize);
-this.columnPages[i].setStatsCollector(new DummyStatsCollector());
+dataType = complexColumnInfo.getColumnDataTypes();
+if ((complexColumnInfo.isNoDictionary() && 
!((DataTypes.isStructType(dataType) || DataTypes
+.isArrayType(dataType) || (dataType == DataTypes.STRING) || 
(dataType
+== DataTypes.VARCHAR) || (dataType == DataTypes.DATE) || 
DataTypes
+.isDecimal(dataType) {
+  TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
+  .newInstance(complexColumnInfo.getColumnNames(), dataType,
+  complexColumnInfo.getComplexColumnType());
+  // no dictionary primitive types need adaptive encoding,
+  // hence store as object instead of byte array
+  this.columnPages[i] = ColumnPage.newPage(spec, dataType, 
pageSize);
+  
this.columnPages[i].setStatsCollector(PrimitivePageStatsCollector.newInstance(dataType));
+} else {
+  TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201221334
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
+if ((srcDataType == DataTypes.BYTE)) {
+  byte out = (byte) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.BOOLEAN) {
+  byte out = (byte) longData;
+  return ByteUtil.toBytes(ByteUtil.toBoolean(out));
+} else if (srcDataType == DataTypes.SHORT) {
+  short out = (short) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.SHORT_INT) {
+  int out = (int) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.INT) {
+  int out = (int) longData;
+  return ByteUtil.toBytes(out);
+} else {
+  // timestamp and long
+  return ByteUtil.toBytes(longData);
+}
+  } else if ((targetDataType == DataTypes.STRING) ||
+  (targetDataType == DataTypes.VARCHAR) ||
+  (targetDataType == DataTypes.BYTE_ARRAY)) {
+return columnPage.getBytes(rowId);
+  } else {
+throw new RuntimeException("unsupported type: " + targetDataType);
+  }
+} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
!this.isAdaptiveComplexPrimitive()) {
+  if ((srcDataType == DataTypes.BYTE) || (srcDataType == 
DataTypes.BOOLEAN)) {
+byte[] out = new byte[1];
+out[0] = (columnPage.getByte(rowId));
+return out;
+  } else if (srcDataType == DataTypes.BYTE_ARRAY) {
+return columnPage.getBytes(rowId);
+  } else {
+throw new RuntimeException("unsupported type: " + targetDataType);
+  }
+} else {
+  return columnPage.getBytes(rowId);
--- End diff --

This getBytes is only for the complex STRUCT and ARRAY types, which are stored in byte-array format. Later, when CARBONDATA-2713 moves STRUCT and ARRAY to the V format, this branch will throw an exception instead.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201199734
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201199788
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java ---
@@ -141,7 +141,7 @@ private static ColumnPage 
createFixLengthByteArrayPage(TableSpec.ColumnSpec colu
 throw new RuntimeException(e);
   }
 } else {
-  return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, 
eachValueSize);
+  return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize);
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201198855
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
+if ((srcDataType == DataTypes.BYTE)) {
+  byte out = (byte) longData;
--- End diff --

Placing a switch statement here is not a small change and will impact many other files and classes. As this is not a functional issue, postponing it to CARBONDATA-2713.
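
(For illustration only, the switch-based shape being asked for might look like the sketch below; it uses a self-contained enum as a stand-in, since carbon's DataTypes instances are plain objects rather than enum constants, which is part of why the change is not local to this method.)

import java.nio.ByteBuffer;

public class SwitchConversionSketch {

  // hypothetical stand-in for the source data type of the complex primitive
  enum SrcType { BYTE, BOOLEAN, SHORT, INT, LONG, TIMESTAMP }

  // convert the long returned by the lazy page into the narrowed byte form
  static byte[] toBytes(long decoded, SrcType srcType) {
    switch (srcType) {
      case BYTE:
      case BOOLEAN:
        return new byte[] { (byte) decoded };
      case SHORT:
        return ByteBuffer.allocate(2).putShort((short) decoded).array();
      case INT:
        return ByteBuffer.allocate(4).putInt((int) decoded).array();
      case LONG:
      case TIMESTAMP:
      default:
        return ByteBuffer.allocate(8).putLong(decoded).array();
    }
  }

  public static void main(String[] args) {
    System.out.println(toBytes(300L, SrcType.SHORT).length); // prints 2
  }
}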


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201197871
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
--- End diff --

Decimal is supported inside complex types, but it currently bypasses the adaptive encoding path and is stored in byte-array format. This will be modified in CARBONDATA-2605.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201197596
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
--- End diff --

Currently, Struct and Array types are stored as byte arrays. Since they are stored in LV (length-value) format, they can be modified to fit the V (value-only) format. This refactoring will be done as part of JIRA CARBONDATA-2605.
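
(A small illustrative sketch of the difference, assuming a 4-byte length prefix; this is not the exact on-disk layout: LV prefixes each value with its length, while V stores only the value because the element width is already known.)

import java.nio.ByteBuffer;

public class LvLayoutSketch {

  // LV: every value carries a length prefix so variable-size entries can be split apart
  static byte[] encodeLV(byte[] value) {
    return ByteBuffer.allocate(4 + value.length).putInt(value.length).put(value).array();
  }

  // V: a fixed-size value needs no prefix, the page already knows the element width
  static byte[] encodeV(int value) {
    return ByteBuffer.allocate(4).putInt(value).array();
  }

  public static void main(String[] args) {
    System.out.println(encodeLV(encodeV(42)).length); // 8 bytes: 4 for length + 4 for payload
    System.out.println(encodeV(42).length);           // 4 bytes
  }
}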


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread sounakr
Github user sounakr commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r201073104
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -17,32 +17,39 @@
 
 package org.apache.carbondata.core.datastore.chunk.store;
 
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.ColumnType;
 import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
+import org.apache.carbondata.core.util.ByteUtil;
 
 public class ColumnPageWrapper implements DimensionColumnPage {
 
   private ColumnPage columnPage;
 
-  public ColumnPageWrapper(ColumnPage columnPage) {
+  private boolean isAdaptiveComplexPrimitivePage;
+
+  public ColumnPageWrapper(ColumnPage columnPage, boolean 
isAdaptiveComplexPrimitivePage) {
 this.columnPage = columnPage;
+this.isAdaptiveComplexPrimitivePage = isAdaptiveComplexPrimitivePage;
   }
 
   @Override
   public int fillRawData(int rowId, int offset, byte[] data, 
KeyStructureInfo restructuringInfo) {
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public int fillSurrogateKey(int rowId, int chunkIndex, int[] 
outputSurrogateKey,
+  @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] 
outputSurrogateKey,
--- End diff --

Done


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200992646
  
--- Diff: 
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingUnsafeColumnPageForComplexDataType.scala
 ---
@@ -0,0 +1,588 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import java.io.{File, PrintWriter}
+import java.sql.Timestamp
+
+import scala.collection.mutable
+import scala.util.Random
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test class of Adaptive Encoding UnSafe Column Page with Complex Data 
type
+ *
+ */
+
+class TestAdaptiveEncodingUnsafeColumnPageForComplexDataType
+  extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll(): Unit = {
+
+new 
File(CarbonProperties.getInstance().getSystemFolderLocation).delete()
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  test("test INT with struct and array, Encoding INT-->BYTE") {
--- End diff --

1) Why do the test cases differ between the unsafe and safe paths? Derive one class from the other and pass the property as a parameter.
2) Write helper functions to reduce duplication of logic among the test cases.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200991672
  
--- Diff: 
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestAdaptiveEncodingForNullValues.scala
 ---
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test class of Adaptive Encoding UnSafe Column Page with Null values
+ *
+ */
+
+class TestAdaptiveEncodingForNullValues
+  extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE,
+"true")
+  }
+
+  test("test INT with struct and array, Encoding INT-->BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) " +
+  "stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+
+  test("test SMALLINT with struct and array SMALLINT --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+
+  test("test BigInt with struct and array BIGINT --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Double with Struct and Array DOUBLE --> BYTE") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by 'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Decimal with Struct") {
+sql("Drop table if exists adaptive")
+sql(
+  "create table adaptive(roll int, student 
struct>) stored by " +
+  "'carbondata'")
+sql("insert into adaptive values(1,'null$abc$null:null:null')")
+checkAnswer(sql("select * from adaptive"),
+  Seq(Row(1, Row(null, "abc", mutable.WrappedArray.make(Array(null, 
null, null))
+  }
+
+  test("test Timestamp with Struct") {
+sql("Drop table if exists adaptive")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, 
"/MM/dd")
+sql(
+  "create table adaptive(roll int, student 
struct) stored by " +
+  "'carbondata'")
+sql("insert into adaptive values(1,'null$abc')")
+checkAnswer(sql("select * from adaptive"),
   

[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200989973
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java 
---
@@ -870,4 +873,23 @@ private static void 
getChildDimensionOrdinal(CarbonDimension queryDimensions,
   }
 }
   }
+
+  /**
--- End diff --

Move this to the right location; it does not belong in QueryUtil.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200989677
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
 ---
@@ -93,9 +93,9 @@ public PrimitiveQueryType(String name, String parentname, 
int blockIndex,
 return 1;
   }
 
-  @Override public void parseBlocksAndReturnComplexColumnByteArray(
-  DimensionRawColumnChunk[] rawColumnChunks, int rowNumber,
-  int pageNumber, DataOutputStream dataOutputStream) throws 
IOException {
+  @Override
+  public void 
parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[] 
rawColumnChunks,
--- End diff --

There is no real change here; revert these changes.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200986348
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  // TODO: remove the Sout after testing,
+  // currently added as executor Info logs doesn't come in IDE
+  System.out.println("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  return pageEncoder.encode(page);
+}
   }
 
+  private static ColumnPageEncoder createCodecForDimension(ColumnPage 
inputPage) {
+TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
+if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
+  if (inputPage.getDataType() == DataTypes.BYTE_ARRAY
+  || inputPage.getDataType() == DataTypes.STRING) {
+// use legacy encoder
+return null;
+  } else if ((inputPage.getDataType() == DataTypes.BYTE) || 
(inputPage.getDataType()
+  == DataTypes.SHORT) || (inputPage.getDataType() == 
DataTypes.INT) || (
+  inputPage.getDataType() == DataTypes.LONG)) {
+return 
selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true)
+.createEncoder(null);
+  } else if ((inputPage.getDataType() == DataTypes.FLOAT) || 
(inputPage.getDataType()
+  == DataTypes.DOUBLE)) {
+return 
selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true)
+.createEncoder(null);
+  }
+}
+// use legacy encoder
+return null;
--- End diff --

The legacy encoder logic should also be here.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200985673
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  // TODO: remove the Sout after testing,
+  // currently added as executor Info logs doesn't come in IDE
+  System.out.println("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
+  .getColumnSpec().getSchemaDataType() + " Destination data type: 
" + pageEncoder
+  .getEncoderMeta(page).getStoreDataType() + " for the column: " + 
pageEncoder
+  .getEncoderMeta(page).getColumnSpec().getFieldName());
+  return pageEncoder.encode(page);
+}
   }
 
+  private static ColumnPageEncoder createCodecForDimension(ColumnPage 
inputPage) {
+TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec();
--- End diff --

move to EncodeFactory


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200985232
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
--- End diff --

FallbackEncodedColumnPage also uses this for complex types, so the loop logic over the child column types is not present here.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200984983
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 ---
@@ -147,10 +160,48 @@ protected void fillLegacyFields(DataChunk2 dataChunk)
 
   public static EncodedColumnPage encodedColumn(ColumnPage page)
   throws IOException, MemoryException {
-ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
-return encoder.encode(page);
+ColumnPageEncoder pageEncoder = createCodecForDimension(page);
+if (pageEncoder == null) {
+  ColumnPageEncoder encoder = new 
DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
+  return encoder.encode(page);
+} else {
+  LOGGER.info("Encoder result ---> Source data type: " + 
pageEncoder.getEncoderMeta(page)
--- End diff --

move to EncodeFactory


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200967561
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/ComplexColumnPage.java
 ---
@@ -63,25 +66,42 @@ public ComplexColumnPage(List 
complexColumnType) {
* below method will be used to initlize the column page of complex type
* @param columnToDictMap
* dictionary map
-   * @param columnNames
-   * list of columns
* @param pageSize
* number of records
* @throws MemoryException
* if memory is not sufficient
*/
-  public void initialize(Map 
columnToDictMap,
-  List columnNames, int pageSize) throws MemoryException {
+  public void initialize(Map 
columnToDictMap, int pageSize)
+  throws MemoryException {
+DataType dataType;
 for (int i = 0; i < this.columnPages.length; i++) {
-  LocalDictionaryGenerator localDictionaryGenerator = 
columnToDictMap.get(columnNames.get(i));
+  ComplexColumnInfo complexColumnInfo = complexColumnInfoList.get(i);
+  LocalDictionaryGenerator localDictionaryGenerator =
+  columnToDictMap.get(complexColumnInfo.getColumnNames());
   if (null == localDictionaryGenerator) {
-TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
-.newInstance(columnNames.get(i), DataTypes.BYTE_ARRAY, 
complexColumnType.get(i));
-this.columnPages[i] = ColumnPage.newPage(spec, 
DataTypes.BYTE_ARRAY, pageSize);
-this.columnPages[i].setStatsCollector(new DummyStatsCollector());
+dataType = complexColumnInfo.getColumnDataTypes();
+if ((complexColumnInfo.isNoDictionary() && 
!((DataTypes.isStructType(dataType) || DataTypes
+.isArrayType(dataType) || (dataType == DataTypes.STRING) || 
(dataType
+== DataTypes.VARCHAR) || (dataType == DataTypes.DATE) || 
DataTypes
+.isDecimal(dataType) {
+  TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
+  .newInstance(complexColumnInfo.getColumnNames(), dataType,
+  complexColumnInfo.getComplexColumnType());
+  // no dictionary primitive types need adaptive encoding,
+  // hence store as object instead of byte array
+  this.columnPages[i] = ColumnPage.newPage(spec, dataType, 
pageSize);
+  
this.columnPages[i].setStatsCollector(PrimitivePageStatsCollector.newInstance(dataType));
+} else {
+  TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
--- End diff --

remove duplicate logic


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200964076
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java ---
@@ -141,7 +141,7 @@ private static ColumnPage 
createFixLengthByteArrayPage(TableSpec.ColumnSpec colu
 throw new RuntimeException(e);
   }
 } else {
-  return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize, 
eachValueSize);
+  return new SafeFixLengthColumnPage(columnSpec, dataType, pageSize);
--- End diff --

This should not be removed.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200962872
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == 
DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
+if ((srcDataType == DataTypes.BYTE)) {
+  byte out = (byte) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.BOOLEAN) {
+  byte out = (byte) longData;
+  return ByteUtil.toBytes(ByteUtil.toBoolean(out));
+} else if (srcDataType == DataTypes.SHORT) {
+  short out = (short) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.SHORT_INT) {
+  int out = (int) longData;
+  return ByteUtil.toBytes(out);
+} else if (srcDataType == DataTypes.INT) {
+  int out = (int) longData;
+  return ByteUtil.toBytes(out);
+} else {
+  // timestamp and long
+  return ByteUtil.toBytes(longData);
+}
+  } else if ((targetDataType == DataTypes.STRING) ||
+  (targetDataType == DataTypes.VARCHAR) ||
+  (targetDataType == DataTypes.BYTE_ARRAY)) {
+return columnPage.getBytes(rowId);
+  } else {
+throw new RuntimeException("unsupported type: " + targetDataType);
+  }
+} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && 
!this.isAdaptiveComplexPrimitive()) {
+  if ((srcDataType == DataTypes.BYTE) || (srcDataType == 
DataTypes.BOOLEAN)) {
+byte[] out = new byte[1];
+out[0] = (columnPage.getByte(rowId));
+return out;
+  } else if (srcDataType == DataTypes.BYTE_ARRAY) {
+return columnPage.getBytes(rowId);
+  } else {
+throw new RuntimeException("unsupported type: " + targetDataType);
+  }
+} else {
+  return columnPage.getBytes(rowId);
--- End diff --

getBytes should not be the default; it should throw an unsupported-datatype exception instead.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200938152
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, 
ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
+if ((srcDataType == DataTypes.BYTE)) {
+  byte out = (byte) longData;
--- End diff --

Use a switch statement instead of the if-else chain.
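
For illustration only, a rough sketch of the switch-style dispatch being suggested. CarbonData's DataTypes values are objects rather than enum constants, so the enum below is a hypothetical stand-in introduced purely to make a Java switch possible, and the ByteBuffer serialization is illustrative rather than the ByteUtil layout used by the real code:

    import java.nio.ByteBuffer;

    final class LongToBytesSketch {
      // Hypothetical stand-in for the relevant DataTypes constants.
      enum SrcType { BYTE, BOOLEAN, SHORT, SHORT_INT, INT, LONG, TIMESTAMP }

      static byte[] toBytes(SrcType srcType, long longData) {
        switch (srcType) {
          case BYTE:
          case BOOLEAN:
            // Both collapse to a single byte here; the real code routes BOOLEAN
            // through ByteUtil.toBoolean first.
            return new byte[] { (byte) longData };
          case SHORT:
            return ByteBuffer.allocate(2).putShort((short) longData).array();
          case SHORT_INT:
          case INT:
            return ByteBuffer.allocate(4).putInt((int) longData).array();
          case LONG:
          case TIMESTAMP:
            return ByteBuffer.allocate(8).putLong(longData).array();
          default:
            throw new IllegalArgumentException("unsupported type: " + srcType);
        }
      }
    }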


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200935214
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
+  } else if ((srcDataType == DataTypes.BYTE) ||
+  (srcDataType == DataTypes.BOOLEAN) ||
+  (srcDataType == DataTypes.SHORT) ||
+  (srcDataType == DataTypes.SHORT_INT) ||
+  (srcDataType == DataTypes.INT) ||
+  (srcDataType == DataTypes.LONG) ||
+  (srcDataType == DataTypes.TIMESTAMP)) {
+long longData = columnPage.getLong(rowId);
+if ((srcDataType == DataTypes.BYTE)) {
+  byte out = (byte) longData;
--- End diff --

Should not convert to long and then convert back; it adds unnecessary overhead.
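
A sketch of what avoiding the long round-trip could look like, assuming the decoded ColumnPage exposes width-specific accessors such as getShort(rowId) and getInt(rowId) alongside the getByte/getLong/getDouble calls already visible in this diff; whether the adaptive codec materializes the page that way is an assumption here:

    // Read each value at its natural width instead of widening to long first.
    if (srcDataType == DataTypes.BYTE) {
      return ByteUtil.toBytes(columnPage.getByte(rowId));
    } else if (srcDataType == DataTypes.SHORT) {
      return ByteUtil.toBytes(columnPage.getShort(rowId));   // assumed accessor
    } else if (srcDataType == DataTypes.SHORT_INT || srcDataType == DataTypes.INT) {
      return ByteUtil.toBytes(columnPage.getInt(rowId));     // assumed accessor
    } else if (srcDataType == DataTypes.LONG || srcDataType == DataTypes.TIMESTAMP) {
      return ByteUtil.toBytes(columnPage.getLong(rowId));
    }
    // BOOLEAN (and anything else) would keep its existing conversion path.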


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200932526
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
+ColumnType columnType = columnPage.getColumnSpec().getColumnType();
+DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
+DataType targetDataType = columnPage.getDataType();
+if (columnPage.getNullBits().get(rowId)) {
+  // if this row is null, return default null represent in byte array
+  return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
+}
+if ((columnType == ColumnType.COMPLEX_PRIMITIVE) && this.isAdaptiveComplexPrimitive()) {
+  if (srcDataType == DataTypes.DOUBLE || srcDataType == DataTypes.FLOAT) {
+double doubleData = columnPage.getDouble(rowId);
+if (srcDataType == DataTypes.FLOAT) {
+  float out = (float) doubleData;
+  return ByteUtil.toBytes(out);
+} else {
+  return ByteUtil.toBytes(doubleData);
+}
+  } else if (DataTypes.isDecimal(srcDataType)) {
+throw new RuntimeException("unsupported type: " + srcDataType);
--- End diff --

Why is the decimal type not supported? If it was supported previously, this should not introduce a backward-compatibility break.
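
If decimal were to stay supported, the missing branch might look roughly like the sketch below; getDecimal on the decoded page and the unscaled-value serialization are assumptions for illustration, not confirmed CarbonData behaviour:

    } else if (DataTypes.isDecimal(srcDataType)) {
      // Assumed accessor on the decoded page; the real byte layout for decimals
      // would have to match whatever the reader of this column expects.
      java.math.BigDecimal decimalData = columnPage.getDecimal(rowId);
      return decimalData.unscaledValue().toByteArray();
    }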


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200931266
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -53,11 +60,75 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public byte[] getChunkData(int rowId) {
-return columnPage.getBytes(rowId);
+  @Override public byte[] getChunkData(int rowId) {
--- End diff --

The complex type metadata column should also be adaptive encoded.


---


[GitHub] carbondata pull request #2417: [CARBONDATA-2607][Complex Column Enhancements...

2018-07-09 Thread gvramana
Github user gvramana commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2417#discussion_r200927005
  
--- Diff: 
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 ---
@@ -17,32 +17,39 @@
 
 package org.apache.carbondata.core.datastore.chunk.store;
 
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.ColumnType;
 import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.executor.infos.KeyStructureInfo;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
+import org.apache.carbondata.core.util.ByteUtil;
 
 public class ColumnPageWrapper implements DimensionColumnPage {
 
   private ColumnPage columnPage;
 
-  public ColumnPageWrapper(ColumnPage columnPage) {
+  private boolean isAdaptiveComplexPrimitivePage;
+
+  public ColumnPageWrapper(ColumnPage columnPage, boolean isAdaptiveComplexPrimitivePage) {
 this.columnPage = columnPage;
+this.isAdaptiveComplexPrimitivePage = isAdaptiveComplexPrimitivePage;
   }
 
   @Override
  public int fillRawData(int rowId, int offset, byte[] data, KeyStructureInfo restructuringInfo) {
 throw new UnsupportedOperationException("internal error");
   }
 
-  @Override
-  public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey,
+  @Override public int fillSurrogateKey(int rowId, int chunkIndex, int[] outputSurrogateKey,
--- End diff --

Keep it the same as before; leave @Override on its own line instead of moving it onto the method signature.


---