[carbondata] branch master updated: [HOTFIX][DataLoad]fix task assignment issue using NODE_MIN_SIZE_FIRST block assignment strategy

2019-01-17 Thread manishgupta88
This is an automated email from the ASF dual-hosted git repository.

manishgupta88 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 39cd2f4  [HOTFIX][DataLoad]fix task assignment issue using 
NODE_MIN_SIZE_FIRST block assignment strategy
39cd2f4 is described below

commit 39cd2f44cf17d3817d27a11a8ca871fac2500794
Author: ndwangsen 
AuthorDate: Wed Jan 9 18:44:08 2019 +0800

[HOTFIX][DataLoad]fix task assignment issue using NODE_MIN_SIZE_FIRST block 
assignment strategy

This PR solves the problem of incorrect task assignment when the specified 
minimum data size to load is less than the average size for each node.
This closes #3059
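
For illustration, a minimal standalone sketch of the fallback described above; the class
and method shape are assumptions for this mail, not the actual CarbonLoaderUtil code:

    // Hypothetical sketch: when the configured minimum size per node cannot be
    // honoured, fall back to BLOCK_NUM_FIRST and recompute the expected "size"
    // per node as a block count, mirroring the hunk below.
    final class SizePerNodeFallbackSketch {
      static long expectedSizePerNode(long minSizePerNode, long avgSizePerNode,
          int numOfBlocks, int numOfNodes) {
        if (minSizePerNode >= avgSizePerNode) {
          return minSizePerNode;               // NODE_MIN_SIZE_FIRST can be used
        }
        // fall back to BLOCK_NUM_FIRST: reset the expected size for each node
        if (numOfNodes == 0) {
          return 1;
        }
        long blocksPerNode = numOfBlocks / numOfNodes;
        return blocksPerNode <= 0 ? 1 : blocksPerNode;
      }
    }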
---
 .../org/apache/carbondata/processing/util/CarbonLoaderUtil.java   | 8 
 1 file changed, 8 insertions(+)

diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
 
b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
index 23e4a8f..0ff3eb6 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
@@ -609,6 +609,14 @@ public final class CarbonLoaderUtil {
       blockAssignmentStrategy = BlockAssignmentStrategy.BLOCK_SIZE_FIRST;
     } else {
       blockAssignmentStrategy = BlockAssignmentStrategy.BLOCK_NUM_FIRST;
+      // fall back to BLOCK_NUM_FIRST strategy need to reset
+      // the average expected size for each node
+      if (numOfNodes == 0) {
+        sizePerNode = 1;
+      } else {
+        sizePerNode = blockInfos.size() / numOfNodes;
+        sizePerNode = sizePerNode <= 0 ? 1 : sizePerNode;
+      }
     }
     LOGGER.info("Specified minimum data size to load is less than the average size "
         + "for each node, fallback to default strategy" + blockAssignmentStrategy);



[carbondata] branch master updated: [CARBONDATA-3233]Fix JVM crash issue in snappy compressor and update the pagesize correctly

2019-01-17 Thread manishgupta88
This is an automated email from the ASF dual-hosted git repository.

manishgupta88 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 92c9ce3  [CARBONDATA-3233]Fix JVM crash issue in snappy compressor and 
update the pagesize correctly
92c9ce3 is described below

commit 92c9ce3ff0da23f207376e4f8861717e2e3de1e5
Author: akashrn5 
AuthorDate: Mon Jan 7 16:34:48 2019 +0530

[CARBONDATA-3233]Fix JVM crash issue in snappy compressor and update the 
pagesize correctly

Problem:
1. During data load, the JVM sometimes crashes during off-heap Snappy 
compression. We get the maximum compressed size from the compressor, allocate
that much memory, and then call rawCompress with the base offset of the page and 
the base offset of the newly created memory block. During this call
the JVM sometimes crashes inside Snappy. The issue is intermittent and fails only 
sometimes.
2. PageSize was getting updated wrongly: the actual pageSize is the number of rows 
in the page, but we were updating it to the rowId instead of the row count.

Solution:
Remove the method implementation and let the super class handle the compression 
based on the data type, which fixes this intermittent JVM crash issue.

This closes #3053
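
As a small, self-contained illustration of the page-size fix (a hypothetical class,
not the actual UnsafeFixLengthColumnPage code): row ids are zero-based, so after
writing rowId the page holds rowId + 1 rows.

    // Hypothetical sketch of problem 2: pageSize must track the row count,
    // not the last row id written.
    final class PageSizeSketch {
      private int pageSize;                  // number of rows actually in the page
      void updatePageSize(int rowId) {
        if (pageSize < rowId + 1) {
          pageSize = rowId + 1;              // rows 0..rowId => rowId + 1 rows
        }
      }
      public static void main(String[] args) {
        PageSizeSketch page = new PageSizeSketch();
        for (int rowId = 0; rowId < 3; rowId++) {
          page.updatePageSize(rowId);
        }
        System.out.println(page.pageSize);   // 3 rows, not 2
      }
    }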
---
 .../datastore/page/UnsafeFixLengthColumnPage.java  | 29 +++---
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
index da0e487..2e576bc 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
@@ -17,10 +17,8 @@
 
 package org.apache.carbondata.core.datastore.page;
 
-import java.io.IOException;
 import java.math.BigDecimal;
 
-import org.apache.carbondata.core.datastore.compression.Compressor;
 import 
org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta;
 import org.apache.carbondata.core.memory.CarbonUnsafe;
 import org.apache.carbondata.core.memory.MemoryBlock;
@@ -112,7 +110,8 @@ public class UnsafeFixLengthColumnPage extends ColumnPage {
 
   private void updatePageSize(int rowId) {
 if (pageSize < rowId) {
-  pageSize = rowId;
+  // update the actual number of rows
+  pageSize = rowId + 1;
 }
   }
 
@@ -359,7 +358,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage {
 
   @Override
   public float[] getFloatPage() {
-float[] data = new float[getPageSize()];
+float[] data = new float[getEndLoop()];
 for (long i = 0; i < data.length; i++) {
   long offset = i << floatBits;
   data[(int)i] = CarbonUnsafe.getUnsafe().getFloat(baseAddress, baseOffset 
+ offset);
@@ -369,7 +368,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage {
 
   @Override
   public double[] getDoublePage() {
-double[] data = new double[getPageSize()];
+double[] data = new double[getEndLoop()];
 for (long i = 0; i < data.length; i++) {
   long offset = i << doubleBits;
   data[(int)i] = CarbonUnsafe.getUnsafe().getDouble(baseAddress, 
baseOffset + offset);
@@ -541,26 +540,6 @@ public class UnsafeFixLengthColumnPage extends ColumnPage {
 return totalLength;
   }
 
-  @Override public byte[] compress(Compressor compressor) throws 
MemoryException, IOException {
-if (UnsafeMemoryManager.isOffHeap() && compressor.supportUnsafe()) {
-  // use raw compression and copy to byte[]
-  int inputSize = totalLength;
-  long compressedMaxSize = compressor.maxCompressedLength(inputSize);
-  MemoryBlock compressed =
-  UnsafeMemoryManager.allocateMemoryWithRetry(taskId, 
compressedMaxSize);
-  long outSize = compressor.rawCompress(baseOffset, inputSize, 
compressed.getBaseOffset());
-  assert outSize < Integer.MAX_VALUE;
-  byte[] output = new byte[(int) outSize];
-  CarbonUnsafe.getUnsafe()
-  .copyMemory(compressed.getBaseObject(), compressed.getBaseOffset(), 
output,
-  CarbonUnsafe.BYTE_ARRAY_OFFSET, outSize);
-  UnsafeMemoryManager.INSTANCE.freeMemory(taskId, compressed);
-  return output;
-} else {
-  return super.compress(compressor);
-}
-  }
-
   /**
* reallocate memory if capacity length than current size + request size
*/



[carbondata] branch master updated: [CARBONDATA-3241] Refactor the requested scan columns and the projection columns

2019-01-15 Thread manishgupta88
This is an automated email from the ASF dual-hosted git repository.

manishgupta88 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
 new 86713f5  [CARBONDATA-3241] Refactor the requested scan columns and the 
projection columns
86713f5 is described below

commit 86713f505a80d2a22912b15d65aa008324ad29e4
Author: dhatchayani 
AuthorDate: Thu Jan 10 15:00:51 2019 +0530

[CARBONDATA-3241] Refactor the requested scan columns and the projection 
columns

Refactor the requested-columns method so that the scan list and the projection 
list are updated together.

This closes #3062
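
As a rough Java illustration of the design change (the real code is Scala in
CarbonLateDecodeStrategy.scala, and the names here are assumptions): computing the
scan columns and handing back the projection list from the same call keeps the two
in sync, so a caller can no longer update one without the other.

    import java.util.ArrayList;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;

    // Illustrative sketch only: return the requested scan columns together with
    // the (possibly rewritten) projection list, mirroring the Scala change below.
    final class RequestedColumnsSketch {
      final List<String> scanColumns;
      final List<String> projections;
      RequestedColumnsSketch(List<String> scanColumns, List<String> projections) {
        this.scanColumns = scanColumns;
        this.projections = projections;
      }
      static RequestedColumnsSketch getRequestedColumns(Set<String> projectAttrs,
          Set<String> filterAttrs, Set<String> handledAttrs, List<String> updatedProjects) {
        Set<String> requested = new LinkedHashSet<>(projectAttrs);
        requested.addAll(filterAttrs);       // columns needed only by filters ...
        requested.removeAll(handledAttrs);   // ... unless the filter is fully pushed down
        return new RequestedColumnsSketch(new ArrayList<>(requested), updatedProjects);
      }
    }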
---
 .../execution/strategy/CarbonLateDecodeStrategy.scala | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
index a23a191..0f706af 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
@@ -367,7 +367,7 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
   // In case of implicit exist we should disable vectorPushRowFilters as 
it goes in IUD flow
   // to get the positionId or tupleID
   var implicitExisted = false
-  val updatedProjects = projects.map {
+  var updatedProjects = projects.map {
   case a@Alias(s: ScalaUDF, name)
 if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) ||
 
name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) =>
@@ -388,9 +388,15 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 }
   case other => other
   }
+  val updatedColumns: (Seq[Attribute], Seq[Expression]) = 
getRequestedColumns(relation,
+projectsAttr,
+filterSet,
+handledSet,
+newProjectList,
+updatedProjects)
   // Don't request columns that are only referenced by pushed filters.
-  val requestedColumns =
-getRequestedColumns(relation, projectsAttr, filterSet, handledSet, 
newProjectList)
+  val requestedColumns = updatedColumns._1
+  updatedProjects = updatedColumns._2
 
   var updateRequestedColumns =
 if (!vectorPushRowFilters && !implicitExisted && 
!hasDictionaryFilterCols
@@ -449,9 +455,10 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
   projectsAttr: Seq[Attribute],
   filterSet: AttributeSet,
   handledSet: AttributeSet,
-  newProjectList: Seq[Attribute]) = {
-(projectsAttr.to[mutable.LinkedHashSet] ++ filterSet -- handledSet)
-  .map(relation.attributeMap).toSeq ++ newProjectList
+  newProjectList: Seq[Attribute],
+  updatedProjects: Seq[Expression]): (Seq[Attribute], Seq[Expression]) = {
+((projectsAttr.to[mutable.LinkedHashSet] ++ filterSet -- handledSet)
+   .map(relation.attributeMap).toSeq ++ newProjectList, updatedProjects)
   }
 
   private def getDataSourceScan(relation: LogicalRelation,



carbondata git commit: [CARBONDATA-3223] Fixed Wrong Datasize and Indexsize calculation for old store using Show Segments

2019-01-06 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 923dab1b5 -> 72da33495


[CARBONDATA-3223] Fixed Wrong Datasize and Indexsize calculation for old store 
using Show Segments

Problem: A table created and loaded on an older version (1.1) was showing data size 
and index size as 0B when refreshed on the new version. This was
because when the data size came back as "null" we were not computing it and were 
directly assigning 0 to it.

Solution: Show the old data size and index size as NA.

Also refactored the SetQuerySegment code for better readability.

This closes #3047
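
A minimal sketch of the resulting display behaviour (illustrative only; the actual
change is in CarbonStore.scala and uses -1 as the sentinel): sizes that were never
recorded by the old store are shown as NA instead of 0B.

    // Illustrative sketch: a null size recorded by an old (1.1) store becomes -1
    // internally and is rendered as "NA" by Show Segments, instead of "0B".
    final class SegmentSizeDisplaySketch {
      static long parseSize(String recorded) {
        return recorded == null ? -1L : Long.parseLong(recorded);
      }
      static String format(long sizeInBytes) {
        return sizeInBytes < 0 ? "NA" : sizeInBytes + "B";
      }
      public static void main(String[] args) {
        System.out.println(format(parseSize(null)));     // NA   (old store)
        System.out.println(format(parseSize("1024")));   // 1024B
      }
    }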


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/72da3349
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/72da3349
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/72da3349

Branch: refs/heads/master
Commit: 72da33495362fdbf4cd0e24331ca77a1fab470f6
Parents: 923dab1
Author: manishnalla1994 
Authored: Wed Jan 2 18:00:36 2019 +0530
Committer: manishgupta88 
Committed: Mon Jan 7 11:33:06 2019 +0530

--
 .../hadoop/api/CarbonInputFormat.java   | 25 +++-
 .../org/apache/carbondata/api/CarbonStore.scala |  4 ++--
 .../org/apache/spark/sql/CarbonCountStar.scala  |  2 +-
 3 files changed, 22 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/72da3349/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index 24691f2..26144e2 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -277,12 +277,7 @@ m filterExpression
   public static void setQuerySegment(Configuration conf, 
AbsoluteTableIdentifier identifier) {
 String dbName = 
identifier.getCarbonTableIdentifier().getDatabaseName().toLowerCase();
 String tbName = 
identifier.getCarbonTableIdentifier().getTableName().toLowerCase();
-String segmentNumbersFromProperty = CarbonProperties.getInstance()
-.getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + 
"." + tbName, "*");
-if (!segmentNumbersFromProperty.trim().equals("*")) {
-  CarbonInputFormat.setSegmentsToAccess(conf,
-  Segment.toSegmentList(segmentNumbersFromProperty.split(","), null));
-}
+getQuerySegmentToAccess(conf, dbName, tbName);
   }
 
   /**
@@ -827,4 +822,22 @@ m filterExpression
 }
 return projectColumns.toArray(new String[projectColumns.size()]);
   }
+
+  private static void getQuerySegmentToAccess(Configuration conf, String 
dbName, String tableName) {
+String segmentNumbersFromProperty = CarbonProperties.getInstance()
+.getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + 
"." + tableName, "*");
+if (!segmentNumbersFromProperty.trim().equals("*")) {
+  CarbonInputFormat.setSegmentsToAccess(conf,
+  Segment.toSegmentList(segmentNumbersFromProperty.split(","), null));
+}
+  }
+
+  /**
+   * Set `CARBON_INPUT_SEGMENTS` from property to configuration
+   */
+  public static void setQuerySegment(Configuration conf, CarbonTable 
carbonTable) {
+String tableName = carbonTable.getTableName();
+getQuerySegmentToAccess(conf, carbonTable.getDatabaseName(), tableName);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/72da3349/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
index da9d4c2..11db430 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
@@ -107,8 +107,8 @@ object CarbonStore {
 (indices.asScala.map(_.getFile_size).sum, 
FileFactory.getCarbonFile(indexPath).getSize)
   } else {
 // for batch segment, we can get the data size from table status 
file directly
-(if (load.getDataSize == null) 0L else load.getDataSize.toLong,
-  if (load.getIndexSize == null) 0L else load.getIndexSize.toLong)
+(if (load.getDataSize == null) -1L else load.getDataSize.toLong,
+  if (load.getIndexSize == null) -1L else load.getIndexSize

carbondata git commit: [CARBONDATA-3149] Documentation for alter table column rename

2019-01-04 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master bc1e94472 -> fc4d51176


[CARBONDATA-3149] Documentation for alter table column rename

Added documentation for alter table column rename

This closes #3044


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fc4d5117
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fc4d5117
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fc4d5117

Branch: refs/heads/master
Commit: fc4d51176399f6c22a745a415ecb9096a633dbc9
Parents: bc1e944
Author: akashrn5 
Authored: Wed Jan 2 12:09:05 2019 +0530
Committer: manishgupta88 
Committed: Fri Jan 4 16:52:21 2019 +0530

--
 docs/ddl-of-carbondata.md | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/fc4d5117/docs/ddl-of-carbondata.md
--
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index d1a4794..aaa2eda 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -47,7 +47,8 @@ CarbonData DDL statements are documented here,which includes:
 * [RENAME TABLE](#rename-table)
 * [ADD COLUMNS](#add-columns)
 * [DROP COLUMNS](#drop-columns)
-* [CHANGE DATA TYPE](#change-data-type)
+* [RENAME COLUMN](#change-column-nametype)
+* [CHANGE COLUMN NAME/TYPE](#change-column-nametype)
 * [MERGE INDEXES](#merge-index)
 * [SET/UNSET Local Dictionary 
Properties](#set-and-unset-for-local-dictionary-properties)
   * [DROP TABLE](#drop-table)
@@ -681,13 +682,13 @@ Users can specify which columns to include and exclude 
for local dictionary gene
 
  **NOTE:** Drop Complex child column is not supported.
 
-   - # CHANGE DATA TYPE
+   - # CHANGE COLUMN NAME/TYPE

- This command is used to change the data type from INT to BIGINT or 
decimal precision from lower to higher.
+ This command is used to change column name and the data type from INT to 
BIGINT or decimal precision from lower to higher.
  Change of decimal data type from lower precision to higher precision will 
only be supported for cases where there is no data loss.
 
  ```
- ALTER TABLE [db_name.]table_name CHANGE col_name col_name 
changed_column_type
+ ALTER TABLE [db_name.]table_name CHANGE col_old_name col_new_name 
column_type
  ```
 
  Valid Scenarios
@@ -695,10 +696,10 @@ Users can specify which columns to include and exclude 
for local dictionary gene
  - Valid scenario - Change of decimal precision from (10,2) to (12,3) is 
valid as the total number of digits are increased by 2 but scale is increased 
only by 1 which will not lead to any data loss.
  - **NOTE:** The allowed range is 38,38 (precision, scale) and is a valid 
upper case scenario which is not resulting in data loss.
 
- Example1:Changing data type of column a1 from INT to BIGINT.
+ Example1:Change column a1's name to a2 and its data type from INT to 
BIGINT.
 
  ```
- ALTER TABLE test_db.carbon CHANGE a1 a1 BIGINT
+ ALTER TABLE test_db.carbon CHANGE a1 a2 BIGINT
  ```
  
  Example2:Changing decimal precision of column a1 from 10 to 18.
@@ -707,6 +708,13 @@ Users can specify which columns to include and exclude for 
local dictionary gene
  ALTER TABLE test_db.carbon CHANGE a1 a1 DECIMAL(18,2)
  ```
 
+ Example3:Change column a3's name to a4.
+
+ ```
+ ALTER TABLE test_db.carbon CHANGE a3 a4 STRING
+ ```
+
+ **NOTE:** Once the column is renamed, user has to take care about 
replacing the fileheader with the new name or changing the column header in csv 
file.
 - # MERGE INDEX
 
  This command is used to merge all the CarbonData index files 
(.carbonindex) inside a segment to a single CarbonData index merge file 
(.carbonindexmerge). This enhances the first query performance.



carbondata git commit: [CARBONDATA-3202]update the schema to session catalog after add column, drop column and column rename

2018-12-30 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 3e4638b33 -> b0733ecbf


[CARBONDATA-3202]update the schema to session catalog after add column, drop 
column and column rename

Problem:

1. For alter table rename, once we change the table name in carbon, we fire an 
alter table rename DDL using the Hive client. But for add column, drop column and column
rename, Spark does not support these features while Hive does, so after a rename, 
add or drop column the updated schema is not reflected in the catalog.
2. After a column rename, the column comment is not copied to the renamed column.

Solution:

1. We can directly call the Spark API alterTableDataSchema by passing the 
updated schema, which in turn updates the schema in the session catalog. Since
this API is supported from Spark 2.1 onward, the code changes are for Spark 2.2 
and Spark 2.3; behavior with Spark 2.1 remains the same.
2. While updating the catalog schema, if a column has a comment, put it in the column 
metadata.

This closes #3027


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b0733ecb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b0733ecb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b0733ecb

Branch: refs/heads/master
Commit: b0733ecbf380d7956dee57a9048dd7537620744e
Parents: 3e4638b
Author: akashrn5 
Authored: Thu Dec 27 11:31:44 2018 +0530
Committer: manishgupta88 
Committed: Mon Dec 31 08:50:22 2018 +0530

--
 .../ThriftWrapperSchemaConverterImpl.java   |  1 +
 .../sql/hive/CarbonInMemorySessionState.scala   | 15 +++---
 .../spark/sql/hive/CarbonSessionState.scala | 55 +---
 .../spark/sql/hive/CarbonSessionUtil.scala  | 55 +++-
 .../CarbonAlterTableAddColumnCommand.scala  | 12 +++--
 ...terTableColRenameDataTypeChangeCommand.scala | 24 ++---
 .../CarbonAlterTableDropColumnCommand.scala |  9 ++--
 .../spark/sql/hive/CarbonSessionCatalog.scala   | 26 ++---
 .../org/apache/spark/util/AlterTableUtil.scala  | 16 ++
 .../spark/sql/hive/CarbonSessionState.scala | 31 ++-
 .../restructure/AlterTableRevertTestCase.scala  |  1 -
 .../AlterTableColumnRenameTestCase.scala| 11 +++-
 12 files changed, 158 insertions(+), 98 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b0733ecb/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java
index 13f592f..dca7fa2 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java
@@ -548,6 +548,7 @@ public class ThriftWrapperSchemaConverterImpl implements 
SchemaConverter {
   if (sortColumns != null) {
 wrapperColumnSchema.setSortColumn(true);
   }
+  
wrapperColumnSchema.setColumnProperties(externalColumnSchema.getColumnProperties());
 }
 
wrapperColumnSchema.setFunction(externalColumnSchema.getAggregate_function());
 List 
parentColumnTableRelation =

http://git-wip-us.apache.org/repos/asf/carbondata/blob/b0733ecb/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
--
diff --git 
a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
 
b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
index ba6aae5..da60fb0 100644
--- 
a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
+++ 
b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
@@ -35,6 +35,7 @@ import org.apache.spark.sql.parser.CarbonSparkSqlParser
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.{CarbonEnv, SparkSession}
 
+import org.apache.carbondata.core.metadata.schema.table.column.{ColumnSchema 
=> ColumnSchema}
 import org.apache.carbondata.core.util.CarbonUtil
 import org.apache.carbondata.core.util.path.CarbonTablePath
 import org.apache.carbondata.format.TableInfo
@@ -79,15 +80,13 @@ class InMemorySessionCatalog(
 
   override def alterTable(tableIdentifier: TableIdentifier,
   schemaParts: String,
-  cols: 
Option[Seq[org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema]])
-  : Unit = {
+

carbondata git commit: [CARBONDATA-3196] [CARBONDATA-3203]Fixed Compaction for Complex types with Dictionary Include and also supported Compaction for restructured table

2018-12-28 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master f5c1b7bbd -> 7c4e79fca


[CARBONDATA-3196] [CARBONDATA-3203]Fixed Compaction for Complex types with 
Dictionary Include and also supported Compaction for restructured table

Problem1: Compaction was failing for complex datatypes with Dictionary Include because 
the KeyGenerator was not being set in the model for dictionary include complex
columns, and dictionary include complex columns were not handled when finding 
cardinality.

Solution: Handled both these issues by setting KeyGenerator and storing 
cardinality of Complex dictionary include columns.

Problem2: Compaction was failing for restructured table containing dictionary 
include complex columns.

Solution: Handled complex columns for this case by inserting correct indices of 
the columns.

This closes #3022


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7c4e79fc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7c4e79fc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7c4e79fc

Branch: refs/heads/master
Commit: 7c4e79fca8e6aac044bbadaf1210b1be2f3b8a8e
Parents: f5c1b7b
Author: manishnalla1994 
Authored: Mon Dec 24 17:37:36 2018 +0530
Committer: manishgupta88 
Committed: Fri Dec 28 17:27:13 2018 +0530

--
 .../core/scan/wrappers/ByteArrayWrapper.java|  4 ++
 .../src/test/resources/structofarray.csv| 10 +++
 .../complexType/TestCompactionComplexType.scala | 65 
 .../loading/CarbonDataLoadConfiguration.java| 32 +-
 .../merger/CompactionResultSortProcessor.java   | 14 -
 .../sort/sortdata/SortParameters.java   |  2 +-
 .../sort/sortdata/TableFieldStat.java   | 34 +-
 .../store/CarbonFactDataHandlerModel.java   | 31 +++---
 .../store/CarbonFactHandlerFactory.java |  1 -
 .../util/CarbonDataProcessorUtil.java   | 33 ++
 10 files changed, 171 insertions(+), 55 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/7c4e79fc/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java
index 65f29d4..1b903f7 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java
@@ -81,6 +81,10 @@ public class ByteArrayWrapper implements 
Comparable, Serializa
 return this.noDictionaryKeys[index];
   }
 
+  public byte[] getComplexKeyByIndex(int index) {
+return this.complexTypesKeys[index];
+  }
+
   /**
* to get the no dictionary column data
*

http://git-wip-us.apache.org/repos/asf/carbondata/blob/7c4e79fc/integration/spark-common-test/src/test/resources/structofarray.csv
--
diff --git a/integration/spark-common-test/src/test/resources/structofarray.csv 
b/integration/spark-common-test/src/test/resources/structofarray.csv
new file mode 100644
index 000..ef21b44
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/structofarray.csv
@@ -0,0 +1,10 @@
+Cust,2015,1,20,M,SSC,Y,123456789$2015-01-01  
00:00:00$100&3000$100.123&3000.234$United Kingdom$2015-01-01  
00:00:00&2014-01-01  00:00:00,42,104,160,325046028.8,859616748.6
+Cust0001,2015,1,30,F,Degree,N,123456790$2015-01-02  
00:00:00$101&3000$101.123&3001.234$United States$2015-01-02  
00:00:00&2014-01-02  00:00:00,141,181,54,378476092.1,818599132.6
+Cust0002,2015,1,40,M,graduation,D,123456791$2015-01-03  
00:00:00$102&3000$102.123&3002.234$United States$2015-01-03  
00:00:00&2014-01-03  00:00:00,138,43,175,408335001.4,906020942.6
+Cust0003,2015,1,50,F,PG,Y,123456792$2015-01-04  
00:00:00$103&3000$103.123&3003.234$Australia$2015-01-04  
00:00:00&2014-01-04  00:00:00,96,63,184,493146274.5,556184083.3
+Cust0004,2015,1,60,M,MS,N,123456793$2015-01-05  
00:00:00$104&3000$104.123&3004.234$United States$2015-01-05  
00:00:00&2014-01-05  00:00:00,115,172,165,457941392.3,641744932.5
+Cust0005,2015,1,70,F,Doctor,D,123456794$2015-01-06  
00:00:00$105&3000$105.123&3005.234$United States$2015-01-06  
00:00:00&2014-01-06  00:00:00,178,192,178,112452170.2,502438883.3
+Cust0006,2015,1,80,M,Layer,Y,123456795$2015-01-07  
00:00:00$106&3000$106.123&3006.234$United States$2015-01-07  
00:00:00&2014-01-07  00:00:00,172,194,49,943273831.2,37711205.

carbondata git commit: [CARBONDATA-3017] Map DDL Support

2018-12-14 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master ebdd5486e -> 90f63a0cc


[CARBONDATA-3017] Map DDL Support

Support Create DDL for Map type.

This closes #2980
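
As a hypothetical illustration of what parsing a map column with level delimiters
involves (the delimiter characters, class and method names here are assumptions,
not the actual MapParserImpl / ComplexDelimitersEnum code):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.regex.Pattern;

    // Hypothetical sketch: split a serialized map column into entries with one
    // delimiter and into key/value with another. The actual CarbonData parser
    // is more involved and is driven by the configured complex delimiters.
    final class MapValueParserSketch {
      static Map<String, String> parse(String input, String entryDelim, String keyValueDelim) {
        Map<String, String> result = new LinkedHashMap<>();
        if (input == null || input.isEmpty()) {
          return result;
        }
        for (String entry : input.split(Pattern.quote(entryDelim))) {
          String[] kv = entry.split(Pattern.quote(keyValueDelim), 2);
          result.put(kv[0], kv.length > 1 ? kv[1] : null);
        }
        return result;
      }
      public static void main(String[] args) {
        // delimiters chosen for illustration only
        System.out.println(parse("k1&v1$k2&v2", "$", "&"));  // {k1=v1, k2=v2}
      }
    }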


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/90f63a0c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/90f63a0c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/90f63a0c

Branch: refs/heads/master
Commit: 90f63a0cc7e0ba11b9a850cbd19a1a0dd3212e4e
Parents: ebdd548
Author: manishnalla1994 
Authored: Tue Oct 16 15:18:08 2018 +0530
Committer: manishgupta88 
Committed: Fri Dec 14 22:37:42 2018 +0530

--
 .../apache/carbondata/core/util/CarbonUtil.java |   4 +
 .../hadoop/api/CarbonTableOutputFormat.java |  17 +-
 .../TestCreateDDLForComplexMapType.scala| 445 +++
 .../LocalDictionarySupportLoadTableTest.scala   |  17 +
 .../spark/rdd/NewCarbonDataLoadRDD.scala|  10 +-
 .../carbondata/spark/util/CarbonScalaUtil.scala |   1 +
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |  43 +-
 .../streaming/CarbonAppendableStreamSink.scala  |   9 +-
 .../spark/rdd/CarbonDataRDDFactory.scala|   4 +-
 .../CarbonAlterTableCompactionCommand.scala |   6 +
 .../management/CarbonLoadDataCommand.scala  |   6 +-
 .../table/CarbonCreateTableCommand.scala|   2 +-
 .../spark/util/AllDictionaryTestCase.scala  |   4 +-
 .../util/ExternalColumnDictionaryTestCase.scala |   4 +-
 .../TestStreamingTableWithRowParser.scala   |   3 +-
 .../loading/ComplexDelimitersEnum.java  |  39 ++
 .../loading/DataLoadProcessBuilder.java |   7 +-
 .../loading/model/CarbonLoadModel.java  |  38 +-
 .../loading/model/CarbonLoadModelBuilder.java   |  11 +-
 .../processing/loading/model/LoadOption.java|  17 +-
 .../loading/parser/CarbonParserFactory.java |  25 +-
 .../loading/parser/impl/ArrayParserImpl.java|   6 +-
 .../loading/parser/impl/MapParserImpl.java  |  60 +++
 .../loading/parser/impl/RowParserImpl.java  |   8 +-
 .../sdk/file/CarbonWriterBuilder.java   |   1 +
 .../streaming/parser/RowStreamParserImp.scala   |   2 +
 26 files changed, 715 insertions(+), 74 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index ac52728..fc4704e 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -597,6 +597,10 @@ public final class CarbonUtil {
*/
   public static String delimiterConverter(String delimiter) {
 switch (delimiter) {
+  case "\\001":
+  case "\\002":
+  case "\\003":
+  case "\\004":
   case "|":
   case "*":
   case ".":

http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java
index dbd2f0e..16486d0 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java
@@ -37,6 +37,7 @@ import org.apache.carbondata.core.util.CarbonThreadFactory;
 import org.apache.carbondata.core.util.ObjectSerializationUtil;
 import org.apache.carbondata.core.util.ThreadLocalSessionInfo;
 import org.apache.carbondata.hadoop.internal.ObjectArrayWritable;
+import org.apache.carbondata.processing.loading.ComplexDelimitersEnum;
 import org.apache.carbondata.processing.loading.DataLoadExecutor;
 import org.apache.carbondata.processing.loading.TableProcessingOperations;
 import 
org.apache.carbondata.processing.loading.iterator.CarbonOutputIteratorWrapper;
@@ -338,11 +339,19 @@ public class CarbonTableOutputFormat extends 
FileOutputFormat 1) {
-  model.setComplexDelimiterLevel2(split[1]);
+model.setComplexDelimiter(split[0]);
+if (split.length > 2) {
+  model.setComplexDelimiter(split[1]);
+  model.setComplexDelimiter(split[2]);
+} else if (split.length > 1) {
+  model.setComplexDelimiter(split[1]);
 }
 model.setDateFormat(
 conf.get(

http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/integration/spa

carbondata git commit: [CARBONDATA-3134] fixed null values when cachelevel is set as blocklet

2018-11-28 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 295734cc8 -> a5f080b67


[CARBONDATA-3134] fixed null values when cachelevel is set as blocklet

Problem:
For each blocklet an object of SegmentPropertiesAndSchemaHolder is created to 
store the schema used for the query. This object is created only if no other 
blocklet has the same schema. To check the schema we compare 
List<ColumnSchema>, but as the equals method in ColumnSchema does not check 
columnUniqueId this check fails and the new restructured 
blocklet uses the schema of the old blocklet. Due to this the newly added 
column is ignored, as the old blocklet schema says that the column is 
deleted (alter drop).

Solution:
Instead of checking equality through the existing equals and hashCode, write new 
implementations of both that compare based on columnUniqueId.

This closes #2956
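
A contrived sketch of the failure mode described above (hypothetical types, not
CarbonData's ColumnSchema): if equality ignores the unique id, a column that was
dropped and re-added looks identical to the old one, so the old blocklet schema
gets reused.

    import java.util.Objects;

    // Hypothetical illustration: two schemas that differ only in their unique id
    // compare equal when equals() ignores that id, which is the bug being fixed.
    final class ColumnSchemaSketch {
      final String columnName;
      final String columnUniqueId;
      ColumnSchemaSketch(String columnName, String columnUniqueId) {
        this.columnName = columnName;
        this.columnUniqueId = columnUniqueId;
      }
      @Override public boolean equals(Object o) {        // id deliberately ignored
        return o instanceof ColumnSchemaSketch
            && ((ColumnSchemaSketch) o).columnName.equals(columnName);
      }
      @Override public int hashCode() {
        return Objects.hash(columnName);
      }
      boolean equalsWithStrictCheck(ColumnSchemaSketch other) {
        return equals(other) && columnUniqueId.equals(other.columnUniqueId);
      }
      public static void main(String[] args) {
        ColumnSchemaSketch dropped = new ColumnSchemaSketch("c1", "id-old");
        ColumnSchemaSketch readded = new ColumnSchemaSketch("c1", "id-new");
        System.out.println(dropped.equals(readded));                // true  (bug)
        System.out.println(dropped.equalsWithStrictCheck(readded)); // false (fix)
      }
    }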


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a5f080b6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a5f080b6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a5f080b6

Branch: refs/heads/master
Commit: a5f080b6752a7a789463455edbbe4b888f6694e3
Parents: 295734c
Author: kunal642 
Authored: Tue Nov 27 14:13:27 2018 +0530
Committer: manishgupta88 
Committed: Wed Nov 28 15:43:24 2018 +0530

--
 .../block/SegmentPropertiesAndSchemaHolder.java | 40 ++--
 .../schema/table/column/ColumnSchema.java   |  4 ++
 .../StandardPartitionTableQueryTestCase.scala   |  2 +-
 3 files changed, 42 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
index 1b7e1f8..6f9a93d 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
@@ -18,6 +18,8 @@ package org.apache.carbondata.core.datastore.block;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -332,13 +334,45 @@ public class SegmentPropertiesAndSchemaHolder {
   }
   SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper other =
   (SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper) obj;
-  return tableIdentifier.equals(other.tableIdentifier) && columnsInTable
-  .equals(other.columnsInTable) && Arrays
+  return tableIdentifier.equals(other.tableIdentifier) && 
checkColumnSchemaEquality(
+  columnsInTable, other.columnsInTable) && Arrays
   .equals(columnCardinality, other.columnCardinality);
 }
 
+private boolean checkColumnSchemaEquality(List obj1, 
List obj2) {
+  if (obj1 == null || obj2 == null || (obj1.size() != obj2.size())) {
+return false;
+  }
+  List clonedObj1 = new ArrayList<>(obj1);
+  List clonedObj2 = new ArrayList<>(obj2);
+  clonedObj1.addAll(obj1);
+  clonedObj2.addAll(obj2);
+  sortList(clonedObj1);
+  sortList(clonedObj2);
+  boolean exists = true;
+  for (int i = 0; i < obj1.size(); i++) {
+if (!clonedObj1.get(i).equalsWithStrictCheck(clonedObj2.get(i))) {
+  exists = false;
+  break;
+}
+  }
+  return exists;
+}
+
+private void sortList(List columnSchemas) {
+  Collections.sort(columnSchemas, new Comparator() {
+@Override public int compare(ColumnSchema o1, ColumnSchema o2) {
+  return o1.getColumnUniqueId().compareTo(o2.getColumnUniqueId());
+}
+  });
+}
+
 @Override public int hashCode() {
-  return tableIdentifier.hashCode() + columnsInTable.hashCode() + Arrays
+  int allColumnsHashCode = 0;
+  for (ColumnSchema columnSchema: columnsInTable) {
+allColumnsHashCode = allColumnsHashCode + 
columnSchema.strictHashCode();
+  }
+  return tableIdentifier.hashCode() + allColumnsHashCode + Arrays
   .hashCode(columnCardinality);
 }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/

[1/2] carbondata git commit: [CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable buffer for direct flow

2018-11-21 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 647bfbaea -> d79ba999f


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
index e5312f3..51dfbf2 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
@@ -308,7 +308,8 @@ public abstract class ColumnPage {
 
   private static ColumnPage newDecimalPage(ColumnPageEncoderMeta meta,
   byte[] lvEncodedByteArray) throws MemoryException {
-return VarLengthColumnPageBase.newDecimalColumnPage(meta, 
lvEncodedByteArray);
+return VarLengthColumnPageBase
+.newDecimalColumnPage(meta, lvEncodedByteArray, 
lvEncodedByteArray.length);
   }
 
   private static ColumnPage newLVBytesPage(TableSpec.ColumnSpec columnSpec,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
index 9bed89f..1de8201 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java
@@ -88,7 +88,8 @@ public class DecoderBasedFallbackEncoder implements Callable

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
index 81bb1b5..0f409f6 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
@@ -125,7 +125,7 @@ public abstract class VarLengthColumnPageBase extends 
ColumnPage {
* Create a new column page for decimal page
*/
   public static ColumnPage newDecimalColumnPage(ColumnPageEncoderMeta meta,
-  byte[] lvEncodedBytes) throws MemoryException {
+  byte[] lvEncodedBytes, int actualDataLength) throws MemoryException {
 TableSpec.ColumnSpec columnSpec = meta.getColumnSpec();
 DecimalConverterFactory.DecimalConverter decimalConverter =
 
DecimalConverterFactory.INSTANCE.getDecimalConverter(columnSpec.getPrecision(),
@@ -137,7 +137,7 @@ public abstract class VarLengthColumnPageBase extends 
ColumnPage {
   CarbonCommonConstants.INT_SIZE_IN_BYTE, meta.getCompressorName());
 } else {
   // Here the size is always fixed.
-  return getDecimalColumnPage(meta, lvEncodedBytes, size);
+  return getDecimalColumnPage(meta, lvEncodedBytes, size, 
actualDataLength);
 }
   }
 
@@ -160,7 +160,7 @@ public abstract class VarLengthColumnPageBase extends 
ColumnPage {
   }
 
   private static ColumnPage getDecimalColumnPage(ColumnPageEncoderMeta meta,
-  byte[] lvEncodedBytes, int size) throws MemoryException {
+  byte[] lvEncodedBytes, int size, int actualDataLength) throws 
MemoryException {
 TableSpec.ColumnSpec columnSpec = meta.getColumnSpec();
 String compressorName = meta.getCompressorName();
 TableSpec.ColumnSpec spec = TableSpec.ColumnSpec
@@ -171,7 +171,7 @@ public abstract class VarLengthColumnPageBase extends 
ColumnPage {
 int offset;
 int rowId = 0;
 int counter = 0;
-for (offset = 0; offset < lvEncodedBytes.length; offset += size) {
+for (offset = 0; offset < actualDataLength; offset += size) {
   rowOffset.putInt(counter, offset);
   rowId++;
   counter++;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java
index 6f36c08..b5dc502 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java
+++ 

[2/2] carbondata git commit: [CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable buffer for direct flow

2018-11-21 Thread manishgupta88
[CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable 
buffer for direct flow

The following optimizations are done in this PR:

1. Added a reusable buffer for the direct flow. In a query, a byte array is created for 
each page of each column; when the number of columns is high this
causes a lot of minor GC and degrades query performance. As pages are 
uncompressed one by one, we can
use the same buffer for all the columns and resize it based on the requested size.

2. Fixed the Local Dictionary performance issue. Reverted #2895 and fixed the NPE 
issue by setting the local
dictionary to null on the vector in the safe and unsafe VariableLengthDataChunkStore.

This closes #2872
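
A minimal sketch of the reusable-buffer idea (the shape is assumed; the class
actually added by this PR is core/.../datastore/ReusableDataBuffer.java): a single
grow-only byte[] is handed out for each page, so per-page allocations and the
minor GC they cause are avoided.

    // Illustrative sketch of a grow-only reusable buffer; field and method names
    // are assumptions, not necessarily those of CarbonData's ReusableDataBuffer.
    final class ReusableBufferSketch {
      private byte[] buffer;
      /** Returns a buffer of at least requestedSize, reusing the old one when it fits. */
      byte[] getDataBuffer(int requestedSize) {
        if (buffer == null || buffer.length < requestedSize) {
          // grow with some headroom so slightly larger requests do not reallocate
          buffer = new byte[requestedSize + requestedSize / 2];
        }
        return buffer;
      }
    }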


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d79ba999
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d79ba999
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d79ba999

Branch: refs/heads/master
Commit: d79ba999f2f683da4be7554a8aba67b9dac01975
Parents: 647bfba
Author: kumarvishal09 
Authored: Sun Oct 28 20:39:05 2018 +0530
Committer: manishgupta88 
Committed: Wed Nov 21 14:45:00 2018 +0530

--
 .../core/datastore/ReusableDataBuffer.java  | 55 
 .../chunk/impl/DimensionRawColumnChunk.java | 15 +++---
 .../impl/FixedLengthDimensionColumnPage.java| 16 +++---
 .../chunk/impl/MeasureRawColumnChunk.java   | 15 +++---
 .../impl/VariableLengthDimensionColumnPage.java | 16 +++---
 .../reader/DimensionColumnChunkReader.java  |  6 ++-
 .../chunk/reader/MeasureColumnChunkReader.java  | 10 ++--
 .../reader/dimension/AbstractChunkReader.java   |  4 +-
 ...mpressedDimensionChunkFileBasedReaderV1.java | 28 ++
 ...mpressedDimensionChunkFileBasedReaderV2.java | 26 ++---
 ...essedDimChunkFileBasedPageLevelReaderV3.java |  7 ++-
 ...mpressedDimensionChunkFileBasedReaderV3.java | 54 ---
 .../measure/AbstractMeasureChunkReader.java |  7 +--
 ...CompressedMeasureChunkFileBasedReaderV1.java |  5 +-
 ...CompressedMeasureChunkFileBasedReaderV2.java | 12 +++--
 ...CompressedMeasureChunkFileBasedReaderV3.java | 34 ++--
 ...essedMsrChunkFileBasedPageLevelReaderV3.java |  6 ++-
 .../chunk/store/DimensionChunkStoreFactory.java | 18 ---
 .../impl/LocalDictDimensionDataChunkStore.java  | 17 --
 .../safe/AbstractNonDictionaryVectorFiller.java | 23 +---
 ...ariableIntLengthDimensionDataChunkStore.java |  5 +-
 ...feVariableLengthDimensionDataChunkStore.java | 19 ---
 ...iableShortLengthDimensionDataChunkStore.java |  5 +-
 .../UnsafeAbstractDimensionDataChunkStore.java  |  4 +-
 ...nsafeFixedLengthDimensionDataChunkStore.java |  4 +-
 ...ariableIntLengthDimensionDataChunkStore.java |  4 +-
 ...feVariableLengthDimensionDataChunkStore.java |  5 +-
 ...iableShortLengthDimensionDataChunkStore.java |  4 +-
 .../core/datastore/columnar/UnBlockIndexer.java |  4 +-
 .../compression/AbstractCompressor.java |  3 ++
 .../core/datastore/compression/Compressor.java  |  6 +++
 .../datastore/compression/SnappyCompressor.java | 20 +++
 .../datastore/compression/ZstdCompressor.java   |  8 +++
 .../core/datastore/page/ColumnPage.java |  3 +-
 .../page/DecoderBasedFallbackEncoder.java   |  3 +-
 .../datastore/page/VarLengthColumnPageBase.java |  8 +--
 .../page/encoding/ColumnPageDecoder.java|  7 ++-
 .../adaptive/AdaptiveDeltaFloatingCodec.java| 19 ---
 .../adaptive/AdaptiveDeltaIntegralCodec.java| 22 +---
 .../adaptive/AdaptiveFloatingCodec.java | 22 +---
 .../adaptive/AdaptiveIntegralCodec.java | 20 ---
 .../encoding/compress/DirectCompressCodec.java  | 23 +---
 .../datastore/page/encoding/rle/RLECodec.java   | 10 ++--
 .../executor/impl/AbstractQueryExecutor.java| 45 +---
 .../scan/executor/infos/BlockExecutionInfo.java | 21 
 .../core/scan/result/BlockletScannedResult.java | 23 +---
 .../scan/result/vector/CarbonDictionary.java|  4 ++
 .../vector/impl/CarbonDictionaryImpl.java   | 10 
 .../core/scan/scanner/LazyPageLoader.java   | 10 ++--
 .../impl/FixedLengthDimensionDataChunkTest.java |  3 +-
 .../executer/IncludeFilterExecuterImplTest.java |  4 +-
 .../carbondata/core/util/CarbonUtilTest.java| 32 ++--
 .../dataload/TestLoadDataWithCompression.scala  | 15 ++
 .../VectorizedCarbonRecordReader.java   |  1 -
 .../apache/carbondata/tool/ScanBenchmark.java   |  4 +-
 55 files changed, 546 insertions(+), 228 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/ReusableDataBuffer.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore

[2/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types

2018-11-20 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
index fd94344..7b7c0b6 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java
@@ -23,12 +23,13 @@ import java.util.BitSet;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.carbondata.core.datastore.TableSpec;
 import org.apache.carbondata.core.datastore.compression.Compressor;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPageValueConverter;
-import org.apache.carbondata.core.datastore.page.DecimalColumnPage;
 import org.apache.carbondata.core.datastore.page.LazyColumnPage;
+import org.apache.carbondata.core.datastore.page.VarLengthColumnPageBase;
 import org.apache.carbondata.core.datastore.page.encoding.ColumnPageCodec;
 import org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder;
 import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder;
@@ -105,17 +106,32 @@ public class DirectCompressCodec implements 
ColumnPageCodec {
 
   @Override
   public void decodeAndFillVector(byte[] input, int offset, int length,
-  ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded)
+  ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded, 
int pageSize)
   throws MemoryException, IOException {
-ColumnPage decodedPage;
+Compressor compressor =
+
CompressorFactory.getInstance().getCompressor(meta.getCompressorName());
+byte[] unCompressData = compressor.unCompressByte(input, offset, 
length);
 if (DataTypes.isDecimal(dataType)) {
-  decodedPage = ColumnPage.decompressDecimalPage(meta, input, offset, 
length);
-  vectorInfo.decimalConverter = ((DecimalColumnPage) 
decodedPage).getDecimalConverter();
+  TableSpec.ColumnSpec columnSpec = meta.getColumnSpec();
+  DecimalConverterFactory.DecimalConverter decimalConverter =
+  DecimalConverterFactory.INSTANCE
+  .getDecimalConverter(columnSpec.getPrecision(), 
columnSpec.getScale());
+  vectorInfo.decimalConverter = decimalConverter;
+  if (DataTypes.isDecimal(meta.getStoreDataType())) {
+ColumnPage decimalColumnPage =
+VarLengthColumnPageBase.newDecimalColumnPage(meta, 
unCompressData);
+decimalConverter.fillVector(decimalColumnPage.getByteArrayPage(), 
pageSize, vectorInfo,
+nullBits, meta.getStoreDataType());
+  } else {
+converter
+.decodeAndFillVector(unCompressData, vectorInfo, nullBits, 
meta.getStoreDataType(),
+pageSize);
+  }
 } else {
-  decodedPage = ColumnPage.decompress(meta, input, offset, length, 
isLVEncoded);
+  converter
+  .decodeAndFillVector(unCompressData, vectorInfo, nullBits, 
meta.getStoreDataType(),
+  pageSize);
 }
-decodedPage.setNullBits(nullBits);
-converter.decodeAndFillVector(decodedPage, vectorInfo);
   }
 
   @Override public ColumnPage decode(byte[] input, int offset, int length, 
boolean isLVEncoded)
@@ -203,17 +219,15 @@ public class DirectCompressCodec implements 
ColumnPageCodec {
 }
 
 @Override
-public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo 
vectorInfo) {
+public void decodeAndFillVector(byte[] pageData, ColumnVectorInfo 
vectorInfo, BitSet nullBits,
+DataType pageDataType, int pageSize) {
   CarbonColumnVector vector = vectorInfo.vector;
-  BitSet nullBits = columnPage.getNullBits();
   DataType vectorDataType = vector.getType();
-  DataType pageDataType = columnPage.getDataType();
-  int pageSize = columnPage.getPageSize();
   BitSet deletedRows = vectorInfo.deletedRows;
   vector = ColumnarVectorWrapperDirectFactory
   .getDirectVectorWrapperFactory(vector, vectorInfo.invertedIndex, 
nullBits, deletedRows,
   true, false);
-  fillVector(columnPage, vector, vectorDataType, pageDataType, pageSize, 
vectorInfo);
+  fillVector(pageData, vector, vectorDataType, pageDataType, pageSize, 
vectorInfo, nullBits);
   if (deletedRows == null || deletedRows.isEmpty()) {
 for (int i = nullBits.nextSetBit(0); i >= 0; i = 

[1/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types

2018-11-20 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 51b10ba70 -> bed51ba77


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
index b4dd1b1..16763d3 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
@@ -303,6 +303,10 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 // applying the filter in spark's side. So we should disable 
vectorPushRowFilters option
 // in case of filters on global dictionary.
 val hasDictionaryFilterCols = hasFilterOnDictionaryColumn(filterSet, table)
+
+// In case of more dictionary columns spark code gen needs generate lot of 
code and that slows
+// down the query, so we limit the direct fill in case of more dictionary 
columns.
+val hasMoreDictionaryCols = 
hasMoreDictionaryColumnsOnProjection(projectSet, table)
 val vectorPushRowFilters = 
CarbonProperties.getInstance().isPushRowFiltersForVector
 if (projects.map(_.toAttribute) == projects &&
 projectSet.size == projects.size &&
@@ -342,7 +346,7 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 updateRequestedColumns.asInstanceOf[Seq[Attribute]])
   // Check whether spark should handle row filters in case of vector flow.
   if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan]
-  && !hasDictionaryFilterCols) {
+  && !hasDictionaryFilterCols && !hasMoreDictionaryCols) {
 // Here carbon only do page pruning and row level pruning will be done 
by spark.
 scan.inputRDDs().head match {
   case rdd: CarbonScanRDD[InternalRow] =>
@@ -386,7 +390,8 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 (projectSet ++ filterSet -- 
handledSet).map(relation.attributeMap).toSeq ++ newProjectList
 
   var updateRequestedColumns =
-if (!vectorPushRowFilters && !implictsExisted && 
!hasDictionaryFilterCols) {
+if (!vectorPushRowFilters && !implictsExisted && 
!hasDictionaryFilterCols
+&& !hasMoreDictionaryCols) {
   updateRequestedColumnsFunc(
 (projectSet ++ filterSet).map(relation.attributeMap).toSeq,
 table,
@@ -398,7 +403,8 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 supportBatchedDataSource(relation.relation.sqlContext,
   updateRequestedColumns.asInstanceOf[Seq[Attribute]]) &&
 needDecoder.isEmpty
-  if (!vectorPushRowFilters && !supportBatch && !implictsExisted && 
!hasDictionaryFilterCols) {
+  if (!vectorPushRowFilters && !supportBatch && !implictsExisted && 
!hasDictionaryFilterCols
+  && !hasMoreDictionaryCols) {
 // revert for row scan
 updateRequestedColumns = updateRequestedColumnsFunc(requestedColumns, 
table, needDecoder)
   }
@@ -414,7 +420,7 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 updateRequestedColumns.asInstanceOf[Seq[Attribute]])
   // Check whether spark should handle row filters in case of vector flow.
   if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan]
-  && !implictsExisted && !hasDictionaryFilterCols) {
+  && !implictsExisted && !hasDictionaryFilterCols && 
!hasMoreDictionaryCols) {
 // Here carbon only do page pruning and row level pruning will be done 
by spark.
 scan.inputRDDs().head match {
   case rdd: CarbonScanRDD[InternalRow] =>
@@ -518,6 +524,18 @@ private[sql] class CarbonLateDecodeStrategy extends 
SparkStrategy {
 filterColumns.exists(c => map.get(c.name).getOrElse(false))
   }
 
+  private def hasMoreDictionaryColumnsOnProjection(projectColumns: 
AttributeSet,
+  relation: CarbonDatasourceHadoopRelation): Boolean = {
+val map = relation.carbonRelation.metaData.dictionaryMap
+var count = 0
+projectColumns.foreach{c =>
+  if (map.get(c.name).getOrElse(false)) {
+count += 1
+  }
+}
+count > CarbonCommonConstants.CARBON_ALLOW_DIRECT_FILL_DICT_COLS_LIMIT
+  }
+
   private def getPartitioning(carbonTable: CarbonTable,
   output: Seq[Attribute]): Partitioning = {
 val info: BucketingInfo = 
carbonTable.getBucketingInfo(carbonTable.getTableName)



[3/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types

2018-11-20 Thread manishgupta88
[CARBONDATA-3112] Optimise decompressing while filling the vector during 
conversion of primitive types

The following optimizations are done in this PR:

1. Optimise decompression while filling the vector during conversion of 
primitive types. This avoids creating an intermediate buffer during decompression.
2. Refactor the global dictionary decoder codegen to minimise the amount of 
generated code and reduce the time spent in it.
3. Disable lazy load for full scan queries, as it is unnecessary there.
4. Refactor the compressor interface and create an abstract class. All primitive 
datatype conversions now happen in little-endian, as Snappy does
that conversion while compressing. So it might break ZSTD compatibility with the 
previous version.

This closes #2863
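
A simplified sketch of optimization 1 (illustrative only, not the actual codec
code): the uncompressed page bytes are decoded straight into the destination
vector, little-endian as noted in point 4, without building an intermediate
ColumnPage first.

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    // Illustrative sketch: fill an int "vector" directly from the uncompressed
    // page bytes, avoiding an intermediate page object and an extra copy.
    final class DecodeAndFillSketch {
      static void fillIntVector(byte[] unCompressedPage, int rowCount, int[] vector) {
        ByteBuffer buf = ByteBuffer.wrap(unCompressedPage).order(ByteOrder.LITTLE_ENDIAN);
        for (int row = 0; row < rowCount; row++) {
          vector[row] = buf.getInt(row * Integer.BYTES);
        }
      }
    }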


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bed51ba7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bed51ba7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bed51ba7

Branch: refs/heads/master
Commit: bed51ba772cf0e8c5c648f620b62d2c9ba4ef9e8
Parents: 51b10ba
Author: ravipesala 
Authored: Fri Oct 26 20:50:53 2018 +0530
Committer: manishgupta88 
Committed: Wed Nov 21 12:23:57 2018 +0530

--
 .../core/constants/CarbonCommonConstants.java   |   6 +
 ...mpressedDimensionChunkFileBasedReaderV3.java |   2 +-
 ...CompressedMeasureChunkFileBasedReaderV3.java |   2 +-
 .../safe/AbstractNonDictionaryVectorFiller.java |  47 +++--
 ...feVariableLengthDimensionDataChunkStore.java |   2 +-
 .../compression/AbstractCompressor.java | 123 
 .../datastore/compression/SnappyCompressor.java |   4 +-
 .../datastore/compression/ZstdCompressor.java   |  95 +
 .../page/ColumnPageValueConverter.java  |   6 +-
 .../datastore/page/VarLengthColumnPageBase.java |   2 +-
 .../page/encoding/ColumnPageDecoder.java|   2 +-
 .../adaptive/AdaptiveDeltaFloatingCodec.java|  74 ---
 .../adaptive/AdaptiveDeltaIntegralCodec.java| 164 ---
 .../adaptive/AdaptiveFloatingCodec.java |  73 +++
 .../adaptive/AdaptiveIntegralCodec.java | 137 +++--
 .../encoding/compress/DirectCompressCodec.java  | 146 --
 .../datastore/page/encoding/rle/RLECodec.java   |   2 +-
 .../statistics/PrimitivePageStatsCollector.java |   7 +
 .../page/statistics/StatisticsCollector.java|  66 --
 .../datatype/DecimalConverterFactory.java   |  53 +++--
 .../scan/result/vector/CarbonColumnVector.java  |   4 +
 .../scan/result/vector/CarbonDictionary.java|   2 +
 .../vector/impl/CarbonColumnVectorImpl.java |  35 +++-
 .../vector/impl/CarbonDictionaryImpl.java   |  37 
 .../AbstractCarbonColumnarVector.java   |  10 +
 ...umnarVectorWrapperDirectWithDeleteDelta.java |  10 +-
 ...erDirectWithDeleteDeltaAndInvertedIndex.java |  34 +++-
 ...narVectorWrapperDirectWithInvertedIndex.java |   9 +-
 .../apache/carbondata/core/util/ByteUtil.java   |  28 ++-
 .../presto/CarbonColumnVectorWrapper.java   |   9 +
 .../src/test/resources/IUD/negativevalue.csv|   7 +
 .../iud/UpdateCarbonTableTestCase.scala |  17 +-
 .../vectorreader/ColumnarVectorWrapper.java |  10 +
 .../ColumnarVectorWrapperDirect.java|   8 +
 .../VectorizedCarbonRecordReader.java   |  31 ++-
 .../datasources/SparkCarbonFileFormat.scala |  10 +-
 .../org/apache/spark/sql/CarbonVectorProxy.java | 156 ++-
 .../org/apache/spark/sql/CarbonVectorProxy.java | 200 ++-
 .../stream/CarbonStreamRecordReader.java|   5 +-
 .../spark/sql/CarbonDictionaryDecoder.scala | 195 --
 .../strategy/CarbonLateDecodeStrategy.scala |  26 ++-
 41 files changed, 1193 insertions(+), 663 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index b75648e..094e552 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1949,6 +1949,12 @@ public final class CarbonCommonConstants {
*/
   public static final String CARBON_WRITTEN_BY_APPNAME = 
"carbon.writtenby.app.name";
 
+  /**
+   * When more global dictionary columns are there then there is issue in 
generating codegen to them
+   * and it slows down the query.So we limit to 100 for now
+   */
+  public static final int CARBON_ALLOW_DIRECT_FILL_DICT_COLS_L

carbondata git commit: [CARBONDATA-3088][Compaction] support prefetch for compaction

2018-11-20 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master c5930527a -> 51b10ba70


[CARBONDATA-3088][Compaction] support prefetch for compaction

Current compaction performance is low. By adding logs to observe the compaction 
procedure, we found that in 
`CarbonFactDataHandlerColumnar.addDataToStore(CarbonRow)`, it waits about 
30ms before submitting a new TablePage producer. Since the method 
`addDataToStore` is called in a single thread, this waiting happens for every 
32000 records, because 32000 records are collected to form a TablePage.

To reduce the waiting time, we can prepare the 32000 records ahead. This can be 
achieved using prefetch.

We will prepare two buffers: one provides the records to the downstream 
(`addDataToStore`) while the other prepares the records asynchronously. The 
first is called the working buffer and the second the backup buffer. Once the 
working buffer is exhausted, the two buffers exchange their roles: the backup 
buffer becomes the new working buffer, and the old working buffer becomes the 
new backup buffer and is filled asynchronously.
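
A minimal Java sketch of the working/backup buffer swap described above; 
fetchBatch() is a hypothetical stand-in for pulling the next batch from the 
underlying query result iterator, not the actual RawResultIterator code.

import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

class DoubleBufferPrefetcher<T> {
  private final ExecutorService executor = Executors.newSingleThreadExecutor();
  private List<T> workingBuffer;          // serves rows to the consumer
  private Future<List<T>> backupFuture;   // backup buffer, filled asynchronously
  private int index = 0;

  DoubleBufferPrefetcher(List<T> firstBatch) {
    this.workingBuffer = firstBatch;
    this.backupFuture = executor.submit(this::fetchBatch);   // start prefetching immediately
  }

  T next() throws Exception {
    if (index == workingBuffer.size()) {
      // Working buffer exhausted: the backup becomes the new working buffer
      // and the old working buffer is refilled asynchronously.
      workingBuffer = backupFuture.get();
      index = 0;
      backupFuture = executor.submit(this::fetchBatch);
    }
    if (workingBuffer.isEmpty()) {
      throw new NoSuchElementException("no more rows");
    }
    return workingBuffer.get(index++);
  }

  // Hypothetical: the real code would pull up to 32000 rows from the query iterator.
  private List<T> fetchBatch() {
    return new ArrayList<>();
  }
}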

Two parameters are involved in this feature (see the example after the list):

1. carbon.detail.batch.size: This is an existing parameter and the default 
value is 100. It controls the batch size of records returned to the client. 
For normal queries it is OK to keep it at 100, but for compaction, since all 
records are processed, we suggest setting it to a larger value such as 32000 
(32000 is the maximum number of rows per table page that the downstream 
expects).

2. carbon.compaction.prefetch.enable: This is a new parameter and the default 
value is `false` (we may change it to `true` later). It controls whether we 
prefetch the records for compaction.
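
For example (illustrative values only; CarbonProperties is one way to set these 
keys, set them however your deployment manages carbon properties):

import org.apache.carbondata.core.util.CarbonProperties;

// A larger batch size only makes sense for compaction-heavy workloads; 100 stays fine for queries.
CarbonProperties.getInstance().addProperty("carbon.detail.batch.size", "32000");
CarbonProperties.getInstance().addProperty("carbon.compaction.prefetch.enable", "true");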

By using this prefetch feature, we can enhance the performance for compaction. 
More test results can be found in the PR description.

This closes #2906


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/51b10ba7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/51b10ba7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/51b10ba7

Branch: refs/heads/master
Commit: 51b10ba70e53c869d00c4552f8c03134a5f8eb4d
Parents: c593052
Author: xuchuanyin 
Authored: Mon Nov 5 15:11:09 2018 +0800
Committer: manishgupta88 
Committed: Wed Nov 21 10:17:35 2018 +0530

--
 .../scan/result/iterator/RawResultIterator.java | 199 ---
 .../carbondata/spark/rdd/StreamHandoffRDD.scala |   2 +-
 .../merger/CarbonCompactionExecutor.java|   2 +-
 3 files changed, 125 insertions(+), 78 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/51b10ba7/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
index 29d8751..1febb0b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java
@@ -16,12 +16,21 @@
  */
 package org.apache.carbondata.core.scan.result.iterator;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
 import org.apache.carbondata.common.CarbonIterator;
 import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
 import org.apache.carbondata.core.scan.result.RowBatch;
 import org.apache.carbondata.core.scan.wrappers.ByteArrayWrapper;
+import org.apache.carbondata.core.util.CarbonProperties;
 
 import org.apache.log4j.Logger;
 
@@ -40,12 +49,14 @@ public class RawResultIterator extends 
CarbonIterator {
*/
   private CarbonIterator detailRawQueryResultIterator;
 
-  /**
-   * Counter to maintain the row counter.
-   */
-  private int counter = 0;
-
-  private Object[] currentConveretedRawRow = null;
+  private boolean prefetchEnabled;
+  private List currentBuffer;
+  private List backupBuffer;
+  private int currentIdxInBuffer;
+  private ExecutorService executorService;
+  private Future fetchFuture;
+  private Object[] currentRawRow = null;
+  private boolean isBackupFilled = false;
 
   /**
* LOGGER
@@ -53,72 +64,124 @@ public class RawResultIterator extends 
CarbonIterator {
   private static fi

carbondata git commit: [CARBONDATA-3106] WrittenbyAPI not serialized in executor with globalsort

2018-11-19 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master d4e8ba441 -> da91d4cc6


[CARBONDATA-3106] WrittenbyAPI not serialized in executor with globalsort

Problem:
Written_By_APPNAME, when added to CarbonProperties, is not serialized to the 
executor with global sort.

Solution:
Add Written_By_APPNAME to the Hadoop conf; on the executor side, read it from 
the configuration and add it to CarbonProperties.
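
In Java terms the pattern boils down to the two calls below (a condensed 
sketch; hadoopConf, conf and appName stand for the surrounding variables, and 
the actual change is in the Scala diff that follows):

// Driver side: carry the application name in the Hadoop configuration that reaches the executors.
hadoopConf.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, appName);

// Executor side: restore it into CarbonProperties before the write step needs it.
CarbonProperties.getInstance().addProperty(
    CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME,
    conf.get(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME));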

This closes #2928


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/da91d4cc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/da91d4cc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/da91d4cc

Branch: refs/heads/master
Commit: da91d4cc6805ce63aade48562a4f367442b38d4a
Parents: d4e8ba4
Author: Indhumathi27 
Authored: Fri Nov 16 21:49:16 2018 +0530
Committer: manishgupta88 
Committed: Tue Nov 20 10:34:28 2018 +0530

--
 .../spark/load/DataLoadProcessBuilderOnSpark.scala|  5 ++---
 .../spark/load/DataLoadProcessorStepOnSpark.scala |  6 +-
 .../store/writer/v3/CarbonFactDataWriterImplV3.java   | 10 +++---
 3 files changed, 14 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
index 338180d..8ded6bd 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
@@ -66,9 +66,8 @@ object DataLoadProcessBuilderOnSpark {
 val sortStepRowCounter = sc.accumulator(0, "Sort Processor Accumulator")
 val writeStepRowCounter = sc.accumulator(0, "Write Processor Accumulator")
 
-CarbonProperties.getInstance()
-  .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME,
-sparkSession.sparkContext.appName)
+hadoopConf
+  .set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, 
sparkSession.sparkContext.appName)
 
 val conf = SparkSQLUtil.broadCastHadoopConf(sc, hadoopConf)
 // 1. Input

http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala
index 0a68fb0..2ca47b3 100644
--- 
a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala
@@ -26,9 +26,10 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
 
 import org.apache.carbondata.common.logging.LogServiceFactory
+import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.exception.CarbonDataWriterException
 import org.apache.carbondata.core.datastore.row.CarbonRow
-import org.apache.carbondata.core.util.ThreadLocalSessionInfo
+import org.apache.carbondata.core.util.{CarbonProperties, 
ThreadLocalSessionInfo}
 import org.apache.carbondata.processing.loading.{BadRecordsLogger, 
BadRecordsLoggerProvider, CarbonDataLoadConfiguration, DataLoadProcessBuilder, 
TableProcessingOperations}
 import org.apache.carbondata.processing.loading.converter.impl.RowConverterImpl
 import 
org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
@@ -228,6 +229,9 @@ object DataLoadProcessorStepOnSpark {
   modelBroadcast: Broadcast[CarbonLoadModel],
   rowCounter: Accumulator[Int],
   conf: Configuration) {
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME,
+conf.get(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME))
 ThreadLocalSessionInfo.setConfigurationToCurrentThread(conf)
 var model: CarbonLoadModel = null
 var tableName: String = null

http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFa

carbondata git commit: [CARBONDATA-3098] Fix for negative exponents value giving wrong results in Float datatype

2018-11-14 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master cd0ce4187 -> ceb135175


[CARBONDATA-3098] Fix for negative exponents value giving wrong results in 
Float datatype

Problem: When the exponent value is a negative number, the data is incorrect 
due to loss of precision of floating point values and a wrong calculation of 
the number of decimal places.

Solution: Handled the floating point precision by converting the value to 
double and counted the decimal places the same way as for the double datatype 
(using BigDecimal).
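
A rough sketch of the approach, assuming the double path counts decimal places 
with java.math.BigDecimal (the float method itself simply delegates, as the 
diff below shows); the exact double implementation in PrimitivePageStatsCollector 
may differ:

private int getDecimalCount(float value) {
  // Delegate to the double path so precision handling and counting stay in one place.
  return getDecimalCount((double) value);
}

// Assumption: one possible way to count digits after the decimal point with BigDecimal.
private int getDecimalCount(double value) {
  return Math.max(0, java.math.BigDecimal.valueOf(Math.abs(value)).stripTrailingZeros().scale());
}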

This closes #2918


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ceb13517
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ceb13517
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ceb13517

Branch: refs/heads/master
Commit: ceb13517553e729ccb408d95ae7ae401f7aebcb8
Parents: cd0ce41
Author: Manish Nalla 
Authored: Wed Nov 14 10:57:49 2018 +0530
Committer: manishgupta88 
Committed: Wed Nov 14 14:03:55 2018 +0530

--
 .../encoding/adaptive/AdaptiveFloatingCodec.java | 14 +-
 .../page/statistics/PrimitivePageStatsCollector.java | 14 +-
 .../datasource/SparkCarbonDataSourceTest.scala   | 15 +++
 3 files changed, 17 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java
index 49696eb..b04c9df 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java
@@ -167,19 +167,7 @@ public class AdaptiveFloatingCodec extends AdaptiveCodec {
 
 @Override
 public void encode(int rowId, float value) {
-  if (targetDataType == DataTypes.BYTE) {
-encodedPage.putByte(rowId, (byte) (value * floatFactor));
-  } else if (targetDataType == DataTypes.SHORT) {
-encodedPage.putShort(rowId, (short) (value * floatFactor));
-  } else if (targetDataType == DataTypes.SHORT_INT) {
-encodedPage.putShortInt(rowId, (int) (value * floatFactor));
-  } else if (targetDataType == DataTypes.INT) {
-encodedPage.putInt(rowId, (int) (value * floatFactor));
-  } else if (targetDataType == DataTypes.LONG) {
-encodedPage.putLong(rowId, (long) (value * floatFactor));
-  } else {
-throw new RuntimeException("internal error: " + debugInfo());
-  }
+  encode(rowId, (double) value);
 }
 
 @Override

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
index 9be5a58..e604057 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
@@ -253,19 +253,7 @@ public class PrimitivePageStatsCollector implements 
ColumnPageStatsCollector, Si
   }
 
   private int getDecimalCount(float value) {
-int decimalPlaces = 0;
-try {
-  String strValue = Float.valueOf(Math.abs(value)).toString();
-  int integerPlaces = strValue.indexOf('.');
-  if (-1 != integerPlaces) {
-decimalPlaces = strValue.length() - integerPlaces - 1;
-  }
-} catch (NumberFormatException e) {
-  if (!Double.isInfinite(value)) {
-throw e;
-  }
-}
-return decimalPlaces;
+return getDecimalCount((double) value);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
--
diff --git 
a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
 
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index 1e58a9e..5

carbondata git commit: [CARBONDATA-3081] Fixed NPE for boolean type column with null value

2018-11-13 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master d8dfa4f21 -> 07943cec0


[CARBONDATA-3081] Fixed NPE for boolean type column with null value

Problem: An NPE is thrown when a boolean type column contains null values.

Solution: Check for null values before converting the byte to boolean.
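
The shape of the fix, mirroring the string/varchar handling in 
CarbonVectorizedRecordReader (a sketch; the exact branch is in the diff below):

Object data = carbonColumnarBatch.columnVectors[i].getData(batchIdx - 1);
if (carbonColumnarBatch.columnVectors[i].getType() == DataTypes.BOOLEAN) {
  // Guard against null before the byte-to-boolean conversion to avoid the NPE.
  row[i] = (data == null) ? null : ByteUtil.toBoolean((byte) data);
}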

This closes #2901


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/07943cec
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/07943cec
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/07943cec

Branch: refs/heads/master
Commit: 07943cec0eb17d7b0635083aac5c59e10dbcf03c
Parents: d8dfa4f
Author: kunal642 
Authored: Mon Nov 5 18:46:44 2018 +0530
Committer: manishgupta88 
Committed: Tue Nov 13 20:40:16 2018 +0530

--
 .../core/metadata/datatype/DecimalType.java |  2 +-
 .../util/CarbonVectorizedRecordReader.java  | 19 +---
 .../carbondata/sdk/file/CarbonReaderTest.java   | 49 
 3 files changed, 63 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/07943cec/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java
index b4bc20c..a7f7a4e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java
@@ -23,7 +23,7 @@ public class DecimalType extends DataType {
   private int scale;
 
   // create a decimal type object with specified precision and scale
-  public DecimalType(int precision, int scale) {
+  DecimalType(int precision, int scale) {
 super(DataTypes.DECIMAL_TYPE_ID, 8, "DECIMAL", -1);
 this.precision = precision;
 this.scale = scale;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/07943cec/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java
index 9d3d7d6..7720434 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java
@@ -29,7 +29,6 @@ import 
org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.metadata.datatype.DecimalType;
 import org.apache.carbondata.core.metadata.datatype.StructField;
 import org.apache.carbondata.core.scan.executor.QueryExecutor;
 import org.apache.carbondata.core.scan.executor.QueryExecutorFactory;
@@ -149,7 +148,8 @@ public class CarbonVectorizedRecordReader extends 
AbstractRecordReader {
   new StructField(msr.getColumnName(), 
msr.getMeasure().getDataType());
 } else if (DataTypes.isDecimal(dataType)) {
   fields[msr.getOrdinal()] = new StructField(msr.getColumnName(),
-  new DecimalType(msr.getMeasure().getPrecision(), 
msr.getMeasure().getScale()));
+  DataTypes.createDecimalType(msr.getMeasure().getPrecision(),
+  msr.getMeasure().getScale()));
 } else {
   fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), 
DataTypes.DOUBLE);
 }
@@ -171,13 +171,20 @@ public class CarbonVectorizedRecordReader extends 
AbstractRecordReader {
 rowCount += 1;
 Object[] row = new Object[carbonColumnarBatch.columnVectors.length];
 for (int i = 0; i < carbonColumnarBatch.columnVectors.length; i ++) {
+  Object data = carbonColumnarBatch.columnVectors[i].getData(batchIdx - 1);
   if (carbonColumnarBatch.columnVectors[i].getType() == DataTypes.STRING
   || carbonColumnarBatch.columnVectors[i].getType() == 
DataTypes.VARCHAR) {
-byte[] data = (byte[]) 
carbonColumnarBatch.columnVectors[i].getData(batchIdx - 1);
-row[i] = ByteUtil.toString(data, 0, data.length);
+if (data == null) {
+  row[i] = null;
+} else {
+  row[i] = ByteUtil.toString((byte[]) data, 0, (((byte[]) 
data).length));
+}
   } else if (carbonColumnarBatch.columnVectors[i].getType() == 
DataTypes.BOOLEAN) {
-byte data = (byte) 
carbonColumnarBatch.columnVe

carbondata git commit: [HOTFIX] Fix NPE in spark, when same vector reads files with local dictionary and without local dictionary

2018-11-13 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master c94c8ce56 -> d8dfa4f21


[HOTFIX] Fix NPE in spark, when same vector reads files with local dictionary 
and without local dictionary

Problem: NPE in Spark when the same vector reads files with and without a 
local dictionary.

Cause: When two carbondata files are present, one with a local dictionary and 
one without, the same vector may be used to read both files (this can happen 
when a task is launched for a group of files). If the file with the local 
dictionary is read first, the dictionary is set on that vector, but it is 
never reset before reading the next file.

Solution: Reset the dictionary once the batch is processed; set it only while 
processing a local-dictionary batch.
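
Sketched intent (hedged: passing null to setDictionary is assumed here to be 
the reset; the actual one-line change sits in VectorizedCarbonRecordReader in 
the diff below):

// While filling a batch from a blocklet that has a local dictionary:
vector.setDictionary(dictionary);   // set unconditionally for this blocklet

// ... fill and consume the batch ...

// After the batch is processed, clear it so a file without a local dictionary starts clean.
vector.setDictionary(null);         // assumption: null resets the dictionary on the vector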

This closes #2895


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8dfa4f2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8dfa4f2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8dfa4f2

Branch: refs/heads/master
Commit: d8dfa4f21bf3527ed3522799c2d65e143cfd787c
Parents: c94c8ce
Author: ajantha-bhat 
Authored: Mon Nov 5 15:30:27 2018 +0530
Committer: manishgupta88 
Committed: Tue Nov 13 20:37:15 2018 +0530

--
 .../store/impl/LocalDictDimensionDataChunkStore.java  | 10 ++
 .../core/scan/result/vector/CarbonDictionary.java |  4 
 .../scan/result/vector/impl/CarbonDictionaryImpl.java | 10 --
 .../carbondata/hadoop/api/CarbonFileInputFormat.java  |  2 +-
 .../spark/vectorreader/VectorizedCarbonRecordReader.java  |  1 +
 5 files changed, 4 insertions(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
index a384743..0eb6d65 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java
@@ -61,10 +61,7 @@ public class LocalDictDimensionDataChunkStore implements 
DimensionDataChunkStore
 int columnValueSize = dimensionDataChunkStore.getColumnValueSize();
 int rowsNum = data.length / columnValueSize;
 CarbonColumnVector vector = vectorInfo.vector;
-if (!dictionary.isDictionaryUsed()) {
-  vector.setDictionary(dictionary);
-  dictionary.setDictionaryUsed();
-}
+vector.setDictionary(dictionary);
 BitSet nullBitset = new BitSet();
 CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory
 .getDirectVectorWrapperFactory(vector.getDictionaryVector(), 
invertedIndex, nullBitset,
@@ -91,10 +88,7 @@ public class LocalDictDimensionDataChunkStore implements 
DimensionDataChunkStore
   }
 
   @Override public void fillRow(int rowId, CarbonColumnVector vector, int 
vectorRow) {
-if (!dictionary.isDictionaryUsed()) {
-  vector.setDictionary(dictionary);
-  dictionary.setDictionaryUsed();
-}
+vector.setDictionary(dictionary);
 int surrogate = dimensionDataChunkStore.getSurrogate(rowId);
 if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) {
   vector.putNull(vectorRow);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
index 2147c43..882a365 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java
@@ -22,10 +22,6 @@ public interface CarbonDictionary  {
 
   int getDictionarySize();
 
-  boolean isDictionaryUsed();
-
-  void setDictionaryUsed();
-
   byte[] getDictionaryValue(int index);
 
   byte[][] getAllDictionaryValues();

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java
 
b/core/src/main/java/

carbondata git commit: [CARBONDATA-3057] Implement VectorizedReader for SDK Reader

2018-11-04 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 5a0bc6e71 -> 63a28a951


[CARBONDATA-3057] Implement VectorizedReader for SDK Reader

1. Added carbondata file listing for getting splits, to avoid block/blocklet 
datamap loading when no filter expression is provided by the user.

2. Implemented a vectorized reader and exposed a property to switch between 
the record reader and the vector reader.

This closes #2869


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/63a28a95
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/63a28a95
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/63a28a95

Branch: refs/heads/master
Commit: 63a28a951ed552680da1a5047f5937fb90a8d76d
Parents: 5a0bc6e
Author: kunal642 
Authored: Fri Oct 26 11:43:22 2018 +0530
Committer: manishgupta88 
Committed: Mon Nov 5 11:49:24 2018 +0530

--
 ...feVariableLengthDimensionDataChunkStore.java |   5 +-
 .../filesystem/AbstractDFSCarbonFile.java   |  26 +++
 .../core/datastore/filesystem/CarbonFile.java   |   8 +
 .../datastore/filesystem/LocalCarbonFile.java   |  23 ++
 .../encoding/compress/DirectCompressCodec.java  |   7 +-
 .../core/metadata/datatype/DecimalType.java |   2 +-
 .../core/metadata/datatype/StructType.java  |   2 +-
 .../vector/impl/CarbonColumnVectorImpl.java |  18 +-
 docs/sdk-guide.md   |   8 +
 .../carbondata/hadoop/CarbonRecordReader.java   |  15 ++
 .../hadoop/api/CarbonFileInputFormat.java   |  55 -
 .../util/CarbonVectorizedRecordReader.java  | 211 +++
 .../sdk/file/CarbonReaderBuilder.java   |  36 +++-
 .../sdk/file/CSVCarbonWriterTest.java   |   4 +-
 .../carbondata/sdk/file/CarbonReaderTest.java   | 140 ++--
 15 files changed, 519 insertions(+), 41 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/63a28a95/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
index 2873eed..01db383 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
@@ -163,13 +163,14 @@ public abstract class 
SafeVariableLengthDimensionDataChunkStore
 }
 DataType dt = vector.getType();
 
-if ((!(dt == DataTypes.STRING) && length == 0) || 
ByteUtil.UnsafeComparer.INSTANCE
+if (((!(dt == DataTypes.STRING) && !(dt == DataTypes.VARCHAR)) && length 
== 0)
+|| ByteUtil.UnsafeComparer.INSTANCE
 .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
 CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, 
currentDataOffset,
 length)) {
   vector.putNull(vectorRow);
 } else {
-  if (dt == DataTypes.STRING) {
+  if (dt == DataTypes.STRING || dt == DataTypes.VARCHAR) {
 vector.putByteArray(vectorRow, currentDataOffset, length, data);
   } else if (dt == DataTypes.BOOLEAN) {
 vector.putBoolean(vectorRow, 
ByteUtil.toBoolean(data[currentDataOffset]));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/63a28a95/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
index 24efb70..d56caac 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
@@ -524,6 +524,27 @@ public abstract class AbstractDFSCarbonFile implements 
CarbonFile {
 return getFiles(listStatus);
   }
 
+  /**
+   * Method used to list files recursively and apply file filter on the result.
+   *
+   */
+  @Override
+  public List listFiles(boolean recursive, CarbonFileFilter 
fileFilter)
+  throws IOException {
+List carbonFiles = new ArrayList<>();
+if (null != fileStatus && fileStatus.isDirectory()) {
+  RemoteIterator listStatus = 
fs.listFiles(fileStatus.getPath(), recursive);
+  while (listStatu

carbondata git commit: [CARBONDATA-3066]add documentation for writtenBy and getVersionDetails APIs in SDK

2018-11-02 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master fc2a53991 -> e622fa998


[CARBONDATA-3066]add documentation for writtenBy and getVersionDetails APIs in 
SDK

This PR adds documentation for the new APIs added in the SDK (a usage sketch 
follows):
builder API - writtenBy()
reader API - getVersionDetails()
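
A minimal usage sketch of the two APIs (assumptions: path, schema and 
dataFilePath are set up as in the surrounding SDK guide examples, and 
getVersionDetails is assumed to live on the schema-reader utility class 
documented in that guide):

// Writer side: writtenBy() is mandatory; the application name is recorded in the carbondata files.
CarbonWriter writer = CarbonWriter.builder()
    .outputPath(path)
    .withCsvInput(schema)
    .writtenBy("MyApp")
    .build();
// ... write rows, then writer.close() ...

// Reader side: find out which application and CarbonData version wrote a given file,
// e.g. "MyApp in version: 1.6.0-SNAPSHOT".
// Class name assumed; see the API documented in the diff below.
String versionDetails = CarbonSchemaReader.getVersionDetails(dataFilePath);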

This closes #2888


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e622fa99
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e622fa99
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e622fa99

Branch: refs/heads/master
Commit: e622fa9982ba72ce7203afbd94d153e74d8e538c
Parents: fc2a539
Author: akashrn5 
Authored: Wed Oct 31 20:13:02 2018 +0530
Committer: manishgupta88 
Committed: Fri Nov 2 16:18:32 2018 +0530

--
 docs/sdk-guide.md | 28 +---
 1 file changed, 25 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/e622fa99/docs/sdk-guide.md
--
diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md
index 3a9be71..0ee1524 100644
--- a/docs/sdk-guide.md
+++ b/docs/sdk-guide.md
@@ -67,7 +67,7 @@ These SDK writer output contains just carbondata and 
carbonindex files. No metad
 
  CarbonProperties.getInstance().addProperty("enable.offheap.sort", 
enableOffheap);
  
- CarbonWriterBuilder builder = 
CarbonWriter.builder().outputPath(path).withCsvInput(schema);
+ CarbonWriterBuilder builder = 
CarbonWriter.builder().outputPath(path).withCsvInput(schema).writtenBy("SDK");
  
  CarbonWriter writer = builder.build();
  
@@ -124,7 +124,7 @@ public class TestSdkAvro {
 try {
   CarbonWriter writer = CarbonWriter.builder()
   .outputPath(path)
-  .withAvroInput(new 
org.apache.avro.Schema.Parser().parse(avroSchema)).build();
+  .withAvroInput(new 
org.apache.avro.Schema.Parser().parse(avroSchema)).writtenBy("SDK").build();
 
   for (int i = 0; i < 100; i++) {
 writer.write(record);
@@ -164,7 +164,7 @@ public class TestSdkJson {
 
 Schema CarbonSchema = new Schema(fields);
 
-CarbonWriterBuilder builder = 
CarbonWriter.builder().outputPath(path).withJsonInput(CarbonSchema);
+CarbonWriterBuilder builder = 
CarbonWriter.builder().outputPath(path).withJsonInput(CarbonSchema).writtenBy("SDK");
 
 // initialize json writer with carbon schema
 CarbonWriter writer = builder.build();
@@ -431,6 +431,16 @@ public CarbonWriterBuilder withJsonInput(Schema 
carbonSchema);
 
 ```
 /**
+* To support writing the ApplicationName which is writing the carbondata file
+* This is a mandatory API to call, else the build() call will fail with error.
+* @param application name which is writing the carbondata files
+* @return CarbonWriterBuilder
+*/
+public CarbonWriterBuilder writtenBy(String appName) {
+```
+
+```
+/**
 * Build a {@link CarbonWriter}
 * This writer is not thread safe,
 * use withThreadSafe() configuration in multi thread environment
@@ -686,6 +696,18 @@ Find example code at 
[CarbonReaderExample](https://github.com/apache/carbondata/
   public static Schema readSchemaInIndexFile(String indexFilePath);
 ```
 
+```
+  /**
+   * This method return the version details in formatted string by reading 
from carbondata file
+   * If application name is SDK_1.0.0 and this has written the carbondata file 
in carbondata 1.6 project version,
+   * then this API returns the String "SDK_1.0.0 in version: 1.6.0-SNAPSHOT"
+   * @param dataFilePath complete path including carbondata file name
+   * @return string with information of who has written this file in which 
carbondata project version
+   * @throws IOException
+   */
+  public static String getVersionDetails(String dataFilePath);
+```
+
 ### Class org.apache.carbondata.sdk.file.Schema
 ```
   /**



carbondata git commit: [CARBONDATA-3062] Fix Compatibility issue with cache_level as blocklet

2018-11-01 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 269f4c378 -> 6e58418eb


[CARBONDATA-3062] Fix Compatibility issue with cache_level as blocklet

In case of a hybrid store we can have a block schema as well as a blocklet 
schema.
Scenario:
In a hybrid store, a few loads come from a legacy store that does not contain 
the blocklet information and therefore defaults to cache_level BLOCK, while 
other loads come from the latest store that contains the BLOCKLET information 
and has cache_level BLOCKLET. For such scenarios we need separate task and 
footer schemas. For loads with or without blocklet info there is no additional 
cost in maintaining the two variables.

This closes #2883


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6e58418e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6e58418e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6e58418e

Branch: refs/heads/master
Commit: 6e58418eb15effbf60290d2e1b8ff06f8613d714
Parents: 269f4c3
Author: Indhumathi27 
Authored: Tue Oct 30 21:38:56 2018 +0530
Committer: manishgupta88 
Committed: Fri Nov 2 10:54:49 2018 +0530

--
 .../block/SegmentPropertiesAndSchemaHolder.java | 82 +---
 .../indexstore/blockletindex/BlockDataMap.java  |  2 +-
 .../blockletindex/BlockletDataMap.java  |  2 +-
 ...ithColumnMetCacheAndCacheLevelProperty.scala |  2 +-
 .../merger/RowResultMergerProcessor.java|  6 +-
 5 files changed, 57 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6e58418e/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
index cc6341b..1b7e1f8 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
@@ -284,11 +284,17 @@ public class SegmentPropertiesAndSchemaHolder {
 private int[] columnCardinality;
 private SegmentProperties segmentProperties;
 private List minMaxCacheColumns;
-private CarbonRowSchema[] taskSummarySchema;
-// same variable can be used for block and blocklet schema because at any 
given cache_level
-// with either block or blocklet and whenever cache_level is changed the 
cache and its
-// corresponding segmentProperties is flushed
-private CarbonRowSchema[] fileFooterEntrySchema;
+// in case of hybrid store we can have block as well as blocklet schema
+// Scenario: When there is a hybrid store in which few loads are from 
legacy store which do
+// not contain the blocklet information and hence they will be, by default 
have cache_level as
+// BLOCK and few loads with latest store which contain the BLOCKLET 
information and have
+// cache_level BLOCKLET. For these type of scenarios we need to have 
separate task and footer
+// schemas. For all loads with/without blocklet info there will not be any 
additional cost
+// of maintaining 2 variables
+private CarbonRowSchema[] taskSummarySchemaForBlock;
+private CarbonRowSchema[] taskSummarySchemaForBlocklet;
+private CarbonRowSchema[] fileFooterEntrySchemaForBlock;
+private CarbonRowSchema[] fileFooterEntrySchemaForBlocklet;
 
 public SegmentPropertiesWrapper(CarbonTable carbonTable,
 List columnsInTable, int[] columnCardinality) {
@@ -314,8 +320,10 @@ public class SegmentPropertiesAndSchemaHolder {
   if (null != minMaxCacheColumns) {
 minMaxCacheColumns.clear();
   }
-  taskSummarySchema = null;
-  fileFooterEntrySchema = null;
+  taskSummarySchemaForBlock = null;
+  taskSummarySchemaForBlocklet = null;
+  fileFooterEntrySchemaForBlock = null;
+  fileFooterEntrySchemaForBlocklet = null;
 }
 
 @Override public boolean equals(Object obj) {
@@ -350,48 +358,62 @@ public class SegmentPropertiesAndSchemaHolder {
   return columnCardinality;
 }
 
-public CarbonRowSchema[] getTaskSummarySchema(boolean storeBlockletCount,
+public CarbonRowSchema[] getTaskSummarySchemaForBlock(boolean 
storeBlockletCount,
 boolean filePathToBeStored) throws MemoryException {
-  if (null == taskSummarySchema) {
+  if (null == taskSummarySchemaForBlock) {
 synchronized (taskSchemaLock) {
-  if (null == taskSummarySchema) {
-taskSummarySchema = SchemaGenerator
+  if (n

carbondata git commit: [CARBONDATA-3054] Fix Dictionary file cannot be read in S3a with CarbonDictionaryDecoder.doConsume() codeGen

2018-10-31 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 0e39abf81 -> bcf3e0fd5


[CARBONDATA-3054] Fix Dictionary file cannot be read in S3a with 
CarbonDictionaryDecoder.doConsume() codeGen

Problem: In an S3A environment, when querying data that has dictionary files, 
the dictionary file cannot be read by the CarbonDictionaryDecoder.doConsume() 
codegen even though the file is present.

Cause: The CarbonDictionaryDecoder.doConsume() codegen does not set the Hadoop 
conf in the thread-local variable; only doExecute() sets it. Hence, when 
getDictionaryWrapper() is called from the doConsume() codegen, 
AbstractDictionaryCache.getDictionaryMetaCarbonFile() returns false for the 
fileExists() check.

Solution: In the CarbonDictionaryDecoder.doConsume() codegen, set the Hadoop 
conf in the thread-local variable as well.
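
Condensed to its essence (the full Scala change is in the diff below; 
broadcastConf stands for the Hadoop configuration broadcast when the plan is 
built):

// Run once per task thread, before the generated code performs any dictionary file lookup,
// so that fileExists() on the dictionary meta file sees the right Hadoop configuration.
ThreadLocalSessionInfo.setConfigurationToCurrentThread(broadcastConf.value.value);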

This closes #2876


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bcf3e0fd
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bcf3e0fd
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bcf3e0fd

Branch: refs/heads/master
Commit: bcf3e0fd595f612ee33a8ee2d9aa6197998f626e
Parents: 0e39abf
Author: ajantha-bhat 
Authored: Mon Oct 29 17:56:29 2018 +0530
Committer: manishgupta88 
Committed: Wed Oct 31 15:47:51 2018 +0530

--
 .../spark/sql/CarbonDictionaryDecoder.scala | 25 +---
 1 file changed, 17 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bcf3e0fd/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala
index f3d5bf0..c9434a1 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala
@@ -21,6 +21,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.{Partition, TaskContext}
+import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.errors.attachTree
@@ -31,6 +32,7 @@ import org.apache.spark.sql.execution.{CodegenSupport, 
SparkPlan, UnaryExecNode}
 import org.apache.spark.sql.optimizer.CarbonDecoderRelation
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.{SparkSQLUtil, SparkTypeConverter}
+import org.apache.spark.util.SerializableConfiguration
 
 import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType}
 import org.apache.carbondata.core.cache.dictionary.{Dictionary, 
DictionaryColumnUniqueIdentifier}
@@ -42,7 +44,6 @@ import 
org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension
 import org.apache.carbondata.core.scan.executor.util.QueryUtil
 import org.apache.carbondata.core.util.{DataTypeUtil, ThreadLocalSessionInfo}
-import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil
 import org.apache.carbondata.spark.CarbonAliasDecoderRelation
 import org.apache.carbondata.spark.rdd.CarbonRDDWithTableInfo
 
@@ -69,6 +70,9 @@ case class CarbonDictionaryDecoder(
   val getDictionaryColumnIds: Array[(String, ColumnIdentifier, 
CarbonDimension)] =
 CarbonDictionaryDecoder.getDictionaryColumnMapping(child.output, 
relations, profile, aliasMap)
 
+  val broadcastConf = SparkSQLUtil.broadCastHadoopConf(
+sparkSession.sparkContext, sparkSession.sessionState.newHadoopConf())
+
   override def doExecute(): RDD[InternalRow] = {
 attachTree(this, "execute") {
   val tableNameToCarbonTableMapping = relations.map { relation =>
@@ -76,12 +80,10 @@ case class CarbonDictionaryDecoder(
 (carbonTable.getTableName, carbonTable)
   }.toMap
 
-  val conf = SparkSQLUtil
-.broadCastHadoopConf(sparkSession.sparkContext, 
sparkSession.sessionState.newHadoopConf())
   if (CarbonDictionaryDecoder.isRequiredToDecode(getDictionaryColumnIds)) {
 val dataTypes = child.output.map { attr => attr.dataType }
 child.execute().mapPartitions { iter =>
-  
ThreadLocalSessionInfo.setConfigurationToCurrentThread(conf.value.value)
+  
ThreadLocalSessionInfo.setConfigurationToCurrentThread(broadcastConf.value.value)
   val cacheProvider: CacheProvider = CacheProvider.getInstance
   val forwardDictionaryCache: Cache[DictionaryColumnUniqueIdentifier, 
Dictionary] =
 cacheProvider.createCache(CacheType.FORWARD_DICTIONARY)
@@ -137,7 +139,7 @@ case class C

carbondata git commit: [CARBONDATA-3042] Column Schema objects are present in Driver and Executor even after dropping table

2018-10-30 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master e2c517e3f -> 10b393808


[CARBONDATA-3042] Column Schema objects are present in Driver and Executor even 
after dropping table

Problem:
Column schema objects remain present in the driver and executor even after 
dropping the table.

Solution:
In the driver: after dropping the table, remove the tableInfo entry from the 
CarbonMetadata instance.
In the executor: remove the usage of the CarbonMetadata instance and instead 
pass the CarbonTable object itself.

This closes #2852


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/10b39380
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/10b39380
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/10b39380

Branch: refs/heads/master
Commit: 10b393808e91344b017ba3e946b28217c2dd9757
Parents: e2c517e
Author: Indhumathi27 
Authored: Thu Oct 25 13:37:08 2018 +0530
Committer: manishgupta88 
Committed: Tue Oct 30 14:19:39 2018 +0530

--
 .../core/metadata/CarbonMetadata.java   |  5 +-
 .../statusmanager/SegmentStatusManager.java |  6 +-
 .../carbondata/core/util/DeleteLoadFolders.java | 31 +--
 .../spark/rdd/AlterTableLoadPartitionRDD.scala  |  4 +-
 .../carbondata/spark/rdd/CarbonMergerRDD.scala  |  3 +-
 .../carbondata/spark/rdd/StreamHandoffRDD.scala |  1 -
 .../spark/sql/CarbonDictionaryDecoder.scala |  5 --
 .../spark/sql/hive/CarbonFileMetastore.scala|  3 +-
 .../spark/sql/hive/CarbonHiveMetaStore.scala|  2 +-
 .../loading/DataLoadProcessBuilder.java |  2 -
 .../sort/impl/ParallelReadMergeSorterImpl.java  |  9 ++-
 ...allelReadMergeSorterWithColumnRangeImpl.java |  8 +--
 .../UnsafeBatchParallelReadMergeSorterImpl.java |  6 +-
 ...allelReadMergeSorterWithColumnRangeImpl.java | 11 ++--
 .../CarbonRowDataWriterProcessorStepImpl.java   | 13 ++---
 .../steps/DataConverterProcessorStepImpl.java   |  6 +-
 .../steps/DataWriterBatchProcessorStepImpl.java | 11 ++--
 .../steps/DataWriterProcessorStepImpl.java  | 18 +++---
 .../merger/CompactionResultSortProcessor.java   |  4 +-
 .../merger/RowResultMergerProcessor.java|  5 +-
 .../partition/spliter/RowResultProcessor.java   |  5 +-
 .../sort/sortdata/SortParameters.java   | 44 +--
 .../store/CarbonFactDataHandlerModel.java   |  4 +-
 .../util/CarbonDataProcessorUtil.java   | 58 +---
 24 files changed, 110 insertions(+), 154 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java 
b/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java
index 850f477..e44092e 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java
@@ -69,9 +69,8 @@ public final class CarbonMetadata {
 
   /**
* Below method will be used to set the carbon table
-   * This method will be used in executor side as driver will always have
-   * updated table so from driver during query execution and data loading
-   * we just need to add the table
+   * Note: Use this method only in driver as clean up in Executor is not 
handled
+   *   if this table is added to executor
*
* @param carbonTable
*/

http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
index 9196367..fbb765b 100755
--- 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
@@ -1001,9 +1001,9 @@ public class SegmentStatusManager {
 CarbonLockUtil.fileUnlock(carbonTableStatusLock, 
LockUsage.TABLE_STATUS_LOCK);
   }
   if (updationCompletionStatus) {
-DeleteLoadFolders.physicalFactAndMeasureMetadataDeletion(
-identifier, carbonTable.getMetadataPath(),
-newAddedLoadHistoryList, isForceDeletion, partitionSpecs);
+DeleteLoadFolders
+.physicalFactAndMeasureMetadataDeletion(carbonTable, 
newAddedLoadHistoryList,
+isForceDeletion, partitionSpecs);
   }
 }
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/c

carbondata git commit: [CARBONDATA-2977] Write uncompress_size to ChunkCompressMeta in the file

2018-10-25 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 33a6dc2ac -> e19c5da6d


[CARBONDATA-2977] Write uncompress_size to ChunkCompressMeta in the file

Currently total_uncompressed_size and total_compress_size in the 
ChunkCompressMeta in the carbondata file are always 0. This PR writes the 
correct values to the file.

This closes #2772


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e19c5da6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e19c5da6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e19c5da6

Branch: refs/heads/master
Commit: e19c5da6dbb07056b1053319d48a64a4b0715129
Parents: 33a6dc2
Author: Jacky Li 
Authored: Thu Sep 27 00:39:29 2018 +0800
Committer: manishgupta88 
Committed: Thu Oct 25 14:38:28 2018 +0530

--
 .../core/datastore/page/ColumnPage.java | 39 +++
 .../datastore/page/LocalDictColumnPage.java |  9 +++
 .../page/UnsafeFixLengthColumnPage.java |  7 ++
 .../datastore/page/VarLengthColumnPageBase.java |  5 ++
 .../page/encoding/ColumnPageEncoder.java|  7 +-
 .../core/util/CarbonMetadataUtil.java   | 10 +--
 .../apache/carbondata/tool/CarbonCliTest.java   | 69 
 7 files changed, 81 insertions(+), 65 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
index 8b9a9a5..e8097da 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
@@ -724,6 +724,45 @@ public abstract class ColumnPage {
   }
 
   /**
+   * Return total page data length in bytes
+   */
+  public long getPageLengthInBytes() throws IOException {
+DataType dataType = columnPageEncoderMeta.getStoreDataType();
+if (dataType == DataTypes.BOOLEAN) {
+  return getBooleanPage().length;
+} else if (dataType == DataTypes.BYTE) {
+  return getBytePage().length;
+} else if (dataType == DataTypes.SHORT) {
+  return getShortPage().length * SHORT.getSizeInBytes();
+} else if (dataType == DataTypes.SHORT_INT) {
+  return getShortIntPage().length;
+} else if (dataType == DataTypes.INT) {
+  return getIntPage().length * INT.getSizeInBytes();
+} else if (dataType == DataTypes.LONG) {
+  return getLongPage().length * LONG.getSizeInBytes();
+} else if (dataType == DataTypes.FLOAT) {
+  return getFloatPage().length * FLOAT.getSizeInBytes();
+} else if (dataType == DataTypes.DOUBLE) {
+  return getDoublePage().length * DOUBLE.getSizeInBytes();
+} else if (DataTypes.isDecimal(dataType)) {
+  return getDecimalPage().length;
+} else if (dataType == DataTypes.BYTE_ARRAY
+&& columnPageEncoderMeta.getColumnSpec().getColumnType() == 
ColumnType.COMPLEX_PRIMITIVE) {
+  return getComplexChildrenLVFlattenedBytePage().length;
+} else if (dataType == DataTypes.BYTE_ARRAY
+&& (columnPageEncoderMeta.getColumnSpec().getColumnType() == 
ColumnType.COMPLEX_STRUCT
+|| columnPageEncoderMeta.getColumnSpec().getColumnType() == 
ColumnType.COMPLEX_ARRAY
+|| columnPageEncoderMeta.getColumnSpec().getColumnType() == 
ColumnType.PLAIN_LONG_VALUE
+|| columnPageEncoderMeta.getColumnSpec().getColumnType() == 
ColumnType.PLAIN_VALUE)) {
+  return getComplexParentFlattenedBytePage().length;
+} else if (dataType == DataTypes.BYTE_ARRAY) {
+  return getLVFlattenedBytePage().length;
+} else {
+  throw new UnsupportedOperationException("unsupport compress column page: 
" + dataType);
+}
+  }
+
+  /**
* Compress page data using specified compressor
*/
   public byte[] compress(Compressor compressor) throws MemoryException, 
IOException {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
index 3da154a..5cf2130 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
@@ -344,4 +344,13 @@ public class LocalDictColumnPage extends ColumnPa

carbondata git commit: [CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option

2018-10-24 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master c429cee16 -> 278d17178


[CARBONDATA-2998] Refresh column schema for old store(before V3) for 
SORT_COLUMNS option

Problem:
For an old store (before V3), the SORT_COLUMNS option is not set in 
ColumnSchema, although all dimension columns were treated as sort columns. So, 
while refreshing the table, it reads from the thrift and marks the columns as 
non-sort columns in ColumnSchema because the flag was never set.

Solution:
While refreshing the table, check for the SORT_COLUMNS property in the table 
properties and, if nothing is set, take all dimension columns as SORT_COLUMNS 
by default.

This closes #2806


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/278d1717
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/278d1717
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/278d1717

Branch: refs/heads/master
Commit: 278d1717880541a052aa4a5ee96ba73423d650b1
Parents: c429cee
Author: dhatchayani 
Authored: Tue Oct 9 17:34:42 2018 +0530
Committer: manishgupta88 
Committed: Wed Oct 24 14:52:25 2018 +0530

--
 .../management/RefreshCarbonTableCommand.scala  | 31 
 .../merger/CompactionResultSortProcessor.java   |  6 ++--
 2 files changed, 34 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
index 39e85ba..c129194 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.management
 import java.util
 
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -75,6 +76,9 @@ case class RefreshCarbonTableCommand(
   if (FileFactory.isFileExist(schemaFilePath, 
FileFactory.getFileType(schemaFilePath))) {
 // read TableInfo
 val tableInfo = SchemaReader.getTableInfo(identifier)
+// refresh the column schema in case of store before V3
+refreshColumnSchema(tableInfo)
+
 // 2.2 register the table with the hive check if the table being 
registered has
 // aggregate table then do the below steps
 // 2.2.1 validate that all the aggregate tables are copied at the 
store location.
@@ -119,6 +123,33 @@ case class RefreshCarbonTableCommand(
   }
 
   /**
+   * Refresh the sort_column flag in column schema in case of old store. 
Before V3, sort_column
+   * option is not set but by default all dimension columns should be treated
+   * as sort columns if SORT_COLUMNS property is not defined in tblproperties
+   *
+   * @param tableInfo
+   */
+  def refreshColumnSchema(tableInfo: TableInfo): Unit = {
+val tableProps: mutable.Map[String, String] = 
tableInfo.getFactTable.getTableProperties.asScala
+val sortColumns = tableProps.get(CarbonCommonConstants.SORT_COLUMNS)
+sortColumns match {
+  case Some(sortColumn) =>
+  // don't do anything
+  case None =>
+// iterate over all the columns and make all the dimensions as sort 
columns true
+// check for the complex data types parent and child columns to
+// avoid adding them in SORT_COLUMNS
+tableInfo.getFactTable.getListOfColumns.asScala collect
+({
+  case columnSchema if columnSchema.isDimensionColumn &&
+   !columnSchema.getDataType.isComplexType &&
+   columnSchema.getSchemaOrdinal != -1 =>
+columnSchema.setSortColumn(true)
+})
+}
+  }
+
+  /**
* the method prepare the data type for raw column
*
* @param column

http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java
index e0a30da..8d28d45 100644
--- 
a/processing/src/mai

carbondata git commit: [CARBONDATA-3022] Refactor ColumnPageWrapper

2018-10-23 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master c7c83684b -> fa9a4eeeb


[CARBONDATA-3022] Refactor ColumnPageWrapper

Refactored ColumnPageWrapper for better filter query performance and removed unnecessary checks and loops.

This closes #2808


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9a4eee
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9a4eee
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9a4eee

Branch: refs/heads/master
Commit: fa9a4eeeb489b77b3040d54e7878bac93ccb12aa
Parents: c7c8368
Author: dhatchayani 
Authored: Wed Oct 10 13:18:01 2018 +0530
Committer: manishgupta88 
Committed: Tue Oct 23 15:12:21 2018 +0530

--
 .../chunk/store/ColumnPageWrapper.java  | 126 +--
 .../core/scan/executor/util/QueryUtil.java  |  32 -
 .../carbondata/core/util/DataTypeUtil.java  |  18 ---
 3 files changed, 32 insertions(+), 144 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
index 627c75f..ba853f9 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java
@@ -26,7 +26,6 @@ import 
org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
-import org.apache.carbondata.core.scan.executor.util.QueryUtil;
 import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.scan.result.vector.CarbonDictionary;
 import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo;
@@ -83,31 +82,6 @@ public class ColumnPageWrapper implements 
DimensionColumnPage {
 return chunkIndex + 1;
   }
 
-  /**
-   * Fill the data to the vector
-   *
-   * @param rowId
-   * @param vector
-   * @param vectorRow
-   */
-  private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
-if (columnPage.getNullBits().get(rowId)
-&& columnPage.getColumnSpec().getColumnType() == 
ColumnType.COMPLEX_PRIMITIVE) {
-  // if this row is null, return default null represent in byte array
-  byte[] value = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
-  QueryUtil.putDataToVector(vector, value, vectorRow, value.length);
-} else if (columnPage.getNullBits().get(rowId)) {
-  // if this row is null, return default null represent in byte array
-  byte[] value = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
-  QueryUtil.putDataToVector(vector, value, vectorRow, value.length);
-} else {
-  if (isExplicitSorted) {
-rowId = invertedReverseIndex[rowId];
-  }
-  QueryUtil.putDataToVector(vector, getActualData(rowId, true), vectorRow);
-}
-  }
-
   @Override
   public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, 
int chunkIndex) {
 ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex];
@@ -214,78 +188,42 @@ public class ColumnPageWrapper implements 
DimensionColumnPage {
 return null;
   }
 
-  private Object getActualData(int rowId, boolean isRowIdChanged) {
-ColumnType columnType = columnPage.getColumnSpec().getColumnType();
-DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
-DataType targetDataType = columnPage.getDataType();
-if (null != localDictionary) {
-  return localDictionary
-  
.getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 
0, 3));
-} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && 
this.isAdaptiveEncoded()) || (
-columnType == ColumnType.PLAIN_VALUE && 
DataTypeUtil.isPrimitiveColumn(srcDataType))) {
-  if (!isRowIdChanged && columnPage.getNullBits().get(rowId)
-  && columnType == ColumnType.COMPLEX_PRIMITIVE) {
-// if this row is null, return default null represent in byte array
-return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
-  }
-  if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) {
-// if this row is null, return default null represent in byte array
-return CarbonCommonConstants.EMPTY_BYTE_ARRAY;
+  /**
+   * Fill the data to the vector
+   *

carbondata git commit: [CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap

2018-10-04 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master c3a870449 -> 8fbd4a5f5


[CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap

Problem: While rebuilding the datamap, the datamap store is accessed, so the datamap is built and stored in unsafe on-heap storage. When the reader is closed, it frees all memory acquired during that task. Since the memory was acquired on-heap but released with the off-heap allocator, the JVM crashes.

Solution: Record the type of memory acquired in the memory block itself, look up the allocator that matches that memory type, and release the block through it.

This closes #2793
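
As a rough illustration of the solution (a minimal sketch with placeholder allocators and renamed classes, not the actual CarbonData code), the idea is that every block remembers which kind of memory backed it, and the manager frees it through the matching allocator instead of always assuming off-heap:

enum MemoryType { ONHEAP, OFFHEAP }

final class MemoryBlockSketch {
  final long address;
  final long size;
  final MemoryType type;   // recorded at allocation time

  MemoryBlockSketch(long address, long size, MemoryType type) {
    this.address = address;
    this.size = size;
    this.type = type;
  }
}

final class MemoryManagerSketch {
  MemoryBlockSketch allocate(MemoryType type, long size) {
    long address = (type == MemoryType.ONHEAP) ? allocateOnHeap(size) : allocateOffHeap(size);
    return new MemoryBlockSketch(address, size, type);
  }

  void free(MemoryBlockSketch block) {
    // release through the allocator that matches how the block was acquired
    if (block.type == MemoryType.ONHEAP) {
      freeOnHeap(block.address);
    } else {
      freeOffHeap(block.address);
    }
  }

  // placeholders standing in for the real heap / unsafe allocators
  private long allocateOnHeap(long size) { return 0L; }
  private long allocateOffHeap(long size) { return 0L; }
  private void freeOnHeap(long address) { }
  private void freeOffHeap(long address) { }
}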


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fbd4a5f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fbd4a5f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fbd4a5f

Branch: refs/heads/master
Commit: 8fbd4a5f53070b3755f1f573b09e0066fa93a6ea
Parents: c3a8704
Author: ravipesala 
Authored: Sun Sep 30 11:27:57 2018 +0530
Committer: manishgupta88 
Committed: Thu Oct 4 14:39:25 2018 +0530

--
 .../core/indexstore/UnsafeMemoryDMStore.java| 14 +++
 .../core/memory/HeapMemoryAllocator.java|  5 ++-
 .../carbondata/core/memory/MemoryBlock.java | 14 ++-
 .../carbondata/core/memory/MemoryType.java  | 23 ++
 .../core/memory/UnsafeMemoryAllocator.java  |  2 +-
 .../core/memory/UnsafeMemoryManager.java| 44 +++-
 6 files changed, 70 insertions(+), 32 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fbd4a5f/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
index 196559a..0db1b0a 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
@@ -19,9 +19,9 @@ package org.apache.carbondata.core.indexstore;
 import org.apache.carbondata.core.indexstore.row.DataMapRow;
 import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow;
 import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;
-import org.apache.carbondata.core.memory.MemoryAllocator;
 import org.apache.carbondata.core.memory.MemoryBlock;
 import org.apache.carbondata.core.memory.MemoryException;
+import org.apache.carbondata.core.memory.MemoryType;
 import org.apache.carbondata.core.memory.UnsafeMemoryManager;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
@@ -51,7 +51,7 @@ public class UnsafeMemoryDMStore extends 
AbstractMemoryDMStore {
   public UnsafeMemoryDMStore() throws MemoryException {
 this.allocatedSize = capacity;
 this.memoryBlock =
-UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, 
taskId, allocatedSize);
+UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, 
allocatedSize);
 this.pointers = new int[1000];
   }
 
@@ -74,10 +74,10 @@ public class UnsafeMemoryDMStore extends 
AbstractMemoryDMStore {
 
   private void increaseMemory(int requiredMemory) throws MemoryException {
 MemoryBlock newMemoryBlock = UnsafeMemoryManager
-.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, allocatedSize + 
requiredMemory);
+.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, allocatedSize + 
requiredMemory);
 getUnsafe().copyMemory(this.memoryBlock.getBaseObject(), 
this.memoryBlock.getBaseOffset(),
 newMemoryBlock.getBaseObject(), newMemoryBlock.getBaseOffset(), 
runningLength);
-UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, 
this.memoryBlock);
+UnsafeMemoryManager.INSTANCE.freeMemory(taskId, this.memoryBlock);
 allocatedSize = allocatedSize + requiredMemory;
 this.memoryBlock = newMemoryBlock;
   }
@@ -190,10 +190,10 @@ public class UnsafeMemoryDMStore extends 
AbstractMemoryDMStore {
   public void finishWriting() throws MemoryException {
 if (runningLength < allocatedSize) {
   MemoryBlock allocate =
-  UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, 
taskId, runningLength);
+  UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, 
taskId, runningLength);
   getUnsafe().copyMemory(memoryBlock.getBaseObject(), 
memoryBlock.getBaseOffset(),
   allocate.getBaseObject(), allocate.getBaseOffset(), runningLength);
-  UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, 
memor

carbondata git commit: [CARBONDATA-2982] CarbonSchemaReader support array

2018-10-03 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 7d1fcb309 -> d8a51c9bf


[CARBONDATA-2982] CarbonSchemaReader support array

This PR fixes the issue and changes:
org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInDataFile
org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInIndexFile

This PR also removes the child schema from the returned schema.

This closes #2780


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8a51c9b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8a51c9b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8a51c9b

Branch: refs/heads/master
Commit: d8a51c9bf314fb1cd5f6112e66eb04e776a0553d
Parents: 7d1fcb3
Author: xubo245 
Authored: Fri Sep 28 11:47:22 2018 +0800
Committer: manishgupta88 
Committed: Wed Oct 3 16:24:38 2018 +0530

--
 .../examples/sdk/CarbonReaderExample.java   |  4 +-
 .../carbondata/examplesCI/RunExamples.scala |  5 ++
 .../carbondata/sdk/file/CarbonSchemaReader.java | 14 +++-
 .../carbondata/sdk/file/CarbonReaderTest.java   | 86 
 4 files changed, 105 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
--
diff --git 
a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
 
b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
index 9e80567..ef4ae7a 100644
--- 
a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
+++ 
b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java
@@ -122,10 +122,11 @@ public class CarbonReaderExample {
 for (int j = 0; j < arr.length; j++) {
 System.out.print(arr[j] + " ");
 }
+assert (arr[0].equals("Hello"));
+assert (arr[3].equals("Carbon"));
 System.out.println();
 i++;
 }
-System.out.println("\nFinished");
 reader.close();
 
 // Read data
@@ -148,7 +149,6 @@ public class CarbonReaderExample {
 row[5], row[6], row[7], row[8], row[9], row[10]));
 i++;
 }
-System.out.println("\nFinished");
 reader2.close();
 FileUtils.deleteDirectory(new File(path));
 } catch (Throwable e) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala
--
diff --git 
a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala
 
b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala
index 2b9b999..6a13dc3 100644
--- 
a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala
+++ 
b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala
@@ -23,6 +23,7 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.carbondata.examples._
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.examples.sdk.CarbonReaderExample
 
 /**
  * Test suite for examples
@@ -113,4 +114,8 @@ class RunExamples extends QueryTest with BeforeAndAfterAll {
   test("ExternalTableExample") {
 ExternalTableExample.exampleBody(spark)
   }
+
+  test("CarbonReaderExample") {
+CarbonReaderExample.main(null)
+  }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
--
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
index d8882bc..e84a25a 100644
--- 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
+++ 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
@@ -65,7 +65,15 @@ public class CarbonSchemaReader {
*/
   public static Schema readSchemaInDataFile(String dataFilePath) throws 
IOException {
 CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath);
-return new Schema(reader.readSchema());
+List columnSchemaList = new ArrayList();
+List schemaList = reader.readSchema();
+for (int i = 0; i &

carbondata git commit: [CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when recreating table and datamap

2018-09-30 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 9ae91cc5a -> 1c1ced32d


[CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when 
recreating table and datamap

We use the shard path as part of the key for the bloomindex cache. However,
the path separator on Windows is different from that on Linux, which causes
the cache entries not to be cleaned when we clear the cache (when loading
the cache the path separator is '/', while when dropping the cache it is '\').

Here we fix the bug by normalizing the path separator while clearing the cache.

This closes #2778
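
A minimal sketch of the idea behind the fix (the helper below is illustrative, not the actual CarbonData code): normalize to a single separator once, so the key built while loading the cache matches the key built while dropping it:

final class ShardPathKey {
  // Normalize to one canonical separator so the same shard produces the same
  // cache key on Windows ('\') and on Linux ('/').
  static String toCacheKey(String shardPath) {
    return shardPath.replace('\\', '/');
  }

  public static void main(String[] args) {
    // Both forms map to the same key, so clearing the cache can find the
    // entry that was created while loading.
    System.out.println(toCacheKey("store\\table\\Fact\\Part0\\Segment_0\\0"));
    System.out.println(toCacheKey("store/table/Fact/Part0/Segment_0/0"));
  }
}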


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1c1ced32
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1c1ced32
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1c1ced32

Branch: refs/heads/master
Commit: 1c1ced32d122ba8ce7cbad4fd29f778f5dbb4871
Parents: 9ae91cc
Author: xuchuanyin 
Authored: Sat Sep 29 14:03:09 2018 +0800
Committer: manishgupta88 
Committed: Sun Sep 30 12:19:56 2018 +0530

--
 .../datamap/bloom/BloomCoarseGrainDataMapFactory.java   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c1ced32/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
--
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
index 8c74c94..8974918 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
@@ -235,13 +235,13 @@ public class BloomCoarseGrainDataMapFactory extends 
DataMapFactory

carbondata git commit: [CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding

2018-09-27 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 1b4109d5b -> 54bcf4963


[CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding

Added a function to get the type of encoding used, and added a debug log that reports which encoding type was applied.

This closes #2758


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/54bcf496
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/54bcf496
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/54bcf496

Branch: refs/heads/master
Commit: 54bcf49638262af82583d930632018da6c73c8c5
Parents: 1b4109d
Author: Manish Nalla 
Authored: Tue Sep 25 17:44:49 2018 +0530
Committer: manishgupta88 
Committed: Thu Sep 27 17:16:37 2018 +0530

--
 .../core/datastore/page/encoding/ColumnPageEncoder.java  | 8 
 .../org/apache/carbondata/processing/store/TablePage.java| 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
index 3067823..44e7192 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
@@ -78,6 +78,14 @@ public abstract class ColumnPageEncoder {
 }
   }
 
+  public Encoding getEncodingType() {
+List currEncodingList = getEncodingList();
+if (CarbonUtil.isEncodedWithMeta(currEncodingList)) {
+  return currEncodingList.get(0);
+}
+return null;
+  }
+
   /**
* Return a encoded column page by encoding the input page
* The encoded binary data and metadata are wrapped in encoding column page

http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
index 791b4c6..82129db 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
@@ -424,7 +424,8 @@ public class TablePage {
   "Encoder result ---> Source data type: " + 
noDictDimensionPages[noDictIndex]
   .getDataType().getName() + " Destination data type: " + 
targetDataType
   .getName() + " for the column: " + 
noDictDimensionPages[noDictIndex]
-  .getColumnSpec().getFieldName());
+  .getColumnSpec().getFieldName() + " having encoding 
type: "
+  + columnPageEncoder.getEncodingType());
 }
   }
   noDictIndex++;



carbondata git commit: [CARBONDATA-2973] Added documentation for fallback condition for complex columns in local Dictionary

2018-09-26 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master a9ddfbd7b -> 3f99e9b7f


[CARBONDATA-2973] Added documentation for fallback condition for complex 
columns in local Dictionary

1. Added documentation for the fallback condition for complex columns in local dictionary.
2. Added documentation for the system-level property "carbon.local.dictionary.decoder.fallback".

This closes #2766


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3f99e9b7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3f99e9b7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3f99e9b7

Branch: refs/heads/master
Commit: 3f99e9b7f87b387f03cb5bece2b2a8c5a50b
Parents: a9ddfbd
Author: praveenmeenakshi56 
Authored: Wed Sep 26 12:40:37 2018 +0530
Committer: manishgupta88 
Committed: Wed Sep 26 18:14:44 2018 +0530

--
 docs/configuration-parameters.md |  2 +-
 docs/ddl-of-carbondata.md| 16 +++-
 docs/faq.md  |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/configuration-parameters.md
--
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index 7edae47..662525b 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -119,7 +119,7 @@ This section provides the details of all the configurations 
required for the Car
 
 | Parameter | Default Value | Description |
 
|--|---|---|
-| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto 
which the driver process can cache the data (BTree and dictionary values). 
Beyond this, least recently used data will be removed from cache before loading 
new set of values.Default value of -1 means there is no memory limit for 
caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum 
number of entries that needs to be removed from cache in order to load the new 
set of data is determined and unloaded.ie.,for example if 3 cache entries 
qualify for pre-emption, out of these, those entries that free up more cache 
memory is removed prior to others. Please refer 
[FAQs](./faq.md#how-to-check-LRU-cache-memory-footprint) for checking LRU cache 
memory footprint. |
+| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto 
which the driver process can cache the data (BTree and dictionary values). 
Beyond this, least recently used data will be removed from cache before loading 
new set of values.Default value of -1 means there is no memory limit for 
caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum 
number of entries that needs to be removed from cache in order to load the new 
set of data is determined and unloaded.ie.,for example if 3 cache entries 
qualify for pre-emption, out of these, those entries that free up more cache 
memory is removed prior to others. Please refer 
[FAQs](./faq.md#how-to-check-lru-cache-memory-footprint) for checking LRU cache 
memory footprint. |
 | carbon.max.executor.lru.cache.size | -1 | Maximum memory **(in MB)** upto 
which the executor process can cache the data (BTree and reverse dictionary 
values).Default value of -1 means there is no memory limit for caching. Only 
integer values greater than 0 are accepted.**NOTE:** If this parameter is not 
configured, then the value of ***carbon.max.driver.lru.cache.size*** will be 
used. |
 | max.query.execution.time | 60 | Maximum time allowed for one query to be 
executed. The value is in minutes. |
 | carbon.enableMinMax | true | CarbonData maintains the metadata which enables 
to prune unnecessary files from being scanned as per the query conditions.To 
achieve pruning, Min,Max of each column is maintined.Based on the filter 
condition in the query, certain data can be skipped from scanning by matching 
the filter value against the min,max values of the column(s) present in that 
carbondata file.This pruing enhances query performance significantly. |

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/ddl-of-carbondata.md
--
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 2a467a2..5eeba86 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -231,7 +231,13 @@ CarbonData DDL statements are documented here,which 
includes:

* In case of multi-level complex dataType columns, primitive 
string/varchar/char columns are considered for local dictionary generation.
 
-   Local dictionary will have to be enabled explicitly during

[1/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

2018-09-18 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 61fcdf286 -> c8f706304


http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java
 
b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java
index d3d538a..c4416d5 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java
@@ -18,6 +18,7 @@ package org.apache.carbondata.processing.sort.sortdata;
 
 import java.io.File;
 import java.io.Serializable;
+import java.util.Map;
 
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
@@ -88,6 +89,17 @@ public class SortParameters implements Serializable {
 
   private DataType[] measureDataType;
 
+  // no dictionary data types of the table
+  private DataType[] noDictDataType;
+
+  // no dictionary columns data types participating in sort
+  // used while writing the row to sort temp file where sort no dict columns 
are handled seperately
+  private DataType[] noDictSortDataType;
+
+  // no dictionary columns data types not participating in sort
+  // used while writing the row to sort temp file where nosort nodict columns 
are handled seperately
+  private DataType[] noDictNoSortDataType;
+
   /**
* To know how many columns are of high cardinality.
*/
@@ -111,6 +123,8 @@ public class SortParameters implements Serializable {
   private boolean[] noDictionaryDimnesionColumn;
 
   private boolean[] noDictionarySortColumn;
+
+  private boolean[] sortColumn;
   /**
* whether dimension is varchar data type.
* since all dimensions are string, we use an array of boolean instead of 
datatypes
@@ -142,11 +156,15 @@ public class SortParameters implements Serializable {
 parameters.databaseName = databaseName;
 parameters.tableName = tableName;
 parameters.measureDataType = measureDataType;
+parameters.noDictDataType = noDictDataType;
+parameters.noDictSortDataType = noDictSortDataType;
+parameters.noDictNoSortDataType = noDictNoSortDataType;
 parameters.noDictionaryCount = noDictionaryCount;
 parameters.partitionID = partitionID;
 parameters.segmentId = segmentId;
 parameters.taskNo = taskNo;
 parameters.noDictionaryDimnesionColumn = noDictionaryDimnesionColumn;
+parameters.sortColumn = sortColumn;
 parameters.isVarcharDimensionColumn = isVarcharDimensionColumn;
 parameters.noDictionarySortColumn = noDictionarySortColumn;
 parameters.numberOfSortColumns = numberOfSortColumns;
@@ -382,7 +400,10 @@ public class SortParameters implements Serializable {
 
 parameters.setNumberOfSortColumns(configuration.getNumberOfSortColumns());
 
parameters.setNumberOfNoDictSortColumns(configuration.getNumberOfNoDictSortColumns());
-setNoDictionarySortColumnMapping(parameters);
+parameters.setNoDictionarySortColumn(CarbonDataProcessorUtil
+
.getNoDictSortColMapping(configuration.getTableIdentifier().getDatabaseName(),
+configuration.getTableIdentifier().getTableName()));
+parameters.setSortColumn(configuration.getSortColumnMapping());
 parameters.setObserver(new SortObserver());
 // get sort buffer size
 parameters.setSortBufferSize(Integer.parseInt(carbonProperties
@@ -431,6 +452,14 @@ public class SortParameters implements Serializable {
 
 DataType[] measureDataType = configuration.getMeasureDataType();
 parameters.setMeasureDataType(measureDataType);
+parameters.setNoDictDataType(CarbonDataProcessorUtil
+
.getNoDictDataTypes(configuration.getTableIdentifier().getDatabaseName(),
+configuration.getTableIdentifier().getTableName()));
+Map noDictSortAndNoSortDataTypes = 
CarbonDataProcessorUtil
+
.getNoDictSortAndNoSortDataTypes(configuration.getTableIdentifier().getDatabaseName(),
+configuration.getTableIdentifier().getTableName());
+
parameters.setNoDictSortDataType(noDictSortAndNoSortDataTypes.get("noDictSortDataTypes"));
+
parameters.setNoDictNoSortDataType(noDictSortAndNoSortDataTypes.get("noDictNoSortDataTypes"));
 return parameters;
   }
 
@@ -442,28 +471,10 @@ public class SortParameters implements Serializable {
 this.rangeId = rangeId;
   }
 
-  /**
-   * this method will set the boolean mapping for no dictionary sort columns
-   *
-   * @param parameters
-   */
-  private static void setNoDictionarySortColumnMapping(SortParameters 
parameters) {
-if (parameters.getNumberOfSortColumns() == 
parameters.getNoDictionaryDimnesionColumn().length) {
-  

[3/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

2018-09-18 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java
index cc044cc..f232652 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java
@@ -21,8 +21,8 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage;
 import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort;
-import org.apache.carbondata.core.datastore.columnar.IndexStorage;
 import org.apache.carbondata.core.datastore.compression.Compressor;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
@@ -46,7 +46,7 @@ public class ComplexDimensionIndexCodec extends 
IndexStorageCodec {
 return new IndexStorageEncoder() {
   @Override
   void encodeIndexStorage(ColumnPage inputPage) {
-IndexStorage indexStorage =
+BlockIndexerStorage indexStorage =
 new BlockIndexerStorageForShort(inputPage.getByteArrayPage(), 
false, false, false);
 byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage());
 Compressor compressor = CompressorFactory.getInstance().getCompressor(

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java
index 66f5f1d..f3475fd 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java
@@ -21,9 +21,9 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage;
 import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort;
 import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort;
-import org.apache.carbondata.core.datastore.columnar.IndexStorage;
 import org.apache.carbondata.core.datastore.compression.Compressor;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
@@ -47,7 +47,7 @@ public class DictDimensionIndexCodec extends 
IndexStorageCodec {
 return new IndexStorageEncoder() {
   @Override
   void encodeIndexStorage(ColumnPage inputPage) {
-IndexStorage indexStorage;
+BlockIndexerStorage indexStorage;
 byte[][] data = inputPage.getByteArrayPage();
 if (isInvertedIndex) {
   indexStorage = new BlockIndexerStorageForShort(data, true, false, 
isSort);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java
 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java
index a130cbd..15827f8 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java
@@ -21,9 +21,9 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage;
 import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort;
 import 
org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort;
-import org.apache.carbondata.core.datastore.columnar.IndexStorage;
 import 

[4/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

2018-09-18 Thread manishgupta88
[CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

Loading configurations and settings
(1) Parse the data the same way as a measure, so FieldEncoderFactory is changed to take the measure flow
(2) While creating the loading configuration, no-dictionary and sort columns have to be taken care of in all the needed flows

Sort rows preparation
(1) Prepare the row to be sorted with original data for no dictionary columns
(2) Use data type based comparators for the no dictionary sort columns in all 
the flows like Intermediate Sort, Final sort, Unsafe sort
(3) Handle reading and writing of rows with no-dictionary primitive data types to intermediate files and in the final file merger, as the data is now read and written as original values
(4) Get the no-dictionary sort data types from the load configuration that we set in the LOAD step
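
As a rough sketch of the data-type-based comparison described above (illustrative only, assuming the row already carries the original typed values; the real comparators live in the sort step classes):

import java.util.Comparator;

// Simplified illustration: compare a no-dictionary sort column on its
// original typed value instead of on its byte-array representation.
final class NoDictPrimitiveComparator implements Comparator<Object[]> {
  private final int columnIndex;

  NoDictPrimitiveComparator(int columnIndex) {
    this.columnIndex = columnIndex;
  }

  @Override
  public int compare(Object[] row1, Object[] row2) {
    Object v1 = row1[columnIndex];
    Object v2 = row2[columnIndex];
    if (v1 == null || v2 == null) {
      // nulls sort first in this sketch
      return v1 == null ? (v2 == null ? 0 : -1) : 1;
    }
    if (v1 instanceof Integer) {
      return Integer.compare((Integer) v1, (Integer) v2);
    } else if (v1 instanceof Long) {
      return Long.compare((Long) v1, (Long) v2);
    } else if (v1 instanceof Double) {
      return Double.compare((Double) v1, (Double) v2);
    }
    // strings and other types fall back to their natural ordering
    return v1.toString().compareTo(v2.toString());
  }
}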

Adding to the column page and applying adaptive encoding
(1) Add the no-dictionary primitive data-type values as original data
(2) Apply adaptive encoding to the page
(3) Reuse the adaptive encoding techniques that exist for measure columns

Writing the inverted index to the adaptive encoded page
(1) Prepare the inverted index list based on data-type-based comparison
(2) Apply RLE on the inverted index
(3) Write the inverted index to the encoded page

Create decoder while querying
(1) Create proper decoder for the no dictionary column pages
(2) Uncompress the column page and also the inverted index

Filter flow changes
(1) FilterValues will be in bytes, so convert the data to bytes for comparison
(2) Change the isScanRequired to compare min/max values based on the data type
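
A minimal sketch of the byte conversion the filter flow relies on (illustrative only; the actual code uses CarbonData utilities such as CarbonUtil.getValueAsBytes): a typed filter literal is turned into the same byte form stored for the column, so comparison can happen at the byte level.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

final class FilterValueBytes {
  // Turn a typed filter literal into bytes so it can be compared against
  // the encoded page and min/max values at the byte level.
  static byte[] toBytes(Object value) {
    if (value instanceof Integer) {
      return ByteBuffer.allocate(Integer.BYTES).putInt((Integer) value).array();
    } else if (value instanceof Long) {
      return ByteBuffer.allocate(Long.BYTES).putLong((Long) value).array();
    } else if (value instanceof Double) {
      return ByteBuffer.allocate(Double.BYTES).putDouble((Double) value).array();
    }
    return value.toString().getBytes(StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    byte[] filterBytes = toBytes(42);
    System.out.println("filter literal encoded into " + filterBytes.length + " bytes");
  }
}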

Fill output row in case of queries
(1) Change noDictionaryKeys to Object, since it can now hold data-type-based values for no-dictionary primitive data types

Bloom filter changes
(1) Change the bloom filter load
(2) While rebuilding the datamap, the load expects original data, so a conversion is applied
(3) Fill the no-dictionary primitive data as original data

Compaction Changes
Compaction gets the rows from the result collectors, but the result collectors return no-dictionary columns as bytes, so a conversion back to the original data is needed based on the data type.

This closes #2654


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c8f70630
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c8f70630
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c8f70630

Branch: refs/heads/master
Commit: c8f7063048115d161de539cf277cc1ccb015159b
Parents: 61fcdf2
Author: dhatchayani 
Authored: Wed Aug 22 12:45:44 2018 +0530
Committer: manishgupta88 
Committed: Tue Sep 18 19:12:56 2018 +0530

--
 .../carbondata/core/datastore/TableSpec.java|  17 +
 ...mpressedDimensionChunkFileBasedReaderV3.java |  42 +-
 .../chunk/store/ColumnPageWrapper.java  | 106 -
 ...feVariableLengthDimensionDataChunkStore.java |  28 +-
 .../datastore/columnar/BlockIndexerStorage.java | 104 +
 .../BlockIndexerStorageForNoDictionary.java | 116 ++
 ...ndexerStorageForNoInvertedIndexForShort.java |  17 +-
 .../columnar/BlockIndexerStorageForShort.java   |  71 +---
 .../ColumnWithRowIdForNoDictionary.java |  72 
 .../core/datastore/columnar/IndexStorage.java   |  35 --
 .../page/encoding/ColumnPageEncoder.java|  48 ++-
 .../page/encoding/DefaultEncodingFactory.java   |  70 +++-
 .../page/encoding/EncodingFactory.java  |  18 +-
 .../page/encoding/adaptive/AdaptiveCodec.java   | 195 -
 .../adaptive/AdaptiveDeltaFloatingCodec.java|  31 +-
 .../adaptive/AdaptiveDeltaIntegralCodec.java|  30 +-
 .../adaptive/AdaptiveFloatingCodec.java |  30 +-
 .../adaptive/AdaptiveIntegralCodec.java |  30 +-
 .../legacy/ComplexDimensionIndexCodec.java  |   4 +-
 .../legacy/DictDimensionIndexCodec.java |   4 +-
 .../legacy/DirectDictDimensionIndexCodec.java   |   4 +-
 .../legacy/HighCardDictDimensionIndexCodec.java |   4 +-
 .../dimension/legacy/IndexStorageEncoder.java   |   8 +-
 .../core/datastore/page/key/TablePageKey.java   |   3 +-
 .../page/statistics/TablePageStatistics.java|  14 +-
 .../core/datastore/row/WriteStepRowUtil.java|  28 +-
 .../core/scan/executor/util/QueryUtil.java  |  36 ++
 .../carbondata/core/scan/filter/FilterUtil.java |  42 +-
 .../executer/ExcludeFilterExecuterImpl.java |   2 +-
 .../executer/IncludeFilterExecuterImpl.java |  58 ++-
 .../executer/RangeValueFilterExecuterImpl.java  |  39 +-
 .../executer/RestructureEvaluatorImpl.java  |   4 +-
 .../executer/RowLevelFilterExecuterImpl.java|  10 +-
 .../RowLevelRangeGrtThanFiterExecuterImpl.java  |  55 ++-
 ...elRangeGrtrThanEquaToFilterExecuterImpl.java |  55 ++-
 ...velRangeLessThanEqualFilterExecuterImpl.java |  53 ++-
 ...RowLevelRangeLessThanFilterExecuterImpl.java |  53

[2/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

2018-09-18 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java
--
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java
index 29e3060..29a4098 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java
@@ -27,6 +27,7 @@ import 
org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
 
 /**
  * Implementation for BloomFilter DataMap to rebuild the datamap for main 
table with existing data
@@ -61,8 +62,12 @@ public class BloomDataMapBuilder extends 
AbstractBloomDataMapWriter implements D
   }
 
   @Override
-  protected byte[] convertNonDictionaryValue(int indexColIdx, byte[] value) {
-return value;
+  protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) {
+// no dictionary measure columns will be of original data, so convert it 
to bytes
+if 
(DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) {
+  return 
CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value);
+}
+return (byte[]) value;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java
--
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java
index cad9787..61bd036 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java
@@ -29,6 +29,7 @@ import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
 import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
 
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.Predicate;
@@ -73,11 +74,14 @@ public class BloomDataMapWriter extends 
AbstractBloomDataMapWriter {
 }
   }
 
-  protected byte[] convertNonDictionaryValue(int indexColIdx, byte[] value) {
+  protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) {
 if (DataTypes.VARCHAR == indexColumns.get(indexColIdx).getDataType()) {
-  return DataConvertUtil.getRawBytesForVarchar(value);
+  return DataConvertUtil.getRawBytesForVarchar((byte[]) value);
+} else if 
(DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) {
+  // get bytes for the original value of the no dictionary column
+  return 
CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value);
 } else {
-  return DataConvertUtil.getRawBytes(value);
+  return DataConvertUtil.getRawBytes((byte[]) value);
 }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
--
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
index 7cd241a..5525941 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
@@ -245,7 +245,7 @@ public class StoreCreator {
 date.setEncodingList(encodings);
 date.setColumnUniqueId(UUID.randomUUID().toString());
 date.setDimensionColumn(true);
-date.setColumnReferenceId(id.getColumnUniqueId());
+date.setColumnReferenceId(date.getColumnUniqueId());
 date.setSchemaOrdinal(schemaOrdinal++);
 if (sortColumns.contains(date.getColumnName())) {
   date.setSortColumn(true);
@@ -263,7 +263,7 @@ public class StoreCreator {
 if (sortColumns.contains(country.getColumnName())) {
   country.setSortColumn(true);
 }
-country.setColumnReferenceId(id.getColumnUniqueId());
+country.setColumnReferenceId(country.getColumnUniqueId());
 columnSchemas.add(country);
 
 

carbondata git commit: [HOTFIX] Fixed 2.3 CI

2018-09-14 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master ac79a343f -> 2fb7dc9a7


[HOTFIX] Fixed 2.3 CI

Fixed the following issues:

1. Fixed the lz4 jar issue by excluding it from the kafka dependency.
2. Fixed a constructor-not-found error for the reset command.
3. Removed the warn logger for SparkContext to reduce logs in CI.

This closes #2716


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2fb7dc9a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2fb7dc9a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2fb7dc9a

Branch: refs/heads/master
Commit: 2fb7dc9a7831b0d092b98d6716b9e065bd859fe1
Parents: ac79a34
Author: ravipesala 
Authored: Fri Sep 14 17:38:30 2018 +0530
Committer: manishgupta88 
Committed: Fri Sep 14 20:10:58 2018 +0530

--
 core/pom.xml|  6 ++---
 datamap/mv/plan/pom.xml |  6 -
 examples/spark2/pom.xml | 15 +--
 .../sdv/generated/SetParameterTestCase.scala|  2 ++
 integration/spark-common-test/pom.xml   | 26 
 ...eneFineGrainDataMapWithSearchModeSuite.scala |  1 -
 .../preaggregate/TestPreAggregateLoad.scala | 17 +
 .../preaggregate/TestPreAggregateMisc.scala |  1 +
 ...tSparkCarbonFileFormatWithSparkSession.scala |  2 --
 .../detailquery/SearchModeTestCase.scala|  1 -
 integration/spark-datasource/pom.xml|  9 ---
 integration/spark2/pom.xml  | 21 +---
 .../commands/SetCommandTestCase.scala   |  1 +
 pom.xml |  6 +
 14 files changed, 37 insertions(+), 77 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 51c603e..a7d6f4d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -114,9 +114,9 @@
   4.0.42.Final
 
 
-  net.jpountz.lz4
-  lz4
-  1.3.0
+  org.lz4
+  lz4-java
+  1.4.0
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/datamap/mv/plan/pom.xml
--
diff --git a/datamap/mv/plan/pom.xml b/datamap/mv/plan/pom.xml
index 982724d..ff6976d 100644
--- a/datamap/mv/plan/pom.xml
+++ b/datamap/mv/plan/pom.xml
@@ -48,12 +48,6 @@
   org.apache.spark
   spark-core_${scala.binary.version}
   ${spark.version}
-  
-
-  net.jpountz.lz4
-  lz4
-
-  
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/examples/spark2/pom.xml
--
diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml
index bd497c5..aa7b7c5 100644
--- a/examples/spark2/pom.xml
+++ b/examples/spark2/pom.xml
@@ -56,6 +56,12 @@
 
   org.apache.spark
   spark-sql-kafka-0-10_${scala.binary.version}
+  
+
+  net.jpountz.lz4
+  lz4
+
+  
 
 
   org.apache.spark
@@ -91,15 +97,6 @@
   org.apache.carbondata
   carbondata-core
   ${project.version}
-  
-
-
-  net.jpountz.lz4
-  lz4
-
-  
 
   
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala
index f3622dc..8c336d8 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala
@@ -40,6 +40,7 @@ class SetParameterTestCase extends QueryTest with 
BeforeAndAfterAll {
 sql("drop table if exists carbon_table_single_pass")
 sql("drop table if exists carbon_table_disable_bad_record_logger")
 sql("drop table if exists carbon_table_load")
+sqlContext.sparkSession.catalog.clearCache()
 sql("RESET")
   }
 
@@ -155,6 +156,7 @@ class SetParameterTestCase extends QueryTest with 
BeforeAndAfterAll {
 
   test("TC_007-test SET property IS__EMPTY_DATA_BAD_RECORD=FALSE") {
 sql("drop table if exists emptyColumnValues")
+sqlContext.sp

carbondata git commit: [CARBONDATA-2876] Fix Avro decimal datatype with precision and scale

2018-09-10 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 0483b46e9 -> 9ebab5748


[CARBONDATA-2876] Fix Avro decimal datatype with precision and scale

1. Add precision and scale to the field value for the Avro decimal logical type.
2. If the Avro schema is a union type with multiple records or multiple enums, add a check for the schema.

This closes #2687


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9ebab574
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9ebab574
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9ebab574

Branch: refs/heads/master
Commit: 9ebab5748969398cf12969eedd4701c30bc028cd
Parents: 0483b46
Author: Indhumathi27 
Authored: Mon Sep 3 17:35:01 2018 +0530
Committer: manishgupta88 
Committed: Mon Sep 10 12:14:24 2018 +0530

--
 ...ansactionalCarbonTableWithAvroDataType.scala | 470 ++-
 .../carbondata/sdk/file/AvroCarbonWriter.java   |  77 ++-
 2 files changed, 505 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ebab574/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
index 29aa2de..dc13b16 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
@@ -18,8 +18,14 @@
 package org.apache.carbondata.spark.testsuite.createTable
 
 import java.io.File
+import java.nio.ByteBuffer
+import javax.xml.bind.DatatypeConverter
+
 import scala.collection.mutable
 
+import org.apache.avro.Conversions.DecimalConversion
+import org.apache.avro.{LogicalTypes, Schema}
+import org.apache.avro.generic.GenericData
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
@@ -46,6 +52,8 @@ class TestNonTransactionalCarbonTableWithAvroDataType extends 
QueryTest with Bef
 
   writerPath = writerPath.replace("\\", "/")
 
+  val decimalConversion = new DecimalConversion
+
   override def beforeAll(): Unit = {
 sql("DROP TABLE IF EXISTS sdkOutputTable")
 CarbonProperties.getInstance()
@@ -678,7 +686,7 @@ class TestNonTransactionalCarbonTableWithAvroDataType 
extends QueryTest with Bef
 |  "name": "StudentActivity",
 |  "fields": [
 |  {
-|"name": "enum_field", "type": [{
+|"name": "union_field", "type": [{
 |  "namespace": "org.example.avro",
 |  "name": "dec",
 |  "type": "bytes",
@@ -689,15 +697,27 @@ class TestNonTransactionalCarbonTableWithAvroDataType 
extends QueryTest with Bef
 |  }]
 |}""".stripMargin
 
-val json1 =
-  """{"enum_field":{"bytes":"1010"}}""".stripMargin
-
 val nn = new org.apache.avro.Schema.Parser().parse(schema1)
+val decimalConversion = new DecimalConversion
+val logicalType = LogicalTypes.decimal(10, 2)
+val decimal = new java.math.BigDecimal("1010").setScale(2)
+//get unscaled 2's complement bytearray
+val bytes =
+  decimalConversion.toBytes(decimal, nn.getField("union_field").schema, 
logicalType)
+val data = DatatypeConverter.printBase64Binary(bytes.array())
+val json1 =
+  s"""{"union_field":{"bytes":"$data"}}""".stripMargin
 val record = testUtil.jsonToAvro(json1, schema1)
+val data1 = new String(record.get(0).asInstanceOf[ByteBuffer].array(),
+  CarbonCommonConstants.DEFAULT_CHARSET_CLASS)
+val bytes1 = ByteBuffer.wrap(DatatypeConverter.parseBase64Binary(data1))
+val avroRec = new GenericData. Record(nn)
+avroRec.put("union_field", bytes1)
+
 
 val writer = CarbonWriter.builder
   
.outputPath(writerPath).isTransactionalTable(false).buildWriterForAvroInput(nn)
-writer.write(record)
+writer.write(avroRec)
   

[2/2] carbondata git commit: [CARBONDATA-2910] Support backward compatibility in fileformat and added tests for load with different sort orders

2018-09-07 Thread manishgupta88
[CARBONDATA-2910] Support backward compatibility in fileformat and added tests for load with different sort orders

1. Data loaded by an old version with all columns as dictionary-exclude can now be read by the fileformat if the segment folder is given for reading.
2. Users can now specify different sort options per load while loading data through the SDK, and the fileformat can still read the data.

This closes #2685
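
As a hedged usage sketch of point 2: the builder calls sortBy, isTransactionalTable and buildWriterForCSVInput follow the SDK of this period but the builder API has changed across releases, so treat the exact method names as assumptions; the output path and column names are made up for illustration.

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class PerLoadSortExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical schema and path, only for illustration.
    Field[] fields = new Field[]{
        new Field("name", DataTypes.STRING),
        new Field("age", DataTypes.INT)
    };

    // First load sorted by "name" ...
    CarbonWriter first = CarbonWriter.builder()
        .outputPath("/tmp/carbon_segments")
        .isTransactionalTable(false)
        .sortBy(new String[]{"name"})
        .buildWriterForCSVInput(new Schema(fields));
    first.write(new String[]{"bob", "25"});
    first.close();

    // ... second load sorted by "age"; the fileformat can read both segments.
    CarbonWriter second = CarbonWriter.builder()
        .outputPath("/tmp/carbon_segments")
        .isTransactionalTable(false)
        .sortBy(new String[]{"age"})
        .buildWriterForCSVInput(new Schema(fields));
    second.write(new String[]{"alice", "30"});
    second.close();
  }
}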


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3894e1d0
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3894e1d0
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3894e1d0

Branch: refs/heads/master
Commit: 3894e1d050cc39959a6445f97a7850ac922b7bd8
Parents: b6bd90d
Author: ravipesala 
Authored: Thu Aug 30 20:41:06 2018 +0530
Committer: manishgupta88 
Committed: Fri Sep 7 20:17:44 2018 +0530

--
 .../carbondata/core/datamap/TableDataMap.java   |  34 
 .../carbondata/core/datamap/dev/DataMap.java|  13 +-
 .../dev/cgdatamap/CoarseGrainDataMap.java   |  12 ++
 .../datamap/dev/fgdatamap/FineGrainDataMap.java |  12 ++
 .../indexstore/blockletindex/BlockDataMap.java  |  27 +++-
 .../blockletindex/BlockletDataMapFactory.java   |   3 +-
 .../core/metadata/schema/table/CarbonTable.java |  11 +-
 .../executor/impl/AbstractQueryExecutor.java|  97 
 .../core/scan/executor/util/QueryUtil.java  |  19 +++
 .../core/scan/expression/ColumnExpression.java  |   7 +
 .../carbondata/core/scan/model/QueryModel.java  |  64 ++--
 .../core/scan/model/QueryModelBuilder.java  |  21 ++-
 .../util/AbstractDataFileFooterConverter.java   |  12 ++
 .../core/util/BlockletDataMapUtil.java  |  13 +-
 .../hadoop/api/CarbonFileInputFormat.java   |  11 +-
 .../hadoop/api/CarbonInputFormat.java   |  26 ++--
 .../hadoop/api/CarbonTableInputFormat.java  |  23 ++-
 .../hadoop/testutil/StoreCreator.java   | 101 +---
 .../hadoop/ft/CarbonTableInputFormatTest.java   |  27 ++--
 .../hadoop/ft/CarbonTableOutputFormatTest.java  |   3 +-
 ...ithColumnMetCacheAndCacheLevelProperty.scala |   4 +-
 .../TestNonTransactionalCarbonTable.scala   |  19 +--
 .../execution/datasources/CarbonFileIndex.scala |  10 +-
 .../datasources/SparkCarbonFileFormat.scala |  14 +-
 .../datasource/SparkCarbonDataSourceTest.scala  | 156 ++-
 .../streaming/CarbonStreamOutputFormatTest.java |   3 +-
 26 files changed, 617 insertions(+), 125 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3894e1d0/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java
index aed8c60..a272777 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java
@@ -36,6 +36,7 @@ import org.apache.carbondata.core.indexstore.PartitionSpec;
 import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import org.apache.carbondata.core.scan.expression.Expression;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 import org.apache.carbondata.events.Event;
 import org.apache.carbondata.events.OperationContext;
@@ -79,6 +80,39 @@ public final class TableDataMap extends 
OperationEventListener {
 return blockletDetailsFetcher;
   }
 
+
+  /**
+   * Pass the valid segments and prune the datamap using filter expression
+   *
+   * @param segments
+   * @param filterExp
+   * @return
+   */
+  public List prune(List segments, Expression 
filterExp,
+  List partitions) throws IOException {
+List blocklets = new ArrayList<>();
+SegmentProperties segmentProperties;
+Map> dataMaps = 
dataMapFactory.getDataMaps(segments);
+for (Segment segment : segments) {
+  List pruneBlocklets = new ArrayList<>();
+  // if filter is not passed then return all the blocklets
+  if (filterExp == null) {
+pruneBlocklets = blockletDetailsFetcher.getAllBlocklets(segment, 
partitions);
+  } else {
+segmentProperties = 
segmentPropertiesFetcher.getSegmentProperties(segment);
+for (DataMap dataMap : dataMaps.get(segment)) {
+
+  pruneBlocklets
+  .addAll(dataMap.prune(filterExp, segmentProperties, partitions, 
identifier));
+}
+  }
+  blocklets.addAll(addSegmentId(
+  blockletDetailsFetcher.getExtendedBlocklets(pruneBlocklets, segment),
+  s

[1/2] carbondata git commit: [CARBONDATA-2910] Support backward compatibility in fileformat and added tests for load with different sort orders

2018-09-07 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master b6bd90d80 -> 3894e1d05


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3894e1d0/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
--
diff --git 
a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
 
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index 837bc4f..dcc76d8 100644
--- 
a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
+++ 
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
@@ -17,6 +17,11 @@
 package org.apache.spark.sql.carbondata.datasource
 
 
+import java.io.File
+import java.util
+
+import scala.collection.JavaConverters._
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.carbondata.datasource.TestUtil._
 import org.scalatest.{BeforeAndAfterAll, FunSuite}
@@ -24,6 +29,9 @@ import org.scalatest.{BeforeAndAfterAll, FunSuite}
 import org.apache.carbondata.core.datamap.DataMapStoreManager
 import org.apache.carbondata.core.datastore.impl.FileFactory
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.hadoop.testutil.StoreCreator
+import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}
 
 class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
 
@@ -346,7 +354,7 @@ class SparkCarbonDataSourceTest extends FunSuite with 
BeforeAndAfterAll {
 df.write.format("carbon").save(warehouse1 + "/test_folder/")
 if (!spark.sparkContext.version.startsWith("2.1")) {
   spark
-.sql(s"create table test123 (c1 string, c2 string, arrayc array, 
structc struct<_1:string, _2:decimal(38,18)>, shortc smallint,intc int, longc 
bigint,  doublec double, bigdecimalc decimal(38,18)) using carbon location 
'$warehouse1/test_folder/'")
+.sql(s"create table test123 (c1 string, c2 string, shortc 
smallint,intc int, longc bigint,  doublec double, bigdecimalc decimal(38,18), 
arrayc array, structc struct<_1:string, _2:decimal(38,18)>) using carbon 
location '$warehouse1/test_folder/'")
 
   checkAnswer(spark.sql("select * from test123"),
 spark.read.format("carbon").load(warehouse1 + "/test_folder/"))
@@ -613,6 +621,152 @@ class SparkCarbonDataSourceTest extends FunSuite with 
BeforeAndAfterAll {
   
FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + 
"/test_folder"))
 }
   }
+
+  test("test read using old data") {
+val store = new StoreCreator(new File(warehouse1).getAbsolutePath,
+  new File(warehouse1 + 
"../../../../../hadoop/src/test/resources/data.csv").getCanonicalPath,
+  false)
+store.createCarbonStore()
+FileFactory.deleteAllFilesOfDir(new 
File(warehouse1+"/testdb/testtable/Fact/Part0/Segment_0/0"))
+val dfread = 
spark.read.format("carbon").load(warehouse1+"/testdb/testtable/Fact/Part0/Segment_0")
+dfread.show(false)
+spark.sql("drop table if exists parquet_table")
+  }
+
+  test("test read using different sort order data") {
+if (!spark.sparkContext.version.startsWith("2.1")) {
+  spark.sql("drop table if exists old_comp")
+  FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "/testdb"))
+  val store = new StoreCreator(new File(warehouse1).getAbsolutePath,
+new File(warehouse1 + 
"../../../../../hadoop/src/test/resources/data.csv").getCanonicalPath,
+false)
+  store.setSortColumns(new util.ArrayList[String](Seq("name").asJava))
+  var model = store.createTableAndLoadModel(false)
+  model.setSegmentId("0")
+  store.createCarbonStore(model)
+  FileFactory.deleteAllFilesOfDir(new File(warehouse1 + 
"/testdb/testtable/Fact/Part0/Segment_0/0"))
+  store.setSortColumns(new 
util.ArrayList[String](Seq("country,phonetype").asJava))
+  model = store.createTableAndLoadModel(false)
+  model.setSegmentId("1")
+  store.createCarbonStore(model)
+  FileFactory.deleteAllFilesOfDir(new File(warehouse1 + 
"/testdb/testtable/Fact/Part0/Segment_1/0"))
+  store.setSortColumns(new util.ArrayList[String](Seq("date").asJava))
+  model = store.createTableAndLoadModel(false)
+  model.setSegmentId("2")
+  store.createCarbonStore(model)
+  FileFactory.deleteAllFilesOfDir(new File(warehouse1 + 
"/testdb/testtable/Fact/Part0/Segment_2/0"))
+  store.setSortColumns(new 
util.ArrayList[String](Seq("serialname").asJava))
+  model = store.createTableAndLoadModel(false)
+  model.setSegmentId("3")
+  store.createCarbonStore(model)
+  FileFactory.deleteAllFilesOfDir(new File(warehouse1 + 

carbondata git commit: [CARBONDATA-2876]AVRO datatype support through SDK

2018-09-06 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 67a8a37bf -> b6bd90d80


[CARBONDATA-2876]AVRO datatype support through SDK

This PR adds support for converting the following Avro data types to carbon format
through the SDK:

1. Avro Logical type TimeMillis
2. Avro Logical type TimeMicros

This closes #2694


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b6bd90d8
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b6bd90d8
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b6bd90d8

Branch: refs/heads/master
Commit: b6bd90d80106a3a4f7e24fc6bf63aa2ac135f2fc
Parents: 67a8a37
Author: Indhumathi27 
Authored: Mon Sep 3 10:17:20 2018 +0530
Committer: manishgupta88 
Committed: Fri Sep 7 10:44:16 2018 +0530

--
 ...ansactionalCarbonTableWithAvroDataType.scala | 94 
 .../carbondata/sdk/file/AvroCarbonWriter.java   | 24 ++---
 2 files changed, 106 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b6bd90d8/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
index 7616ea3..29aa2de 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
@@ -790,4 +790,98 @@ class TestNonTransactionalCarbonTableWithAvroDataType 
extends QueryTest with Bef
 checkExistence(sql("select * from sdkOutputTable"), true, "32.0")
   }
 
+  test("test logical type time-millis") {
+sql("drop table if exists sdkOutputTable")
+
FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath))
+val schema1 =
+  """{
+|  "namespace": "com.apache.schema",
+|  "type": "record",
+|  "name": "StudentActivity",
+|  "fields": [
+|  {
+|  "name": "id",
+|  "type": {"type" : 
"int", "logicalType": "time-millis"}
+|  },
+|  {
+|  "name": "course_details",
+|  "type": {
+|  "name": "course_details",
+|  "type": "record",
+|  "fields": [
+|  {
+|  "name": 
"course_struct_course_time",
+|  "type": {"type" : 
"int", "logicalType": "time-millis"}
+|  }
+|  ]
+|  }
+|  }
+|  ]
+|}""".stripMargin
+
+val json1 =
+  """{"id": 172800,"course_details": { 
"course_struct_course_time":172800}}""".stripMargin
+
+val nn = new org.apache.avro.Schema.Parser().parse(schema1)
+val record = testUtil.jsonToAvro(json1, schema1)
+
+
+val writer = CarbonWriter.builder
+  
.outputPath(writerPath).isTransactionalTable(false).buildWriterForAvroInput(nn)
+writer.write(record)
+writer.close()
+sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY
+ |'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(172800, 
Row(172800
+  }
+
+  test("test logical type time-micros") {
+sql("drop table if exists sdkOutputTable")
+
FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath))
+val schema1 =
+  """{
+|  "namespace": &

carbondata git commit: [CARBONDATA-2876]Support Avro datatype conversion through SDK

2018-08-31 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master f012f5b13 -> b588cb655


[CARBONDATA-2876]Support Avro datatype conversion through SDK

This PR supports following Avro DataTypes to carbon format through SDK. Avro 
datatypes include,
1. Avro Union
2. Avro Enum
3. Avro Logical type Decimal

This closes #2671


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b588cb65
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b588cb65
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b588cb65

Branch: refs/heads/master
Commit: b588cb65564d26cdf55da7482ae7b1ee79173067
Parents: f012f5b
Author: Indhumathi27 
Authored: Thu Aug 30 14:50:06 2018 +0530
Committer: manishgupta88 
Committed: Fri Aug 31 14:41:56 2018 +0530

--
 ...ansactionalCarbonTableWithAvroDataType.scala | 793 +++
 .../carbondata/sdk/file/AvroCarbonWriter.java   | 331 +++-
 2 files changed, 1088 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b588cb65/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
new file mode 100644
index 000..b50407c
--- /dev/null
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala
@@ -0,0 +1,793 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.createTable
+
+import java.io.File
+import scala.collection.mutable
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.sdk.file.CarbonWriter
+
+/**
+ * Test class for Avro supported data types through SDK
+ */
+class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with 
BeforeAndAfterAll {
+
+
+  val badRecordAction = CarbonProperties.getInstance()
+.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
+  var writerPath = new File(this.getClass.getResource("/").getPath
++
+"../." +
+"./target/SparkCarbonFileFormat/WriterOutput/")
+.getCanonicalPath
+
+  writerPath = writerPath.replace("\\", "/")
+
+  override def beforeAll(): Unit = {
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "force")
+  }
+
+  override def afterAll(): Unit = {
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, 
badRecordAction)
+  }
+
+  test("test enum") {
+sql("drop table if exists sdkOutputTable")
+
FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath))
+val schema1 =
+  """{
+|  "namespace": "com.apache.schema",
+|  "type": "record",
+|  "name": "StudentActivity",
+| "fields":
+| [{
+| "name": "id",
+| 

carbondata git commit: [HOTFIX] Support TableProperties Map API for SDK

2018-08-24 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 6029b2800 -> 137245057


[HOTFIX] Support TableProperties Map API for SDK

Currently the SDK accepts load options as a map input, but table properties are not
passed as a map. So this PR adds an API that can take the already supported table
properties as a map.

This will make configuration easier for end users of the SDK. Also, if new table
properties are added later, no separate API is needed to support them.

This closes #2651
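
For reference, a minimal usage sketch of the new map-based API (the output path and
property values here are illustrative; the supported keys are documented in the
sdk-guide diff below):

```java
// Illustrative sketch only: pass table properties to the SDK writer through the new map API.
import java.util.HashMap;
import java.util.Map;

import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.CarbonWriterBuilder;

public class TablePropertiesUsage {
  public static void main(String[] args) {
    Map<String, String> tableProperties = new HashMap<>();
    tableProperties.put("blocksize", "12");                // MB
    tableProperties.put("sortcolumns", "name");
    tableProperties.put("enableLocalDictionary", "true");
    tableProperties.put("localDictionaryThreshold", "200");

    CarbonWriterBuilder builder = CarbonWriter.builder()
        .outputPath("/tmp/carbon_sdk_output")              // illustrative path
        .withTableProperties(tableProperties);
    // Continue as usual: supply a Schema and build the writer to write rows.
  }
}
```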


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/13724505
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/13724505
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/13724505

Branch: refs/heads/master
Commit: 13724505759a9b471ce78c1196bfa5337516e6e1
Parents: 6029b28
Author: ajantha-bhat 
Authored: Thu Aug 23 15:39:29 2018 +0530
Committer: manishgupta88 
Committed: Fri Aug 24 18:29:55 2018 +0530

--
 docs/sdk-guide.md   | 18 +++
 .../TestNonTransactionalCarbonTable.scala   | 10 ++--
 .../sdk/file/CarbonWriterBuilder.java   | 52 +++-
 3 files changed, 76 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/docs/sdk-guide.md
--
diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md
index e592aa5..8120efa 100644
--- a/docs/sdk-guide.md
+++ b/docs/sdk-guide.md
@@ -351,6 +351,24 @@ public CarbonWriterBuilder withLoadOptions(Map options);
 
 ```
 /**
+ * To support the table properties for sdk writer
+ *
+ * @param options key,value pair of create table properties.
+ * supported keys values are
+ * a. blocksize -- [1-2048] values in MB. Default value is 1024
+ * b. blockletsize -- values in MB. Default value is 64 MB
+ * c. localDictionaryThreshold -- positive value, default is 1
+ * d. enableLocalDictionary -- true / false. Default is false
+ * e. sortcolumns -- comma separated column. "c1,c2". Default all dimensions 
are sorted.
+ *
+ * @return updated CarbonWriterBuilder
+ */
+public CarbonWriterBuilder withTableProperties(Map options);
+```
+
+
+```
+/**
 * Build a {@link CarbonWriter}, which accepts row in CSV format object
 * @param schema carbon Schema object {org.apache.carbondata.sdk.file.Schema}
 * @return CSVCarbonWriter

http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 39f6ddc..b08a8dd 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -33,8 +33,8 @@ import org.apache.avro.file.DataFileWriter
 import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, 
GenericRecord}
 import org.apache.avro.io.{DecoderFactory, Encoder}
 import org.apache.commons.io.FileUtils
-import org.apache.spark.sql.{CarbonEnv, Row}
 import org.apache.spark.sql.test.util.QueryTest
+import org.apache.spark.sql.{CarbonEnv, Row}
 import org.junit.Assert
 import org.scalatest.BeforeAndAfterAll
 
@@ -2386,9 +2386,13 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 
   test("test LocalDictionary with custom Threshold") {
 FileUtils.deleteDirectory(new File(writerPath))
+val tablePropertiesMap: util.Map[String, String] =
+  Map("blocksize" -> "12",
+"sortcolumns" -> "name",
+"localDictionaryThreshold" -> "200",
+"enableLocalDictionary" -> "true").asJava
 val builder = CarbonWriter.builder.isTransactionalTable(false)
-  
.sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true)
-  .localDictionaryThreshold(200)
+  .withTableProperties(tablePropertiesMap)
   
.uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath)
 generateCarbonData(builder)
 assert(FileFactory.getCarbonFile(writerPath).exists())

http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder

carbondata git commit: [CARBONDATA-2829][CARBONDATA-2832] Fix creating merge index on older V1 V2 store

2018-08-07 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 40571b846 -> b702a1b01


[CARBONDATA-2829][CARBONDATA-2832] Fix creating merge index on older V1 V2 store

Block merge index creation for the old store versions V1 and V2

This closes #2608


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b702a1b0
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b702a1b0
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b702a1b0

Branch: refs/heads/master
Commit: b702a1b01414308de710c1d1471a064184843c37
Parents: 40571b8
Author: dhatchayani 
Authored: Mon Aug 6 12:15:26 2018 +0530
Committer: manishgupta88 
Committed: Tue Aug 7 14:10:44 2018 +0530

--
 .../management/CarbonAlterTableCompactionCommand.scala| 10 ++
 1 file changed, 10 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b702a1b0/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala
index a4adbbb..e0b0547 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala
@@ -37,6 +37,7 @@ import 
org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.impl.FileFactory
 import org.apache.carbondata.core.exception.ConcurrentOperationException
 import org.apache.carbondata.core.locks.{CarbonLockFactory, LockUsage}
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion
 import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, 
TableInfo}
 import org.apache.carbondata.core.mutate.CarbonUpdateUtil
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager
@@ -122,6 +123,15 @@ case class CarbonAlterTableCompactionCommand(
   "Unsupported alter operation on carbon table: Merge index is not 
supported on streaming" +
   " table")
   }
+  val version = CarbonUtil.getFormatVersion(table)
+  val isOlderVersion = version == ColumnarFormatVersion.V1 ||
+   version == ColumnarFormatVersion.V2
+  if (isOlderVersion) {
+throw new MalformedCarbonCommandException(
+  "Unsupported alter operation on carbon table: Merge index is not 
supported on V1 V2 " +
+  "store segments")
+  }
+
   val alterTableMergeIndexEvent: AlterTableMergeIndexEvent =
 AlterTableMergeIndexEvent(sparkSession, table, alterTableModel)
   OperationListenerBus.getInstance



carbondata git commit: [CARBONDATA-2813] Fixed code to get data size from LoadDetails if size is written there

2018-08-02 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master f2e898ac5 -> 38384cb9f


[CARBONDATA-2813] Fixed code to get data size from LoadDetails if size is 
written there

Problem:
In 1.3.x, when index files are merged to form a mergeindex file, a mapping of which
index files are merged into which mergeindex is kept in the segments file. In 1.4.x
both the index and merge index files are scanned to calculate the size of segments
for major compaction. As the index files were deleted in the 1.3.x store, 1.4.x was
throwing an "Unable to get file status" exception.

Solution:
Try to get the size of the segments from LoadMetadataDetails. If not present, then
read the size from the index files.

This closes #2600
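
As a plain-Java illustration of the fallback described above (names here are
hypothetical; the actual change is in CarbonDataMergerUtil, shown in the diff below):

```java
// Sketch of the "prefer recorded size, else read index files" fallback; hypothetical names.
import java.util.function.LongSupplier;

public class SegmentSizeFallback {
  static long segmentSize(String recordedDataSize, LongSupplier readFromIndexFiles) {
    if (recordedDataSize != null && !recordedDataSize.isEmpty()) {
      // LoadMetadataDetails already carries the size, so the (possibly deleted)
      // index files never need to be touched.
      return Long.parseLong(recordedDataSize);
    }
    // Older metadata without a recorded size: fall back to scanning the index files.
    return readFromIndexFiles.getAsLong();
  }

  public static void main(String[] args) {
    System.out.println(segmentSize("1048576", () -> -1L));  // 1048576: recorded size wins
    System.out.println(segmentSize(null, () -> 2097152L));  // 2097152: computed fallback
  }
}
```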


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/38384cb9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/38384cb9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/38384cb9

Branch: refs/heads/master
Commit: 38384cb9f309cc7eb83e61e85c48dd8583921004
Parents: f2e898a
Author: kunal642 
Authored: Thu Aug 2 11:44:20 2018 +0530
Committer: manishgupta88 
Committed: Thu Aug 2 18:14:56 2018 +0530

--
 .../processing/merger/CarbonDataMergerUtil.java | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/38384cb9/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
index 1162fc2..e3da86d 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
@@ -49,6 +49,8 @@ import 
org.apache.carbondata.core.writer.CarbonDeleteDeltaWriterImpl;
 import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
 import org.apache.carbondata.processing.util.CarbonLoaderUtil;
 
+import org.apache.commons.lang.StringUtils;
+
 /**
  * utility class for load merging.
  */
@@ -649,8 +651,14 @@ public final class CarbonDataMergerUtil {
   // variable to store one  segment size across partition.
   long sizeOfOneSegmentAcrossPartition;
   if (segment.getSegmentFile() != null) {
-sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(
-carbonTable.getTablePath(), new Segment(segId, 
segment.getSegmentFile()));
+// If LoadMetaDataDetail already has data size no need to calculate 
the data size from
+// index files. If not there then read the index file and calculate 
size.
+if (!StringUtils.isEmpty(segment.getDataSize())) {
+  sizeOfOneSegmentAcrossPartition = 
Long.parseLong(segment.getDataSize());
+} else {
+  sizeOfOneSegmentAcrossPartition = 
CarbonUtil.getSizeOfSegment(carbonTable.getTablePath(),
+  new Segment(segId, segment.getSegmentFile()));
+}
   } else {
 sizeOfOneSegmentAcrossPartition = 
getSizeOfSegment(carbonTable.getTablePath(), segId);
   }



carbondata git commit: [CARBONDATA-2805] Fix the ordering mismatch of segment numbers during custom 
compaction
2018-08-01 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master cfbf7b6ec -> c29aef880


[CARBONDATA-2805] Fix the ordering mismatch of segment numbers during custom 
compaction

Problem:
When we have segments 0 to 6 and give 1, 2, 3 for custom compaction, the compacted
segment should be created as 1.1, but sometimes 3.1 is created instead, which is wrong.
This is because the custom segment IDs were passed in a HashSet and finally inserted
into a HashMap while identifying the segments to be merged. HashMap and HashSet do not
guarantee insertion order, which can lead to a mismatch of segment numbers.

Solution:
Use LinkedHashSet and LinkedHashMap, which preserve insertion order.

This closes #2585
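
A small, self-contained illustration of the ordering difference (this is not CarbonData
code):

```java
// HashSet iteration order depends on hashing and is not guaranteed to match insertion
// order; LinkedHashSet always iterates in insertion order, so the first specified
// segment ("1") stays first and the compacted segment is named 1.1.
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;

public class InsertionOrderDemo {
  public static void main(String[] args) {
    List<String> customSegmentIds = Arrays.asList("1", "2", "3");
    System.out.println("HashSet:       " + new HashSet<>(customSegmentIds));       // order not guaranteed
    System.out.println("LinkedHashSet: " + new LinkedHashSet<>(customSegmentIds)); // always [1, 2, 3]
  }
}
```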


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c29aef88
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c29aef88
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c29aef88

Branch: refs/heads/master
Commit: c29aef880a57d1f1297361a5296e77af3904d661
Parents: cfbf7b6
Author: akashrn5 
Authored: Mon Jul 30 19:22:29 2018 +0530
Committer: manishgupta88 
Committed: Wed Aug 1 13:38:08 2018 +0530

--
 .../processing/merger/CarbonDataMergerUtil.java   | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/c29aef88/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
index 78af751..1162fc2 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java
@@ -391,7 +391,6 @@ public final class CarbonDataMergerUtil {
   CarbonLoadModel carbonLoadModel, long compactionSize,
   List segments, CompactionType compactionType,
   List customSegmentIds) throws IOException, 
MalformedCarbonCommandException {
-String tablePath = carbonLoadModel.getTablePath();
 Map tableLevelProperties = 
carbonLoadModel.getCarbonDataLoadSchema()
 
.getCarbonTable().getTableInfo().getFactTable().getTableProperties();
 List sortedSegments = new 
ArrayList(segments);
@@ -400,7 +399,7 @@ public final class CarbonDataMergerUtil {
 
 if (CompactionType.CUSTOM == compactionType) {
   return identitySegmentsToBeMergedBasedOnSpecifiedSegments(sortedSegments,
-  new HashSet<>(customSegmentIds));
+  new LinkedHashSet<>(customSegmentIds));
 }
 
 // Check for segments which are qualified for IUD compaction.
@@ -424,7 +423,7 @@ public final class CarbonDataMergerUtil {
 if (CompactionType.MAJOR == compactionType) {
 
   listOfSegmentsToBeMerged = 
identifySegmentsToBeMergedBasedOnSize(compactionSize,
-  listOfSegmentsLoadedInSameDateInterval, carbonLoadModel, 
tablePath);
+  listOfSegmentsLoadedInSameDateInterval, carbonLoadModel);
 } else {
 
   listOfSegmentsToBeMerged =
@@ -462,7 +461,7 @@ public final class CarbonDataMergerUtil {
   List listOfSegments,
   Set segmentIds) throws MalformedCarbonCommandException {
 Map specifiedSegments =
-new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+new LinkedHashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
 for (LoadMetadataDetails detail : listOfSegments) {
   if (segmentIds.contains(detail.getLoadName())) {
 specifiedSegments.put(detail.getLoadName(), detail);
@@ -623,13 +622,12 @@ public final class CarbonDataMergerUtil {
* @param listOfSegmentsAfterPreserve  the segments list after
*preserving the configured number of latest loads
* @param carbonLoadModel carbon load model
-   * @param tablePath the store location of the segment
* @return the list of segments that need to be merged
* based on the Size in case of Major compaction
*/
   private static List 
identifySegmentsToBeMergedBasedOnSize(
   long compactionSize, List 
listOfSegmentsAfterPreserve,
-  CarbonLoadModel carbonLoadModel, String tablePath) throws IOException {
+  CarbonLoadModel carbonLoadModel) throws IOException {
 
 List segmentsToBeMerged =
 new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);



carbondata git commit: [HotFix][CARBONDATA-2788][BloomDataMap] Fix bugs in incorrect query result with bloom datamap

2018-07-31 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 34ca02142 -> 1cea4d33f


[HotFix][CARBONDATA-2788][BloomDataMap] Fix bugs in incorrect query result with 
bloom datamap

This PR solves two problems that affect the correctness of queries using the bloom
datamap.

Revert PR 2539
After reviewing the code, we found that the modification in PR 2539 is not needed, so
we revert that PR.

Bug: overflow of the blocklet count
CarbonData stores the blocklet count for each block as a byte; when a block contains
more than 127 blocklets, the byte overflows. Here we change the data type to short.

For cache_level=block, after pruning by the main BlockDataMap, the blockletNo in a
Blocklet is -1, which indicates that the following procedure will scan the whole
block -- all the blocklets in the block. So, when doing an intersection with the
pruned result from BloomDataMap, we need to take care of these blocklets. In this
implementation, we add the result from BloomDataMap based on the blocklet's existence
in BlockDataMap.

This closes #2565
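
The serialization side of the blocklet-count fix can be sketched as follows (mirroring
convertRowCountFromShortToByteArray from the diff below; the surrounding DataMap
plumbing is omitted):

```java
// Pack per-block blocklet counts as 2-byte shorts instead of single bytes,
// so counts above 127 no longer overflow.
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

public class BlockletCountPacking {
  static byte[] toByteArray(List<Short> blockletCountInEachBlock) {
    ByteBuffer buffer = ByteBuffer.allocate(blockletCountInEachBlock.size() * 2);
    for (Short count : blockletCountInEachBlock) {
      buffer.putShort(count);
    }
    buffer.rewind();
    return buffer.array();
  }

  public static void main(String[] args) {
    // A block with 200 blocklets would have overflowed a signed byte (max 127).
    byte[] packed = toByteArray(Arrays.asList((short) 64, (short) 200));
    System.out.println(packed.length + " bytes: " + Arrays.toString(packed));
  }
}
```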


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1cea4d33
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1cea4d33
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1cea4d33

Branch: refs/heads/master
Commit: 1cea4d33ff9096fab5d38a1403e1e78c2fa2d6dc
Parents: 34ca021
Author: xuchuanyin 
Authored: Thu Jul 26 23:22:58 2018 +0800
Committer: manishgupta88 
Committed: Wed Aug 1 10:40:07 2018 +0530

--
 .../indexstore/blockletindex/BlockDataMap.java  | 24 +++---
 .../blockletindex/BlockletDataMapFactory.java   |  2 +-
 .../hadoop/api/CarbonInputFormat.java   | 28 ++--
 .../lucene/LuceneFineGrainDataMapSuite.scala| 14 +++---
 .../datamap/IndexDataMapRebuildRDD.scala| 10 -
 .../BloomCoarseGrainDataMapFunctionSuite.scala  | 46 +++-
 .../bloom/BloomCoarseGrainDataMapSuite.scala|  2 +-
 7 files changed, 104 insertions(+), 22 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1cea4d33/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
index 82006c3..f4bb58e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
@@ -17,6 +17,7 @@
 package org.apache.carbondata.core.indexstore.blockletindex;
 
 import java.io.*;
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.List;
@@ -58,7 +59,6 @@ import org.apache.carbondata.core.util.CarbonUtil;
 import org.apache.carbondata.core.util.DataFileFooterConverter;
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 
-import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.Path;
 
@@ -248,8 +248,8 @@ public class BlockDataMap extends CoarseGrainDataMap
 byte[][] blockMinValues = null;
 byte[][] blockMaxValues = null;
 DataMapRowImpl summaryRow = null;
-List blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
-byte totalBlockletsInOneBlock = 0;
+List blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
+short totalBlockletsInOneBlock = 0;
 boolean isLastFileFooterEntryNeedToBeAdded = false;
 CarbonRowSchema[] schema = getFileFooterEntrySchema();
 for (DataFileFooter fileFooter : indexInfo) {
@@ -318,13 +318,22 @@ public class BlockDataMap extends CoarseGrainDataMap
   blockMinValues, blockMaxValues);
   blockletCountInEachBlock.add(totalBlockletsInOneBlock);
 }
-byte[] blockletCount = ArrayUtils
-.toPrimitive(blockletCountInEachBlock.toArray(new 
Byte[blockletCountInEachBlock.size()]));
+byte[] blockletCount = 
convertRowCountFromShortToByteArray(blockletCountInEachBlock);
 // blocklet count index is the last index
 summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
 return summaryRow;
   }
 
+  private byte[] convertRowCountFromShortToByteArray(List 
blockletCountInEachBlock) {
+int bufferSize = blockletCountInEachBlock.size() * 2;
+ByteBuffer byteBuffer = ByteBuffer.allocate(bufferSize);
+for (Short blockletCount : blockletCountInEachBlock) {
+  byteBuffer.putShort(blockletCount);
+}
+byteBuffer.rewind();
+return byteBuffer.array();
+  }
+
   protected void setLocations(String[] locations, DataMapRow r

carbondata git commit: [CARBONDATA-2778]Fixed bug when select after delete and cleanup is showing empty records

2018-07-26 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 005db3fa3 -> d62fe9e65


[CARBONDATA-2778]Fixed bug when select after delete and cleanup is showing 
empty records

Problem: During a delete operation, it may happen that the data being deleted leads to
a state where the data of one complete block is deleted. In that case the status of
that block is marked for delete, and during the next delete operation run the block is
deleted along with its carbonIndex file. The problem arises from deleting the
carbonIndex file, because one carbonIndex file can cover multiple blocks, as one
carbonIndex file represents one task.

Solution: Do not delete the carbondata and carbonIndex files. Compaction will
automatically take care of deleting the stale data and stale segments.

This closes #2548


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d62fe9e6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d62fe9e6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d62fe9e6

Branch: refs/heads/master
Commit: d62fe9e65a0fd61832f7b4080672c1503a7a0ae3
Parents: 005db3f
Author: kunal642 
Authored: Tue Jul 24 16:12:54 2018 +0530
Committer: manishgupta88 
Committed: Thu Jul 26 15:08:15 2018 +0530

--
 .../core/mutate/CarbonUpdateUtil.java   | 16 
 .../SegmentUpdateStatusManager.java | 27 
 2 files changed, 43 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d62fe9e6/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java 
b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
index 4a8d2e8..7df3937 100644
--- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
@@ -541,22 +541,6 @@ public class CarbonUpdateUtil {
   compareTimestampsAndDelete(invalidFile, forceDelete, false);
 }
 
-CarbonFile[] blockRelatedFiles = updateStatusManager
-.getAllBlockRelatedFiles(allSegmentFiles,
-block.getActualBlockName());
-
-// now for each invalid index file need to check the query 
execution time out
-// and then delete.
-
-for (CarbonFile invalidFile : blockRelatedFiles) {
-
-  if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) 
{
-if 
(invalidFile.getName().endsWith(CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)) {
-  updateSegmentFile = true;
-}
-  }
-}
-
   } else {
 invalidDeleteDeltaFiles = updateStatusManager
 .getDeleteDeltaInvalidFilesList(block, false,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d62fe9e6/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index 083325d..5d5e8b0 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
@@ -826,31 +826,4 @@ public class SegmentUpdateStatusManager {
 
 return files.toArray(new CarbonFile[files.size()]);
   }
-
-  /**
-   *
-   * @param allSegmentFiles
-   * @return
-   */
-  public CarbonFile[] getAllBlockRelatedFiles(CarbonFile[] allSegmentFiles,
-  String actualBlockName) {
-List files = new 
ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-
-for (CarbonFile eachFile : allSegmentFiles) {
-
-  // for carbon data.
-  if (eachFile.getName().equalsIgnoreCase(actualBlockName)) {
-files.add(eachFile);
-  }
-
-  // get carbon index files of the block.
-  String indexFileName = 
CarbonTablePath.getCarbonIndexFileName(actualBlockName);
-  if (eachFile.getName().equalsIgnoreCase(indexFileName)) {
-files.add(eachFile);
-  }
-
-}
-
-return files.toArray(new CarbonFile[files.size()]);
-  }
 }



carbondata git commit: [CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store

2018-07-25 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 34e74174e -> 316e9de65


[CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store

Problem:
Filter queries are failing for the V1/V2 carbondata store.

Root Cause:
In the V1 store, measure min/max values were not added to the block min/max index in
the executor. When a filter is applied, min/max pruning fails with an
ArrayIndexOutOfBounds exception.

Solution:
Add min/max for measure columns, the same way it is already handled in driver-side
block pruning.

This closes #2550
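
As a schematic illustration only (hypothetical names, not CarbonData classes), the
failure mode is an array lookup past a dimension-only min/max array:

```java
// Schematic demo of the root cause: if min values are kept only for dimension columns,
// indexing them by an ordinal that also counts measure columns overruns the array.
import java.util.Arrays;

public class MinMaxPruningDemo {
  public static void main(String[] args) {
    int numDimensions = 2;
    byte[][] blockMinValues = new byte[numDimensions][];  // old store: dimensions only
    int firstMeasureOrdinal = numDimensions;               // measures come after dimensions
    try {
      System.out.println(Arrays.toString(blockMinValues[firstMeasureOrdinal]));
    } catch (ArrayIndexOutOfBoundsException e) {
      System.out.println("min/max pruning fails: " + e);
    }
  }
}
```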


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/316e9de6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/316e9de6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/316e9de6

Branch: refs/heads/master
Commit: 316e9de658735fe177ca737f35ad23762aa18ad2
Parents: 34e7417
Author: kumarvishal09 
Authored: Tue Jul 24 20:10:54 2018 +0530
Committer: manishgupta88 
Committed: Wed Jul 25 20:13:02 2018 +0530

--
 .../indexstore/blockletindex/IndexWrapper.java  |  8 +-
 .../executor/impl/AbstractQueryExecutor.java| 95 ++--
 2 files changed, 50 insertions(+), 53 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/316e9de6/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
index 1de3122..9588f57 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
@@ -16,7 +16,6 @@
  */
 package org.apache.carbondata.core.indexstore.blockletindex;
 
-import java.io.IOException;
 import java.util.List;
 
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
@@ -34,12 +33,11 @@ public class IndexWrapper extends AbstractIndex {
 
   private List blockInfos;
 
-  public IndexWrapper(List blockInfos) throws IOException {
+  public IndexWrapper(List blockInfos, SegmentProperties 
segmentProperties) {
 this.blockInfos = blockInfos;
-segmentProperties = new 
SegmentProperties(blockInfos.get(0).getDetailInfo().getColumnSchemas(),
-blockInfos.get(0).getDetailInfo().getDimLens());
+this.segmentProperties = segmentProperties;
 dataRefNode = new BlockletDataRefNode(blockInfos, 0,
-segmentProperties.getDimensionColumnsValueSize());
+this.segmentProperties.getDimensionColumnsValueSize());
   }
 
   @Override public void buildIndex(List footerList) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/316e9de6/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index c8c8a0f..5b67921 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -40,13 +40,11 @@ import org.apache.carbondata.core.datastore.IndexKey;
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode;
 import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
 import org.apache.carbondata.core.memory.UnsafeMemoryManager;
-import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
 import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -65,6 +63,7 @@ import 
org.apache.carbondata.core.scan.model.ProjectionMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.core.stats.QueryStatistic;
 import org.apache.carbondata.core.stats.QueryStatisticsConstants;
+import org.apache.carbondata.core.util.BlockletDataMapUtil;
 import org.apache.carbondata.core.util.CarbonProperties;
 imp

carbondata git commit: [CARBONDATA-2648] Documentation for support for COLUMN_META_CACHE and CACHE_LEVEL in create table and alter table properties

2018-07-25 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 06d38ff4b -> 34e74174e


[CARBONDATA-2648] Documentation for support for COLUMN_META_CACHE and 
CACHE_LEVEL in create table and alter table properties

Documentation for support for COLUMN_META_CACHE and CACHE_LEVEL in create table 
and alter table properties

This closes #2558


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/34e74174
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/34e74174
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/34e74174

Branch: refs/heads/master
Commit: 34e74174e0e83b00a6dc603eb86bbcc64533d1ac
Parents: 06d38ff
Author: sgururajshetty 
Authored: Wed Jul 25 18:14:07 2018 +0530
Committer: manishgupta88 
Committed: Wed Jul 25 19:00:15 2018 +0530

--
 docs/data-management-on-carbondata.md | 98 +-
 1 file changed, 97 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/34e74174/docs/data-management-on-carbondata.md
--
diff --git a/docs/data-management-on-carbondata.md 
b/docs/data-management-on-carbondata.md
index 4532b41..da259a6 100644
--- a/docs/data-management-on-carbondata.md
+++ b/docs/data-management-on-carbondata.md
@@ -141,7 +141,103 @@ This tutorial is going to introduce all commands and data 
operations on CarbonDa
'SORT_SCOPE'='NO_SORT')
```
   **NOTE:** CarbonData also supports "using carbondata". Find example code at 
[SparkSessionExample](https://github.com/apache/carbondata/blob/master/examples/spark2/src/main/scala/org/apache/carbondata/examples/SparkSessionExample.scala)
 in the CarbonData repo.
-
+   
+   - **Caching Min/Max Value for Required Columns**
+ By default, CarbonData caches min and max values of all the columns in 
schema.  As the load increases, the memory required to hold the min and max 
values increases considerably. This feature enables you to configure min and 
max values only for the required columns, resulting in optimized memory usage. 
+
+Following are the valid values for COLUMN_META_CACHE:
+* If you want no column min/max values to be cached in the driver.
+
+```
+COLUMN_META_CACHE=’’
+```
+
+* If you want only col1 min/max values to be cached in the driver.
+
+```
+COLUMN_META_CACHE=’col1’
+```
+
+* If you want min/max values to be cached in driver for all the 
specified columns.
+
+```
+COLUMN_META_CACHE=’col1,col2,col3,…’
+```
+
+Columns to be cached can be specified either while creating the table or 
after creation of the table.
+During create table operation; specify the columns to be cached in 
table properties.
+
+Syntax:
+
+```
+CREATE TABLE [dbName].tableName (col1 String, col2 String, col3 
int,…) STORED BY ‘carbondata’ TBLPROPERTIES 
(‘COLUMN_META_CACHE’=’col1,col2,…’)
+```
+
+Example:
+
+```
+CREATE TABLE employee (name String, city String, id int) STORED BY 
‘carbondata’ TBLPROPERTIES (‘COLUMN_META_CACHE’=’name’)
+```
+
+After creation of table or on already created tables use the alter 
table command to configure the columns to be cached.
+
+Syntax:
+
+```
+ALTER TABLE [dbName].tableName SET TBLPROPERTIES 
(‘COLUMN_META_CACHE’=’col1,col2,…’)
+```
+
+Example:
+
+```
+ALTER TABLE employee SET TBLPROPERTIES 
(‘COLUMN_META_CACHE’=’city’)
+```
+
+   - **Caching at Block or Blocklet Level**
+ This feature allows you to maintain the cache at Block level, resulting 
in optimized usage of the memory. The memory consumption is high if the 
Blocklet level caching is maintained as a Block can have multiple Blocklet.
+
+Following are the valid values for CACHE_LEVEL:
+* Configuration for caching in driver at Block level (default value).
+
+```
+CACHE_LEVEL= ‘BLOCK’
+```
+
+* Configuration for caching in driver at Blocklet level.
+
+```
+CACHE_LEVEL= ‘BLOCKLET’
+```
+
+Cache level can be specified either while creating table or after 
creation of the table.
+During create table operation specify the cache level in table 
properties.
+
+Syntax:
+
+```
+CREATE TABLE [dbName].tableName (col1 String, col2 String, col3 
int,…) STORED BY ‘carbondata’ TBLP

carbondata git commit: [CARBONDATA-2753] Fix Compatibility issues

2018-07-25 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 6d40d3a98 -> a37a2ff7f


[CARBONDATA-2753] Fix Compatibility issues

The dictionary path is set on the AbsoluteTableIdentifier so that child/dependent
tables can use the dictionary path of the parent table or their own.

This closes #2530


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a37a2ff7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a37a2ff7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a37a2ff7

Branch: refs/heads/master
Commit: a37a2ff7f450a297590d4e97ec81ec5c56a9cc4a
Parents: 6d40d3a
Author: dhatchayani 
Authored: Thu Jul 19 19:13:24 2018 +0530
Committer: manishgupta88 
Committed: Wed Jul 25 13:52:33 2018 +0530

--
 .../core/metadata/AbsoluteTableIdentifier.java  | 12 
 .../core/metadata/schema/table/CarbonTable.java |  6 +++-
 .../carbondata/core/scan/filter/FilterUtil.java | 29 
 .../TestBlockletDataMapFactory.java |  6 
 4 files changed, 34 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java
index 3ea1f60..4cd33f6 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java
@@ -36,6 +36,11 @@ public class AbsoluteTableIdentifier implements Serializable 
{
*/
   private String tablePath;
 
+  /**
+   * dictionary path of the table
+   */
+  private String dictionaryPath;
+
 
   /**
* carbon table identifier which will have table name and table database
@@ -146,4 +151,11 @@ public class AbsoluteTableIdentifier implements 
Serializable {
 return carbonTableIdentifier.toString();
   }
 
+  public String getDictionaryPath() {
+return dictionaryPath;
+  }
+
+  public void setDictionaryPath(String dictionaryPath) {
+this.dictionaryPath = dictionaryPath;
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 995f943..850a791 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -805,7 +805,11 @@ public class CarbonTable implements Serializable {
* @return absolute table identifier
*/
   public AbsoluteTableIdentifier getAbsoluteTableIdentifier() {
-return tableInfo.getOrCreateAbsoluteTableIdentifier();
+AbsoluteTableIdentifier absoluteTableIdentifier =
+tableInfo.getOrCreateAbsoluteTableIdentifier();
+absoluteTableIdentifier.setDictionaryPath(
+
tableInfo.getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH));
+return absoluteTableIdentifier;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
index 0587b33..bae608f 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java
@@ -56,12 +56,10 @@ import org.apache.carbondata.core.keygenerator.KeyGenerator;
 import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
 import 
org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
-import org.apache.carbondata.core.metadata.CarbonMetadata;
 import org.apache.carbondata.core.metadata.ColumnIdentifier;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.encoder.Encoding;
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 imp

carbondata git commit: [CARBONDATA-2753] Fix Compatibility issues on index Files with 1.3 store

2018-07-23 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 45960f4a8 -> 7ab670652


[CARBONDATA-2753] Fix Compatibility issues on index Files with 1.3 store

Problem:
Currently, in the segment file we write the index files list in the files field
only if the files exist; otherwise it is empty (in case they have been merged into
a merge index file). But in the old store, both the files and mergeFileName fields
were written even if the index files were merged.

Solution:
While querying, we have to check the physical existence of the index files listed
in the files field and consider only those that physically exist.

This closes #2534


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7ab67065
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7ab67065
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7ab67065

Branch: refs/heads/master
Commit: 7ab6706523d2da008585b41f8587762f94c1bdd4
Parents: 45960f4
Author: dhatchayani 
Authored: Fri Jul 20 20:06:00 2018 +0530
Committer: manishgupta88 
Committed: Mon Jul 23 18:27:51 2018 +0530

--
 .../carbondata/core/metadata/SegmentFileStore.java   | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/7ab67065/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
index 9681e37..28ac47e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
@@ -580,7 +580,7 @@ public class SegmentFileStore {
* Gets all index files from this segment
* @return
*/
-  public Map getIndexOrMergeFiles() {
+  public Map getIndexOrMergeFiles() throws IOException {
 Map indexFiles = new HashMap<>();
 if (segmentFile != null) {
   for (Map.Entry entry : 
getLocationMap().entrySet()) {
@@ -597,7 +597,14 @@ public class SegmentFileStore {
   Set files = entry.getValue().getFiles();
   if (null != files && !files.isEmpty()) {
 for (String indexFile : files) {
-  indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + 
indexFile, null);
+  String indexFilePath = location + 
CarbonCommonConstants.FILE_SEPARATOR + indexFile;
+  // In the 1.3 store, files field contain the carbonindex files 
names
+  // even if they are merged to a carbonindexmerge file. In that 
case we have to check
+  // for the physical existence of the file to decide
+  // on whether it is already merged or not.
+  if (FileFactory.isFileExist(indexFilePath)) {
+indexFiles.put(indexFilePath, null);
+  }
 }
   }
 }



carbondata git commit: [CARBONDATA-2734] Update is not working on the table which has segmentfile present

2018-07-22 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master a20f22eda -> 0c363bd18


[CARBONDATA-2734] Update is not working on the table which has segmentfile 
present

This fixes IUD operations on the flat folder layout.

This closes #2503


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0c363bd1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0c363bd1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0c363bd1

Branch: refs/heads/master
Commit: 0c363bd18ae29f13dffb04acb3c2193d9befd1c2
Parents: a20f22e
Author: ravipesala 
Authored: Fri Jul 13 13:15:15 2018 +0530
Committer: manishgupta88 
Committed: Mon Jul 23 08:39:58 2018 +0530

--
 .../core/mutate/CarbonUpdateUtil.java   |  4 ++--
 .../executor/impl/AbstractQueryExecutor.java|  5 +++--
 .../SegmentUpdateStatusManager.java | 12 +--
 .../apache/carbondata/core/util/CarbonUtil.java | 19 ++---
 .../FlatFolderTableLoadingTestCase.scala| 21 +++
 .../iud/DeleteCarbonTableTestCase.scala | 22 +++-
 .../iud/UpdateCarbonTableTestCase.scala | 13 
 .../command/mutation/DeleteExecution.scala  | 14 +++--
 8 files changed, 81 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java 
b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
index d0a204c..4a8d2e8 100644
--- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
@@ -81,10 +81,10 @@ public class CarbonUpdateUtil {
   /**
* Returns block path from tuple id
*/
-  public static String getTableBlockPath(String tid, String tablePath, boolean 
isPartitionTable) {
+  public static String getTableBlockPath(String tid, String tablePath, boolean 
isStandardTable) {
 String partField = getRequiredFieldFromTID(tid, TupleIdEnum.PART_ID);
 // If it has segment file then partfield can be appended directly to table 
path
-if (isPartitionTable) {
+if (!isStandardTable) {
   return tablePath + CarbonCommonConstants.FILE_SEPARATOR + 
partField.replace("#", "/");
 }
 String part = CarbonTablePath.addPartPrefix(partField);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index 180ca4d..910ae3e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -344,11 +344,12 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 queryModel.getProjectionDimensions(), tableBlockDimensions,
 segmentProperties.getComplexDimensions(), 
queryModel.getProjectionMeasures().size(),
 queryModel.getTable().getTableInfo().isTransactionalTable());
+boolean isStandardTable = 
CarbonUtil.isStandardCarbonTable(queryModel.getTable());
 String blockId = CarbonUtil
 .getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, 
segment.getSegmentNo(),
 queryModel.getTable().getTableInfo().isTransactionalTable(),
-queryModel.getTable().isHivePartitionTable());
-if (queryModel.getTable().isHivePartitionTable()) {
+isStandardTable);
+if (!isStandardTable) {
   
blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockIdForPartitionTable(blockId));
 } else {
   blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
 
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index 55381fb..0c2098a 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++ 
b/core/src/main/java/org

carbondata git commit: [CARBONDATA-2754] Fixed testcases if HiveMetastore is enabled

2018-07-20 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master ce2d1a3da -> 9f42fbf33


[CARBONDATA-2754] Fixed testcases if HiveMetastore is enabled

Fixed testcases for the case when HiveMetastore is enabled

This closes #2518


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9f42fbf3
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9f42fbf3
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9f42fbf3

Branch: refs/heads/master
Commit: 9f42fbf333295cad1b4b052bf0d1cabb8ee21bc3
Parents: ce2d1a3
Author: rahul 
Authored: Tue Jul 17 19:19:27 2018 +0530
Committer: manishgupta88 
Committed: Fri Jul 20 16:13:44 2018 +0530

--
 .../createTable/TestCreateExternalTable.scala   | 46 ++--
 .../iud/DeleteCarbonTableTestCase.scala |  2 +-
 .../carbondata/store/SparkCarbonStore.scala | 16 ---
 .../carbondata/store/SparkCarbonStoreTest.scala |  8 ++--
 .../apache/spark/util/CarbonCommandSuite.scala  |  3 +-
 .../apache/carbondata/store/CarbonStore.java|  5 ++-
 .../carbondata/store/LocalCarbonStore.java  | 15 ---
 .../carbondata/store/LocalCarbonStoreTest.java  |  4 +-
 8 files changed, 63 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9f42fbf3/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
index 3b21d0a..519089b 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala
@@ -23,6 +23,9 @@ import org.apache.spark.sql.{AnalysisException, CarbonEnv}
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
 
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
 class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll {
 
   var originDataPath: String = _
@@ -43,25 +46,40 @@ class TestCreateExternalTable extends QueryTest with 
BeforeAndAfterAll {
   test("create external table with existing files") {
 assert(new File(originDataPath).exists())
 sql("DROP TABLE IF EXISTS source")
+if (CarbonProperties.getInstance()
+  .getProperty(CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE,
+
CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE_DEFAULT).equalsIgnoreCase("false"))
 {
 
-// create external table with existing files
-sql(
-  s"""
- |CREATE EXTERNAL TABLE source
- |STORED BY 'carbondata'
- |LOCATION '$storeLocation/origin'
+  // create external table with existing files
+  sql(
+s"""
+   |CREATE EXTERNAL TABLE source
+   |STORED BY 'carbondata'
+   |LOCATION '$storeLocation/origin'
""".stripMargin)
-checkAnswer(sql("SELECT count(*) from source"), sql("SELECT count(*) from 
origin"))
+  checkAnswer(sql("SELECT count(*) from source"), sql("SELECT count(*) 
from origin"))
 
-checkExistence(sql("describe formatted source"), true, 
storeLocation+"/origin")
+  checkExistence(sql("describe formatted source"), true, storeLocation + 
"/origin")
 
-val carbonTable = CarbonEnv.getCarbonTable(None, 
"source")(sqlContext.sparkSession)
-assert(carbonTable.isExternalTable)
-
-sql("DROP TABLE IF EXISTS source")
+  val carbonTable = CarbonEnv.getCarbonTable(None, 
"source")(sqlContext.sparkSession)
+  assert(carbonTable.isExternalTable)
 
-// DROP TABLE should not delete data
-assert(new File(originDataPath).exists())
+  sql("DROP TABLE IF EXISTS source")
+
+  // DROP TABLE should not delete data
+  assert(new File(originDataPath).exists())
+}
+else {
+  intercept[Exception] {
+// create external table with existing files
+sql(
+  s"""
+ |CREATE EXTERNAL TABLE source
+ |STORED BY 'carbondata'
+ |LOCATION '$storeLocation/origin'
+   """.stripMargin)
+  }
+}
   }
 
   test(

carbondata git commit: [CARBONDATA-2710][Spark Integration] Refactor CarbonSparkSqlParser for better code reuse.

2018-07-18 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master a4c2ef5f8 -> 5aada46e7


[CARBONDATA-2710][Spark Integration] Refactor CarbonSparkSqlParser for better 
code reuse.

Refactor CarbonSparkSqlParser for better code reuse

This closes #2466


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5aada46e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5aada46e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5aada46e

Branch: refs/heads/master
Commit: 5aada46e7bb6bcbb11652979862e3ccebaa6e3e8
Parents: a4c2ef5
Author: mohammadshahidkhan 
Authored: Mon Jul 9 16:08:47 2018 +0530
Committer: manishgupta88 
Committed: Wed Jul 18 16:20:30 2018 +0530

--
 .../spark/sql/parser/CarbonSparkSqlParser.scala | 293 ++-
 .../sql/parser/CarbonSparkSqlParserUtil.scala   | 367 +++
 .../spark/sql/hive/CarbonSessionState.scala |   4 +-
 .../spark/sql/hive/CarbonSqlAstBuilder.scala|   4 +-
 4 files changed, 397 insertions(+), 271 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/5aada46e/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala
index 4cc0e1b..39dce3a 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala
@@ -16,35 +16,25 @@
  */
 package org.apache.spark.sql.parser
 
-import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 import org.antlr.v4.runtime.tree.TerminalNode
-import org.apache.spark.sql.{CarbonEnv, CarbonSession, SparkSession}
-import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, 
ParseException, SqlBaseParser}
-import org.apache.spark.sql.catalyst.parser.ParserUtils._
+import org.apache.spark.sql.{CarbonSession, SparkSession}
+import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, SqlBaseParser}
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.SparkSqlAstBuilder
-import org.apache.spark.sql.execution.command.{PartitionerField, TableModel, 
TableNewProcessor}
-import 
org.apache.spark.sql.execution.command.table.{CarbonCreateTableAsSelectCommand, 
CarbonCreateTableCommand}
+import org.apache.spark.sql.execution.command.PartitionerField
 import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution}
 import org.apache.spark.sql.types.StructField
 import org.apache.spark.sql.util.CarbonException
 import org.apache.spark.util.CarbonReflectionUtils
 
 import 
org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
-import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.core.metadata.datatype.DataTypes
-import org.apache.carbondata.core.metadata.schema.SchemaReader
-import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.spark.CarbonOption
-import org.apache.carbondata.spark.util.{CarbonScalaUtil, CommonUtil}
+import org.apache.carbondata.spark.util.CarbonScalaUtil
 
 /**
- * Concrete parser for Spark SQL stateENABLE_INMEMORY_MERGE_SORT_DEFAULTments 
and carbon specific
+ * Concrete parser for Spark SQL statements and carbon specific
  * statements
  */
 class CarbonSparkSqlParser(conf: SQLConf, sparkSession: SparkSession) extends 
AbstractSqlParser {
@@ -90,60 +80,12 @@ class CarbonHelperSqlAstBuilder(conf: SQLConf,
 parser: CarbonSpark2SqlParser,
 sparkSession: SparkSession)
   extends SparkSqlAstBuilder(conf) {
-
-  def getFileStorage(createFileFormat: CreateFileFormatContext): String = {
-Option(createFileFormat) match {
-  case Some(value) =>
-val result = value.children.get(1).getText
-if (result.equalsIgnoreCase("by")) {
-  value.storageHandler().STRING().getSymbol.getText
-} else if (result.equalsIgnoreCase("as") && value.children.size() > 1) 
{
-  value.children.get(2).getText
-} else {
-  // The case of "STORED AS PARQUET/ORC"
-  ""
-}
-  case _ => ""
-}
-  }
-
-  /**
-   * This method will convert the database name to lower case
-   *
-   * @param dbName
-   * @return Option of String
-   */
-  def convertDbNameToL

carbondata git commit: [CARBONDATA-2704] Index file size in describe formatted command is not updated correctly with the segment file

2018-07-15 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master cdee81d4d -> eb604fdb7


[CARBONDATA-2704] Index file size in describe formatted command is not updated 
correctly with the segment file

Problem:
The describe formatted command does not show the correct index files size after 
the index files are merged.
Solution:
The segment file should be updated with the actual index files size of that 
segment after the index files are merged.

This closes #2462
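
A minimal sketch of the idea behind the fix, using plain java.io.File and a 
hypothetical LoadDetail holder instead of CarbonData's SegmentFileStore and 
LoadMetadataDetails: after the merge, the segment's recorded index size is 
recomputed from the index files that actually exist in the segment folder.

import java.io.File;

public class SegmentIndexSizeSketch {

  // Hypothetical stand-in for one entry of the segment/table status metadata.
  static class LoadDetail {
    String indexSize;
  }

  // Sum the sizes of the .carbonindex / .carbonindexmerge files in a segment folder
  // and record the total on the load detail.
  static void updateIndexSize(File segmentFolder, LoadDetail detail) {
    long total = 0L;
    File[] files = segmentFolder.listFiles();
    if (files != null) {
      for (File f : files) {
        String name = f.getName();
        if (name.endsWith(".carbonindex") || name.endsWith(".carbonindexmerge")) {
          total += f.length();
        }
      }
    }
    detail.indexSize = String.valueOf(total);
  }
}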


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/eb604fdb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/eb604fdb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/eb604fdb

Branch: refs/heads/master
Commit: eb604fdb73983dfe9396d488a51907d90ed51d3e
Parents: cdee81d
Author: dhatchayani 
Authored: Mon Jul 9 11:19:51 2018 +0530
Committer: manishgupta88 
Committed: Sun Jul 15 20:34:32 2018 +0530

--
 .../core/metadata/SegmentFileStore.java |  4 +-
 .../apache/carbondata/core/util/CarbonUtil.java | 48 ---
 .../core/writer/CarbonIndexFileMergeWriter.java | 17 +++
 .../CarbonIndexFileMergeTestCase.scala  | 50 
 .../spark/rdd/CarbonDataRDDFactory.scala|  4 +-
 5 files changed, 96 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb604fdb/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
index 3d3b245..ce79e65 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
@@ -281,7 +281,7 @@ public class SegmentFileStore {
* @throws IOException
*/
   public static boolean updateSegmentFile(String tablePath, String segmentId, 
String segmentFile,
-  String tableId) throws IOException {
+  String tableId, SegmentFileStore segmentFileStore) throws IOException {
 boolean status = false;
 String tableStatusPath = CarbonTablePath.getTableStatusFilePath(tablePath);
 if (!FileFactory.isFileExist(tableStatusPath)) {
@@ -308,6 +308,8 @@ public class SegmentFileStore {
   // if the segments is in the list of marked for delete then update 
the status.
   if (segmentId.equals(detail.getLoadName())) {
 detail.setSegmentFile(segmentFile);
+detail.setIndexSize(String.valueOf(CarbonUtil
+.getCarbonIndexSize(segmentFileStore, 
segmentFileStore.getLocationMap(;
 break;
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb604fdb/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index e87e52c..9796696 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2647,23 +2647,7 @@ public final class CarbonUtil {
   fileStore.readIndexFiles();
   Map> indexFilesMap = fileStore.getIndexFilesMap();
   // get the size of carbonindex file
-  for (Map.Entry entry : 
locationMap.entrySet()) {
-SegmentFileStore.FolderDetails folderDetails = entry.getValue();
-Set carbonindexFiles = folderDetails.getFiles();
-String mergeFileName = folderDetails.getMergeFileName();
-if (null != mergeFileName) {
-  String mergeIndexPath =
-  fileStore.getTablePath() + entry.getKey() + 
CarbonCommonConstants.FILE_SEPARATOR
-  + mergeFileName;
-  carbonIndexSize += 
FileFactory.getCarbonFile(mergeIndexPath).getSize();
-}
-for (String indexFile : carbonindexFiles) {
-  String indexPath =
-  fileStore.getTablePath() + entry.getKey() + 
CarbonCommonConstants.FILE_SEPARATOR
-  + indexFile;
-  carbonIndexSize += FileFactory.getCarbonFile(indexPath).getSize();
-}
-  }
+  carbonIndexSize = getCarbonIndexSize(fileStore, locationMap);
   for (Map.Entry> entry : indexFilesMap.entrySet()) {
 // get the size of carbondata files
 for (String blockFile : entry.getValue()) {
@@ -2676,6 +2660,36 @@ public final class CarbonUtil {
 return dataAndIndexSize;
   }
 
+  /**
+   * Calcuate the index files size of the segment
+   *
+   * @param fileStore
+   * @param locationMap
+   

[1/2] carbondata git commit: Code Generator Error is thrown when Select filter contains more than one count of distinct of ComplexColumn with group by Clause

2018-07-05 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 020335a8c -> 19a99e15e


Code Generator Error is thrown when Select filter contains more than one count 
of distinct of ComplexColumn with group by Clause


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cbecadfa
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cbecadfa
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cbecadfa

Branch: refs/heads/master
Commit: cbecadfad2408c18322e87aa5ac054d6d190d34c
Parents: 020335a
Author: Indhumathi27 
Authored: Tue Jul 3 22:31:40 2018 +0530
Committer: manishgupta88 
Committed: Fri Jul 6 10:34:47 2018 +0530

--
 .../complexType/TestComplexDataType.scala   | 20 
 .../sql/optimizer/CarbonLateDecodeRule.scala|  7 ++-
 2 files changed, 26 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbecadfa/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 524289c..ba0dc66 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -636,4 +636,24 @@ class TestComplexDataType extends QueryTest with 
BeforeAndAfterAll {
 sql("select b.c[0],a[0][0] from test").show(false)
   }
 
+  test("test structofarray with count(distinct)") {
+sql("DROP TABLE IF EXISTS test")
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "/MM/dd")
+sql(
+  "create table test(cus_id string, struct_of_array 
struct,sal:array,state:array,date1:array>) 
stored by " +
+  "'carbondata'")
+sql("insert into test 
values('cus_01','1$2017/01/01$1:2$2.0:3.0$ab:ac$2018/01/01')")
+sql("select *from test").show(false)
+sql(
+  "select struct_of_array.state[0],count(distinct struct_of_array.id) as 
count_int,count" +
+  "(distinct struct_of_array.state[0]) as count_string from test group by 
struct_of_array" +
+  ".state[0]")
+  .show(false)
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbecadfa/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
--
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
index 7ed1705..68e7f07 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala
@@ -90,7 +90,12 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with 
PredicateHelper {
 LOGGER.info("skip CarbonOptimizer for scalar/predicate sub query")
 return false
   }
-  true
+  if(relations.exists(_.dictionaryMap.dictionaryMap.exists(_._2))) {
+true
+  } else {
+false
+  }
+
 } else {
   LOGGER.info("skip CarbonOptimizer")
   false



carbondata git commit: [CARBONDATA-2623][DataMap] Add DataMap Pre and Post Event listener

2018-06-21 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 55f4bc6c8 -> b3f782062


[CARBONDATA-2623][DataMap] Add DataMap Pre and Post Event listener

Added Pre and Post Execution Events for index datamap

This closes #2389
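
A schematic sketch of the pre/post event pattern these listeners follow; Event, 
Listener and Bus below are hypothetical stand-ins for illustration only, not 
CarbonData's actual listener-bus API.

import java.util.ArrayList;
import java.util.List;

public class DataMapEventSketch {

  interface Event { }
  static class BuildDataMapPreEvent implements Event { }
  static class BuildDataMapPostEvent implements Event { }

  interface Listener {
    void onEvent(Event event);
  }

  static class Bus {
    private final List<Listener> listeners = new ArrayList<>();
    void register(Listener listener) { listeners.add(listener); }
    void fire(Event event) {
      for (Listener listener : listeners) {
        listener.onEvent(event);
      }
    }
  }

  public static void main(String[] args) {
    Bus bus = new Bus();
    bus.register(event -> System.out.println("handled " + event.getClass().getSimpleName()));
    bus.fire(new BuildDataMapPreEvent());    // fired before the index build starts
    // ... build the index datamap here ...
    bus.fire(new BuildDataMapPostEvent());   // fired after the index build finishes
  }
}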


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b3f78206
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b3f78206
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b3f78206

Branch: refs/heads/master
Commit: b3f7820623d4bc9ab4408beb8ad708ba9b19b899
Parents: 55f4bc6
Author: mohammadshahidkhan 
Authored: Wed Jun 20 19:52:51 2018 +0530
Committer: manishgupta88 
Committed: Thu Jun 21 17:37:48 2018 +0530

--
 .../carbondata/events/DataMapEvents.scala   | 68 
 .../org/apache/carbondata/events/Events.scala   | 18 +-
 .../datamap/IndexDataMapRebuildRDD.scala| 11 +++-
 .../spark/rdd/CarbonTableCompactor.scala| 23 ++-
 .../datamap/CarbonCreateDataMapCommand.scala| 22 +++
 .../datamap/CarbonDataMapRebuildCommand.scala   | 12 
 .../datamap/CarbonDropDataMapCommand.scala  | 11 
 .../management/CarbonLoadDataCommand.scala  | 21 +-
 8 files changed, 181 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b3f78206/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala
--
diff --git 
a/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala
 
b/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala
new file mode 100644
index 000..8fb374f
--- /dev/null
+++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.events
+
+import org.apache.spark.sql.SparkSession
+
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
+
+/**
+ * For handling operation's after finish of index creation over table with 
index datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class CreateDataMapPostExecutionEvent(sparkSession: SparkSession,
+storePath: String) extends Event with CreateDataMapEventsInfo
+
+/**
+ * For handling operation's before start of update index datmap status over 
table with index datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class UpdateDataMapPreExecutionEvent(sparkSession: SparkSession,
+storePath: String) extends Event with CreateDataMapEventsInfo
+
+/**
+ * For handling operation's after finish of  update index datmap status over 
table with index
+ * datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class UpdateDataMapPostExecutionEvent(sparkSession: SparkSession,
+storePath: String) extends Event with CreateDataMapEventsInfo
+
+/**
+ * For handling operation's before start of index build over table with index 
datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class BuildDataMapPreExecutionEvent(sparkSession: SparkSession,
+identifier: AbsoluteTableIdentifier, dataMapNames: 
scala.collection.mutable.Seq[String])
+  extends Event with BuildDataMapEventsInfo
+
+/**
+ * For handling operation's after finish of index build over table with index 
datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class BuildDataMapPostExecutionEvent(sparkSession: SparkSession,
+identifier: AbsoluteTableIdentifier)
+  extends Event with TableEventInfo
+
+/**
+ * For handling operation's before start of index creation over table with 
index datamap
+ * example: bloom datamap, Lucene datamap
+ */
+case class CreateDataMapPreExecutionEvent(sparkSession: SparkSession,
+storePath: String) extends Event with CreateDataMapEventsInfo
+

http://git-wip-us.apache.org/repos/asf/carbondata/blob/b3f78206/integration/spark-common/src/main/scala/org/apache/carbond

carbondata git commit: [CARBONDATA-2617] Invalid tuple-id and block id getting formed for Non partition table

2018-06-20 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master dc53dee24 -> 0e1d550e8


[CARBONDATA-2617] Invalid tuple-id and block id getting formed for Non 
partition table

Problem
Invalid tuple and block id getting formed for non partition table

Analysis
While creating a partition table, a segment file was written in the Metadata 
folder under the table structure. This was introduced during development of the 
partition table feature. At that time the segment file was written only for 
partition tables and was used to distinguish between partition and non-partition 
tables in the code. Later the code was modified to write the segment file for 
both partition and non-partition tables, but the code that distinguishes the two 
was not updated, which causes this incorrect formation of block and tuple ids.

Fix
Modify the logic that distinguishes partitioned and non-partitioned tables; the 
same has been handled in this PR.

This closes #2385
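
A simplified sketch of the corrected decision with plain boolean inputs; the 
real check lives in CarbonUtil.isStandardCarbonTable and the block-id helpers in 
CarbonTablePath, so the methods below only illustrate the reasoning.

public class TableKindSketch {

  // Old (buggy) idea: "has a segment file" was treated as "is a partition table".
  // Once segment files started to be written for every table, this stopped
  // distinguishing anything and produced wrong block/tuple ids.
  static boolean looksLikePartitionTableOld(boolean hasSegmentFile) {
    return hasSegmentFile;
  }

  // Fixed idea: ask the table metadata directly instead of inferring from files.
  static boolean isStandardTable(boolean isTransactional, boolean isHivePartitionTable) {
    return isTransactional && !isHivePartitionTable;
  }

  public static void main(String[] args) {
    // A transactional, non-partition table that happens to have a segment file:
    System.out.println(looksLikePartitionTableOld(true));  // true  -> wrong path chosen
    System.out.println(isStandardTable(true, false));      // true  -> standard block id used
  }
}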


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0e1d550e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0e1d550e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0e1d550e

Branch: refs/heads/master
Commit: 0e1d550e8dacba798e9ffbdda25c4388e8933632
Parents: dc53dee
Author: rahul 
Authored: Tue Jun 19 19:23:26 2018 +0530
Committer: manishgupta88 
Committed: Wed Jun 20 16:37:23 2018 +0530

--
 .../core/mutate/CarbonUpdateUtil.java   |  4 +-
 .../executor/impl/AbstractQueryExecutor.java|  4 +-
 .../SegmentUpdateStatusManager.java | 20 ++---
 .../apache/carbondata/core/util/CarbonUtil.java |  4 +-
 .../iud/DeleteCarbonTableTestCase.scala | 83 
 .../command/mutation/DeleteExecution.scala  |  6 +-
 6 files changed, 100 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java 
b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
index 40d498c..8627bdb 100644
--- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java
@@ -81,10 +81,10 @@ public class CarbonUpdateUtil {
   /**
* Returns block path from tuple id
*/
-  public static String getTableBlockPath(String tid, String tablePath, boolean 
isSegmentFile) {
+  public static String getTableBlockPath(String tid, String tablePath, boolean 
isPartitionTable) {
 String partField = getRequiredFieldFromTID(tid, TupleIdEnum.PART_ID);
 // If it has segment file then partfield can be appended directly to table 
path
-if (isSegmentFile) {
+if (isPartitionTable) {
   return tablePath + CarbonCommonConstants.FILE_SEPARATOR + 
partField.replace("#", "/");
 }
 String part = CarbonTablePath.addPartPrefix(partField);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index 2bbe75c..f365045 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -296,8 +296,8 @@ public abstract class AbstractQueryExecutor implements 
QueryExecutor {
 String blockId = CarbonUtil
 .getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, 
segment.getSegmentNo(),
 queryModel.getTable().getTableInfo().isTransactionalTable(),
-segment.getSegmentFileName() != null);
-if (segment.getSegmentFileName() != null) {
+queryModel.getTable().isHivePartitionTable());
+if (queryModel.getTable().isHivePartitionTable()) {
   
blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockIdForPartitionTable(blockId));
 } else {
   blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java

carbondata git commit: [CARBONDATA-2604] Getting ArrayIndexOutOfBoundException during compaction after IUD in cluster is fixed

2018-06-13 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master ff0364599 -> efad40d57


[CARBONDATA-2604] Getting ArrayIndexOutOfBoundException during compaction after 
IUD in cluster is fixed

Issue: If some records are deleted, then while filling the measure and dimension 
data the number of valid rows and the actual row count may differ. Filling the 
measure data iterates over the scanned result, which can then cause an 
ArrayIndexOutOfBoundsException.

Solution: Collect the measure and dimension data scanned in each batch into a new 
temporary list inside RawBasedResultCollector and add that list to the final list.

This closes #2369
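
A minimal sketch of the batching fix with plain lists: each scan iteration fills 
a fresh temporary list whose size matches the rows actually available, so the 
dimension and measure fill steps cannot run past the end, and only then is the 
batch appended to the overall result.

import java.util.ArrayList;
import java.util.List;

public class BatchCollectSketch {

  static List<Object[]> collect(int totalRows, int batchSize) {
    List<Object[]> result = new ArrayList<>();
    for (int offset = 0; offset < totalRows; offset += batchSize) {
      int rowsInBatch = Math.min(batchSize, totalRows - offset);
      // fresh list per iteration: its size always matches rowsInBatch
      List<Object[]> batch = new ArrayList<>(rowsInBatch);
      for (int i = 0; i < rowsInBatch; i++) {
        batch.add(new Object[] { offset + i });  // stand-in for filling dimensions + measures
      }
      result.addAll(batch);                      // add the collected data to the final list
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(collect(10, 4).size());   // 10
  }
}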


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/efad40d5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/efad40d5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/efad40d5

Branch: refs/heads/master
Commit: efad40d5723849a351ec700e8e4e346cac8c3454
Parents: ff03645
Author: rahul 
Authored: Tue Jun 12 19:26:40 2018 +0530
Committer: manishgupta88 
Committed: Wed Jun 13 20:38:24 2018 +0530

--
 .../collector/impl/RawBasedResultCollector.java  | 12 +---
 .../sdv/generated/DataLoadingIUDTestCase.scala   | 19 +++
 2 files changed, 28 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
index d28df0a..7302b2c 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
@@ -92,16 +92,22 @@ public class RawBasedResultCollector extends 
AbstractScannedResultCollector {
 // re initialized with left over value
 batchSize = 0;
   }
+  // for every iteration of available rows filling newly created list of 
Object[] and add it to
+  // the final list so there is no mismatch in the counter while filling 
dimension and
+  // measure data
+  List collectedData = new ArrayList<>(availableBatchRowCount);
   // fill dimension data
-  fillDimensionData(scannedResult, listBasedResult, queryMeasures, 
availableBatchRowCount);
-  fillMeasureData(scannedResult, listBasedResult);
+  fillDimensionData(scannedResult, collectedData, queryMeasures, 
availableBatchRowCount);
+  fillMeasureData(scannedResult, collectedData);
   // increment the number of rows scanned in scanned result statistics
   incrementScannedResultRowCounter(scannedResult, availableBatchRowCount);
   // assign the left over rows to batch size if the number of rows fetched 
are lesser
   // than batchSize
-  if (listBasedResult.size() < availableBatchRowCount) {
+  if (collectedData.size() < availableBatchRowCount) {
 batchSize += availableBatchRowCount - listBasedResult.size();
   }
+  // add the collected data to the final list
+  listBasedResult.addAll(collectedData);
 }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
index 4c232be..79458f5 100644
--- 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
@@ -3671,6 +3671,23 @@ test("HQ_Defect_TC_2016110901163", Include) {
sql(s"""drop table default.t_carbn01  """).collect
 }
 
+  test("[CARBONDATA-2604] ", Include){
+sql("drop table if exists brinjal").collect
+sql("create table brinjal (imei string,AMSize string,channelsId 
string,ActiveCountry string, Activecity string,gamePointId 
double,deviceInformationId double,productionDate Timestamp,deliveryDate 
timestamp,deliverycharge double) STORED BY 'org.apache.carbondata.format' 
TBLPROPERTIES('table_

carbondata git commit: [CARBONDATA-2571] Calculating the carbonindex and carbondata file size of a table is wrong

2018-06-05 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 92d9b9256 -> 27d705998


[CARBONDATA-2571] Calculating the carbonindex and carbondata file size of a 
table is wrong

Problem:
While calculating the carbonindex files size, we check either the index file or 
the merge file. But in PR #2333 the implementation was changed to fill both the 
file names and the merge file name, so both fields have to be considered.

Solution:
While calculating the carbonindex files size, we have to consider both the 
files and mergeFileName fields. We should get the list of index files from
these 2 fields and then calculate the size of the files.

This closes #2358
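
A sketch of the corrected accounting with java.io.File standing in for 
CarbonData's FileFactory and FolderDetails: the total now counts both the merged 
index file (if any) and every individual carbonindex file listed for the folder, 
instead of one or the other.

import java.io.File;
import java.util.Set;

public class IndexSizeSketch {

  // folderPath: segment folder, mergeFileName: may be null, indexFiles: may be empty
  static long carbonIndexSize(String folderPath, String mergeFileName, Set<String> indexFiles) {
    long size = 0L;
    if (mergeFileName != null) {
      size += new File(folderPath, mergeFileName).length();
    }
    if (indexFiles != null) {
      for (String indexFile : indexFiles) {
        size += new File(folderPath, indexFile).length();
      }
    }
    return size;
  }
}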


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/27d70599
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/27d70599
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/27d70599

Branch: refs/heads/master
Commit: 27d7059984962b97bcaf576fed496653932ea743
Parents: 92d9b92
Author: dhatchayani 
Authored: Fri Jun 1 15:13:38 2018 +0530
Committer: manishgupta88 
Committed: Tue Jun 5 11:51:52 2018 +0530

--
 .../apache/carbondata/core/util/CarbonUtil.java | 37 +++-
 1 file changed, 20 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/27d70599/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 1526047..5a7bce3 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2688,27 +2688,30 @@ public final class CarbonUtil {
   throws IOException {
 long carbonDataSize = 0L;
 long carbonIndexSize = 0L;
-List listOfFilesRead = new ArrayList<>();
 HashMap dataAndIndexSize = new HashMap();
-if (fileStore.getLocationMap() != null) {
+Map locationMap = 
fileStore.getLocationMap();
+if (locationMap != null) {
   fileStore.readIndexFiles();
-  Map indexFiles = fileStore.getIndexFiles();
   Map> indexFilesMap = fileStore.getIndexFilesMap();
-  for (Map.Entry> entry : indexFilesMap.entrySet()) {
-// get the size of carbonindex file
-String indexFile = entry.getKey();
-String mergeIndexFile = indexFiles.get(indexFile);
-if (null != mergeIndexFile) {
-  String mergeIndexPath = indexFile
-  .substring(0, 
indexFile.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR) + 1)
-  + mergeIndexFile;
-  if (!listOfFilesRead.contains(mergeIndexPath)) {
-carbonIndexSize += 
FileFactory.getCarbonFile(mergeIndexPath).getSize();
-listOfFilesRead.add(mergeIndexPath);
-  }
-} else {
-  carbonIndexSize += FileFactory.getCarbonFile(indexFile).getSize();
+  // get the size of carbonindex file
+  for (Map.Entry entry : 
locationMap.entrySet()) {
+SegmentFileStore.FolderDetails folderDetails = entry.getValue();
+Set carbonindexFiles = folderDetails.getFiles();
+String mergeFileName = folderDetails.getMergeFileName();
+if (null != mergeFileName) {
+  String mergeIndexPath =
+  fileStore.getTablePath() + entry.getKey() + 
CarbonCommonConstants.FILE_SEPARATOR
+  + mergeFileName;
+  carbonIndexSize += 
FileFactory.getCarbonFile(mergeIndexPath).getSize();
 }
+for (String indexFile : carbonindexFiles) {
+  String indexPath =
+  fileStore.getTablePath() + entry.getKey() + 
CarbonCommonConstants.FILE_SEPARATOR
+  + indexFile;
+  carbonIndexSize += FileFactory.getCarbonFile(indexPath).getSize();
+}
+  }
+  for (Map.Entry> entry : indexFilesMap.entrySet()) {
 // get the size of carbondata files
 for (String blockFile : entry.getValue()) {
   carbonDataSize += FileFactory.getCarbonFile(blockFile).getSize();



carbondata git commit: [HOTFIX] Changes in selecting the carbonindex files

2018-05-28 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master d7773187f -> 22d5035c8


[HOTFIX] Changes in selecting the carbonindex files

Currently, in the query flow, while getting the index files we check either the 
mergeFileName or the list of files. After this change we check both the files 
and the mergeFileName.

This closes #2333


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22d5035c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22d5035c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22d5035c

Branch: refs/heads/master
Commit: 22d5035c84342e0c0b15a87abbdd4dca5e6d4976
Parents: d777318
Author: dhatchayani 
Authored: Tue May 22 17:26:37 2018 +0530
Committer: manishgupta88 
Committed: Tue May 29 11:01:08 2018 +0530

--
 .../core/metadata/SegmentFileStore.java   | 18 --
 .../core/writer/CarbonIndexFileMergeWriter.java   |  2 ++
 2 files changed, 14 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/22d5035c/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
index d72ded3..acfc145 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
@@ -151,7 +151,8 @@ public class SegmentFileStore {
 CarbonFile segmentFolder = FileFactory.getCarbonFile(segmentPath);
 CarbonFile[] indexFiles = segmentFolder.listFiles(new CarbonFileFilter() {
   @Override public boolean accept(CarbonFile file) {
-return file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT);
+return (file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT) || 
file.getName()
+.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT));
   }
 });
 if (indexFiles != null && indexFiles.length > 0) {
@@ -160,7 +161,11 @@ public class SegmentFileStore {
   folderDetails.setRelative(true);
   folderDetails.setStatus(SegmentStatus.SUCCESS.getMessage());
   for (CarbonFile file : indexFiles) {
-folderDetails.getFiles().add(file.getName());
+if (file.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) {
+  folderDetails.setMergeFileName(file.getName());
+} else {
+  folderDetails.getFiles().add(file.getName());
+}
   }
   String segmentRelativePath = segmentPath.substring(tablePath.length(), 
segmentPath.length());
   segmentFile.addPath(segmentRelativePath, folderDetails);
@@ -508,10 +513,11 @@ public class SegmentFileStore {
   if (null != mergeFileName) {
 indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + 
mergeFileName,
 entry.getValue().mergeFileName);
-  } else {
-for (String indexFile : entry.getValue().getFiles()) {
-  indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + 
indexFile,
-  entry.getValue().mergeFileName);
+  }
+  Set files = entry.getValue().getFiles();
+  if (null != files && !files.isEmpty()) {
+for (String indexFile : files) {
+  indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + 
indexFile, null);
 }
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/22d5035c/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java
 
b/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java
index ceeb431..cb53c0b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java
@@ -21,6 +21,7 @@ import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -140,6 +141,7 @@ public class CarbonIndexFileMergeWriter {
 }
 if (new Path(entry.getKey()).equals(new Path(location))) {
   segentry.getValue().setMergeFileName(mergeIndexFile);
+  segentry.getValue().setFiles(new HashSet());
   break;
 }
   }



carbondata git commit: [CARBONDATA-2538] added filter while listing files from writer path

2018-05-28 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 8b80b12ec -> d7773187f


[CARBONDATA-2538] added filter while listing files from writer path

1. Added a filter to list only index and carbondata files, so that a proper 
exception can be thrown even if lock files are present (a small sketch of the 
filter follows below).
2. Updated the complex type docs

This closes #2344
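
A small sketch of such a listing filter using java.io.FilenameFilter in place of 
CarbonData's CarbonFileFilter: only .carbonindex and .carbondata files are 
returned, so a stray lock file no longer hides the "no files present" condition. 
The path used below is purely illustrative.

import java.io.File;
import java.io.FilenameFilter;

public class WriterPathFilterSketch {

  static File[] listDataAndIndexFiles(File writerPath) {
    return writerPath.listFiles(new FilenameFilter() {
      @Override public boolean accept(File dir, String name) {
        return name.endsWith(".carbonindex") || name.endsWith(".carbondata");
      }
    });
  }

  public static void main(String[] args) {
    File[] files = listDataAndIndexFiles(new File("/tmp/sdk_writer_output"));
    if (files == null || files.length == 0) {
      throw new RuntimeException("No files are present in the table location");
    }
  }
}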


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d7773187
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d7773187
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d7773187

Branch: refs/heads/master
Commit: d7773187f72c73b7f9514f13bce17de3c552247c
Parents: 8b80b12
Author: kunal642 
Authored: Fri May 25 16:21:45 2018 +0530
Committer: manishgupta88 
Committed: Tue May 29 10:49:59 2018 +0530

--
 .../core/metadata/schema/table/CarbonTable.java  |  2 +-
 .../readcommitter/LatestFilesReadCommittedScope.java |  9 -
 .../java/org/apache/carbondata/core/util/CarbonUtil.java | 11 ---
 docs/supported-data-types-in-carbondata.md   |  2 ++
 .../command/table/CarbonDescribeFormattedCommand.scala   |  2 +-
 5 files changed, 20 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 8528d6f..b1ed981 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -892,7 +892,7 @@ public class CarbonTable implements Serializable {
 
 
   public long size() throws IOException {
-Map dataIndexSize = CarbonUtil.calculateDataIndexSize(this);
+Map dataIndexSize = CarbonUtil.calculateDataIndexSize(this, 
true);
 Long dataSize = 
dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_DATA_SIZE);
 if (dataSize == null) {
   dataSize = 0L;

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
index 6106174..14bba65 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
@@ -26,6 +26,7 @@ import 
org.apache.carbondata.common.annotations.InterfaceAudience;
 import org.apache.carbondata.common.annotations.InterfaceStability;
 import org.apache.carbondata.core.datamap.Segment;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import 
org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
 import org.apache.carbondata.core.mutate.UpdateVO;
@@ -138,7 +139,13 @@ public class LatestFilesReadCommittedScope implements 
ReadCommittedScope {
   @Override public void takeCarbonIndexFileSnapShot() throws IOException {
 // Read the current file Path get the list of indexes from the path.
 CarbonFile file = FileFactory.getCarbonFile(carbonFilePath);
-if (file.listFiles().length == 0) {
+CarbonFile[] files = file.listFiles(new CarbonFileFilter() {
+  @Override public boolean accept(CarbonFile file) {
+return file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT) || 
file.getName()
+.endsWith(CarbonTablePath.CARBON_DATA_EXT);
+  }
+});
+if (files.length == 0) {
   // For nonTransactional table, files can be removed at any point of time.
   // So cannot assume files will be present
   throw new IOException("No files are present in the table location :" + 
carbonFilePath);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 23d02ef..9ccd772 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/

carbondata git commit: [CARBONDATA-2514] Added condition to check for duplicate column names

2018-05-23 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 16ed99a11 -> cf666c17b


[CARBONDATA-2514] Added condition to check for duplicate column names

1. A duplicate column name check was not present (a short sketch of such a check 
follows below).
2. IndexFileReader was not being closed, because of which the index file could 
not be deleted.

This closes #2332
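
A short sketch of a set-based duplicate column name check of the kind described 
in point 1; the method name and exception are illustrative, not the SDK's actual 
code. Point 2 amounts to wrapping the reader usage in try/finally so 
closeThriftReader() always runs, as the diff below shows.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DuplicateColumnCheckSketch {

  // Reject a schema that repeats a column name (case-insensitive).
  static void validateNoDuplicateColumns(List<String> columnNames) {
    Set<String> seen = new HashSet<>();
    for (String name : columnNames) {
      if (!seen.add(name.toLowerCase())) {
        throw new IllegalArgumentException("Duplicate column found with name: " + name);
      }
    }
  }
}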


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf666c17
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf666c17
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf666c17

Branch: refs/heads/master
Commit: cf666c17b8be9f11dd9c0b51503ca194162ee782
Parents: 16ed99a
Author: kunal642 <kunalkapoor...@gmail.com>
Authored: Tue May 22 15:16:32 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Thu May 24 10:19:47 2018 +0530

--
 .../apache/carbondata/core/util/CarbonUtil.java | 44 +++-
 .../carbondata/core/util/DataTypeUtil.java  |  2 +
 .../sdk/file/CarbonWriterBuilder.java   |  7 
 .../sdk/file/AvroCarbonWriterTest.java  | 40 ++
 4 files changed, 73 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 9dc4aa2..23d02ef 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2380,27 +2380,31 @@ public final class CarbonUtil {
   public static org.apache.carbondata.format.TableInfo 
inferSchemaFromIndexFile(
   String indexFilePath, String tableName) throws IOException {
 CarbonIndexFileReader indexFileReader = new CarbonIndexFileReader();
-indexFileReader.openThriftReader(indexFilePath);
-org.apache.carbondata.format.IndexHeader readIndexHeader = 
indexFileReader.readIndexHeader();
-List columnSchemaList = new ArrayList();
-List table_columns =
-readIndexHeader.getTable_columns();
-for (int i = 0; i < table_columns.size(); i++) {
-  
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+try {
+  indexFileReader.openThriftReader(indexFilePath);
+  org.apache.carbondata.format.IndexHeader readIndexHeader = 
indexFileReader.readIndexHeader();
+  List columnSchemaList = new ArrayList();
+  List table_columns =
+  readIndexHeader.getTable_columns();
+  for (int i = 0; i < table_columns.size(); i++) {
+
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+  }
+  // only columnSchema is the valid entry, reset all dummy entries.
+  TableSchema tableSchema = getDummyTableSchema(tableName, 
columnSchemaList);
+
+  ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
+  new ThriftWrapperSchemaConverterImpl();
+  org.apache.carbondata.format.TableSchema thriftFactTable =
+  
thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
+  org.apache.carbondata.format.TableInfo tableInfo =
+  new org.apache.carbondata.format.TableInfo(thriftFactTable,
+  new ArrayList());
+
+  tableInfo.setDataMapSchemas(null);
+  return tableInfo;
+} finally {
+  indexFileReader.closeThriftReader();
 }
-// only columnSchema is the valid entry, reset all dummy entries.
-TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
-
-ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter =
-new ThriftWrapperSchemaConverterImpl();
-org.apache.carbondata.format.TableSchema thriftFactTable =
-
thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
-org.apache.carbondata.format.TableInfo tableInfo =
-new org.apache.carbondata.format.TableInfo(thriftFactTable,
-new ArrayList());
-
-tableInfo.setDataMapSchemas(null);
-return tableInfo;
   }
 
   private static TableSchema getDummyTableSchema(String tableName,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index f7f71b3..e06c82e 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/u

carbondata git commit: [CARBONDATA-2503] Data write fails if empty value is provided for sort columns in sdk is fixed

2018-05-22 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master e1ef85ac7 -> 9aa3a8c22


[CARBONDATA-2503] Data write fails if empty value is provided for sort columns 
in sdk is fixed

An empty value for the sort columns was throwing an exception

This closes #2326
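
A tiny sketch of the guard with illustrative names: an empty sort-columns array 
is now treated like an unspecified one, so the writer falls back to its default 
instead of failing.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SortColumnsSketch {

  static List<String> resolveSortColumns(String[] sortColumns, List<String> defaultSortColumns) {
    if (sortColumns == null || sortColumns.length == 0) {
      return defaultSortColumns;                     // fall back to the default sort columns
    }
    return new ArrayList<>(Arrays.asList(sortColumns));
  }

  public static void main(String[] args) {
    System.out.println(resolveSortColumns(new String[0], Arrays.asList("name")));  // [name]
  }
}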


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9aa3a8c2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9aa3a8c2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9aa3a8c2

Branch: refs/heads/master
Commit: 9aa3a8c22460f58691e0de7dee97dade5a567285
Parents: e1ef85a
Author: rahulforallp <rahul.ku...@knoldus.in>
Authored: Mon May 21 15:17:10 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Tue May 22 17:25:53 2018 +0530

--
 .../TestNonTransactionalCarbonTable.scala   | 21 +---
 .../sdk/file/CarbonWriterBuilder.java   |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/9aa3a8c2/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 1c74adc..afb9b2f 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -322,14 +322,12 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
   Row("robot0", 0, 0.0),
   Row("robot1", 1, 0.5),
   Row("robot2", 2, 1.0)))
-new File(writerPath).listFiles().map(x => LOGGER.audit(x.getName +" : 
"+x.lastModified()))
 FileUtils.deleteDirectory(new File(writerPath))
 // Thread.sleep is required because it is possible sometime deletion
 // and creation of new file can happen at same timestamp.
 Thread.sleep(1000)
 assert(!new File(writerPath).exists())
 buildTestDataWithSameUUID(4, false, null, List("name"))
-new File(writerPath).listFiles().map(x => LOGGER.audit(x.getName +" : 
"+x.lastModified()))
 checkAnswer(sql("select * from sdkOutputTable"), Seq(
   Row("robot0", 0, 0.0),
   Row("robot1", 1, 0.5),
@@ -379,9 +377,26 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 
 checkExistence(sql("describe formatted sdkOutputTable"), true, "name")
 
+buildTestDataWithSortColumns(List())
+assert(new File(writerPath).exists())
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+
+// with partition
+sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY 
(age int) STORED BY
+ |'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+
+sql("describe formatted sdkOutputTable").show(false)
+sql("select * from sdkOutputTable").show()
+
+intercept[RuntimeException] {
+  buildTestDataWithSortColumns(List(""))
+}
+
 sql("DROP TABLE sdkOutputTable")
 // drop table should not delete the files
-assert(new File(writerPath).exists())
+assert(!(new File(writerPath).exists()))
 cleanTestData()
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/9aa3a8c2/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
--
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index bf99e05..e846da4 100644
--- 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -377,7 +377,7 @@ public class CarbonWriterBuilder {
 }
 
 List sortColumnsList = new ArrayList<>();
-if (sortColumns == null) {
+if (sortColumns == null || sortColumns.length == 0) {
   // If sort columns are not specified, default set all dimensions to sort 
column.
   // When dimensions are default set to sort column,
   // Inverted index will be supported by default for sort columns.



carbondata git commit: [CARBONDATA-2496] Changed to hadoop bloom implementation and added compress option to compress bloom on disk

2018-05-22 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master d9534c2c0 -> 77a11107c


[CARBONDATA-2496] Changed to hadoop bloom implementation and added compress 
option to compress bloom on disk

This PR replaces the Guava bloom filter with the Hadoop bloom filter, and adds a 
compress option to compress the bloom filter on disk and in memory as well.
The user can use the bloom_compress property to enable/disable compression; by 
default it is enabled.

Bloom performance check: loaded 100 million rows with a bloom datamap on a column 
with a cardinality of 5 million, using 'BLOOM_SIZE'='500', 'bloom_fpp'='0.001'.

This closes #2324
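
A self-contained sketch of the Hadoop bloom filter this PR switches to 
(org.apache.hadoop.util.bloom). The vector size and hash count below are 
arbitrary, and the compression layer added by CarbonBloomFilter is not shown.

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class HadoopBloomSketch {

  public static void main(String[] args) {
    // 64K bits, 3 hash functions; not the values derived from BLOOM_SIZE / bloom_fpp
    BloomFilter bloom = new BloomFilter(64 * 1024, 3, Hash.MURMUR_HASH);
    bloom.add(new Key("cust_001".getBytes()));
    System.out.println(bloom.membershipTest(new Key("cust_001".getBytes())));  // true
    System.out.println(bloom.membershipTest(new Key("cust_999".getBytes())));  // very likely false
  }
}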


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/77a11107
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/77a11107
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/77a11107

Branch: refs/heads/master
Commit: 77a11107c57beebda74925dbb328f7bad6c72136
Parents: d9534c2
Author: ravipesala <ravi.pes...@gmail.com>
Authored: Sun May 20 21:52:57 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Tue May 22 14:24:58 2018 +0530

--
 .../blockletindex/BlockletDataMapFactory.java   |   5 +-
 .../datamap/bloom/BloomCoarseGrainDataMap.java  |   8 +-
 .../bloom/BloomCoarseGrainDataMapFactory.java   |  42 ++--
 .../carbondata/datamap/bloom/BloomDMModel.java  |  35 --
 .../datamap/bloom/BloomDataMapBuilder.java  |  12 ++-
 .../datamap/bloom/BloomDataMapCache.java|  12 +--
 .../datamap/bloom/BloomDataMapWriter.java   |  60 ++-
 .../hadoop/util/bloom/CarbonBloomFilter.java| 108 +++
 8 files changed, 225 insertions(+), 57 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/77a11107/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
index 0188281..318fc6e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
@@ -18,6 +18,7 @@ package org.apache.carbondata.core.indexstore.blockletindex;
 
 import java.io.IOException;
 import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.carbondata.core.cache.Cache;
 import org.apache.carbondata.core.cache.CacheProvider;
@@ -78,7 +79,7 @@ public class BlockletDataMapFactory extends 
CoarseGrainDataMapFactory
   private AbsoluteTableIdentifier identifier;
 
   // segmentId -> list of index file
-  private Map<String, Set> segmentMap = new 
HashMap<>();
+  private Map<String, Set> segmentMap = new 
ConcurrentHashMap<>();
 
   private Cache<TableBlockIndexUniqueIdentifier, BlockletDataMapIndexWrapper> 
cache;
 
@@ -279,7 +280,7 @@ public class BlockletDataMapFactory extends 
CoarseGrainDataMapFactory
   }
 
   @Override
-  public void clear() {
+  public synchronized void clear() {
 if (segmentMap.size() > 0) {
   for (String segmentId : segmentMap.keySet().toArray(new 
String[segmentMap.size()])) {
 clear(new Segment(segmentId, null, null));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/77a11107/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
--
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index 09de25e..a5a141c 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -44,6 +44,7 @@ import org.apache.carbondata.core.util.CarbonUtil;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.bloom.Key;
 
 /**
  * BloomDataCoarseGrainMap is constructed in blocklet level. For each indexed 
column,
@@ -83,7 +84,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
 
   @Override
   public List prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
-  List partitions) throws IOException {
+  List partitions) {
 List hitBlocklets = new ArrayList();
 if (filterExp == null) {
   // null is different from empty here. E

carbondata git commit: [CARBONDATA-2227] Added support to show partition details in describe formatted

2018-05-22 Thread manishgupta88
Repository: carbondata
Updated Branches:
  refs/heads/master 2ebfab151 -> 604902b9a


[CARBONDATA-2227] Added support to show partition details in describe formatted

Added detailed information to the describe formatted command, such as the
partition location and partition values.
Example usage: DESCRIBE FORMATTED <table_name> PARTITION (partition_col_name=partition_value)
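
A minimal sketch of the usage, mirroring the test added in this change (table
name and partition values are illustrative; sql(...) assumes a Spark session
with CarbonData support):

```
sql("CREATE TABLE a (a STRING) PARTITIONED BY (b INT) STORED BY 'carbondata'")
sql("INSERT INTO a VALUES ('a', 1)")
// table level: the partition column appears under the #Partition Information section
sql("DESCRIBE FORMATTED a").show(false)
// partition level: shows Partition Value, Location and Partition Parameters
sql("DESCRIBE FORMATTED a PARTITION (b=1)").show(false)
```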

This closes #2033


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/604902b9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/604902b9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/604902b9

Branch: refs/heads/master
Commit: 604902b9a52ec613c1ec025b4dc33657b179895e
Parents: 2ebfab1
Author: kunal642 <kunalkapoor...@gmail.com>
Authored: Mon Mar 5 20:33:06 2018 +0530
Committer: manishgupta88 <tomanishgupt...@gmail.com>
Committed: Tue May 22 11:37:07 2018 +0530

--
 .../describeTable/TestDescribeTable.scala   | 20 +++
 .../partition/TestDDLForPartitionTable.scala|  2 +-
 .../table/CarbonDescribeFormattedCommand.scala  | 26 +---
 .../sql/execution/strategy/DDLStrategy.scala|  1 +
 4 files changed, 44 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/604902b9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
index fe01598..1e333ee 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala
@@ -28,6 +28,7 @@ class TestDescribeTable extends QueryTest with 
BeforeAndAfterAll {
   override def beforeAll: Unit = {
 sql("DROP TABLE IF EXISTS Desc1")
 sql("DROP TABLE IF EXISTS Desc2")
+sql("drop table if exists a")
 sql("CREATE TABLE Desc1(Dec1Col1 String, Dec1Col2 String, Dec1Col3 int, 
Dec1Col4 double) stored by 'carbondata'")
 sql("DESC Desc1")
 sql("DROP TABLE Desc1")
@@ -56,9 +57,28 @@ class TestDescribeTable extends QueryTest with 
BeforeAndAfterAll {
 assert(sql("desc formatted desc1").count() == 20)
   }
 
+  test("test describe formatted for partition table") {
+sql("create table a(a string) partitioned by (b int) stored by 
'carbondata'")
+sql("insert into a values('a',1)")
+sql("insert into a values('a',2)")
+val desc = sql("describe formatted a").collect()
+assert(desc(desc.indexWhere(_.get(0).toString.contains("#Partition")) + 
2).get(0).toString.contains("b"))
+val descPar = sql("describe formatted a partition(b=1)").collect
+descPar.find(_.get(0).toString.contains("Partition Value:")) match {
+  case Some(row) => assert(row.get(1).toString.contains("1"))
+  case None => fail("Partition Value not found in describe formatted")
+}
+descPar.find(_.get(0).toString.contains("Location:")) match {
+  case Some(row) => 
assert(row.get(1).toString.contains("target/warehouse/a/b=1"))
+  case None => fail("Partition Location not found in describe formatted")
+}
+assert(descPar.exists(_.toString().contains("Partition Parameters:")))
+  }
+
   override def afterAll: Unit = {
 sql("DROP TABLE Desc1")
 sql("DROP TABLE Desc2")
+sql("drop table if exists a")
   }
 
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/604902b9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala
index 2cbafa8..cafd465 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala
@@ -14

[39/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/nation.csv
--
diff --git a/integration/spark-common-test/src/test/resources/tpch/nation.csv 
b/integration/spark-common-test/src/test/resources/tpch/nation.csv
new file mode 100644
index 000..ed3fd5b
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/tpch/nation.csv
@@ -0,0 +1,25 @@
+0|ALGERIA|0| haggle. carefully final deposits detect slyly agai|
+1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold 
requests alon|
+2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are 
about the ironic forges. slyly special |
+3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are 
furiously over the tithes. fluffily bold|
+4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly 
across the furiously regular d|
+5|ETHIOPIA|0|ven packages wake quickly. regu|
+6|FRANCE|3|refully final requests. regular, ironi|
+7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco|
+8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun|
+9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. 
carefully ironic hockey players sleep blithely. carefull|
+10|IRAN|4|efully alongside of the slyly final dependencies. |
+11|IRAQ|4|nic deposits boost atop the quickly final requests? quickly regula|
+12|JAPAN|2|ously. final, express gifts cajole a|
+13|JORDAN|4|ic deposits are blithely about the carefully regular pa|
+14|KENYA|0| pending excuses haggle furiously deposits. pending, express pinto 
beans wake fluffily past t|
+15|MOROCCO|0|rns. blithely bold courts among the closely regular packages use 
furiously bold platelets?|
+16|MOZAMBIQUE|0|s. ironic, unusual asymptotes wake blithely r|
+17|PERU|1|platelets. blithely pending dependencies use fluffily across the 
even pinto beans. carefully silent accoun|
+18|CHINA|2|c dependencies. furiously express notornis sleep slyly regular 
accounts. ideas sleep. depos|
+19|ROMANIA|3|ular asymptotes are about the furious multipliers. express 
dependencies nag above the ironically ironic account|
+20|SAUDI ARABIA|4|ts. silent requests haggle. closely express packages sleep 
across the blithely|
+21|VIETNAM|2|hely enticingly express accounts. even, final |
+22|RUSSIA|3| requests against the platelets use never according to the quickly 
regular pint|
+23|UNITED KINGDOM|3|eans boost carefully special requests. accounts are. 
carefull|
+24|UNITED STATES|1|y final packages. slow foxes cajole quickly. quickly silent 
platelets breach ironic accounts. unusual pinto be|



[40/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/lineitem.csv
--
diff --git a/integration/spark-common-test/src/test/resources/tpch/lineitem.csv 
b/integration/spark-common-test/src/test/resources/tpch/lineitem.csv
new file mode 100644
index 000..e792319
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/tpch/lineitem.csv
@@ -0,0 +1,1000 @@
+1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER
 IN PERSON|TRUCK|egular courts above the|
+1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE 
BACK RETURN|MAIL|ly final dependencies: slyly bold |
+1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE 
BACK RETURN|REG AIR|riously. regular, express dep|
+1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites.
 fluffily even de|
+1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB|
 pending foxes. slyly re|
+1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER
 IN PERSON|MAIL|arefully slyly ex|
+2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE
 BACK RETURN|RAIL|ven requests. deposits breach a|
+3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside
 of the furiously brave acco|
+3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE 
BACK RETURN|RAIL| unusual accounts. eve|
+3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER
 IN PERSON|SHIP|nal foxes wake. |
+3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-01|NONE|TRUCK|y.
 fluffily pending d|
+3|183095|650|5|28|32986.52|0.04|0.00|R|F|1993-12-14|1994-01-10|1994-01-01|TAKE 
BACK RETURN|FOB|ages nag slyly pending|
+3|62143|9662|6|26|28733.64|0.10|0.02|A|F|1993-10-29|1993-12-18|1993-11-04|TAKE 
BACK RETURN|RAIL|ges sleep after the caref|
+4|88035|5560|1|30|30690.90|0.03|0.08|N|O|1996-01-10|1995-12-14|1996-01-18|DELIVER
 IN PERSON|REG AIR|- quickly regular packages sleep. idly|
+5|108570|8571|1|15|23678.55|0.02|0.04|R|F|1994-10-31|1994-08-31|1994-11-20|NONE|AIR|ts
 wake furiously |
+5|123927|3928|2|26|50723.92|0.07|0.08|R|F|1994-10-16|1994-09-25|1994-10-19|NONE|FOB|sts
 use slyly quickly special instruc|
+5|37531|35|3|50|73426.50|0.08|0.03|A|F|1994-08-08|1994-10-13|1994-08-26|DELIVER
 IN PERSON|AIR|eodolites. fluffily unusual|
+6|139636|2150|1|37|61998.31|0.08|0.03|A|F|1992-04-27|1992-05-15|1992-05-02|TAKE
 BACK RETURN|TRUCK|p furiously special foxes|
+7|182052|9607|1|12|13608.60|0.07|0.03|N|O|1996-05-07|1996-03-13|1996-06-03|TAKE
 BACK RETURN|FOB|ss pinto beans wake against th|
+7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-02-19|TAKE 
BACK RETURN|SHIP|es. instructions|
+7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-02-03|COLLECT
 COD|MAIL| unusual reques|
+7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-04-20|NONE|FOB|.
 slyly special requests haggl|
+7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-02-18|DELIVER
 IN PERSON|TRUCK|ns haggle carefully ironic deposits. bl|
+7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-01-22|TAKE 
BACK RETURN|FOB|jole. excuses wake carefully alongside of |
+7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-13|NONE|FOB|ithely
 regula|
+32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-10-26|TAKE
 BACK RETURN|TRUCK|sleep quickly. req|
+32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-08-27|COLLECT
 COD|AIR|lithely regular deposits. fluffily |
+32|44161||3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-23|DELIVER
 IN PERSON|AIR| express accounts wake according to the|
+32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-03|NONE|REG
 AIR|e slyly final pac|
+32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-09-14|DELIVER
 IN PERSON|AIR|symptotes nag according to the ironic depo|
+32|11615|4117|6|6|9159.66|0.04|0.03|N|O|1995-07-21|1995-09-23|1995-07-25|COLLECT
 COD|RAIL| gifts cajole carefully.|
+33|61336|8855|1|31|40217.23|0.09|0.04|A|F|1993-10-29|1993-12-19|1993-11-08|COLLECT
 COD|TRUCK|ng to the furiously ironic package|
+33|60519|5532|2|32|47344.32|0.02|0.05|A|F|1993-12-09|1994-01-04|1993-12-28|COLLECT
 COD|MAIL|gular theodolites|
+33|137469|9983|3|5|7532.30|0.05|0.03|A|F|1993-12-09|1993-12-25|1993-12-23|TAKE 
BACK RETURN|AIR|. stealthily bold exc|
+33|33918|3919|4|41|75928.31|0.09|0.00|R|F|1993-11-09|1994-01-24|1993-11-11|TAKE
 BACK RETURN|MAIL|unusual packages doubt caref|
+34|88362|871|1|13|17554.68|0.00|0.07|N|O|1998-10-23|1998-09-14|1998-11-06|NONE|REG
 AIR|nic accounts. deposits are alon|

[25/50] [abbrv] carbondata git commit: [CARBONDATA-2369] updated the document about AVRO to carbon schema converter

2018-05-17 Thread manishgupta88
[CARBONDATA-2369] updated the document about AVRO to carbon schema converter

Updated the documentation for the Avro to Carbon schema converter.

This closes #2296


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/443b717d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/443b717d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/443b717d

Branch: refs/heads/spark-2.3
Commit: 443b717d20e0eedf8be75dae177d573e53c050fc
Parents: d85fb72
Author: ajantha-bhat 
Authored: Thu May 10 21:03:28 2018 +0530
Committer: kumarvishal09 
Committed: Fri May 11 15:47:17 2018 +0530

--
 README.md|  1 +
 docs/sdk-writer-guide.md | 25 ++---
 2 files changed, 19 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/443b717d/README.md
--
diff --git a/README.md b/README.md
index 3f45917..4b4577e 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ CarbonData is built using Apache Maven, to [build 
CarbonData](https://github.com
 * [Cluster Installation and 
Deployment](https://github.com/apache/carbondata/blob/master/docs/installation-guide.md)
 * [Configuring 
Carbondata](https://github.com/apache/carbondata/blob/master/docs/configuration-parameters.md)
 * [Streaming 
Ingestion](https://github.com/apache/carbondata/blob/master/docs/streaming-guide.md)
+* [SDK Writer 
Guide](https://github.com/apache/carbondata/blob/master/docs/sdk-writer-guide.md)
 * [CarbonData Pre-aggregate 
DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/preaggregate-datamap-guide.md)
 * [CarbonData Timeseries 
DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/timeseries-datamap-guide.md)
 * [FAQ](https://github.com/apache/carbondata/blob/master/docs/faq.md)

http://git-wip-us.apache.org/repos/asf/carbondata/blob/443b717d/docs/sdk-writer-guide.md
--
diff --git a/docs/sdk-writer-guide.md b/docs/sdk-writer-guide.md
index 18b583a..9878b71 100644
--- a/docs/sdk-writer-guide.md
+++ b/docs/sdk-writer-guide.md
@@ -52,6 +52,7 @@ import java.io.IOException;
 
 import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.sdk.file.AvroCarbonWriter;
 import org.apache.carbondata.sdk.file.CarbonWriter;
 import org.apache.carbondata.sdk.file.Field;
 
@@ -75,25 +76,24 @@ public class TestSdkAvro {
 "   \"type\" : \"record\"," +
 "   \"name\" : \"Acme\"," +
 "   \"fields\" : ["
-+ "{ \"name\" : \"name\", \"type\" : \"string\" },"
++ "{ \"name\" : \"fname\", \"type\" : \"string\" },"
 + "{ \"name\" : \"age\", \"type\" : \"int\" }]" +
 "}";
 
-String json = "{\"name\":\"bob\", \"age\":10}";
+String json = "{\"fname\":\"bob\", \"age\":10}";
 
 // conversion to GenericData.Record
 JsonAvroConverter converter = new JsonAvroConverter();
 GenericData.Record record = converter.convertToGenericDataRecord(
 json.getBytes(CharEncoding.UTF_8), new 
org.apache.avro.Schema.Parser().parse(avroSchema));
 
-// for sdk schema
-Field[] fields = new Field[2];
-fields[0] = new Field("name", DataTypes.STRING);
-fields[1] = new Field("age", DataTypes.STRING);
+// prepare carbon schema from avro schema 
+org.apache.carbondata.sdk.file.Schema carbonSchema =
+AvroCarbonWriter.getCarbonSchemaFromAvroSchema(avroSchema);
 
 try {
   CarbonWriter writer = CarbonWriter.builder()
-  .withSchema(new org.apache.carbondata.sdk.file.Schema(fields))
+  .withSchema(carbonSchema)
   .outputPath(path)
   .buildWriterForAvroInput();
 
@@ -345,4 +345,15 @@ public Schema(Field[] fields);
 * @return Schema
 */
 public static Schema parseJson(String json);
+```
+
+### Class org.apache.carbondata.sdk.file.AvroCarbonWriter
+```
+/**
+* converts avro schema to carbon schema, required by carbonWriter
+*
+* @param avroSchemaString json formatted avro schema as string
+* @return carbon sdk schema
+*/
+public static org.apache.carbondata.sdk.file.Schema 
getCarbonSchemaFromAvroSchema(String avroSchemaString);
 ```
\ No newline at end of file



[45/50] [abbrv] carbondata git commit: [CARBONDATA-2370] Added document for presto multinode setup for carbondata

2018-05-17 Thread manishgupta88
[CARBONDATA-2370] Added document for presto multinode setup for carbondata

Added document for presto multinode setup for carbondata

This closes #2199


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3d23fa69
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3d23fa69
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3d23fa69

Branch: refs/heads/spark-2.3
Commit: 3d23fa693a604701e3ab3b574b20f21d089e8b43
Parents: 1c5b526
Author: Geetika Gupta 
Authored: Fri Apr 20 16:47:35 2018 +0530
Committer: chenliang613 
Committed: Mon May 14 21:43:24 2018 +0800

--
 .../Presto_Cluster_Setup_For_Carbondata.md  | 133 +++
 1 file changed, 133 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3d23fa69/integration/presto/Presto_Cluster_Setup_For_Carbondata.md
--
diff --git a/integration/presto/Presto_Cluster_Setup_For_Carbondata.md 
b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md
new file mode 100644
index 000..082b8fe
--- /dev/null
+++ b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md
@@ -0,0 +1,133 @@
+# Presto Multinode Cluster setup For Carbondata
+
+## Installing Presto
+
+  1. Download the 0.187 version of Presto using:
+  `wget 
https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.187/presto-server-0.187.tar.gz`
+
+  2. Extract Presto tar file: `tar zxvf presto-server-0.187.tar.gz`.
+
+  3. Download the Presto CLI for the coordinator and name it presto.
+
+  ```
+wget 
https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/0.187/presto-cli-0.187-executable.jar
+
+mv presto-cli-0.187-executable.jar presto
+
+chmod +x presto
+  ```
+
+ ## Create Configuration Files
+
+  1. Create `etc` folder in presto-server-0.187 directory.
+  2. Create `config.properties`, `jvm.config`, `log.properties`, and 
`node.properties` files.
+  3. Install uuid to generate a node.id.
+
+  ```
+  sudo apt-get install uuid
+
+  uuid
+  ```
+
+
+# Contents of your node.properties file
+
+  ```
+  node.environment=production
+  node.id=
+  node.data-dir=/home/ubuntu/data
+  ```
+
+# Contents of your jvm.config file
+
+  ```
+  -server
+  -Xmx16G
+  -XX:+UseG1GC
+  -XX:G1HeapRegionSize=32M
+  -XX:+UseGCOverheadLimit
+  -XX:+ExplicitGCInvokesConcurrent
+  -XX:+HeapDumpOnOutOfMemoryError
+  -XX:OnOutOfMemoryError=kill -9 %p
+  ```
+
+# Contents of your log.properties file
+  ```
+  com.facebook.presto=INFO
+  ```
+
+ The default minimum level is `INFO`. There are four levels: `DEBUG`, `INFO`, 
`WARN` and `ERROR`.
+
+## Coordinator Configurations
+
+  # Contents of your config.properties
+  ```
+  coordinator=true
+  node-scheduler.include-coordinator=false
+  http-server.http.port=8086
+  query.max-memory=50GB
+  query.max-memory-per-node=2GB
+  discovery-server.enabled=true
+  discovery.uri=:8086
+  ```
+The options `node-scheduler.include-coordinator=false` and `coordinator=true` 
indicate that the node is the coordinator and tells the coordinator not to do 
any of the computation work itself and to use the workers.
+
+**Note**: We recommend setting `query.max-memory-per-node` to half of the JVM 
config max memory, though if your workload is highly concurrent, you may want 
to use a lower value for `query.max-memory-per-node`.
+
+Also relation between below two configuration-properties should be like:
+If, `query.max-memory-per-node=30GB`
+Then, `query.max-memory=<30GB * number of nodes>`.
+
+## Worker Configurations
+
+# Contents of your config.properties
+
+  ```
+  coordinator=false
+  http-server.http.port=8086
+  query.max-memory=50GB
+  query.max-memory-per-node=2GB
+  discovery.uri=:8086
+  ```
+
+**Note**: `jvm.config` and `node.properties` files are same for all the nodes 
(worker + coordinator). All the nodes should have different `node.id`.
+
+## Catalog Configurations
+
+1. Create a folder named `catalog` in etc directory of presto on all the nodes 
of the cluster including the coordinator.
+
+# Configuring Carbondata in Presto
+1. Create a file named `carbondata.properties` in the `catalog` folder and set 
the required properties on all the nodes.
+
+## Add Plugins
+
+1. Create a directory named `carbondata` in plugin directory of presto.
+2. Copy `carbondata` jars to `plugin/carbondata` directory on all nodes.
+
+## Start Presto Server on all nodes
+
+```
+./presto-server-0.187/bin/launcher start
+```
+To run it as a background process.
+
+```
+./presto-server-0.187/bin/launcher run
+```
+To run it in foreground.
+
+## Start Presto CLI
+```
+./presto
+```
+To connect to carbondata catalog use the following command:
+
+```
+./presto 

[43/50] [abbrv] carbondata git commit: [CARBONDATA-2431] Incremental data added after external table creation is not reflecting while doing select query issue is fixed.

2018-05-17 Thread manishgupta88
[CARBONDATA-2431] Fixed an issue where incremental data added after external
table creation was not reflected in select queries.
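
A rough sketch of the scenario being fixed; writeDataWithSdk is a hypothetical
stand-in for an SDK CarbonWriter load, and writerPath/sdkOutputTable are
illustrative names:

```
// initial SDK load and external table creation
writeDataWithSdk(writerPath, rows = 3)
sql(
  s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata'
     |LOCATION '$writerPath'""".stripMargin)
sql("SELECT COUNT(*) FROM sdkOutputTable").show()   // 3

// incremental SDK load into the same location after the table already exists
writeDataWithSdk(writerPath, rows = 3)
sql("SELECT COUNT(*) FROM sdkOutputTable").show()   // should now reflect 6
```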

This closes #2262


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f1a6c7cf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f1a6c7cf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f1a6c7cf

Branch: refs/heads/spark-2.3
Commit: f1a6c7cf548cd33ef26bd99f26c7fcf7e367c9c7
Parents: 2881c6b
Author: rahulforallp 
Authored: Thu May 3 14:11:12 2018 +0530
Committer: ravipesala 
Committed: Mon May 14 11:27:33 2018 +0530

--
 .../core/datamap/DataMapStoreManager.java   |  19 +--
 .../apache/carbondata/core/datamap/Segment.java |   7 +
 .../LatestFilesReadCommittedScope.java  |  32 -
 .../ReadCommittedIndexFileSnapShot.java |  10 +-
 .../core/readcommitter/ReadCommittedScope.java  |   5 +
 .../TableStatusReadCommittedScope.java  |  13 ++
 .../core/statusmanager/SegmentRefreshInfo.java  |  65 +
 .../hadoop/api/CarbonTableInputFormat.java  |  10 +-
 .../TestNonTransactionalCarbonTable.scala   | 136 +++
 9 files changed, 282 insertions(+), 15 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f1a6c7cf/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index a3be26a..072b86e 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -43,6 +43,7 @@ import 
org.apache.carbondata.core.metadata.schema.table.DiskBasedDMSchemaStorage
 import org.apache.carbondata.core.metadata.schema.table.RelationIdentifier;
 import org.apache.carbondata.core.mutate.SegmentUpdateDetails;
 import org.apache.carbondata.core.mutate.UpdateVO;
+import org.apache.carbondata.core.statusmanager.SegmentRefreshInfo;
 import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonSessionInfo;
@@ -454,7 +455,7 @@ public final class DataMapStoreManager {
 
 // This map stores the latest segment refresh time.So in case of 
update/delete we check the
 // time against this map.
-private Map segmentRefreshTime = new HashMap<>();
+private Map segmentRefreshTime = new 
HashMap<>();
 
 // This map keeps the manual refresh entries from users. It is mainly used 
for partition
 // altering.
@@ -465,23 +466,25 @@ public final class DataMapStoreManager {
   SegmentUpdateDetails[] updateStatusDetails = 
statusManager.getUpdateStatusDetails();
   for (SegmentUpdateDetails updateDetails : updateStatusDetails) {
 UpdateVO updateVO = 
statusManager.getInvalidTimestampRange(updateDetails.getSegmentName());
-segmentRefreshTime.put(updateVO.getSegmentId(), 
updateVO.getCreatedOrUpdatedTimeStamp());
+segmentRefreshTime.put(updateVO.getSegmentId(),
+new SegmentRefreshInfo(updateVO.getCreatedOrUpdatedTimeStamp(), 
0));
   }
 }
 
-public boolean isRefreshNeeded(String segmentId, 
SegmentUpdateStatusManager statusManager) {
-  UpdateVO updateVO = statusManager.getInvalidTimestampRange(segmentId);
+public boolean isRefreshNeeded(Segment seg, UpdateVO updateVo) throws 
IOException {
+  SegmentRefreshInfo segmentRefreshInfo =
+  seg.getSegmentRefreshInfo(updateVo);
+  String segmentId = seg.getSegmentNo();
   if (segmentRefreshTime.get(segmentId) == null) {
-segmentRefreshTime.put(segmentId, 
updateVO.getCreatedOrUpdatedTimeStamp());
+segmentRefreshTime.put(segmentId, segmentRefreshInfo);
 return true;
   }
   if (manualSegmentRefresh.get(segmentId) != null && 
manualSegmentRefresh.get(segmentId)) {
 manualSegmentRefresh.put(segmentId, false);
 return true;
   }
-  Long updateTimestamp = updateVO.getLatestUpdateTimestamp();
-  boolean isRefresh =
-  updateTimestamp != null && (updateTimestamp > 
segmentRefreshTime.get(segmentId));
+
+  boolean isRefresh = 
segmentRefreshInfo.compare(segmentRefreshTime.get(segmentId));
   if (isRefresh) {
 segmentRefreshTime.remove(segmentId);
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f1a6c7cf/core/src/main/java/org/apache/carbondata/core/datamap/Segment.java

[50/50] [abbrv] carbondata git commit: [CARBONDATA-2465] Improve the carbondata file reliability in data load when direct hdfs write is enabled

2018-05-17 Thread manishgupta88
[CARBONDATA-2465] Improve the carbondata file reliability in data load when 
direct hdfs write is enabled

Problem: At present, if direct write to HDFS is enabled, data is written with
a replication factor of 1, which can cause data loss.
Solution: Write with the cluster's replication factor. With this change there
is no need to invoke CompleteHdfsBackendThread/completeRemainingHdfsReplicas
for the direct HDFS write case.
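
A minimal sketch (an assumption about usage, not part of this patch) of
turning the direct HDFS write path on through the existing load option
constant:

```
import org.apache.carbondata.core.constants.CarbonLoadOptionConstants
import org.apache.carbondata.core.util.CarbonProperties

// write carbondata files directly to HDFS during data load; with this fix
// the files are created with the cluster's configured replication factor
CarbonProperties.getInstance().addProperty(
  CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS, "true")
```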

This closes #2235


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fe16566
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fe16566
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fe16566

Branch: refs/heads/spark-2.3
Commit: 8fe165668e2662455991f9de6af817ccc99b81ee
Parents: 6297ea0
Author: KanakaKumar 
Authored: Thu Apr 26 23:39:29 2018 +0530
Committer: kunal642 
Committed: Thu May 17 19:42:59 2018 +0530

--
 .../apache/carbondata/core/util/CarbonUtil.java | 27 ---
 .../store/writer/AbstractFactDataWriter.java| 47 ++--
 2 files changed, 14 insertions(+), 60 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe16566/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index ac0a800..9dc4aa2 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -2817,33 +2817,6 @@ public final class CarbonUtil {
   }
 
   /**
-   * This method will complete the remaining hdfs replications
-   *
-   * @param fileName hdfs file name
-   * @param fileType filetype
-   * @throws CarbonDataWriterException if error occurs
-   */
-  public static void completeRemainingHdfsReplicas(String fileName, 
FileFactory.FileType fileType)
-throws CarbonDataWriterException {
-try {
-  long startTime = System.currentTimeMillis();
-  short replication = FileFactory.getDefaultReplication(fileName, 
fileType);
-  if (1 == replication) {
-return;
-  }
-  boolean replicateFlag = FileFactory.setReplication(fileName, fileType, 
replication);
-  if (!replicateFlag) {
-LOGGER.error("Failed to set replication for " + fileName + " with 
factor " + replication);
-  }
-  LOGGER.info(
-  "Total copy time (ms) to copy file " + fileName + " is " + 
(System.currentTimeMillis()
-  - startTime));
-} catch (IOException e) {
-  throw new CarbonDataWriterException("Problem while completing remaining 
HDFS backups", e);
-}
-  }
-
-  /**
* This method will read the local carbon data file and write to carbon data 
file in HDFS
*
* @param carbonStoreFilePath

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe16566/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
index 6e557cd..8115f97 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
@@ -176,6 +176,7 @@ public abstract class AbstractFactDataWriter implements 
CarbonFactDataWriter {
 CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS,
 
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS_DEFAULT);
 this.enableDirectlyWriteData2Hdfs = 
"TRUE".equalsIgnoreCase(directlyWriteData2Hdfs);
+
 if (enableDirectlyWriteData2Hdfs) {
   LOGGER.info("Carbondata will directly write fact data to HDFS.");
 } else {
@@ -274,22 +275,13 @@ public abstract class AbstractFactDataWriter implements 
CarbonFactDataWriter {
   protected void commitCurrentFile(boolean copyInCurrentThread) {
 notifyDataMapBlockEnd();
 CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel);
-if (enableDirectlyWriteData2Hdfs) {
-  if (copyInCurrentThread) {
-CarbonUtil.completeRemainingHdfsReplicas(carbonDataFileHdfsPath,
-FileFactory.FileType.HDFS);
-  } else {
-executorServiceSubmitList.add(executorService.submit(
-new CompleteHdfsBackendThread(carbonDataFileHdfsPath, 
FileFactory.FileType.HDFS)));
-  }
-} else {
+if (!enableDirectlyWriteData2Hdfs) {
   if 

[44/50] [abbrv] carbondata git commit: [CARBONDATA-2468] addition of column to default sort_column is handled

2018-05-17 Thread manishgupta88
[CARBONDATA-2468] addition of column to default sort_column is handled

Issue: handling for the default sort_columns case (no sort columns specified)
was missing.
Solution: added a condition to handle the default sort_columns case.
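
As an illustration of the default case (sketch only; field names, values and
output path are made up): when no sort columns are passed to the SDK writer
builder, all dimension columns are treated as sort columns by default:

```
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}

val fields = Array(new Field("name", DataTypes.STRING), new Field("age", DataTypes.INT))
val writer = CarbonWriter.builder()
  .withSchema(new Schema(fields))
  .outputPath("/tmp/sdk_output")
  .buildWriterForCSVInput()   // no sortBy(...) -> default sort columns apply
writer.write(Array("robot0", "0"))
writer.close()
```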

This closes #2293


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1c5b5265
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1c5b5265
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1c5b5265

Branch: refs/heads/spark-2.3
Commit: 1c5b5265921e89f20c5f5b69c135c9a1acc2d1df
Parents: f1a6c7c
Author: rahulforallp 
Authored: Thu May 10 16:17:39 2018 +0530
Committer: kumarvishal09 
Committed: Mon May 14 17:47:45 2018 +0530

--
 .../TestNonTransactionalCarbonTable.scala   | 28 +---
 .../sdk/file/CarbonWriterBuilder.java   |  8 +-
 2 files changed, 32 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c5b5265/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 58ce5fa..fb9c862 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -98,9 +98,9 @@ class TestNonTransactionalCarbonTable extends QueryTest with 
BeforeAndAfterAll {
 buildTestData(3, false, options)
   }
 
-  def buildTestDataWithSortColumns(): Any = {
+  def buildTestDataWithSortColumns(sortColumns: List[String]): Any = {
 FileUtils.deleteDirectory(new File(writerPath))
-buildTestData(3, false, null, List("age", "name"))
+buildTestData(3, false, null, sortColumns)
   }
 
   def buildTestData(rows: Int, persistSchema: Boolean, options: 
util.Map[String, String]): Any = {
@@ -302,7 +302,7 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
   }
 
   test("test create external table with sort columns") {
-buildTestDataWithSortColumns()
+buildTestDataWithSortColumns(List("age","name"))
 assert(new File(writerPath).exists())
 sql("DROP TABLE IF EXISTS sdkOutputTable")
 
@@ -316,6 +316,28 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 
 checkExistence(sql("describe formatted sdkOutputTable"), true, writerPath)
 
+buildTestDataWithSortColumns(List("age"))
+assert(new File(writerPath).exists())
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+// with partition
+sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY 
(age int) STORED BY
+ |'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+
+checkExistence(sql("describe formatted sdkOutputTable"), true, "age")
+
+buildTestDataSingleFile()
+assert(new File(writerPath).exists())
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+// with partition
+sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY 
(age int) STORED BY
+ |'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+
+checkExistence(sql("describe formatted sdkOutputTable"), true, "name")
+
 sql("DROP TABLE sdkOutputTable")
 // drop table should not delete the files
 assert(new File(writerPath).exists())

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c5b5265/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
--
diff --git 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index 00ba8a5..1816539 100644
--- 
a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -445,6 +445,7 @@ public class CarbonWriterBuilder {
 "column: " + sortColumn + " specified in sort columns does not 
exist in schema");
   }
 }
+int i = 0;
 for (Field field : fields) {
   if (null != field) {
 int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
@@ -481,9 +482,14 @@ public class CarbonWriterBuilder {
   ColumnSchema columnSchema = tableSchemaBuilder

[37/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/region.csv
--
diff --git a/integration/spark-common-test/src/test/resources/tpch/region.csv 
b/integration/spark-common-test/src/test/resources/tpch/region.csv
new file mode 100644
index 000..c5ebb63
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/tpch/region.csv
@@ -0,0 +1,5 @@
+0|AFRICA|lar deposits. blithely final packages cajole. regular waters are 
final requests. regular accounts are according to |
+1|AMERICA|hs use ironic, even requests. s|
+2|ASIA|ges. thinly even pinto beans ca|
+3|EUROPE|ly final courts cajole furiously final excuse|
+4|MIDDLE EAST|uickly special accounts cajole carefully blithely close 
requests. carefully final asymptotes haggle furiousl|



[47/50] [abbrv] carbondata git commit: [CARBONDATA-2486][DOC] Update set search mode information in the documentation

2018-05-17 Thread manishgupta88
[CARBONDATA-2486][DOC] Update set search mode information in the documentation

Update set search mode information in the documentation
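
A minimal sketch of enabling it from code (the property key is taken from the
documentation row added below; setting it via CarbonProperties is an
assumption about usage):

```
import org.apache.carbondata.core.util.CarbonProperties

// use CarbonReader-based distributed scan instead of the Spark compute path
CarbonProperties.getInstance().addProperty("carbon.search.enabled", "true")
```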

This closes #2312


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d302a8b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d302a8b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d302a8b

Branch: refs/heads/spark-2.3
Commit: 1d302a8b563c80e762985e10ed2bf9c9917e812d
Parents: fc4b7f9
Author: xubo245 <601450...@qq.com>
Authored: Wed May 16 20:43:59 2018 +0800
Committer: chenliang613 
Committed: Thu May 17 11:55:19 2018 +0800

--
 docs/configuration-parameters.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d302a8b/docs/configuration-parameters.md
--
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index e39d61b..11cc6ea 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -133,7 +133,8 @@ This section provides the details of all the configurations 
required for CarbonD
 | carbon.enableMinMax | true | Min max is feature added to enhance query 
performance. To disable this feature, set it false. |
 | carbon.dynamicallocation.schedulertimeout | 5 | Specifies the maximum time 
(unit in seconds) the scheduler can wait for executor to be active. Minimum 
value is 5 sec and maximum value is 15 sec. |
 | carbon.scheduler.minregisteredresourcesratio | 0.8 | Specifies the minimum 
resource (executor) ratio needed for starting the block distribution. The 
default value is 0.8, which indicates 80% of the requested resource is 
allocated for starting block distribution.  The minimum value is 0.1 min and 
the maximum value is 1.0. | 
-  
+| carbon.search.enabled | false | If set to true, it will use CarbonReader to 
do distributed scan directly instead of using compute framework like spark, 
thus avoiding limitation of compute framework like SQL optimizer and task 
scheduling overhead. |
+
 * **Global Dictionary Configurations**
   
 | Parameter | Default Value | Description |



[27/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala
--
diff --git 
a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala
 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala
new file mode 100644
index 000..8262dfa
--- /dev/null
+++ 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala
@@ -0,0 +1,4293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.testutil
+
+object Tpcds_1_4_QueryBatch {
+
+  // should be random generated based on scale
+  // RC=ulist(random(1, rowcount("store_sales")/5,uniform),5);
+  val rc = Array(100, 100, 100, 100, 100)
+
+  // Queries the TPCDS 1.4 queries using the qualifcations values in the 
templates.
+
+  val tpcds1_4Queries = Seq(
+("q1",
+  """
+| WITH customer_total_return AS
+|   (SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS 
ctr_store_sk,
+|   sum(sr_return_amt) AS ctr_total_return
+|FROM store_returns, date_dim
+|WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000
+|GROUP BY sr_customer_sk, sr_store_sk)
+| SELECT c_customer_id
+|   FROM customer_total_return ctr1, store, customer
+|   WHERE ctr1.ctr_total_return >
+|(SELECT avg(ctr_total_return)*1.2
+|  FROM customer_total_return ctr2
+|   WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+|   AND s_store_sk = ctr1.ctr_store_sk
+|   AND s_state = 'TN'
+|   AND ctr1.ctr_customer_sk = c_customer_sk
+|   ORDER BY c_customer_id LIMIT 100
+  """.stripMargin),
+("q2",
+  """
+| WITH wscs as
+| (SELECT sold_date_sk, sales_price
+|  FROM (SELECT ws_sold_date_sk sold_date_sk, ws_ext_sales_price 
sales_price
+|FROM web_sales) x
+|UNION ALL
+|   (SELECT cs_sold_date_sk sold_date_sk, cs_ext_sales_price 
sales_price
+|FROM catalog_sales)),
+| wswscs AS
+| (SELECT d_week_seq,
+|sum(case when (d_day_name='Sunday') then sales_price else 
null end) sun_sales,
+|sum(case when (d_day_name='Monday') then sales_price else 
null end) mon_sales,
+|sum(case when (d_day_name='Tuesday') then sales_price else  
null end) tue_sales,
+|sum(case when (d_day_name='Wednesday') then sales_price else 
null end) wed_sales,
+|sum(case when (d_day_name='Thursday') then sales_price else 
null end) thu_sales,
+|sum(case when (d_day_name='Friday') then sales_price else 
null end) fri_sales,
+|sum(case when (d_day_name='Saturday') then sales_price else 
null end) sat_sales
+| FROM wscs, date_dim
+| WHERE d_date_sk = sold_date_sk
+| GROUP BY d_week_seq)
+| SELECT d_week_seq1
+|   ,round(sun_sales1/sun_sales2,2)
+|   ,round(mon_sales1/mon_sales2,2)
+|   ,round(tue_sales1/tue_sales2,2)
+|   ,round(wed_sales1/wed_sales2,2)
+|   ,round(thu_sales1/thu_sales2,2)
+|   ,round(fri_sales1/fri_sales2,2)
+|   ,round(sat_sales1/sat_sales2,2)
+| FROM
+| (SELECT wswscs.d_week_seq d_week_seq1
+|,sun_sales sun_sales1
+|,mon_sales mon_sales1
+|,tue_sales tue_sales1
+|,wed_sales wed_sales1
+|,thu_sales thu_sales1
+|,fri_sales fri_sales1
+|,sat_sales sat_sales1
+|  FROM wswscs,date_dim
+|  WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y,
+| (SELECT wswscs.d_week_seq d_week_seq2
+|,sun_sales sun_sales2
+|,mon_sales mon_sales2
+|,tue_sales tue_sales2
+|,wed_sales wed_sales2
+|,thu_sales thu_sales2
+|

[26/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala
--
diff --git 
a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala
 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala
new file mode 100644
index 000..97772c7
--- /dev/null
+++ 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala
@@ -0,0 +1,819 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.testutil
+
+object Tpcds_1_4_Tables {
+  val tpcds1_4Tables = Seq[String](
+s"""
+   |CREATE TABLE catalog_sales (
+   |  `cs_sold_date_sk` int,
+   |  `cs_sold_time_sk` int,
+   |  `cs_ship_date_sk` int,
+   |  `cs_bill_customer_sk` int,
+   |  `cs_bill_cdemo_sk` int,
+   |  `cs_bill_hdemo_sk` int,
+   |  `cs_bill_addr_sk` int,
+   |  `cs_ship_customer_sk` int,
+   |  `cs_ship_cdemo_sk` int,
+   |  `cs_ship_hdemo_sk` int,
+   |  `cs_ship_addr_sk` int,
+   |  `cs_call_center_sk` int,
+   |  `cs_catalog_page_sk` int,
+   |  `cs_ship_mode_sk` int,
+   |  `cs_warehouse_sk` int,
+   |  `cs_item_sk` int,
+   |  `cs_promo_sk` int,
+   |  `cs_order_number` bigint,
+   |  `cs_quantity` int,
+   |  `cs_wholesale_cost` decimal(7,2),
+   |  `cs_list_price` decimal(7,2),
+   |  `cs_sales_price` decimal(7,2),
+   |  `cs_ext_discount_amt` decimal(7,2),
+   |  `cs_ext_sales_price` decimal(7,2),
+   |  `cs_ext_wholesale_cost` decimal(7,2),
+   |  `cs_ext_list_price` decimal(7,2),
+   |  `cs_ext_tax` decimal(7,2),
+   |  `cs_coupon_amt` decimal(7,2),
+   |  `cs_ext_ship_cost` decimal(7,2),
+   |  `cs_net_paid` decimal(7,2),
+   |  `cs_net_paid_inc_tax` decimal(7,2),
+   |  `cs_net_paid_inc_ship` decimal(7,2),
+   |  `cs_net_paid_inc_ship_tax` decimal(7,2),
+   |  `cs_net_profit` decimal(7,2)
+   |)
+   |STORED BY 'org.apache.carbondata.format'
+  """.stripMargin.trim,
+s"""
+   |CREATE TABLE catalog_returns (
+   |  `cr_returned_date_sk` int,
+   |  `cr_returned_time_sk` int,
+   |  `cr_item_sk` int,
+   |  `cr_refunded_customer_sk` int,
+   |  `cr_refunded_cdemo_sk` int,
+   |  `cr_refunded_hdemo_sk` int,
+   |  `cr_refunded_addr_sk` int,
+   |  `cr_returning_customer_sk` int,
+   |  `cr_returning_cdemo_sk` int,
+   |  `cr_returning_hdemo_sk` int,
+   |  `cr_returning_addr_sk` int,
+   |  `cr_call_center_sk` int,
+   |  `cr_catalog_page_sk` int,
+   |  `cr_ship_mode_sk` int,
+   |  `cr_warehouse_sk` int,
+   |  `cr_reason_sk` int,
+   |  `cr_order_number` bigint,
+   |  `cr_return_quantity` int,
+   |  `cr_return_amount` decimal(7,2),
+   |  `cr_return_tax` decimal(7,2),
+   |  `cr_return_amt_inc_tax` decimal(7,2),
+   |  `cr_fee` decimal(7,2),
+   |  `cr_return_ship_cost` decimal(7,2),
+   |  `cr_refunded_cash` decimal(7,2),
+   |  `cr_reversed_charge` decimal(7,2),
+   |  `cr_store_credit` decimal(7,2),
+   |  `cr_net_loss` decimal(7,2)
+   |)
+   |STORED BY 'org.apache.carbondata.format'
+  """.stripMargin.trim,
+s"""
+   |CREATE TABLE inventory (
+   |  `inv_date_sk` int,
+   |  `inv_item_sk` int,
+   |  `inv_warehouse_sk` int,
+   |  `inv_quantity_on_hand` int
+   |)
+   |STORED BY 'org.apache.carbondata.format'
+  """.stripMargin.trim,
+s"""
+   |CREATE TABLE store_sales (
+   |  `ss_sold_date_sk` int,
+   |  `ss_sold_time_sk` int,
+   |  `ss_item_sk` int,
+   |  `ss_customer_sk` int,
+   |  `ss_cdemo_sk` int,
+   |  `ss_hdemo_sk` int,
+   |  `ss_addr_sk` int,
+   |  `ss_store_sk` int,
+   |  `ss_promo_sk` int,
+   |  `ss_ticket_number` bigint,
+   |  `ss_quantity` int,
+   |  `ss_wholesale_cost` decimal(7,2),
+   |  `ss_list_price` decimal(7,2),
+   |  `ss_sales_price` decimal(7,2),
+   |  `ss_ext_discount_amt` 

[33/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala
--
diff --git 
a/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala 
b/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala
new file mode 100644
index 000..074d369
--- /dev/null
+++ 
b/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.rewrite
+
+import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, 
AttributeMap, Expression, PredicateHelper}
+import org.apache.spark.sql.catalyst.expressions.aggregate._
+
+import org.apache.carbondata.mv.plans.modular
+import org.apache.carbondata.mv.plans.modular.ModularPlan
+
+/**
+ * Utility functions used by mqo matcher to convert our plan to new 
aggregation code path
+ */
+private[rewrite] object Utils extends PredicateHelper {
+
+  // use for match qb_2a, qb_2q and sel_3a, sel_3q
+  private def doMatch(
+  operator_a: modular.Matchable,
+  operator_q: modular.Matchable,
+  alias_m: AttributeMap[Alias]): Option[modular.Matchable] = {
+var matchable = true
+val matched = operator_q.transformExpressions {
+  case cnt_q@AggregateExpression(Count(exprs_q), _, false, _) =>
+operator_a.outputList.find {
+  case alias: Alias if alias_m.contains(alias.toAttribute) &&
+   
alias_m(alias.toAttribute).child.isInstanceOf[AggregateExpression] &&
+   
alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression]
+ .aggregateFunction.isInstanceOf[Count] =>
+// case for groupby
+val cnt_a = 
alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression]
+val exprs_a = cnt_a.aggregateFunction.asInstanceOf[Count].children
+if (cnt_a.isDistinct != cnt_q.isDistinct || exprs_q.length != 
exprs_a.length) {
+  false
+} else {
+  exprs_a.sortBy(_.hashCode()).zip(exprs_q.sortBy(_.hashCode()))
+.forall(p => p._1.semanticEquals(p._2))
+}
+
+  case attr: Attribute if alias_m.contains(attr) &&
+  
alias_m(attr).child.isInstanceOf[AggregateExpression] &&
+  
alias_m(attr).child.asInstanceOf[AggregateExpression]
+.aggregateFunction.isInstanceOf[Count] =>
+val cnt_a = alias_m(attr).child.asInstanceOf[AggregateExpression]
+val exprs_a = cnt_a.aggregateFunction.asInstanceOf[Count].children
+if (cnt_a.isDistinct != cnt_q.isDistinct || exprs_q.length != 
exprs_a.length) {
+  false
+} else {
+  exprs_a.sortBy(_.hashCode()).zip(exprs_q.sortBy(_.hashCode()))
+.forall(p => p._1.semanticEquals(p._2))
+}
+
+  case _ => false
+}.map { cnt => AggregateExpression(
+Sum(cnt.toAttribute),
+cnt_q.mode,
+isDistinct = false,
+cnt_q.resultId)
+}.getOrElse { matchable = false; cnt_q }
+
+  case sum_q@AggregateExpression(Sum(expr_q), _, false, _) =>
+operator_a.outputList.find {
+  case alias: Alias if alias_m.contains(alias.toAttribute) &&
+   
alias_m(alias.toAttribute).child.isInstanceOf[AggregateExpression] &&
+   
alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression]
+ .aggregateFunction.isInstanceOf[Sum] =>
+val sum_a = 
alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression]
+val expr_a = sum_a.aggregateFunction.asInstanceOf[Sum].child
+if (sum_a.isDistinct != sum_q.isDistinct) {
+  false
+} else {
+  expr_a.semanticEquals(expr_q)
+}
+
+  case attr: Attribute if alias_m.contains(attr) &&
+  

[48/50] [abbrv] carbondata git commit: [CARBONDATA-2479] Multiple issue fixes in SDK writer and external table flow

2018-05-17 Thread manishgupta88
[CARBONDATA-2479] Multiple issue fixes in SDK writer and external table flow

[CARBONDATA-2479] Multiple issues:
fixed external table path display
fixed the default value for arrays in Avro
fixed an NPE when the data folder is deleted before a second select query
fixed the Avro float value precision change issue

This closes #2306


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf1b50bc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf1b50bc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf1b50bc

Branch: refs/heads/spark-2.3
Commit: cf1b50bcc697be5353be469737a7dacdc57b1d7e
Parents: 1d302a8
Author: ajantha-bhat 
Authored: Mon May 14 15:28:23 2018 +0530
Committer: ravipesala 
Committed: Thu May 17 18:47:04 2018 +0530

--
 .../LatestFilesReadCommittedScope.java  |  2 +-
 .../TestNonTransactionalCarbonTable.scala   | 82 ++--
 .../table/CarbonDescribeFormattedCommand.scala  |  7 +-
 .../carbondata/sdk/file/AvroCarbonWriter.java   | 12 +--
 .../carbondata/sdk/file/CarbonReaderTest.java   |  2 +-
 5 files changed, 85 insertions(+), 20 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
index 2306330..6106174 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
@@ -138,7 +138,7 @@ public class LatestFilesReadCommittedScope implements 
ReadCommittedScope {
   @Override public void takeCarbonIndexFileSnapShot() throws IOException {
 // Read the current file Path get the list of indexes from the path.
 CarbonFile file = FileFactory.getCarbonFile(carbonFilePath);
-if (file == null) {
+if (file.listFiles().length == 0) {
   // For nonTransactional table, files can be removed at any point of time.
   // So cannot assume files will be present
   throw new IOException("No files are present in the table location :" + 
carbonFilePath);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 5ab1c60..cc3cbb5 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -1050,7 +1050,7 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 | "type": "record",
 | "fields": [
 |  { "name": "name", "type": "string"},
-|  { "name": "age", "type": "int"},
+|  { "name": "age", "type": "float"},
 |  { "name": "address",  "type": {
 |"type" : "record",  "name" : "my_address",
 |"fields" : [
@@ -1059,11 +1059,11 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 |]}
   """.stripMargin
 
-val json = """ {"name":"bob", "age":10, "address" : {"street":"abc", 
"city":"bang"}} """
+val json = """ {"name":"bob", "age":10.24, "address" : {"street":"abc", 
"city":"bang"}} """
 
 val fields = new Array[Field](3)
 fields(0) = new Field("name", DataTypes.STRING)
-fields(1) = new Field("age", DataTypes.INT)
+fields(1) = new Field("age", DataTypes.DOUBLE)
 val fld = new util.ArrayList[StructField]
 fld.add(new StructField("street", DataTypes.STRING))
 fld.add(new StructField("city", DataTypes.STRING))
@@ -1340,11 +1340,10 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
   s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
  |'$writerPath' """.stripMargin)
 
-
 checkAnswer(sql("select * from sdkOutputTable"), Seq(
-  Row("bob", 10, Row("abc","bang")),
-  Row("bob", 10, Row("abc","bang")),
- 

[30/50] [abbrv] carbondata git commit: [CARBONDATA-2459][DataMap] Add cache for bloom filter datamap

2018-05-17 Thread manishgupta88
[CARBONDATA-2459][DataMap] Add cache for bloom filter datamap

Loading the bloom filter from the bloomindex file is slow. Adding a cache for
this procedure improves query performance.

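A minimal illustration of the caching idea (hypothetical class, not the BloomDataMapCache added by this patch): keep recently loaded bloom filters in a bounded in-memory map keyed by the bloom index file, so repeated queries skip the disk read. Note the cache added in this commit is bounded via carbon.query.datamap.bloom.cache.size rather than by entry count.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Function;

// Illustrative sketch: a tiny LRU cache keyed by bloom index file path.
public class SimpleBloomFilterCache<V> {
  private final Map<String, V> cache;

  public SimpleBloomFilterCache(final int maxEntries) {
    this.cache = new LinkedHashMap<String, V>(16, 0.75f, true) {
      @Override protected boolean removeEldestEntry(Map.Entry<String, V> eldest) {
        return size() > maxEntries;   // evict the least-recently-used entry
      }
    };
  }

  public synchronized V getOrLoad(String bloomIndexPath, Function<String, V> loader) {
    // Read the bloomindex file only on a miss; hits are served from memory.
    return cache.computeIfAbsent(bloomIndexPath, loader);
  }
}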
This closes #2300


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d14c403f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d14c403f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d14c403f

Branch: refs/heads/spark-2.3
Commit: d14c403f6282ca8b574dae2fa5ab77caa5cf3c18
Parents: ffddba7
Author: xuchuanyin 
Authored: Fri May 11 21:49:43 2018 +0800
Committer: Jacky Li 
Committed: Sun May 13 02:05:30 2018 +0800

--
 .../core/constants/CarbonCommonConstants.java   |  13 ++
 .../datamap/bloom/BloomCoarseGrainDataMap.java  | 108 +++--
 .../bloom/BloomCoarseGrainDataMapFactory.java   |   4 +
 .../datamap/bloom/BloomDataMapCache.java| 232 +++
 .../datamap/bloom/BloomDataMapWriter.java   |   5 +-
 5 files changed, 283 insertions(+), 79 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d14c403f/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 56607b9..f3a821b 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1729,6 +1729,19 @@ public final class CarbonCommonConstants {
   // Property to enable parallel datamap loading for a table
   public static final String CARBON_LOAD_DATAMAPS_PARALLEL = 
"carbon.load.datamaps.parallel.";
 
+  /**
+   * Cache size in MB for bloom filter datamap. It is an integer and should be 
greater than 0
+   * and it will be used during query.
+   */
+  @CarbonProperty
+  public static final String CARBON_QUERY_DATAMAP_BLOOM_CACHE_SIZE =
+  "carbon.query.datamap.bloom.cache.size";
+
+  /**
+   * default value in size for cache size of bloom filter datamap.
+   */
+  public static final String CARBON_QUERY_DATAMAP_BLOOM_CACHE_SIZE_DEFAULT_VAL 
= "512";
+
   private CarbonCommonConstants() {
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d14c403f/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
--
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index 725d5cd..09de25e 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -17,13 +17,10 @@
 
 package org.apache.carbondata.datamap.bloom;
 
-import java.io.DataInputStream;
-import java.io.EOFException;
+import java.io.File;
 import java.io.IOException;
-import java.io.ObjectInputStream;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -45,13 +42,8 @@ import 
org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 import org.apache.carbondata.core.util.CarbonUtil;
 
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.Multimap;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 
 /**
  * BloomDataCoarseGrainMap is constructed in blocklet level. For each indexed 
column,
@@ -62,15 +54,16 @@ import org.apache.hadoop.fs.PathFilter;
 public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
   private static final LogService LOGGER =
   LogServiceFactory.getLogService(BloomCoarseGrainDataMap.class.getName());
+  public static final String BLOOM_INDEX_SUFFIX = ".bloomindex";
   private Set indexedColumn;
   private List bloomIndexList;
-  private Multimap indexCol2BloomDMList;
-  public static final String BLOOM_INDEX_SUFFIX = ".bloomindex";
   private String shardName;
+  private BloomDataMapCache bloomDataMapCache;
+  private Path indexPath;
 
   @Override
   public void init(DataMapModel dataMapModel) throws IOException {
-Path indexPath = 

[23/50] [abbrv] carbondata git commit: [CARBONDATA-2460] [CARBONDATA-2461] [CARBONDATA-2462] Fixed bug in AvroCarbonWriter

2018-05-17 Thread manishgupta88
[CARBONDATA-2460] [CARBONDATA-2461] [CARBONDATA-2462] Fixed bug in 
AvroCarbonWriter

Issue1: If a NULL type is passed from the Avro schema, an "Unsupported data
type" exception is thrown.
Solution1: Ignore columns that have the NULL data type.

Issue2: Array fields were being cast to ArrayList without any instance
check.
Solution2: Check the instance type of array fields and cast appropriately.

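As a rough illustration of the array fix (hypothetical helper; the actual change lives in AvroCarbonWriter): check what the Avro runtime actually handed over before casting, instead of assuming ArrayList.

import java.util.List;

// Illustrative sketch only.
public final class AvroArrayCast {
  // Avro may deliver an array field as GenericData.Array (a java.util.List),
  // a plain List, or an Object[], so cast only after an instanceof check.
  public static Object[] toObjectArray(Object avroFieldValue) {
    if (avroFieldValue == null) {
      return null;                      // NULL-typed / absent field: ignore it
    }
    if (avroFieldValue instanceof List) {
      return ((List<?>) avroFieldValue).toArray();
    }
    if (avroFieldValue instanceof Object[]) {
      return (Object[]) avroFieldValue;
    }
    throw new IllegalArgumentException(
        "Unexpected Avro array representation: " + avroFieldValue.getClass());
  }
}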
This closes #2291


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3d8b085a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3d8b085a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3d8b085a

Branch: refs/heads/spark-2.3
Commit: 3d8b085a55f551122c7528b6981f1785a44fef3c
Parents: 61afa42
Author: kunal642 
Authored: Wed May 9 18:32:23 2018 +0530
Committer: kumarvishal09 
Committed: Fri May 11 13:38:53 2018 +0530

--
 .../TestNonTransactionalCarbonTable.scala   |  47 -
 .../carbondata/sdk/file/AvroCarbonWriter.java   | 103 ++-
 2 files changed, 122 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3d8b085a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 376501b..86fda21 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -32,8 +32,6 @@ import 
org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandExcepti
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile
 import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.util.CarbonUtil
-import org.apache.carbondata.sdk.file.AvroCarbonWriter
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -43,7 +41,7 @@ import org.apache.commons.lang.CharEncoding
 import tech.allegro.schema.json2avro.converter.JsonAvroConverter
 
 import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField}
-import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, 
Field, Schema}
+import org.apache.carbondata.sdk.file.{AvroCarbonWriter, CarbonWriter, 
CarbonWriterBuilder, Field, Schema}
 
 
 class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll 
{
@@ -51,7 +49,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with 
BeforeAndAfterAll {
   var writerPath = new File(this.getClass.getResource("/").getPath
 +
 "../." +
-
"./src/test/resources/SparkCarbonFileFormat/WriterOutput/")
+"./target/SparkCarbonFileFormat/WriterOutput/")
 .getCanonicalPath
   //getCanonicalPath gives path with \, so code expects /. Need to handle in 
code ?
   writerPath = writerPath.replace("\\", "/")
@@ -1795,6 +1793,47 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 }.getMessage.toLowerCase.contains("column: name specified in sort 
columns"))
   }
 
+  test("test if load is passing with NULL type") {
+val schema1 =
+  """{
+|  "namespace": "com.apache.schema",
+|  "type": "record",
+|  "name": "StudentActivity",
+|  "fields": [
+|  {
+|  "name": "id",
+|  "type": "null"
+|  },
+|  {
+|  "name": "course_details",
+|  "type": {
+|  "name": "course_details",
+|  "type": "record",
+|  "fields": [
+|  {
+|  "name": 
"course_struct_course_time",
+|  "type": "string"
+|  }
+|  ]
+|  }
+|  

[36/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/supplier.csv
--
diff --git a/integration/spark-common-test/src/test/resources/tpch/supplier.csv 
b/integration/spark-common-test/src/test/resources/tpch/supplier.csv
new file mode 100644
index 000..0f20c29
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/tpch/supplier.csv
@@ -0,0 +1,1000 @@
+1|Supplier#1| N kD4on9OM 
Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful|
+2|Supplier#2|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly 
bold instructions. idle dependen|
+3|Supplier#3|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely
 silent requests after the express dependencies are sl|
+4|Supplier#4|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously
 even requests above the exp|
+5|Supplier#5|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly 
regular pinto bea|
+6|Supplier#6|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. 
regular dolphins use against the furiously ironic decoys. |
+7|Supplier#7|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind 
silently furiously regular courts. final requests are deposits. requests wake 
quietly blit|
+8|Supplier#8|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al
 pinto beans. asymptotes haggl|
+9|Supplier#9|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. 
unusual, even requests along the furiously regular pac|
+10|Supplier#00010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing 
waters. regular requests ar|
+11|Supplier#00011|JfwTs,LZrV, M,9C|18|28-613-996-1505|3393.08|y ironic 
packages. slyly ironic accounts affix furiously; ironically unusual excuses 
across the flu|
+12|Supplier#00012|aLIW  q0HYd|8|18-179-925-7181|1432.69|al packages nag 
alongside of the bold instructions. express, daring accounts|
+13|Supplier#00013|HK71HQyWoqRWOX8GI 
FpgAifW,2PoH|3|13-727-620-7813|9107.22|requests engage regularly instructions. 
furiously special requests ar|
+14|Supplier#00014|EXsnO5pTNj4iZRm|15|25-656-247-5058|9189.82|l accounts 
boost. fluffily bold warhorses wake|
+15|Supplier#00015|olXVbNBfVzRqgokr1T,Ie|8|18-453-357-6394|308.56| across 
the furiously regular platelets wake even deposits. quickly express she|
+16|Supplier#00016|YjP5C55zHDXL7LalK27zfQnwejdpin4AMpvh|22|32-822-502-4215|2972.26|ously
 express ideas haggle quickly dugouts? fu|
+17|Supplier#00017|c2d,ESHRSkK3WYnxpgw6aOqN0q|19|29-601-884-9219|1687.81|eep
 against the furiously bold ideas. fluffily bold packa|
+18|Supplier#00018|PGGVE5PWAMwKDZw |16|26-729-551-1115|7040.82|accounts 
snooze slyly furiously bold |
+19|Supplier#00019|edZT3es,nBFD8lBXTGeTl|24|34-278-310-2731|6150.38|refully 
final foxes across the dogged theodolites sleep slyly abou|
+20|Supplier#00020|iybAE,RmTymrZVYaFZva2SH,j|3|13-715-945-6730|530.82|n, 
ironic ideas would nag blithely about the slyly regular accounts. silent, expr|
+21|Supplier#00021|81CavellcrJ0PQ3CPBID0Z0JwyJm0ka5igEs|2|12-253-590-5816|9365.80|d.
 instructions integrate sometimes slyly pending instructions. accounts nag 
among the |
+22|Supplier#00022|okiiQFk 8lm6EVX6Q0,bEcO|4|14-144-830-2814|-966.20| 
ironically among the deposits. closely expre|
+23|Supplier#00023|ssetugTcXc096qlD7 
2TL5crEEeS3zk|9|19-559-422-5776|5926.41|ges could have to are ironic deposits. 
regular, even request|
+24|Supplier#00024|C4nPvLrVmKPPabFCj|0|10-620-939-2254|9170.71|usly pending 
deposits. slyly final accounts run |
+25|Supplier#00025|RCQKONXMFnrodzz6w7fObFVV6CUm2q|22|32-431-945-3541|9198.31|ely
 regular deposits. carefully regular sauternes engage furiously above the 
regular accounts. idly |
+26|Supplier#00026|iV,MHzAx6Z939uzFNkq09M0a1 
MBfH7|21|31-758-894-4436|21.18| ideas poach carefully after the blithely bold 
asymptotes. furiously pending theodoli|
+27|Supplier#00027|lC4CjKwNHUr6L4xIpzOBK4NlHkFTg|18|28-708-999-2028|1887.62|s
 according to the quickly regular hockey playe|
+28|Supplier#00028|GBhvoRh,7YIN V|0|10-538-384-8460|-891.99|ld requests 
across the pinto beans are carefully against the quickly final courts. accounts 
sleep |
+29|Supplier#00029|658tEqXLPvRd6xpFdqC2|1|11-555-705-5922|-811.62|y express 
ideas play furiously. even accounts sleep fluffily across the accounts. careful|
+30|Supplier#00030|84NmC1rmQfO0fj3zkobLT|16|26-940-594-4852|8080.14|ias. 
carefully silent accounts cajole blithely. pending, special accounts cajole 
quickly above the f|
+31|Supplier#00031|fRJimA7zchyApqRLHcQeocVpP|16|26-515-530-4159|5916.91|into
 beans wake after the special packages. slyly fluffy requests cajole furio|
+32|Supplier#00032|yvoD3TtZSx1skQNCK8agk5bZlZLug|23|33-484-637-7873|3556.47|usly
 even depths. quickly ironic theodolites s|

[42/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
[CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query 
matching and rewriting

Integrate MV DataMap to Carbon

This closes #2302


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2881c6bb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2881c6bb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2881c6bb

Branch: refs/heads/spark-2.3
Commit: 2881c6bbc17f34c0e17b6483130e70311e41c653
Parents: bf73e9f
Author: ravipesala 
Authored: Sat May 12 10:41:01 2018 +0530
Committer: Jacky Li 
Committed: Sun May 13 17:08:56 2018 +0800

--
 .../datamap/status/DataMapStatusManager.java|   16 +
 .../mv/rewrite/MVCreateTestCase.scala   |   16 +
 .../mv/rewrite/MVSampleTestCase.scala   |   16 +
 .../carbondata/mv/rewrite/MVTPCDSTestCase.scala |   16 +
 .../carbondata/mv/rewrite/MVTpchTestCase.scala  |   16 +
 .../carbondata/mv/rewrite/Tpcds_1_4_Suite.scala |   80 --
 .../mv/plans/LogicalToModularPlanSuite.scala|8 +-
 .../carbondata/mv/plans/ModularToSQLSuite.scala |5 +-
 .../src/test/resources/data_big.csv |   91 ++
 .../src/test/resources/tpch/customers.csv   |  500 +
 .../src/test/resources/tpch/lineitem.csv| 1000 ++
 .../src/test/resources/tpch/nation.csv  |   25 +
 .../src/test/resources/tpch/orders.csv  | 1000 ++
 .../src/test/resources/tpch/region.csv  |5 +
 .../src/test/resources/tpch/supplier.csv| 1000 ++
 .../apache/spark/sql/hive/CarbonAnalyzer.scala  |   19 +-
 pom.xml |1 +
 17 files changed, 3727 insertions(+), 87 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java
 
b/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java
index b540146..d0ff589 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java
@@ -53,6 +53,22 @@ public class DataMapStatusManager {
 return storageProvider.getDataMapStatusDetails();
   }
 
+  /**
+   * Get enabled datamap status details
+   * @return
+   * @throws IOException
+   */
+  public static DataMapStatusDetail[] getEnabledDataMapStatusDetails() throws 
IOException {
+DataMapStatusDetail[] dataMapStatusDetails = 
storageProvider.getDataMapStatusDetails();
+List statusDetailList = new ArrayList<>();
+for (DataMapStatusDetail statusDetail : dataMapStatusDetails) {
+  if (statusDetail.getStatus() == DataMapStatus.ENABLED) {
+statusDetailList.add(statusDetail);
+  }
+}
+return statusDetailList.toArray(new 
DataMapStatusDetail[statusDetailList.size()]);
+  }
+
   public static Map readDataMapStatusMap() throws 
IOException {
 DataMapStatusDetail[] details = storageProvider.getDataMapStatusDetails();
 Map map = new HashMap<>(details.length);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala
--
diff --git 
a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala
 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala
index 184fdc1..4b636db 100644
--- 
a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala
+++ 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * 

[15/50] [abbrv] carbondata git commit: [CARBONDATA-2401] Date and Timestamp options are not working in SDK

2018-05-17 Thread manishgupta88
[CARBONDATA-2401] Date and Timestamp options are not working in SDK

Issue: Date and Timestamp formats are passed in the SDK load options, but the
data load fails even when the data matches the specified format.
Cause: The load model is getting overwritten with the defaults.
Fix: If the user has passed the options, the load model should use the formats
from the options; otherwise, use the defaults.

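A minimal sketch of the intended precedence, with hypothetical names rather than the actual CarbonLoadModelBuilder code: use the format from the load options when the user supplied one, otherwise fall back to the default.

import java.util.Collections;
import java.util.Map;

// Illustrative only.
public final class LoadOptionResolver {
  public static String resolveFormat(Map<String, String> loadOptions,
      String optionKey, String defaultFormat) {
    String userFormat = loadOptions == null ? null : loadOptions.get(optionKey);
    // Only an absent or empty option falls back to the default.
    return (userFormat == null || userFormat.trim().isEmpty()) ? defaultFormat : userFormat;
  }

  public static void main(String[] args) {
    Map<String, String> options = Collections.singletonMap("dateformat", "dd-MM-yyyy");
    System.out.println(resolveFormat(options, "dateformat", "yyyy-MM-dd"));               // dd-MM-yyyy
    System.out.println(resolveFormat(options, "timestampformat", "yyyy-MM-dd HH:mm:ss")); // default
  }
}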
This closes #2227


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b2060c61
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b2060c61
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b2060c61

Branch: refs/heads/spark-2.3
Commit: b2060c61104895d5599b1c044b725d56a39748f8
Parents: ceb7c8d
Author: BJangir 
Authored: Wed Apr 25 18:27:58 2018 +0530
Committer: kumarvishal09 
Committed: Thu May 10 14:00:59 2018 +0530

--
 .../TestNonTransactionalCarbonTable.scala   | 39 
 .../loading/model/CarbonLoadModelBuilder.java   |  4 --
 2 files changed, 39 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/b2060c61/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
--
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 2f88c40..ca6ac3c 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -17,6 +17,8 @@
 
 package org.apache.carbondata.spark.testsuite.createTable
 
+import java.sql.Timestamp
+import java.io.{File, FileFilter, IOException}
 import java.io.{File, FileFilter}
 import java.util
 
@@ -31,6 +33,7 @@ import 
org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile
 import org.apache.carbondata.core.datastore.impl.FileFactory
 import org.apache.carbondata.core.util.CarbonUtil
+import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, 
Field, Schema}
 import org.apache.carbondata.sdk.file.{AvroCarbonWriter, CarbonWriter, Field, 
Schema}
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -39,6 +42,10 @@ import org.apache.avro
 import org.apache.commons.lang.CharEncoding
 import tech.allegro.schema.json2avro.converter.JsonAvroConverter
 
+import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField}
+import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, 
Field, Schema}
+
+
 class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll 
{
 
   var writerPath = new File(this.getClass.getResource("/").getPath
@@ -669,6 +676,38 @@ class TestNonTransactionalCarbonTable extends QueryTest 
with BeforeAndAfterAll {
 cleanTestData()
   }
 
+  test("test custom  format for date and timestamp in sdk") {
+
+cleanTestData()
+var options = Map("dateformat" -> "dd-MM-" ,"timestampformat" -> 
"dd-MM- HH:mm:ss").asJava
+
+val fields: Array[Field] = new Array[Field](4)
+fields(0) = new Field("stringField", DataTypes.STRING)
+fields(1) = new Field("intField", DataTypes.INT)
+fields(2) = new Field("mydate", DataTypes.DATE)
+fields(3) = new Field("mytime", DataTypes.TIMESTAMP)
+
+val builder: CarbonWriterBuilder = CarbonWriter.builder.withSchema(new 
Schema(fields))
+  
.outputPath(writerPath).isTransactionalTable(false).withLoadOptions(options)
+
+val writer: CarbonWriter = builder.buildWriterForCSVInput
+writer.write(Array("babu","1","02-01-2002","02-01-2002 01:01:00"));
+writer.close()
+
+assert(new File(writerPath).exists())
+
+sql("DROP TABLE IF EXISTS sdkOutputTable")
+sql(
+  s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+
+checkAnswer(sql("select * from sdkOutputTable"), Seq(
+  Row("babu", 1, 
java.sql.Date.valueOf("2002-01-02"),Timestamp.valueOf("2002-01-02 
01:01:00.0"
+sql("DROP TABLE sdkOutputTable")
+cleanTestData()
+
+  }
+
   test("test huge data write with one batch having bad record") {
 
 val exception =

http://git-wip-us.apache.org/repos/asf/carbondata/blob/b2060c61/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java

[24/50] [abbrv] carbondata git commit: [CARBONDATA-2471]Added support for No Dictionary Complex type for Double, Decimal, Date type in SDK

2018-05-17 Thread manishgupta88
[CARBONDATA-2471]Added support for No Dictionary Complex type for Double, 
Decimal, Date type in SDK

Added support for No Dictionary Complex type for Double, Decimal, Date type in 
SDK

This closes #2297


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d85fb72e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d85fb72e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d85fb72e

Branch: refs/heads/spark-2.3
Commit: d85fb72e2f24107769c7b5ce7d454d52cbaee49d
Parents: 3d8b085
Author: kumarvishal09 
Authored: Thu May 10 22:52:09 2018 +0530
Committer: ravipesala 
Committed: Fri May 11 15:38:27 2018 +0530

--
 .../scan/complextypes/PrimitiveQueryType.java   |  18 +-
 .../apache/carbondata/core/util/ByteUtil.java   |   8 +
 .../carbondata/core/util/DataTypeUtil.java  |  18 ++
 ...ransactionalCarbonTableWithComplexType.scala | 232 +++
 .../command/carbonTableSchemaCommon.scala   |   9 +-
 .../processing/datatypes/PrimitiveDataType.java |  29 ++-
 6 files changed, 297 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d85fb72e/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
index 2db590b..edae4da 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java
@@ -22,13 +22,16 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.carbondata.core.cache.dictionary.Dictionary;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
 import 
org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
 import 
org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
 import org.apache.carbondata.core.keygenerator.mdkey.Bits;
 import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.filter.GenericQueryType;
 import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks;
+import org.apache.carbondata.core.util.ByteUtil;
 import org.apache.carbondata.core.util.DataTypeUtil;
 
 public class PrimitiveQueryType extends ComplexQueryType implements 
GenericQueryType {
@@ -46,6 +49,8 @@ public class PrimitiveQueryType extends ComplexQueryType 
implements GenericQuery
 
   private boolean isDictionary;
 
+  private DirectDictionaryGenerator directDictGenForDate;
+
   public PrimitiveQueryType(String name, String parentname, int blockIndex,
   DataType dataType, int keySize,
   Dictionary dictionary, boolean isDirectDictionary) {
@@ -57,6 +62,8 @@ public class PrimitiveQueryType extends ComplexQueryType 
implements GenericQuery
 this.parentname = parentname;
 this.isDirectDictionary = isDirectDictionary;
 this.isDictionary = (dictionary != null && isDirectDictionary == false);
+this.directDictGenForDate =
+
DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(DataTypes.DATE);
   }
 
   @Override public void addChildren(GenericQueryType children) {
@@ -116,7 +123,16 @@ public class PrimitiveQueryType extends ComplexQueryType 
implements GenericQuery
   int size = dataBuffer.getInt();
   byte[] value = new byte[size];
   dataBuffer.get(value, 0, size);
-  actualData = 
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value, this.dataType);
+  if (dataType == DataTypes.DATE) {
+if (value.length == 0) {
+  actualData = null;
+} else {
+  actualData = this.directDictGenForDate.getValueFromSurrogate(
+  ByteUtil.toInt(value, 0, 
CarbonCommonConstants.INT_SIZE_IN_BYTE));
+}
+  } else {
+actualData = 
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value, this.dataType);
+  }
 } else {
   // Dictionary Column
   byte[] data = new byte[keySize];

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d85fb72e/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
index 52fc3c3..661384c 

[49/50] [abbrv] carbondata git commit: [CARBONDATA-2477]Fixed No dictionary Complex type with double/date/decimal data type

2018-05-17 Thread manishgupta88
[CARBONDATA-2477]Fixed No dictionary Complex type with double/date/decimal data 
type

Problem: SDK create table with a no-dictionary complex type fails when a
complex type child column has a double/date/decimal data type.
Solution: The complex type validation was disallowing double/date/decimal
data; that restriction needs to be removed.
Also changed the no-dictionary complex type storage format: the length is now
stored as a short instead of an int to reduce storage space.

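A rough sketch of the storage change (assumed ByteBuffer layout, not the actual ColumnPage code): each no-dictionary child value is still written as length + value, but the length prefix shrinks from a 4-byte int to a 2-byte short, which caps a single value at 32767 bytes and saves 2 bytes per value.

import java.nio.ByteBuffer;

// Illustrative LV (length-value) encoder with a 2-byte length prefix.
public final class ShortLVEncoder {
  public static byte[] encode(byte[][] values) {
    int total = 0;
    for (byte[] v : values) {
      total += 2 + v.length;               // 2-byte length + payload
    }
    ByteBuffer buffer = ByteBuffer.allocate(total);
    for (byte[] v : values) {
      buffer.putShort((short) v.length);   // previously a 4-byte putInt(v.length)
      buffer.put(v);
    }
    return buffer.array();
  }
}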
This closes #2304


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6297ea0b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6297ea0b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6297ea0b

Branch: refs/heads/spark-2.3
Commit: 6297ea0b4092539fa0aa2c6f772d6984850c6110
Parents: cf1b50b
Author: kumarvishal09 
Authored: Mon May 14 14:17:38 2018 +0530
Committer: ravipesala 
Committed: Thu May 17 19:05:30 2018 +0530

--
 .../carbondata/core/datastore/ColumnType.java   | 14 ++-
 .../core/datastore/page/ColumnPage.java | 82 --
 .../core/datastore/page/ComplexColumnPage.java  | 16 +++-
 .../core/datastore/page/LazyColumnPage.java | 13 ++-
 .../datastore/page/SafeFixLengthColumnPage.java | 25 +-
 .../datastore/page/SafeVarLengthColumnPage.java | 21 +
 .../page/UnsafeFixLengthColumnPage.java | 39 -
 .../datastore/page/VarLengthColumnPageBase.java | 90 ++--
 .../page/encoding/ColumnPageEncoder.java|  9 +-
 .../scan/complextypes/PrimitiveQueryType.java   |  4 +-
 .../core/scan/complextypes/StructQueryType.java |  8 +-
 .../apache/carbondata/core/util/ByteUtil.java   |  9 ++
 ...ransactionalCarbonTableWithComplexType.scala | 76 -
 .../processing/datatypes/ArrayDataType.java |  7 ++
 .../processing/datatypes/GenericDataType.java   |  4 +
 .../processing/datatypes/PrimitiveDataType.java | 17 ++--
 .../processing/datatypes/StructDataType.java| 30 +++
 .../carbondata/processing/store/TablePage.java  |  6 +-
 .../sdk/file/CarbonWriterBuilder.java   |  9 --
 19 files changed, 407 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6297ea0b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java
index f98307b..8bbf12d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java
@@ -31,7 +31,13 @@ public enum ColumnType {
   COMPLEX,
 
   // measure column, numerical data type
-  MEASURE;
+  MEASURE,
+
+  COMPLEX_STRUCT,
+
+  COMPLEX_ARRAY,
+
+  COMPLEX_PRIMITIVE;
 
   public static ColumnType valueOf(int ordinal) {
 if (ordinal == GLOBAL_DICTIONARY.ordinal()) {
@@ -44,6 +50,12 @@ public enum ColumnType {
   return COMPLEX;
 } else if (ordinal == MEASURE.ordinal()) {
   return MEASURE;
+} else if (ordinal == COMPLEX_STRUCT.ordinal()) {
+  return COMPLEX_STRUCT;
+} else if (ordinal == COMPLEX_ARRAY.ordinal()) {
+  return COMPLEX_ARRAY;
+} else if (ordinal == COMPLEX_PRIMITIVE.ordinal()) {
+  return COMPLEX_PRIMITIVE;
 } else {
   throw new RuntimeException("create ColumnType with invalid ordinal: " + 
ordinal);
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6297ea0b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
index 68269fb..69ed437 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
@@ -22,6 +22,7 @@ import java.math.BigDecimal;
 import java.util.BitSet;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.ColumnType;
 import org.apache.carbondata.core.datastore.TableSpec;
 import org.apache.carbondata.core.datastore.compression.Compressor;
 import org.apache.carbondata.core.datastore.compression.CompressorFactory;
@@ -153,6 +154,19 @@ public abstract class ColumnPage {
 }
   }
 
+  private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec 
columnSpec,
+  DataType dataType, int pageSize, int eachValueSize) {
+if (unsafe) {
+  try {
+return 

[32/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala
--
diff --git 
a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala
 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala
new file mode 100644
index 000..89813b5
--- /dev/null
+++ 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala
@@ -0,0 +1,247 @@
+package org.apache.carbondata.mv.rewrite
+
+import java.io.File
+
+import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+class MVTpchTestCase extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll {
+drop()
+val projectPath = new File(this.getClass.getResource("/").getPath + 
"../../../../../")
+  .getCanonicalPath.replaceAll("", "/")
+val integrationPath = s"$projectPath/integration"
+val resourcesPath = 
s"$integrationPath/spark-common-test/src/test/resources"
+
+sql(s"""create table if not exists LINEITEM(  L_SHIPDATE date,  L_SHIPMODE 
string,  L_SHIPINSTRUCT string,  L_RETURNFLAG string,  L_RECEIPTDATE date,  
L_ORDERKEY INT ,  L_PARTKEY INT ,  L_SUPPKEY   string,  L_LINENUMBER int,  
L_QUANTITY double,  L_EXTENDEDPRICE double,  L_DISCOUNT double,  L_TAX double,  
L_LINESTATUS string,  L_COMMITDATE date,  L_COMMENT  string) STORED BY 
'org.apache.carbondata.format'""")
+sql(s"""create table if not exists ORDERS(  O_ORDERDATE date,  
O_ORDERPRIORITY string,  O_ORDERSTATUS string,  O_ORDERKEY int,  O_CUSTKEY 
string,  O_TOTALPRICE double,  O_CLERK string,  O_SHIPPRIORITY int,  O_COMMENT 
string) STORED BY 'org.apache.carbondata.format'""")
+sql(s"""create table if not exists CUSTOMER(  C_MKTSEGMENT string,  
C_NATIONKEY string,  C_CUSTKEY string,  C_NAME string,  C_ADDRESS string,  
C_PHONE string,  C_ACCTBAL double,  C_COMMENT string) STORED BY 
'org.apache.carbondata.format'""")
+sql(s"""create table if not exists REGION(  R_NAME string,  R_REGIONKEY 
string,  R_COMMENT string) STORED BY 'org.apache.carbondata.format'""")
+sql(s"""create table if not exists NATION (  N_NAME string,  N_NATIONKEY 
string,  N_REGIONKEY string,  N_COMMENT  string) STORED BY 
'org.apache.carbondata.format'""")
+sql(s"""create table if not exists SUPPLIER(S_COMMENT string,S_SUPPKEY 
string,S_NAME string, S_ADDRESS string, S_NATIONKEY string, S_PHONE string, 
S_ACCTBAL double) STORED BY 'org.apache.carbondata.format'""")
+
+sql(s"""load data inpath "$resourcesPath/tpch/lineitem.csv" into table 
lineitem 
options('DELIMITER'='|','FILEHEADER'='L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_QUANTITY,L_EXTENDEDPRICE,L_DISCOUNT,L_TAX,L_RETURNFLAG,L_LINESTATUS,L_SHIPDATE,L_COMMITDATE,L_RECEIPTDATE,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT')""")
+sql(s"""load data inpath "$resourcesPath/tpch/orders.csv" into table 
ORDERS 
options('DELIMITER'='|','FILEHEADER'='O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT')""")
+sql(s"""load data inpath "$resourcesPath/tpch/customers.csv" into  table 
CUSTOMER 
options('DELIMITER'='|','FILEHEADER'='C_CUSTKEY,C_NAME,C_ADDRESS,C_NATIONKEY,C_PHONE,C_ACCTBAL,C_MKTSEGMENT,C_COMMENT')""")
+sql(s"""load data inpath "$resourcesPath/tpch/region.csv" into table 
REGION options('DELIMITER'='|','FILEHEADER'='R_REGIONKEY,R_NAME,R_COMMENT')""")
+sql(s"""load data inpath "$resourcesPath/tpch/nation.csv" into table 
NATION 
options('DELIMITER'='|','FILEHEADER'='N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT')""")
+sql(s"""load data inpath "$resourcesPath/tpch/supplier.csv" into table 
SUPPLIER 
options('DELIMITER'='|','FILEHEADER'='S_SUPPKEY,S_NAME,S_ADDRESS,S_NATIONKEY,S_PHONE,S_ACCTBAL,S_COMMENT')""")
+
+
+sql(s"""create table if not exists LINEITEM1(  L_SHIPDATE date,  
L_SHIPMODE string,  L_SHIPINSTRUCT string,  L_RETURNFLAG string,  L_RECEIPTDATE 
date,  L_ORDERKEY INT ,  L_PARTKEY INT ,  L_SUPPKEY   string,  L_LINENUMBER 
int,  L_QUANTITY double,  L_EXTENDEDPRICE double,  L_DISCOUNT double,  L_TAX 
double,  L_LINESTATUS string,  L_COMMITDATE date,  L_COMMENT  string) STORED BY 
'org.apache.carbondata.format'""")
+sql(s"""create table if not exists ORDERS1(  O_ORDERDATE date,  
O_ORDERPRIORITY string,  O_ORDERSTATUS string,  O_ORDERKEY int,  O_CUSTKEY 
string,  O_TOTALPRICE double,  O_CLERK string,  O_SHIPPRIORITY int,  O_COMMENT 
string) STORED BY 'org.apache.carbondata.format'""")
+sql(s"""create table if not exists CUSTOMER1( 

[41/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/customers.csv
--
diff --git 
a/integration/spark-common-test/src/test/resources/tpch/customers.csv 
b/integration/spark-common-test/src/test/resources/tpch/customers.csv
new file mode 100644
index 000..7e46e5f
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/tpch/customers.csv
@@ -0,0 +1,500 @@
+1|Customer#1|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to 
the even, regular platelets. regular, ironic epitaphs nag e|
+2|Customer#2|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l
 accounts. blithely ironic theodolites integrate boldly: caref|
+3|Customer#3|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| 
deposits eat slyly ironic, even instructions. express foxes detect slyly. 
blithely even accounts abov|
+4|Customer#4|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| 
requests. final, regular ideas sleep final accou|
+5|Customer#5|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n
 accounts will have to unwind. foxes cajole accor|
+6|Customer#6|sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh 
yVn|20|30-114-968-4951|7638.57|AUTOMOBILE|tions. even deposits boost according 
to the slyly bold packages. final accounts cajole requests. furious|
+7|Customer#7|TcGe5gaZNgVePxU5kRrvXBfkasDTea|18|28-190-982-9759|9561.95|AUTOMOBILE|ainst
 the ironic, express theodolites. express, even pinto beans among the exp|
+8|Customer#8|I0B10bB0AymmC, 
0PrRYBCP1yGJ8xcBPmWhl5|17|27-147-574-9335|6819.74|BUILDING|among the slyly 
regular theodolites kindle blithely courts. carefully even theodolites haggle 
slyly along the ide|
+9|Customer#9|xKiAFTjUsCuxfeleNqefumTrjS|8|18-338-906-3675|8324.07|FURNITURE|r
 theodolites according to the requests wake thinly excuses: pending requests 
haggle furiousl|
+10|Customer#00010|6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 
v2|5|15-741-346-9870|2753.54|HOUSEHOLD|es regular deposits haggle. fur|
+11|Customer#00011|PkWS 
3HlXqwTuzrKg633BEi|23|33-464-151-3439|-272.60|BUILDING|ckages. requests sleep 
slyly. quickly even pinto beans promise above the slyly regular pinto beans. |
+12|Customer#00012|9PWKuhzT4Zr1Q|13|23-791-276-1263|3396.49|HOUSEHOLD| to 
the carefully final braids. blithely regular requests nag. ironic theodolites 
boost quickly along|
+13|Customer#00013|nsXQu0oVjD7PM659uC3SRSp|3|13-761-547-5974|3857.34|BUILDING|ounts
 sleep carefully after the close frays. carefully bold notornis use ironic 
requests. blithely|
+14|Customer#00014|KXkletMlL2JQEA |1|11-845-129-3851|5266.30|FURNITURE|, 
ironic packages across the unus|
+15|Customer#00015|YtWggXoOLdwdo7b0y,BZaGUQMLJMX1Y,EC,6Dn|23|33-687-542-7601|2788.52|HOUSEHOLD|
 platelets. regular deposits detect asymptotes. blithely unusual packages nag 
slyly at the fluf|
+16|Customer#00016|cYiaeMLZSMAOQ2 
d0W,|10|20-781-609-3107|4681.03|FURNITURE|kly silent courts. thinly regular 
theodolites sleep fluffily after |
+17|Customer#00017|izrh 
6jdqtp2eqdtbkswDD8SG4SzXruMfIXyR7|2|12-970-682-3487|6.34|AUTOMOBILE|packages 
wake! blithely even pint|
+18|Customer#00018|3txGO 
AiuFux3zT0Z9NYaFRnZt|6|16-155-215-1315|5494.43|BUILDING|s sleep. carefully even 
instructions nag furiously alongside of t|
+19|Customer#00019|uc,3bHIx84H,wdrmLOjVsiqXCq2tr|18|28-396-526-5053|8914.71|HOUSEHOLD|
 nag. furiously careful packages are slyly at the accounts. furiously regular 
in|
+20|Customer#00020|JrPk8Pqplj4Ne|22|32-957-234-8742|7603.40|FURNITURE|g 
alongside of the special excuses-- fluffily enticing packages wake |
+21|Customer#00021|XYmVpr9yAHDEn|8|18-902-614-8344|1428.25|MACHINERY| 
quickly final accounts integrate blithely furiously u|
+22|Customer#00022|QI6p41,FNs5k7RZoCCVPUTkUdYpB|3|13-806-545-9701|591.98|MACHINERY|s
 nod furiously above the furiously ironic ideas. |
+23|Customer#00023|OdY 
W13N7Be3OC5MpgfmcYss0Wn6TKT|3|13-312-472-8245|3332.02|HOUSEHOLD|deposits. 
special deposits cajole slyly. fluffily special deposits about the furiously |
+24|Customer#00024|HXAFgIAyjxtdqwimt13Y3OZO 
4xeLe7U8PqG|13|23-127-851-8031|9255.67|MACHINERY|into beans. fluffily final 
ideas haggle fluffily|
+25|Customer#00025|Hp8GyFQgGHFYSilH5tBfe|12|22-603-468-3533|7133.70|FURNITURE|y.
 accounts sleep ruthlessly according to the regular theodolites. unusual 
instructions sleep. ironic, final|
+26|Customer#00026|8ljrc5ZeMl7UciP|22|32-363-455-4837|5182.05|AUTOMOBILE|c 
requests use furiously ironic requests. slyly ironic dependencies us|
+27|Customer#00027|IS8GIyxpBrLpMT0u7|3|13-137-193-2709|5679.84|BUILDING| 
about the carefully ironic pinto beans. accoun|
+28|Customer#00028|iVyg0daQ,Tha8x2WPWA9m2529m|8|18-774-241-1462|1007.18|FURNITURE|
 along the regular deposits. furiously final pac|

[31/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala
--
diff --git 
a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala
 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala
new file mode 100644
index 000..074bf00
--- /dev/null
+++ 
b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala
@@ -0,0 +1,2496 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.rewrite.matching
+
+object TestTPCDS_1_4_Batch {
+  val tpcds_1_4_testCases = Seq(
+  // sequence of triples.  each triple denotes (MV, user query, rewritten 
query)
+  // test case 1: test SELECT-SELECT-EXACT_MATCH with simple SELECT 
(extract from q45)
+  ("case_1",
+   """
+|SELECT i_item_id, i_item_sk
+|FROM item
+|WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
+""".stripMargin.trim,
+   """
+|SELECT i_item_id
+|FROM item
+|WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19)
+""".stripMargin.trim,
+   """
+|SELECT
+|FROM 
+|WHERE 
+""".stripMargin.trim),
+  // test case 2: test SELECT-SELECT-EXACT_MATCH with SELECT containing 
join (derive from q64)
+  ("case_2",
+   """
+|SELECT cs1.product_name, cs1.store_name, cs1.store_zip, 
cs1.b_street_number,
+|   cs1.b_streen_name, cs1.b_city, cs1.b_zip, cs1.c_street_number, 
cs1.c_street_name,
+|   cs1.c_city, cs1.c_zip, cs1.syear, cs1.cnt, cs1.s1, cs1.s2, 
cs1.s3, cs2.s1,
+|   cs2.s2, cs2.s3, cs2.syear, cs2.cnt
+|FROM cross_sales cs1,cross_sales cs2
+|WHERE cs1.item_sk=cs2.item_sk AND
+| cs1.syear = 1999 AND
+| cs2.syear = 1999 + 1 AND
+| cs2.cnt <= cs1.cnt AND
+| cs1.store_name = cs2.store_name AND
+| cs1.store_zip = cs2.store_zip
+""".stripMargin.trim,
+   """
+|SELECT cs1.product_name, cs1.store_name, cs1.store_zip, 
cs1.b_street_number,
+|   cs1.b_streen_name, cs1.b_city, cs1.b_zip, cs1.c_street_number, 
cs1.c_street_name,
+|   cs1.c_city, cs1.c_zip, cs1.syear, cs1.cnt, cs1.s1, cs1.s2, 
cs1.s3, cs2.s1,
+|   cs2.s2, cs2.s3
+|FROM cross_sales cs1,cross_sales cs2
+|WHERE cs1.item_sk=cs2.item_sk AND
+| cs1.syear = 1999 AND
+| cs2.syear = 1999 + 1 AND
+| cs2.cnt <= cs1.cnt AND
+| cs1.store_name = cs2.store_name AND
+| cs1.store_zip = cs2.store_zip
+|ORDER BY cs1.product_name, cs1.store_name, cs2.cnt
+""".stripMargin.trim,
+   """
+|SELECT
+|FROM
+|WHERE
+""".stripMargin.trim),
+  // test case 3: test simple SELECT with GROUPBY (from q99)
+  ("case_3",
+   """
+|SELECT count(ss_sold_date_sk) as not_null_total,
+|   max(ss_sold_date_sk) as max_ss_sold_date_sk,
+|   max(ss_sold_time_sk) as max_ss_sold_time_sk,
+|   ss_item_sk,
+|   ss_store_sk
+|FROM store_sales
+|GROUP BY ss_item_sk, ss_store_sk
+""".stripMargin.trim,
+   """
+|SELECT count(ss_sold_date_sk) as not_null_total,
+|   max(ss_sold_date_sk) as max_ss_sold_date_sk,
+|   ss_item_sk,
+|   ss_store_sk
+|FROM store_sales
+|GROUP BY ss_item_sk, ss_store_sk
+""".stripMargin.trim,
+   """
+|SELECT gen_subsumer_0.`not_null_total`,
+|   gen_subsumer_0.`max_ss_sold_date_sk`,
+|   gen_subsumer_0.`ss_item_sk`,
+|   gen_subsumer_0.`ss_store_sk`
+|FROM
+|  (SELECT count(`ss_sold_date_sk`) AS `not_null_total`, 
max(`ss_sold_date_sk`) AS `max_ss_sold_date_sk`, max(`ss_sold_time_sk`) AS 
`max_ss_sold_time_sk`, `ss_item_sk`, `ss_store_sk` 
+|  FROM store_sales
+|  GROUP BY `ss_item_sk`, 

[35/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala
--
diff --git 
a/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala
 
b/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala
index 88beb68..dfb89fd 100644
--- 
a/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala
+++ 
b/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala
@@ -20,15 +20,32 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.analysis.Analyzer
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.util.CarbonReflectionUtils
 
 class CarbonAnalyzer(catalog: SessionCatalog,
 conf: SQLConf,
 sparkSession: SparkSession,
 analyzer: Analyzer) extends Analyzer(catalog, conf) {
+
+  val mvPlan = try {
+CarbonReflectionUtils.createObject(
+  "org.apache.carbondata.mv.datamap.MVAnalyzerRule",
+  sparkSession)._1.asInstanceOf[Rule[LogicalPlan]]
+  } catch {
+case e: Exception =>
+  null
+  }
+
   override def execute(plan: LogicalPlan): LogicalPlan = {
 var logicalPlan = analyzer.execute(plan)
 logicalPlan = 
CarbonPreAggregateDataLoadingRules(sparkSession).apply(logicalPlan)
-CarbonPreAggregateQueryRules(sparkSession).apply(logicalPlan)
+logicalPlan = CarbonPreAggregateQueryRules(sparkSession).apply(logicalPlan)
+if (mvPlan != null) {
+  mvPlan.apply(logicalPlan)
+} else {
+  logicalPlan
+}
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 7273c76..e9551c0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -640,6 +640,7 @@
   mv
   
 datamap/mv/plan
+datamap/mv/core
   
 
   



[34/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting

2018-05-17 Thread manishgupta88
[CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query 
matching and rewriting

Support Modular Core for Materialized View DataMap

This closes #2302


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bf73e9fe
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bf73e9fe
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bf73e9fe

Branch: refs/heads/spark-2.3
Commit: bf73e9fe77523e23be46e7597e2c990e855401e5
Parents: d14c403
Author: ravipesala 
Authored: Sat May 12 22:49:19 2018 +0530
Committer: Jacky Li 
Committed: Sun May 13 17:08:19 2018 +0800

--
 datamap/mv/core/pom.xml |  169 ++
 .../carbondata/mv/datamap/MVAnalyzerRule.scala  |  105 +
 .../mv/datamap/MVDataMapProvider.scala  |  125 +
 .../apache/carbondata/mv/datamap/MVHelper.scala |  377 +++
 .../apache/carbondata/mv/datamap/MVState.scala  |   55 +
 .../mv/rewrite/DefaultMatchMaker.scala  |  647 +
 .../carbondata/mv/rewrite/MatchConditions.scala |   28 +
 .../carbondata/mv/rewrite/MatchMaker.scala  |   47 +
 .../carbondata/mv/rewrite/Navigator.scala   |  196 ++
 .../carbondata/mv/rewrite/QueryRewrite.scala|   53 +
 .../mv/rewrite/SummaryDatasetCatalog.scala  |  168 ++
 .../apache/carbondata/mv/rewrite/Utils.scala|  358 +++
 .../mv/rewrite/MVCreateTestCase.scala   |  676 +
 .../mv/rewrite/MVSampleTestCase.scala   |  167 ++
 .../carbondata/mv/rewrite/MVTPCDSTestCase.scala |  146 +
 .../carbondata/mv/rewrite/MVTpchTestCase.scala  |  247 ++
 .../SelectSelectExactChildrenSuite.scala|   76 +
 .../carbondata/mv/rewrite/Tpcds_1_4_Suite.scala |   80 +
 .../mv/rewrite/matching/TestSQLBatch.scala  |  214 ++
 .../rewrite/matching/TestTPCDS_1_4_Batch.scala  | 2496 ++
 20 files changed, 6430 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/pom.xml
--
diff --git a/datamap/mv/core/pom.xml b/datamap/mv/core/pom.xml
new file mode 100644
index 000..99a8e22
--- /dev/null
+++ b/datamap/mv/core/pom.xml
@@ -0,0 +1,169 @@
+
+
+http://maven.apache.org/POM/4.0.0; 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd;>
+
+  4.0.0
+
+  
+org.apache.carbondata
+carbondata-parent
+1.4.0-SNAPSHOT
+../../../pom.xml
+  
+
+  carbondata-mv-core
+  Apache CarbonData :: Materialized View Core
+
+  
+${basedir}/../../../dev
+  
+
+  
+
+  org.apache.carbondata
+  carbondata-mv-plan
+  ${project.version}
+
+
+  org.apache.carbondata
+  carbondata-spark2
+  ${project.version}
+
+
+  org.scalatest
+  scalatest_${scala.binary.version}
+  test
+
+  
+
+  
+src/test/scala
+
+  
+maven-compiler-plugin
+
+  1.8
+  1.8
+
+  
+  
+org.apache.maven.plugins
+maven-surefire-plugin
+2.18
+
+
+  false
+  
${project.build.directory}/surefire-reports
+  -Xmx3g -XX:MaxPermSize=512m 
-XX:ReservedCodeCacheSize=512m
+  
+true
+  
+  false
+  false
+
+  
+
+  
+org.apache.maven.plugins
+maven-checkstyle-plugin
+2.17
+
+  true
+
+  
+  
+org.scala-tools
+maven-scala-plugin
+2.15.2
+
+  
+compile
+
+  compile
+
+compile
+  
+  
+testCompile
+
+  testCompile
+
+test
+  
+  
+process-resources
+
+  compile
+
+  
+
+  
+  
+org.apache.maven.plugins
+maven-enforcer-plugin
+1.4.1
+
+  true
+
+  
+  
+com.ning.maven.plugins
+maven-duplicate-finder-plugin
+
+  true
+
+  
+  
+org.scalatest
+scalatest-maven-plugin
+1.0
+
+
+  
${project.build.directory}/surefire-reports
+  .
+  false
+  CarbonTestSuite.txt
+  -ea -Xmx3g -XX:MaxPermSize=512m 
-XX:ReservedCodeCacheSize=512m
+  
+  
+  
+  
+  
+true
+  
+
+
+  
+test
+
+  test
+
+  
+
+  
+
+  
+  
+  
+sdvtest
+
+  true
+

[14/50] [abbrv] carbondata git commit: [CARBONDATA-2464]Fixed OOM issue in case of Complex type

2018-05-17 Thread manishgupta88
[CARBONDATA-2464]Fixed OOM issue in case of Complex type

Problem: Queries on complex types are failing with OOM.

Root Cause: Complex type child column (no-dictionary) values are written in LV
format; while reading, the length is read first and then that many bytes of
data. Converting the length bytes to an int produced a wrong value, so the
reader tried to allocate a huge memory chunk, and since that much memory is not
available in Unsafe it failed with OOM.

Code issue: While converting the byte array to an int, the byte values were not
masked, which produced a wrong integer value.

Solution: Mask each byte before left-shifting the bits.

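To see why the mask matters: 968 is stored big-endian as the bytes 00 00 03 C8, and 0xC8 read back as a signed Java byte is -56, so without & 0xff the last term subtracts instead of adding. A small standalone check mirroring the fixed ByteUtil.toInt:

public class MaskDemo {
  public static void main(String[] args) {
    byte[] data = {0x00, 0x00, 0x03, (byte) 0xC8};     // 968 in big-endian

    int unmasked = (((int) data[0]) << 24) + (((int) data[1]) << 16)
        + (((int) data[2]) << 8) + data[3];            // old, buggy form
    int masked = ((data[0] & 0xff) << 24) + ((data[1] & 0xff) << 16)
        + ((data[2] & 0xff) << 8) + (data[3] & 0xff);  // fixed form

    System.out.println(unmasked);  // 712 -> wrong length, later read blows up
    System.out.println(masked);    // 968 -> correct length
  }
}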
This closes #2288


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ceb7c8dd
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ceb7c8dd
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ceb7c8dd

Branch: refs/heads/spark-2.3
Commit: ceb7c8dd1ced457c7ce34f016abf30102e4931a9
Parents: cc0cbba
Author: kumarvishal09 
Authored: Wed May 9 17:04:21 2018 +0530
Committer: Jacky Li 
Committed: Thu May 10 15:27:32 2018 +0800

--
 .../main/java/org/apache/carbondata/core/util/ByteUtil.java| 4 ++--
 .../java/org/apache/carbondata/core/util/ByteUtilTest.java | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb7c8dd/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java 
b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
index d1c16bb..52fc3c3 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
@@ -517,8 +517,8 @@ public final class ByteUtil {
   }
 
   public static int toInt(byte[] bytes, int offset) {
-return (((int)bytes[offset]) << 24) + (((int)bytes[offset + 1]) << 16) +
-(((int)bytes[offset + 2]) << 8) + bytes[offset + 3];
+return (((int)bytes[offset] & 0xff) << 24) + (((int)bytes[offset + 1] & 
0xff) << 16) +
+(((int)bytes[offset + 2] & 0xff) << 8) + ((int)bytes[offset + 3] & 
0xff);
   }
 
   public static void setInt(byte[] data, int offset, int value) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb7c8dd/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java
--
diff --git 
a/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java 
b/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java
index d51e184..d93aa49 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java
@@ -113,6 +113,12 @@ public class ByteUtilTest extends TestCase {
 prepareBuffers();
 assertFalse(UnsafeComparer.INSTANCE.compareTo(buff1, buff2) < 0);
 }
+@Test
+public void testIntConversion() {
+byte[] data = new byte[4];
+ByteUtil.setInt(data, 0, 968);
+assertEquals(ByteUtil.toInt(data, 0), 968);
+}
 
 @Test
 public void testEqualToCase() {



[16/50] [abbrv] carbondata git commit: [CARBONDATA-2435] Remove SDK dependency on spark jars.

2018-05-17 Thread manishgupta88
[CARBONDATA-2435] Remove SDK dependency on spark jars.

[CARBONDATA-2435] Remove SDK dependency on spark jars.
Problem and cause: when the SDK writer is used in a standalone cluster
without Spark jars, an exception is thrown while initializing the reverse
dictionary cache.

Solution: the Carbon SDK does not support dictionary encoding, and this
Spark dependency exists only for dictionary encoding. Move the
Spark-dependent code inside the dictionary-encoding if block, so that the
SDK flow never has to touch Spark classes.

This closes #2289


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ff5166ef
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ff5166ef
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ff5166ef

Branch: refs/heads/spark-2.3
Commit: ff5166ef78c42ca0819d3d9fa439aa56d32953eb
Parents: b2060c6
Author: ajantha-bhat 
Authored: Wed May 9 18:07:56 2018 +0530
Committer: ravipesala 
Committed: Thu May 10 16:23:12 2018 +0530

--
 .../processing/datatypes/PrimitiveDataType.java | 12 ++
 .../impl/DictionaryFieldConverterImpl.java  | 12 ++
 .../converter/impl/FieldEncoderFactory.java | 25 +++-
 .../converter/impl/RowConverterImpl.java| 13 ++
 .../InputProcessorStepWithNoConverterImpl.java  |  2 +-
 5 files changed, 28 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff5166ef/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
 
b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
index dee8968..e34c184 100644
--- 
a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
+++ 
b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -28,6 +28,8 @@ import java.util.Map;
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
 import org.apache.carbondata.core.cache.dictionary.Dictionary;
 import 
org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -135,7 +137,6 @@ public class PrimitiveDataType implements 
GenericDataType {
* @param parentname
* @param columnId
* @param carbonDimension
-   * @param cache
* @param absoluteTableIdentifier
* @param client
* @param useOnePass
@@ -144,9 +145,9 @@ public class PrimitiveDataType implements 
GenericDataType {
* @param isEmptyBadRecords
*/
   public PrimitiveDataType(CarbonColumn carbonColumn, String parentname, 
String columnId,
-  CarbonDimension carbonDimension, Cache cache,
-  AbsoluteTableIdentifier absoluteTableIdentifier, DictionaryClient 
client, Boolean useOnePass,
-  Map localCache, String nullFormat, Boolean 
isEmptyBadRecords) {
+  CarbonDimension carbonDimension, AbsoluteTableIdentifier 
absoluteTableIdentifier,
+  DictionaryClient client, Boolean useOnePass, Map 
localCache,
+  String nullFormat, Boolean isEmptyBadRecords) {
 this.name = carbonColumn.getColName();
 this.parentname = parentname;
 this.columnId = columnId;
@@ -163,6 +164,9 @@ public class PrimitiveDataType implements 
GenericDataType {
 dictionaryGenerator = new 
DirectDictionary(DirectDictionaryKeyGeneratorFactory
 .getDirectDictionaryGenerator(carbonDimension.getDataType()));
   } else if (carbonDimension.hasEncoding(Encoding.DICTIONARY)) {
+CacheProvider cacheProvider = CacheProvider.getInstance();
+Cache cache =
+cacheProvider.createCache(CacheType.REVERSE_DICTIONARY);
 Dictionary dictionary = null;
 if (useOnePass) {
   if (CarbonUtil.isFileExistsForGivenColumn(identifier)) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff5166ef/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldConverterImpl.java
--
diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldConverterImpl.java
 

[21/50] [abbrv] carbondata git commit: [CARBONDATA-2452] [CARBONDATA-2451] [CARBONDATA-2450] [CARBONDATA-2453] Fixed issues related to complex types

2018-05-17 Thread manishgupta88
[CARBONDATA-2452] [CARBONDATA-2451] [CARBONDATA-2450] [CARBONDATA-2453] Fixed 
issues related to complex types

Issue 1: Dictionary encoding was being added to complex types in the SDK
case, which led to data load failures.
Issue 2: Sort columns were not being validated against the table schema.
Issue 3: Bad record handling was missing for complex types.
Issue 4: The parent name was not prepended to the field name before checking
for duplicates, which raised a spurious duplicate column exception
(illustrated below).
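
A hedged illustration of the fixed duplicate check only (the set, helper and
column names are made up; the real change is in TableSchemaBuilder below):
child fields are compared by their fully qualified name, so two structs that
both contain a child called "city" no longer collide.

  val alreadyAdded = Set("name", "address.city")
  def wouldCollide(parent: Option[String], field: String): Boolean = {
    val qualified = parent.map(p => s"$p.$field").getOrElse(field)
    alreadyAdded.contains(qualified)
  }
  wouldCollide(Some("address"), "city") // true: a genuine duplicate
  wouldCollide(Some("office"), "city")  // false: no longer a false positive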

This closes #2278


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6b70b7e4
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6b70b7e4
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6b70b7e4

Branch: refs/heads/spark-2.3
Commit: 6b70b7e47b05a612ccb5a5ad01ee2d5a05ffa600
Parents: 8e7fceb
Author: kunal642 
Authored: Mon May 7 20:58:21 2018 +0530
Committer: kumarvishal09 
Committed: Fri May 11 03:27:36 2018 +0530

--
 .../schema/table/TableSchemaBuilder.java|  21 +-
 .../complexType/TestComplexTypeQuery.scala  |   2 +
 .../TestNonTransactionalCarbonTable.scala   | 410 +--
 .../processing/datatypes/ArrayDataType.java |  11 +-
 .../processing/datatypes/GenericDataType.java   |   3 +-
 .../processing/datatypes/PrimitiveDataType.java |  41 +-
 .../processing/datatypes/StructDataType.java|  11 +-
 .../loading/DataLoadProcessBuilder.java |   9 +
 .../impl/ComplexFieldConverterImpl.java |   2 +-
 .../DirectDictionaryFieldConverterImpl.java |   1 -
 .../loading/model/CarbonLoadModelBuilder.java   |  15 +-
 .../InputProcessorStepWithNoConverterImpl.java  |  32 +-
 .../sdk/file/CarbonWriterBuilder.java   |  24 +-
 13 files changed, 524 insertions(+), 58 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6b70b7e4/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index b078400..03d03f8 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -122,7 +122,13 @@ public class TableSchemaBuilder {
   private ColumnSchema addColumn(StructField field, String parentName, 
AtomicInteger valIndex,
   boolean isSortColumn, boolean isComplexChild) {
 Objects.requireNonNull(field);
-checkRepeatColumnName(field);
+if (isComplexChild) {
+  // if field is complex then append parent name to the child field to 
check
+  // if any other field with same name exists
+  checkRepeatColumnName(field, parentName);
+} else {
+  checkRepeatColumnName(field);
+}
 ColumnSchema newColumn = new ColumnSchema();
 if (parentName != null) {
   newColumn.setColumnName(parentName + "." + field.getFieldName());
@@ -156,7 +162,7 @@ public class TableSchemaBuilder {
 // SO, this will not have any impact.
 newColumn.setColumnUniqueId(field.getFieldName());
 newColumn.setColumnReferenceId(newColumn.getColumnUniqueId());
-newColumn.setEncodingList(createEncoding(field.getDataType(), 
isSortColumn));
+newColumn.setEncodingList(createEncoding(field.getDataType(), 
isSortColumn, isComplexChild));
 if (field.getDataType().isComplexType()) {
   if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
 newColumn.setNumberOfChild(1);
@@ -209,6 +215,12 @@ public class TableSchemaBuilder {
   /**
* Throw exception if {@param field} name is repeated
*/
+  private void checkRepeatColumnName(StructField field, String parentName) {
+checkRepeatColumnName(
+new StructField(parentName + "." + field.getFieldName(), 
field.getDataType(),
+field.getChildren()));
+  }
+
   private void checkRepeatColumnName(StructField field) {
 for (ColumnSchema column : sortColumns) {
   if (column.getColumnName().equalsIgnoreCase(field.getFieldName())) {
@@ -234,9 +246,10 @@ public class TableSchemaBuilder {
 }
   }
 
-  private List createEncoding(DataType dataType, boolean 
isSortColumn) {
+  private List createEncoding(DataType dataType, boolean 
isSortColumn,
+  boolean isComplexChild) {
 List encodings = new LinkedList<>();
-if (dataType == DataTypes.TIMESTAMP || dataType == DataTypes.DATE) {
+if (dataType == DataTypes.DATE && !isComplexChild) {
   encodings.add(Encoding.DIRECT_DICTIONARY);
   

[19/50] [abbrv] carbondata git commit: [CARBONDATA-2442][CARBONDATA-2469] Fixed: multiple issues in sdk writer and external table

2018-05-17 Thread manishgupta88
[CARBONDATA-2442][CARBONDATA-2469] Fixed: multiple issues in sdk writer and 
external table

Problem 1: when the output of two SDK writers with different schemas is
placed in the same folder for reading, the result is not as expected and
contains many null values.

Root cause: when multiple carbondata and index files are placed in the same
folder, the table schema is inferred from the first file; there is no
validation comparing that schema with the schemas of the remaining index
files.

Solution: compare the table schema with every other index file schema and
throw an exception on a mismatch (see the sketch below).
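
A hedged sketch of the validation idea only (the helper name is made up; the
actual checks are the Java changes to CarbonUtil/CarbonTableInputFormat
below), using the strict column comparison added in this commit:

  import java.io.IOException
  import scala.collection.JavaConverters._
  import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema

  def assertSameSchema(tableColumns: java.util.List[ColumnSchema],
      indexFileColumns: java.util.List[ColumnSchema]): Unit = {
    val same = tableColumns.size == indexFileColumns.size &&
      tableColumns.asScala.zip(indexFileColumns.asScala)
        .forall { case (a, b) => a.equalsWithStrictCheck(b) }
    if (!same) {
      throw new IOException("Schema of all carbon index files is not the same")
    }
  }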

Problem 2: an external table must show its own location instead of the
default store location.
Solution: for external tables, show the carbon table path instead of the
default store location in DESCRIBE FORMATTED.

This closes #2273


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1dfbcfcc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1dfbcfcc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1dfbcfcc

Branch: refs/heads/spark-2.3
Commit: 1dfbcfccca5302fd02c31d2c2386cafa75b1f10c
Parents: fe436c3
Author: ajantha-bhat 
Authored: Sat May 5 16:59:44 2018 +0530
Committer: kunal642 
Committed: Thu May 10 23:02:43 2018 +0530

--
 .../schema/table/column/ColumnSchema.java   |  30 ++
 .../LatestFilesReadCommittedScope.java  |   5 +
 .../apache/carbondata/core/util/CarbonUtil.java |  64 +---
 .../hadoop/api/CarbonTableInputFormat.java  |  43 
 .../createTable/TestCreateExternalTable.scala   |   2 +
 .../TestNonTransactionalCarbonTable.scala   | 103 ++-
 .../table/CarbonDescribeFormattedCommand.scala  |   4 +-
 7 files changed, 233 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1dfbcfcc/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
index edede18..1f05f63 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
@@ -343,6 +343,36 @@ public class ColumnSchema implements Serializable, 
Writable {
   }
 
   /**
+   * method to compare columnSchema,
+   * other parameters along with just column name and column data type
+   * @param obj
+   * @return
+   */
+  public boolean equalsWithStrictCheck(Object obj) {
+if (!this.equals(obj)) {
+  return false;
+}
+ColumnSchema other = (ColumnSchema) obj;
+if (!columnUniqueId.equals(other.columnUniqueId) ||
+(isDimensionColumn != other.isDimensionColumn) ||
+(scale != other.scale) ||
+(precision != other.precision) ||
+(isSortColumn != other.isSortColumn)) {
+  return false;
+}
+if (encodingList.size() != other.encodingList.size()) {
+  return false;
+}
+for (int i = 0; i < encodingList.size(); i++) {
+  if (encodingList.get(i).compareTo(other.encodingList.get(i)) != 0) {
+return false;
+  }
+}
+
+return true;
+  }
+
+  /**
* @return the dataType
*/
   public DataType getDataType() {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1dfbcfcc/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
index 3f870b8..8abf537 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
@@ -122,6 +122,11 @@ public class LatestFilesReadCommittedScope implements 
ReadCommittedScope {
   @Override public void takeCarbonIndexFileSnapShot() throws IOException {
 // Read the current file Path get the list of indexes from the path.
 CarbonFile file = FileFactory.getCarbonFile(carbonFilePath);
+if (file == null) {
+  // For nonTransactional table, files can be removed at any point of time.
+  // So cannot assume files will be present
+  throw new IOException("No files are present in the table location :"+ 
carbonFilePath);
+}
 Map indexFileStore = new HashMap<>();
 if (file.isDirectory()) {
   

[28/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap

2018-05-17 Thread manishgupta88
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
--
diff --git 
a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
new file mode 100644
index 000..6363089
--- /dev/null
+++ 
b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.plans.util
+
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.optimizer._
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
+import org.apache.spark.sql.catalyst.rules.{RuleExecutor, _}
+import org.apache.spark.sql.internal.SQLConf
+
+object BirdcageOptimizer extends RuleExecutor[LogicalPlan] {
+
+  val conf = new SQLConf()
+.copy(SQLConf.CASE_SENSITIVE -> true, SQLConf.STARSCHEMA_DETECTION -> true)
+  protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations)
+
+  def batches: Seq[Batch] = {
+// Technically some of the rules in Finish Analysis are not optimizer 
rules and belong more
+// in the analyzer, because they are needed for correctness (e.g. 
ComputeCurrentTime).
+// However, because we also use the analyzer to canonicalized queries (for 
view definition),
+// we do not eliminate subqueries or compute current time in the analyzer.
+Batch(
+  "Finish Analysis", Once,
+  EliminateSubqueryAliases,
+  EliminateView,
+  ReplaceExpressions,
+  ComputeCurrentTime,
+  //  GetCurrentDatabase(sessionCatalog),
+  RewriteDistinctAggregates,
+  ReplaceDeduplicateWithAggregate) ::
+
//
+// Optimizer rules start here
+
//
+// - Do the first call of CombineUnions before starting the major 
Optimizer rules,
+//   since it can reduce the number of iteration and the other rules could 
add/move
+//   extra operators between two adjacent Union operators.
+// - Call CombineUnions again in Batch("Operator Optimizations"),
+//   since the other rules might make two separate Unions operators 
adjacent.
+Batch(
+  "Union", Once,
+  CombineUnions) ::
+Batch(
+  "Pullup Correlated Expressions", Once,
+  PullupCorrelatedPredicates) ::
+Batch(
+  "Subquery", Once,
+  OptimizeSubqueries) ::
+Batch(
+  "Replace Operators", fixedPoint,
+  ReplaceIntersectWithSemiJoin,
+  ReplaceExceptWithAntiJoin,
+  ReplaceDistinctWithAggregate) ::
+Batch(
+  "Aggregate", fixedPoint,
+  RemoveLiteralFromGroupExpressions,
+  RemoveRepetitionFromGroupExpressions) ::
+Batch(
+  "Operator Optimizations", fixedPoint, Seq(
+// Operator push down
+PushProjectionThroughUnion,
+ReorderJoin(conf),
+EliminateOuterJoin(conf),
+PushPredicateThroughJoin,
+PushDownPredicate,
+//  LimitPushDown(conf),
+ColumnPruning,
+//  InferFiltersFromConstraints(conf),
+// Operator combine
+CollapseRepartition,
+CollapseProject,
+CollapseWindow,
+CombineFilters,
+CombineLimits,
+CombineUnions,
+// Constant folding and strength reduction
+NullPropagation(conf),
+FoldablePropagation,
+//  OptimizeIn(conf),
+ConstantFolding,
+ReorderAssociativeOperator,
+LikeSimplification,
+BooleanSimplification,
+SimplifyConditionals,
+RemoveDispensableExpressions,
+SimplifyBinaryComparison,
+//  PruneFilters(conf),
+EliminateSorts,
+SimplifyCasts,
+SimplifyCaseConversionExpressions,
+RewriteCorrelatedScalarSubquery,
+

[10/50] [abbrv] carbondata git commit: [CARBONDATA-2416] Support DEFERRED REBUILD when creating DataMap

2018-05-17 Thread manishgupta88
[CARBONDATA-2416] Support DEFERRED REBUILD when creating DataMap

1. The REFRESH DATAMAP command is changed to REBUILD DATAMAP
2. When creating a datamap, the user can choose to load it immediately or to
defer it and trigger REBUILD DATAMAP manually later (see the sketch below)
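
A hedged usage sketch in the style of the test suites in this thread (the
table, datamap name and DMPROPERTIES are illustrative, and the exact grammar
is defined by the parser change in this patch):

  // create the datamap but defer loading it; it stays disabled until rebuilt
  sql("CREATE DATAMAP dm_city ON TABLE sales USING 'bloomfilter' " +
    "WITH DEFERRED REBUILD DMPROPERTIES('INDEX_COLUMNS'='city')")
  // load and enable it later with the new command (previously REFRESH DATAMAP)
  sql("REBUILD DATAMAP dm_city")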

This closes #2255


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/747be9b1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/747be9b1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/747be9b1

Branch: refs/heads/spark-2.3
Commit: 747be9b111ab0a12e7550124a9facccacf8ad861
Parents: fb12897
Author: Jacky Li 
Authored: Tue May 1 17:17:33 2018 +0800
Committer: QiangCai 
Committed: Wed May 9 18:59:03 2018 +0800

--
 .../carbondata/core/datamap/DataMapChooser.java |  75 +++--
 .../core/datamap/DataMapProvider.java   |   3 +-
 .../core/datamap/DataMapStoreManager.java   |  19 +-
 .../core/datamap/dev/DataMapBuilder.java|  38 +++
 .../core/datamap/dev/DataMapFactory.java|   5 +-
 .../core/datamap/dev/DataMapRefresher.java  |  36 ---
 .../datamap/status/DataMapStatusDetail.java |   4 +
 .../datamap/status/DataMapStatusManager.java|  30 +-
 .../blockletindex/BlockletDataMapFactory.java   |   4 +-
 .../schema/datamap/DataMapProperty.java |  39 +++
 .../metadata/schema/table/DataMapSchema.java|  16 +-
 .../bloom/BloomCoarseGrainDataMapFactory.java   |   6 +-
 .../datamap/bloom/BloomDataMapBuilder.java  |  91 ++
 .../datamap/bloom/BloomDataMapRefresher.java|  91 --
 .../examples/MinMaxIndexDataMapFactory.java |  30 +-
 .../datamap/lucene/LuceneDataMapBuilder.java| 224 +
 .../lucene/LuceneDataMapFactoryBase.java|   9 +-
 .../datamap/lucene/LuceneDataMapRefresher.java  | 224 -
 .../hadoop/api/CarbonInputFormat.java   |  32 +-
 .../hadoop/api/CarbonOutputCommitter.java   |   2 +-
 .../lucene/LuceneFineGrainDataMapSuite.scala|  68 ++--
 .../preaggregate/TestPreAggCreateCommand.scala  |  10 +
 .../testsuite/datamap/CGDataMapTestCase.scala   |   6 +-
 .../testsuite/datamap/DataMapWriterSuite.scala  |   6 +-
 .../testsuite/datamap/FGDataMapTestCase.scala   |   8 +-
 .../testsuite/datamap/TestDataMapStatus.scala   |  71 -
 .../detailquery/SearchModeTestCase.scala|   2 -
 .../TestInsertAndOtherCommandConcurrent.scala   |  59 ++--
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |   3 +-
 .../carbondata/datamap/DataMapProperty.java |  33 --
 .../datamap/IndexDataMapProvider.java   |   2 +-
 .../datamap/PreAggregateDataMapProvider.java|  12 +-
 .../datamap/IndexDataMapRebuildRDD.scala| 318 +++
 .../datamap/IndexDataMapRefreshRDD.scala| 317 --
 .../spark/rdd/CarbonDataRDDFactory.scala|   4 +-
 .../org/apache/spark/sql/CarbonSession.scala|  11 +
 .../datamap/CarbonCreateDataMapCommand.scala|  29 +-
 .../datamap/CarbonDataMapRebuildCommand.scala   |  56 
 .../datamap/CarbonDataMapRefreshCommand.scala   |  56 
 .../datasources/SparkCarbonFileFormat.scala |   5 +-
 .../sql/parser/CarbonSpark2SqlParser.scala  |  20 +-
 .../bloom/BloomCoarseGrainDataMapSuite.scala| 187 +--
 .../datamap/DataMapWriterListener.java  |   8 +-
 .../store/worker/SearchRequestHandler.java  |  56 ++--
 .../scala/org/apache/spark/rpc/Master.scala |  18 +-
 .../org/apache/spark/search/Searcher.scala  |   8 +-
 46 files changed, 1318 insertions(+), 1033 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/747be9b1/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java 
b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
index 478254d..7cdabd6 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
@@ -21,7 +21,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Objects;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
@@ -29,6 +29,8 @@ import 
org.apache.carbondata.core.datamap.dev.expr.AndDataMapExprWrapper;
 import org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper;
 import org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapperImpl;
 import org.apache.carbondata.core.datamap.dev.expr.OrDataMapExprWrapper;
+import org.apache.carbondata.core.datamap.status.DataMapStatusDetail;
+import 

[13/50] [abbrv] carbondata git commit: [CARBONDATA-2352] Added SDV test cases for Partition with Pre-Aggregate table

2018-05-17 Thread manishgupta88
[CARBONDATA-2352] Added SDV test cases for Partition with Pre-Aggregate table

Added SDV test cases covering support for partitioning with pre-aggregate tables.

This closes #2175


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cc0cbbac
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cc0cbbac
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cc0cbbac

Branch: refs/heads/spark-2.3
Commit: cc0cbbac7dca8f14904f231df16a524c07c9af5f
Parents: e70b6b4
Author: praveenmeenakshi56 
Authored: Mon Apr 16 15:14:20 2018 +0530
Committer: kunal642 
Committed: Thu May 10 12:49:39 2018 +0530

--
 .../PartitionWithPreAggregateTestCase.scala | 275 +++
 .../cluster/sdv/suite/SDVSuites.scala   |   6 +-
 2 files changed, 279 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cc0cbbac/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/PartitionWithPreAggregateTestCase.scala
--
diff --git 
a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/PartitionWithPreAggregateTestCase.scala
 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/PartitionWithPreAggregateTestCase.scala
new file mode 100644
index 000..87f5fda
--- /dev/null
+++ 
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/PartitionWithPreAggregateTestCase.scala
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.cluster.sdv.generated
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util._
+import org.scalatest.BeforeAndAfterAll
+/**
+  * Test Class for Support of Partition with PreAggregate table
+  */
+class PartitionWithPreAggregateTestCase extends QueryTest with 
BeforeAndAfterAll {
+  override def beforeAll = {
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "/MM/dd 
HH:mm:ss")
+  .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "/MM/dd")
+  }
+
+  //Loading data into partitioned table with SORT_SCOPE=LOCAL_SORT
+  test("Partition-With-PreAggregate_TC001", Include) {
+sql("drop table if exists partition_table")
+sql(
+  s"""CREATE TABLE partition_table(shortField SHORT, intField INT, 
bigintField LONG,
+ |doubleField DOUBLE, timestamp TIMESTAMP, decimalField 
DECIMAL(18,2),dateField DATE,
+ |charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField 
STRING) STORED BY
+ |'carbondata' TBLPROPERTIES('SORT_SCOPE'='LOCAL_SORT')""".stripMargin)
+sql(
+  s"""load data inpath 
'$resourcesPath/Data/partition/list_partition_table.csv' into table
+ |partition_table""".stripMargin)
+sql(
+  "create datamap ag1 on table partition_table using 'preaggregate' as 
select shortField, sum" +
+  "(intField) from partition_table group by shortField")
+checkAnswer(sql(
+  s"""select decimalfield from partition_table where charfield='e' and
+ |floatfield=307.301 group by decimalfield limit 1""".stripMargin),
+  Seq(Row(159.10)))
+  }
+
+  //Loading data into partitioned table with SORT_SCOPE=GLOBAL_SORT
+  test("Partition-With-PreAggregate_TC002", Include) {
+sql("drop table if exists partition_table")
+sql(
+  s"""CREATE TABLE partition_table(shortField SHORT, intField INT, 
bigintField LONG,
+ |doubleField DOUBLE, timestamp TIMESTAMP, decimalField 
DECIMAL(18,2),dateField DATE,
+ |charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField 
STRING) STORED BY
+ |'carbondata' 
TBLPROPERTIES('SORT_SCOPE'='GLOBAL_SORT')""".stripMargin)
+

[17/50] [abbrv] carbondata git commit: [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK

2018-05-17 Thread manishgupta88
[CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK

Problem:
Inferring a complex type schema that contains a boolean array from a store
created by the SDK writer fails.

Analysis:
When an external table is created and its schema is inferred from a store
written by the SDK writer, the operation fails for complex type fields with
a boolean array data type. During schema creation the SDK writer adds a
child column named val for array type children, but while parsing, the logic
that appends the parent name to the child column name was missing for the
boolean type, which causes this failure.

Solution:
Handle the parsing for the boolean type as well.

This closes #2294


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/35a7b5e9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/35a7b5e9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/35a7b5e9

Branch: refs/heads/spark-2.3
Commit: 35a7b5e9af5cabe85794274e07cab9a6c53f
Parents: ff5166e
Author: manishgupta88 <tomanishgupt...@gmail.com>
Authored: Thu May 10 17:09:17 2018 +0530
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Thu May 10 20:49:28 2018 +0530

--
 .../schema/table/TableSchemaBuilder.java| 33 
 .../schema/table/TableSchemaBuilderSuite.java   | 13 +---
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |  5 +++
 .../sdk/file/CarbonWriterBuilder.java   | 13 ++--
 4 files changed, 36 insertions(+), 28 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
--
diff --git 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index ca082e1..b078400 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
@@ -114,12 +115,12 @@ public class TableSchemaBuilder {
 this.sortColumns = sortColumns;
   }
 
-  public ColumnSchema addColumn(StructField field, boolean isSortColumn) {
-return addColumn(field, null, isSortColumn, false);
+  public ColumnSchema addColumn(StructField field, AtomicInteger valIndex, 
boolean isSortColumn) {
+return addColumn(field, null, valIndex, isSortColumn, false);
   }
 
-  private ColumnSchema addColumn(StructField field, String parentName, boolean 
isSortColumn,
-  boolean isComplexChild) {
+  private ColumnSchema addColumn(StructField field, String parentName, 
AtomicInteger valIndex,
+  boolean isSortColumn, boolean isComplexChild) {
 Objects.requireNonNull(field);
 checkRepeatColumnName(field);
 ColumnSchema newColumn = new ColumnSchema();
@@ -184,33 +185,25 @@ public class TableSchemaBuilder {
 if (field.getDataType().isComplexType()) {
   String parentFieldName = newColumn.getColumnName();
   if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
-String colName = getColNameForArray(parentFieldName);
-addColumn(new StructField(colName,
-((ArrayType) field.getDataType()).getElementType()), 
field.getFieldName(), false, true);
+String colName = getColNameForArray(valIndex);
+addColumn(new StructField(colName, ((ArrayType) 
field.getDataType()).getElementType()),
+field.getFieldName(), valIndex, false, true);
   } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT")
   && ((StructType) field.getDataType()).getFields().size() > 0) {
 // This field has children.
 List fields = ((StructType) 
field.getDataType()).getFields();
 for (int i = 0; i < fields.size(); i++) {
-  addColumn(fields.get(i), parentFieldName, false, true);
+  addColumn(fields.get(i), parentFieldName, valIndex, false, true);
 }
   }
 }
 return newColumn;
   }
 
-  private String getColNameForArray(String parentFieldName) {
-if (!parentFieldName.endsWith(".val")) {
-  return "val";
-} else {
-  String[] splits = parentFieldName.split("val");
-  if (splits.length == 1) {
-return "va

  1   2   3   >