[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2019-01-07 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r245633754
  
--- Diff: pom.xml ---
@@ -527,6 +526,7 @@
 examples/spark2
 datamap/lucene
 datamap/bloom
+datamap/example
--- End diff --

Excluding this will cause the datamap example module to become outdated and
accumulate unfixed bugs over time, which is the state this module was in before.

Maybe we can exclude it from the assembly jar instead.


---


[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2019-01-03 Thread jackylk
Github user jackylk commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r245013088
  
--- Diff: pom.xml ---
@@ -527,6 +526,7 @@
 examples/spark2
 datamap/lucene
 datamap/bloom
+datamap/example
--- End diff --

I think it is better not to add this, since it will make the assembly jar
bigger.


---


[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-28 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r244344781
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/MinMaxDataMapFactory.java
 ---
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datamap.DataMapDistributable;
+import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.DataMapMeta;
+import org.apache.carbondata.core.datamap.DataMapStoreManager;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.TableDataMap;
+import org.apache.carbondata.core.datamap.dev.DataMapBuilder;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.features.TableOperation;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Min Max DataMap Factory
+ */
+@InterfaceAudience.Internal
+public class MinMaxDataMapFactory extends CoarseGrainDataMapFactory {
+  private static final Logger LOGGER =
+  
LogServiceFactory.getLogService(MinMaxDataMapFactory.class.getName());
+  private DataMapMeta dataMapMeta;
+  private String dataMapName;
+  // segmentId -> list of index files
+  private Map> segmentMap = new ConcurrentHashMap<>();
+  private Cache cache;
+
+  public MinMaxDataMapFactory(CarbonTable carbonTable, DataMapSchema 
dataMapSchema)
+  throws MalformedDataMapCommandException {
+super(carbonTable, dataMapSchema);
+
+// this is an example for datamap, we can choose the columns and 
operations that
+// will be supported by this datamap. Furthermore, we can add 
cache-support for this datamap.
+
+this.dataMapName = dataMapSchema.getDataMapName();
+List indexedColumns = 
carbonTable.getIndexedColumns(dataMapSchema);
+
+// operations that will be supported on the indexed columns
+List optOperations = new ArrayList<>();
+optOperations.add(ExpressionType.NOT);
+optOperations.add(ExpressionType.EQUALS);
+optOperations.add(ExpressionType.NOT_EQUALS);
+optOperations.add(ExpressionType.GREATERTHAN);
+optOperations.add(ExpressionType.GREATERTHAN_EQUALTO);
+optOperations.add(ExpressionType.LESSTHAN);
 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-26 Thread jackylk
Github user jackylk commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r244007049
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/MinMaxDataMapFactory.java
 ---
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datamap.DataMapDistributable;
+import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.DataMapMeta;
+import org.apache.carbondata.core.datamap.DataMapStoreManager;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.TableDataMap;
+import org.apache.carbondata.core.datamap.dev.DataMapBuilder;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.features.TableOperation;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Min Max DataMap Factory
+ */
+@InterfaceAudience.Internal
+public class MinMaxDataMapFactory extends CoarseGrainDataMapFactory {
+  private static final Logger LOGGER =
+  
LogServiceFactory.getLogService(MinMaxDataMapFactory.class.getName());
+  private DataMapMeta dataMapMeta;
+  private String dataMapName;
+  // segmentId -> list of index files
+  private Map> segmentMap = new ConcurrentHashMap<>();
+  private Cache cache;
+
+  public MinMaxDataMapFactory(CarbonTable carbonTable, DataMapSchema 
dataMapSchema)
+  throws MalformedDataMapCommandException {
+super(carbonTable, dataMapSchema);
+
+// this is an example for datamap, we can choose the columns and 
operations that
+// will be supported by this datamap. Furthermore, we can add 
cache-support for this datamap.
+
+this.dataMapName = dataMapSchema.getDataMapName();
+List indexedColumns = 
carbonTable.getIndexedColumns(dataMapSchema);
+
+// operations that will be supported on the indexed columns
+List optOperations = new ArrayList<>();
+optOperations.add(ExpressionType.NOT);
+optOperations.add(ExpressionType.EQUALS);
+optOperations.add(ExpressionType.NOT_EQUALS);
+optOperations.add(ExpressionType.GREATERTHAN);
+optOperations.add(ExpressionType.GREATERTHAN_EQUALTO);
+optOperations.add(ExpressionType.LESSTHAN);

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240581684
  
--- Diff: 
integration/spark2/src/test/scala/org/apache/carbondata/datamap/minmax/MinMaxDataMapFunctionSuite.scala
 ---
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax
+
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+class MinMaxDataMapFunctionSuite extends QueryTest with BeforeAndAfterAll {
+  private val minmaxDataMapFactoryName = 
"org.apache.carbondata.datamap.minmax.MinMaxDataMapFactory"
+  var originalStatEnabled = CarbonProperties.getInstance().getProperty(
+CarbonCommonConstants.ENABLE_QUERY_STATISTICS,
+CarbonCommonConstants.ENABLE_QUERY_STATISTICS_DEFAULT)
+
+  override protected def beforeAll(): Unit = {
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, "true")
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+  "yyyy-MM-dd")
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+  "yyyy-MM-dd HH:mm:ss")
--- End diff --

I think this modification is OK.
We explicitly specify the format here to indicate that this is the format of
our input data. (I'm concerned that the default behavior may change later.)


---


[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240580992
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/MinMaxDataMapFactory.java
 ---
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datamap.DataMapDistributable;
+import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.DataMapMeta;
+import org.apache.carbondata.core.datamap.DataMapStoreManager;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.TableDataMap;
+import org.apache.carbondata.core.datamap.dev.DataMapBuilder;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.features.TableOperation;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Min Max DataMap Factory
+ */
+@InterfaceAudience.Internal
+public class MinMaxDataMapFactory extends CoarseGrainDataMapFactory {
+  private static final Logger LOGGER =
+  
LogServiceFactory.getLogService(MinMaxDataMapFactory.class.getName());
+  private DataMapMeta dataMapMeta;
+  private String dataMapName;
+  // segmentId -> list of index files
+  private Map> segmentMap = new ConcurrentHashMap<>();
+  private Cache cache;
+
+  public MinMaxDataMapFactory(CarbonTable carbonTable, DataMapSchema 
dataMapSchema)
+  throws MalformedDataMapCommandException {
+super(carbonTable, dataMapSchema);
+
+// this is an example for datamap, we can choose the columns and 
operations that
+// will be supported by this datamap. Furthermore, we can add 
cache-support for this datamap.
+
+this.dataMapName = dataMapSchema.getDataMapName();
+List indexedColumns = 
carbonTable.getIndexedColumns(dataMapSchema);
+
+// operations that will be supported on the indexed columns
+List optOperations = new ArrayList<>();
+optOperations.add(ExpressionType.NOT);
+optOperations.add(ExpressionType.EQUALS);
+optOperations.add(ExpressionType.NOT_EQUALS);
+optOperations.add(ExpressionType.GREATERTHAN);
+optOperations.add(ExpressionType.GREATERTHAN_EQUALTO);
+optOperations.add(ExpressionType.LESSTHAN);
 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread qiuchenjian
Github user qiuchenjian commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240581131
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240579947
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240579236
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-11 Thread xuchuanyin
Github user xuchuanyin commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r240578382
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-02 Thread Indhumathi27
Github user Indhumathi27 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r238156188
  
--- Diff: 
integration/spark2/src/test/scala/org/apache/carbondata/datamap/minmax/MinMaxDataMapFunctionSuite.scala
 ---
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax
+
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+class MinMaxDataMapFunctionSuite extends QueryTest with BeforeAndAfterAll {
+  private val minmaxDataMapFactoryName = 
"org.apache.carbondata.datamap.minmax.MinMaxDataMapFactory"
+  var originalStatEnabled = CarbonProperties.getInstance().getProperty(
+CarbonCommonConstants.ENABLE_QUERY_STATISTICS,
+CarbonCommonConstants.ENABLE_QUERY_STATISTICS_DEFAULT)
+
+  override protected def beforeAll(): Unit = {
+CarbonProperties.getInstance()
+  .addProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, "true")
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+  "yyyy-MM-dd")
+
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+  "yyyy-MM-dd HH:mm:ss")
--- End diff --

Please check whether we can use the default timestamp/date format here.

org.apache.carbondata.core.constants.CarbonCommonConstants#CARBON_TIMESTAMP_DEFAULT_FORMAT


---


[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-02 Thread dhatchayani
Github user dhatchayani commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r238154502
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/MinMaxDataMapFactory.java
 ---
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datamap.DataMapDistributable;
+import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.DataMapMeta;
+import org.apache.carbondata.core.datamap.DataMapStoreManager;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.TableDataMap;
+import org.apache.carbondata.core.datamap.dev.DataMapBuilder;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.features.TableOperation;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Min Max DataMap Factory
+ */
+@InterfaceAudience.Internal
+public class MinMaxDataMapFactory extends CoarseGrainDataMapFactory {
+  private static final Logger LOGGER =
+  
LogServiceFactory.getLogService(MinMaxDataMapFactory.class.getName());
+  private DataMapMeta dataMapMeta;
+  private String dataMapName;
+  // segmentId -> list of index files
+  private Map<String, Set<String>> segmentMap = new ConcurrentHashMap<>();
+  private Cache cache;
+
+  public MinMaxDataMapFactory(CarbonTable carbonTable, DataMapSchema 
dataMapSchema)
+  throws MalformedDataMapCommandException {
+super(carbonTable, dataMapSchema);
+
+// this is an example for datamap, we can choose the columns and 
operations that
+// will be supported by this datamap. Furthermore, we can add 
cache-support for this datamap.
+
+this.dataMapName = dataMapSchema.getDataMapName();
+List indexedColumns = 
carbonTable.getIndexedColumns(dataMapSchema);
+
+// operations that will be supported on the indexed columns
+List optOperations = new ArrayList<>();
+optOperations.add(ExpressionType.NOT);
+optOperations.add(ExpressionType.EQUALS);
+optOperations.add(ExpressionType.NOT_EQUALS);
+optOperations.add(ExpressionType.GREATERTHAN);
+optOperations.add(ExpressionType.GREATERTHAN_EQUALTO);
+optOperations.add(ExpressionType.LESSTHAN);

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-02 Thread dhatchayani
Github user dhatchayani commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r238153927
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/MinMaxDataMapFactory.java
 ---
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.annotations.InterfaceAudience;
+import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datamap.DataMapDistributable;
+import org.apache.carbondata.core.datamap.DataMapLevel;
+import org.apache.carbondata.core.datamap.DataMapMeta;
+import org.apache.carbondata.core.datamap.DataMapStoreManager;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.TableDataMap;
+import org.apache.carbondata.core.datamap.dev.DataMapBuilder;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
+import 
org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.features.TableOperation;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.DataMapSchema;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.events.Event;
+
+import org.apache.log4j.Logger;
+
+/**
+ * Min Max DataMap Factory
+ */
+@InterfaceAudience.Internal
+public class MinMaxDataMapFactory extends CoarseGrainDataMapFactory {
+  private static final Logger LOGGER =
+  
LogServiceFactory.getLogService(MinMaxDataMapFactory.class.getName());
+  private DataMapMeta dataMapMeta;
+  private String dataMapName;
+  // segmentId -> list of index files
+  private Map<String, Set<String>> segmentMap = new ConcurrentHashMap<>();
+  private Cache cache;
+
+  public MinMaxDataMapFactory(CarbonTable carbonTable, DataMapSchema 
dataMapSchema)
+  throws MalformedDataMapCommandException {
+super(carbonTable, dataMapSchema);
+
+// this is an example for datamap, we can choose the columns and 
operations that
+// will be supported by this datamap. Furthermore, we can add 
cache-support for this datamap.
+
+this.dataMapName = dataMapSchema.getDataMapName();
+List indexedColumns = 
carbonTable.getIndexedColumns(dataMapSchema);
+
+// operations that will be supported on the indexed columns
+List optOperations = new ArrayList<>();
+optOperations.add(ExpressionType.NOT);
+optOperations.add(ExpressionType.EQUALS);
+optOperations.add(ExpressionType.NOT_EQUALS);
+optOperations.add(ExpressionType.GREATERTHAN);
+optOperations.add(ExpressionType.GREATERTHAN_EQUALTO);
+optOperations.add(ExpressionType.LESSTHAN);

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-02 Thread dhatchayani
Github user dhatchayani commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r238153188
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-12-02 Thread Indhumathi27
Github user Indhumathi27 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r238152299
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream = 

[GitHub] carbondata pull request #2963: [CARBONDATA-3139] Fix bugs in MinMaxDataMap e...

2018-11-29 Thread qiuchenjian
Github user qiuchenjian commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2963#discussion_r237452138
  
--- Diff: 
datamap/example/src/main/java/org/apache/carbondata/datamap/minmax/AbstractMinMaxDataMapWriter.java
 ---
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.datamap.minmax;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.datamap.dev.DataMapWriter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
+import 
org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import 
org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.KeyPageStatsCollector;
+import 
org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import 
org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
+
+import org.apache.log4j.Logger;
+
+/**
+ * We will record the min & max value for each index column in each 
blocklet.
+ * Since the size of index is quite small, we will combine the index for 
all index columns
+ * in one file.
+ */
+public abstract class AbstractMinMaxDataMapWriter extends DataMapWriter {
+  private static final Logger LOGGER = LogServiceFactory.getLogService(
+  AbstractMinMaxDataMapWriter.class.getName());
+
+  private ColumnPageStatsCollector[] indexColumnMinMaxCollectors;
+  protected int currentBlockletId;
+  private String currentIndexFile;
+  private DataOutputStream currentIndexFileOutStream;
+
+  public AbstractMinMaxDataMapWriter(String tablePath, String dataMapName,
+  List indexColumns, Segment segment, String shardName) 
throws IOException {
+super(tablePath, dataMapName, indexColumns, segment, shardName);
+initStatsCollector();
+initDataMapFile();
+  }
+
+  private void initStatsCollector() {
+indexColumnMinMaxCollectors = new 
ColumnPageStatsCollector[indexColumns.size()];
+CarbonColumn indexCol;
+for (int i = 0; i < indexColumns.size(); i++) {
+  indexCol = indexColumns.get(i);
+  if (indexCol.isMeasure()
+  || (indexCol.isDimension()
+  && DataTypeUtil.isPrimitiveColumn(indexCol.getDataType())
+  && !indexCol.hasEncoding(Encoding.DICTIONARY)
+  && !indexCol.hasEncoding(Encoding.DIRECT_DICTIONARY))) {
+indexColumnMinMaxCollectors[i] = 
PrimitivePageStatsCollector.newInstance(
+indexColumns.get(i).getDataType());
+  } else {
+indexColumnMinMaxCollectors[i] = 
KeyPageStatsCollector.newInstance(DataTypes.BYTE_ARRAY);
+  }
+}
+  }
+
+  private void initDataMapFile() throws IOException {
+if (!FileFactory.isFileExist(dataMapPath) &&
+!FileFactory.mkdirs(dataMapPath, 
FileFactory.getFileType(dataMapPath))) {
+  throw new IOException("Failed to create directory " + dataMapPath);
+}
+
+try {
+  currentIndexFile = MinMaxIndexDataMap.getIndexFile(dataMapPath,
+  MinMaxIndexHolder.MINMAX_INDEX_PREFFIX + indexColumns.size());
+  FileFactory.createNewFile(currentIndexFile, 
FileFactory.getFileType(currentIndexFile));
+  currentIndexFileOutStream =