[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user sounakr closed the pull request at: https://github.com/apache/carbondata/pull/1329 ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1329#discussion_r137213430 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/MinMaxDataMap.java --- @@ -0,0 +1,193 @@ +package org.apache.carbondata.spark.testsuite.datamap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap; +import org.apache.carbondata.core.indexstore.row.DataMapRow; +import org.apache.carbondata.core.indexstore.row.DataMapRowImpl; +import org.apache.carbondata.core.indexstore.schema.DataMapSchema; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; + +public class MinMaxDataMap implements DataMap { + + public static final String NAME = "clustered.btree.minmax"; + + private static int KEY_INDEX = 
0; + + private static int MIN_VALUES_INDEX = 1; + + private static int MAX_VALUES_INDEX = 2; + + private UnsafeMemoryDMStore unsafeMemoryDMStore; + + private SegmentProperties segmentProperties; + + private int[] columnCardinality; + + + @Override public void init(String filePath) throws MemoryException, IOException { +long startTime = System.currentTimeMillis(); +DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter(); +List indexInfo = fileFooterConverter.getIndexInfo(filePath); +for (DataFileFooter fileFooter : indexInfo) { + List columnInTable = fileFooter.getColumnInTable(); + if (segmentProperties == null) { +columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality(); +segmentProperties = new SegmentProperties(columnInTable, columnCardinality); +//createSchema(segmentProperties); + } + TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo(); + if (fileFooter.getBlockletList() == null || fileFooter.getBlockletList().size() == 0) { +//LOGGER +//.info("Reading carbondata file footer to get blocklet info " + blockInfo.getFilePath()); +fileFooter = CarbonUtil.readMetadatFile(blockInfo); + } + + loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath()); +} +if (unsafeMemoryDMStore != null) { + unsafeMemoryDMStore.finishWriting(); +} +//LOGGER.info("Time taken to load blocklet datamap from file : " + filePath + "is " + +//(System.currentTimeMillis() - startTime)); + + } + + @Override public List prune(FilterResolverIntf filterExp) { +return null; + } + + @Override public void clear() { + + } + + public void updateMinMaxIndex(String filePath) throws IOException, MemoryException { --- End diff -- it shouldn't be read from index file. Use the Datamap writer to write example index file and read here ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1329#discussion_r137213067 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/DataMapMinMaxWriter.scala --- @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.datamap + +import org.apache.spark.sql.test.util.QueryTest +import org.apache.spark.sql.{DataFrame, SaveMode} +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.datamap.{DataMapStoreManager, TableDataMap} +import org.apache.carbondata.core.datamap.dev.DataMapWriter +import org.apache.carbondata.core.datastore.page.ColumnPage +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.DataType +import org.apache.carbondata.core.util.CarbonProperties + + +class DataMapMinMaxWriter extends QueryTest with BeforeAndAfterAll { --- End diff -- Append testcase to the file name ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1329#discussion_r137212887 --- Diff: core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/DataMapMinMax.java --- @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.indexstore.blockletindex; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Comparator; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.cache.Cacheable; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datamap.dev.DataMap; +import org.apache.carbondata.core.datastore.IndexKey; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.BlockletDetailInfo; +import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore; +import org.apache.carbondata.core.indexstore.row.DataMapRow; +import org.apache.carbondata.core.indexstore.row.DataMapRowImpl; +import org.apache.carbondata.core.indexstore.schema.DataMapSchema; +import org.apache.carbondata.core.keygenerator.KeyGenException; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; 
+import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataFileFooterConverter; + +/** + * Datamap implementation for blocklet. + */ +public class DataMapMinMax implements DataMap, Cacheable { --- End diff -- No need to use Unsafe in example, don't confuse users with unsafe in example. Just make it as simple as possible ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1329#discussion_r137212634 --- Diff: core/src/main/java/org/apache/carbondata/core/cache/CacheType.java --- @@ -61,6 +61,9 @@ public static final CacheType DRIVER_BLOCKLET_DATAMAP = new CacheType("driver_blocklet_datamap"); + public static final CacheType --- End diff -- Don't add cache type, no need to handle cache in example ---
[GitHub] carbondata pull request #1329: [WIP] Min Max DataMap Example
GitHub user sounakr opened a pull request: https://github.com/apache/carbondata/pull/1329 [WIP] Min Max DataMap Example Min Max DataMap Example You can merge this pull request into a Git repository by running: $ git pull https://github.com/sounakr/incubator-carbondata MinMaxDataMap Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/1329.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1329 commit c01d4cd6dfa172564eff2cf53cadab5cb3d2b46a Author: sounakr Date: 2017-09-06T04:45:25Z Min Max DataMap 1st Phase ---