[jira] [Commented] (CARBONDATA-302) 7. Add DataWriterProcessorStep which reads the data from sort temp files and creates carbondata files.
[ https://issues.apache.org/jira/browse/CARBONDATA-302?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610793#comment-15610793 ] ASF GitHub Bot commented on CARBONDATA-302: --- Github user ravipesala commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/251#discussion_r85270229 --- Diff: processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java --- @@ -304,4 +311,92 @@ public static String getLocalDataFolderLocation(String databaseName, String tabl return ArrayUtils .toPrimitive(noDictionaryMapping.toArray(new Boolean[noDictionaryMapping.size()])); } + + /** + * Preparing the boolean [] to map whether the dimension use inverted index or not. + */ + public static boolean[] getIsUseInvertedIndex(DataField[] fields) { +List isUseInvertedIndexList = new ArrayList(); +for (DataField field : fields) { + if (field.getColumn().isUseInvertedIndnex() && field.getColumn().isDimesion()) { +isUseInvertedIndexList.add(true); + } else if(field.getColumn().isDimesion()){ +isUseInvertedIndexList.add(false); + } +} +return ArrayUtils +.toPrimitive(isUseInvertedIndexList.toArray(new Boolean[isUseInvertedIndexList.size()])); + } + + private static String getComplexTypeString(DataField[] dataFields) { +StringBuilder dimString = new StringBuilder(); +for (int i = 0; i < dataFields.length; i++) { + DataField dataField = dataFields[i]; + if (dataField.getColumn().getDataType().equals(DataType.ARRAY) || dataField.getColumn() + .getDataType().equals(DataType.STRUCT)) { +addAllComplexTypeChildren((CarbonDimension) dataField.getColumn(), dimString, ""); +dimString.append(CarbonCommonConstants.SEMICOLON_SPC_CHARACTER); + } +} +return dimString.toString(); + } + + /** + * This method will return all the child dimensions under complex dimension + * + */ + private static void addAllComplexTypeChildren(CarbonDimension dimension, StringBuilder dimString, + String parent) { +dimString.append( 
+dimension.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER + dimension.getDataType() --- End diff -- ok > 7. Add DataWriterProcessorStep which reads the data from sort temp files and > creates carbondata files. > -- > > Key: CARBONDATA-302 > URL: https://issues.apache.org/jira/browse/CARBONDATA-302 > Project: CarbonData > Issue Type: Sub-task >Reporter: Ravindra Pesala >Assignee: Ravindra Pesala > Fix For: 0.3.0-incubating > > > Add DataWriterProcessorStep which reads the data from sort temp files and > merge sort it, and apply mdk generator on key and creates carbondata files. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-302) 7. Add DataWriterProcessorStep which reads the data from sort temp files and creates carbondata files.
[ https://issues.apache.org/jira/browse/CARBONDATA-302?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610795#comment-15610795 ] ASF GitHub Bot commented on CARBONDATA-302: --- Github user ravipesala commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/251#discussion_r85270264 --- Diff: processing/src/main/java/org/apache/carbondata/processing/newflow/steps/writer/DataWriterProcessorStepImpl.java --- @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.carbondata.processing.newflow.steps.writer; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.carbon.CarbonTableIdentifier; +import org.apache.carbondata.core.carbon.datastore.block.SegmentProperties; +import org.apache.carbondata.core.carbon.metadata.CarbonMetadata; +import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.carbon.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.carbon.path.CarbonStorePath; +import org.apache.carbondata.core.carbon.path.CarbonTablePath; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.IgnoreDictionary; +import org.apache.carbondata.core.keygenerator.KeyGenerator; +import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.processing.datatypes.GenericDataType; +import org.apache.carbondata.processing.newflow.AbstractDataLoadProcessorStep; +import org.apache.carbondata.processing.newflow.CarbonDataLoadConfiguration; +import org.apache.carbondata.processing.newflow.DataField; +import org.apache.carbondata.processing.newflow.constants.DataLoadProcessorConstants; +import org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.newflow.row.CarbonRow; +import org.apache.carbondata.processing.newflow.row.CarbonRowBatch; +import org.apache.carbondata.processing.store.CarbonDataFileAttributes; +import org.apache.carbondata.processing.store.CarbonFactDataHandlerModel; +import 
org.apache.carbondata.processing.store.CarbonFactHandler; +import org.apache.carbondata.processing.store.CarbonFactHandlerFactory; +import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; + +/** + * It reads data from sorted files which are generated in previous sort step. + * And it writes data to carbondata file. It also generates mdk key while writing to carbondata file + */ +public class DataWriterProcessorStepImpl extends AbstractDataLoadProcessorStep { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(DataWriterProcessorStepImpl.class.getName()); + + private String storeLocation; + + private boolean[] isUseInvertedIndex; + + private int[] dimLens; + + private int dimensionCount; + + private List wrapperColumnSchema; + + private int[] colCardinality; + + private SegmentProperties segmentProperties; + + private KeyGenerator keyGenerator; + + private CarbonFactHandler dataHandler; + + private MapcomplexIndexMap; + + private int noDictionaryCount; + + private int complexDimensionCount; + + private int measureCount; + + private long readCounter; + + private long
[jira] [Commented] (CARBONDATA-302) 7. Add DataWriterProcessorStep which reads the data from sort temp files and creates carbondata files.
[ https://issues.apache.org/jira/browse/CARBONDATA-302?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610712#comment-15610712 ] ASF GitHub Bot commented on CARBONDATA-302: --- Github user ravipesala commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/251#discussion_r85267495 --- Diff: processing/src/main/java/org/apache/carbondata/processing/newflow/steps/writer/DataWriterProcessorStepImpl.java --- @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.carbondata.processing.newflow.steps.writer; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.carbon.CarbonTableIdentifier; +import org.apache.carbondata.core.carbon.datastore.block.SegmentProperties; +import org.apache.carbondata.core.carbon.metadata.CarbonMetadata; +import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.carbon.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.carbon.path.CarbonStorePath; +import org.apache.carbondata.core.carbon.path.CarbonTablePath; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.IgnoreDictionary; +import org.apache.carbondata.core.keygenerator.KeyGenerator; +import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.processing.datatypes.GenericDataType; +import org.apache.carbondata.processing.newflow.AbstractDataLoadProcessorStep; +import org.apache.carbondata.processing.newflow.CarbonDataLoadConfiguration; +import org.apache.carbondata.processing.newflow.DataField; +import org.apache.carbondata.processing.newflow.constants.DataLoadProcessorConstants; +import org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.newflow.row.CarbonRow; +import org.apache.carbondata.processing.newflow.row.CarbonRowBatch; +import org.apache.carbondata.processing.store.CarbonDataFileAttributes; +import org.apache.carbondata.processing.store.CarbonFactDataHandlerModel; +import 
org.apache.carbondata.processing.store.CarbonFactHandler; +import org.apache.carbondata.processing.store.CarbonFactHandlerFactory; +import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; + +/** + * It reads data from sorted files which are generated in previous sort step. + * And it writes data to carbondata file. It also generates mdk key while writing to carbondata file + */ +public class DataWriterProcessorStepImpl extends AbstractDataLoadProcessorStep { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(DataWriterProcessorStepImpl.class.getName()); + + private String storeLocation; + + private boolean[] isUseInvertedIndex; + + private int[] dimLens; + + private int dimensionCount; + + private List wrapperColumnSchema; + + private int[] colCardinality; + + private SegmentProperties segmentProperties; + + private KeyGenerator keyGenerator; + + private CarbonFactHandler dataHandler; + + private MapcomplexIndexMap; + + private int noDictionaryCount; + + private int complexDimensionCount; + + private int measureCount; + + private long readCounter; + + private long
[jira] [Commented] (CARBONDATA-302) 7. Add DataWriterProcessorStep which reads the data from sort temp files and creates carbondata files.
[ https://issues.apache.org/jira/browse/CARBONDATA-302?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610709#comment-15610709 ] ASF GitHub Bot commented on CARBONDATA-302: --- Github user ravipesala commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/251#discussion_r85267443 --- Diff: processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactHandlerFactory.java --- @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.carbondata.processing.store; + +/** + * Factory class for CarbonFactHandler. + */ +public final class CarbonFactHandlerFactory { + + /** + * Creating fact handler to write data. + * @param model + * @param handlerType + * @return + */ + public static CarbonFactHandler createCarbonFactHandler(CarbonFactDataHandlerModel model, --- End diff -- Yes, I don't see the advantage of using semaphore here because we are already using fixed thread pool to control the threads. I will discuss with team and confirm whether it is needed. > 7. Add DataWriterProcessorStep which reads the data from sort temp files and > creates carbondata files. 
> -- > > Key: CARBONDATA-302 > URL: https://issues.apache.org/jira/browse/CARBONDATA-302 > Project: CarbonData > Issue Type: Sub-task >Reporter: Ravindra Pesala >Assignee: Ravindra Pesala > Fix For: 0.3.0-incubating > > > Add DataWriterProcessorStep which reads the data from sort temp files and > merge sort it, and apply mdk generator on key and creates carbondata files. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-308) Use CarbonInputFormat in CarbonScanRDD compute
[ https://issues.apache.org/jira/browse/CARBONDATA-308?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610586#comment-15610586 ] ASF GitHub Bot commented on CARBONDATA-308: --- GitHub user jackylk opened a pull request: https://github.com/apache/incubator-carbondata/pull/262 [CARBONDATA-308] [WIP] Use CarbonInputFormat in CarbonScanRDD compute Use CarbonInputFormat in CarbonScanRDD compute function 1. In driver side, only getSplit is required, so only filter condition is required, no need to create full QueryModel object, so creation of QueryModel is moved from driver side to executor side. 2. use CarbonInputFormat.createRecordReader in CarbonScanRDD.compute instead of use QueryExecutor directly You can merge this pull request into a Git repository by running: $ git pull https://github.com/jackylk/incubator-carbondata scanrdd Alternatively you can review and apply these changes as the patch at: https://github.com/apache/incubator-carbondata/pull/262.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #262 commit ef4a889db9b86653c273794c9a810a9cd9683437 Author: jackylk Date: 2016-10-22T18:43:53Z use CarbonInputFormat in executor commit a5c17f523c7127b538cc2d384cbff4fa454a007a Author: jackylk Date: 2016-10-27T04:01:36Z modify getPartition > Use CarbonInputFormat in CarbonScanRDD compute > -- > > Key: CARBONDATA-308 > URL: https://issues.apache.org/jira/browse/CARBONDATA-308 > Project: CarbonData > Issue Type: Sub-task > Components: spark-integration >Reporter: Jacky Li > Fix For: 0.2.0-incubating > > > Take CarbonScanRDD as the target RDD, modify as following: > 1. In driver side, only getSplit is required, so only filter condition is > required, no need to create full QueryModel object, so we can move creation > of QueryModel from driver side to executor side. > 2. 
use CarbonInputFormat.createRecordReader in CarbonScanRDD.compute instead > of use QueryExecutor directly -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (CARBONDATA-308) Use CarbonInputFormat in CarbonScanRDD compute
[ https://issues.apache.org/jira/browse/CARBONDATA-308?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jacky Li updated CARBONDATA-308: Summary: Use CarbonInputFormat in CarbonScanRDD compute (was: Unify CarbonScanRDD and CarbonHadoopFSRDD) > Use CarbonInputFormat in CarbonScanRDD compute > -- > > Key: CARBONDATA-308 > URL: https://issues.apache.org/jira/browse/CARBONDATA-308 > Project: CarbonData > Issue Type: Sub-task > Components: spark-integration >Reporter: Jacky Li > Fix For: 0.2.0-incubating > > > Take CarbonScanRDD as the target RDD, modify as following: > 1. In driver side, only getSplit is required, so only filter condition is > required, no need to create full QueryModel object, so we can move creation > of QueryModel from driver side to executor side. > 2. use CarbonInputFormat.createRecordReader in CarbonScanRDD.compute instead > of use QueryExecutor directly -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-339) Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala
[ https://issues.apache.org/jira/browse/CARBONDATA-339?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610471#comment-15610471 ] ASF GitHub Bot commented on CARBONDATA-339: --- GitHub user hseagle opened a pull request: https://github.com/apache/incubator-carbondata/pull/261 fix issue carbondata-339 fix jira issue carbondata-339, replace hdfsLocation with storePath in the function generateGlobalDictionary https://issues.apache.org/jira/browse/CARBONDATA-339 You can merge this pull request into a Git repository by running: $ git pull https://github.com/hseagle/incubator-carbondata carbondata-339 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/incubator-carbondata/pull/261.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #261 commit 64d4d6daaf6e8adede6cfffe94221d20f365631c Author: hseagle Date: 2016-10-27T02:55:53Z fix issue carbondata-339 > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala > > > Key: CARBONDATA-339 > URL: https://issues.apache.org/jira/browse/CARBONDATA-339 > Project: CarbonData > Issue Type: Bug >Reporter: Liang Chen >Assignee: pengxu >Priority: Trivial > Fix For: 0.2.0-incubating > > > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala: Change all "hdfsLocation" to "storePath". > It can support any path, not only hdfs path,need to change. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-37) Support Date/Time format for Timestamp columns to be defined at column level
[ https://issues.apache.org/jira/browse/CARBONDATA-37?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610394#comment-15610394 ] ASF GitHub Bot commented on CARBONDATA-37: -- Github user lion-x commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/219#discussion_r85256460 --- Diff: processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java --- @@ -111,7 +110,7 @@ /** * timeFormat */ - protected SimpleDateFormat timeFormat; + protected String dateFormat; --- End diff -- ok > Support Date/Time format for Timestamp columns to be defined at column level > > > Key: CARBONDATA-37 > URL: https://issues.apache.org/jira/browse/CARBONDATA-37 > Project: CarbonData > Issue Type: Improvement >Reporter: Vimal Das Kammath >Assignee: Lionx > > Carbon support defining the Date/Time format. But the configuration for the > same is present in carbon.properties and hence is global for all tables. > This global configuration for timestamp format cannot support scenarios where > different tables or different Timestamp columns in the same table. > Suggest to provide option in the create table DDL itself to define the format > for each Timestamp column. Also provide defaults so that users can create > table with Timestamp columns without having to always define the Date/Time > format. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-37) Support Date/Time format for Timestamp columns to be defined at column level
[ https://issues.apache.org/jira/browse/CARBONDATA-37?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610231#comment-15610231 ] ASF GitHub Bot commented on CARBONDATA-37: -- Github user lion-x commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/219#discussion_r85250472 --- Diff: processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java --- @@ -651,6 +654,7 @@ public void setDefault() { columnSchemaDetails = ""; columnsDataTypeString=""; tableOption = ""; +dateFormat = CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT; --- End diff -- ok > Support Date/Time format for Timestamp columns to be defined at column level > > > Key: CARBONDATA-37 > URL: https://issues.apache.org/jira/browse/CARBONDATA-37 > Project: CarbonData > Issue Type: Improvement >Reporter: Vimal Das Kammath >Assignee: Lionx > > Carbon support defining the Date/Time format. But the configuration for the > same is present in carbon.properties and hence is global for all tables. > This global configuration for timestamp format cannot support scenarios where > different tables or different Timestamp columns in the same table. > Suggest to provide option in the create table DDL itself to define the format > for each Timestamp column. Also provide defaults so that users can create > table with Timestamp columns without having to always define the Date/Time > format. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-37) Support Date/Time format for Timestamp columns to be defined at column level
[ https://issues.apache.org/jira/browse/CARBONDATA-37?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610210#comment-15610210 ] ASF GitHub Bot commented on CARBONDATA-37: -- Github user lion-x commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/219#discussion_r85249559 --- Diff: core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/TimeStampDirectDictionaryGenerator.java --- @@ -39,37 +39,32 @@ */ public class TimeStampDirectDictionaryGenerator implements DirectDictionaryGenerator { - private TimeStampDirectDictionaryGenerator() { + private ThreadLocal threadLocal = new ThreadLocal<>(); - } - - public static TimeStampDirectDictionaryGenerator instance = - new TimeStampDirectDictionaryGenerator(); + private String dateFormat; /** * The value of 1 unit of the SECOND, MINUTE, HOUR, or DAY in millis. */ - public static final long granularityFactor; + public long granularityFactor; /** * The date timestamp to be considered as start date for calculating the timestamp * java counts the number of milliseconds from start of "January 1, 1970", this property is * customized the start of position. for example "January 1, 2000" */ - public static final long cutOffTimeStamp; + public long cutOffTimeStamp; /** * Logger instance */ + private static final LogService LOGGER = - LogServiceFactory.getLogService(TimeStampDirectDictionaryGenerator.class.getName()); + LogServiceFactory.getLogService(TimeStampDirectDictionaryGenerator.class.getName()); --- End diff -- done > Support Date/Time format for Timestamp columns to be defined at column level > > > Key: CARBONDATA-37 > URL: https://issues.apache.org/jira/browse/CARBONDATA-37 > Project: CarbonData > Issue Type: Improvement >Reporter: Vimal Das Kammath >Assignee: Lionx > > Carbon support defining the Date/Time format. But the configuration for the > same is present in carbon.properties and hence is global for all tables. 
> This global configuration for timestamp format cannot support scenarios where > different tables or different Timestamp columns in the same table. > Suggest to provide option in the create table DDL itself to define the format > for each Timestamp column. Also provide defaults so that users can create > table with Timestamp columns without having to always define the Date/Time > format. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-37) Support Date/Time format for Timestamp columns to be defined at column level
[ https://issues.apache.org/jira/browse/CARBONDATA-37?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15610197#comment-15610197 ] ASF GitHub Bot commented on CARBONDATA-37: -- Github user lion-x commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/219#discussion_r85248921 --- Diff: processing/src/test/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/TimeStampDirectDictionaryGeneratorTest.java --- @@ -37,7 +37,7 @@ private int surrogateKey = -1; @Before public void setUp() throws Exception { -TimeStampDirectDictionaryGenerator generator = TimeStampDirectDictionaryGenerator.instance; +TimeStampDirectDictionaryGenerator generator = new TimeStampDirectDictionaryGenerator(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT); --- End diff -- This file is a test file, I think the TimeStampDirectDictionaryGenerator should be set 'CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT' for testing. pls check again. > Support Date/Time format for Timestamp columns to be defined at column level > > > Key: CARBONDATA-37 > URL: https://issues.apache.org/jira/browse/CARBONDATA-37 > Project: CarbonData > Issue Type: Improvement >Reporter: Vimal Das Kammath >Assignee: Lionx > > Carbon support defining the Date/Time format. But the configuration for the > same is present in carbon.properties and hence is global for all tables. > This global configuration for timestamp format cannot support scenarios where > different tables or different Timestamp columns in the same table. > Suggest to provide option in the create table DDL itself to define the format > for each Timestamp column. Also provide defaults so that users can create > table with Timestamp columns without having to always define the Date/Time > format. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-302) 7. Add DataWriterProcessorStep which reads the data from sort temp files and creates carbondata files.
[ https://issues.apache.org/jira/browse/CARBONDATA-302?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15608849#comment-15608849 ] ASF GitHub Bot commented on CARBONDATA-302: --- Github user jackylk commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/251#discussion_r85157225 --- Diff: processing/src/main/java/org/apache/carbondata/processing/newflow/steps/writer/DataWriterProcessorStepImpl.java --- @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.carbondata.processing.newflow.steps.writer; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.carbon.CarbonTableIdentifier; +import org.apache.carbondata.core.carbon.datastore.block.SegmentProperties; +import org.apache.carbondata.core.carbon.metadata.CarbonMetadata; +import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.carbon.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.carbon.path.CarbonStorePath; +import org.apache.carbondata.core.carbon.path.CarbonTablePath; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.IgnoreDictionary; +import org.apache.carbondata.core.keygenerator.KeyGenerator; +import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.processing.datatypes.GenericDataType; +import org.apache.carbondata.processing.newflow.AbstractDataLoadProcessorStep; +import org.apache.carbondata.processing.newflow.CarbonDataLoadConfiguration; +import org.apache.carbondata.processing.newflow.DataField; +import org.apache.carbondata.processing.newflow.constants.DataLoadProcessorConstants; +import org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException; +import org.apache.carbondata.processing.newflow.row.CarbonRow; +import org.apache.carbondata.processing.newflow.row.CarbonRowBatch; +import org.apache.carbondata.processing.store.CarbonDataFileAttributes; +import org.apache.carbondata.processing.store.CarbonFactDataHandlerModel; +import 
org.apache.carbondata.processing.store.CarbonFactHandler; +import org.apache.carbondata.processing.store.CarbonFactHandlerFactory; +import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException; +import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; + +/** + * It reads data from sorted files which are generated in previous sort step. + * And it writes data to carbondata file. It also generates mdk key while writing to carbondata file + */ +public class DataWriterProcessorStepImpl extends AbstractDataLoadProcessorStep { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(DataWriterProcessorStepImpl.class.getName()); + + private String storeLocation; + + private boolean[] isUseInvertedIndex; + + private int[] dimLens; + + private int dimensionCount; + + private List wrapperColumnSchema; + + private int[] colCardinality; + + private SegmentProperties segmentProperties; + + private KeyGenerator keyGenerator; + + private CarbonFactHandler dataHandler; + + private MapcomplexIndexMap; + + private int noDictionaryCount; + + private int complexDimensionCount; + + private int measureCount; + + private long readCounter; + + private long
[jira] [Updated] (CARBONDATA-339) Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala
[ https://issues.apache.org/jira/browse/CARBONDATA-339?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Liang Chen updated CARBONDATA-339: -- Fix Version/s: 0.2.0-incubating > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala > > > Key: CARBONDATA-339 > URL: https://issues.apache.org/jira/browse/CARBONDATA-339 > Project: CarbonData > Issue Type: Bug >Reporter: Liang Chen >Assignee: pengxu >Priority: Trivial > Fix For: 0.2.0-incubating > > > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala: Change all "hdfsLocation" to "storePath". > It can support any path, not only hdfs path,need to change. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Updated] (CARBONDATA-339) Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala
[ https://issues.apache.org/jira/browse/CARBONDATA-339?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Liang Chen updated CARBONDATA-339: -- Assignee: pengxu (was: Liang Chen) > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala > > > Key: CARBONDATA-339 > URL: https://issues.apache.org/jira/browse/CARBONDATA-339 > Project: CarbonData > Issue Type: Bug >Reporter: Liang Chen >Assignee: pengxu >Priority: Trivial > > Align storePath name in generateGlobalDictionary() of > GlobalDictionaryUtil.scala: Change all "hdfsLocation" to "storePath". > It can support any path, not only hdfs path,need to change. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Created] (CARBONDATA-339) Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala
Liang Chen created CARBONDATA-339: - Summary: Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala Key: CARBONDATA-339 URL: https://issues.apache.org/jira/browse/CARBONDATA-339 Project: CarbonData Issue Type: Bug Reporter: Liang Chen Assignee: Liang Chen Priority: Trivial Align storePath name in generateGlobalDictionary() of GlobalDictionaryUtil.scala: Change all "hdfsLocation" to "storePath". It can support any path, not only hdfs path, need to change. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Resolved] (CARBONDATA-337) Correct Inverted Index spelling mistakes
[ https://issues.apache.org/jira/browse/CARBONDATA-337?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jacky Li resolved CARBONDATA-337. - Resolution: Fixed Fix Version/s: 0.2.0-incubating > Correct Inverted Index spelling mistakes > > > Key: CARBONDATA-337 > URL: https://issues.apache.org/jira/browse/CARBONDATA-337 > Project: CarbonData > Issue Type: Improvement >Reporter: Lionx >Assignee: Lionx >Priority: Minor > Fix For: 0.2.0-incubating > > > Correct Inverted Index spelling mistakes in three files. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-337) Correct Inverted Index spelling mistakes
[ https://issues.apache.org/jira/browse/CARBONDATA-337?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15608099#comment-15608099 ] ASF GitHub Bot commented on CARBONDATA-337: --- Github user asfgit closed the pull request at: https://github.com/apache/incubator-carbondata/pull/257 > Correct Inverted Index spelling mistakes > > > Key: CARBONDATA-337 > URL: https://issues.apache.org/jira/browse/CARBONDATA-337 > Project: CarbonData > Issue Type: Improvement >Reporter: Lionx >Assignee: Lionx >Priority: Minor > > Correct Inverted Index spelling mistakes in three files. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-338) Remove the method arguments as they are never used inside the method
[ https://issues.apache.org/jira/browse/CARBONDATA-338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15608090#comment-15608090 ] ASF GitHub Bot commented on CARBONDATA-338: --- Github user asfgit closed the pull request at: https://github.com/apache/incubator-carbondata/pull/258 > Remove the method arguments as they are never used inside the method > > > Key: CARBONDATA-338 > URL: https://issues.apache.org/jira/browse/CARBONDATA-338 > Project: CarbonData > Issue Type: Improvement > Components: core >Reporter: Shivansh > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-330) Fix compiler warnings - Java related
[ https://issues.apache.org/jira/browse/CARBONDATA-330?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15608076#comment-15608076 ] ASF GitHub Bot commented on CARBONDATA-330: --- Github user asfgit closed the pull request at: https://github.com/apache/incubator-carbondata/pull/250 > Fix compiler warnings - Java related > > > Key: CARBONDATA-330 > URL: https://issues.apache.org/jira/browse/CARBONDATA-330 > Project: CarbonData > Issue Type: Improvement > Components: build, core >Affects Versions: 0.2.0-incubating >Reporter: Aniket Adnaik >Priority: Trivial > Fix For: 0.2.0-incubating > > > Fix java compiler warnings and code cleanup. -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-338) Remove the method arguments as they are never used inside the method
[ https://issues.apache.org/jira/browse/CARBONDATA-338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15607808#comment-15607808 ] ASF GitHub Bot commented on CARBONDATA-338: --- GitHub user shiv4nsh opened a pull request: https://github.com/apache/incubator-carbondata/pull/258 [CARBONDATA-338] Removed the unused value inside the method Be sure to do all of the following to help us incorporate your contribution quickly and easily: - [ ] Make sure the PR title is formatted like: `[CARBONDATA-<Jira issue #>] Description of pull request` - [ ] Make sure tests pass via `mvn clean verify`. (Even better, enable Travis-CI on your fork and ensure the whole test matrix passes). - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue number, if there is one. - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.txt). - [ ] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - What manual testing you have done? - Any additional information to help reviewers in testing this change. - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. 
--- You can merge this pull request into a Git repository by running: $ git pull https://github.com/shiv4nsh/incubator-carbondata improvement/CARBONDATA-338 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/incubator-carbondata/pull/258.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #258 commit 97cdfdc6bd4fc112253437628683d8fbdaab8c6f Author: Knoldus Date: 2016-10-26T08:01:35Z Removed the unused value inside the method > Remove the method arguments as they are never used inside the method > > > Key: CARBONDATA-338 > URL: https://issues.apache.org/jira/browse/CARBONDATA-338 > Project: CarbonData > Issue Type: Improvement > Components: core >Reporter: Shivansh > -- This message was sent by Atlassian JIRA (v6.3.4#6332)
[jira] [Commented] (CARBONDATA-284) Abstracting Index and Segment interface
[ https://issues.apache.org/jira/browse/CARBONDATA-284?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15607805#comment-15607805 ] ASF GitHub Bot commented on CARBONDATA-284: --- Github user QiangCai commented on a diff in the pull request: https://github.com/apache/incubator-carbondata/pull/208#discussion_r85061184 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/internal/index/memory/InMemoryBTreeIndex.java --- @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.carbondata.hadoop.internal.index.memory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.carbon.AbsoluteTableIdentifier; +import org.apache.carbondata.core.carbon.datastore.DataRefNode; +import org.apache.carbondata.core.carbon.datastore.DataRefNodeFinder; +import org.apache.carbondata.core.carbon.datastore.IndexKey; +import org.apache.carbondata.core.carbon.datastore.SegmentTaskIndexStore; +import org.apache.carbondata.core.carbon.datastore.block.AbstractIndex; +import org.apache.carbondata.core.carbon.datastore.block.BlockletInfos; +import org.apache.carbondata.core.carbon.datastore.block.SegmentProperties; +import org.apache.carbondata.core.carbon.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.carbon.datastore.exception.IndexBuilderException; +import org.apache.carbondata.core.carbon.datastore.impl.btree.BTreeDataRefNodeFinder; +import org.apache.carbondata.core.carbon.datastore.impl.btree.BlockBTreeLeafNode; +import org.apache.carbondata.core.carbon.querystatistics.QueryStatistic; +import org.apache.carbondata.core.carbon.querystatistics.QueryStatisticsConstants; +import org.apache.carbondata.core.carbon.querystatistics.QueryStatisticsRecorder; +import org.apache.carbondata.core.keygenerator.KeyGenException; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.hadoop.CarbonInputSplit; +import org.apache.carbondata.hadoop.internal.index.Index; +import org.apache.carbondata.hadoop.internal.segment.Segment; +import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil; +import org.apache.carbondata.scan.executor.exception.QueryExecutionException; +import org.apache.carbondata.scan.filter.FilterExpressionProcessor; +import org.apache.carbondata.scan.filter.FilterUtil; +import 
org.apache.carbondata.scan.filter.resolver.FilterResolverIntf; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; + +class InMemoryBTreeIndex implements Index { + + private static final Log LOG = LogFactory.getLog(InMemoryBTreeIndex.class); + private Segment segment; + + InMemoryBTreeIndex(Segment segment) { +this.segment = segment; + } + + @Override + public String getName() { +return null; + } + + @Override + public List filter(JobContext job, FilterResolverIntf filter) + throws IOException { + +List result = new LinkedList(); + +FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor(); + +AbsoluteTableIdentifier absoluteTableIdentifier = null; + //CarbonInputFormatUtil.getAbsoluteTableIdentifier(job.getConfiguration()); + +//for this segment fetch blocks matching filter in BTree +List dataRefNodes = null; +try { + dataRefNodes = getDataBlocksOfSegment(job, filterExpressionProcessor, absoluteTableIdentifier, + filter, segment.getId()); +} catch (IndexBuilderException e) { + throw new IOException(e.getMessage()); +}
[jira] [Created] (CARBONDATA-338) Remove the method arguments as they are never used inside the method
Shivansh created CARBONDATA-338: --- Summary: Remove the method arguments as they are never used inside the method Key: CARBONDATA-338 URL: https://issues.apache.org/jira/browse/CARBONDATA-338 Project: CarbonData Issue Type: Improvement Components: core Reporter: Shivansh -- This message was sent by Atlassian JIRA (v6.3.4#6332)