[GitHub] carbondata issue #2391: [CARBONDATA-2625] Optimize the performance of Carbon...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2391 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5265/ ---
[GitHub] carbondata pull request #2374: [CARBONDATA-2613] Support csv based carbon ta...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2374#discussion_r197016816 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/CsvRecordReader.java --- @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.hadoop; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.math.BigDecimal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.GenericQueryType; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.intf.RowImpl; +import org.apache.carbondata.core.scan.filter.intf.RowIntf; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.scan.model.QueryModel; +import org.apache.carbondata.core.statusmanager.FileFormatProperties; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; +import org.apache.carbondata.hadoop.api.CarbonTableInputFormat; +import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport; +import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat; + +import com.univocity.parsers.csv.CsvParser; +import com.univocity.parsers.csv.CsvParserSettings; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import 
org.apache.hadoop.mapreduce.lib.input.FileSplit; + +/** + * scan csv file and filter on it + */ +@InterfaceStability.Evolving +@InterfaceAudience.Internal +public class CsvRecordReader extends AbstractRecordReader { + private static final LogService LOGGER = LogServiceFactory.getLogService( + CsvRecordReader.class.getName()); + private static final int MAX_BATCH_SIZE = + CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + // vector reader + private boolean isVectorReader; + private T columnarBatch; + + // metadata + private CarbonTable carbonTable; + private CarbonColumn[] carbonColumns; + // input + private QueryModel queryModel; + private CarbonReadSupport readSupport; + private FileSplit fileSplit; + private Configuration hadoopConf; + // the index is schema ordinal, the value is the csv ordinal + private int[] schema2csvIdx; + + // filter + private FilterExecuter filter; --- End diff -- Also, there should be an option to choose whether filters are pushed down to the reader or evaluated during execution
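A minimal sketch of the kind of option being asked for, assuming a hypothetical configuration key (`carbon.csv.reader.pushdown.filter`) and modelling the row filter as a plain `Predicate` rather than CarbonData's `FilterExecuter`; it is an illustration only, not the PR's implementation:

```java
import java.util.function.Predicate;
import org.apache.hadoop.conf.Configuration;

// Sketch: decide per query whether the reader applies the filter itself
// or emits every row and leaves filtering to the execution engine.
public final class FilterPushDownOption {
  // Hypothetical property name, used here only for illustration.
  public static final String PUSH_DOWN_FILTER = "carbon.csv.reader.pushdown.filter";

  private final boolean pushDownEnabled;
  private final Predicate<Object[]> rowFilter;

  public FilterPushDownOption(Configuration conf, Predicate<Object[]> rowFilter) {
    // Default to pushing the filter into the reader; users can switch it off.
    this.pushDownEnabled = conf.getBoolean(PUSH_DOWN_FILTER, true);
    this.rowFilter = rowFilter;
  }

  /** Returns true if the reader should emit this row. */
  public boolean accept(Object[] row) {
    // When push-down is disabled the reader emits everything and the
    // engine evaluates the filter expression on its side.
    return !pushDownEnabled || rowFilter == null || rowFilter.test(row);
  }
}
```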
[GitHub] carbondata pull request #2374: [CARBONDATA-2613] Support csv based carbon ta...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2374#discussion_r197016418 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/CsvRecordReader.java --- @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.hadoop; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.math.BigDecimal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.GenericQueryType; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.intf.RowImpl; +import org.apache.carbondata.core.scan.filter.intf.RowIntf; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.scan.model.QueryModel; +import org.apache.carbondata.core.statusmanager.FileFormatProperties; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; +import org.apache.carbondata.hadoop.api.CarbonTableInputFormat; +import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport; +import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat; + +import com.univocity.parsers.csv.CsvParser; +import com.univocity.parsers.csv.CsvParserSettings; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import 
org.apache.hadoop.mapreduce.lib.input.FileSplit; + +/** + * scan csv file and filter on it + */ +@InterfaceStability.Evolving +@InterfaceAudience.Internal +public class CsvRecordReader extends AbstractRecordReader { + private static final LogService LOGGER = LogServiceFactory.getLogService( + CsvRecordReader.class.getName()); + private static final int MAX_BATCH_SIZE = + CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + // vector reader + private boolean isVectorReader; + private T columnarBatch; + + // metadata + private CarbonTable carbonTable; + private CarbonColumn[] carbonColumns; + // input + private QueryModel queryModel; + private CarbonReadSupport readSupport; + private FileSplit fileSplit; + private Configuration hadoopConf; + // the index is schema ordinal, the value is the csv ordinal + private int[] schema2csvIdx; + + // filter + private FilterExecuter filter; --- End diff -- Filtering logic should be out of the readers, because in future if we add more readers,
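One common way to keep filtering logic out of the readers is a thin wrapper that applies a row filter on top of any delegate reader. The sketch below uses only Hadoop's generic `RecordReader` API and illustrates the idea; it is not the approach taken in this PR:

```java
import java.io.IOException;
import java.util.function.Predicate;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/** Applies a row filter on top of any delegate reader (CSV, carbon, ...). */
public class FilteringRecordReader<K, V> extends RecordReader<K, V> {
  private final RecordReader<K, V> delegate;
  private final Predicate<V> filter;

  public FilteringRecordReader(RecordReader<K, V> delegate, Predicate<V> filter) {
    this.delegate = delegate;
    this.filter = filter;
  }

  @Override public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    delegate.initialize(split, context);
  }

  @Override public boolean nextKeyValue() throws IOException, InterruptedException {
    // Skip rows the filter rejects; the underlying reader stays filter-agnostic.
    while (delegate.nextKeyValue()) {
      if (filter == null || filter.test(delegate.getCurrentValue())) {
        return true;
      }
    }
    return false;
  }

  @Override public K getCurrentKey() throws IOException, InterruptedException {
    return delegate.getCurrentKey();
  }

  @Override public V getCurrentValue() throws IOException, InterruptedException {
    return delegate.getCurrentValue();
  }

  @Override public float getProgress() throws IOException, InterruptedException {
    return delegate.getProgress();
  }

  @Override public void close() throws IOException {
    delegate.close();
  }
}
```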
[GitHub] carbondata pull request #2387: [CARBONDATA-2621][BloomDataMap] Lock problem ...
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2387#discussion_r197015265 --- Diff: integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala --- @@ -37,6 +39,11 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with val dataMapName = "bloom_dm" override protected def beforeAll(): Unit = { +val path: String = new File( + classOf[DiskBasedDMSchemaStorageProvider].getResource("/").getPath + "../").getCanonicalPath + .replaceAll("", "/") --- End diff -- Also check if there is any generic location that we can give here... maybe you can take a reference from the Spark UT framework, where we add the location in spark-common ---
[jira] [Resolved] (CARBONDATA-2615) Support page size less than 32000 in CarbondataV3
[ https://issues.apache.org/jira/browse/CARBONDATA-2615?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] kumar vishal resolved CARBONDATA-2615. -- Resolution: Fixed > Support page size less than 32000 in CarbondataV3 > - > > Key: CARBONDATA-2615 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2615 > Project: CarbonData > Issue Type: Sub-task >Reporter: xuchuanyin >Assignee: xuchuanyin >Priority: Major > Time Spent: 5h 10m > Remaining Estimate: 0h > > Since we support super long string, if it is long enough, a column page with > 32000 rows will exceed 2GB, so we support a page less than 32000 rows. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata pull request #2383: [CARBONDATA-2615][32K] Support page size less...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2383 ---
[GitHub] carbondata issue #2391: [CARBONDATA-2625] Optimize the performance of Carbon...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2391 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6434/ ---
[GitHub] carbondata pull request #2387: [CARBONDATA-2621][BloomDataMap] Lock problem ...
Github user mohammadshahidkhan commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2387#discussion_r197014465 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java --- @@ -1530,6 +1531,22 @@ public String getSystemFolderLocation() { if (systemLocation == null) { systemLocation = getStorePath(); } +// append the HDFS uri to the system folder if not already added. +systemLocation = CarbonUtil.checkAndAppendFileSystemURIScheme(systemLocation); +FileFactory.FileType fileType = FileFactory.getFileType(systemLocation); +switch (fileType) { + case HDFS: + case VIEWFS: + case ALLUXIO: +break; + case LOCAL: +// for local fs remove the URI scheme and unify the path representation +systemLocation = FileFactory.getUpdatedFilePath(systemLocation); +break; + default: +// for local fs remove the URI scheme and unify the path representation +systemLocation = FileFactory.getUpdatedFilePath(systemLocation); +} --- End diff -- Thanks @manishgupta88, missed to check the internal implementation of FileFactory.getUpdatedFilePath(). ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user kumarvishal09 commented on the issue: https://github.com/apache/carbondata/pull/2383 LGTM ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2390 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5367/ ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2372 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5264/ ---
[GitHub] carbondata pull request #2387: [CARBONDATA-2621][BloomDataMap] Lock problem ...
Github user manishgupta88 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2387#discussion_r197010690 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java --- @@ -1530,6 +1531,22 @@ public String getSystemFolderLocation() { if (systemLocation == null) { systemLocation = getStorePath(); } +// append the HDFS uri to the system folder if not already added. +systemLocation = CarbonUtil.checkAndAppendFileSystemURIScheme(systemLocation); +FileFactory.FileType fileType = FileFactory.getFileType(systemLocation); +switch (fileType) { + case HDFS: + case VIEWFS: + case ALLUXIO: +break; + case LOCAL: +// for local fs remove the URI scheme and unify the path representation +systemLocation = FileFactory.getUpdatedFilePath(systemLocation); +break; + default: +// for local fs remove the URI scheme and unify the path representation +systemLocation = FileFactory.getUpdatedFilePath(systemLocation); +} --- End diff -- switch case is not required here as only one operation is performed...you can directly write systemLocation = FileFactory.getUpdatedFilePath(CarbonUtil.checkAndAppendFileSystemURIScheme(systemLocation)); getUpdatedFilePath will internally handle to make the required modification only for Local file system ---
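A sketch of the simplified getSystemFolderLocation() the reviewer suggests, written as a drop-in method body for the class in the diff (existing imports and helpers such as getStorePath() are assumed); getSystemLocationFromProperties() is only a placeholder for the property lookup elided from the quoted hunk:

```java
public String getSystemFolderLocation() {
  String systemLocation = getSystemLocationFromProperties();  // placeholder for the existing lookup
  if (systemLocation == null) {
    systemLocation = getStorePath();
  }
  // One call replaces the whole switch-case: append the file-system URI scheme if it is
  // missing, then unify the path representation. getUpdatedFilePath() only rewrites
  // local-file-system paths, so HDFS/VIEWFS/ALLUXIO locations pass through untouched.
  return FileFactory.getUpdatedFilePath(
      CarbonUtil.checkAndAppendFileSystemURIScheme(systemLocation));
}
```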
[GitHub] carbondata pull request #2374: [CARBONDATA-2613] Support csv based carbon ta...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2374#discussion_r197010299 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java --- @@ -515,12 +573,72 @@ private CarbonInputSplit convertToCarbonInputSplit(ExtendedBlocklet blocklet) th return split; } + private List convertToInputSplit4ExternalFormat(JobContext jobContext, --- End diff -- Why not use CSVInputFormat.getSplits? I can see the code is almost the same as `FileInputFormat.getSplits` ---
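A sketch of the delegation being suggested: reuse `getSplits()` from an existing `FileInputFormat` subclass instead of re-implementing the split math. `TextInputFormat` stands in for CarbonData's `CSVInputFormat` here purely so the snippet is self-contained:

```java
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Delegate split computation to an existing FileInputFormat implementation
// rather than duplicating FileInputFormat.getSplits().
public final class ExternalFormatSplits {
  public static List<InputSplit> compute(JobContext jobContext) throws IOException {
    FileInputFormat<?, ?> underlying = new TextInputFormat();
    // FileInputFormat.getSplits() already honours the standard
    // mapreduce.input.fileinputformat.split.minsize/maxsize settings and block
    // locations; the caller only converts the resulting FileSplits if needed.
    return underlying.getSplits(jobContext);
  }
}
```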
[GitHub] carbondata pull request #2374: [CARBONDATA-2613] Support csv based carbon ta...
Github user ravipesala commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2374#discussion_r197008527 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/CsvRecordReader.java --- @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.hadoop; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.math.BigDecimal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.GenericQueryType; +import org.apache.carbondata.core.scan.filter.executer.FilterExecuter; +import org.apache.carbondata.core.scan.filter.intf.RowImpl; +import org.apache.carbondata.core.scan.filter.intf.RowIntf; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.scan.model.QueryModel; +import org.apache.carbondata.core.statusmanager.FileFormatProperties; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; +import org.apache.carbondata.hadoop.api.CarbonTableInputFormat; +import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport; +import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat; + +import com.univocity.parsers.csv.CsvParser; +import com.univocity.parsers.csv.CsvParserSettings; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import 
org.apache.hadoop.mapreduce.lib.input.FileSplit; + +/** + * scan csv file and filter on it + */ +@InterfaceStability.Evolving +@InterfaceAudience.Internal +public class CsvRecordReader extends AbstractRecordReader { --- End diff -- Why can't you use our existing `CSVInputFormat` and `CSVRecordReader`? why duplicate the code? ---
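A sketch of reuse by composition rather than duplication: obtain the row reader from the existing input format via `createRecordReader()` and layer only the table-specific conversion on top. Again `TextInputFormat` is a stand-in for the existing CSV format so the snippet compiles on its own:

```java
import java.io.IOException;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Obtain the parsing reader from the existing input format instead of copying
// its CSV-parsing code into a new reader class.
public final class DelegatingCsvReaderFactory {
  public static RecordReader<?, ?> create(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    InputFormat<?, ?> existingFormat = new TextInputFormat();
    RecordReader<?, ?> delegate = existingFormat.createRecordReader(split, context);
    delegate.initialize(split, context);
    // A thin wrapper (e.g. the FilteringRecordReader sketched earlier) can now map
    // each parsed row to the carbon schema without owning any parsing logic.
    return delegate;
  }
}
```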
[GitHub] carbondata pull request #2391: [CARBONDATA-2625] Optimize the performance of...
GitHub user xubo245 opened a pull request: https://github.com/apache/carbondata/pull/2391 [CARBONDATA-2625] Optimize the performance of CarbonReader read many files optimize the build process, including cache.getAll, getDatamaps and create carbonRecordReader Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? Yes, add new one for optimizing performance - [ ] Any backward compatibility impacted? NA - [ ] Document update required? NO - [ ] Testing done add example for it - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. NO You can merge this pull request into a Git repository by running: $ git pull https://github.com/xubo245/carbondata CARBONDATA-2625-SDKReaderFiled Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2391.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2391 commit 6b489a7ad1f1a2c73d7b990e831230da04180a7c Author: xubo245 <601450868@...> Date: 2018-06-21T04:25:27Z [CARBONDATA-2625] Optimize the performance of CarbonReader read many files optimize the build process, including cache.getAll, getDatamaps and create carbonRecordReader ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2372 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6433/ ---
[jira] [Resolved] (CARBONDATA-2504) Support StreamSQL for streaming job
[ https://issues.apache.org/jira/browse/CARBONDATA-2504?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ravindra Pesala resolved CARBONDATA-2504. - Resolution: Fixed Assignee: Jacky Li > Support StreamSQL for streaming job > --- > > Key: CARBONDATA-2504 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2504 > Project: CarbonData > Issue Type: New Feature >Reporter: Jacky Li >Assignee: Jacky Li >Priority: Major > Fix For: 1.4.1 > > Time Spent: 14h 10m > Remaining Estimate: 0h > > Currently carbon supports creating streaming job via Spark Streaming API, > this requires user to use spark-submit to create the streaming job. > To make it easier for SQL users, carbon should support StreamSQL to manage > the streaming job. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user brijoobopanna commented on the issue: https://github.com/apache/carbondata/pull/2380 retest SDV please ---
[GitHub] carbondata pull request #2328: [CARBONDATA-2504][STREAM] Support StreamSQL f...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2328 ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2328 LGTM ---
[jira] [Resolved] (CARBONDATA-2616) Incorrect explain and query result while using bloomfilter datamap
[ https://issues.apache.org/jira/browse/CARBONDATA-2616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jacky Li resolved CARBONDATA-2616. -- Resolution: Fixed Fix Version/s: 1.4.1 1.5.0 > Incorrect explain and query result while using bloomfilter datamap > -- > > Key: CARBONDATA-2616 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2616 > Project: CarbonData > Issue Type: Bug >Reporter: xuchuanyin >Assignee: xuchuanyin >Priority: Major > Fix For: 1.5.0, 1.4.1 > > Time Spent: 1h > Remaining Estimate: 0h > > 1. create a bloomfilter datamap with 2 index columns; > 2. (explain) query on this table with index columns as filters > 3. The explain result shows skipping negative number of blocklets > 4. The query result is duplicated -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata pull request #2386: [CARBONDATA-2616][BloomDataMap] Fix bugs in q...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2386 ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2390 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5366/ ---
[GitHub] carbondata issue #2386: [CARBONDATA-2616][BloomDataMap] Fix bugs in querying...
Github user jackylk commented on the issue: https://github.com/apache/carbondata/pull/2386 LGTM ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2390 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5263/ ---
[GitHub] carbondata pull request #2386: [CARBONDATA-2616][BloomDataMap] Fix bugs in q...
Github user jackylk commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2386#discussion_r197002129 --- Diff: datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java --- @@ -85,7 +86,7 @@ public void setIndexedColumn(Set indexedColumn) { @Override public List prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List partitions) { -List hitBlocklets = new ArrayList(); +Set hitBlocklets = new HashSet<>(); --- End diff -- There is one optimization we can add: If the blocklet is hit, then skip the following filter condition test for the same blocklet. Please raise another JIRA to track this optimization. ---
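A simplified sketch of the pruning loop combining the two ideas in this thread: collect hits in a `Set` so a blocklet is never reported twice, and stop testing further index-column conditions once a blocklet is already hit. Types are generic stand-ins, not the real `BloomCoarseGrainDataMap` API:

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.BiPredicate;

// B = blocklet identifier, C = one index-column filter condition,
// mightContain = the bloom filter test for (blocklet, condition).
public final class BloomPruneSketch {
  public static <B, C> List<B> prune(List<B> blocklets, List<C> indexColumnConditions,
      BiPredicate<B, C> mightContain) {
    Set<B> hit = new HashSet<>();   // Set instead of List: no duplicate blocklets
    for (B blocklet : blocklets) {
      for (C condition : indexColumnConditions) {
        if (mightContain.test(blocklet, condition)) {
          hit.add(blocklet);
          break;                    // already hit: skip the remaining conditions
        }
      }
    }
    return new ArrayList<>(hit);
  }
}
```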
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2390 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6432/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2382 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5262/ ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user kumarvishal09 commented on the issue: https://github.com/apache/carbondata/pull/2390 retest this please ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6431/ ---
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2387 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5365/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5261/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2382 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6430/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2382 retest this please ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2380 retest this please ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2372 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5364/ ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2372 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5363/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2328 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5362/ ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2390 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5260/ ---
[GitHub] carbondata issue #2390: [CARBONDATA-2624] Added validations for complex data...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2390 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6429/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2328 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5361/ ---
[GitHub] carbondata pull request #2390: [CARBONDATA-2624] Added validations for compl...
GitHub user praveenmeenakshi56 opened a pull request: https://github.com/apache/carbondata/pull/2390 [CARBONDATA-2624] Added validations for complex dataType columns in create table command for Local Dictionary Support Added Validations for Complex DataType command in create table command. Added Unit Test cases for the same Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? NA - [ ] Any backward compatibility impacted? NA - [ ] Document update required? will be updated in another PR - [ ] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - How it is tested? Please attach test report. - Is it a performance related change? Please attach the performance test report. - Any additional information to help reviewers in testing this change. Unit Test cases tested and added in this PR - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/praveenmeenakshi56/carbondata local_dict1 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2390.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2390 commit ee3eeaf008ed14d32f10ab69f84ade4494bf522b Author: praveenmeenakshi56 Date: 2018-06-20T22:03:11Z Added validations for create table command with complex dataType columns for Local Dictionary Support ---
[jira] [Created] (CARBONDATA-2624) Add validations for Create table command for complex dataType columns for Local Dictionary Support
Praveen M P created CARBONDATA-2624: --- Summary: Add validations for Create table command for complex dataType columns for Local Dictionary Support Key: CARBONDATA-2624 URL: https://issues.apache.org/jira/browse/CARBONDATA-2624 Project: CarbonData Issue Type: Sub-task Reporter: Praveen M P Assignee: Praveen M P -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2387 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5360/ ---
[GitHub] carbondata issue #2388: [WIP] Fix test on pr-2328
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2388 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5358/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2328 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6426/ ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2372 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6428/ ---
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2387 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6427/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2328 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5357/ ---
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2387 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5259/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6424/ ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2372 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6425/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2383 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6423/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2382 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5356/ ---
[GitHub] carbondata issue #2389: [CARBONDATA-2623][DataMap] Add DataMap Pre and Peven...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2389 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6421/ ---
[GitHub] carbondata issue #2372: [CARBONDATA-2609] Change RPC implementation to Hadoo...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2372 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5258/ ---
[GitHub] carbondata issue #2389: [CARBONDATA-2623][DataMap] Add DataMap Pre and Peven...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2389 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5255/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2328 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5257/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2383 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5355/ ---
[jira] [Updated] (CARBONDATA-2611) Add test cases for Create table statement for Local Dictionary Support
[ https://issues.apache.org/jira/browse/CARBONDATA-2611?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Praveen M P updated CARBONDATA-2611: Description: *What changes were made?* Unti Test cases and SDV Test cases were added for Local Dictionary Support for Create table command and Describe formatted command *What scenarios were covered?* Create table command with all combinations of configurations for table properties like * *LOCAL_DICTIONARY_ENABLE* * *LOCAL_DICTIONARY_THRESHOLD* * *LOCAL_DICTIONARY_INCLUDE* * *LOCAL_DICTIONARY_EXCLUDE* Verifying exception and error messages for all the invalid scenarios Describe formatted command to verify the values configured was: *What changes were made?* Unti Test cases and SDV Test cases were added for Local Dictionary Support for Create table command and Describe formatted command *What scenarios were covered?* Create table command with all combinations of configurations for table properties like * *LOCAL_DICTIONARY_ENABLE* * *LOCAL_DICTIONARY_THRESHOLD* * *LOCAL_DICTIONARY_INCLUDE* * *LOCAL_DICTIONARY_EXCLUDE* Verifying exception and error messages for all the invalid scenarios Describe formatted command to verify the values configured val localDictIncludeCols = tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_INCLUDE).split(",").map(_.trim) + val localDictExcludeCols = tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_EXCLUDE).split(",").map(_.trim) + localDictIncludeCols.foreach { distCol => + if (localDictExcludeCols.exists(x => x.equalsIgnoreCase(distCol.trim))) { + val duplicateColumns = (localDictIncludeCols ++ localDictExcludeColumns).diff((localDictIncludeCols ++ localDictExcludeColumns).distinct).distinct + val errMsg = + "Column ambiguity as duplicate column(s):" + + duplicateColumns.mkString(",") + " is present in LOCAL_DICTIONARY_INCLUDE " + + "and LOCAL_DICTIONARY_EXCLUDE. Duplicate columns are not allowed." + throw new MalformedCarbonCommandException(errMsg) + } } +// if (List(localDictIncludeCols, localDictExcludeCols).mkString(",") +// .distinct.length != +// List(localDictIncludeCols, localDictExcludeCols).mkString(",") +// .length) { +// val duplicateColumns = localDictIncludeCols ++ localDictExcludeCols.split(",").diff(localDictIncludeCols ++ localDictExcludeCols.split(",").distinct).distinct +// val errMsg = +// "Column ambiguity as duplicate column(s):" + +// duplicateColumns.mkString("") + " are present in LOCAL_DICTIONARY_INCLUDE " + +// "and LOCAL_DICTIONARY_EXCLUDE. Duplicate columns are not allowed." +// throw new MalformedCarbonCommandException(errMsg) +// } } } > Add test cases for Create table statement for Local Dictionary Support > -- > > Key: CARBONDATA-2611 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2611 > Project: CarbonData > Issue Type: Sub-task >Reporter: Praveen M P >Assignee: Praveen M P >Priority: Minor > Time Spent: 3h 50m > Remaining Estimate: 0h > > *What changes were made?* > Unti Test cases and SDV Test cases were added for Local Dictionary Support > for Create table command and Describe formatted command > *What scenarios were covered?* > Create table command with all combinations of configurations for table > properties like > * *LOCAL_DICTIONARY_ENABLE* > * *LOCAL_DICTIONARY_THRESHOLD* > * *LOCAL_DICTIONARY_INCLUDE* > * *LOCAL_DICTIONARY_EXCLUDE* > Verifying exception and error messages for all the invalid scenarios > Describe formatted command to verify the values configured > -- This message was sent by Atlassian JIRA (v7.6.3#76005)
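For readability, here is a cleaned-up, self-contained sketch of the duplicate-column check pasted in the description above (a column must not appear in both LOCAL_DICTIONARY_INCLUDE and LOCAL_DICTIONARY_EXCLUDE); class, method, and exception names are illustrative only:

```java
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.stream.Collectors;

// Reject table properties where a column is listed in both the include and
// exclude lists for the local dictionary.
public final class LocalDictionaryColumnCheck {
  public static void validate(String includeProperty, String excludeProperty) {
    Set<String> include = toColumnSet(includeProperty);
    Set<String> exclude = toColumnSet(excludeProperty);
    Set<String> duplicates = include.stream()
        .filter(exclude::contains)
        .collect(Collectors.toCollection(LinkedHashSet::new));
    if (!duplicates.isEmpty()) {
      throw new IllegalArgumentException(
          "Column ambiguity as duplicate column(s): " + String.join(",", duplicates)
              + " present in both LOCAL_DICTIONARY_INCLUDE and LOCAL_DICTIONARY_EXCLUDE."
              + " Duplicate columns are not allowed.");
    }
  }

  private static Set<String> toColumnSet(String commaSeparated) {
    return Arrays.stream(commaSeparated.split(","))
        .map(String::trim)
        .map(String::toLowerCase)   // comparison is case-insensitive in the pasted check
        .collect(Collectors.toCollection(LinkedHashSet::new));
  }
}
```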
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2387 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6422/ ---
[GitHub] carbondata issue #2328: [CARBONDATA-2504][STREAM] Support StreamSQL for stre...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2328 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6420/ ---
[jira] [Updated] (CARBONDATA-2611) Add test cases for Create table statement for Local Dictionary Support
[ https://issues.apache.org/jira/browse/CARBONDATA-2611?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Praveen M P updated CARBONDATA-2611: Description: *What changes were made?* Unti Test cases and SDV Test cases were added for Local Dictionary Support for Create table command and Describe formatted command *What scenarios were covered?* Create table command with all combinations of configurations for table properties like * *LOCAL_DICTIONARY_ENABLE* * *LOCAL_DICTIONARY_THRESHOLD* * *LOCAL_DICTIONARY_INCLUDE* * *LOCAL_DICTIONARY_EXCLUDE* Verifying exception and error messages for all the invalid scenarios Describe formatted command to verify the values configured val localDictIncludeCols = tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_INCLUDE).split(",").map(_.trim) + val localDictExcludeCols = tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_EXCLUDE).split(",").map(_.trim) + localDictIncludeCols.foreach { distCol => + if (localDictExcludeCols.exists(x => x.equalsIgnoreCase(distCol.trim))) { + val duplicateColumns = (localDictIncludeCols ++ localDictExcludeColumns).diff((localDictIncludeCols ++ localDictExcludeColumns).distinct).distinct + val errMsg = + "Column ambiguity as duplicate column(s):" + + duplicateColumns.mkString(",") + " is present in LOCAL_DICTIONARY_INCLUDE " + + "and LOCAL_DICTIONARY_EXCLUDE. Duplicate columns are not allowed." + throw new MalformedCarbonCommandException(errMsg) + } } +// if (List(localDictIncludeCols, localDictExcludeCols).mkString(",") +// .distinct.length != +// List(localDictIncludeCols, localDictExcludeCols).mkString(",") +// .length) { +// val duplicateColumns = localDictIncludeCols ++ localDictExcludeCols.split(",").diff(localDictIncludeCols ++ localDictExcludeCols.split(",").distinct).distinct +// val errMsg = +// "Column ambiguity as duplicate column(s):" + +// duplicateColumns.mkString("") + " are present in LOCAL_DICTIONARY_INCLUDE " + +// "and LOCAL_DICTIONARY_EXCLUDE. Duplicate columns are not allowed." 
+// throw new MalformedCarbonCommandException(errMsg) +// } } } was: *What changes were made?* Unti Test cases and SDV Test cases were added for Local Dictionary Support for Create table command and Describe formatted command *What scenarios were covered?* Create table command with all combinations of configurations for table properties like * *LOCAL_DICTIONARY_ENABLE* * *LOCAL_DICTIONARY_THRESHOLD* * *LOCAL_DICTIONARY_INCLUDE* * *LOCAL_DICTIONARY_EXCLUDE* Verifying exception and error messages for all the invalid scenarios Describe formatted command to verify the values configured > Add test cases for Create table statement for Local Dictionary Support > -- > > Key: CARBONDATA-2611 > URL: https://issues.apache.org/jira/browse/CARBONDATA-2611 > Project: CarbonData > Issue Type: Sub-task >Reporter: Praveen M P >Assignee: Praveen M P >Priority: Minor > Time Spent: 3h 50m > Remaining Estimate: 0h > > *What changes were made?* > Unti Test cases and SDV Test cases were added for Local Dictionary Support > for Create table command and Describe formatted command > *What scenarios were covered?* > Create table command with all combinations of configurations for table > properties like > * *LOCAL_DICTIONARY_ENABLE* > * *LOCAL_DICTIONARY_THRESHOLD* > * *LOCAL_DICTIONARY_INCLUDE* > * *LOCAL_DICTIONARY_EXCLUDE* > Verifying exception and error messages for all the invalid scenarios > Describe formatted command to verify the values configured > val localDictIncludeCols = > tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_INCLUDE).split(",").map(_.trim) > + val localDictExcludeCols = > tableProperties(CarbonCommonConstants.LOCAL_DICTIONARY_EXCLUDE).split(",").map(_.trim) > + localDictIncludeCols.foreach { distCol => > + if (localDictExcludeCols.exists(x => x.equalsIgnoreCase(distCol.trim))) { > + val duplicateColumns = (localDictIncludeCols ++ > localDictExcludeColumns).diff((localDictIncludeCols ++ > localDictExcludeColumns).distinct).distinct > + val errMsg = > + "Column ambiguity as duplicate column(s):" + > + duplicateColumns.mkString(",") + " is present in LOCAL_DICTIONARY_INCLUDE " > + > + "and LOCAL_DICTIONARY_EXCLUDE. Duplicate columns are not allowed." > + throw new MalformedCarbonCommandException(errMsg) > + } > } > +// if (List(localDictIncludeCols, localDictExcludeCols).mkString(",") > +// .distinct.length != > +// List(localDictIncludeCols, localDictExcludeCols).mkString(",") > +// .length) { > +// val duplicateColumns = localDictIncludeCols ++ > localDictExcludeCols.split(",").diff(localDictIncludeCols ++ >
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6419/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2380 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5354/ ---
[GitHub] carbondata issue #2387: [CARBONDATA-2621][BloomDataMap] Lock problem in inde...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2387 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5256/ ---
[GitHub] carbondata issue #2265: Added Performance Optimization for Presto by using M...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2265 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5254/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5253/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2383 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6417/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2382 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6418/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2380 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5353/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2383 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5252/ ---
[GitHub] carbondata pull request #2372: [CARBONDATA-2609] Change RPC implementation t...
Github user jackylk commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2372#discussion_r196823447 --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonRecordReader.java --- @@ -80,7 +80,7 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext context) } // It should use the exists tableBlockInfos if tableBlockInfos of queryModel is not empty // otherwise the prune is no use before this method -if (!queryModel.isFG()) { +if (queryModel.getTableBlockInfos().isEmpty()) { --- End diff -- I have added a new RecordReader called `IndexedRecordReader`, I am using this one in search mode now. So this problem will not come. I will remove line 83. ---
[GitHub] carbondata issue #2386: [CARBONDATA-2616][BloomDataMap] Fix bugs in querying...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2386 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5352/ ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2265 ---
[GitHub] carbondata issue #2388: [WIP] Fix test on pr-2328
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2388 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6416/ ---
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6415/ ---
[GitHub] carbondata issue #2265: Added Performance Optimization for Presto by using M...
Github user chenliang613 commented on the issue: https://github.com/apache/carbondata/pull/2265 verified, looks good to me. ---
[GitHub] carbondata pull request #2375: [CARBONDATA-2585][CARBONDATA-2586][Local Dict...
Github user akashrn5 closed the pull request at: https://github.com/apache/carbondata/pull/2375 ---
[GitHub] carbondata pull request #2389: [CARBONDATA-2623][DataMap] Add DataMap Pre an...
GitHub user mohammadshahidkhan opened a pull request: https://github.com/apache/carbondata/pull/2389 [CARBONDATA-2623][DataMap] Add DataMap Pre and Pevent listener Added Pre and Post Execution Events for index datamap - [X] Any interfaces changed? None - [X] Any backward compatibility impacted? None - [X] Document update required? None - [X] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - How it is tested? Please attach test report. - Is it a performance related change? Please attach the performance test report. - Any additional information to help reviewers in testing this change. Test addition is not applicable - [X] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. NA You can merge this pull request into a Git repository by running: $ git pull https://github.com/mohammadshahidkhan/incubator-carbondata bloom_internal_event Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2389.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2389 commit 0938102f3457e659e00783d1bf385c1b677ecfbc Author: mohammadshahidkhan Date: 2018-06-20T14:22:51Z [CARBONDATA-2623][DataMap] Add DataMap Pre and Pevent listener ---
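A generic sketch of the pre/post event pattern this PR adds around index datamap operations; all names here (`EventBus`, `DataMapEvent`, `DataMapEventListener`) are illustrative and not CarbonData's actual listener API:

```java
import java.util.ArrayList;
import java.util.List;

// Listeners receive a pre event before a datamap operation and a post event after it.
public final class EventBus {
  public interface DataMapEvent { String operation(); boolean isPreEvent(); }

  public interface DataMapEventListener { void onEvent(DataMapEvent event); }

  private final List<DataMapEventListener> listeners = new ArrayList<>();

  public void register(DataMapEventListener listener) { listeners.add(listener); }

  public void fire(DataMapEvent event) {
    for (DataMapEventListener listener : listeners) { listener.onEvent(event); }
  }

  /** Wraps a datamap operation (e.g. build/rebuild) with a pre event and a post event. */
  public void runWithEvents(String operation, Runnable body) {
    fire(event(operation, true));   // pre event: operation about to start
    body.run();
    fire(event(operation, false));  // post event: operation finished
  }

  private static DataMapEvent event(String operation, boolean pre) {
    return new DataMapEvent() {
      @Override public String operation() { return operation; }
      @Override public boolean isPreEvent() { return pre; }
    };
  }
}
```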
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2380 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5249/ ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user bhavya411 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196799725 --- Diff: integration/presto/README.md --- @@ -113,6 +116,10 @@ Please follow the below steps to query carbondata in presto enable.unsafe.in.query.processing property by default is true in CarbonData system, the carbon.unsafe.working.memory.in.mb property defines the limit for Unsafe Memory usage in Mega Bytes, the default value is 512 MB. If your tables are big you can increase the unsafe memory, or disable unsafe via setting enable.unsafe.in.query.processing=false. + + If you do not want to use unsafe memory at all please set the below properties to false as well. --- End diff -- This has been corrected ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user bhavya411 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196799800 --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java --- @@ -152,19 +176,20 @@ private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) Type spiType = carbonDataType2SpiMapper(cs); columnHandles.put(cs.getColumnName(), - new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, column.getSchemaOrdinal(), - column.getKeyOrdinal(), column.getColumnGroupOrdinal(), false, cs.getColumnGroupId(), - cs.getColumnUniqueId(), cs.isUseInvertedIndex(), cs.getPrecision(), cs.getScale())); + new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, + column.getSchemaOrdinal(), column.getKeyOrdinal(), column.getColumnGroupOrdinal(), + false, cs.getColumnGroupId(), cs.getColumnUniqueId(), cs.isUseInvertedIndex(), + cs.getPrecision(), cs.getScale())); } for (CarbonMeasure measure : cb.getMeasureByTableName(tableName)) { ColumnSchema cs = measure.getColumnSchema(); - Type spiType = carbonDataType2SpiMapper(cs); columnHandles.put(cs.getColumnName(), - new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, cs.getSchemaOrdinal(), - measure.getOrdinal(), cs.getColumnGroupId(), true, cs.getColumnGroupId(), - cs.getColumnUniqueId(), cs.isUseInvertedIndex(), cs.getPrecision(), cs.getScale())); + new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, + cs.getSchemaOrdinal(), measure.getOrdinal(), cs.getColumnGroupId(), true, + cs.getColumnGroupId(), cs.getColumnUniqueId(), cs.isUseInvertedIndex(), + cs.getPrecision(), cs.getScale())); } //should i cache it? --- End diff -- Removed the comment ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user bhavya411 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196799513 --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSourceProvider.java --- @@ -129,23 +135,31 @@ private QueryModel createQueryModel(CarbondataSplit carbondataSplit, String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath(); conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath); + conf.set("query.id", queryId); JobConf jobConf = new JobConf(conf); CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, carbonTable, PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection); TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0)); - CarbonInputSplit carbonInputSplit = - CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit()); + CarbonMultiBlockSplit carbonInputSplit = + CarbonLocalMultiBlockSplit.convertSplit(carbondataSplit.getLocalInputSplit()); QueryModel queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext); + queryModel.setQueryId(queryId); queryModel.setVectorReader(true); + queryModel.setStatisticsRecorder( + CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId())); + /* List splitList = new ArrayList<>(1); - splitList.add(carbonInputSplit); - List tableBlockInfoList = CarbonInputSplit.createBlocks(splitList); --- End diff -- This has been fixed ---
[jira] [Created] (CARBONDATA-2623) Add DataMap Pre and Pevent listener
Mohammad Shahid Khan created CARBONDATA-2623: Summary: Add DataMap Pre and Pevent listener Key: CARBONDATA-2623 URL: https://issues.apache.org/jira/browse/CARBONDATA-2623 Project: CarbonData Issue Type: Improvement Reporter: Mohammad Shahid Khan -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[GitHub] carbondata issue #2380: [CARBONDATA-2509][CARBONDATA-2510][CARBONDATA-2511][...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2380 retest this please ---
[GitHub] carbondata issue #2366: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2366 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5351/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user xuchuanyin commented on the issue: https://github.com/apache/carbondata/pull/2383 retest this please ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2382 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6414/ ---
[GitHub] carbondata issue #2047: [CARBONDATA-2240] Refactored TestPreaggregateExpress...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2047 Build Failed with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5250/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2383 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6413/ ---
[GitHub] carbondata issue #2383: [CARBONDATA-2615][32K] Support page size less than 3...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2383 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5247/ ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user chenliang613 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196773919 --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataPageSourceProvider.java --- @@ -129,23 +135,31 @@ private QueryModel createQueryModel(CarbondataSplit carbondataSplit, String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath(); conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath); + conf.set("query.id", queryId); JobConf jobConf = new JobConf(conf); CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, carbonTable, PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection); TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0)); - CarbonInputSplit carbonInputSplit = - CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit()); + CarbonMultiBlockSplit carbonInputSplit = + CarbonLocalMultiBlockSplit.convertSplit(carbondataSplit.getLocalInputSplit()); QueryModel queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext); + queryModel.setQueryId(queryId); queryModel.setVectorReader(true); + queryModel.setStatisticsRecorder( + CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId())); + /* List splitList = new ArrayList<>(1); - splitList.add(carbonInputSplit); - List tableBlockInfoList = CarbonInputSplit.createBlocks(splitList); --- End diff -- Please remove this commented-out dummy code. ---
[GitHub] carbondata pull request #2388: [WIP] Fix test on pr-2328
GitHub user ravipesala opened a pull request: https://github.com/apache/carbondata/pull/2388 [WIP] Fix test on pr-2328 Be sure to do all of the following checklist to help us incorporate your contribution quickly and easily: - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [ ] Testing done Please provide details on - Whether new unit test cases have been added or why no new tests are required? - How it is tested? Please attach test report. - Is it a performance related change? Please attach the performance test report. - Any additional information to help reviewers in testing this change. - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/ravipesala/incubator-carbondata pr-2328 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2388.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2388 commit 173f16e91a951e22446b6b96eb262724e98aea8a Author: Jacky Li Date: 2018-05-21T13:49:33Z support StreamSQL add test add test fix comment fix comment commit 6f708ada7052a8dc25454c47a1432f7bb7b8a107 Author: Jacky Li Date: 2018-06-12T17:52:22Z fix comment commit 9f1643a454884fa100f2c1522bd2503f5779d2e5 Author: Jacky Li Date: 2018-06-13T03:51:45Z fix test commit 2419422920f760fdd8bc6b6f08e15128c8f9920b Author: Jacky Li Date: 2018-06-13T08:48:10Z change parser option commit 55f22507b116e247d73897ac2290b3c367abcbf0 Author: Jacky Li Date: 2018-06-15T07:07:55Z fix test commit aec29e7a13f95574b14d3671e849849869278406 Author: Jacky Li Date: 2018-06-18T15:58:56Z fix comment commit 471e5dc504b5514ccd47a93f650364c1f3796a26 Author: Jacky Li Date: 2018-06-19T06:26:09Z fix test commit a33a82a1a26cdb3be93fa12231cc1a52fe060692 Author: Jacky Li Date: 2018-06-19T14:22:00Z fix test commit 8286484af7b8370f265f8a8d9397e3092980ecce Author: Jacky Li Date: 2018-06-19T14:23:08Z fix test commit 4cb6cca901fe556c6d942d1e5d72c863b7dc41c5 Author: Jacky Li Date: 2018-06-19T16:55:44Z fix test commit d769c7e04283a300ec1b9b695a0ec3e3d6ac0ff7 Author: Jacky Li Date: 2018-06-20T06:59:55Z fix test commit dd4a4fb11e08a72e40c84d36e26fd1ee26891c03 Author: ravipesala Date: 2018-06-20T13:19:18Z Fixed Tests ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2382 Build Success with Spark 2.2.1, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/5248/ ---
[GitHub] carbondata issue #2382: [CARBONDATA-2513][32K] Support write long string fro...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2382 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5350/ ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user chenliang613 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196768157 --- Diff: integration/presto/README.md --- @@ -113,6 +116,10 @@ Please follow the below steps to query carbondata in presto enable.unsafe.in.query.processing property by default is true in CarbonData system, the carbon.unsafe.working.memory.in.mb property defines the limit for Unsafe Memory usage in Mega Bytes, the default value is 512 MB. If your tables are big you can increase the unsafe memory, or disable unsafe via setting enable.unsafe.in.query.processing=false. + + If you do not want to use unsafe memory at all please set the below properties to false as well. --- End diff -- Currently, the carbon-presto integration doesn't support unsafe, so the description here should be: please disable the unsafe feature for the presto integration module. ---
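As an illustration of the reviewer's point, a minimal carbon.properties-style sketch for disabling unsafe processing in the Presto integration is shown below; only the two properties explicitly named in the quoted README text appear here, and the additional "below properties" the README alludes to are not reproduced:

    # Presto integration currently does not support unsafe processing, so keep it disabled.
    enable.unsafe.in.query.processing=false
    # Unsafe working-memory limit in MB (default 512); only relevant while unsafe is enabled.
    carbon.unsafe.working.memory.in.mb=512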
[GitHub] carbondata issue #2366: [CARBONDATA-2532][Integration] Carbon to support spa...
Github user CarbonDataQA commented on the issue: https://github.com/apache/carbondata/pull/2366 Build Success with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/6411/ ---
[GitHub] carbondata pull request #2265: Added Performance Optimization for Presto by ...
Github user chenliang613 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2265#discussion_r196753301 --- Diff: integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java --- @@ -152,19 +176,20 @@ private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) Type spiType = carbonDataType2SpiMapper(cs); columnHandles.put(cs.getColumnName(), - new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, column.getSchemaOrdinal(), - column.getKeyOrdinal(), column.getColumnGroupOrdinal(), false, cs.getColumnGroupId(), - cs.getColumnUniqueId(), cs.isUseInvertedIndex(), cs.getPrecision(), cs.getScale())); + new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, + column.getSchemaOrdinal(), column.getKeyOrdinal(), column.getColumnGroupOrdinal(), + false, cs.getColumnGroupId(), cs.getColumnUniqueId(), cs.isUseInvertedIndex(), + cs.getPrecision(), cs.getScale())); } for (CarbonMeasure measure : cb.getMeasureByTableName(tableName)) { ColumnSchema cs = measure.getColumnSchema(); - Type spiType = carbonDataType2SpiMapper(cs); columnHandles.put(cs.getColumnName(), - new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, cs.getSchemaOrdinal(), - measure.getOrdinal(), cs.getColumnGroupId(), true, cs.getColumnGroupId(), - cs.getColumnUniqueId(), cs.isUseInvertedIndex(), cs.getPrecision(), cs.getScale())); + new CarbondataColumnHandle(connectorId, cs.getColumnName(), spiType, + cs.getSchemaOrdinal(), measure.getOrdinal(), cs.getColumnGroupId(), true, + cs.getColumnGroupId(), cs.getColumnUniqueId(), cs.isUseInvertedIndex(), + cs.getPrecision(), cs.getScale())); } //should i cache it? --- End diff -- Suggest cleaning up this comment. ---
[GitHub] carbondata issue #2385: [CARBONDATA-2617]invalid tuple-id and block id getti...
Github user ravipesala commented on the issue: https://github.com/apache/carbondata/pull/2385 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/5349/ ---