[GitHub] [carbondata] vikramahuja1001 opened a new pull request #3873: Repair SI Command
vikramahuja1001 opened a new pull request #3873: URL: https://github.com/apache/carbondata/pull/3873 ### Why is this PR needed? ### What changes were proposed in this PR? ### Does this PR introduce any user interface change? - No - Yes. (please explain the change and update document) ### Is any new testcase added? - No - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai opened a new pull request #3872: [CARBONDATA-3889] Move java files into the java source directory
QiangCai opened a new pull request #3872: URL: https://github.com/apache/carbondata/pull/3872 ### Why is this PR needed? There are java files under the scala source directory ### What changes were proposed in this PR? 1. Move java files into the java source directory 2. Fix code style issue ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - No This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3774: [CARBONDATA-3833] Make geoID visible
CarbonDataQA1 commented on pull request #3774: URL: https://github.com/apache/carbondata/pull/3774#issuecomment-665593991 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3773: [CARBONDATA-3830]Presto complex columns read support
ajantha-bhat commented on a change in pull request #3773: URL: https://github.com/apache/carbondata/pull/3773#discussion_r462270452 ## File path: integration/presto/src/main/java/org/apache/carbondata/presto/PrestoCarbonVectorizedRecordReader.java ## @@ -176,8 +191,16 @@ private void initBatch() { for (int i = 0; i < queryDimension.size(); i++) { ProjectionDimension dim = queryDimension.get(i); if (dim.getDimension().isComplex()) { +List childDimensions = +dim.getDimension().getListOfChildDimensions(); +ArrayList childFields = new ArrayList(); +for (int ind = 0; ind < childDimensions.size(); ind++) { + ColumnSchema childSchema = childDimensions.get(ind).getColumnSchema(); Review comment: go to each file where new code is added and look for unused variable/function added (IDE can show in grey) and remove it. Here childSchema is not used. ## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/readers/ArrayStreamReader.java ## @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.presto.readers; + +import io.prestosql.spi.type.*; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.StructField; +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl; + +import io.prestosql.spi.block.Block; +import io.prestosql.spi.block.BlockBuilder; + +import org.apache.carbondata.presto.CarbonVectorBatch; + +/** + * Class to read the Array Stream + */ + +public class ArrayStreamReader extends CarbonColumnVectorImpl implements PrestoVectorBlockBuilder { + + protected int batchSize; + + protected Type type; + protected BlockBuilder builder; + Block childBlock = null; + private int index = 0; + + public ArrayStreamReader(int batchSize, DataType dataType, StructField field) { +super(batchSize, dataType); +this.batchSize = batchSize; +this.type = getArrayOfType(field, dataType); +setChildrenVector( +CarbonVectorBatch.createDirectStreamReader(this.batchSize, field.getDataType(), field)); +this.builder = type.createBlockBuilder(null, batchSize); + } + + public int getIndex() { +return index; + } + + public void setIndex(int index) { +this.index = index; + } + + public String getDataTypeName() { +return "ARRAY"; + } + + Type getArrayOfType(StructField field, DataType dataType) { +if (dataType == DataTypes.STRING) { + return new ArrayType(VarcharType.VARCHAR); Review comment: use equals, it is string ## File path: integration/presto/src/main/prestosql/org/apache/carbondata/presto/readers/ArrayStreamReader.java ## @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.presto.readers; + +import io.prestosql.spi.type.*; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.StructField; +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl; + +import io.prestosql.spi.block.Block; +import io.prestosql.spi.block.BlockBuilder; + +import org.apache.carbondata.presto.CarbonVectorBatch; + +/** + * Clas
[GitHub] [carbondata] ajantha-bhat opened a new pull request #3871: [WIP] Fix multiple issues
ajantha-bhat opened a new pull request #3871: URL: https://github.com/apache/carbondata/pull/3871 ### Why is this PR needed? Auto compaction/minor compaction was happening multiple times for the same segments. Executor (for merge index and merge data files) segment file write failure and table status update failure are not handled. When compaction fails, there is no need to call merge index. Segment file is not cleaned up when table status update fails for compaction. Some table status retry issues. ### What changes were proposed in this PR? ### Does this PR introduce any user interface change? - No - Yes. (please explain the change and update document) ### Is any new testcase added? - No - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3873: Repair SI Command
CarbonDataQA1 commented on pull request #3873: URL: https://github.com/apache/carbondata/pull/3873#issuecomment-665651226 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.
CarbonDataQA1 commented on pull request #3865: URL: https://github.com/apache/carbondata/pull/3865#issuecomment-665611488 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] vikramahuja1001 commented on pull request #3873: [WIP] Repair SI Command
vikramahuja1001 commented on pull request #3873: URL: https://github.com/apache/carbondata/pull/3873#issuecomment-665784554 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3871: [WIP] Fix multiple issues
CarbonDataQA1 commented on pull request #3871: URL: https://github.com/apache/carbondata/pull/3871#issuecomment-665517022 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ShreelekhyaG commented on a change in pull request #3774: [CARBONDATA-3833] Make geoID visible
ShreelekhyaG commented on a change in pull request #3774: URL: https://github.com/apache/carbondata/pull/3774#discussion_r462128423 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SICreationCommand.scala ## @@ -210,6 +210,14 @@ private[sql] case class CarbonCreateSecondaryIndexCommand( .get }") } + val properties = carbonTable.getTableInfo.getFactTable.getTableProperties.asScala + val spatialProperty = properties.get(CarbonCommonConstants.SPATIAL_INDEX) + if (spatialProperty.isDefined) { +if (dims.find(x => x.getColName.equalsIgnoreCase(spatialProperty.get.trim)).isDefined) { Review comment: changed now ## File path: integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonAnalysisRules.scala ## @@ -266,16 +266,24 @@ case class CarbonPreInsertionCasts(sparkSession: SparkSession) extends Rule[Logi relation: LogicalRelation, child: LogicalPlan): LogicalPlan = { val carbonDSRelation = relation.relation.asInstanceOf[CarbonDatasourceHadoopRelation] -if (carbonDSRelation.carbonRelation.output.size > CarbonCommonConstants +val carbonTable = carbonDSRelation.carbonRelation.carbonTable +val properties = carbonTable.getTableInfo.getFactTable.getTableProperties.asScala +val spatialProperty = properties.get(CarbonCommonConstants.SPATIAL_INDEX) +var expectedOutput = carbonDSRelation.carbonRelation.output +// have to remove geo column to support insert with original schema Review comment: same reason as above. To support insert without the geo column. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3872: [CARBONDATA-3889] Move java files into the java source directory
CarbonDataQA1 commented on pull request #3872: URL: https://github.com/apache/carbondata/pull/3872#issuecomment-665604813 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3869: Exception added for index creation on long string columns
CarbonDataQA1 commented on pull request #3869: URL: https://github.com/apache/carbondata/pull/3869#issuecomment-665577522 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 opened a new pull request #3874: [CARBONDATA-3931]Fix Secondary index with index column as DateType giving wrong results
Indhumathi27 opened a new pull request #3874: URL: https://github.com/apache/carbondata/pull/3874 ### Why is this PR needed? On data load to SI with date type, dictionary values are loaded from factToIndexDictColumnMapping instead of being fetched from the wrapper. ### What changes were proposed in this PR? Get the dictionary keys from the wrapper, convert them to Int, and load them to SI. ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[jira] [Created] (CARBONDATA-3931) Secondary index with index column as DateType gives wrong results
Indhumathi Muthumurugesh created CARBONDATA-3931: Summary: Secondary index with index column as DateType gives wrong results Key: CARBONDATA-3931 URL: https://issues.apache.org/jira/browse/CARBONDATA-3931 Project: CarbonData Issue Type: Bug Reporter: Indhumathi Muthumurugesh -- This message was sent by Atlassian Jira (v8.3.4#803005)
[GitHub] [carbondata] akkio-97 opened a new pull request #3866: [CARBONDATA-3915] Correction in the documentation for spark-shell
akkio-97 opened a new pull request #3866: URL: https://github.com/apache/carbondata/pull/3866 ### Why is this PR needed? Data load fails because the spark-shell code uses carbonSessions instead of carbonExtensions. ### What changes were proposed in this PR? Have made use of carbonExtensions. ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - No This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akkio-97 closed pull request #3859: [CARBONDATA-3921] SI load fails with 'unable to get filestatus error' in concurrent scenario
akkio-97 closed pull request #3859: URL: https://github.com/apache/carbondata/pull/3859 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3868: [CARBONDATA-3889] Cleanup code in carbondata-core module
CarbonDataQA1 commented on pull request #3868: URL: https://github.com/apache/carbondata/pull/3868#issuecomment-665182981 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3856: [CARBONDATA-3929]Improve CDC performance
CarbonDataQA1 commented on pull request #3856: URL: https://github.com/apache/carbondata/pull/3856#issuecomment-665195269 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
akashrn5 commented on pull request #3857: URL: https://github.com/apache/carbondata/pull/3857#issuecomment-664777049 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3864: [HOTFIX] Show Segment with stage returns empty
asfgit closed pull request #3864: URL: https://github.com/apache/carbondata/pull/3864 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3773: [CARBONDATA-3830]Presto complex columns read support
CarbonDataQA1 commented on pull request #3773: URL: https://github.com/apache/carbondata/pull/3773#issuecomment-665065271 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kevinjmh commented on a change in pull request #3868: [CARBONDATA-3889] Cleanup code in carbondata-core module
kevinjmh commented on a change in pull request #3868: URL: https://github.com/apache/carbondata/pull/3868#discussion_r461986247 ## File path: core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java ## @@ -283,8 +283,8 @@ public BigDecimal getDecimal(int rowId) { @Override public byte[][] getByteArrayPage() { byte[][] data = new byte[arrayElementCount][]; -for (int i = 0; i < arrayElementCount; i++) { - data[i] = fixedLengthData[i]; +if (arrayElementCount >= 0) { Review comment: ```suggestion if (arrayElementCount = 0) { ``` ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexMeta.java ## @@ -78,7 +78,7 @@ public String toString() { return new StringBuilder("IndexMeta{") .append("indexName='").append(indexName).append('\'') .append(", indexedColumns=[") -.append(StringUtils.join(getIndexedColumnNames(), ", ")).append("]\'") +.append(StringUtils.join(getIndexedColumnNames(), ", ")).append("]'") .append(", optimizedOperation=").append(optimizedOperation) .append('}') .toString(); Review comment: ``` return new StringBuilder("IndexMeta{indexName='") .append(indexName) .append("', indexedColumns='[") .append(StringUtils.join(getIndexedColumnNames(), ", ")) .append("]', optimizedOperation='") .append(optimizedOperation) .append("'}") .toString(); ``` keep same quotation marks ## File path: core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java ## @@ -105,20 +106,18 @@ public boolean equals(Object o) { Blocklet blocklet = (Blocklet) o; -if (filePath != null ? !filePath.equals(blocklet.filePath) : blocklet.filePath != null) { +if (!Objects.equals(filePath, blocklet.filePath)) { return false; } if (!compareBlockletIdForObjectMatching) { return true; } -return blockletId != null ? 
-blockletId.equals(blocklet.blockletId) : -blocklet.blockletId == null; +return Objects.equals(blockletId, blocklet.blockletId); } @Override public String toString() { -final StringBuffer sb = new StringBuffer("Blocklet{"); +final StringBuilder sb = new StringBuilder("Blocklet{"); sb.append("filePath='").append(filePath).append('\''); sb.append(", blockletId='").append(blockletId).append('\''); sb.append('}'); Review comment: ditto ## File path: core/src/main/java/org/apache/carbondata/core/metadata/datatype/Field.java ## @@ -193,6 +192,7 @@ public void setParent(String parent) { } public String getStoreType() { +String storeType = "columnar"; Review comment: Q1: why this change? why not mark the string as static instead? Q2: this method is never used ## File path: core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java ## @@ -722,15 +719,11 @@ public int getBlockletSizeInMB() { public String getBucketHashMethod() { String configuredMethod = tableInfo.getFactTable().getTableProperties() .get(CarbonCommonConstants.BUCKET_HASH_METHOD); -if (configuredMethod == null) { - return CarbonCommonConstants.BUCKET_HASH_METHOD_DEFAULT; -} else { - if (CarbonCommonConstants.BUCKET_HASH_METHOD_NATIVE.equals(configuredMethod)) { -return CarbonCommonConstants.BUCKET_HASH_METHOD_NATIVE; - } - // by default we use spark_hash_expression hash method - return CarbonCommonConstants.BUCKET_HASH_METHOD_DEFAULT; +if (CarbonCommonConstants.BUCKET_HASH_METHOD_NATIVE.equals(configuredMethod)) { Review comment: should keep (not) null check This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3819: [CARBONDATA-3855]support carbon SDK to load data from different files
CarbonDataQA1 commented on pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#issuecomment-665015972 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai opened a new pull request #3868: [CARBONDATA-3889] Cleanup code in carbondata-core module
QiangCai opened a new pull request #3868: URL: https://github.com/apache/carbondata/pull/3868 ### Why is this PR needed? 1. Redundant 'if' statement 2. Array access many times in for loop 3. Repeated switch branch 4. Use StringBuffer ### What changes were proposed in this PR? 1. Change redundant 'if' statement 2. Use enhanced for loop 3. Merge switch branch 4. Use StringBuilder instead of StringBuffer ### Does this PR introduce any user interface change? - No - Yes. (please explain the change and update document) ### Is any new testcase added? - No - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Karan980 commented on pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
Karan980 commented on pull request #3857: URL: https://github.com/apache/carbondata/pull/3857#issuecomment-664850764 @VenuReddy2103 Done This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3870: [CARBONDATA-3930] Fix DataLoadingException in MVExample
CarbonDataQA1 commented on pull request #3870: URL: https://github.com/apache/carbondata/pull/3870#issuecomment-665433865 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] nihal0107 commented on a change in pull request #3819: [CARBONDATA-3855]support carbon SDK to load data from different files
nihal0107 commented on a change in pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#discussion_r461474532 ## File path: sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java ## @@ -823,6 +834,30 @@ public void write(Object object) throws IOException { } } + /** + * Load data of all avro files at given location iteratively. + * + * @throws IOException + */ + @Override + public void write() throws IOException { +if (this.dataFiles == null || this.dataFiles.length == 0) { + throw new RuntimeException("'withAvroPath()' must be called to support loading avro files"); +} +Arrays.sort(this.dataFiles); +for (File dataFile : this.dataFiles) { + this.loadSingleFile(dataFile); +} + } + + private void loadSingleFile(File file) throws IOException { +DataFileReader avroReader = SDKUtil.buildAvroReader(String.valueOf(file)); Review comment: done ## File path: sdk/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriter.java ## @@ -49,4 +49,6 @@ public static CarbonWriterBuilder builder() { return new CarbonWriterBuilder(); } + public void write() throws IOException { } Review comment: done This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on pull request #3773: [CARBONDATA-3830]Presto complex columns read support
ajantha-bhat commented on pull request #3773: URL: https://github.com/apache/carbondata/pull/3773#issuecomment-665473589 @akkio-97 : Also change description to presto supporting array columns reading This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] niuge01 commented on pull request #3864: [HOTFIX] Show Segment with stage returns empty
niuge01 commented on pull request #3864: URL: https://github.com/apache/carbondata/pull/3864#issuecomment-664720572 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai opened a new pull request #3870: [CARBONDATA-3930] Fix DataLoadingException in MVExample
QiangCai opened a new pull request #3870: URL: https://github.com/apache/carbondata/pull/3870 ### Why is this PR needed? MVExample is throwing DataLoadingException because the path of the input file is wrong. ### What changes were proposed in this PR? 1. Correct the path of the input file 2. Remove unused CarbonMVRules 3. Both CarbonExtensions and CarbonSession use MVRewriteRule ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - No This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3789: [CARBONDATA-3864] Store Size Optimization
CarbonDataQA1 commented on pull request #3789: URL: https://github.com/apache/carbondata/pull/3789#issuecomment-665169330 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] VenuReddy2103 commented on a change in pull request #3774: [CARBONDATA-3833] Make geoID visible
VenuReddy2103 commented on a change in pull request #3774: URL: https://github.com/apache/carbondata/pull/3774#discussion_r461692563 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SICreationCommand.scala ## @@ -210,6 +210,14 @@ private[sql] case class CarbonCreateSecondaryIndexCommand( .get }") } + val properties = carbonTable.getTableInfo.getFactTable.getTableProperties.asScala + val spatialProperty = properties.get(CarbonCommonConstants.SPATIAL_INDEX) + if (spatialProperty.isDefined) { +if (dims.find(x => x.getColName.equalsIgnoreCase(spatialProperty.get.trim)).isDefined) { Review comment: you would want to check for index column names. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3867: [CARBONDATA-3889] Cleanup code typo in carbondata-spark module
CarbonDataQA1 commented on pull request #3867: URL: https://github.com/apache/carbondata/pull/3867#issuecomment-665157764 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3865: [CARBONDATA-3928] Handled the Strings which length is greater than 32000 as a bad record.
CarbonDataQA1 commented on pull request #3865: URL: https://github.com/apache/carbondata/pull/3865#issuecomment-665003765 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3774: [CARBONDATA-3833] Make geoID visible
CarbonDataQA1 commented on pull request #3774: URL: https://github.com/apache/carbondata/pull/3774#issuecomment-664592745 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3787: [WIP][CARBONDATA-3923] support global sort for SI
CarbonDataQA1 commented on pull request #3787: URL: https://github.com/apache/carbondata/pull/3787#issuecomment-665125549 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] VenuReddy2103 commented on pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
VenuReddy2103 commented on pull request #3857: URL: https://github.com/apache/carbondata/pull/3857#issuecomment-665140464 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #3773: [CARBONDATA-3830]Presto complex columns read support
Indhumathi27 commented on a change in pull request #3773: URL: https://github.com/apache/carbondata/pull/3773#discussion_r461655861 ## File path: core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java ## @@ -98,6 +98,14 @@ void prepareDimensionAndMeasureColumnVectors() { columnVectorInfo.dimension = queryDimensions[i]; columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } else if (queryDimensions[i].getDimension().isComplex()) { +ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); +complexList.add(columnVectorInfo); +columnVectorInfo.dimension = queryDimensions[i]; +columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); +columnVectorInfo.genericQueryType = + executionInfo.getComlexDimensionInfoMap().get(columnVectorInfo.ordinal); +allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; Review comment: can add a check in line No.109 ` } else if (queryDimensions[i].getDimension().getDataType() != DataTypes.DATE && !queryDimensions[i].getDimension().isComplex()) {` to avoid moving this method Up. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
CarbonDataQA1 commented on pull request #3857: URL: https://github.com/apache/carbondata/pull/3857#issuecomment-664827883 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akkio-97 commented on pull request #3866: [CARBONDATA-3915] Correction in the documentation for spark-shell
akkio-97 commented on pull request #3866: URL: https://github.com/apache/carbondata/pull/3866#issuecomment-665281574 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 commented on pull request #3848: [CARBONDATA-3891] Fix loading data will update all segments updateDeltaEndTimestamp
Indhumathi27 commented on pull request #3848: URL: https://github.com/apache/carbondata/pull/3848#issuecomment-664772987 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai opened a new pull request #3867: [CARBONDATA-3889] Cleanup code typo in carbondata-spark module
QiangCai opened a new pull request #3867: URL: https://github.com/apache/carbondata/pull/3867 ### Why is this PR needed? There are many typos in the carbondata-spark module. ### What changes were proposed in this PR? Clean up code typos in the carbondata-spark module. ### Does this PR introduce any user interface change? - No, interface names are not changed. ### Is any new testcase added? - No, functionality is not impacted. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3869: Exception added for index creation on long string columns
CarbonDataQA1 commented on pull request #3869: URL: https://github.com/apache/carbondata/pull/3869#issuecomment-665263897 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] VenuReddy2103 commented on a change in pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
VenuReddy2103 commented on a change in pull request #3857: URL: https://github.com/apache/carbondata/pull/3857#discussion_r461385726 ## File path: integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java ## @@ -116,7 +117,10 @@ private static CarbonTable getCarbonTable(Configuration configuration, String pa try { carbonTable = getCarbonTable(jobContext.getConfiguration(), jobContext.getConfiguration().get(hive_metastoreConstants.META_TABLE_LOCATION)); -} catch (Exception e) { +} catch (FileNotFoundException e) { + return new InputSplit[0]; +} +catch (Exception e) { Review comment: Formatting issue. Suggest moving it to the previous line. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3860: [CARBONDATA-3889] Cleanup duplicated code in carbondata-core module
asfgit closed pull request #3860: URL: https://github.com/apache/carbondata/pull/3860 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3857: [CARBONDATA-3914] Fixed issue on reading data from carbon table through hive beeline when no data is present in table.
asfgit closed pull request #3857: URL: https://github.com/apache/carbondata/pull/3857 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kevinjmh commented on a change in pull request #3867: [CARBONDATA-3889] Cleanup code typo in carbondata-spark module
kevinjmh commented on a change in pull request #3867: URL: https://github.com/apache/carbondata/pull/3867#discussion_r462058720 ## File path: integration/spark/src/main/scala/org/apache/carbondata/view/MVRefresher.scala ## @@ -68,19 +68,19 @@ object MVRefresher { // Clean up the old invalid segment data before creating a new entry for new load. SegmentStatusManager.deleteLoadsAndUpdateMetadata(viewTable, false, null) val segmentStatusManager: SegmentStatusManager = new SegmentStatusManager(viewTableIdentifier) -// Acquire table status lock to handle concurrent dataloading +// Acquire table status lock to handle concurrent data loading val lock: ICarbonLock = segmentStatusManager.getTableStatusLock val segmentMapping: util.Map[String, util.List[String]] = new util.HashMap[String, util.List[String]] val viewManager = MVManagerInSpark.get(session) try if (lock.lockWithRetries) { - LOGGER.info("Acquired lock for mv " + viewIdentifier + " for table status updation") + LOGGER.info("Acquired lock for mv " + viewIdentifier + " for table status te") Review comment: check this ## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/DeleteExecution.scala ## @@ -50,7 +50,7 @@ import org.apache.carbondata.events.{IndexServerLoadEvent, OperationContext, Ope import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat} import org.apache.carbondata.processing.exception.MultipleMatchingException import org.apache.carbondata.processing.loading.FailureCauses -import org.apache.carbondata.spark.DeleteDelataResultImpl +import org.apache.carbondata.spark.DeleteDelateResultImpl Review comment: delta ## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/mutation/DeleteExecution.scala ## @@ -266,9 +266,9 @@ object DeleteExecution { CarbonUpdateUtil.getRequiredFieldFromTID(TID, TupleIdEnum.BLOCK_ID) + CarbonCommonConstants.FACT_FILE_EXT) } - val deleteDeletaPath = CarbonUpdateUtil + val 
deleteDeletePath = CarbonUpdateUtil Review comment: ditto ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -693,10 +693,10 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { case sort@Sort(order, global, plan) => addProjection = true (sort, true) - case filter@Filter(condition, logicalRelation@MatchIndexableRelation(indexableRelation)) + case filter@Filter(condition, logicalRelation@MatchIndexTableRelation(indexTableRelation)) Review comment: This rename changes the meaning; the actual code matches CarbonDatasourceHadoopRelation. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3848: [CARBONDATA-3891] Fix loading data will update all segments updateDeltaEndTimestamp
CarbonDataQA1 commented on pull request #3848: URL: https://github.com/apache/carbondata/pull/3848#issuecomment-664820504 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3866: [CARBONDATA-3915] Correction in the documentation for spark-shell
CarbonDataQA1 commented on pull request #3866: URL: https://github.com/apache/carbondata/pull/3866#issuecomment-664852730 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akkio-97 opened a new pull request #3869: Exception added for index creation on long string columns
akkio-97 opened a new pull request #3869: URL: https://github.com/apache/carbondata/pull/3869 ### Why is this PR needed? Index creation on long string columns is not yet supported. ### What changes were proposed in this PR? An exception is thrown if the user tries to create such an index. ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ShreelekhyaG commented on a change in pull request #3774: [CARBONDATA-3833] Make geoID visible
ShreelekhyaG commented on a change in pull request #3774: URL: https://github.com/apache/carbondata/pull/3774#discussion_r461343500 ## File path: integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala ## @@ -112,6 +238,23 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach { result) } + test("test insert into non-geo table select from geo table") { Review comment: modified existing test case and added validation for the geo column. ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/SICreationCommand.scala ## @@ -209,6 +209,11 @@ private[sql] case class CarbonCreateSecondaryIndexCommand( .get }") } + val isSpatialColPresent = dims.find(x => x.getColumnSchema.isSpatialColumn) Review comment: removed isSpatialColumn from schema. ## File path: integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonFileMetastore.scala ## @@ -228,8 +230,15 @@ class CarbonFileMetastore extends CarbonMetaStore { c.getClass.getName.equals("org.apache.spark.sql.catalyst.catalog.HiveTableRelation") || c.getClass.getName.equals( "org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation")) => -val catalogTable = +var catalogTable = CarbonReflectionUtils.getFieldOfCatalogTable("tableMeta", c).asInstanceOf[CatalogTable] +// remove spatial column from schema Review comment: Here, catalogTable will have spatial column in schema which is used to build carbon table. As spatial column is not supposed to be present in user-defined columns, removing it here. Later from tableproperties the column will be added in carbonTable. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3773: [CARBONDATA-3830]Presto complex columns read support
ajantha-bhat commented on a change in pull request #3773: URL: https://github.com/apache/carbondata/pull/3773#discussion_r461597459 ## File path: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/FillVector.java ## @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl; +import org.apache.carbondata.core.util.ByteUtil; + +public class FillVector { Review comment: check if it can be a static class ## File path: core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/FillVector.java ## @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.page.encoding; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl; +import org.apache.carbondata.core.util.ByteUtil; + +public class FillVector { + private byte[] pageData; + private float floatFactor = 0; + private double factor = 0; + private ColumnVectorInfo vectorInfo; + private BitSet nullBits; + + public FillVector(byte[] pageData, ColumnVectorInfo vectorInfo, BitSet nullBits) { +this.pageData = pageData; +this.vectorInfo = vectorInfo; +this.nullBits = nullBits; + } + + public void setFactor(double factor) { +this.factor = factor; + } + + public void setFloatFactor(float floatFactor) { +this.floatFactor = floatFactor; + } + + public void basedOnType(CarbonColumnVector vector, DataType vectorDataType, int pageSize, + DataType pageDataType) { +if (vectorInfo.vector.getColumnVector() != null && ((CarbonColumnVectorImpl) vectorInfo.vector +.getColumnVector()).isComplex()) { + fillComplexType(vector.getColumnVector(), pageDataType); +} else { + fillPrimitiveType(vector, vectorDataType, pageSize, pageDataType); + vector.setIndex(0); +} + } + + private void fillComplexType(CarbonColumnVector vector, DataType pageDataType) { +CarbonColumnVectorImpl vectorImpl = (CarbonColumnVectorImpl) vector; +if (vector != null && vector.getChildrenVector() != null) { + ArrayList childElements = ((CarbonColumnVectorImpl) vector).getChildrenElements(); + for (int i = 0; i < childElements.size(); i++) { +int count = childElements.get(i); 
+typeComplexObject(vectorImpl.getChildrenVector(), count, pageDataType); +vector.putArrayObject(); + } +} + } + + private void fillPri