[GitHub] [carbondata] akashrn5 commented on a change in pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
akashrn5 commented on a change in pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#discussion_r480909454 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ## @@ -624,9 +625,7 @@ class TableNewProcessor(cm: TableModel) { if (isVarcharColumn(colName)) { columnSchema.setDataType(DataTypes.VARCHAR) } -// TODO: Need to fill RowGroupID, converted type Review comment: is this `TODO` is completed/not required? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3789: [CARBONDATA-3864] Store Size Optimization
CarbonDataQA1 commented on pull request #3789: URL: https://github.com/apache/carbondata/pull/3789#issuecomment-684508344 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2200/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3789: [CARBONDATA-3864] Store Size Optimization
CarbonDataQA1 commented on pull request #3789: URL: https://github.com/apache/carbondata/pull/3789#issuecomment-684508598 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3940/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI
kunal642 commented on a change in pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#discussion_r480920128 ## File path: integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java ## @@ -243,46 +258,115 @@ private void processResult(List> detailQueryResultItera /** * This method will prepare the data from raw object that will take part in sorting */ - private Object[] prepareRowObjectForSorting(Object[] row) { + private Object[] prepareRowObjectForSorting(Object[] row, + Map complexDimensionInfoMap, int[] complexColumnParentBlockIndexes) + throws SecondaryIndexException { ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0]; -// ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount]; +byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray(); List dimensions = segmentProperties.getDimensions(); Object[] preparedRow = new Object[dimensions.size() + measureCount]; +Map complexDataMap = new HashMap<>(); int noDictionaryIndex = 0; int dictionaryIndex = 0; +int complexIndex = 0; int i = 0; // loop excluding last dimension as last one is implicit column. for (; i < dimensions.size() - 1; i++) { CarbonDimension dims = dimensions.get(i); - if (dims.hasEncoding(Encoding.DICTIONARY)) { + boolean isComplexColumn = false; + // check if dimension is a complex data type + if (!complexDimensionInfoMap.isEmpty() && complexColumnParentBlockIndexes.length > 0) { Review comment: Can we use dim.isComplex to do the same check? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI
kunal642 commented on a change in pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#discussion_r480922354 ## File path: integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java ## @@ -243,46 +258,115 @@ private void processResult(List> detailQueryResultItera /** * This method will prepare the data from raw object that will take part in sorting */ - private Object[] prepareRowObjectForSorting(Object[] row) { + private Object[] prepareRowObjectForSorting(Object[] row, + Map complexDimensionInfoMap, int[] complexColumnParentBlockIndexes) + throws SecondaryIndexException { ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0]; -// ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount]; +byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray(); List dimensions = segmentProperties.getDimensions(); Object[] preparedRow = new Object[dimensions.size() + measureCount]; +Map complexDataMap = new HashMap<>(); int noDictionaryIndex = 0; int dictionaryIndex = 0; +int complexIndex = 0; int i = 0; // loop excluding last dimension as last one is implicit column. for (; i < dimensions.size() - 1; i++) { CarbonDimension dims = dimensions.get(i); - if (dims.hasEncoding(Encoding.DICTIONARY)) { + boolean isComplexColumn = false; + // check if dimension is a complex data type + if (!complexDimensionInfoMap.isEmpty() && complexColumnParentBlockIndexes.length > 0) { +for (GenericQueryType queryType : complexDimensionInfoMap.values()) { + if (queryType.getName().equalsIgnoreCase(dims.getColName())) { +isComplexColumn = true; +break; + } +} + } + if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) { // dictionary preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++); } else { -// no dictionary dims -byte[] noDictionaryKeyByIndex = wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++); -// no dictionary primitive columns are expected to be in original data while loading, -// so convert it to original data -if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) { - Object dataFromBytes = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn( - noDictionaryKeyByIndex, dims.getDataType()); - if (null != dataFromBytes && dims.getDataType() == DataTypes.TIMESTAMP) { -dataFromBytes = (long) dataFromBytes / 1000L; +if (isComplexColumn) { + byte[] complexKeyByIndex = wrapper.getComplexKeyByIndex(complexIndex); + ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex); + GenericQueryType genericQueryType = + complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]); + int complexDataLength = byteArrayInput.getShort(2); + // In case, if array is empty + if (complexDataLength == 0) { +complexDataLength = complexDataLength + 1; + } + // get flattened array data + Object[] complexFlattenedData = new Object[complexDataLength]; + Object[] data = genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput); + for (int index = 0; index < complexDataLength; index++) { +complexFlattenedData[index] = +getData(data, index, dims.getColumnSchema().getDataType()); } - preparedRow[i] = dataFromBytes; + complexDataMap.put(i, complexFlattenedData); } else { - preparedRow[i] = noDictionaryKeyByIndex; + // no dictionary dims + byte[] noDictionaryKeyByIndex = wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++); + // no dictionary primitive columns are expected to be in original data while loading, + // so convert it to original data + if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) { +Object dataFromBytes = DataTypeUtil + .getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex, +dims.getDataType()); +if (null != dataFromBytes && dims.getDataType() == DataTypes.TIMESTAMP) { + dataFromBytes = (long) dataFromBytes / 1000L; +} +preparedRow[i] = dataFromBytes; + } else { +preparedRow[i] = noDictionaryKeyByIndex; + } } } } // at last add implicit column position reference(PID) +preparedRow[i] = implicitColumnByteArray; -preparedRow[i] = wrapper.getImplicitColumnByteArray(); +// In case of complex array type, flatten the data and add for sorting +// TODO: Handle for nested array and other complex types Review comment: Please mention in some doc
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3819: [CARBONDATA-3855]support carbon SDK to load data from different files
CarbonDataQA1 commented on pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#issuecomment-684545249 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2201/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3819: [CARBONDATA-3855]support carbon SDK to load data from different files
CarbonDataQA1 commented on pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#issuecomment-684545809 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3941/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI
Indhumathi27 commented on a change in pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#discussion_r480960162 ## File path: integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java ## @@ -243,46 +258,115 @@ private void processResult(List> detailQueryResultItera /** * This method will prepare the data from raw object that will take part in sorting */ - private Object[] prepareRowObjectForSorting(Object[] row) { + private Object[] prepareRowObjectForSorting(Object[] row, + Map complexDimensionInfoMap, int[] complexColumnParentBlockIndexes) + throws SecondaryIndexException { ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0]; -// ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount]; +byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray(); List dimensions = segmentProperties.getDimensions(); Object[] preparedRow = new Object[dimensions.size() + measureCount]; +Map complexDataMap = new HashMap<>(); int noDictionaryIndex = 0; int dictionaryIndex = 0; +int complexIndex = 0; int i = 0; // loop excluding last dimension as last one is implicit column. for (; i < dimensions.size() - 1; i++) { CarbonDimension dims = dimensions.get(i); - if (dims.hasEncoding(Encoding.DICTIONARY)) { + boolean isComplexColumn = false; + // check if dimension is a complex data type + if (!complexDimensionInfoMap.isEmpty() && complexColumnParentBlockIndexes.length > 0) { +for (GenericQueryType queryType : complexDimensionInfoMap.values()) { + if (queryType.getName().equalsIgnoreCase(dims.getColName())) { +isComplexColumn = true; +break; + } +} + } + if (dims.hasEncoding(Encoding.DICTIONARY) && !isComplexColumn) { // dictionary preparedRow[i] = wrapper.getDictionaryKeyByIndex(dictionaryIndex++); } else { -// no dictionary dims -byte[] noDictionaryKeyByIndex = wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++); -// no dictionary primitive columns are expected to be in original data while loading, -// so convert it to original data -if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) { - Object dataFromBytes = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn( - noDictionaryKeyByIndex, dims.getDataType()); - if (null != dataFromBytes && dims.getDataType() == DataTypes.TIMESTAMP) { -dataFromBytes = (long) dataFromBytes / 1000L; +if (isComplexColumn) { + byte[] complexKeyByIndex = wrapper.getComplexKeyByIndex(complexIndex); + ByteBuffer byteArrayInput = ByteBuffer.wrap(complexKeyByIndex); + GenericQueryType genericQueryType = + complexDimensionInfoMap.get(complexColumnParentBlockIndexes[complexIndex++]); + int complexDataLength = byteArrayInput.getShort(2); + // In case, if array is empty + if (complexDataLength == 0) { +complexDataLength = complexDataLength + 1; + } + // get flattened array data + Object[] complexFlattenedData = new Object[complexDataLength]; + Object[] data = genericQueryType.getObjectArrayDataBasedOnDataType(byteArrayInput); + for (int index = 0; index < complexDataLength; index++) { +complexFlattenedData[index] = +getData(data, index, dims.getColumnSchema().getDataType()); } - preparedRow[i] = dataFromBytes; + complexDataMap.put(i, complexFlattenedData); } else { - preparedRow[i] = noDictionaryKeyByIndex; + // no dictionary dims + byte[] noDictionaryKeyByIndex = wrapper.getNoDictionaryKeyByIndex(noDictionaryIndex++); + // no dictionary primitive columns are expected to be in original data while loading, + // so convert it to original data + if (DataTypeUtil.isPrimitiveColumn(dims.getDataType())) { +Object dataFromBytes = DataTypeUtil + .getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeyByIndex, +dims.getDataType()); +if (null != dataFromBytes && dims.getDataType() == DataTypes.TIMESTAMP) { + dataFromBytes = (long) dataFromBytes / 1000L; +} +preparedRow[i] = dataFromBytes; + } else { +preparedRow[i] = noDictionaryKeyByIndex; + } } } } // at last add implicit column position reference(PID) +preparedRow[i] = implicitColumnByteArray; -preparedRow[i] = wrapper.getImplicitColumnByteArray(); +// In case of complex array type, flatten the data and add for sorting +// TODO: Handle for nested array and other complex types Review comment: added
[GitHub] [carbondata] QiangCai commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
QiangCai commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r480958551 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered globally** + * + * @param filter the filter expression to be reordered + * @return The reordered filter with the current ordinal + */ + def reorderFilter(filter: Filter, table: CarbonTable): (Filter, Int) = { +val filterMap = mutable.HashMap[String, List[(Filter, Int)]]() + def sortFilter(filter: Filter): (Filter, Int) = { +filter match { + case sources.And(left, right) => +filterMap.getOrElseUpdate("AND", List()) +if (left.references.toSeq == right.references.toSeq || +right.references.diff(left.references).length == 0) { + val sorted = sortFilter(left) Review comment: please add a comment to explain the reason why it only sort left side ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered gl
[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #3778: [CARBONDATA-3916] Support array complex type with SI
Indhumathi27 commented on a change in pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#discussion_r480961113 ## File path: integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java ## @@ -243,46 +258,115 @@ private void processResult(List> detailQueryResultItera /** * This method will prepare the data from raw object that will take part in sorting */ - private Object[] prepareRowObjectForSorting(Object[] row) { + private Object[] prepareRowObjectForSorting(Object[] row, + Map complexDimensionInfoMap, int[] complexColumnParentBlockIndexes) + throws SecondaryIndexException { ByteArrayWrapper wrapper = (ByteArrayWrapper) row[0]; -// ByteBuffer[] noDictionaryBuffer = new ByteBuffer[noDictionaryCount]; +byte[] implicitColumnByteArray = wrapper.getImplicitColumnByteArray(); List dimensions = segmentProperties.getDimensions(); Object[] preparedRow = new Object[dimensions.size() + measureCount]; +Map complexDataMap = new HashMap<>(); int noDictionaryIndex = 0; int dictionaryIndex = 0; +int complexIndex = 0; int i = 0; // loop excluding last dimension as last one is implicit column. for (; i < dimensions.size() - 1; i++) { CarbonDimension dims = dimensions.get(i); - if (dims.hasEncoding(Encoding.DICTIONARY)) { + boolean isComplexColumn = false; + // check if dimension is a complex data type + if (!complexDimensionInfoMap.isEmpty() && complexColumnParentBlockIndexes.length > 0) { Review comment: dim is actually the index table dimension, where complex type of main table is stored as its primitive type in SI. Hence, we cannot check isComplex from dimension. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] VenuReddy2103 commented on pull request #3819: [CARBONDATA-3855]support carbon SDK to load data from different files
VenuReddy2103 commented on pull request #3819: URL: https://github.com/apache/carbondata/pull/3819#issuecomment-684567252 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai commented on pull request #3901: [CARBONDATA-3820] CDC update as new segment should use target table sort_scope
QiangCai commented on pull request #3901: URL: https://github.com/apache/carbondata/pull/3901#issuecomment-684628244 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3901: [CARBONDATA-3820] CDC update as new segment should use target table sort_scope
asfgit closed pull request #3901: URL: https://github.com/apache/carbondata/pull/3901 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
ajantha-bhat commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481013721 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Unit = { +// 1. check all the filter columns present in SI +val originalFilterAttributes = filter.condition collect { + case attr: AttributeReference => +attr.name.toLowerCase +} +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = filter.child.asInstanceOf[LogicalRelation].relation + .asInstanceOf[CarbonDatasourceHadoopRelation].carbonRelation.databaseName +// filter out all the index tables which are disabled +val enabledMatchingIndexTables = matchingIndexTables + .filter(table => { +sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(table, +Some(databaseName))).storage + .properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + }) +// 2. check if only one SI matches for the filter columns +if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size == 1 && +filterAttributes.intersect(originalFilterAttributes).size == +originalFilterAttributes.size) { + // 3. check if all the sort columns is in SI + val sortColumns = sort +.order +.map(_.child.asInstanceOf[AttributeReference].name.toLowerCase()) +.toSet + val indexCarbonTable = CarbonEnv +.getCarbonTable(Some(databaseName), enabledMatchingIndexTables.head)(sparkSession) Review comment: indexTableRelation.carbonTable will have main table. Hence this code. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
ajantha-bhat commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481024238 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Unit = { +// 1. check all the filter columns present in SI +val originalFilterAttributes = filter.condition collect { + case attr: AttributeReference => +attr.name.toLowerCase +} +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = filter.child.asInstanceOf[LogicalRelation].relation + .asInstanceOf[CarbonDatasourceHadoopRelation].carbonRelation.databaseName +// filter out all the index tables which are disabled +val enabledMatchingIndexTables = matchingIndexTables + .filter(table => { +sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(table, +Some(databaseName))).storage + .properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + }) +// 2. check if only one SI matches for the filter columns +if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size == 1 && +filterAttributes.intersect(originalFilterAttributes).size == +originalFilterAttributes.size) { + // 3. check if all the sort columns is in SI + val sortColumns = sort +.order +.map(_.child.asInstanceOf[AttributeReference].name.toLowerCase()) +.toSet + val indexCarbonTable = CarbonEnv +.getCarbonTable(Some(databaseName), enabledMatchingIndexTables.head)(sparkSession) Review comment: This part of code is referred from the existing code. Let me rename it here This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
CarbonDataQA1 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684727237 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3942/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Karan980 commented on pull request #3876: TestingCI
Karan980 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-684736007 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
CarbonDataQA1 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684734917 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2202/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
Indhumathi27 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684738151 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
akashrn5 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-684773205 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3893: [CARBONDATA-3959] Added new property to set the value of executor LRU cache size to 70% of the total executor memory in IndexServer
CarbonDataQA1 commented on pull request #3893: URL: https://github.com/apache/carbondata/pull/3893#issuecomment-684773131 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3944/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3893: [CARBONDATA-3959] Added new property to set the value of executor LRU cache size to 70% of the total executor memory in IndexServer
CarbonDataQA1 commented on pull request #3893: URL: https://github.com/apache/carbondata/pull/3893#issuecomment-684774434 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2204/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on pull request #3873: [CARBONDATA-3956] Reindex command on SI table
kunal642 commented on pull request #3873: URL: https://github.com/apache/carbondata/pull/3873#issuecomment-684781373 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3873: [CARBONDATA-3956] Reindex command on SI table
akashrn5 commented on pull request #3873: URL: https://github.com/apache/carbondata/pull/3873#issuecomment-684785341 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3873: [CARBONDATA-3956] Reindex command on SI table
asfgit closed pull request #3873: URL: https://github.com/apache/carbondata/pull/3873 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[jira] [Resolved] (CARBONDATA-3956) Implementing a new Reindex command to repair the missing SI Segments
[ https://issues.apache.org/jira/browse/CARBONDATA-3956?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Kunal Kapoor resolved CARBONDATA-3956. -- Fix Version/s: 2.1.0 Resolution: Fixed > Implementing a new Reindex command to repair the missing SI Segments > > > Key: CARBONDATA-3956 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3956 > Project: CarbonData > Issue Type: New Feature >Reporter: Vikram Ahuja >Priority: Minor > Fix For: 2.1.0 > > Time Spent: 9h > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3876: TestingCI
CarbonDataQA1 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-684810505 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2205/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3876: TestingCI
CarbonDataQA1 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-684811094 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3945/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Indhumathi27 opened a new pull request #3910: [WIP] Fix Deserialization issue with DataType class
Indhumathi27 opened a new pull request #3910: URL: https://github.com/apache/carbondata/pull/3910 ### Why is this PR needed? When DataType(For eg, StringType) object is serialized and deserialized using Gson, DataType object after deserialization, gets changed to Main class (DataType) instance instead of child type(StringType in this example). Because of this, presto query on table with complex columns fails with NPE, because while filing DimensionAndMeasureDetails(SegmentProperties.fillDimensionAndMeasureDetails) from CarbonLocalInputSplit.getDetailInfo, list of child dimensions will be left unfilled for Parent Carbon Dimension. ### What changes were proposed in this PR? 1. Override JsonDeserializer method for DataType and deserialize the jsonObject based on its DataType child class instance. 2. Add registerTypeAdapter for DataType class to gsonObject, while deserializing carbonLocalInputSplit.detailInfo. ### Does this PR introduce any user interface change? - No ### Is any new testcase added? - Yes This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] vikramahuja1001 commented on a change in pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILo
vikramahuja1001 commented on a change in pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#discussion_r481107503 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/events/SILoadEventListenerForFailedSegments.scala ## @@ -57,189 +58,201 @@ class SILoadEventListenerForFailedSegments extends OperationEventListener with L event match { case postStatusUpdateEvent: LoadTablePostStatusUpdateEvent => LOGGER.info("Load post status update event-listener called") -val loadTablePostStatusUpdateEvent = event.asInstanceOf[LoadTablePostStatusUpdateEvent] -val carbonLoadModel = loadTablePostStatusUpdateEvent.getCarbonLoadModel -val sparkSession = SparkSession.getActiveSession.get -// when Si creation and load to main table are parallel, get the carbonTable from the -// metastore which will have the latest index Info -val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetaStore -val carbonTable = metaStore - .lookupRelation(Some(carbonLoadModel.getDatabaseName), - carbonLoadModel.getTableName)(sparkSession).asInstanceOf[CarbonRelation].carbonTable -val indexMetadata = carbonTable.getIndexMetadata -val secondaryIndexProvider = IndexType.SI.getIndexProviderName -if (null != indexMetadata && null != indexMetadata.getIndexesMap && +if (CarbonProperties.getInstance().isSIRepairEnabled) { + val loadTablePostStatusUpdateEvent = event.asInstanceOf[LoadTablePostStatusUpdateEvent] + val carbonLoadModel = loadTablePostStatusUpdateEvent.getCarbonLoadModel + val sparkSession = SparkSession.getActiveSession.get + // when Si creation and load to main table are parallel, get the carbonTable from the + // metastore which will have the latest index Info + val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetaStore + val carbonTable = metaStore +.lookupRelation(Some(carbonLoadModel.getDatabaseName), + carbonLoadModel.getTableName)(sparkSession).asInstanceOf[CarbonRelation].carbonTable + val indexMetadata = carbonTable.getIndexMetadata + val secondaryIndexProvider = IndexType.SI.getIndexProviderName + if (null != indexMetadata && null != indexMetadata.getIndexesMap && null != indexMetadata.getIndexesMap.get(secondaryIndexProvider)) { - val indexTables = indexMetadata.getIndexesMap -.get(secondaryIndexProvider).keySet().asScala - // if there are no index tables for a given fact table do not perform any action - if (indexTables.nonEmpty) { -val mainTableDetails = - SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) -indexTables.foreach { - indexTableName => -val isLoadSIForFailedSegments = sparkSession.sessionState.catalog - .getTableMetadata(TableIdentifier(indexTableName, -Some(carbonLoadModel.getDatabaseName))).storage.properties - .getOrElse("isSITableEnabled", "true").toBoolean -val indexTable = metaStore - .lookupRelation(Some(carbonLoadModel.getDatabaseName), indexTableName)( -sparkSession) - .asInstanceOf[CarbonRelation] - .carbonTable +val indexTables = indexMetadata.getIndexesMap + .get(secondaryIndexProvider).keySet().asScala +// if there are no index tables for a given fact table do not perform any action +if (indexTables.nonEmpty) { + val mainTableDetails = + SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) + indexTables.foreach { +indexTableName => + val isLoadSIForFailedSegments = sparkSession.sessionState.catalog +.getTableMetadata(TableIdentifier(indexTableName, + Some(carbonLoadModel.getDatabaseName))).storage.properties +.getOrElse("isSITableEnabled", "true").toBoolean + val indexTable = metaStore +.lookupRelation(Some(carbonLoadModel.getDatabaseName), indexTableName)( + sparkSession) +.asInstanceOf[CarbonRelation] +.carbonTable -val mainTblLoadMetadataDetails: Array[LoadMetadataDetails] = - SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) -val siTblLoadMetadataDetails: Array[LoadMetadataDetails] = - SegmentStatusManager.readLoadMetadata(indexTable.getMetadataPath) -var segmentLocks: ListBuffer[ICarbonLock] = ListBuffer.empty -if (!isLoadSIForFailedSegments + val mainTblLoadMetadata
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
CarbonDataQA1 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-684838466 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2207/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
CarbonDataQA1 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-684845760 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3947/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
CarbonDataQA1 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684847720 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3948/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
kunal642 commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481168720 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +926,42 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Boolean = { +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val parentTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(parentTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = parentTableRelation.carbonRelation.databaseName +// filter out all the index tables which are disabled +val enabledMatchingIndexTables = matchingIndexTables + .filter(table => { +sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(table, +Some(databaseName))).storage + .properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + }) +// 1. check if only one SI matches for the filter columns +if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size == 1) { + // 2. check if all the sort columns is in SI + val sortColumns = sort +.order +.map(_.child.asInstanceOf[AttributeReference].name.toLowerCase()) +.toSet + val indexCarbonTable = CarbonEnv +.getCarbonTable(Some(databaseName), enabledMatchingIndexTables.head)(sparkSession) + if (sortColumns.forall { x => indexCarbonTable.getColumnByName(x) != null }) { Review comment: directly return sortColumns.forall { x => indexCarbonTable.getColumnByName(x) != null } This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
kunal642 commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481170533 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -684,21 +730,71 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { .getBoolean("spark.carbon.pushdown.join.as.filter", defaultValue = true) val transformChild = false var addProjection = needProjection +// to store the sort node per query +var sortNodeForPushDown: Sort = null +// to store the limit literal per query +var limitLiteral: Literal = null +// by default do not push down notNull filter, +// but for orderby limit push down, push down notNull filter also. Else we get wrong results. +var pushDownNotNullFilter: Boolean = false val transformedPlan = transformPlan(plan, { - case union@Union(children) => + case union@Union(_) => // In case of Union, Extra Project has to be added to the Plan. Because if left table is // pushed to SI and right table is not pushed, then Output Attribute mismatch will happen addProjection = true (union, true) - case sort@Sort(order, global, plan) => + case sort@Sort(_, _, _) => addProjection = true (sort, true) - case filter@Filter(condition, logicalRelation@MatchIndexableRelation(indexableRelation)) + case limit@Limit(literal: Literal, sort@Sort(_, _, child)) => +child match { + case filter: Filter => +if (checkIfPushDownOrderByLimitAndNotNullFilter(literal, sort, filter)) { + sortNodeForPushDown = sort + limitLiteral = literal + pushDownNotNullFilter = true +} + case p: Project if (p.child.isInstanceOf[Filter]) => +if (checkIfPushDownOrderByLimitAndNotNullFilter(literal, + sort, + p.child.asInstanceOf[Filter])) { + sortNodeForPushDown = sort + limitLiteral = literal + pushDownNotNullFilter = true +} + case _ => +} +(limit, transformChild) + case limit@Limit(literal: Literal, _@Project(_, child)) if child.isInstanceOf[Sort] => Review comment: if you use the following then you will not have to check for isInstanceOf of cast the child to Sort. `case limit@Limit(literal: Literal, _@Project(_, Sort(_, _)))` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
ajantha-bhat commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481175661 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: 1) If segment object exist and segment fileName is not null, then that means segment was there. If somebody deletes the segment file manually. Then better to throw an exception instead of initializing to 0 ? 2) when the segment.getLoadMetadataDetails() will be empty ? 3) now in positive case also we check file exist ? in object storage file exist check can be costly. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3910: [WIP] Fix Deserialization issue with DataType class
CarbonDataQA1 commented on pull request #3910: URL: https://github.com/apache/carbondata/pull/3910#issuecomment-684895757 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2209/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3910: [WIP] Fix Deserialization issue with DataType class
CarbonDataQA1 commented on pull request #3910: URL: https://github.com/apache/carbondata/pull/3910#issuecomment-684896877 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3949/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListene
CarbonDataQA1 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-684900945 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3950/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListene
CarbonDataQA1 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-684902776 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2210/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
ajantha-bhat commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481146873 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Unit = { +// 1. check all the filter columns present in SI +val originalFilterAttributes = filter.condition collect { + case attr: AttributeReference => +attr.name.toLowerCase +} +val filterAttributes = filter.condition collect { Review comment: Agree. it was overlooked I guess. we cannot compare here. I moved this comparison in `createIndexFilterDataFrame` where I decide `needPushDown` ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Unit = { +// 1. check all the filter columns present in SI +val originalFilterAttributes = filter.condition collect { + case attr: AttributeReference => +attr.name.toLowerCase +} +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = filter.child.asInstanceOf[LogicalRelation].relation Review comment: done ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Unit = { +// 1. check all the filter columns present in SI +val originalFilterAttributes = filter.condition collect { + case attr: AttributeReference => +attr.name.toLowerCase +} +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = filter.child.asInstanceOf[LogicalRelation].relation + .asInstanceOf[CarbonDatasourceHadoopRelation].carbonRelation.databaseName +// filter out all the index tables which are disabled +val enabledMatchingIndexTables = matchingIndexTables + .filter(table => { +sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(table, +Some(databaseName))).storage + .properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + }) +// 2. check if only one SI matches for the filter columns +if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size == 1 && +filterAttributes.intersect(originalFilterAttributes).size == +originalFilterAttributes.size) { + // 3. check if all the sort columns is in SI + val sortColumns = sort +.order +.map(_.child.asInstanceOf[AttributeReference].name.toLowerCase()) +.toSet + val indexCarbonTable = CarbonEnv +.getCarbonTable(Some(databaseName), enabledMatchingIndexTables.head)(sparkSession) + var allColumnsFound = true Review comment: done ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -58,6 +58,16 @@ object NodeType extends Enumeration { */ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { + // to store the sort node per query + var sortNodeForPushDown: Sort = _ + + // to store the limit literal per query + var limitLiteral : Literal = _ + + // by default do not push down notNull filter, + // but for orderby limit push down, push down notNull filter also. Else we get wrong results. + var pushDownNotNullFilter : Boolean = _ Review comment: because too many functions need to change to pass arguments. I used default arguments and changed
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
ajantha-bhat commented on a change in pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#discussion_r481197993 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -684,21 +730,71 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { .getBoolean("spark.carbon.pushdown.join.as.filter", defaultValue = true) val transformChild = false var addProjection = needProjection +// to store the sort node per query +var sortNodeForPushDown: Sort = null +// to store the limit literal per query +var limitLiteral: Literal = null +// by default do not push down notNull filter, +// but for orderby limit push down, push down notNull filter also. Else we get wrong results. +var pushDownNotNullFilter: Boolean = false val transformedPlan = transformPlan(plan, { - case union@Union(children) => + case union@Union(_) => // In case of Union, Extra Project has to be added to the Plan. Because if left table is // pushed to SI and right table is not pushed, then Output Attribute mismatch will happen addProjection = true (union, true) - case sort@Sort(order, global, plan) => + case sort@Sort(_, _, _) => addProjection = true (sort, true) - case filter@Filter(condition, logicalRelation@MatchIndexableRelation(indexableRelation)) + case limit@Limit(literal: Literal, sort@Sort(_, _, child)) => +child match { + case filter: Filter => +if (checkIfPushDownOrderByLimitAndNotNullFilter(literal, sort, filter)) { + sortNodeForPushDown = sort + limitLiteral = literal + pushDownNotNullFilter = true +} + case p: Project if (p.child.isInstanceOf[Filter]) => +if (checkIfPushDownOrderByLimitAndNotNullFilter(literal, + sort, + p.child.asInstanceOf[Filter])) { + sortNodeForPushDown = sort + limitLiteral = literal + pushDownNotNullFilter = true +} + case _ => +} +(limit, transformChild) + case limit@Limit(literal: Literal, _@Project(_, child)) if child.isInstanceOf[Sort] => Review comment: done ## File path: integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala ## @@ -824,6 +926,42 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) { } } + private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal, sort: Sort, + filter: Filter): Boolean = { +val filterAttributes = filter.condition collect { + case attr: AttributeReference => attr.name.toLowerCase +} +val parentTableRelation = MatchIndexableRelation.unapply(filter.child).get +val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables( + filterAttributes.toSet.asJava, + CarbonIndexUtil.getSecondaryIndexes(parentTableRelation).mapValues(_.toList.asJava).asJava) + .asScala +val databaseName = parentTableRelation.carbonRelation.databaseName +// filter out all the index tables which are disabled +val enabledMatchingIndexTables = matchingIndexTables + .filter(table => { +sparkSession.sessionState.catalog + .getTableMetadata(TableIdentifier(table, +Some(databaseName))).storage + .properties + .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true") + }) +// 1. check if only one SI matches for the filter columns +if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size == 1) { + // 2. check if all the sort columns is in SI + val sortColumns = sort +.order +.map(_.child.asInstanceOf[AttributeReference].name.toLowerCase()) +.toSet + val indexCarbonTable = CarbonEnv +.getCarbonTable(Some(databaseName), enabledMatchingIndexTables.head)(sparkSession) + if (sortColumns.forall { x => indexCarbonTable.getColumnByName(x) != null }) { Review comment: yeah. done This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481198186 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: 1. even though when the segment object exists, if concurrently operations are happening like like or compaction, you can consider SI also, then we will modify the segment file, which is basically overwrite, then that corner case it will fail, so in that case, above scenario will happen, so no need to throw exception. 2. can refer PR #3814 3. we cannot differentiate between positive case and negative case, in order to query or the operation to succeed, we should take these steps, which we follow all places in carbon. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
ajantha-bhat commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481221931 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: > can refer PR #3814 Seems that PR is referring from load meta details, it doesn't mention when it is null > we cannot differentiate between positive case and negative case Agree, but instead of adding file exist check , may acquire segment lock (to make sure segment file there) or make sure load metadetails is filled always so it wont have to check file exist. Else many places we have to add file exist check where and all we are accessing the segment This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
ajantha-bhat commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481221931 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: > can refer PR #3814 Seems that PR is referring from load meta details, it doesn't mention when it is null > we cannot differentiate between positive case and negative case Agree, but instead of adding file exist check , may acquire segment lock (to make sure segment file there) or make sure load metadetails is filled always so it wont have to check file exist. Else many places we have to add file exist check where and all we are accessing the segment This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481226211 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: > Seems that PR is referring from load meta details, it doesn't mention when it is null actually it depends on how the segment object is made, since we have many methods to do it, These all things can be avoided when the segment refactoring is done, as already discussions are going on. >may acquire segment lock (to make sure segment file there) actually we have segment lock for actual segments, not the segment file which is metadata, that is the problem, its not like table status file. So during segment refactor we can consider these things into consideration and design it to avoid all these things, But now for existing users it will create, issue so we can consider this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-684934937 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481226211 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: > actually it depends on how the segment object is made, since we have many methods to do it, These all things can be avoided when the segment refactoring is done, as already discussions are going on. > actually we have segment lock for actual segments, not the segment file which is metadata, that is the problem, its not like table status file. So during segment refactor we can consider these things into consideration and design it to avoid all these things, But now for existing users it will create, issue so we can consider this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
akashrn5 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-684935390 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481226211 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: > Seems that PR is referring from load meta details, it doesn't mention when it is null actually it depends on how the segment object is made, since we have many methods to do it, These all things can be avoided when the segment refactoring is done, as already discussions are going on. >may acquire segment lock (to make sure segment file there) actually we have segment lock for actual segments, not the segment file which is metadata, that is the problem, its not like table status file. So during segment refactor we can consider these things into consideration and design it to avoid all these things, But now for existing users it will create issue, so we can consider this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
CarbonDataQA1 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-684938297 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3954/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-684943246 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
CarbonDataQA1 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684963426 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3952/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3778: [CARBONDATA-3916] Support array complex type with SI
CarbonDataQA1 commented on pull request #3778: URL: https://github.com/apache/carbondata/pull/3778#issuecomment-684965465 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2212/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
CarbonDataQA1 commented on pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#issuecomment-684993460 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2213/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
CarbonDataQA1 commented on pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#issuecomment-684994151 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3953/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
CarbonDataQA1 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-685014264 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3955/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
CarbonDataQA1 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-685018314 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2215/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
CarbonDataQA1 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-685020692 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3956/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
CarbonDataQA1 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-685022558 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2216/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListenerForF
kunal642 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-685023276 please add the new properties in docs This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] Karan980 commented on pull request #3876: TestingCI
Karan980 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-685054651 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3876: TestingCI
CarbonDataQA1 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-685113587 Build Success with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2217/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3876: TestingCI
CarbonDataQA1 commented on pull request #3876: URL: https://github.com/apache/carbondata/pull/3876#issuecomment-685115973 Build Success with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3957/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on pull request #3861: [CARBONDATA-3922] Support order by limit push down for secondary index queries
ajantha-bhat commented on pull request #3861: URL: https://github.com/apache/carbondata/pull/3861#issuecomment-685305774 @kunal642 : PR is ready. please check and merge This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
ajantha-bhat commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r481694795 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 Review comment: right side after optimization when it becomes (col1 And col1), better to modify filter to just col1 ? ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered globally** + * + * @param filter the filter expression to be reordered + * @return The reordered filter with the current ordinal + */ + def reorderFilter(filter: Filter, table: CarbonTable): (Filter, Int) = { +val filterMap = mutable.HashMap[String, List[(Filter, Int)]]() + def sortFilter(filter: Filter): (Filter, Int) = { Review comment: Agree with david. I too felt it is not easy to read ## File path: integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala ## @@ -72,7 +72,9 @@ case class CarbonDatasourceHadoopRelation( projects: Seq[NamedExpression], filters: Array[Filter], partitions: Seq[PartitionSpec]): RDD[InternalRow] = { -val filterExpression: Option[Expression] = filters.flatMap { filter => +val reorderedFilter = filters.map(CarbonFilters.reorde
[GitHub] [carbondata] vikramahuja1001 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListe
vikramahuja1001 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-685308575 Added new properties in docs as well This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
ajantha-bhat commented on pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#issuecomment-685310097 @kunal : Also please add some report in the description on how much was the performance with and without this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat edited a comment on pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
ajantha-bhat edited a comment on pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#issuecomment-685310097 @kunal642 : Also please add some report in the description on how much was the performance with and without this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[jira] [Created] (CARBONDATA-3969) Fix Deserialization issue with DataType class
Indhumathi Muthumurugesh created CARBONDATA-3969: Summary: Fix Deserialization issue with DataType class Key: CARBONDATA-3969 URL: https://issues.apache.org/jira/browse/CARBONDATA-3969 Project: CarbonData Issue Type: Bug Reporter: Indhumathi Muthumurugesh -- This message was sent by Atlassian Jira (v8.3.4#803005)
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListene
CarbonDataQA1 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-685312203 Build Failed with Spark 2.4.5, Please check CI http://121.244.95.60:12545/job/ApacheCarbon_PR_Builder_2.4.5/2218/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] CarbonDataQA1 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListene
CarbonDataQA1 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-685312703 Build Failed with Spark 2.3.4, Please check CI http://121.244.95.60:12545/job/ApacheCarbonPRBuilder2.3/3958/ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on pull request #3894: [CARBONDATA-3957] Added property to enable disable the repair logic and provide a limit to number of segments in SILoadEventListenerForF
kunal642 commented on pull request #3894: URL: https://github.com/apache/carbondata/pull/3894#issuecomment-685313907 @vikramahuja1001 please fix the build This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
ajantha-bhat commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r481737995 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. Review comment: I feel if user specify that don't change filter order (because he knows the cardinality) by a carbon property. we should not change the order. Else we can change ! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3893: [CARBONDATA-3959] Added new property to set the value of executor LRU cache size to 70% of the total executor memory in Inde
kunal642 commented on a change in pull request #3893: URL: https://github.com/apache/carbondata/pull/3893#discussion_r481745121 ## File path: core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java ## @@ -148,19 +148,39 @@ private void createLRULevelCacheInstance() { carbonLRUCache = new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_DRIVER_LRU_CACHE_SIZE, CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); } else { - // if executor cache size is not configured then driver cache conf will be used String executorCacheSize = CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_MAX_EXECUTOR_LRU_CACHE_SIZE); - if (null != executorCacheSize) { -carbonLRUCache = -new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_EXECUTOR_LRU_CACHE_SIZE, -CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); - } else { -LOGGER.info( -"Executor LRU cache size not configured. Initializing with driver LRU cache size."); -carbonLRUCache = new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_DRIVER_LRU_CACHE_SIZE, -CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); + if (null == executorCacheSize) { +String executorCachePercent = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_EXECUTOR_LRU_CACHE_PERCENT); +long mSizeMB = Runtime.getRuntime().maxMemory() / (1024 * 1024); +long executorLruCache; +if (null != executorCachePercent) { + int percentValue = Integer.parseInt(executorCachePercent); + if (percentValue >= 5 && percentValue <= 95) { +double mPercent = (double) percentValue / 100; +executorLruCache = (long) (mSizeMB * mPercent); + } + else { Review comment: please fix the indentation This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] kunal642 commented on a change in pull request #3893: [CARBONDATA-3959] Added new property to set the value of executor LRU cache size to 70% of the total executor memory in Inde
kunal642 commented on a change in pull request #3893: URL: https://github.com/apache/carbondata/pull/3893#discussion_r481745121 ## File path: core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java ## @@ -148,19 +148,39 @@ private void createLRULevelCacheInstance() { carbonLRUCache = new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_DRIVER_LRU_CACHE_SIZE, CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); } else { - // if executor cache size is not configured then driver cache conf will be used String executorCacheSize = CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_MAX_EXECUTOR_LRU_CACHE_SIZE); - if (null != executorCacheSize) { -carbonLRUCache = -new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_EXECUTOR_LRU_CACHE_SIZE, -CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); - } else { -LOGGER.info( -"Executor LRU cache size not configured. Initializing with driver LRU cache size."); -carbonLRUCache = new CarbonLRUCache(CarbonCommonConstants.CARBON_MAX_DRIVER_LRU_CACHE_SIZE, -CarbonCommonConstants.CARBON_MAX_LRU_CACHE_SIZE_DEFAULT); + if (null == executorCacheSize) { +String executorCachePercent = CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.CARBON_EXECUTOR_LRU_CACHE_PERCENT); +long mSizeMB = Runtime.getRuntime().maxMemory() / (1024 * 1024); +long executorLruCache; +if (null != executorCachePercent) { + int percentValue = Integer.parseInt(executorCachePercent); + if (percentValue >= 5 && percentValue <= 95) { +double mPercent = (double) percentValue / 100; +executorLruCache = (long) (mSizeMB * mPercent); + } + else { Review comment: move else to previous line This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] ajantha-bhat commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
ajantha-bhat commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481755396 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: In some customer scenarios, we have observed huge segment count (more than 100K), so here file exist check for all the valid segment not even pruned segment. so I feel this will slow down the query. so, I suggest to analyze and make flow to enter `null != segment.getLoadMetadataDetails()` block. I agree that segment refactor might solve this. It might take few months to finish and we cannot slow down queries till then also. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 closed pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 closed pull request #3907: URL: https://github.com/apache/carbondata/pull/3907 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#issuecomment-685325305 @ajantha-bhat with recent changes, we are taking modified time from metadata details, so for metadata details already null check is present in getValidSegments API, so may be this changes doesnt affect, but still we need to test , and may be handle in segment refactor, but for now, i close this PR This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] QiangCai commented on a change in pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
QiangCai commented on a change in pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#discussion_r481766413 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ## @@ -624,9 +625,7 @@ class TableNewProcessor(cm: TableModel) { if (isVarcharColumn(colName)) { columnSchema.setDataType(DataTypes.VARCHAR) } -// TODO: Need to fill RowGroupID, converted type Review comment: not required now This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[jira] [Commented] (CARBONDATA-3966) NullPointerException is thrown in case of reliability testing of load, compaction and query
[ https://issues.apache.org/jira/browse/CARBONDATA-3966?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17189011#comment-17189011 ] Akash R Nilugal commented on CARBONDATA-3966: - this issue might not happen with recent changes in community of getting the modified time from metadata details, so closing the jira now, once segment refactoring is done, we can check again > NullPointerException is thrown in case of reliability testing of load, > compaction and query > --- > > Key: CARBONDATA-3966 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3966 > Project: CarbonData > Issue Type: Bug >Reporter: Akash R Nilugal >Assignee: Akash R Nilugal >Priority: Minor > Time Spent: 3h > Remaining Estimate: 0h > > Sometimes NullPointerException is thrown in case of reliability testing of > load, compaction and query -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Closed] (CARBONDATA-3966) NullPointerException is thrown in case of reliability testing of load, compaction and query
[ https://issues.apache.org/jira/browse/CARBONDATA-3966?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Akash R Nilugal closed CARBONDATA-3966. --- Resolution: Not A Problem > NullPointerException is thrown in case of reliability testing of load, > compaction and query > --- > > Key: CARBONDATA-3966 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3966 > Project: CarbonData > Issue Type: Bug >Reporter: Akash R Nilugal >Assignee: Akash R Nilugal >Priority: Minor > Time Spent: 3h > Remaining Estimate: 0h > > Sometimes NullPointerException is thrown in case of reliability testing of > load, compaction and query -- This message was sent by Atlassian Jira (v8.3.4#803005)
[GitHub] [carbondata] akashrn5 commented on pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
akashrn5 commented on pull request #3898: URL: https://github.com/apache/carbondata/pull/3898#issuecomment-685326743 LGTM This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] asfgit closed pull request #3898: [CARBONDATA-3960] Default comment should be null when adding columns
asfgit closed pull request #3898: URL: https://github.com/apache/carbondata/pull/3898 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[jira] [Resolved] (CARBONDATA-3960) Column comment should be null by default when adding column
[ https://issues.apache.org/jira/browse/CARBONDATA-3960?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Akash R Nilugal resolved CARBONDATA-3960. - Fix Version/s: 2.1.0 Resolution: Fixed > Column comment should be null by default when adding column > --- > > Key: CARBONDATA-3960 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3960 > Project: CarbonData > Issue Type: Improvement >Reporter: David Cai >Priority: Minor > Fix For: 2.1.0 > > Time Spent: 2.5h > Remaining Estimate: 0h > > 1. create table > create table test_add_column_with_comment( > col1 string comment 'col1 comment', > col2 int, > col3 string) > stored as carbondata > 2 . alter table > alter table test_add_column_with_comment add columns( > col4 string comment "col4 comment", > col5 int, > col6 string comment "") > 3. describe table > describe test_add_column_with_comment > ++-++ > |col_name|data_type|comment | > ++-++ > |col1 |string |col1 comment| > |col2 |int |null | > |col3 |string |null | > |col4 |string |col4 comment| > |col5 |int | | > |col6 |string | | > ++-++ > the comment of col5 is "" by default -- This message was sent by Atlassian Jira (v8.3.4#803005)
[GitHub] [carbondata] kunal642 commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
kunal642 commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r481777679 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered globally** + * + * @param filter the filter expression to be reordered + * @return The reordered filter with the current ordinal + */ + def reorderFilter(filter: Filter, table: CarbonTable): (Filter, Int) = { +val filterMap = mutable.HashMap[String, List[(Filter, Int)]]() + def sortFilter(filter: Filter): (Filter, Int) = { +filter match { + case sources.And(left, right) => +filterMap.getOrElseUpdate("AND", List()) +if (left.references.toSeq == right.references.toSeq || +right.references.diff(left.references).length == 0) { + val sorted = sortFilter(left) Review comment: done ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered globally** + * + * @param filter the filter expression to be re
[GitHub] [carbondata] kunal642 commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
kunal642 commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r481778448 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,146 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 Review comment: no, filter values can be different This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3907: [CARBONDATA-3966]Fix NullPointerException issue in case of reliability testing of load and compaction
akashrn5 commented on a change in pull request #3907: URL: https://github.com/apache/carbondata/pull/3907#discussion_r481784694 ## File path: core/src/main/java/org/apache/carbondata/core/index/IndexStoreManager.java ## @@ -515,15 +516,16 @@ public TableSegmentRefresher getTableSegmentRefresher(CarbonTable table) { UpdateVO updateVO = SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()); SegmentRefreshInfo segmentRefreshInfo; -if ((updateVO != null && updateVO.getLatestUpdateTimestamp() != null) -|| segment.getSegmentFileName() != null) { - long segmentFileTimeStamp; +if (updateVO.getLatestUpdateTimestamp() != null || segment.getSegmentFileName() != null) { + long segmentFileTimeStamp = 0L; if (null != segment.getLoadMetadataDetails()) { segmentFileTimeStamp = segment.getLoadMetadataDetails().getLastModifiedTime(); } else { -segmentFileTimeStamp = FileFactory.getCarbonFile(CarbonTablePath -.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())) -.getLastModifiedTime(); +CarbonFile segmentFile = FileFactory.getCarbonFile(CarbonTablePath +.getSegmentFilePath(table.getTablePath(), segment.getSegmentFileName())); +if (segmentFile.exists()) { Review comment: i agree, but just because we need query fast, we cant compromise to make query fail also, this is my view. I have already replied and closed PR, you can check. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [carbondata] akashrn5 commented on a change in pull request #3902: [CARBONDATA-3961] reorder filter expression based on storage ordinal
akashrn5 commented on a change in pull request #3902: URL: https://github.com/apache/carbondata/pull/3902#discussion_r481787443 ## File path: integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala ## @@ -373,6 +375,164 @@ object CarbonFilters { val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession) getPartitions(partitionFilters, sparkSession, carbonTable) } + + def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = { +val column = filter.references.map(carbonTable.getColumnByName) +if (column.isEmpty) { + -1 +} else { + if (column.head.isDimension) { +column.head.getOrdinal + } else { +column.head.getOrdinal + carbonTable.getAllDimensions.size() + } +} + } + + def collectSimilarExpressions(filter: Filter, table: CarbonTable): Seq[(Filter, Int)] = { +filter match { + case sources.And(left, right) => +collectSimilarExpressions(left, table) ++ collectSimilarExpressions(right, table) + case sources.Or(left, right) => collectSimilarExpressions(left, table) ++ + collectSimilarExpressions(right, table) + case others => Seq((others, getStorageOrdinal(others, table))) +} + } + + /** + * This method will reorder the filter based on the Storage Ordinal of the column references. + * + * Example1: + * And And + * Or And =>OrAnd + * col3 col1 col2 col1col1 col3col1 col2 + * + * **Mixed expression filter reordered locally, but wont be reordered globally.** + * + * Example2: + * And And + * And And => AndAnd + * col3 col1 col2 col1col1 col1col2 col3 + * + * OrOr + * Or Or =>OrOr + * col3 col1 col2 col1 col1 col1col2 col3 + * + * **Similar expression filters are reordered globally** + * + * @param filter the filter expression to be reordered + * @return The reordered filter with the current ordinal + */ + def reorderFilter(filter: Filter, table: CarbonTable): (Filter, Int) = { +val filterMap = mutable.HashMap[String, List[(Filter, Int)]]() +// If the filter size is one then no need to reorder. +val sortedFilter = if (filter.references.toSet.size == 1) { + (filter, -1) +} else { + sortFilter(filter, filterMap, table) +} +// If filter has only AND/PR expression then sort the nodes globally using the filterMap. Review comment: ```suggestion // If filter has only AND/OR expression then sort the nodes globally using the filterMap. ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org