[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2535 ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2535#discussion_r204983701 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala --- @@ -77,6 +77,7 @@ import org.apache.carbondata.spark.dictionary.provider.SecureDictionaryServicePr import org.apache.carbondata.spark.dictionary.server.SecureDictionaryServer import org.apache.carbondata.spark.load.{CsvRDDHelper, DataLoadProcessorStepOnSpark} import org.apache.carbondata.spark.rdd.CarbonDataRDDFactory +import org.apache.carbondata.spark.rdd.CarbonDataRDDFactory.LOGGER --- End diff -- no need for this import. CarbonLoadDataCommand already has a LOGGER ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2535#discussion_r204642515 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala --- @@ -885,4 +885,47 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll { checkExistence(sql("select * from table1"),true,"1.0E9") } + test("test block compaction - auto merge") { +sql("DROP TABLE IF EXISTS table1") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE, "true") +sql( + "create table table1 (roll int,person Struct) stored " + + "by 'carbondata'") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +checkExistence(sql("show segments for table table1"),false, "Compacted") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE, "false") --- End diff -- add this in afterAll too ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2535#discussion_r204296432 --- Diff: integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala --- @@ -885,4 +885,36 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll { checkExistence(sql("select * from table1"),true,"1.0E9") } + test("test block compaction - auto merge") { +sql("DROP TABLE IF EXISTS table1") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.ENABLE_AUTO_LOAD_MERGE, "true") +sql( + "create table table1 (roll int,person Struct) stored " + + "by 'carbondata'") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +sql( + "load data inpath '" + resourcesPath + + "/Struct.csv' into table table1 options('delimiter'=','," + + "'quotechar'='\"','fileheader'='roll,person','complex_delimiter_level_1'='$'," + + "'complex_delimiter_level_2'='&')") +checkAnswer(sql("select count(*) from table1"),Seq(Row(40))) --- End diff -- check for segments whether compaction has happened or not ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2535#discussion_r204296035 --- Diff: integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala --- @@ -578,13 +578,19 @@ object CarbonDataRDDFactory { if (carbonTable.isHivePartitionTable) { carbonLoadModel.setFactTimeStamp(System.currentTimeMillis()) } -val compactedSegments = new util.ArrayList[String]() -handleSegmentMerging(sqlContext, - carbonLoadModel, - carbonTable, - compactedSegments, - operationContext) -carbonLoadModel.setMergedSegmentIds(compactedSegments) +// Block compaction for table containing complex datatype +if (carbonTable.getTableInfo.getFactTable.getListOfColumns.asScala + .exists(m => m.getDataType.isComplexType)) { + LOGGER.info("Compaction is skipped as table contains complex columns") --- End diff -- change to warn ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2535#discussion_r204296013 --- Diff: integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala --- @@ -823,15 +824,21 @@ case class CarbonLoadDataCommand( } try { carbonLoadModel.setFactTimeStamp(System.currentTimeMillis()) - val compactedSegments = new util.ArrayList[String]() - // Trigger auto compaction - CarbonDataRDDFactory.handleSegmentMerging( -sparkSession.sqlContext, -carbonLoadModel, -table, -compactedSegments, -operationContext) - carbonLoadModel.setMergedSegmentIds(compactedSegments) + // Block compaction for table containing complex datatype + if (table.getTableInfo.getFactTable.getListOfColumns.asScala +.exists(m => m.getDataType.isComplexType)) { +LOGGER.info("Compaction is skipped as table contains complex columns") --- End diff -- change to warn ---
[GitHub] carbondata pull request #2535: [CARBONDATA-2606]Fix Complex array Pushdown
GitHub user Indhumathi27 opened a pull request: https://github.com/apache/carbondata/pull/2535 [CARBONDATA-2606]Fix Complex array Pushdown **What is this PR for?** Checks whether a complex column contains ArrayType at any nesting level and, if it contains an array, adds the parent column to the projection. - [ ] Any interfaces changed? - [ ] Any backward compatibility impacted? - [ ] Document update required? - [ ] Testing done - [ ] For large changes, please consider breaking it into sub-tasks under an umbrella JIRA. You can merge this pull request into a Git repository by running: $ git pull https://github.com/Indhumathi27/carbondata arrayfix Alternatively you can review and apply these changes as the patch at: https://github.com/apache/carbondata/pull/2535.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #2535 commit e5b3d345d8054eaf1f326e70a2ab3453e9e1c153 Author: Indhumathi27 Date: 2018-07-21T10:46:21Z [CARBONDATA-2606]Fix Complex array Pushdown ---