[ https://issues.apache.org/jira/browse/CARBONDATA-3248?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16741834#comment-16741834 ]
xubo245 commented on CARBONDATA-3248: ------------------------------------- LOAD DATA is not supported for datasource tables: > Spark carbon file format can't read transactional table segment path > -------------------------------------------------------------------- > > Key: CARBONDATA-3248 > URL: https://issues.apache.org/jira/browse/CARBONDATA-3248 > Project: CarbonData > Issue Type: Improvement > Reporter: xubo245 > Priority: Major > > Code: > {code:java} > val tableNameForAllTypeOriginal = "alluxio_table_all_type_original" > val tableNameForAllType = "alluxio_table_all_type" > try { > sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal) > sql( > s"""create table $tableNameForAllTypeOriginal( > | smallIntField SMALLINT, > | intField INT, > | bigIntField BIGINT, > | floatField FLOAT, > | doubleField DOUBLE, > | decimalField DECIMAL(25, 4), > | timestampField TIMESTAMP, > | dateField DATE, > | stringField STRING, > | varcharField VARCHAR(10), > | charField CHAR(10), > | arrayField ARRAY<string>, > | structField STRUCT<col1:STRING, col2:STRING, > col3:STRING>, > | booleanField BOOLEAN) > | using carbondata > """.stripMargin) > val path = localAlluxioCluster.getMasterURI + allDataTypeRemote > try { > sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE > $tableNameForAllTypeOriginal " + > > "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')") > sql(s"select * from $tableNameForAllTypeOriginal").show() > assert(false) > // This is not supported. TODO: analyze whether it can be > supported > } catch { > case e: Exception => > // e.printStackTrace() > assert(true) > } finally { > sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal) > } > println("\n\n\n\n") > sql("DROP TABLE IF EXISTS " + tableNameForAllType) > sql("DROP TABLE IF EXISTS " + tableNameForAllTypeOriginal) > sql( > s"""create table $tableNameForAllTypeOriginal( > | smallIntField SMALLINT, > | intField INT, > | bigIntField BIGINT, > | floatField FLOAT, > | doubleField DOUBLE, 
> | decimalField DECIMAL(25, 4), > | timestampField TIMESTAMP, > | dateField DATE, > | stringField STRING, > | varcharField VARCHAR(10), > | charField CHAR(10), > | arrayField ARRAY<string>, > | structField STRUCT<col1:STRING, col2:STRING, > col3:STRING>, > | booleanField BOOLEAN) > | stored by 'carbondata' > """.stripMargin) > sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE > $tableNameForAllTypeOriginal " + > > "options('COMPLEX_DELIMITER_LEVEL_1'='$','COMPLEX_DELIMITER_LEVEL_2'=':')") > fileSystemShell.run("ls", carbonAndAlluxio + "/default") > val externalTablePath = localAlluxioCluster.getMasterURI + > carbonAndAlluxio + "/default/" + tableNameForAllTypeOriginal + > "/Fact/Part0/Segment_0" > fileSystemShell.run("ls",externalTablePath) > sql(s"CREATE TABLE $tableNameForAllType using carbon" + > s" LOCATION '$externalTablePath'") > {code} > Exception: > {code:java} > 2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM > PST","username":"xubo","opName":"CREATE > TABLE","opId":"15248775671301","opStatus":"SUCCESS","opTime":"140 > ms","table":"default.alluxio_table_all_type_original","extraInfo":{"bad_record_path":"","local_dictionary_enable":"true","external":"false","sort_columns":"","comment":""}} > 2019-01-14 15:09:10 AUDIT audit:72 - {"time":"January 13, 2019 11:09:10 PM > PST","username":"xubo","opName":"LOAD > DATA","opId":"15248921660444","opStatus":"START"} > 2019-01-14 15:09:10 AUDIT audit:93 - {"time":"January 13, 2019 11:09:10 PM > PST","username":"xubo","opName":"LOAD > DATA","opId":"15248921660444","opStatus":"SUCCESS","opTime":"511 > ms","table":"default.alluxio_table_all_type_original","extraInfo":{"SegmentId":"0","DataSize":"5.07KB","IndexSize":"2.48KB"}} > drwxr-xr-x xubo staff 3 PERSISTED > 01-13-2019 23:09:10:129 DIR > /CarbonAndAlluxio/default/alluxio_table_all_type_original > -rw-r--r-- xubo staff 2588 PERSISTED > 01-13-2019 23:09:10:498 100% > 
/CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/0_1547449750488.carbonindexmerge > -rw-r--r-- xubo staff 5187 PERSISTED > 01-13-2019 23:09:10:303 0% > /CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata > 2019-01-14 15:09:10 ERROR AbstractQueryExecutor:280 - Schema of > alluxio://xubodembp:52203/CarbonAndAlluxio/default/alluxio_table_all_type_original/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1547449750082.carbondata > doesn't match with the table's schema > 2019-01-14 15:09:10 ERROR Executor:91 - Exception in task 0.0 in stage 5.0 > (TID 5) > java.io.IOException: All the files doesn't have same schema. Unsupported > operation on nonTransactional table. Check logs. > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:281) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:406) > at > org.apache.carbondata.core.scan.executor.impl.DetailQueryExecutor.execute(DetailQueryExecutor.java:47) > at > org.apache.carbondata.hadoop.CarbonRecordReader.initialize(CarbonRecordReader.java:112) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:427) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:381) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124) > at > 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:174) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:105) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:234) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:228) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > 2019-01-14 15:09:10 ERROR TaskSetManager:70 - Task 0 in stage 5.0 failed 1 > times; aborting job > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)