[ https://issues.apache.org/jira/browse/CARBONDATA-3356?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
xubo245 updated CARBONDATA-3356:
--------------------------------
    Description:

There are some exceptions when the CarbonData DataSource reads SDK files that contain varchar columns.

## write data:
{code:java}
public void testReadSchemaFromDataFileArrayString() {
    String path = "./testWriteFiles";
    try {
        FileUtils.deleteDirectory(new File(path));

        Field[] fields = new Field[11];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));

        Map<String, String> map = new HashMap<>();
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder()
            .outputPath(path)
            .withLoadOptions(map)
            .withCsvInput(new Schema(fields))
            .writtenBy("CarbonReaderTest")
            .build();

        for (int i = 0; i < 10; i++) {
            String[] row2 = new String[]{
                "robot" + (i % 10),
                String.valueOf(i % 10000),
                String.valueOf(i),
                String.valueOf(Long.MAX_VALUE - i),
                String.valueOf((double) i / 2),
                String.valueOf(true),
                "2019-03-02",
                "2019-02-12 03:03:34",
                "12.345",
                "varchar",
                "Hello#World#From#Carbon"
            };
            writer.write(row2);
        }
        writer.close();

        File[] dataFiles = new File(path).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                if (name == null) {
                    return false;
                }
                return name.endsWith("carbondata");
            }
        });
        if (dataFiles == null || dataFiles.length < 1) {
            throw new RuntimeException("Carbon data file not exists.");
        }
        Schema schema = CarbonSchemaReader
            .readSchema(dataFiles[0].getAbsolutePath())
            .asOriginOrder();
        // Transform the schema
        String[] strings = new String[schema.getFields().length];
        for (int i = 0; i < schema.getFields().length; i++) {
            strings[i] = (schema.getFields())[i].getFieldName();
        }

        // Read data
        CarbonReader reader = CarbonReader
            .builder(path, "_temp")
            .projection(strings)
            .build();

        int i = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            assert (row[0].equals("robot" + i));
            assert (row[2].equals(i));
            assert (row[6].equals(17957));
            Object[] arr = (Object[]) row[10];
            assert (arr[0].equals("Hello"));
            assert (arr[3].equals("Carbon"));
            i++;
        }
        reader.close();
        // FileUtils.deleteDirectory(new File(path));
    } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
{code}
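To confirm that the files themselves record the column as VARCHAR (rather than STRING), the stored schema can be dumped with the same CarbonSchemaReader API used above. A minimal sketch; Field#getDataType is assumed here to return the stored data type:

{code:java}
import java.io.File;
import java.io.FilenameFilter;

import org.apache.carbondata.sdk.file.CarbonSchemaReader;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class DumpSdkSchema {
    public static void main(String[] args) throws Exception {
        // Same output directory as the writer test above
        File[] dataFiles = new File("./testWriteFiles").listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name != null && name.endsWith("carbondata");
            }
        });
        Schema schema = CarbonSchemaReader
            .readSchema(dataFiles[0].getAbsolutePath())
            .asOriginOrder();
        // Expect "varcharField" to be reported with the VARCHAR data type here,
        // while the Spark-side schema of the CARBON table reports STRING.
        for (Field field : schema.getFields()) {
            System.out.println(field.getFieldName() + " -> " + field.getDataType());
        }
    }
}
{code}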
## read data:
{code:java}
test("Test read image carbon with spark carbon file format, generate by sdk, CTAS") {
    sql("DROP TABLE IF EXISTS binaryCarbon")
    sql("DROP TABLE IF EXISTS binaryCarbon3")
    if (SparkUtil.isSparkVersionEqualTo("2.1")) {
        sql(s"CREATE TABLE binaryCarbon USING CARBON OPTIONS(PATH '$writerPath')")
        sql(s"CREATE TABLE binaryCarbon3 USING CARBON OPTIONS(PATH '$outputPath')" + " AS SELECT * FROM binaryCarbon")
    } else {
        // sql(s"CREATE TABLE binaryCarbon USING CARBON LOCATION '$writerPath'")
        sql(s"CREATE TABLE binaryCarbon USING CARBON LOCATION '/Users/xubo/Desktop/xubo/git/carbondata3/store/sdk/testWriteFiles'")
        sql("SELECT COUNT(*) FROM binaryCarbon").show()
    }
}
{code}

## exception:
{code:java}
java.io.IOException: All common columns present in the files doesn't have same datatype. Unsupported operation on nonTransactional table. Check logs.
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407)
    at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44)
    at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2019-04-20 10:42:08 ERROR TaskSetManager:70 - Task 0 in stage 0.0 failed 1 times; aborting job
Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.io.IOException: All common columns present in the files doesn't have same datatype. Unsupported operation on nonTransactional table. Check logs.
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407)
    at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44)
    at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): java.io.IOException: All common columns present in the files doesn't have same datatype. Unsupported operation on nonTransactional table. Check logs.
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407)
    at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44)
    at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1872)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1821)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1810)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:363)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
    at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3278)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2703)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply$mcV$sp(SparkCarbonDataSourceBinaryTest.scala:93)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply(SparkCarbonDataSourceBinaryTest.scala:84)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply(SparkCarbonDataSourceBinaryTest.scala:84)
    at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
    at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
    at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
    at org.scalatest.Transformer.apply(Transformer.scala:22)
    at org.scalatest.Transformer.apply(Transformer.scala:20)
    at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
    at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
    at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
    at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
    at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
    at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
    at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
    at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
    at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
    at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
    at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
    at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
    at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
    at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
    at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
    at org.scalatest.Suite$class.run(Suite.scala:1424)
    at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
    at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
    at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
    at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest.org$scalatest$BeforeAndAfterAll$$super$run(SparkCarbonDataSourceBinaryTest.scala:32)
    at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257)
    at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest.run(SparkCarbonDataSourceBinaryTest.scala:32)
    at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
    at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
    at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
    at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
    at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
    at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
    at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
    at org.scalatest.tools.Runner$.run(Runner.scala:883)
    at org.scalatest.tools.Runner.run(Runner.scala)
    at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131)
    at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
Caused by: java.io.IOException: All common columns present in the files doesn't have same datatype. Unsupported operation on nonTransactional table. Check logs.
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138)
    at org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407)
    at org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44)
    at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422)
    at org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
{code}
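The message at the top of the trace is raised in AbstractQueryExecutor.updateColumns, which validates that every column shared by the files of a non-transactional table has the same datatype in each file. A minimal, simplified Java sketch of that kind of check (the class and method names are hypothetical; this is not Carbon's actual implementation):

{code:java}
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class CommonColumnCheck {

    // Hypothetical representation: one map per file, column name -> datatype name.
    static void validateCommonColumns(List<Map<String, String>> fileSchemas) throws IOException {
        Map<String, String> first = fileSchemas.get(0);
        for (Map<String, String> other : fileSchemas.subList(1, fileSchemas.size())) {
            for (Map.Entry<String, String> column : first.entrySet()) {
                String otherType = other.get(column.getKey());
                // A column that appears in several schemas must have the same datatype everywhere.
                if (otherType != null && !otherType.equals(column.getValue())) {
                    throw new IOException("All common columns present in the files doesn't have same datatype. "
                        + "Unsupported operation on nonTransactional table. Check logs.");
                }
            }
        }
    }

    public static void main(String[] args) throws IOException {
        // varcharField is VARCHAR in the SDK file schema but STRING after the
        // DataSource conversion, so a check of this shape fails.
        validateCommonColumns(Arrays.asList(
            Collections.singletonMap("varcharField", "VARCHAR"),
            Collections.singletonMap("varcharField", "STRING")));
    }
}
{code}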
## Analysis:
In the CarbonData DataSource, the Carbon VARCHAR type is converted to the Spark STRING type in org.apache.spark.sql.util.SparkTypeConverter#convertCarbonToSparkDataType, because Spark does not support varchar (Parquet included); each of the following CREATE TABLE statements fails:
{code:java}
sql("CREATE TABLE hivetable(intField INT,stringField STRING,varcharField VARCHAR,timestampField TIMESTAMP," +
    "decimalField DECIMAL(6,2))")
sql("CREATE TABLE hivetable(intField INT,stringField STRING,varcharField VARCHAR,timestampField TIMESTAMP," +
    "decimalField DECIMAL(6,2)) using parquet")
sql("CREATE TABLE hivetable(intField INT,stringField STRING,varcharField VARCHAR,timestampField TIMESTAMP," +
    "decimalField DECIMAL(6,2)) stored as parquet")
{code}

### exception:
{code:java}
DataType varchar is not supported.(line 1, pos 68)
{code}
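The conversion referred to above maps both Carbon STRING and Carbon VARCHAR onto Spark's StringType, so the table schema inferred by the DataSource (STRING) no longer matches the VARCHAR recorded in the SDK files. SparkTypeConverter itself is Scala; the following is only a Java sketch of the mapping described above, and the non-string branches are illustrative:

{code:java}
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;

public class CarbonToSparkTypeSketch {

    // Both STRING and VARCHAR collapse to Spark's StringType; this is where the
    // file schema (VARCHAR) and the inferred table schema (STRING) diverge.
    static org.apache.spark.sql.types.DataType convert(DataType carbonType) {
        if (carbonType == DataTypes.STRING || carbonType == DataTypes.VARCHAR) {
            return org.apache.spark.sql.types.DataTypes.StringType;
        } else if (carbonType == DataTypes.INT) {
            return org.apache.spark.sql.types.DataTypes.IntegerType;
        } else if (carbonType == DataTypes.BOOLEAN) {
            return org.apache.spark.sql.types.DataTypes.BooleanType;
        }
        throw new UnsupportedOperationException("not covered in this sketch: " + carbonType);
    }
}
{code}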
> There are some exceptions when the CarbonData DataSource reads SDK files with
> varchar
> ---------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-3356
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3356
>             Project: CarbonData
CarbonData > Issue Type: Bug > Reporter: xubo245 > Priority: Major > > There are some exception when carbonData DataSource read SDK files with > varchar > ## write data: > {code:java} > public void testReadSchemaFromDataFileArrayString() { > String path = "./testWriteFiles"; > try { > FileUtils.deleteDirectory(new File(path)); > Field[] fields = new Field[11]; > fields[0] = new Field("stringField", DataTypes.STRING); > fields[1] = new Field("shortField", DataTypes.SHORT); > fields[2] = new Field("intField", DataTypes.INT); > fields[3] = new Field("longField", DataTypes.LONG); > fields[4] = new Field("doubleField", DataTypes.DOUBLE); > fields[5] = new Field("boolField", DataTypes.BOOLEAN); > fields[6] = new Field("dateField", DataTypes.DATE); > fields[7] = new Field("timeField", DataTypes.TIMESTAMP); > fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, > 2)); > fields[9] = new Field("varcharField", DataTypes.VARCHAR); > fields[10] = new Field("arrayField", > DataTypes.createArrayType(DataTypes.STRING)); > Map<String, String> map = new HashMap<>(); > map.put("complex_delimiter_level_1", "#"); > CarbonWriter writer = CarbonWriter.builder() > .outputPath(path) > .withLoadOptions(map) > .withCsvInput(new Schema(fields)) > .writtenBy("CarbonReaderTest") > .build(); > for (int i = 0; i < 10; i++) { > String[] row2 = new String[]{ > "robot" + (i % 10), > String.valueOf(i % 10000), > String.valueOf(i), > String.valueOf(Long.MAX_VALUE - i), > String.valueOf((double) i / 2), > String.valueOf(true), > "2019-03-02", > "2019-02-12 03:03:34", > "12.345", > "varchar", > "Hello#World#From#Carbon" > }; > writer.write(row2); > } > writer.close(); > File[] dataFiles = new File(path).listFiles(new FilenameFilter() { > @Override > public boolean accept(File dir, String name) { > if (name == null) { > return false; > } > return name.endsWith("carbondata"); > } > }); > if (dataFiles == null || dataFiles.length < 1) { > throw new RuntimeException("Carbon data file not exists."); > } > Schema schema = CarbonSchemaReader > .readSchema(dataFiles[0].getAbsolutePath()) > .asOriginOrder(); > // Transform the schema > String[] strings = new String[schema.getFields().length]; > for (int i = 0; i < schema.getFields().length; i++) { > strings[i] = (schema.getFields())[i].getFieldName(); > } > // Read data > CarbonReader reader = CarbonReader > .builder(path, "_temp") > .projection(strings) > .build(); > int i = 0; > while (reader.hasNext()) { > Object[] row = (Object[]) reader.readNextRow(); > assert (row[0].equals("robot" + i)); > assert (row[2].equals(i)); > assert (row[6].equals(17957)); > Object[] arr = (Object[]) row[10]; > assert (arr[0].equals("Hello")); > assert (arr[3].equals("Carbon")); > i++; > } > reader.close(); > // FileUtils.deleteDirectory(new File(path)); > } catch (Throwable e) { > e.printStackTrace(); > Assert.fail(e.getMessage()); > } > } > {code} > ## read data > {code:java} > test("Test read image carbon with spark carbon file format, generate by > sdk, CTAS") { > sql("DROP TABLE IF EXISTS binaryCarbon") > sql("DROP TABLE IF EXISTS binaryCarbon3") > if (SparkUtil.isSparkVersionEqualTo("2.1")) { > sql(s"CREATE TABLE binaryCarbon USING CARBON OPTIONS(PATH > '$writerPath')") > sql(s"CREATE TABLE binaryCarbon3 USING CARBON OPTIONS(PATH > '$outputPath')" + " AS SELECT * FROM binaryCarbon") > } else { > // sql(s"CREATE TABLE binaryCarbon USING CARBON LOCATION > '$writerPath'") > sql(s"CREATE TABLE binaryCarbon USING CARBON LOCATION > 
'/Users/xubo/Desktop/xubo/git/carbondata3/store/sdk/testWriteFiles'") > sql("SELECT COUNT(*) FROM binaryCarbon").show() > } > } > {code} > ## exception: > {code:java} > java.io.IOException: All common columns present in the files doesn't have > same datatype. Unsupported operation on nonTransactional table. Check logs. > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407) > at > org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44) > at > org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614) > at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) > at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:109) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > 2019-04-20 10:42:08 ERROR TaskSetManager:70 - Task 0 in stage 0.0 failed 1 > times; aborting job > Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most > recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor > driver): java.io.IOException: All common columns present in the files doesn't > have same datatype. Unsupported operation on nonTransactional table. Check > logs. 
> at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407) > at > org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44) > at > org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614) > at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) > at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:109) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Driver stacktrace: > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in > stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 > (TID 0, localhost, executor driver): java.io.IOException: All common columns > present in the files doesn't have same datatype. Unsupported operation on > nonTransactional table. Check logs. 
> at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.updateColumns(AbstractQueryExecutor.java:290) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getDataBlocks(AbstractQueryExecutor.java:234) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.initQuery(AbstractQueryExecutor.java:138) > at > org.apache.carbondata.core.scan.executor.impl.AbstractQueryExecutor.getBlockExecutionInfos(AbstractQueryExecutor.java:407) > at > org.apache.carbondata.core.scan.executor.impl.VectorDetailQueryExecutor.execute(VectorDetailQueryExecutor.java:44) > at > org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.initialize(VectorizedCarbonRecordReader.java:146) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:422) > at > org.apache.spark.sql.carbondata.execution.datasources.SparkCarbonFileFormat$$anonfun$buildReaderWithPartitionValues$2.apply(SparkCarbonFileFormat.scala:383) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182) > at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614) > at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408) > at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:109) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1651) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1639) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1638) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1638) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831) > at > 
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1872)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1821)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1810)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2034)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2055)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2074)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:363)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
    at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3278)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2489)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2703)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply$mcV$sp(SparkCarbonDataSourceBinaryTest.scala:93)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply(SparkCarbonDataSourceBinaryTest.scala:84)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest$$anonfun$2.apply(SparkCarbonDataSourceBinaryTest.scala:84)
    at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
    at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
    at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
    at org.scalatest.Transformer.apply(Transformer.scala:22)
    at org.scalatest.Transformer.apply(Transformer.scala:20)
    at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
    at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
    at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
    at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
    at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
    at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
    at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
    at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
    at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
    at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
    at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
    at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
    at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
    at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
    at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
    at org.scalatest.Suite$class.run(Suite.scala:1424)
    at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
    at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
    at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
    at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
    at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest.org$scalatest$BeforeAndAfterAll$$super$run(SparkCarbonDataSourceBinaryTest.scala:32)
    at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257)
    at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256)
    at org.apache.spark.sql.carbondata.datasource.SparkCarbonDataSourceBinaryTest.run(SparkCarbonDataSourceBinaryTest.scala:32)
    at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
    at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
    at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
    at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
    at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
    at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
    at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
    at org.scalatest.tools.Runner$.run(Runner.scala:883)
    at org.scalatest.tools.Runner.run(Runner.scala)
    at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131)
    at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
Caused by: java.io.IOException: All common columns present in the files doesn't have same datatype. Unsupported operation on nonTransactional table. Check logs.
    ... (executor-side stack trace repeated, same as above) ...
{code}

## Analysis:

In the Carbon DataSource, org.apache.spark.sql.util.SparkTypeConverter#convertCarbonToSparkDataType converts Carbon's VARCHAR to Spark's STRING, because Spark does not support VARCHAR as a column data type. The schema the DataSource reports (STRING) therefore no longer matches the VARCHAR recorded in the SDK-written files, which is likely what trips the "common columns ... same datatype" check in AbstractQueryExecutor.updateColumns. Parquet has the same limitation in Spark; each of the following statements fails:

{code:java}
sql("CREATE TABLE hivetable(intField INT, stringField STRING, varcharField VARCHAR, timestampField TIMESTAMP, decimalField DECIMAL(6,2))")
sql("CREATE TABLE hivetable(intField INT, stringField STRING, varcharField VARCHAR, timestampField TIMESTAMP, decimalField DECIMAL(6,2)) USING parquet")
sql("CREATE TABLE hivetable(intField INT, stringField STRING, varcharField VARCHAR, timestampField TIMESTAMP, decimalField DECIMAL(6,2)) STORED AS parquet")
{code}

### exception:

{code:java}
DataType varchar is not supported.(line 1, pos 68)
{code}
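For reference, the conversion described above behaves roughly as sketched below. This is a minimal illustration, not the actual CarbonData source: the object name VarcharMappingSketch and the method toSparkType are made up for this example, and the real SparkTypeConverter#convertCarbonToSparkDataType also handles decimals and complex types.

{code:java}
// A minimal sketch (assumed, not CarbonData's implementation) of the mapping
// that convertCarbonToSparkDataType performs. The relevant line is the VARCHAR
// case: Carbon's VARCHAR collapses to Spark's StringType, so the Spark-side
// schema loses the information that the file column was written as VARCHAR.
import org.apache.carbondata.core.metadata.datatype.{DataType => CarbonType, DataTypes => CarbonTypes}
import org.apache.spark.sql.types._

object VarcharMappingSketch {
  def toSparkType(carbonType: CarbonType): DataType = carbonType match {
    case CarbonTypes.STRING    => StringType
    case CarbonTypes.VARCHAR   => StringType // VARCHAR and STRING become indistinguishable
    case CarbonTypes.SHORT     => ShortType
    case CarbonTypes.INT       => IntegerType
    case CarbonTypes.LONG      => LongType
    case CarbonTypes.DOUBLE    => DoubleType
    case CarbonTypes.BOOLEAN   => BooleanType
    case CarbonTypes.DATE      => DateType
    case CarbonTypes.TIMESTAMP => TimestampType
    case other => throw new UnsupportedOperationException(s"not covered in this sketch: $other")
  }
}
{code}

Under this mapping, toSparkType(CarbonTypes.VARCHAR) == toSparkType(CarbonTypes.STRING) evaluates to true, which is exactly the information loss described in the analysis above.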