khiem291 opened a new issue #3558: URL: https://github.com/apache/hudi/issues/3558
**Description**

Hi all, I tested schema evolution that changes a column's data type from int to double, but it did not work with Hudi DeltaStreamer (Hudi doc: https://github.com/apache/hudi/blob/asf-site/website/docs/schema_evolution.md). I also tried changing a data type from float to double and hit the same error.

**To Reproduce**

Steps to reproduce the behavior:

1. Init the Spark context

```
./spark-shell \
  --packages org.apache.spark:spark-avro_2.12:3.1.2,org.apache.hadoop:hadoop-aws:3.2.0,org.apache.hudi:hudi-spark3-bundle_2.12:0.8.0 \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.hadoop.fs.s3a.access.key=xx' \
  --conf 'spark.hadoop.fs.s3a.secret.key=xx' \
  --conf 'spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem' \
  --conf 'spark.hadoop.fs.s3a.endpoint=s3.amazonaws.com' \
  --conf 'spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.profile.ProfileCredentialsProvider' \
  --conf 'spark.hadoop.fs.s3a.fast.upload=true' \
  --conf 'spark.hadoop.fs.s3a.multiobjectdelete.enable=false' \
  --conf 'spark.sql.parquet.filterPushdown=true' \
  --conf 'spark.sql.parquet.mergeSchema=false' \
  --conf 'spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2' \
  --conf 'spark.speculation=false' \
  --conf 'hive.metastore.schema.verification=false' \
  --conf 'hive.metastore.schema.verification.record.version=false' \
  --conf spark.sql.hive.convertMetastoreParquet=false
```

2. Create the base Hudi table

```
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row

val tableName = "hudi_trips_cow"
val basePath = "s3a://data-lake/hudi_test/hudi_trips_cow_schema_change"

val schema = StructType(Array(
  StructField("rowId", StringType, true),
  StructField("partitionId", StringType, true),
  StructField("preComb", LongType, true),
  StructField("name", StringType, true),
  StructField("versionId", StringType, true),
  StructField("intToLong", IntegerType, true),   // ok
  StructField("intToDouble", IntegerType, true),
  StructField("longToFloat", LongType, true),    // ok
  // StructField("longToDouble", IntegerType, true),
  StructField("floatToDouble", FloatType, true)
)) // 9 cols

val data1 = Seq(
  Row("row_1", "part_0", 0L, "bob", "v_0", 0, 1, 1L, 1.1f),
  Row("row_2", "part_0", 0L, "john", "v_0", 0, 1, 2L, 1.2f),
  Row("row_3", "part_3", 0L, "tom", "v_0", 0, 1, 3L, 1.3f))

var dfFromData1 = spark.createDataFrame(data1, schema)

dfFromData1.write.format("hudi").
  options(getQuickstartWriteConfigs).
  option(PRECOMBINE_FIELD_OPT_KEY, "preComb").
  option(RECORDKEY_FIELD_OPT_KEY, "rowId").
  option(PARTITIONPATH_FIELD_OPT_KEY, "partitionId").
  option("hoodie.index.type", "SIMPLE").
  option("hoodie.datasource.write.hive_style_partitioning", true).
  option(TABLE_NAME, tableName).
  mode(Overwrite).
  save(basePath)
```

3. Change column `intToDouble` data type from int to double and append new data to the old table.
```
// Int to double
val newSchema = StructType(Array(
  StructField("rowId", StringType, true),
  StructField("partitionId", StringType, true),
  StructField("preComb", LongType, true),
  StructField("name", StringType, true),
  StructField("versionId", StringType, true),
  StructField("intToLong", IntegerType, true),
  StructField("intToDouble", DoubleType, true),
  StructField("longToFloat", LongType, true),
  // StructField("longToDouble", IntegerType, true),
  StructField("floatToDouble", FloatType, true)
)) // 9 cols

val data2 = Seq(
  Row("row_2", "part_0", 5L, "john", "v_3", 3, 1D, 2L, 1.8f),
  Row("row_5", "part_0", 5L, "maroon", "v_2", 2, 1D, 2L, 1.8f),
  Row("row_9", "part_9", 5L, "michael", "v_2", 2, 1D, 2L, 1.8f))

var dfFromData2 = spark.createDataFrame(data2, newSchema)

dfFromData2.write.format("hudi").
  options(getQuickstartWriteConfigs).
  option(PRECOMBINE_FIELD_OPT_KEY, "preComb").
  option(RECORDKEY_FIELD_OPT_KEY, "rowId").
  option(PARTITIONPATH_FIELD_OPT_KEY, "partitionId").
  option("hoodie.datasource.write.hive_style_partitioning", true).
  option("hoodie.index.type", "SIMPLE").
  option(TABLE_NAME, tableName).
  mode(Append).
  save(basePath)
```

4. Reading the Hudi table then fails (a quick check of the old files' physical Parquet type follows this snippet).

```
scala> val tripsSnapshotDF1 = spark.read.format("hudi").load(basePath + "/*/*")
scala> tripsSnapshotDF1.show()
21/08/29 12:47:42 ERROR Executor: Exception in task 0.0 in stage 59.0 (TID 447)
org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file s3a://de-dev-sb-data-lake/hudi_test/hudi_trips_cow_schema_change_full_hudi0.9/partitionId=part_3/aa852039-ee90-4365-8da9-9ec824b0d872-0_1-19-214_20210829123247.parquet. Column: [intToDouble], Expected: double, Found: INT32
```
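For reference, reading one of the base files from the first commit directly with the plain Parquet datasource (bypassing Hudi) should show what the old file groups physically store. This is only an illustrative sanity check, not output from the original session; the partition folder name assumes the hive-style partitioning configured above.

```
// Illustrative sanity check: inspect the physical type written by the FIRST commit
// by reading its base files with the plain Parquet reader instead of Hudi.
// part_3 only received data in the first commit, so its files predate the type change.
val oldPartFiles = spark.read.format("parquet").load(basePath + "/partitionId=part_3/*.parquet")
oldPartFiles.printSchema()
// intToDouble is expected to still appear as integer here, which would match
// the "Expected: double, Found: INT32" error below.
```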
**Expected behavior**

Can read the Hudi table with Spark.

**Environment Description**

* Hudi version : 0.8.0
* Spark version : 3.1.2
* Hive version : no
* Hadoop version : 3.2
* Storage (HDFS/S3/GCS..) : S3
* Running on Docker? (yes/no) : no

**Stacktrace**

```
scala> tripsSnapshotDF1.show()
21/08/29 12:47:42 ERROR Executor: Exception in task 0.0 in stage 59.0 (TID 447)
org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file s3a://de-dev-sb-data-lake/hudi_test/hudi_trips_cow_schema_change_full_hudi0.9/partitionId=part_3/aa852039-ee90-4365-8da9-9ec824b0d872-0_1-19-214_20210829123247.parquet. Column: [intToDouble], Expected: double, Found: INT32
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:179)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:503)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
  at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  at org.apache.spark.scheduler.Task.run(Task.scala:131)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:339)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:571)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:294)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:283)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:181)
  at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:37)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:173)
  ... 20 more
21/08/29 12:47:42 WARN TaskSetManager: Lost task 0.0 in stage 59.0 (TID 447) (192.168.1.5 executor driver): org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file s3a://de-dev-sb-data-lake/hudi_test/hudi_trips_cow_schema_change_full_hudi0.9/partitionId=part_3/aa852039-ee90-4365-8da9-9ec824b0d872-0_1-19-214_20210829123247.parquet. Column: [intToDouble], Expected: double, Found: INT32
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:179)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:503)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
  at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  at org.apache.spark.scheduler.Task.run(Task.scala:131)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:339)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:571)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:294)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:283)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:181)
  at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:37)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:173)
  ... 20 more
21/08/29 12:47:42 ERROR TaskSetManager: Task 0 in stage 59.0 failed 1 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 59.0 failed 1 times, most recent failure: Lost task 0.0 in stage 59.0 (TID 447) (192.168.1.5 executor driver): org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file s3a://de-dev-sb-data-lake/hudi_test/hudi_trips_cow_schema_change_full_hudi0.9/partitionId=part_3/aa852039-ee90-4365-8da9-9ec824b0d872-0_1-19-214_20210829123247.parquet. Column: [intToDouble], Expected: double, Found: INT32
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:179)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:503)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
  at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  at org.apache.spark.scheduler.Task.run(Task.scala:131)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:339)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:571)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:294)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:283)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:181)
  at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:37)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:173)
  ... 20 more
Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2258)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2207)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2206)
  at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2206)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1079)
  at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1079)
  at scala.Option.foreach(Option.scala:407)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1079)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2445)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2387)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2376)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2196)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2217)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2236)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:472)
  at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:425)
  at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
  at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3696)
  at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2722)
  at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
  at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
  at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:2722)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2929)
  at org.apache.spark.sql.Dataset.getRows(Dataset.scala:301)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:338)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:825)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:784)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:793)
  ... 61 elided
Caused by: org.apache.spark.sql.execution.QueryExecutionException: Parquet column cannot be converted in file s3a://de-dev-sb-data-lake/hudi_test/hudi_trips_cow_schema_change_full_hudi0.9/partitionId=part_3/aa852039-ee90-4365-8da9-9ec824b0d872-0_1-19-214_20210829123247.parquet. Column: [intToDouble], Expected: double, Found: INT32
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:179)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:503)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
  at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
  at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  at org.apache.spark.scheduler.Task.run(Task.scala:131)
  at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:339)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:571)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:294)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:283)
  at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:181)
  at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:37)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:173)
  ... 20 more
```
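One speculative idea (not something the Hudi docs prescribe, and I have not verified that it helps with the int to double case): since the failure comes from Spark's `VectorizedColumnReader`, the read could be retried with the vectorized Parquet reader disabled, so the non-vectorized parquet-mr path is used instead.

```
// Speculative workaround attempt, unverified for this case: disable the
// vectorized Parquet reader before re-running the snapshot query.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")
val tripsSnapshotDF2 = spark.read.format("hudi").load(basePath + "/*/*")
tripsSnapshotDF2.show()
```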

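To help narrow down whether this is Hudi-specific, here is a minimal sketch (not part of the original report; the output path is made up) that I believe reproduces the same Parquet conversion error with plain Spark: write a column as int, then read the same files back with a schema that declares it as double.

```
// Minimal sketch, independent of Hudi: an int column read back as double is
// expected to hit the same "Expected: double, Found: INT32" error from the
// vectorized Parquet reader. The path below is illustrative only.
import org.apache.spark.sql.types._
import spark.implicits._

val plainPath = "/tmp/plain_parquet_int_to_double"
Seq(("a", 1), ("b", 2)).toDF("id", "v").write.mode("overwrite").parquet(plainPath)

val evolvedSchema = StructType(Array(
  StructField("id", StringType, true),
  StructField("v", DoubleType, true)))
spark.read.schema(evolvedSchema).parquet(plainPath).show()  // expected to fail the same way
```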