Amar1404 opened a new issue, #10488: URL: https://github.com/apache/hudi/issues/10488
**_Tips before filing an issue_** - Have you gone through our [FAQs](https://hudi.apache.org/learn/faq/)? - Join the mailing list to engage in conversations and get faster support at [email protected]. - If you have triaged this as a bug, then file an [issue](https://issues.apache.org/jira/projects/HUDI/issues) directly. I am not able to do simple count when hoodie.schema.on.read.enable=true A clear and concise description of the problem. **To Reproduce** Steps to reproduce the behavior: val hudiOptions= Map( "hoodie.parquet.compression.codec"->"zstd", "hoodie.datasource.write.hive_style_partitioning"->"true", "hoodie.embed.timeline.server"->"true", "hoodie.datasource.write.reconcile.schema"-> "false", "hoodie.schema.on.read.enable"-> "true", "hoodie.datasource.write.keygenerator.class"-> "org.apache.hudi.keygen.SimpleKeyGenerator", "hoodie.metadata.enable"->"true", "hoodie.index.type"->"BLOOM", ) val columns = Seq("ts","uuid","rider","driver","fare","city") val data = Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"), (1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70 ,"san_francisco"), (1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90 ,"san_francisco"), (1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"), (1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai")) var inserts = spark.createDataFrame(data).toDF(columns:_*) inserts.write.format("org.apache.hudi") .option(OPERATION_OPT_KEY, "insert"). option(PARTITIONPATH_FIELD_OPT_KEY, "city") .option(PRECOMBINE_FIELD_OPT_KEY, "ts") .option(RECORDKEY_FIELD_OPT_KEY, "uuid") .option(TABLE_NAME, "test_hudi") .options(hudiOptions) .mode(Overwrite). save(Path) spark.read.format(hudi).options(hudiOptions).load(Path).count **Expected behavior** The output should be shown by not working **Environment Description** * Hudi version : 0.12.3,0.14.0 * Spark version : 3.3 * Hive version : * Hadoop version : * Storage (HDFS/S3/GCS..) : s3 * Running on Docker? (yes/no) :no **Additional context** Add any other context about the problem here. **Stacktrace** The Spark SQL phase planning failed with an internal error. Please, fill a bug report in, and provide the full stack trace. at org.apache.spark.sql.execution.QueryExecution$.toInternalError(QueryExecution.scala:542) at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:554) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212) at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:153) at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:146) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:166) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213) at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212) at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:163) at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:159) at org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$5(QueryExecution.scala:298) at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:657) at org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:298) at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:313) at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:267) at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:246) at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139) at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107) at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3920) at org.apache.spark.sql.Dataset.head(Dataset.scala:2904) at org.apache.spark.sql.Dataset.take(Dataset.scala:3125) at org.apache.spark.sql.Dataset.getRows(Dataset.scala:290) at org.apache.spark.sql.Dataset.showString(Dataset.scala:329) at org.apache.spark.sql.Dataset.show(Dataset.scala:849) at org.apache.spark.sql.Dataset.show(Dataset.scala:808) at org.apache.spark.sql.Dataset.show(Dataset.scala:817) ... 108 elided Caused by: java.lang.NullPointerException at org.apache.hudi.internal.schema.utils.InternalSchemaUtils.pruneInternalSchemaByID(InternalSchemaUtils.java:97) at org.apache.hudi.internal.schema.utils.InternalSchemaUtils.pruneInternalSchema(InternalSchemaUtils.java:72) at org.apache.hudi.HoodieBaseRelation$.projectSchema(HoodieBaseRelation.scala:702) at org.apache.hudi.HoodieBaseRelation.buildScan(HoodieBaseRelation.scala:337) at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.$anonfun$apply$4(DataSourceStrategy.scala:364) at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.$anonfun$pruneFilterProject$1(DataSourceStrategy.scala:398) at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProjectRaw(DataSourceStrategy.scala:454) at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProject(DataSourceStrategy.scala:397) at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:364) at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491) at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93) at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72) at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78) at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196) at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199) at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192) at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431) at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93) at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72) at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78) at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196) at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199) at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192) at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431) at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75) at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93) at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72) at org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:495) at org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:153) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213) at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
