Amar1404 opened a new issue, #10488:
URL: https://github.com/apache/hudi/issues/10488

   **_Tips before filing an issue_**
   
   - Have you gone through our [FAQs](https://hudi.apache.org/learn/faq/)?
   
   - Join the mailing list to engage in conversations and get faster support at 
[email protected].
   
   - If you have triaged this as a bug, then file an 
[issue](https://issues.apache.org/jira/projects/HUDI/issues) directly.
   
   I am not able to do a simple count when hoodie.schema.on.read.enable=true
   
   A clear and concise description of the problem.
   
   **To Reproduce**
   
   Steps to reproduce the behavior:
   
   
   
   val hudiOptions= Map(
       "hoodie.parquet.compression.codec"->"zstd",
       "hoodie.datasource.write.hive_style_partitioning"->"true",
       "hoodie.embed.timeline.server"->"true",
         "hoodie.datasource.write.reconcile.schema"-> "false",
         "hoodie.schema.on.read.enable"-> "true",
         "hoodie.datasource.write.keygenerator.class"-> 
"org.apache.hudi.keygen.SimpleKeyGenerator",
   "hoodie.metadata.enable"->"true",
   "hoodie.index.type"->"BLOOM",
   )
   
   val columns = Seq("ts","uuid","rider","driver","fare","city")
   val data =
     
Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
       
(1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70
 ,"san_francisco"),
       
(1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90
 ,"san_francisco"),
       
(1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"),
       
(1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"))
   
   
   var inserts = spark.createDataFrame(data).toDF(columns:_*)
   
   
   inserts.write.format("org.apache.hudi")
   .option(OPERATION_OPT_KEY, "insert").
     option(PARTITIONPATH_FIELD_OPT_KEY, "city")
     .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
     .option(RECORDKEY_FIELD_OPT_KEY, "uuid")
     .option(TABLE_NAME, "test_hudi")
    .options(hudiOptions)
     .mode(Overwrite).
     save(Path)
     
      spark.read.format("hudi").options(hudiOptions).load(Path).count
   
   **Expected behavior**
   
   The count of rows should be returned successfully, but instead the query fails with the stacktrace below.
   
   **Environment Description**
   
   * Hudi version : 0.12.3,0.14.0
   
   * Spark version : 3.3
   
   * Hive version :
   
   * Hadoop version :
   
   * Storage (HDFS/S3/GCS..) : s3
   
   * Running on Docker? (yes/no) :no
   
   
   **Additional context**
   
   Add any other context about the problem here.
   
   **Stacktrace**
   
   The Spark SQL phase planning failed with an internal error. Please, fill a 
bug report in, and provide the full stack trace.
     at 
org.apache.spark.sql.execution.QueryExecution$.toInternalError(QueryExecution.scala:542)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:554)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
     at 
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212)
     at 
org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:153)
     at 
org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:146)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:166)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
     at 
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212)
     at 
org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:163)
     at 
org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:159)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$5(QueryExecution.scala:298)
     at 
org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:657)
     at 
org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:298)
     at 
org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:313)
     at 
org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:267)
     at 
org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:246)
     at 
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
     at 
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139)
     at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
     at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
     at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3920)
     at org.apache.spark.sql.Dataset.head(Dataset.scala:2904)
     at org.apache.spark.sql.Dataset.take(Dataset.scala:3125)
     at org.apache.spark.sql.Dataset.getRows(Dataset.scala:290)
     at org.apache.spark.sql.Dataset.showString(Dataset.scala:329)
     at org.apache.spark.sql.Dataset.show(Dataset.scala:849)
     at org.apache.spark.sql.Dataset.show(Dataset.scala:808)
     at org.apache.spark.sql.Dataset.show(Dataset.scala:817)
     ... 108 elided
   Caused by: java.lang.NullPointerException
     at 
org.apache.hudi.internal.schema.utils.InternalSchemaUtils.pruneInternalSchemaByID(InternalSchemaUtils.java:97)
     at 
org.apache.hudi.internal.schema.utils.InternalSchemaUtils.pruneInternalSchema(InternalSchemaUtils.java:72)
     at 
org.apache.hudi.HoodieBaseRelation$.projectSchema(HoodieBaseRelation.scala:702)
     at 
org.apache.hudi.HoodieBaseRelation.buildScan(HoodieBaseRelation.scala:337)
     at 
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.$anonfun$apply$4(DataSourceStrategy.scala:364)
     at 
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.$anonfun$pruneFilterProject$1(DataSourceStrategy.scala:398)
     at 
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProjectRaw(DataSourceStrategy.scala:454)
     at 
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProject(DataSourceStrategy.scala:397)
     at 
org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:364)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
     at 
org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
     at 
scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
     at 
scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
     at scala.collection.Iterator.foreach(Iterator.scala:943)
     at scala.collection.Iterator.foreach$(Iterator.scala:943)
     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
     at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
     at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
     at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
     at 
org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
     at 
scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
     at 
scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
     at scala.collection.Iterator.foreach(Iterator.scala:943)
     at scala.collection.Iterator.foreach$(Iterator.scala:943)
     at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
     at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
     at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
     at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
     at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
     at 
org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72)
     at 
org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:495)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:153)
     at 
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192)
     at 
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213)
     at 
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to