[ https://issues.apache.org/jira/browse/HUDI-3335?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Harsha Teja Kanna updated HUDI-3335:
------------------------------------
    Description: 
Environment

Spark 3.1.2

Hudi 0.10.1

Query

import org.apache.hudi.DataSourceReadOptions
import org.apache.hudi.common.config.HoodieMetadataConfig

val basePath = "s3a://datalake-hudi/v1"

 val df = spark.
    read.
    format("org.apache.hudi").
    option(HoodieMetadataConfig.ENABLE.key(), "true").
    option(DataSourceReadOptions.QUERY_TYPE.key(), 
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL).
    load(s"${basePath}/sessions/")
 df.createOrReplaceTempView("sessions")

 

We have a COW (copy-on-write) table with the metadata table enabled. Loading it via a Spark query fails with:
java.lang.NullPointerException
  at 
org.sparkproject.guava.base.Preconditions.checkNotNull(Preconditions.java:191)
  at org.sparkproject.guava.cache.LocalCache.put(LocalCache.java:4210)
  at 
org.sparkproject.guava.cache.LocalCache$LocalManualCache.put(LocalCache.java:4804)
  at 
org.apache.spark.sql.execution.datasources.SharedInMemoryCache$$anon$3.putLeafFiles(FileStatusCache.scala:161)
  at 
org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4(HoodieFileIndex.scala:631)
  at 
org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4$adapted(HoodieFileIndex.scala:629)
  at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
  at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
  at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
  at 
org.apache.hudi.HoodieFileIndex.loadPartitionPathFiles(HoodieFileIndex.scala:629)
  at org.apache.hudi.HoodieFileIndex.refresh0(HoodieFileIndex.scala:387)
  at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:184)
  at org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:199)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:119)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69)
  at 
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:355)
  at 
org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:325)
  at 
org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:307)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:307)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:239)
  at $anonfun$res3$1(<console>:46)
  at $anonfun$res3$1$adapted(<console>:40)
  at scala.collection.Iterator.foreach(Iterator.scala:941)
  at scala.collection.Iterator.foreach$(Iterator.scala:941)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
  at scala.collection.IterableLike.foreach(IterableLike.scala:74)
  at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
  at scala.collection.AbstractIterable.foreach(Iterable.scala:56)

  was:
Environment

Spark 3.1.2

Hudi 0.10.1

Have a COW table with metadata enabled. Loading from Spark query fails with 
java.lang.NullPointerException
  at 
org.sparkproject.guava.base.Preconditions.checkNotNull(Preconditions.java:191)
  at org.sparkproject.guava.cache.LocalCache.put(LocalCache.java:4210)
  at 
org.sparkproject.guava.cache.LocalCache$LocalManualCache.put(LocalCache.java:4804)
  at 
org.apache.spark.sql.execution.datasources.SharedInMemoryCache$$anon$3.putLeafFiles(FileStatusCache.scala:161)
  at 
org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4(HoodieFileIndex.scala:631)
  at 
org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4$adapted(HoodieFileIndex.scala:629)
  at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
  at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
  at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
  at 
org.apache.hudi.HoodieFileIndex.loadPartitionPathFiles(HoodieFileIndex.scala:629)
  at org.apache.hudi.HoodieFileIndex.refresh0(HoodieFileIndex.scala:387)
  at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:184)
  at org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:199)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:119)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69)
  at 
org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:355)
  at 
org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:325)
  at 
org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:307)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:307)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:239)
  at $anonfun$res3$1(<console>:46)
  at $anonfun$res3$1$adapted(<console>:40)
  at scala.collection.Iterator.foreach(Iterator.scala:941)
  at scala.collection.Iterator.foreach$(Iterator.scala:941)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
  at scala.collection.IterableLike.foreach(IterableLike.scala:74)
  at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
  at scala.collection.AbstractIterable.foreach(Iterable.scala:56)


> Loading Hudi table fails with NullPointerException
> --------------------------------------------------
>
>                 Key: HUDI-3335
>                 URL: https://issues.apache.org/jira/browse/HUDI-3335
>             Project: Apache Hudi
>          Issue Type: Bug
>    Affects Versions: 0.10.1
>            Reporter: Harsha Teja Kanna
>            Priority: Critical
>
> Environment
> Spark 3.1.2
> Hudi 0.10.1
> Query
> import org.apache.hudi.DataSourceReadOptions
> import org.apache.hudi.common.config.HoodieMetadataConfig
> val basePath = "s3a://datalake-hudi/v1"
>  val df = spark.
>     read.
>     format("org.apache.hudi").
>     option(HoodieMetadataConfig.ENABLE.key(), "true").
>     option(DataSourceReadOptions.QUERY_TYPE.key(), 
> DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL).
>     load(s"${basePath}/sessions/")
>  df.createOrReplaceTempView("sessions")
>  
> We have a COW (copy-on-write) table with the metadata table enabled. Loading it via a Spark query fails with:
> java.lang.NullPointerException
>   at 
> org.sparkproject.guava.base.Preconditions.checkNotNull(Preconditions.java:191)
>   at org.sparkproject.guava.cache.LocalCache.put(LocalCache.java:4210)
>   at 
> org.sparkproject.guava.cache.LocalCache$LocalManualCache.put(LocalCache.java:4804)
>   at 
> org.apache.spark.sql.execution.datasources.SharedInMemoryCache$$anon$3.putLeafFiles(FileStatusCache.scala:161)
>   at 
> org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4(HoodieFileIndex.scala:631)
>   at 
> org.apache.hudi.HoodieFileIndex.$anonfun$loadPartitionPathFiles$4$adapted(HoodieFileIndex.scala:629)
>   at scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
>   at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
>   at scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
>   at 
> org.apache.hudi.HoodieFileIndex.loadPartitionPathFiles(HoodieFileIndex.scala:629)
>   at org.apache.hudi.HoodieFileIndex.refresh0(HoodieFileIndex.scala:387)
>   at org.apache.hudi.HoodieFileIndex.<init>(HoodieFileIndex.scala:184)
>   at 
> org.apache.hudi.DefaultSource.getBaseFileOnlyView(DefaultSource.scala:199)
>   at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:119)
>   at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:69)
>   at 
> org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:355)
>   at 
> org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:325)
>   at 
> org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:307)
>   at scala.Option.getOrElse(Option.scala:189)
>   at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:307)
>   at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:239)
>   at $anonfun$res3$1(<console>:46)
>   at $anonfun$res3$1$adapted(<console>:40)
>   at scala.collection.Iterator.foreach(Iterator.scala:941)
>   at scala.collection.Iterator.foreach$(Iterator.scala:941)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to