prodeezy opened a new issue #99: Iceberg fails to return results when filtered 
on complex columns
URL: https://github.com/apache/incubator-iceberg/issues/99
 
 
   # Sample JSON Data
   scala> json.show
   +----+-------+--------------------+
   | age|   name|             friends|
   +----+-------+--------------------+
   |null|Michael|                null|
   |  30|   Andy|[Josh -> 10, Bria...|
   |  19| Justin|[Bharat -> 15, Gau...|
   +----+-------+--------------------+
   
   # JSON table schema
   scala> json.printSchema
   root
    |-- age: integer (nullable = true)
    |-- name: string (nullable = true)
    |-- friends: map (nullable = true)
    |    |-- key: string
    |    |-- value: integer (valueContainsNull = true)
   
   # Predicate works on vanilla Spark
   scala> spark.sql("select * from people_json_complex where friends['Josh'] = 10").show()
   sqlDF: org.apache.spark.sql.DataFrame = [age: int, name: string ... 1 more field]
   
   +---+----+--------------------+
   |age|name|             friends|
   +---+----+--------------------+
   | 30|Andy|[Josh -> 10, Bria...|
   +---+----+--------------------+
   
   
   
   
   # Sample data in Iceberg format
   scala> val iceDf = spark.read.format("iceberg").load("iceberg-people-complex")
   scala> iceDf.show()
   
   +----+-------+--------------------+
   | age|   name|             friends|
   +----+-------+--------------------+
   |null|Michael|                null|
   |  30|   Andy|[Josh -> 10, Bria...|
   |  19| Justin|[Bharat -> 15, Xa...|
   +----+-------+--------------------+
   
   # Schema on iceberg table 
   scala> iceDf.printSchema
   root
    |-- age: integer (nullable = true)
    |-- name: string (nullable = true)
    |-- friends: map (nullable = true)
    |    |-- key: string
    |    |-- value: integer (valueContainsNull = true)
   
   # Create table over iceberg data
   scala> iceDf.createOrReplaceTempView("people_iceberg_complex")
   
   # Filter on complex (map) column returns no rows (the same predicate matched Andy's row on the JSON table above)
   scala> spark.sql("select * from people_iceberg_complex where friends['Josh'] = 10").show()
   sqlDF: org.apache.spark.sql.DataFrame = [age: int, name: string ... 1 more field]
   
   +---+----+-------+
   |age|name|friends|
   +---+----+-------+
   +---+----+-------+
   
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to