[
https://issues.apache.org/jira/browse/HUDI-1078?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
wangxianghu updated HUDI-1078:
------------------------------
Description:
When running the [Delete data|#deletes] demo in the Quick-Start Guide, I got this
exception:
{code:java}
java.lang.IllegalArgumentException: Field "partitionPath" does not exist.{code}
Steps to reproduce:
{code:java}
// spark-shell
spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--packages org.apache.hudi:hudi-spark-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4 \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
val tableName = "hudi_trips_cow"
val basePath = "file:///tmp/hudi_trips_cow"
val dataGen = new DataGenerator
// insert
val inserts = convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD_OPT_KEY, "ts").
option(RECORDKEY_FIELD_OPT_KEY, "uuid").
option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
option(TABLE_NAME, tableName).
mode(Overwrite).
save(basePath)
// create view
val tripsSnapshotDF = spark.
read.
format("hudi").
load(basePath + "/*/*/*/*")
tripsSnapshotDF.createOrReplaceTempView("hudi_trips_snapshot")
// delete
spark.
read.
format("hudi").
load(basePath + "/*/*/*/*").
createOrReplaceTempView("hudi_trips_snapshot")
spark.sql("select uuid, partitionpath from hudi_trips_snapshot").count()
// fetch two records to be deleted
val ds = spark.sql("select uuid, partitionpath from hudi_trips_snapshot").limit(2)
// issue deletes
val deletes = dataGen.generateDeletes(ds.collectAsList()){code}
Running the last statement fails with:
{code:java}
java.lang.IllegalArgumentException: Field "partitionPath" does not exist.
Available fields: uuid, partitionpath
at
org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
at
org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
at scala.collection.MapLike$class.getOrElse(MapLike.scala:128)
at scala.collection.AbstractMap.getOrElse(Map.scala:59)
at org.apache.spark.sql.types.StructType.fieldIndex(StructType.scala:302)
at
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.fieldIndex(rows.scala:187)
at org.apache.spark.sql.Row$class.getAs(Row.scala:333)
at org.apache.spark.sql.catalyst.expressions.GenericRow.getAs(rows.scala:166)
at
org.apache.hudi.QuickstartUtils$DataGenerator.lambda$generateDeletes$1(QuickstartUtils.java:182)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at
java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
at
org.apache.hudi.QuickstartUtils$DataGenerator.generateDeletes(QuickstartUtils.java:183)
... 61 elided
{code}
was:
When running the [Delete data|#deletes]] demo in Quick-Start Guide, I got this
Exception:
{code:java}
java.lang.IllegalArgumentException: Field "partitionPath" does not exist.{code}
Step to reproduce:
{code:java}
// spark-shell
spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--packages
org.apache.hudi:hudi-spark-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4
\
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
import org.apache.hudi.QuickstartUtils._
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._
val tableName = "hudi_trips_cow"
val basePath = "file:///tmp/hudi_trips_cow"
val dataGen = new DataGenerator
// insert
val inserts = convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
option(PRECOMBINE_FIELD_OPT_KEY, "ts").
option(RECORDKEY_FIELD_OPT_KEY, "uuid").
option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
option(TABLE_NAME, tableName).
mode(Overwrite).
save(basePath)
// create view
val tripsSnapshotDF = spark.
read.
format("hudi").
load(basePath + "/*/*/*/*")
tripsSnapshotDF.createOrReplaceTempView("hudi_trips_snapshot")
// delete
spark.
read.
format("hudi").
load(basePath + "/*/*/*/*").
createOrReplaceTempView("hudi_trips_snapshot")
spark.sql("select uuid, partitionpath from hudi_trips_snapshot").count()
// fetch two records to be deleted
val ds = spark.sql("select uuid, partitionpath from
hudi_trips_snapshot").limit(2)
// issue deletes
val deletes = dataGen.generateDeletes(ds.collectAsList()){code}
will get this:
{code:java}
java.lang.IllegalArgumentException: Field "partitionPath" does not exist.
Available fields: uuid, partitionpath
at
org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
at
org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
at scala.collection.MapLike$class.getOrElse(MapLike.scala:128)
at scala.collection.AbstractMap.getOrElse(Map.scala:59)
at org.apache.spark.sql.types.StructType.fieldIndex(StructType.scala:302)
at
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.fieldIndex(rows.scala:187)
at org.apache.spark.sql.Row$class.getAs(Row.scala:333)
at org.apache.spark.sql.catalyst.expressions.GenericRow.getAs(rows.scala:166)
at
org.apache.hudi.QuickstartUtils$DataGenerator.lambda$generateDeletes$1(QuickstartUtils.java:182)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at
java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at
java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
at
org.apache.hudi.QuickstartUtils$DataGenerator.generateDeletes(QuickstartUtils.java:183)
... 61 elided
{code}
Issue Type: Bug (was: Task)
> Fix IllegalArgumentException in Delete data demo of Quick-Start Guide
> ---------------------------------------------------------------------
>
> Key: HUDI-1078
> URL: https://issues.apache.org/jira/browse/HUDI-1078
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: wangxianghu
> Assignee: Trevorzhang
> Priority: Minor
> Fix For: 0.6.0
>
>
> When running the [Delete data|#deletes] demo in the Quick-Start Guide, I got this
> exception:
> {code:java}
> java.lang.IllegalArgumentException: Field "partitionPath" does not
> exist.{code}
> Step to reproduce:
> {code:java}
> // spark-shell
> spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
> --packages
> org.apache.hudi:hudi-spark-bundle_2.11:0.5.3,org.apache.spark:spark-avro_2.11:2.4.4
> \
> --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
> import org.apache.hudi.QuickstartUtils._
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.config.HoodieWriteConfig._
> val tableName = "hudi_trips_cow"
> val basePath = "file:///tmp/hudi_trips_cow"
> val dataGen = new DataGenerator
> // insert
> val inserts = convertToStringList(dataGen.generateInserts(10))
> val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
> df.write.format("hudi").
> options(getQuickstartWriteConfigs).
> option(PRECOMBINE_FIELD_OPT_KEY, "ts").
> option(RECORDKEY_FIELD_OPT_KEY, "uuid").
> option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
> option(TABLE_NAME, tableName).
> mode(Overwrite).
> save(basePath)
> // create view
> val tripsSnapshotDF = spark.
> read.
> format("hudi").
> load(basePath + "/*/*/*/*")
> tripsSnapshotDF.createOrReplaceTempView("hudi_trips_snapshot")
> // delete
> spark.
> read.
> format("hudi").
> load(basePath + "/*/*/*/*").
> createOrReplaceTempView("hudi_trips_snapshot")
> spark.sql("select uuid, partitionpath from hudi_trips_snapshot").count()
> // fetch two records to be deleted
> val ds = spark.sql("select uuid, partitionpath from
> hudi_trips_snapshot").limit(2)
> // issue deletes
> val deletes = dataGen.generateDeletes(ds.collectAsList()){code}
> will get this:
> {code:java}
> java.lang.IllegalArgumentException: Field "partitionPath" does not exist.
> Available fields: uuid, partitionpath
> at
> org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
> at
> org.apache.spark.sql.types.StructType$$anonfun$fieldIndex$1.apply(StructType.scala:303)
> at scala.collection.MapLike$class.getOrElse(MapLike.scala:128)
> at scala.collection.AbstractMap.getOrElse(Map.scala:59)
> at org.apache.spark.sql.types.StructType.fieldIndex(StructType.scala:302)
> at
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema.fieldIndex(rows.scala:187)
> at org.apache.spark.sql.Row$class.getAs(Row.scala:333)
> at
> org.apache.spark.sql.catalyst.expressions.GenericRow.getAs(rows.scala:166)
> at
> org.apache.hudi.QuickstartUtils$DataGenerator.lambda$generateDeletes$1(QuickstartUtils.java:182)
> at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
> at
> java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
> at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
> at
> java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
> at
> java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
> at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
> at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
> at
> org.apache.hudi.QuickstartUtils$DataGenerator.generateDeletes(QuickstartUtils.java:183)
> ... 61 elided
> {code}
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)