[
https://issues.apache.org/jira/browse/HUDI-5527?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Luning Wang updated HUDI-5527:
------------------------------
Description:
When I execute the following bootstrap command, it throws an error about
SimpleKeyGenerator. I have set the keygen class to
`NonpartitionedKeyGenerator`, but the setting does not take effect.
{code:java}
bin/spark-submit --master yarn \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer
/opt/hudi-utilities-bundle_2.12-0.12.2.jar \
--run-bootstrap \
--target-base-path /tpcds_hudi_3.db/call_center \
--target-table call_center \
--table-type COPY_ON_WRITE \
--hoodie-conf
hoodie.bootstrap.base.path=h/tpcds_bin_partitioned_parquet_3.db/call_center \
--hoodie-conf
hoodie.bootstrap.keygen.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
\
--hoodie-conf hoodie.datasource.write.recordkey.field=cc_call_center_sk \
--hoodie-conf
hoodie.bootstrap.full.input.provider=org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
\
--hoodie-conf
hoodie.bootstrap.mode.selector=org.apache.hudi.client.bootstrap.selector.BootstrapRegexModeSelector
\
--hoodie-conf hoodie.bootstrap.mode.selector.regex.mode=FULL_RECORD \
--hoodie-conf hoodie.datasource.write.hive_style_partitioning=true {code}
Error message:
{code:java}
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
class org.apache.hudi.keygen.SimpleKeyGenerator
Caused by: java.lang.IllegalArgumentException: Property
hoodie.datasource.write.partitionpath.field not found {code}
Detailed error message:
{code:java}
Exception in thread "main" java.io.IOException: Could not load key generator
class org.apache.hudi.keygen.SimpleKeyGenerator
at
org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:74)
at
org.apache.hudi.util.SparkKeyGenUtils$.getPartitionColumns(SparkKeyGenUtils.scala:36)
at
org.apache.hudi.util.SparkKeyGenUtils.getPartitionColumns(SparkKeyGenUtils.scala)
at
org.apache.hudi.utilities.deltastreamer.BootstrapExecutor.initializeTable(BootstrapExecutor.java:233)
at
org.apache.hudi.utilities.deltastreamer.BootstrapExecutor.execute(BootstrapExecutor.java:158)
at
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:189)
at
org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:573)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:958)
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1046)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1055)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
class org.apache.hudi.keygen.SimpleKeyGenerator
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:91)
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:118)
at
org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:72)
... 18 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:89)
... 20 more
Caused by: java.lang.IllegalArgumentException: Property
hoodie.datasource.write.partitionpath.field not found
at
org.apache.hudi.common.config.TypedProperties.checkKey(TypedProperties.java:67)
at
org.apache.hudi.common.config.TypedProperties.getString(TypedProperties.java:72)
at
org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:41)
... 25 more {code}
was:
When I execute the following bootstrap command, it throws an error about
SimpleKeyGenerator. I have set the keygen class to
`NonpartitionedKeyGenerator`, but it is invalid.
{code:java}
bin/spark-submit --master yarn \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer
/opt/hudi-utilities-bundle_2.12-0.12.2.jar \
--run-bootstrap \
--target-base-path /tpcds_hudi_3.db/call_center \
--target-table call_center \
--table-type COPY_ON_WRITE \
--hoodie-conf
hoodie.bootstrap.base.path=h/tpcds_bin_partitioned_parquet_3.db/call_center \
--hoodie-conf
hoodie.bootstrap.keygen.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
\
--hoodie-conf hoodie.datasource.write.recordkey.field=cc_call_center_sk \
--hoodie-conf
hoodie.bootstrap.full.input.provider=org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
\
--hoodie-conf
hoodie.bootstrap.mode.selector=org.apache.hudi.client.bootstrap.selector.BootstrapRegexModeSelector
\
--hoodie-conf hoodie.bootstrap.mode.selector.regex.mode=FULL_RECORD \
--hoodie-conf hoodie.datasource.write.hive_style_partitioning=true {code}
Error message:
{code:java}
Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
class org.apache.hudi.keygen.SimpleKeyGenerator
Caused by: java.lang.IllegalArgumentException: Property
hoodie.datasource.write.partitionpath.field not found {code}
> Can't set keygen class in bootstrap
> -----------------------------------
>
> Key: HUDI-5527
> URL: https://issues.apache.org/jira/browse/HUDI-5527
> Project: Apache Hudi
> Issue Type: Bug
> Components: bootstrap, hudi-utilities
> Affects Versions: 0.12.2
> Environment: Spark 3.3.1
> Reporter: Luning Wang
> Priority: Major
>
> When I execute the following bootstrap command, it throws an error about
> SimpleKeyGenerator. I have set the keygen class to
> `NonpartitionedKeyGenerator`, but the setting does not take effect.
> {code:java}
> bin/spark-submit --master yarn \
> --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
> --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer
> /opt/hudi-utilities-bundle_2.12-0.12.2.jar \
> --run-bootstrap \
> --target-base-path /tpcds_hudi_3.db/call_center \
> --target-table call_center \
> --table-type COPY_ON_WRITE \
> --hoodie-conf
> hoodie.bootstrap.base.path=h/tpcds_bin_partitioned_parquet_3.db/call_center \
> --hoodie-conf
> hoodie.bootstrap.keygen.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
> \
> --hoodie-conf hoodie.datasource.write.recordkey.field=cc_call_center_sk \
> --hoodie-conf
> hoodie.bootstrap.full.input.provider=org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
> \
> --hoodie-conf
> hoodie.bootstrap.mode.selector=org.apache.hudi.client.bootstrap.selector.BootstrapRegexModeSelector
> \
> --hoodie-conf hoodie.bootstrap.mode.selector.regex.mode=FULL_RECORD \
> --hoodie-conf hoodie.datasource.write.hive_style_partitioning=true {code}
> Error message:
> {code:java}
> Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
> class org.apache.hudi.keygen.SimpleKeyGenerator
> Caused by: java.lang.IllegalArgumentException: Property
> hoodie.datasource.write.partitionpath.field not found {code}
> Detailed error message:
> {code:java}
> Exception in thread "main" java.io.IOException: Could not load key generator
> class org.apache.hudi.keygen.SimpleKeyGenerator
> at
> org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:74)
> at
> org.apache.hudi.util.SparkKeyGenUtils$.getPartitionColumns(SparkKeyGenUtils.scala:36)
> at
> org.apache.hudi.util.SparkKeyGenUtils.getPartitionColumns(SparkKeyGenUtils.scala)
> at
> org.apache.hudi.utilities.deltastreamer.BootstrapExecutor.initializeTable(BootstrapExecutor.java:233)
> at
> org.apache.hudi.utilities.deltastreamer.BootstrapExecutor.execute(BootstrapExecutor.java:158)
> at
> org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:189)
> at
> org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:573)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:958)
> at
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1046)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1055)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: org.apache.hudi.exception.HoodieException: Unable to instantiate
> class org.apache.hudi.keygen.SimpleKeyGenerator
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:91)
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:118)
> at
> org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:72)
> ... 18 more
> Caused by: java.lang.reflect.InvocationTargetException
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native
> Method)
> at
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
> at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
> at
> org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:89)
> ... 20 more
> Caused by: java.lang.IllegalArgumentException: Property
> hoodie.datasource.write.partitionpath.field not found
> at
> org.apache.hudi.common.config.TypedProperties.checkKey(TypedProperties.java:67)
> at
> org.apache.hudi.common.config.TypedProperties.getString(TypedProperties.java:72)
> at
> org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:41)
> ... 25 more {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)