xuranyang opened a new issue #4165:
URL: https://github.com/apache/hudi/issues/4165
Environment: Amazon EMR with Hudi 0.8.0 (hudi-utilities-bundle_2.12-0.8.0-amzn-0).
spark-submit --class
org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer \
`ls /usr/lib/hudi/hudi-utilities-bundle_2.12-0.8.0-amzn-0.jar` \
--props
file:///opt/config/multi_delta_streamer_test/kafka-source.properties \
--config-folder
file:///opt/config/multi_delta_streamer_test/hudi-ingestion-config \
--schemaprovider-class
org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
--source-class org.apache.hudi.utilities.sources.JsonKafkaSource \
--target-table dummy_table \
--op UPSERT --table-type MERGE_ON_READ \
--enable-hive-sync
Exception in thread "main" java.lang.NullPointerException
at
org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer.resetTarget(HoodieMultiTableDeltaStreamer.java:345)
at
org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer.populateTableExecutionContextList(HoodieMultiTableDeltaStreamer.java:124)
at
org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer.<init>(HoodieMultiTableDeltaStreamer.java:81)
at
org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer.main(HoodieMultiTableDeltaStreamer.java:204)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:959)
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1047)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1056)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
### config_table1.properties:
hoodie.datasource.write.recordkey.field=id
hoodie.datasource.write.partitionpath.field=
hoodie.deltastreamer.source.kafka.topic=HUDI_MDS_TEST_1
hoodie.datasource.hive_sync.table=hudi_mul_ds_table1
hoodie.deltastreamer.ingestion.targetBasePath=hdfs:///tmp/multi_delta_streamer/table1
hoodie.deltastreamer.schemaprovider.source.schema.file=file:///opt/config/delta_streamer_test/source-schema-json-table1.avsc
hoodie.deltastreamer.schemaprovider.target.schema.file=file:///opt/config/delta_streamer_test/target-schema-json-table1.avsc
hoodie.datasource.hive_sync.partition_fields=
hoodie.datasource.hive_sync.assume_date_partitioning=false
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor
### config_table2.properties:
hoodie.datasource.write.recordkey.field=id
hoodie.datasource.write.partitionpath.field=
hoodie.deltastreamer.source.kafka.topic=HUDI_MDS_TEST_2
hoodie.datasource.hive_sync.table=hudi_mul_ds_table2
hoodie.deltastreamer.ingestion.targetBasePath=hdfs:///tmp/multi_delta_streamer/table2
hoodie.deltastreamer.schemaprovider.source.schema.file=file:///opt/config/delta_streamer_test/source-schema-json-table2.avsc
hoodie.deltastreamer.schemaprovider.target.schema.file=file:///opt/config/delta_streamer_test/target-schema-json-table2.avsc
hoodie.datasource.hive_sync.partition_fields=
hoodie.datasource.hive_sync.assume_date_partitioning=false
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor
### kafka-source.properties:
include=file:///opt/config/delta_streamer_test/base.properties
hoodie.deltastreamer.ingestion.tablesToBeIngested=db1.table1,db1.table2
hoodie.deltastreamer.ingestion.db1.table1.configFile=file:///opt/config/multi_delta_streamer_test/hudi-ingestion-config/config_table1.properties
hoodie.deltastreamer.ingestion.db1.table2.configFile=file:///opt/config/multi_delta_streamer_test/hudi-ingestion-config/config_table2.properties
#Kafka props
bootstrap.servers=172.28.66.93:9092
auto.offset.reset=earliest
group.id=multi-delta-streamer-test-group
enable.auto.commit=false
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]