bvaradar commented on issue #2149: URL: https://github.com/apache/hudi/issues/2149#issuecomment-712676325
@ashishmgofficial : This turned out to be unrelated to Hudi. I tested with the Debezium local setup. Debezium is writing 2 Kafka records for each deleted record, with one of the records having its value set to "null". You can inspect the Kafka topic using kafka-avro-console-consumer. This "null" record is causing the Spark row encoding to fail. root@schemaregistry:/# kafka-avro-console-consumer --bootstrap-server kafka:9092 --topic debezium.public.motor_crash_violation_incidents --offset 'earliest' --partition 0 -property schema.registry.url=http://localhost:8085 --property print.key=true ``` {"inc_id":3} {"before":{"debezium.public.motor_crash_violation_incidents.Value":{"inc_id":3,"year":{"int":2016},"violation_desc":{"string":"AGGRAVATED UNLIC OPER 2ND/PREV CONV"},"violation_code":{"string":"5112A1"},"case_individual_id":{"int":17475367},"flag":{"string":"U"},"last_modified_ts":1600959600000000}},"after":null,"source":{"version":"1.3.0.Final","connector":"postgresql","name":"debezium","ts_ms":1603179674623,"snapshot":{"string":"false"},"db":"debezium","schema":"public","table":"motor_crash_violation_incidents","txId":{"long":638},"lsn":{"long":34213136},"xmin":null},"op":"d","ts_ms":{"long":1603179675108},"transaction":null} {"inc_id":3} null ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org
