chandu-1101 commented on issue #9141:
URL: https://github.com/apache/hudi/issues/9141#issuecomment-1643139279
The fix is to change the following
```
"addressLines": [null],
```
to
```
"addressLines": [""],
```
in the source JSON.
Code to reproduce the issue:
```
// `json1` is defined outside this snippet (presumably the JSON record shown below — confirm).
// Wrap it in a one-element Dataset, then have Spark parse that Dataset into a DataFrame.
val jsonLines = Seq(json1).toDS
val df1 = spark.read.json(jsonLines)
import org.apache.spark.sql.{Column, DataFrame}
import org.apache.commons.lang3.ClassUtils.getCanonicalName
import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
import org.apache.hudi.{DataSourceWriteOptions, QuickstartUtils}
import org.apache.hudi.common.model.{HoodieAvroPayload,
HoodieFileFormat, WriteOperationType}
import org.apache.hudi.common.table.HoodieTableConfig
import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig}
import org.apache.hudi.keygen.constant.KeyGeneratorOptions
import java.util
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
import org.apache.hudi.keygen.{NonpartitionedKeyGenerator,
SimpleKeyGenerator}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions.{col, hash, lit}
import org.apache.hudi.QuickstartUtils._
// Explicit overrides applied on top of the quickstart defaults:
// the Hive-sync partition extractor, the precombine field, the record key field
// and the table name.
val hudiWriteOptions: Map[String, String] = Map(
  "hoodie.datasource.hive_sync.partition_extractor_class" -> "org.apache.hudi.hive.NonPartitionedExtractor",
  DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "cdc_pk",
  DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_id.oid",
  HoodieWriteConfig.TABLE_NAME -> "GE11"
)

// Write df1 as a Hudi table, overwriting whatever already exists at the target path.
// The quickstart configs are applied first so the explicit overrides above take precedence.
df1.write
  .format("hudi")
  .options(getQuickstartWriteConfigs)
  .options(hudiWriteOptions)
  .mode(SaveMode.Overwrite)
  .save("s3://bucket/snapshots-hudi/ge11-drop/snapshot1")
```
The record that doesn't work:
```
{
"_id": {
"oid": "1"
},
"cdc_pk": "45",
"addressLogs": [{
"createdDate": "2021-09-06T17:17:41.576Z",
"fieldId": "eb4b6bd9-1fc0-4d38-b2d4-4cba87bb65a4",
"isDerived": false,
"location": "hyderabad (HC) PL",
"original": {
"location": "hyderabad (HC) PL"
},
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-29T20:44:26.788Z"
}, {
"addressLines": [null],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-29T20:44:26.788Z",
"fieldId": "1beefa35-7d08-4ca7-9fe1-88e59abb4c89",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-29T20:44:26.788Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-29T20:44:26.788Z",
"fieldId": "42720793-1920-4a35-9e3e-23f91e00341e",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p2",
"standardized": false,
"state": "TN",
"updatedDate": "2023-06-29T20:44:26.788Z",
"zipCode": "02-583"
}, {
"addressLines": [null],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-28T19:48:31.948Z",
"fieldId": "7b56cdae-fbbc-4dd4-996e-6214b590db4a",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-28T19:48:31.948Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-28T19:48:31.948Z",
"fieldId": "27e67381-c688-4879-a0a4-319ae051dca8",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p2",
"standardized": false,
"state": "TN",
"updatedDate": "2023-06-28T19:48:31.948Z",
"zipCode": "02-583"
}, {
"addressLines": [null],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2021-11-26T14:49:38.305Z",
"fieldId": "0352928b-1a42-40d7-81fd-cf711a2ecda1",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p4",
"standardized": false,
"updatedDate": "2022-03-24T13:52:11.876Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2021-11-26T14:49:38.305Z",
"fieldId": "058fc73f-559c-414c-8dfb-9ad830fbbdf3",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p4",
"standardized": false,
"state": "TN",
"updatedDate": "2022-03-24T13:52:11.876Z",
"zipCode": "02-583"
}, {
"createdDate": "2021-09-03T16:36:57.802Z",
"fieldId": "c8c7e9c7-f7ad-4df4-90d2-5d07eeaa141f",
"isDerived": false,
"location": "hyderabad (HC) PL",
"original": {
"location": "hyderabad (HC) PL"
},
"source": "p4",
"standardized": false,
"updatedDate": "2022-03-24T13:52:11.876Z"
}, {
"country": "Srilanka",
"createdDate": "2020-01-29T16:14:00.050Z",
"fieldId": "4fbcc142-565d-4aa4-af00-6daa807dd951",
"isDerived": false,
"location": "Srilanka",
"locationIp": {
"city": "Amsterdam",
"continentCode": "EU",
"continentName": "Europe",
"country": "Netherlands",
"countryIsoCode": "NL",
"latitude": "52.3759",
"longitude": "4.8975",
"postalCode": "1012",
"registeredCountry": "United Kingdom",
"registeredCountryIsoCode": "GB",
"subDivisions": "North Holland",
"subDivisionsIsoCode": "NH",
"timeZone": "Europe/Amsterdam"
},
"original": {
"country": "Srilanka",
"location": "Srilanka"
},
"source": "p9",
"standardized": false,
"updatedDate": "2023-02-25T19:31:23.901Z"
}, {
"addressLines": [null],
"city": "hyderabad",
"country": "POL",
"createdDate": "2021-08-10T16:34:32.662Z",
"fieldId": "48318942-e268-4d66-8084-e19e302a73d7",
"isDerived": false,
"location": "hyderabad, POL",
"original": {
"city": "hyderabad",
"country": "POL",
"location": "hyderabad, POL"
},
"source": "p11",
"standardized": false,
"updatedDate": "2021-08-11T10:47:02.326Z"
}]
}
```
The corrected record that works (every `"addressLines": [null]` replaced with `"addressLines": [""]`):
```
{
"_id": {
"oid": "1"
},
"cdc_pk": "45",
"addressLogs": [{
"createdDate": "2021-09-06T17:17:41.576Z",
"fieldId": "eb4b6bd9-1fc0-4d38-b2d4-4cba87bb65a4",
"isDerived": false,
"location": "hyderabad (HC) PL",
"original": {
"location": "hyderabad (HC) PL"
},
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-29T20:44:26.788Z"
}, {
"addressLines": [""],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-29T20:44:26.788Z",
"fieldId": "1beefa35-7d08-4ca7-9fe1-88e59abb4c89",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-29T20:44:26.788Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-29T20:44:26.788Z",
"fieldId": "42720793-1920-4a35-9e3e-23f91e00341e",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p2",
"standardized": false,
"state": "TN",
"updatedDate": "2023-06-29T20:44:26.788Z",
"zipCode": "02-583"
}, {
"addressLines": [""],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-28T19:48:31.948Z",
"fieldId": "7b56cdae-fbbc-4dd4-996e-6214b590db4a",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p2",
"standardized": false,
"updatedDate": "2023-06-28T19:48:31.948Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2023-06-28T19:48:31.948Z",
"fieldId": "27e67381-c688-4879-a0a4-319ae051dca8",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p2",
"standardized": false,
"state": "TN",
"updatedDate": "2023-06-28T19:48:31.948Z",
"zipCode": "02-583"
}, {
"addressLines": [""],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2021-11-26T14:49:38.305Z",
"fieldId": "0352928b-1a42-40d7-81fd-cf711a2ecda1",
"isDerived": false,
"location": "hyderabad, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, Srilanka"
},
"residentialType": "HOME",
"source": "p4",
"standardized": false,
"updatedDate": "2022-03-24T13:52:11.876Z"
}, {
"addressLines": ["raddison,Business Park,cly B-"],
"city": "hyderabad",
"country": "Srilanka",
"createdDate": "2021-11-26T14:49:38.305Z",
"fieldId": "058fc73f-559c-414c-8dfb-9ad830fbbdf3",
"isDerived": false,
"location": "hyderabad, TN, Srilanka",
"original": {
"city": "hyderabad",
"country": "Srilanka",
"location": "hyderabad, TN, Srilanka",
"state": "TN"
},
"residentialType": "WORK",
"source": "p4",
"standardized": false,
"state": "TN",
"updatedDate": "2022-03-24T13:52:11.876Z",
"zipCode": "02-583"
}, {
"createdDate": "2021-09-03T16:36:57.802Z",
"fieldId": "c8c7e9c7-f7ad-4df4-90d2-5d07eeaa141f",
"isDerived": false,
"location": "hyderabad (HC) PL",
"original": {
"location": "hyderabad (HC) PL"
},
"source": "p4",
"standardized": false,
"updatedDate": "2022-03-24T13:52:11.876Z"
}, {
"country": "Srilanka",
"createdDate": "2020-01-29T16:14:00.050Z",
"fieldId": "4fbcc142-565d-4aa4-af00-6daa807dd951",
"isDerived": false,
"location": "Srilanka",
"locationIp": {
"city": "Amsterdam",
"continentCode": "EU",
"continentName": "Europe",
"country": "Netherlands",
"countryIsoCode": "NL",
"latitude": "52.3759",
"longitude": "4.8975",
"postalCode": "1012",
"registeredCountry": "United Kingdom",
"registeredCountryIsoCode": "GB",
"subDivisions": "North Holland",
"subDivisionsIsoCode": "NH",
"timeZone": "Europe/Amsterdam"
},
"original": {
"country": "Srilanka",
"location": "Srilanka"
},
"source": "p9",
"standardized": false,
"updatedDate": "2023-02-25T19:31:23.901Z"
}, {
"addressLines": [""],
"city": "hyderabad",
"country": "POL",
"createdDate": "2021-08-10T16:34:32.662Z",
"fieldId": "48318942-e268-4d66-8084-e19e302a73d7",
"isDerived": false,
"location": "hyderabad, POL",
"original": {
"city": "hyderabad",
"country": "POL",
"location": "hyderabad, POL"
},
"source": "p11",
"standardized": false,
"updatedDate": "2021-08-11T10:47:02.326Z"
}]
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]