chandu-1101 commented on issue #9141:
URL: https://github.com/apache/hudi/issues/9141#issuecomment-1643139279

   
   The fix is to change the following
   
   ```
   "addressLines": [null],
   ```
   to
   
   ```
   "addressLines": [""],
   ```
   in the source JSON. 
   
   Code to reproduce the issue:
   ```
   // Reproduction script: parse a single JSON document into a DataFrame and
   // write it out as a Hudi table.
   // NOTE(review): `json1` is not defined in this snippet -- presumably it holds
   // the JSON record shown further down in this comment; confirm.
   // NOTE(review): `spark` and the implicits needed by `.toDS` are not imported
   // here -- presumably this was run in spark-shell where they are in scope.
   val df1 = spark.read.json(Seq(json1).toDS)
       // Several of the imports below (e.g. Column, DataFrame, getCanonicalName,
       // SparkParquetBootstrapDataProvider, java.util) are not referenced by the
       // write call in this snippet.
       import org.apache.spark.sql.{Column, DataFrame}
       import org.apache.commons.lang3.ClassUtils.getCanonicalName
       import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
       import org.apache.hudi.{DataSourceWriteOptions, QuickstartUtils}
       import org.apache.hudi.common.model.{HoodieAvroPayload, 
HoodieFileFormat, WriteOperationType}
       import org.apache.hudi.common.table.HoodieTableConfig
       import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig}
       import org.apache.hudi.keygen.constant.KeyGeneratorOptions
   
       import java.util
       import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
       import org.apache.hudi.keygen.{NonpartitionedKeyGenerator, 
SimpleKeyGenerator}
       import org.apache.spark.sql.SaveMode
       import org.apache.spark.sql.functions.{col, hash, lit}
       import org.apache.hudi.QuickstartUtils._
       
       // Write df1 in "hudi" format with SaveMode.Overwrite to the S3 path below.
       // Options set: table name "GE11", record key = nested field "_id.oid",
       // precombine field = "cdc_pk", and the Hive-sync partition extractor set to
       // NonPartitionedExtractor.
       // `getQuickstartWriteConfigs` is presumably supplied by the
       // QuickstartUtils._ wildcard import above -- confirm.
       // Per the surrounding comment, this is the step that fails when the input
       // contains "addressLines": [null] and succeeds with "addressLines": [""].
       df1.write.format("hudi")
         .options(getQuickstartWriteConfigs)
         
.option("hoodie.datasource.hive_sync.partition_extractor_class","org.apache.hudi.hive.NonPartitionedExtractor")
         .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "cdc_pk")
         .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "_id.oid")
         .option(HoodieWriteConfig.TABLE_NAME, "GE11")
         .mode(SaveMode.Overwrite)
         .save("s3://bucket/snapshots-hudi/ge11-drop/snapshot1");
   ```
   
   
   
   
   
   
   The record that doesn't work:
   ```
   {
        "_id": {
                "oid": "1"
        },
        "cdc_pk": "45",
        "addressLogs": [{
                "createdDate": "2021-09-06T17:17:41.576Z",
                "fieldId": "eb4b6bd9-1fc0-4d38-b2d4-4cba87bb65a4",
                "isDerived": false,
                "location": "hyderabad (HC) PL",
                "original": {
                        "location": "hyderabad (HC) PL"
                },
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-29T20:44:26.788Z"
        }, {
                "addressLines": [null],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-29T20:44:26.788Z",
                "fieldId": "1beefa35-7d08-4ca7-9fe1-88e59abb4c89",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-29T20:44:26.788Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-29T20:44:26.788Z",
                "fieldId": "42720793-1920-4a35-9e3e-23f91e00341e",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p2",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2023-06-29T20:44:26.788Z",
                "zipCode": "02-583"
        }, {
                "addressLines": [null],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-28T19:48:31.948Z",
                "fieldId": "7b56cdae-fbbc-4dd4-996e-6214b590db4a",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-28T19:48:31.948Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-28T19:48:31.948Z",
                "fieldId": "27e67381-c688-4879-a0a4-319ae051dca8",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p2",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2023-06-28T19:48:31.948Z",
                "zipCode": "02-583"
        }, {
                "addressLines": [null],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2021-11-26T14:49:38.305Z",
                "fieldId": "0352928b-1a42-40d7-81fd-cf711a2ecda1",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p4",
                "standardized": false,
                "updatedDate": "2022-03-24T13:52:11.876Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2021-11-26T14:49:38.305Z",
                "fieldId": "058fc73f-559c-414c-8dfb-9ad830fbbdf3",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p4",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2022-03-24T13:52:11.876Z",
                "zipCode": "02-583"
        }, {
                "createdDate": "2021-09-03T16:36:57.802Z",
                "fieldId": "c8c7e9c7-f7ad-4df4-90d2-5d07eeaa141f",
                "isDerived": false,
                "location": "hyderabad (HC) PL",
                "original": {
                        "location": "hyderabad (HC) PL"
                },
                "source": "p4",
                "standardized": false,
                "updatedDate": "2022-03-24T13:52:11.876Z"
        }, {
                "country": "Srilanka",
                "createdDate": "2020-01-29T16:14:00.050Z",
                "fieldId": "4fbcc142-565d-4aa4-af00-6daa807dd951",
                "isDerived": false,
                "location": "Srilanka",
                "locationIp": {
                        "city": "Amsterdam",
                        "continentCode": "EU",
                        "continentName": "Europe",
                        "country": "Netherlands",
                        "countryIsoCode": "NL",
                        "latitude": "52.3759",
                        "longitude": "4.8975",
                        "postalCode": "1012",
                        "registeredCountry": "United Kingdom",
                        "registeredCountryIsoCode": "GB",
                        "subDivisions": "North Holland",
                        "subDivisionsIsoCode": "NH",
                        "timeZone": "Europe/Amsterdam"
                },
                "original": {
                        "country": "Srilanka",
                        "location": "Srilanka"
                },
                "source": "p9",
                "standardized": false,
                "updatedDate": "2023-02-25T19:31:23.901Z"
        }, {
                "addressLines": [null],
                "city": "hyderabad",
                "country": "POL",
                "createdDate": "2021-08-10T16:34:32.662Z",
                "fieldId": "48318942-e268-4d66-8084-e19e302a73d7",
                "isDerived": false,
                "location": "hyderabad, POL",
                "original": {
                        "city": "hyderabad",
                        "country": "POL",
                        "location": "hyderabad, POL"
                },
                "source": "p11",
                "standardized": false,
                "updatedDate": "2021-08-11T10:47:02.326Z"
        }]
   }
   ```
   
   The corrected record that works
   
   ```
   {
        "_id": {
                "oid": "1"
        },
        "cdc_pk": "45",
        "addressLogs": [{
                "createdDate": "2021-09-06T17:17:41.576Z",
                "fieldId": "eb4b6bd9-1fc0-4d38-b2d4-4cba87bb65a4",
                "isDerived": false,
                "location": "hyderabad (HC) PL",
                "original": {
                        "location": "hyderabad (HC) PL"
                },
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-29T20:44:26.788Z"
        }, {
                "addressLines": [""],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-29T20:44:26.788Z",
                "fieldId": "1beefa35-7d08-4ca7-9fe1-88e59abb4c89",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-29T20:44:26.788Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-29T20:44:26.788Z",
                "fieldId": "42720793-1920-4a35-9e3e-23f91e00341e",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p2",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2023-06-29T20:44:26.788Z",
                "zipCode": "02-583"
        }, {
                "addressLines": [""],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-28T19:48:31.948Z",
                "fieldId": "7b56cdae-fbbc-4dd4-996e-6214b590db4a",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p2",
                "standardized": false,
                "updatedDate": "2023-06-28T19:48:31.948Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2023-06-28T19:48:31.948Z",
                "fieldId": "27e67381-c688-4879-a0a4-319ae051dca8",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p2",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2023-06-28T19:48:31.948Z",
                "zipCode": "02-583"
        }, {
                "addressLines": [""],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2021-11-26T14:49:38.305Z",
                "fieldId": "0352928b-1a42-40d7-81fd-cf711a2ecda1",
                "isDerived": false,
                "location": "hyderabad, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, Srilanka"
                },
                "residentialType": "HOME",
                "source": "p4",
                "standardized": false,
                "updatedDate": "2022-03-24T13:52:11.876Z"
        }, {
                "addressLines": ["raddison,Business Park,cly B-"],
                "city": "hyderabad",
                "country": "Srilanka",
                "createdDate": "2021-11-26T14:49:38.305Z",
                "fieldId": "058fc73f-559c-414c-8dfb-9ad830fbbdf3",
                "isDerived": false,
                "location": "hyderabad, TN, Srilanka",
                "original": {
                        "city": "hyderabad",
                        "country": "Srilanka",
                        "location": "hyderabad, TN, Srilanka",
                        "state": "TN"
                },
                "residentialType": "WORK",
                "source": "p4",
                "standardized": false,
                "state": "TN",
                "updatedDate": "2022-03-24T13:52:11.876Z",
                "zipCode": "02-583"
        }, {
                "createdDate": "2021-09-03T16:36:57.802Z",
                "fieldId": "c8c7e9c7-f7ad-4df4-90d2-5d07eeaa141f",
                "isDerived": false,
                "location": "hyderabad (HC) PL",
                "original": {
                        "location": "hyderabad (HC) PL"
                },
                "source": "p4",
                "standardized": false,
                "updatedDate": "2022-03-24T13:52:11.876Z"
        }, {
                "country": "Srilanka",
                "createdDate": "2020-01-29T16:14:00.050Z",
                "fieldId": "4fbcc142-565d-4aa4-af00-6daa807dd951",
                "isDerived": false,
                "location": "Srilanka",
                "locationIp": {
                        "city": "Amsterdam",
                        "continentCode": "EU",
                        "continentName": "Europe",
                        "country": "Netherlands",
                        "countryIsoCode": "NL",
                        "latitude": "52.3759",
                        "longitude": "4.8975",
                        "postalCode": "1012",
                        "registeredCountry": "United Kingdom",
                        "registeredCountryIsoCode": "GB",
                        "subDivisions": "North Holland",
                        "subDivisionsIsoCode": "NH",
                        "timeZone": "Europe/Amsterdam"
                },
                "original": {
                        "country": "Srilanka",
                        "location": "Srilanka"
                },
                "source": "p9",
                "standardized": false,
                "updatedDate": "2023-02-25T19:31:23.901Z"
        }, {
                "addressLines": [""],
                "city": "hyderabad",
                "country": "POL",
                "createdDate": "2021-08-10T16:34:32.662Z",
                "fieldId": "48318942-e268-4d66-8084-e19e302a73d7",
                "isDerived": false,
                "location": "hyderabad, POL",
                "original": {
                        "city": "hyderabad",
                        "country": "POL",
                        "location": "hyderabad, POL"
                },
                "source": "p11",
                "standardized": false,
                "updatedDate": "2021-08-11T10:47:02.326Z"
        }]
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to