[ https://issues.apache.org/jira/browse/HIVE-9312?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14269704#comment-14269704 ]
Tom Snee commented on HIVE-9312: -------------------------------- nested.avsc: { "namespace": "com.example", "name": "BugTickler", "type": "record", "fields": [ { "name": "Records", "default": null, "type": [ "null", { "type": "array", "items": { "name": "Record", "type": "record", "fields": [ { "name": "ThreeDigits", "default": null, "type": ["null", "string"] }, { "name": "FourteenDigits", "default": null, "type": ["null", "string"] }, { "name": "ThirteenDigits", "default": null, "type": [ "null", { "type": "array", "items": "string" } ] }, { "name": "Events", "default": null, "type": [ "null", { "type": "array", "items": { "name": "Event", "type": "record", "fields": [ { "name": "Subject", "default": null, "type": [ "null", { "name": "CriticalSubject", "type": "record", "fields": [ { "name": "SubjectNumber", "default": null, "type": ["null", "string"] }, { "name": "FirstName", "default":null, "type": ["null", "string"] }, { "name": "LastName", "default":null, "type": ["null", "string"] }, { "name": "MiddleName", "default":null, "type": ["null", "string"] }, { "name": "BirthDtm", "default":null, "type": ["null", "string"] }, { "name": "SocialSecurityNumber", "default":null, "type": ["null", "string"] }, { "name": "GenderCode", "default":null, "type": ["null", "string"] }, { "name": "TypeCode", "default":null, "type": ["null", "string"] }, { "name": "CodeCode", "default":null, "type": ["null", "string"] }, { "name": "StreetAddress", "default":null, "type": ["null", "string"] }, { "name": "StreetAddress2", "default":null, "type": ["null", "string"] }, { "name": "City", "default":null, "type": ["null", "string"] }, { "name": "State", "default":null, "type": ["null", "string"] }, { "name": "StateCode", "default":null, "type": ["null", "string"] }, { "name": "ZipCode", "default":null, "type": ["null", "string"] }, { "name": "County", "default":null, "type": ["null", "string"] }, { "name": "CountyCode", "default":null, "type": ["null", "string"] }, { "name": "Country", 
"default":null, "type": ["null", "string"] }, { "name": "HomePhone", "default":null, "type": ["null", "string"] }, { "name": "BusinessPhone", "default":null, "type": ["null", "string"] }, { "name": "PrimaryLanguage", "default":null, "type": ["null", "string"] }, { "name": "MaritalStatusCode", "default":null, "type": ["null", "string"] }, { "name": "ReligionCode", "default":null, "type": ["null", "string"] }, { "name": "Citizenship", "default":null, "type": ["null", "string"] }, { "name": "Nationality", "default":null, "type": ["null", "string"] }, { "name": "DeathDtm", "default":null, "type": ["null", "string"] }, { "name": "DeathIndicatorCode", "default":null, "type": ["null", "string"] }, { "name": "AllergiesHint", "default":null, "type": ["null", "string"] }, { "name": "SubjectHint", "default":null, "type": ["null", "string"] } ] } ] }, { "name": "Groups", "default": null, "type": [ "null", { "type": "array", "items": { "name": "CriticalGroup", "type": "record", "fields": [ { "name": "SequenceNumber", "default": null, "type": ["null", "string"] }, { "name": "ElevenDigits", "default": null, "type": ["null", "string"] }, { "name": "EightDigits", "default": null, "type": ["null", "string"] }, { "name": "UniqueOrderNumber", "default": null, "type": ["null", "string"] }, { "name": "FiveDigits", "default": null, "type": ["null", "string"] }, { "name": "ServiceCodingSystem", "default": null, "type": ["null", "string"] }, { "name": "ServiceName", "default": null, "type": ["null", "string"] }, { "name": "ServiceSuggestionDtm", "default": null, "type": ["null", "string"] }, { "name": "ReportStatusDtm", "default": null, "type": ["null", "string"] }, { "name": "ResultStatusCode", "default": null, "type": ["null", "string"] }, { "name": "Note", "default": null, "type": ["null", "string"] }, { "name": "Results", "default": null, "type": [ "null", { "type": "array", "items": { "name": "CriticalResult", "type": "record", "fields": [ { "name": "SequenceNumber", "default": null, 
"type": ["null", "string"] }, { "name": "SuggestionTypeCode", "default": null, "type": ["null", "string"] }, { "name": "SuggestionTypeCodingSystemCode", "default": null, "type": ["null", "string"] }, { "name": "LoincSuggestionTypeCode", "default": null, "type": ["null", "string"] }, { "name": "SuggestionTypeName", "default": null, "type": ["null", "string"] }, { "name": "SuggestionValue", "default": null, "type": ["null", "string"] }, { "name": "UnitCode", "default": null, "type": ["null", "string"] }, { "name": "ConvertedSuggestionValue", "default": null, "type": ["null", "string"] }, { "name": "ConvertedUnitCode", "default": null, "type": ["null", "string"] }, { "name": "SuggestionDtm", "default": null, "type": ["null", "string"] }, { "name": "ResultStatusCode", "default": null, "type": ["null", "string"] }, { "name": "Note", "default": null, "type": ["null", "string"] }, { "name": "SuggestionGroupUniqueOrderNumber", "default": null, "type": ["null", "string"] } ] } } ] } ] } } ] } ] } } ] } ] } } ] }, { "name": "MasterSubjectNumber", "default": null, "type": ["null", "string"] }, { "name": "RecordUpdateDtm", "default": null, "type": ["null", "string"] } ] } > Literal string "\n" confuses Avro SerDe > --------------------------------------- > > Key: HIVE-9312 > URL: https://issues.apache.org/jira/browse/HIVE-9312 > Project: Hive > Issue Type: Bug > Components: Serializers/Deserializers > Affects Versions: 0.13.0 > Environment: Hortonworks Data Platform 2.1.2.1 on Centos 6.5 > Reporter: Tom Snee > > Avro files with string fields that contain a backslash followed by 'n' > confuse the Avro SerDe. > Steps to recreate: > 1. Put attached schema nested.avsc into HDFS under /user/someone. > 2. Convert attached JSON file example.json into Avro with avro-tools, like > so: "java -jar avro-tools-1.7.7.jar fromjson --schema-file nested.avsc > example.json > example.avro" > 3. Put example.avro into HDFS under /user/someone/avro-files. > 4. 
Create a Hive table with this statement:
> CREATE EXTERNAL TABLE avro_table
> ROW FORMAT SERDE
> 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> STORED AS INPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION
> '/user/someone/avro-files/'
> TBLPROPERTIES (
> 'avro.schema.url'='hdfs:///user/someone/nested.avsc'
> );
> 5. Observe that "select * from avro_table;" returns one row, as expected.
> 6. Observe that "select * from avro_table where mastersubjectnumber='A12B3CDE-FGH4-5I67-89J0-KLMN1OPQ23R4';" returns 13 garbled rows.

--
This message was sent by Atlassian JIRA (v6.3.4#6332)