[
https://issues.apache.org/jira/browse/HIVE-16219?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15955003#comment-15955003
]
anishek commented on HIVE-16219:
--------------------------------
[~vgumashta] I was just looking at the commit in apache/master and there
seem to be additional changes in the root pom.xml which are not part of the
patch. Can you please tell me why they were required? If they were not, how do
I go about fixing this: do I create another JIRA for it?
cc [~thejas]/[~sushanth]
> metastore notification_log contains serialized message with non functional
> fields
> ----------------------------------------------------------------------------------
>
> Key: HIVE-16219
> URL: https://issues.apache.org/jira/browse/HIVE-16219
> Project: Hive
> Issue Type: Bug
> Components: Metastore
> Affects Versions: 2.2.0
> Reporter: anishek
> Assignee: anishek
> Fix For: 2.3.0, 3.0.0
>
> Attachments: HIVE-16219.3.patch
>
>
> The event notification logs stored in the Hive metastore have JSON-serialized
> messages stored in the NOTIFICATION_LOG table; these messages also store the
> serialized Thrift API objects in them. When doing a repl dump, however, we are
> serializing the metadata for the replication event + the event Message +
> additional helper-method getters representing the Thrift objects.
> We should only serialize the metadata for the replication event + the event
> Message.
> For example, for create table:
> {code}
> {
> "eventType": "CREATE_TABLE",
> "server": "",
> "servicePrincipal": "",
> "db": "default",
> "table": "a",
> "tableObjJson":
> "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
> "timestamp": 1489552350,
> "files": [],
> "tableObj": {
> "tableName": "a",
> "dbName": "default",
> "owner": "anagarwal",
> "createTime": 1489552350,
> "lastAccessTime": 0,
> "retention": 0,
> "sd": {
> "cols": [
> {
> "name": "name",
> "type": "string",
> "comment": null,
> "setName": true,
> "setType": true,
> "setComment": false
> }
> ],
> "location": "file:/tmp/warehouse/a",
> "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
> "outputFormat":
> "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
> "compressed": false,
> "numBuckets": -1,
> "serdeInfo": {
> "name": null,
> "serializationLib":
> "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
> "parameters": {
> "serialization.format": "\n",
> "field.delim": "\n"
> },
> "setName": false,
> "parametersSize": 2,
> "setParameters": true,
> "setSerializationLib": true
> },
> "bucketCols": [],
> "sortCols": [],
> "parameters": {},
> "skewedInfo": {
> "skewedColNames": [],
> "skewedColValues": [],
> "skewedColValueLocationMaps": {},
> "setSkewedColNames": true,
> "setSkewedColValues": true,
> "setSkewedColValueLocationMaps": true,
> "skewedColNamesSize": 0,
> "skewedColNamesIterator": [],
> "skewedColValuesSize": 0,
> "skewedColValuesIterator": [],
> "skewedColValueLocationMapsSize": 0
> },
> "storedAsSubDirectories": false,
> "setSkewedInfo": true,
> "parametersSize": 0,
> "colsSize": 1,
> "setParameters": true,
> "setLocation": true,
> "setInputFormat": true,
> "setCols": true,
> "setOutputFormat": true,
> "setSerdeInfo": true,
> "setBucketCols": true,
> "setSortCols": true,
> "colsIterator": [
> {
> "name": "name",
> "type": "string",
> "comment": null,
> "setName": true,
> "setType": true,
> "setComment": false
> }
> ],
> "bucketColsSize": 0,
> "bucketColsIterator": [],
> "sortColsSize": 0,
> "sortColsIterator": [],
> "setStoredAsSubDirectories": true,
> "setCompressed": true,
> "setNumBuckets": true
> },
> "partitionKeys": [],
> "parameters": {
> "totalSize": "0",
> "EXTERNAL": "TRUE",
> "numRows": "0",
> "rawDataSize": "0",
> "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}",
> "numFiles": "0",
> "transient_lastDdlTime": "1489552350"
> },
> "viewOriginalText": null,
> "viewExpandedText": null,
> "tableType": "EXTERNAL_TABLE",
> "privileges": {
> "userPrivileges": {
> "anagarwal": [
> {
> "privilege": "INSERT",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "SELECT",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "UPDATE",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "DELETE",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> }
> ]
> },
> "groupPrivileges": null,
> "rolePrivileges": null,
> "rolePrivilegesSize": 0,
> "setUserPrivileges": true,
> "setGroupPrivileges": false,
> "setRolePrivileges": false,
> "userPrivilegesSize": 1,
> "groupPrivilegesSize": 0
> },
> "temporary": false,
> "rewriteEnabled": false,
> "setTableName": true,
> "setDbName": true,
> "setOwner": true,
> "setViewOriginalText": false,
> "setViewExpandedText": false,
> "setTableType": true,
> "setPrivileges": true,
> "setCreateTime": true,
> "setLastAccessTime": true,
> "setRetention": true,
> "partitionKeysIterator": [],
> "parametersSize": 7,
> "setTemporary": true,
> "setRewriteEnabled": false,
> "setParameters": true,
> "setPartitionKeys": true,
> "setSd": true,
> "partitionKeysSize": 0
> }
> }
> {code}
> It should contain only the required JSON message, as follows:
> {code}
> {
> "eventType": "CREATE_TABLE",
> "server": "",
> "servicePrincipal": "",
> "db": "default",
> "table": "a",
> "tableObjJson":
> "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
> "timestamp": 1489552350,
> "files": []
> }
> {code}
> This will require configuring serialization features on the mapper so that it
> only serializes the annotated fields.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)