[ https://issues.apache.org/jira/browse/FLINK-20253?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zouyunhe updated FLINK-20253: ----------------------------- Description: We run a Flink job whose input data is in JSON format, and found that an exception is thrown when a non-UTF-8 string flows in. The exception is shown below: {code:java} 2020-06-15 20:55:48,777 WARN org.apache.flink.formats.json.JsonRowDeserializationSchema - Failed to deserialize JSON object: {"rip":"127.0.0.1","rtime":"2020-06-15 16:43:46","rchannel":"http_server","be_time ":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClickVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.c.cc\/v\/IWB\",\"af_b1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\"100 cel✌❤\",\"share_text2\":\"5 teenal\",\"share_test\":2,\"e\":\"detail\",\"videoType\":\"playing\",\"req_id\":\"4C3BCE7A-AEE4-11EA-83BC-8397E4C235C9\",\"networkType\":\"3g\"}","country_code":"ID","os":"Mozilla\/5.0 (Li nux; Android 7.0; L52F) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/79.0.3945.93 Mobile Safari\/537.36","recv_time":1592210626,"remark":"","client_ip":"127.0.0.1"} org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParseException: Invalid UTF-8 start byte 0xbe at [Source: (byte[])"{"rip":"127.0.0.1","rtime":"2020-06-15 16:43:46","rchannel":"http_server","be_time":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClic kVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.c.cc\/v\/IWB\",\"af_sub1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\ "100 cel✌❤\",\"share_text2\":\"5 teenal"[truncated 397 bytes]; line: 1, column: 406] at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1840) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:712) at
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidInitial(UTF8StreamJsonParser.java:3569) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidChar(UTF8StreamJsonParser.java:3565) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishString2(UTF8StreamJsonParser.java:2511) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishAndReturnString(UTF8StreamJsonParser.java:2437) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:293) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:267) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper._readTreeAndClose(ObjectMapper.java:4254) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper.readTree(ObjectMapper.java:2725) at org.apache.flink.formats.json.JsonRowDeserializationSchema.deserialize(JsonRowDeserializationSchema.java:166) {code} was: We run a flink job and the input data is json format, and found the exception will throw while non utf8 string flows in, the exception as below {code:java} 2020-06-15 20:55:48,777 WARN org.apache.flink.formats.json.JsonRowDeserializationSchema - Failed to deserialize JSON object: {"rip":"127.0.0.1","rtime":"2020-06-15 16:43:46","rchannel":"http_server","be_time 
":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClickVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.lik ee.video\/v\/IysQWB\",\"af_sub1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\"100 ner yg gak cek Nyesel✌❤\",\"share_text2\":\"Ayok Guys Ikuti syarat nya dan tag 5 temen kalian semog a kalian Jadi Likers terkenal\",\"share_test\":2,\"e\":\"detail\",\"videoType\":\"playing\",\"req_id\":\"4C3BCE7A-AEE4-11EA-83BC-8397E4C235C9\",\"networkType\":\"3g\"}","country_code":"ID","os":"Mozilla\/5.0 (Li nux; Android 7.0; L52F) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/79.0.3945.93 Mobile Safari\/537.36","recv_time":1592210626,"remark":"","client_ip":"127.0.0.1"} org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParseException: Invalid UTF-8 start byte 0xbe at [Source: (byte[])"{"rip":"127.0.0.1","rtime":"2020-06-15 16:43:46","rchannel":"http_server","be_time":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClic kVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.likee.video\/v\/IysQWB\",\"af_sub1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\ "100 ner yg gak cek Nyesel✌❤\",\"share_text2\":\"Ayok Guys Ikuti syarat nya dan tag 5 teme"[truncated 397 bytes]; line: 1, column: 406] at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1840) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:712) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidInitial(UTF8StreamJsonParser.java:3569) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidChar(UTF8StreamJsonParser.java:3565) at 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishString2(UTF8StreamJsonParser.java:2511) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishAndReturnString(UTF8StreamJsonParser.java:2437) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:293) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:267) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper._readTreeAndClose(ObjectMapper.java:4254) at org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper.readTree(ObjectMapper.java:2725) at org.apache.flink.formats.json.JsonRowDeserializationSchema.deserialize(JsonRowDeserializationSchema.java:166) {code} > `JsonRowDeserializationSchema` can not deserialize non utf-8 json string > ------------------------------------------------------------------------ > > Key: FLINK-20253 > URL: https://issues.apache.org/jira/browse/FLINK-20253 > Project: Flink > Issue Type: Bug > Components: Formats (JSON, Avro, Parquet, ORC, SequenceFile), Table > SQL / Ecosystem > Affects Versions: 1.11.0 > Reporter: zouyunhe > Priority: Major > > We run a Flink job whose input data is in JSON format, and found that an > exception is thrown when a non-UTF-8 string flows in. The exception is shown below: > {code:java} > 2020-06-15 20:55:48,777 WARN > org.apache.flink.formats.json.JsonRowDeserializationSchema - Failed to > deserialize JSON object: {"rip":"127.0.0.1","rtime":"2020-06-15 > 16:43:46","rchannel":"http_server","be_time >
":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClickVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.c.cc\/v\/IWB\",\"af_b1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\"100 > cel✌❤\",\"share_text2\":\"5 > teenal\",\"share_test\":2,\"e\":\"detail\",\"videoType\":\"playing\",\"req_id\":\"4C3BCE7A-AEE4-11EA-83BC-8397E4C235C9\",\"networkType\":\"3g\"}","country_code":"ID","os":"Mozilla\/5.0 > (Li > nux; Android 7.0; L52F) AppleWebKit\/537.36 (KHTML, like Gecko) > Chrome\/79.0.3945.93 Mobile > Safari\/537.36","recv_time":1592210626,"remark":"","client_ip":"127.0.0.1"} > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParseException: > Invalid UTF-8 start byte 0xbe > at [Source: (byte[])"{"rip":"127.0.0.1","rtime":"2020-06-15 > 16:43:46","rchannel":"http_server","be_time":1592210626,"be_ip":"127.0.0.1","uid":0,"activity_id":"{\"page\":\"share-video\",\"event\":\"testBClic > kVideo\",\"click_post_id\":\"6817589584893303314\",\"owner\":\"others\",\"currentUrl\":\"https:\/\/l.c.cc\/v\/IWB\",\"af_sub1\":\"20459_ref\",\"af_sub2\":\"1_2971\",\"msg_text\":\"1\",\"share_text1\":\ > "100 cel✌❤\",\"share_text2\":\"5 teenal"[truncated 397 bytes]; line: 1, > column: 406] > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1840) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:712) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidInitial(UTF8StreamJsonParser.java:3569) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidChar(UTF8StreamJsonParser.java:3565) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishString2(UTF8StreamJsonParser.java:2511) 
> at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishAndReturnString(UTF8StreamJsonParser.java:2437) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:293) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:267) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper._readTreeAndClose(ObjectMapper.java:4254) > at > org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper.readTree(ObjectMapper.java:2725) > at > org.apache.flink.formats.json.JsonRowDeserializationSchema.deserialize(JsonRowDeserializationSchema.java:166) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)