Yingyi Bu has submitted this change and it was merged. Change subject: ASTERIXDB-1478: fix the utf8 reader. ......................................................................
ASTERIXDB-1478: fix the utf8 reader. 1. Fix the ASTERIXDB-1478. 2. Add the utf8 testCases. Change-Id: Idb302dc604fcd71811de550d3d4bd727c81a13ee Reviewed-on: https://asterix-gerrit.ics.uci.edu/1077 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Yingyi Bu <[email protected]> --- A asterixdb/asterix-app/data/adm-load/utf8.adm A asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql A asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java M asterixdb/asterix-external-data/src/test/resources/results/beer.txt 8 files changed, 198 insertions(+), 2 deletions(-) Approvals: Yingyi Bu: Looks good to me, approved Jenkins: Verified; No violations found diff --git a/asterixdb/asterix-app/data/adm-load/utf8.adm b/asterixdb/asterix-app/data/adm-load/utf8.adm new file mode 100644 index 0000000..2621bc4 --- /dev/null +++ b/asterixdb/asterix-app/data/adm-load/utf8.adm @@ -0,0 +1,100 @@ +{"id":"1","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"2","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"3","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"4","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"5","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"6","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"7","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"8","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"9","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"10","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} 
+{"id":"11","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"12","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"13","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"14","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"15","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"16","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"17","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"18","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"19","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"20","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"21","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"22","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"23","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"24","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"25","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"26","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"27","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"28","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"29","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"30","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"31","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"32","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"33","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"34","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"35","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"36","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"37","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"38","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"39","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"40","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"41","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"42","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} 
+{"id":"43","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"44","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"45","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"46","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"47","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"48","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"49","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"50","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"51","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"52","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"53","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"54","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"55","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"56","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"57","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"58","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"59","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"60","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"61","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"62","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"63","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"64","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"65","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"66","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"67","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"68","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"69","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"70","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"71","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"72","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"73","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"74","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} 
+{"id":"75","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"76","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"77","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"78","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"79","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"80","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"81","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"82","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"83","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"84","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"85","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"86","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"87","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"88","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"89","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"90","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"91","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"92","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"93","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"94","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"95","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"96","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"97","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"98","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"99","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} +{"id":"100","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"} diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql new file mode 100644 index 0000000..bcd3d46 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +drop dataverse test if exists; +create dataverse test +use dataverse test; + +create type DocType as open { + id: string, + description: string? +}; + +create dataset Doc (DocType) +primary key id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql new file mode 100644 index 0000000..4d4f4e5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * + * ADM file loading utf8 + * Expected result: success + * + */ + +use dataverse test; + +load dataset Doc +using localfs +(("path"="asterix_nc1://data/adm-load/utf8.adm"),("format"="adm")); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql new file mode 100644 index 0000000..95507bf --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse test; + +let $s := count( +for $i in dataset Doc +return $i) +return $s diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm new file mode 100644 index 0000000..29d6383 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm @@ -0,0 +1 @@ +100 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml index cf5bda3..749965e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -6560,6 +6560,11 @@ <output-dir compare="Text">adm_binary</output-dir> </compilation-unit> </test-case> + <test-case FilePath="load"> + <compilation-unit name="utf8"> + <output-dir compare="Text">utf8</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="hints"> <test-case FilePath="hints"> diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java index 94333d1..8e166c0 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java @@ -38,6 +38,7 @@ private CharBuffer charBuffer = CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE); private CharsetDecoder decoder; private boolean done = false; + private boolean remaining = false; public AsterixInputStreamReader(AsterixInputStream in) { this.in = in; @@ -75,6 +76,7 @@ charBuffer.clear(); while (charBuffer.position() == 0) { if (byteBuffer.hasRemaining()) { + remaining 
= true; decoder.decode(byteBuffer, charBuffer, false); System.arraycopy(charBuffer.array(), 0, cbuf, offset, charBuffer.position()); if (charBuffer.position() > 0) { @@ -97,8 +99,13 @@ done = true; return len; } - byteBuffer.position(len); + if (remaining) { + byteBuffer.position(len + byteBuffer.position()); + } else { + byteBuffer.position(len); + } byteBuffer.flip(); + remaining = false; decoder.decode(byteBuffer, charBuffer, false); System.arraycopy(charBuffer.array(), 0, cbuf, offset, charBuffer.position()); } diff --git a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt index bcb3631..5a7983d 100644 --- a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt +++ b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt @@ -1450,7 +1450,7 @@ { "name": "Baron Helles Bock", "abv": 6.4, "ibu": 0.0, "srm": 0.0, "upc": 0, "type": "beer", "brewery_id": "baron_brewing_company", "updated": "2010-07-22 20:00:20", "description": "The Helles-Bock is similar to a traditional Maibock. Bocks are traditionally brewed in the winter / early spring months and are served during the spring / early summer months. The Helles Bock has a copper golden color with a brilliant white head. The body showcases a clean sweet maltiness that is offset by just enough hops to balance it. Very smooth and easy, drinkable yet deceptive at 6.4%.\r\n\r\nAll ingredients for the beer are imported from Germany. 
Brewed in accordance to the German Beer Purity Law (Reinheitsgebot) of 1516.", "style": "German-Style Heller Bock/Maibock", "category": "German Lager" } { "id": "baron_brewing_company-baron_helles_bock", "flags": 0, "expiration": 0, "cas": 244367687683, "rev": 1, "vbid": 27, "dtype": 1 } "baron_brewing_company-baron_helles_bock" -{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", "state": "New Jersey", "code": "8753", "country": "United States", "phone": "1-732-244-7566", "website": "", "type": "rewery", "updated": "2010-07-22 20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { "accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } } +{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", "state": "New Jersey", "code": "8753", "country": "United States", "phone": "1-732-244-7566", "website": "", "type": "brewery", "updated": "2010-07-22 20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { "accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } } { "id": "basil_t_s_brew_pub_and_italian_grill", "flags": 0, "expiration": 0, "cas": 244364410882, "rev": 1, "vbid": 20, "dtype": 1 } "basil_t_s_brew_pub_and_italian_grill" { "name": "Nieuw Ligt Grand Cru 2006", "abv": 12.0, "ibu": 0.0, "srm": 0.0, "upc": 0, "type": "beer", "brewery_id": "stadsbrouwerij_de_hemel", "updated": "2010-07-22 20:00:20", "description": "" } -- To view, visit https://asterix-gerrit.ics.uci.edu/1077 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: Idb302dc604fcd71811de550d3d4bd727c81a13ee Gerrit-PatchSet: 12 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Wenhai Li <[email protected]> Gerrit-Reviewer: Chen Li <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Jianfeng Jia <[email protected]> Gerrit-Reviewer: Michael Blow <[email protected]> Gerrit-Reviewer: Till Westmann <[email 
protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-Reviewer: Yingyi Bu <[email protected]> Gerrit-Reviewer: abdullah alamoudi <[email protected]>
