This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit efa7eaea193527bb46b9d467609463d5c09566b6 Author: Ian Maxon <[email protected]> AuthorDate: Mon Aug 23 09:44:24 2021 -0700 [ASTERIXDB-2953][RT] Fix large buffer read bug - user mode changes: no - storage format changes: no - interface changes: no Dereference the memoryview when reallocating the bytearray instead of just setting it to None. Also add a test to check it. Change-Id: Ic95e592b42139b4750af8bb20291f926b3a973e1 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12943 Reviewed-by: Ian Maxon <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> --- .../asterix-app/src/main/resources/entrypoint.py | 6 +- .../src/test/resources/TweetSent/roundtrip.py | 3 + .../big_object_pyudf/big_object_pyudf.1.ddl.sqlpp | 82 ++++++++++++++++++++++ .../big_object_pyudf.2.update.sqlpp | 24 +++++++ .../big_object_pyudf/big_object_pyudf.3.lib.sqlpp | 19 +++++ .../big_object_pyudf/big_object_pyudf.4.ddl.sqlpp | 22 ++++++ .../big_object_pyudf.5.query.sqlpp | 32 +++++++++ .../external-library/big_object_pyudf/join.1.adm | 1 + .../resources/runtimets/testsuite_it_python.xml | 7 +- 9 files changed, 192 insertions(+), 4 deletions(-) diff --git a/asterixdb/asterix-app/src/main/resources/entrypoint.py b/asterixdb/asterix-app/src/main/resources/entrypoint.py index aba4f29..7bad7ef 100755 --- a/asterixdb/asterix-app/src/main/resources/entrypoint.py +++ b/asterixdb/asterix-app/src/main/resources/entrypoint.py @@ -246,11 +246,11 @@ class Wrapper(object): return pos += read while pos < self.sz: - vszchunk = sys.stdin.buffer.read1() + vszchunk = sys.stdin.buffer.read1(FRAMESZ) if len(vszchunk) == 0: self.alive = False return - self.readview = None + self.readview.release() self.readbuf.extend(vszchunk) self.readview = memoryview(self.readbuf) pos += len(vszchunk) @@ -258,7 +258,7 @@ class Wrapper(object): self.unpacked_msg = list(self.unpacker) self.msg_type = MessageType(self.unpacked_msg[0]) self.type_handler[self.msg_type](self) - except BaseException as e: + except BaseException: self.handle_error(traceback.format_exc()) def send_msg(self): diff --git a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py index 9058a01..0d52f01 100644 --- a/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py +++ b/asterixdb/asterix-app/src/test/resources/TweetSent/roundtrip.py @@ -25,6 +25,9 @@ class Tests(object): def roundtrip(self, *args): return args + def roundstr(self, arg): + return arg + def warning(self): raise ArithmeticError("oof") diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.1.ddl.sqlpp new file mode 100644 index 0000000..abe0710 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.1.ddl.sqlpp @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +* Description : Create an external dataset that contains long comments fields, 10% of the records have a 32K size comments. +* This will trigger into the VSizeFrame path +* Expected Res : Success +* Date : Jun 16 2015 +*/ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.LineType as + closed { + l_orderkey : integer, + l_partkey : integer, + l_suppkey : integer, + l_linenumber : integer, + l_quantity : double, + l_extendedprice : double, + l_discount : double, + l_tax : double, + l_returnflag : string, + l_linestatus : string, + l_shipdate : string, + l_commitdate : string, + l_receiptdate : string, + l_shipinstruct : string, + l_shipmode : string, + l_comment : string +}; + +create type test.OrderType as + closed { + o_orderkey : integer, + o_custkey : integer, + o_orderstatus : string, + o_totalprice : double, + o_orderdate : string, + o_orderpriority : string, + o_clerk : string, + o_shippriority : integer, + o_comment : string +}; + +create type test.CustomerType as + closed { + c_custkey : integer, + c_name : string, + c_address : string, + c_nationkey : integer, + c_phone : string, + c_acctbal : double, + c_mktsegment : string, + c_comment : string +}; + +create external dataset Line(LineType) using `localfs`((`path`=`asterix_nc1://data/big-object/lineitem.tbl.big`),(`input-format`=`text-input-format`),(`format`=`delimited-text`),(`delimiter`=`|`)); + +create external dataset `Order`(OrderType) using `localfs`((`path`=`asterix_nc1://data/big-object/order.tbl.big`),(`input-format`=`text-input-format`),(`format`=`delimited-text`),(`delimiter`=`|`)); + +create external dataset Customer(CustomerType) using `localfs`((`path`=`asterix_nc1://data/big-object/customer.tbl.big`),(`input-format`=`text-input-format`),(`format`=`delimited-text`),(`delimiter`=`|`)); + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.2.update.sqlpp new file mode 100644 index 0000000..00a13e7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.2.update.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +* Description : Create an external dataset that contains long comments fields, 10% of the records have a 32K size comments. +* This will trigger into the VSizeFrame path +* Expected Res : Success +* Date : Jun 16 2015 +*/ diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.3.lib.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.3.lib.sqlpp new file mode 100644 index 0000000..7167161 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.3.lib.sqlpp @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +install test testlib python admin admin target/TweetSent.pyz diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.4.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.4.ddl.sqlpp new file mode 100644 index 0000000..28d8a90 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.4.ddl.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + USE test; + +create function roundtrip(s) as "roundtrip", + "Tests.roundstr" at testlib; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.5.query.sqlpp new file mode 100644 index 0000000..f04a02c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-library/big_object_pyudf/big_object_pyudf.5.query.sqlpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +* Description : Create an external dataset that contains long comments fields, 10% of the records have a 32K size comments. + This will trigger into the VSizeFrame path +* Expected Res : Success +* Date : Jun 16 2015 +*/ + +USE test; + + +SELECT ROUND(AVG(`string-length`(roundtrip(c.c_comment))),3) AS len_c_comment, ROUND(AVG(`string-length`(roundtrip(o.o_comment))),3) AS len_o_comment +FROM Customer AS c, + `Order` AS o +WHERE c.c_custkey = o.o_custkey; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/big_object_pyudf/join.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/big_object_pyudf/join.1.adm new file mode 100644 index 0000000..dfe2582 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-library/big_object_pyudf/join.1.adm @@ -0,0 +1 @@ +{ "len_c_comment": 2972.995, "len_o_comment": 375.386 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml index 35bec85..5fc7316 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_it_python.xml @@ -57,7 +57,7 @@ result[0].append(self.next_tuple(*arg, key=self.mid)) File "entrypoint.py", line 99, in next_tuple return self.wrapped_fns[key](*args) - File "site-packages/roundtrip.py", line 29, in warning + File "site-packages/roundtrip.py", line 32, in warning raise ArithmeticError("oof") ArithmeticError: oof (in line 28, at column 1)</expected-warn> @@ -80,5 +80,10 @@ ArithmeticError: oof <expected-warn>ASX0201: External UDF returned exception. Returned exception was: java.io.IOException: Python process exited with code: 1 (in line 23, at column 1)</expected-warn> </compilation-unit> </test-case> + <test-case FilePath="external-library"> + <compilation-unit name="big_object_pyudf"> + <output-dir compare="Text">big_object_pyudf</output-dir> + </compilation-unit> + </test-case> </test-group> </test-suite>
