This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-flight-sql-postgresql.git
The following commit(s) were added to refs/heads/main by this push:
new 640dcf4 Fix out of range access with 1M over records response (#155)
640dcf4 is described below
commit 640dcf46850837a323083945e790c3dac9073de6
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Oct 29 07:15:38 2023 +0900
Fix out of range access with 1M over records response (#155)
Closes GH-154
---
src/afs.cc | 49 ++++++++++++++++++++++++++-----------------------
test/helper/sandbox.rb | 10 ++++++++++
test/test-flight-sql.rb | 39 +++++++++++++++++++++++++++++++++++++++
3 files changed, 75 insertions(+), 23 deletions(-)
diff --git a/src/afs.cc b/src/afs.cc
index aa2df7a..cd76824 100644
--- a/src/afs.cc
+++ b/src/afs.cc
@@ -1032,15 +1032,16 @@ class ArrowArrayBuilder<ArrowType, arrow::enable_if_has_c_type<ArrowType>>
arrow::Status build_not_null(uint64_t iTuple, uint64_t iTupleEnd)
{
- values_.resize(iTupleEnd - iTuple);
- for (; iTuple < iTupleEnd; iTuple++)
+ uint64_t n = iTupleEnd - iTuple;
+ values_.resize(n);
+ for (uint64_t i = 0; i < n; ++i)
{
bool isNull;
- auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple],
+ auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple + i],
SPI_tuptable->tupdesc,
iAttribute_ + 1,
&isNull);
- values_[iTuple] = convert_value<ArrowType>(datum);
+ values_[i] = convert_value<ArrowType>(datum);
}
return builder_->AppendValues(values_.data(), values_.size());
}
@@ -1048,24 +1049,25 @@ class ArrowArrayBuilder<ArrowType, arrow::enable_if_has_c_type<ArrowType>>
arrow::Status build_may_null(uint64_t iTuple, uint64_t iTupleEnd)
{
bool haveNull = false;
- values_.resize(iTupleEnd - iTuple);
- validBytes_.resize(iTupleEnd - iTuple);
- for (; iTuple < iTupleEnd; iTuple++)
+ uint64_t n = iTupleEnd - iTuple;
+ values_.resize(n);
+ validBytes_.resize(n);
+ for (uint64_t i = 0; i < n; ++i)
{
bool isNull;
- auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple],
+ auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple + i],
SPI_tuptable->tupdesc,
iAttribute_ + 1,
&isNull);
if (isNull)
{
haveNull = true;
- validBytes_[iTuple] = 0;
+ validBytes_[i] = 0;
}
else
{
- validBytes_[iTuple] = 1;
- values_[iTuple] = convert_value<ArrowType>(datum);
+ validBytes_[i] = 1;
+ values_[i] = convert_value<ArrowType>(datum);
}
}
if (haveNull)
@@ -1148,15 +1150,16 @@ class ArrowArrayBuilder<ArrowType, arrow::enable_if_base_binary<ArrowType>>
arrow::Status build_not_null(uint64_t iTuple, uint64_t iTupleEnd)
{
- values_.resize(iTupleEnd - iTuple);
- for (; iTuple < iTupleEnd; iTuple++)
+ uint64_t n = iTupleEnd - iTuple;
+ values_.resize(n);
+ for (uint64_t i = 0; i < n; ++i)
{
bool isNull;
- auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple],
+ auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple + i],
SPI_tuptable->tupdesc,
iAttribute_ + 1,
&isNull);
- values_[iTuple] = std::string(VARDATA_ANY(datum), VARSIZE_ANY_EXHDR(datum));
+ values_[i] = std::string(VARDATA_ANY(datum), VARSIZE_ANY_EXHDR(datum));
}
return builder_->AppendValues(values_);
}
@@ -1164,25 +1167,25 @@ class ArrowArrayBuilder<ArrowType, arrow::enable_if_base_binary<ArrowType>>
arrow::Status build_may_null(uint64_t iTuple, uint64_t iTupleEnd)
{
bool haveNull = false;
- values_.resize(iTupleEnd - iTuple);
- validBytes_.resize(iTupleEnd - iTuple);
- for (; iTuple < iTupleEnd; iTuple++)
+ uint64_t n = iTupleEnd - iTuple;
+ values_.resize(n);
+ validBytes_.resize(n);
+ for (uint64_t i = 0; i < n; ++i)
{
bool isNull;
- auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple],
+ auto datum = SPI_getbinval(SPI_tuptable->vals[iTuple + i],
SPI_tuptable->tupdesc,
iAttribute_ + 1,
&isNull);
if (isNull)
{
haveNull = true;
- validBytes_[iTuple] = 0;
+ validBytes_[i] = 0;
}
else
{
- validBytes_[iTuple] = 1;
- values_[iTuple] =
- std::string(VARDATA_ANY(datum), VARSIZE_ANY_EXHDR(datum));
+ validBytes_[i] = 1;
+ values_[i] = std::string(VARDATA_ANY(datum), VARSIZE_ANY_EXHDR(datum));
}
}
if (haveNull)
diff --git a/test/helper/sandbox.rb b/test/helper/sandbox.rb
index 09c5898..53969e5 100644
--- a/test/helper/sandbox.rb
+++ b/test/helper/sandbox.rb
@@ -123,6 +123,7 @@ module Helper
end
def initdb(shared_preload_libraries: [],
+ max_n_rows_per_record_batch: nil,
db_path: "db",
port: 25432,
flight_sql_port: 35432)
@@ -163,6 +164,10 @@ module Helper
conf.puts("shared_preload_libraries = " +
"'#{shared_preload_libraries.join(",")}'")
conf.puts("arrow_flight_sql.uri = '#{@flight_sql_uri}'")
+ if max_n_rows_per_record_batch
+ conf.puts("arrow_flight_sql.max_n_rows_per_record_batch = " +
+ "#{max_n_rows_per_record_batch}")
+ end
yield(conf) if block_given?
end
pg_hba_conf = File.join(@dir, "pg_hba.conf")
@@ -343,6 +348,7 @@ module Helper
@postgresql = PostgreSQL.new(@tmp_dir)
options = {
shared_preload_libraries: shared_preload_libraries,
+ max_n_rows_per_record_batch: max_n_rows_per_record_batch,
}
@postgresql.initdb(**options)
yield
@@ -352,6 +358,10 @@ module Helper
["arrow_flight_sql"]
end
+ def max_n_rows_per_record_batch
+ nil
+ end
+
def start_postgres
@postgresql.start
end
diff --git a/test/test-flight-sql.rb b/test/test-flight-sql.rb
index 429fa95..87abc59 100644
--- a/test/test-flight-sql.rb
+++ b/test/test-flight-sql.rb
@@ -18,6 +18,10 @@
class FlightSQLTest < Test::Unit::TestCase
include Helper::Sandbox
+ def max_n_rows_per_record_batch
+ 10
+ end
+
setup do
unless flight_client.respond_to?(:authenticate_basic)
omit("ArrowFlight::Client#authenticate_basic is needed" )
@@ -123,6 +127,41 @@ class FlightSQLTest < Test::Unit::TestCase
reader.read_all)
end
+ data(:type, ["integer", "text"])
+ data(:may_null, [true, false])
+ def test_select_multiple_record_batches_response
+ type = data[:type]
+ may_null = data[:may_null]
+ if type == "integer"
+ arrow_type = Arrow::Int32DataType.new
+ values = 30.times.to_a
+ else
+ arrow_type = Arrow::StringDataType.new
+ values = 30.times.collect(&:to_s)
+ end
+
+ create_table = "CREATE TABLE data (value #{type}"
+ # XXX: "NOT NULL" doesn't change FormData_pg_attribute::attnotnull
+ # returned by "SELECT *". So the "may_null" data driven test
+ # parameter is meaningless...
+ create_table << " NOT NULL" unless may_null
+ create_table << ")"
+ run_sql(create_table)
+ run_sql("INSERT INTO data VALUES " +
+ values.collect {|value| "(#{value})"}.join(", "))
+
+ info = flight_sql_client.execute("SELECT * FROM data", @options)
+ assert_equal(Arrow::Schema.new(value: arrow_type),
+ info.get_schema)
+ endpoint = info.endpoints.first
+ reader = flight_sql_client.do_get(endpoint.ticket, @options)
+ chunks = values.each_slice(max_n_rows_per_record_batch).collect do |chunk|
+ arrow_type.array_class.new(chunk)
+ end
+ assert_equal(Arrow::Table.new(value: Arrow::ChunkedArray.new(chunks)),
+ reader.read_all)
+ end
+
def test_insert_direct
unless flight_sql_client.respond_to?(:execute_update)
omit("red-arrow-flight-sql 13.0.0 or later is required")