(arrow) branch main updated (a44b5372c3 -> ac1eadb5e0)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from a44b5372c3 GH-41493: [C++][S3] Add a new option to check existence before CreateDir (#41822) add ac1eadb5e0 GH-40494: [Go] add support for protobuf messages (#40496) No new revisions were added by this update. Summary of changes: dev/release/rat_exclude_files.txt | 1 + go/arrow/datatype_nested.go| 2 +- .../array => go/arrow/util/messages}/README.md | 7 +- go/arrow/util/messages/types.proto | 56 ++ go/arrow/util/protobuf_reflect.go | 865 + go/arrow/util/protobuf_reflect_test.go | 311 go/arrow/util/util_message/types.pb.go | 539 + go/go.mod | 2 + go/go.sum | 2 + 9 files changed, 1783 insertions(+), 2 deletions(-) copy {cpp/src/arrow/array => go/arrow/util/messages}/README.md (87%) create mode 100644 go/arrow/util/messages/types.proto create mode 100644 go/arrow/util/protobuf_reflect.go create mode 100644 go/arrow/util/protobuf_reflect_test.go create mode 100644 go/arrow/util/util_message/types.pb.go
(arrow) branch main updated (54bece3d4c -> 99014abd19)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 54bece3d4c GH-41648: [Java] Memory Leak about splitAndTransfer (#41898) add 99014abd19 GH-41887: [Go] Run linter via pre-commit (#41888) No new revisions were added by this update. Summary of changes: .gitignore | 5 + ci/conda_env_archery.txt => .golangci.yaml |35 +- .pre-commit-config.yaml|14 + go/arrow/array/bufferbuilder_numeric_test.go | 2 +- go/arrow/array/numeric.gen.go | 2 +- go/arrow/array/numeric_test.go |12 +- go/arrow/array/numericbuilder.gen_test.go | 8 +- go/arrow/array/record_test.go | 2 +- go/arrow/datatype_nested_test.go |16 +- go/arrow/flight/gen/flight/Flight.pb.go| 5 +- go/arrow/flight/gen/flight/FlightSql.pb.go | 5 +- go/arrow/flight/gen/flight/Flight_grpc.pb.go | 1 + go/arrow/float16/float16.go| 2 +- go/arrow/gen-flatbuffers.go| 1 + go/arrow/internal/debug/assert_off.go | 1 + go/arrow/internal/debug/assert_on.go | 1 + go/arrow/internal/debug/doc.go | 6 +- go/arrow/internal/debug/log_off.go | 1 + go/arrow/internal/debug/log_on.go | 1 + go/arrow/internal/debug/util.go| 1 + go/arrow/internal/flatbuf/Binary.go| 2 +- go/arrow/internal/flatbuf/BinaryView.go|14 +- go/arrow/internal/flatbuf/Block.go |19 +- go/arrow/internal/flatbuf/BodyCompression.go |18 +- go/arrow/internal/flatbuf/BodyCompressionMethod.go | 6 +- go/arrow/internal/flatbuf/Buffer.go|34 +- go/arrow/internal/flatbuf/Date.go |12 +- go/arrow/internal/flatbuf/Decimal.go |24 +- go/arrow/internal/flatbuf/DictionaryBatch.go |24 +- go/arrow/internal/flatbuf/DictionaryEncoding.go|48 +- go/arrow/internal/flatbuf/DictionaryKind.go|10 +- go/arrow/internal/flatbuf/Endianness.go| 4 +- go/arrow/internal/flatbuf/Feature.go |38 +- go/arrow/internal/flatbuf/Field.go |34 +- go/arrow/internal/flatbuf/FieldNode.go |40 +- go/arrow/internal/flatbuf/FixedSizeBinary.go | 4 +- go/arrow/internal/flatbuf/FixedSizeList.go | 4 +- 
go/arrow/internal/flatbuf/Footer.go|10 +- go/arrow/internal/flatbuf/KeyValue.go | 6 +- go/arrow/internal/flatbuf/LargeBinary.go | 4 +- go/arrow/internal/flatbuf/LargeList.go | 4 +- go/arrow/internal/flatbuf/LargeListView.go | 4 +- go/arrow/internal/flatbuf/LargeUtf8.go | 4 +- go/arrow/internal/flatbuf/ListView.go | 6 +- go/arrow/internal/flatbuf/Map.go |54 +- go/arrow/internal/flatbuf/MessageHeader.go |16 +- go/arrow/internal/flatbuf/Null.go | 2 +- go/arrow/internal/flatbuf/RecordBatch.go | 102 +- go/arrow/internal/flatbuf/RunEndEncoded.go |10 +- go/arrow/internal/flatbuf/Schema.go|20 +- go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go | 134 +- go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go | 142 +- go/arrow/internal/flatbuf/SparseTensor.go |28 +- go/arrow/internal/flatbuf/SparseTensorIndexCOO.go | 100 +- go/arrow/internal/flatbuf/SparseTensorIndexCSF.go | 254 +- go/arrow/internal/flatbuf/Struct_.go | 6 +- go/arrow/internal/flatbuf/Tensor.go|24 +- go/arrow/internal/flatbuf/TensorDim.go |14 +- go/arrow/internal/flatbuf/Time.go |28 +- go/arrow/internal/flatbuf/Timestamp.go | 250 +- go/arrow/internal/flatbuf/Type.go | 6 +- go/arrow/internal/flatbuf/Union.go | 8 +- go/arrow/internal/flatbuf/Utf8.go | 2 +- go/arrow/internal/flatbuf/Utf8View.go |14 +- go/arrow/internal/flight_integration/scenario.go | 2 +- go/arrow/ipc/cmd/arrow-cat/main.go |66 +- go/arrow/ipc/cmd/arrow-ls/main.go |62 +- go/arrow/math/math_amd64.go| 1 + go/arrow/math/math_arm64.go| 5 +- go/arrow/math/math_noasm.go| 1 + go/arrow/math/math_ppc64le.go | 1 + go/arrow/math/math_s390x.go| 1 + go/arrow/memory/cgo_allocator.go | 4 +- go/arrow/mem
(arrow) branch main updated (8f3bf67cca -> 235608beb6)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 8f3bf67cca GH-41841: [R][CI] Remove more defunct rhub containers (#41828) add 235608beb6 MINOR: [C++] Slight improvement for ArrayData device_type (#41814) No new revisions were added by this update. Summary of changes: cpp/src/arrow/array/data.cc | 12 cpp/src/arrow/array/data.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-)
(arrow-adbc) branch main updated: chore(go/adbc): bump arrow from v16 to v17 in template (#1880)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 179d02885 chore(go/adbc): bump arrow from v16 to v17 in template (#1880) 179d02885 is described below commit 179d02885548895297fe5e1adda8c835fbfe8fd2 Author: Cocoa AuthorDate: Wed May 22 15:18:23 2024 +0100 chore(go/adbc): bump arrow from v16 to v17 in template (#1880) Hi, this PR should be a minor update for the go driver template which bumps arrow from v16 to v17. --- go/adbc/pkg/_tmpl/driver.go.tmpl | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/adbc/pkg/_tmpl/driver.go.tmpl b/go/adbc/pkg/_tmpl/driver.go.tmpl index 21a9e0919..5a071ea83 100644 --- a/go/adbc/pkg/_tmpl/driver.go.tmpl +++ b/go/adbc/pkg/_tmpl/driver.go.tmpl @@ -59,10 +59,10 @@ import ( "unsafe" "github.com/apache/arrow-adbc/go/adbc" - "github.com/apache/arrow/go/v16/arrow/array" - "github.com/apache/arrow/go/v16/arrow/cdata" - "github.com/apache/arrow/go/v16/arrow/memory" - "github.com/apache/arrow/go/v16/arrow/memory/mallocator" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/cdata" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v17/arrow/memory/mallocator" ) // Must use malloc() to respect CGO rules
(arrow) branch main updated: GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 8169d6e719 GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807) 8169d6e719 is described below commit 8169d6e719453acd0e7ca1b6f784d800cca4f113 Author: Matt Topol AuthorDate: Tue May 21 15:40:16 2024 -0400 GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807) ### Rationale for this change The original PRs for adding support for importing and exporting the new C Device interface (#36488 / #36489) only added support for the Arrays themselves, not for the stream structure. We should support both. ### What changes are included in this PR? Adding parallel functions for Import/Export of streams that accept `ArrowDeviceArrayStream`. ### Are these changes tested? Test writing in progress, wanted to get this up for review while I write tests. ### Are there any user-facing changes? No, only new functions have been added. 
* GitHub Issue: #40078 Lead-authored-by: Matt Topol Co-authored-by: Felipe Oliveira Carvalho Co-authored-by: Benjamin Kietzman Co-authored-by: Antoine Pitrou Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.h | 8 + cpp/src/arrow/array/array_test.cc | 5 + cpp/src/arrow/array/data.cc | 36 +++ cpp/src/arrow/array/data.h| 21 ++ cpp/src/arrow/array/util.cc | 2 +- cpp/src/arrow/c/bridge.cc | 278 +++- cpp/src/arrow/c/bridge.h | 61 + cpp/src/arrow/c/bridge_test.cc| 516 ++ cpp/src/arrow/c/helpers.h | 49 cpp/src/arrow/c/util_internal.h | 22 ++ cpp/src/arrow/record_batch.cc | 107 ++-- cpp/src/arrow/record_batch.h | 43 +++- python/pyarrow/tests/test_cffi.py | 2 +- 13 files changed, 1051 insertions(+), 99 deletions(-) diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 6411aebf80..716ae07220 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -224,6 +224,14 @@ class ARROW_EXPORT Array { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this array's data is allocated on + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. 
+ /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return data_->device_type(); } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 7e25ad61fa..32806d9d2e 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_EQ(array->type(), type); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); if (is_union(type->id())) { ASSERT_EQ(array->null_count(), 0); ASSERT_EQ(array->ComputeLogicalNullCount(), length); @@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); // test case for ARROW-13321 for (int64_t i : {int64_t{0}, length / 2, length - 1}) { @@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) { auto sliced = array->Slice(1, 4); ASSERT_EQ(sliced->length(), 4); ASSERT_EQ(sliced->null_count(), 0); +ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); ARROW_EXPECT_OK(sliced->ValidateFull()); } } @@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 4); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); for (int i = 0; i < 4; i++) { ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i)); @@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) { ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type)); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 0); + CheckSpanRoundTrip(*array); } } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index ac828a9c35..76a4352139 100644 --- 
a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -224,6 +224,42 @@ int64_t ArrayData::ComputeLogicalNullCount() const { return ArraySpan(*this).ComputeLogicalNullCount
(arrow-adbc) branch main updated: fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 75e392744 fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866) 75e392744 is described below commit 75e3927444cb48f90769f198af118a8b20c0fae2 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue May 21 11:54:14 2024 -0400 fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866) Reproduces and fixes: #1847 Parquet files with empty row groups are valid per the spec, but Snowflake does not currently handle them properly. To mitigate this we buffer writes to the parquet file so that a row group is not written until some amount of data has been received. The CheckedAllocator was enabled for all tests as part of this fix, which detected a leak in the BufferWriter that was fixed in: [https://github.com/apache/arrow/pull/41698](https://github.com/apache/arrow/pull/41698). There was an unrelated test failure that surfaced once the CheckedAllocator was enabled which had to do with casting decimals of certain precision. The fix is included in this PR as well. 
--- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 111 +++-- go/adbc/driver/snowflake/record_reader.go | 1 + go/adbc/go.mod | 2 +- go/adbc/go.sum | 4 +- 5 files changed, 64 insertions(+), 56 deletions(-) diff --git a/go/adbc/driver/snowflake/bulk_ingestion.go b/go/adbc/driver/snowflake/bulk_ingestion.go index 17d76195d..2e18428bd 100644 --- a/go/adbc/driver/snowflake/bulk_ingestion.go +++ b/go/adbc/driver/snowflake/bulk_ingestion.go @@ -342,7 +342,7 @@ func writeParquet( defer pqWriter.Close() for rec := range in { - err = pqWriter.Write(rec) + err = pqWriter.WriteBuffered(rec) rec.Release() if err != nil { return err diff --git a/go/adbc/driver/snowflake/driver_test.go b/go/adbc/driver/snowflake/driver_test.go index af94e6108..abc738306 100644 --- a/go/adbc/driver/snowflake/driver_test.go +++ b/go/adbc/driver/snowflake/driver_test.go @@ -325,19 +325,14 @@ type SnowflakeTests struct { stmt adbc.Statement } -func (suite *SnowflakeTests) SetupSuite() { +func (suite *SnowflakeTests) SetupTest() { var err error suite.ctx = context.Background() suite.driver = suite.Quirks.SetupDriver(suite.T()) suite.db, err = suite.driver.NewDatabase(suite.Quirks.DatabaseOptions()) suite.NoError(err) -} - -func (suite *SnowflakeTests) SetupTest() { - var err error suite.cnxn, err = suite.db.Open(suite.ctx) suite.NoError(err) - suite.stmt, err = suite.cnxn.NewStatement() suite.NoError(err) } @@ -345,11 +340,11 @@ func (suite *SnowflakeTests) SetupTest() { func (suite *SnowflakeTests) TearDownTest() { suite.NoError(suite.stmt.Close()) suite.NoError(suite.cnxn.Close()) -} - -func (suite *SnowflakeTests) TearDownSuite() { + suite.Quirks.TearDownDriver(suite.T(), suite.driver) + suite.cnxn = nil suite.NoError(suite.db.Close()) suite.db = nil + suite.driver = nil } func (suite *SnowflakeTests) TestSqlIngestTimestamp() { @@ -409,9 +404,6 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { 
suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_bind")) suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_bind_stream")) - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(suite.T(), 0) - sc := arrow.NewSchema([]arrow.Field{ { Name: "col_int64", Type: arrow.PrimitiveTypes.Int64, @@ -467,7 +459,7 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { }, }, nil) - bldr := array.NewRecordBuilder(mem, sc) + bldr := array.NewRecordBuilder(suite.Quirks.Alloc(), sc) defer bldr.Release() bldr.Field(0).(*array.Int64Builder).AppendValues([]int64{-1, 0, 25}, nil) @@ -538,9 +530,6 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { func (suite *SnowflakeTests) TestSqlIngestRoundtripTypes() { suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_roundtrip")) - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(suite.T(), 0) - sc := arrow.NewSchema([]arrow.Field{ {
(arrow) branch main updated (e254c43c09 -> 34f0427620)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from e254c43c09 GH-41389: [Python] Expose byte_width and bit_width of ExtensionType in terms of the storage type (#41413) add 34f0427620 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.21.1 to 2.22.0 in /go (#41743) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch dependabot/go_modules/go/adbc/google.golang.org/protobuf-1.34.1 deleted (was 426d3be47)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/google.golang.org/protobuf-1.34.1 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 426d3be47 chore(go/adbc): bump google.golang.org/protobuf in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 987cd193e chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827) 987cd193e is described below commit 987cd193e25fd314a7eb21d31d705a602b0413be Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Tue May 21 11:24:26 2024 -0400 chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827) Bumps google.golang.org/protobuf from 1.33.0 to 1.34.1. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=google.golang.org/protobuf=go_modules=1.33.0=1.34.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. 
You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go/adbc/go.mod | 2 +- go/adbc/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/adbc/go.mod b/go/adbc/go.mod index aed680c33..8a1fecb60 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -31,7 +31,7 @@ require ( golang.org/x/sync v0.7.0 golang.org/x/tools v0.21.0 google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.34.0 + google.golang.org/protobuf v1.34.1 ) require ( diff --git a/go/adbc/go.sum b/go/adbc/go.sum index 971cb267e..1f6db2e83 100644 --- a/go/adbc/go.sum +++ b/go/adbc/go.sum @@ -192,8 +192,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= -google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= 
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
(arrow-site) branch main updated: Add Dane Pitkin to committers list (#519)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-site.git The following commit(s) were added to refs/heads/main by this push: new 3c31678f6e1 Add Dane Pitkin to committers list (#519) 3c31678f6e1 is described below commit 3c31678f6e1303e623f4511e061ffc3dea20f6bc Author: Dane Pitkin AuthorDate: Thu May 16 15:43:53 2024 -0400 Add Dane Pitkin to committers list (#519) I was accepted as a committer to the Arrow project on May 7, 2024. See https://lists.apache.org/thread/9ysqj4qbmhhl8lp101ltq62ndf8vgsq3. --- _data/committers.yml | 4 1 file changed, 4 insertions(+) diff --git a/_data/committers.yml b/_data/committers.yml index 5705ca33fa6..1740e4f20dd 100644 --- a/_data/committers.yml +++ b/_data/committers.yml @@ -276,6 +276,10 @@ role: Committer alias: thinkharderdev affiliation: Coralogix +- name: Dane Pitkin + role: Committer + alias: dpitkin + affiliation: Voltron Data - name: David Alves role: Committer alias: dralves
(arrow-adbc) branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.10.0 deleted (was 840e6633c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.10.0 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 840e6633c chore(go/adbc): bump github.com/snowflakedb/gosnowflake in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated (e184bce52 -> abe6d6aa4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from e184bce52 fix(csharp): Fix packing process (#1862) add abe6d6aa4 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.9.0 to 1.10.0 in /go/adbc (#1857) No new revisions were added by this update. Summary of changes: go/adbc/go.mod | 4 ++-- go/adbc/go.sum | 10 -- 2 files changed, 6 insertions(+), 8 deletions(-)
(arrow) branch main updated (63fddd7b2f -> e1de9c52d5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 63fddd7b2f GH-41660: [CI][Java] Restore devtoolset relatead GANDIVA_CXX_FLAGS (#41661) add e1de9c52d5 GH-41541: [Go][Parquet] Fix writer performance regression (#41638) No new revisions were added by this update. Summary of changes: go/parquet/internal/encoding/types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
(arrow) branch main updated: GH-34484: [Substrait] add an option to disable augmented fields (#41583)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new a4a5cf1fbe GH-34484: [Substrait] add an option to disable augmented fields (#41583) a4a5cf1fbe is described below commit a4a5cf1fbe804f5b47184afe91b3c243e0487ab2 Author: David Sisson AuthorDate: Tue May 14 09:28:50 2024 -0700 GH-34484: [Substrait] add an option to disable augmented fields (#41583) ### Rationale for this change Augmented fields interfere with the schema passing between nodes. When enabled they cause names/schema mismatching at the end of the plan. ### What changes are included in this PR? Adds an option to disable augmented fields (defaulting to adding them), connects it everywhere it is called, and disables it in ReadRel conversion. ### Are these changes tested? Yes. ### Are there any user-facing changes? There are no API related changes however this will allow Substrait plans that consume local files to work without requiring a project/emit relation after the read relation to remove the unexpected fields. 
* GitHub Issue: #34484 Authored-by: David Sisson Signed-off-by: Matt Topol --- cpp/src/arrow/acero/sink_node.cc | 1 + cpp/src/arrow/dataset/discovery_test.cc| 3 +- cpp/src/arrow/dataset/file_parquet_test.cc | 5 +- cpp/src/arrow/dataset/scanner.cc | 35 ++ cpp/src/arrow/dataset/scanner.h| 9 ++- cpp/src/arrow/dataset/scanner_test.cc | 12 ++-- cpp/src/arrow/dataset/test_util_internal.h | 18 +++-- .../arrow/engine/substrait/relation_internal.cc| 1 + cpp/src/arrow/engine/substrait/serde_test.cc | 81 ++ 9 files changed, 138 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc index 4ab6b4537d..66f447aa87 100644 --- a/cpp/src/arrow/acero/sink_node.cc +++ b/cpp/src/arrow/acero/sink_node.cc @@ -423,6 +423,7 @@ class ConsumingSinkNode : public ExecNode, std::atomic backpressure_counter_ = 0; std::unique_ptr sequencer_; }; + static Result MakeTableConsumingSinkNode(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc index 92cec7f324..981146b799 100644 --- a/cpp/src/arrow/dataset/discovery_test.cc +++ b/cpp/src/arrow/dataset/discovery_test.cc @@ -144,7 +144,8 @@ class FileSystemDatasetFactoryTest : public DatasetFactoryTest { } options_ = std::make_shared(); options_->dataset_schema = schema; -ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(*schema)); +ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default( + *schema, options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); ASSERT_OK_AND_ASSIGN(dataset_, factory_->Finish(schema)); ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset_->GetFragments()); diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 76cd0af3b8..bf626826d4 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -330,8 +330,9 @@ 
TEST_F(TestParquetFileFormat, CachedMetadata) { // Read the file the first time, will read metadata auto options = std::make_shared(); options->filter = literal(true); - ASSERT_OK_AND_ASSIGN(auto projection_descr, - ProjectionDescr::FromNames({"x"}, *test_schema)); + ASSERT_OK_AND_ASSIGN( + auto projection_descr, + ProjectionDescr::FromNames({"x"}, *test_schema, options->add_augmented_fields)); options->projected_schema = projection_descr.schema; options->projection = projection_descr.expression; ASSERT_OK_AND_ASSIGN(auto generator, fragment->ScanBatchesAsync(options)); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 18981d1451..a856a792a2 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -211,7 +211,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // create the projected schema only if the provided expressions // produces valid set of fields. ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*projected_schema)); + ProjectionDescr::Default( + *projected_schema, scan_options->add_augmented_fie
(arrow) branch main updated (bd444106af -> 1c62df5255)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from bd444106af GH-39645: [Python] Fix read_table for encrypted parquet (#39438) add 1c62df5255 GH-41179: [Docs] Documentation for Dissociated IPC Protocol (#41180) No new revisions were added by this update. Summary of changes: docs/source/format/Columnar.rst| 2 + docs/source/format/DissociatedIPC.rst | 403 + .../ClientFlowchart.mmd} | 34 +- .../SequenceDiagramSame.mmd} | 38 +- .../DissociatedIPC/SequenceDiagramSeparate.mmd | 44 +++ docs/source/format/Flight.rst | 2 + docs/source/format/index.rst | 1 + 7 files changed, 499 insertions(+), 25 deletions(-) create mode 100644 docs/source/format/DissociatedIPC.rst copy docs/source/format/{Flight/DoExchange.mmd => DissociatedIPC/ClientFlowchart.mmd} (52%) copy docs/source/format/{Flight/DoExchange.mmd => DissociatedIPC/SequenceDiagramSame.mmd} (50%) create mode 100644 docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd
(arrow) branch main updated: GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5252c6ce13 GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595) 5252c6ce13 is described below commit 5252c6ce13694fa31dbcb2623d1629cd8fe53a47 Author: Alex Shcherbakov AuthorDate: Wed May 8 22:46:45 2024 +0300 GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595) This PR includes 2 fixes: 1. support reading `date64` columns (as write is supported) 2. properly validate list-like data types (list of unsupported is unsupported) ### Rationale for this change See #41594 ### What changes are included in this PR? 1. Added `date64` reading & conversion funcs similar to `date32` 2. Refactored date type validation ### Are these changes tested? a55cd5324d2c47932410b0c7a9c46075386645d2 ### Are there any user-facing changes? No. 
* GitHub Issue: #41594 Authored-by: candiduslynx Signed-off-by: Matt Topol --- go/arrow/csv/common.go | 40 ++ go/arrow/csv/reader.go | 74 go/arrow/csv/reader_test.go | 8 + go/arrow/csv/testdata/header.csv | 8 ++--- go/arrow/csv/testdata/types.csv | 8 ++--- go/arrow/csv/transformer.go | 69 + 6 files changed, 86 insertions(+), 121 deletions(-) diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 4455c8b782..06fed69a77 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -239,21 +239,31 @@ func WithStringsReplacer(replacer *strings.Replacer) Option { func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { - switch ft := f.Type.(type) { - case *arrow.BooleanType: - case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: - case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: - case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: - case *arrow.StringType, *arrow.LargeStringType: - case *arrow.TimestampType: - case *arrow.Date32Type, *arrow.Date64Type: - case *arrow.Decimal128Type, *arrow.Decimal256Type: - case *arrow.ListType, *arrow.LargeListType, *arrow.FixedSizeListType: - case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: - case arrow.ExtensionType: - case *arrow.NullType: - default: - panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) + if !typeSupported(f.Type) { + panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, f.Type)) } } } + +func typeSupported(dt arrow.DataType) bool { + switch dt := dt.(type) { + case *arrow.BooleanType: + case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: + case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: + case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: + case *arrow.StringType, *arrow.LargeStringType: + case *arrow.TimestampType: + case *arrow.Date32Type, 
*arrow.Date64Type: + case *arrow.Decimal128Type, *arrow.Decimal256Type: + case *arrow.MapType: + return false + case arrow.ListLikeType: + return typeSupported(dt.Elem()) + case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: + case arrow.ExtensionType: + case *arrow.NullType: + default: + return false + } + return true +} diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 18f1083e6a..46591a9a5a 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -474,6 +474,10 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDate32(bldr, str) } + case *arrow.Date64Type: + return func(str string) { + r.parseDate64(bldr, str) + } case *arrow.Time32Type: return func(str string) { r.parseTime32(bldr, str, dt.Unit) @@ -486,17 +490,13 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDecimal256(bldr, str, dt.Precision, dt.Scale)
(arrow) branch main updated (f462ec7e6b -> f672027654)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from f462ec7e6b MINOR: [Go] Bump golang.org/x/sys from 0.19.0 to 0.20.0 in /go (#41554) add f672027654 MINOR: [Go] Bump google.golang.org/protobuf from 1.34.0 to 1.34.1 in /go (#41553) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch main updated (e21952f969 -> f462ec7e6b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from e21952f969 GH-40750: [C++][Python] Map child Array constructed from keys and items shouldn't have offset (#40871) add f462ec7e6b MINOR: [Go] Bump golang.org/x/sys from 0.19.0 to 0.20.0 in /go (#41554) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch dependabot/go_modules/go/google.golang.org/protobuf-1.34.0 deleted (was 95d38e2794)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/google.golang.org/protobuf-1.34.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 95d38e2794 MINOR: [Go] Bump google.golang.org/protobuf from 1.33.0 to 1.34.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (56437409d1 -> 2b06472305)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 56437409d1 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go (#41512) add 2b06472305 MINOR: [Go] Bump google.golang.org/protobuf from 1.33.0 to 1.34.0 in /go (#41513) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch dependabot/go_modules/go/github.com/hamba/avro/v2-2.21.1 deleted (was 154bdb8655)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/github.com/hamba/avro/v2-2.21.1 in repository https://gitbox.apache.org/repos/asf/arrow.git was 154bdb8655 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (cc9e65fb80 -> 56437409d1)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from cc9e65fb80 MINOR: [C#] Bump Grpc.Tools from 2.62.0 to 2.63.0 in /csharp (#41523) add 56437409d1 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go (#41512) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch main updated: feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new b64b73aa0 feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796) b64b73aa0 is described below commit b64b73aa0cb24a0179e3e101f50e52a830d57d95 Author: David Li AuthorDate: Wed May 1 01:06:01 2024 +0900 feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796) Fixes #1657. --- go/adbc/adbc.go| 4 ++-- go/adbc/driver/flightsql/cmd/testserver/main.go| 23 +- .../driver/flightsql/flightsql_adbc_server_test.go | 14 ++--- go/adbc/driver/flightsql/flightsql_adbc_test.go| 14 ++--- go/adbc/driver/flightsql/flightsql_connection.go | 14 ++--- go/adbc/driver/flightsql/flightsql_database.go | 6 +++--- go/adbc/driver/flightsql/flightsql_driver.go | 2 +- go/adbc/driver/flightsql/flightsql_statement.go| 10 +- go/adbc/driver/flightsql/record_reader.go | 10 +- go/adbc/driver/flightsql/record_reader_test.go | 12 +-- go/adbc/driver/internal/driverbase/connection.go | 6 +++--- go/adbc/driver/internal/driverbase/database.go | 2 +- go/adbc/driver/internal/driverbase/driver.go | 2 +- go/adbc/driver/internal/driverbase/driver_test.go | 6 +++--- go/adbc/driver/internal/shared_utils.go| 6 +++--- go/adbc/driver/panicdummy/panicdummy_adbc.go | 6 +++--- go/adbc/driver/snowflake/bulk_ingestion.go | 12 +-- go/adbc/driver/snowflake/connection.go | 4 ++-- go/adbc/driver/snowflake/driver.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 8 go/adbc/driver/snowflake/record_reader.go | 10 +- go/adbc/driver/snowflake/statement.go | 6 +++--- go/adbc/drivermgr/wrapper.go | 6 +++--- go/adbc/drivermgr/wrapper_sqlite_test.go | 6 +++--- go/adbc/go.mod | 8 go/adbc/go.sum | 16 +++ go/adbc/pkg/flightsql/driver.go| 8 go/adbc/pkg/panicdummy/driver.go | 8 go/adbc/pkg/snowflake/driver.go| 8 go/adbc/sqldriver/driver.go| 10 +- 
go/adbc/sqldriver/driver_internals_test.go | 10 +- go/adbc/sqldriver/flightsql/flightsql.go | 2 +- go/adbc/sqldriver/flightsql/flightsql_test.go | 8 go/adbc/standard_schemas.go| 2 +- go/adbc/utils/utils.go | 2 +- go/adbc/validation/validation.go | 6 +++--- python/adbc_driver_flightsql/tests/test_errors.py | 6 ++ 37 files changed, 148 insertions(+), 137 deletions(-) diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index 8622e71cb..b47f946f0 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -40,8 +40,8 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v16/arrow" - "github.com/apache/arrow/go/v16/arrow/array" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/go/adbc/driver/flightsql/cmd/testserver/main.go b/go/adbc/driver/flightsql/cmd/testserver/main.go index 8ce65c9f7..9951df235 100644 --- a/go/adbc/driver/flightsql/cmd/testserver/main.go +++ b/go/adbc/driver/flightsql/cmd/testserver/main.go @@ -32,11 +32,11 @@ import ( "strings" "sync" - "github.com/apache/arrow/go/v16/arrow" - "github.com/apache/arrow/go/v16/arrow/array" - "github.com/apache/arrow/go/v16/arrow/flight" - "github.com/apache/arrow/go/v16/arrow/flight/flightsql" - "github.com/apache/arrow/go/v16/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v17/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" @@ -268,6 +268,9 @@ func (srv *ExampleServer) DoGetPreparedStatement(ctx context.Context, cmd flight }() out = ch return + case "stateless_prepared_statement":
(arrow-adbc) branch main updated: docs: update driver status table (#1797)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 25456bf55 docs: update driver status table (#1797) 25456bf55 is described below commit 25456bf554c0d4748d41f7dac2634746f666dce0 Author: David Li AuthorDate: Tue Apr 30 23:56:05 2024 +0900 docs: update driver status table (#1797) Fixes #1786. --- docs/source/driver/postgresql.rst | 9 + docs/source/driver/status.rst | 22 ++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/docs/source/driver/postgresql.rst b/docs/source/driver/postgresql.rst index c76534d20..8ba54a013 100644 --- a/docs/source/driver/postgresql.rst +++ b/docs/source/driver/postgresql.rst @@ -165,6 +165,15 @@ The PostgreSQL driver mostly supports features defined in the ADBC API specification 1.0.0, but not all cases are fully implemented (particularly around bind parameters and prepared statements). +Bind Parameters and Prepared Statements +--- + +The PostgreSQL driver only supports executing prepared statements with +parameters that do not return result sets (basically, an INSERT with +parameters). Queries that return result sets are difficult with prepared +statements because the driver is built around using COPY for best +performance, which is not supported in this context. + Bulk Ingestion -- diff --git a/docs/source/driver/status.rst b/docs/source/driver/status.rst index 7337dd4e8..b25f2f492 100644 --- a/docs/source/driver/status.rst +++ b/docs/source/driver/status.rst @@ -24,7 +24,7 @@ Implementation Status **Experimental** drivers are not feature-complete and the implementation is still progressing. **Beta** drivers are (mostly) feature-complete but have only been available for a short time. -**Stable** drivers are feature-complete (as much as possible for the underlying database) and have been available/tested for a while. 
+**Stable** drivers are (mostly) feature-complete (as much as possible for the underlying database) and have been available/tested for a while. .. list-table:: :header-rows: 1 @@ -42,7 +42,7 @@ Implementation Status * - Flight SQL (Go) - C, Go - Go - - Beta + - Stable * - Flight SQL (Java) - Java @@ -57,17 +57,17 @@ Implementation Status * - PostgreSQL - C - C++ - - Beta + - Stable * - SQLite - C - C - - Beta + - Stable * - Snowflake - C, Go - Go - - Experimental + - Stable .. [#supported-languages] C drivers are usable from Go, Python, and Ruby as well. @@ -183,7 +183,7 @@ Update Queries * - PostgreSQL - N/A - N/A - - Y + - Y [#postgresql-prepared]_ - Y - Y - Y @@ -196,6 +196,12 @@ Update Queries - Y - Y +.. [#postgresql-prepared] The PostgreSQL driver only supports executing + prepared statements with parameters that do not return result sets + (basically, an INSERT with parameters). Queries that return result sets + are difficult with prepared statements because the driver is built around + using COPY for best performance, which is not supported in this context. + .. list-table:: Connection/database-level features :header-rows: 1 @@ -207,7 +213,7 @@ Update Queries * - Flight SQL (Go) - N - Y - - N + - Y * - Flight SQL (Java) - Y @@ -222,7 +228,7 @@ Update Queries * - PostgreSQL - Y - Y - - N + - Y * - SQLite - Y
(arrow-adbc) branch main updated: ci: disallow pings in PR body text (#1798)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 02aecca50 ci: disallow pings in PR body text (#1798) 02aecca50 is described below commit 02aecca50a9792d6087076b9fe44856615de173b Author: David Li AuthorDate: Tue Apr 30 23:55:04 2024 +0900 ci: disallow pings in PR body text (#1798) Fixes #1739. --- .github/workflows/dev_pr.yml | 8 1 file changed, 8 insertions(+) diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index a001695cf..805677cfb 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -57,3 +57,11 @@ jobs: PR_TITLE: ${{ github.event.pull_request.title }} run: | python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE" + + # Pings make it into the commit message where they annoy the user every + # time the commit gets pushed somewhere + - name: Check PR body for pings +env: + PR_BODY: ${{ github.event.pull_request.body }} +run: | + [[ "${PR_BODY}" =~ @[a-zA-Z0-9]+ ]] && exit 1 || true
(arrow) branch dependabot/go_modules/go/github.com/apache/thrift-0.20.0 deleted (was 82b45881df)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/github.com/apache/thrift-0.20.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 82b45881df MINOR: [Go] Bump github.com/apache/thrift from 0.19.0 to 0.20.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (c87073737b -> e3db586eb3)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from c87073737b MINOR: [R] refactor arrow_mask to include aggregations list (#41414) add e3db586eb3 MINOR: [Go] Bump github.com/apache/thrift from 0.19.0 to 0.20.0 in /go (#40777) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch main updated (71072e06c -> 59eede462)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 71072e06c feat(csharp): Implement remaining functions in 1.0 spec (#1773) add 59eede462 fix(go/adbc/driver/flightsql): should use `ctx.Err().Error()` (#1769) No new revisions were added by this update. Summary of changes: go/adbc/driver/flightsql/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow-adbc) branch main updated: fix(go/adbc/driver/snowflake): handle quotes properly (#1738)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 8bd0e9bd3 fix(go/adbc/driver/snowflake): handle quotes properly (#1738) 8bd0e9bd3 is described below commit 8bd0e9bd308a4a1d8222d4c8317e98fd39e8d5ee Author: Matt Topol AuthorDate: Thu Apr 25 16:12:05 2024 -0400 fix(go/adbc/driver/snowflake): handle quotes properly (#1738) fixes #1721 --- go/adbc/driver/snowflake/bulk_ingestion.go | 24 +++- go/adbc/driver/snowflake/connection.go | 6 +++--- go/adbc/driver/snowflake/driver.go | 5 + go/adbc/driver/snowflake/driver_test.go| 12 go/adbc/driver/snowflake/statement.go | 6 +++--- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/go/adbc/driver/snowflake/bulk_ingestion.go b/go/adbc/driver/snowflake/bulk_ingestion.go index 5e1f1314f..9ec64f6e5 100644 --- a/go/adbc/driver/snowflake/bulk_ingestion.go +++ b/go/adbc/driver/snowflake/bulk_ingestion.go @@ -29,7 +29,6 @@ import ( "io" "math" "runtime" - "strconv" "strings" "sync" @@ -130,10 +129,13 @@ func (st *statement) ingestRecord(ctx context.Context) (nrows int64, err error) st.bound = nil }() - var initialRows int64 + var ( + initialRows int64 + target = quoteTblName(st.targetTable) + ) // Check final row count of target table to get definitive rows affected - initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) if err != nil { st.bound.Release() return @@ -182,13 +184,13 @@ func (st *statement) ingestRecord(ctx context.Context) (nrows int64, err error) } // Load the uploaded file into the target table - _, err = st.cnxn.cn.ExecContext(ctx, copyQuery, []driver.NamedValue{{Value: strconv.Quote(st.targetTable)}}) + _, err = st.cnxn.cn.ExecContext(ctx, copyQuery, []driver.NamedValue{{Value: target}}) if err != nil { return } // 
Check final row count of target table to get definitive rows affected - nrows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + nrows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) nrows = nrows - initialRows return } @@ -204,9 +206,13 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) st.streamBind = nil }() - var initialRows int64 + var ( + initialRows int64 + target = quoteTblName(st.targetTable) + ) + // Check final row count of target table to get definitive rows affected - initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) if err != nil { return } @@ -214,7 +220,7 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) defer func() { // Always check the resulting row count, even in the case of an error. We may have ingested part of the data. ctx := context.Background() // TODO(joellubi): switch to context.WithoutCancel(ctx) once we're on Go 1.21 - n, countErr := countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + n, countErr := countRowsInTable(ctx, st.cnxn.sqldb, target) nrows = n - initialRows // Ingestion, row-count check, or both could have failed @@ -268,7 +274,7 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) } // Kickoff background tasks to COPY Parquet files into Snowflake table as they are uploaded - fileReady, finishCopy, cancelCopy := runCopyTasks(ctx, st.cnxn.cn, strconv.Quote(st.targetTable), int(st.ingestOptions.copyConcurrency)) + fileReady, finishCopy, cancelCopy := runCopyTasks(ctx, st.cnxn.cn, target, int(st.ingestOptions.copyConcurrency)) // Read Parquet files from buffer pool and upload to Snowflake stage in parallel g.Go(func() error { diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go index 41a8c1665..94223bb92 100644 --- 
a/go/adbc/driver/snowflake/connection.go +++ b/go/adbc/driver/snowflake/connection.go @@ -1212,12 +1212,12 @@ func (c *connectionImpl) getStringQuery(query string) (string, error) { func (c *connectionImpl) GetTableSchema(ctx context.Context, catalog *string, dbSchema *string, tableNa
(arrow-experiments) branch main updated: add cudf-flight-ucx example (#28)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-experiments.git The following commit(s) were added to refs/heads/main by this push: new 05e4e88 add cudf-flight-ucx example (#28) 05e4e88 is described below commit 05e4e888b19dbd98b95d8984a8d1f97fb6570d00 Author: Matt Topol AuthorDate: Thu Apr 25 13:25:09 2024 -0400 add cudf-flight-ucx example (#28) * add cudf-flight-ucx example * Apply suggestions from code review Co-authored-by: Sutou Kouhei * Update dissociated-ipc/cudf-flight-poc.cc Co-authored-by: Sutou Kouhei * ran linting * Apply suggestions from code review * split poc file for readability * Update dissociated-ipc/README.md Co-authored-by: Sutou Kouhei * rename files - Co-authored-by: Sutou Kouhei Co-authored-by: Ian Cook --- .clang-format | 21 ++ .gitignore| 21 ++ data/taxi-data/README.md | 22 ++ data/taxi-data/train.parquet | 3 + dissociated-ipc/CMakeLists.txt| 112 ++ dissociated-ipc/README.md | 55 + dissociated-ipc/cudf-flight-client.cc | 384 dissociated-ipc/cudf-flight-server.cc | 408 ++ dissociated-ipc/cudf-flight-ucx.cc| 39 dissociated-ipc/cudf-flight-ucx.h | 38 dissociated-ipc/ucx_client.cc | 73 ++ dissociated-ipc/ucx_client.h | 40 dissociated-ipc/ucx_conn.cc | 355 + dissociated-ipc/ucx_conn.h| 90 dissociated-ipc/ucx_server.cc | 280 +++ dissociated-ipc/ucx_server.h | 88 dissociated-ipc/ucx_utils.cc | 287 dissociated-ipc/ucx_utils.h | 122 ++ 18 files changed, 2438 insertions(+) diff --git a/.clang-format b/.clang-format new file mode 100644 index 000..9448dc8 --- /dev/null +++ b/.clang-format @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +BasedOnStyle: Google +ColumnLimit: 90 +DerivePointerAlignment: false +IncludeBlocks: Preserve diff --git a/.gitignore b/.gitignore new file mode 100644 index 000..d997483 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +vendored +build +.vscode +cufile.log diff --git a/data/taxi-data/README.md b/data/taxi-data/README.md new file mode 100644 index 000..6a7416e --- /dev/null +++ b/data/taxi-data/README.md @@ -0,0 +1,22 @@ + + +# taxi-data + +A small subset of the public [NYC Taxi Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page) used in the dissociated-ipc example. 
diff --git a/data/taxi-data/train.parquet b/data/taxi-data/train.parquet new file mode 100755 index 000..7bf702b --- /dev/null +++ b/data/taxi-data/train.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854cf53ab8669aa260a8ae65beafe880ab1a0232dbdac09705fb9b6f3f84eacd +size 38521857 diff --git a/dissociated-ipc/CMakeLists.txt b/dissociated-ipc/CMakeLists.txt new file mode 100644 index 000..fa46397 --- /dev/null +++ b/dissociated-ipc/CMakeLists.txt @@ -0,0 +1,112 @@ +# Licensed to the Ap
(arrow-adbc) branch main updated (35d2c76f1 -> 96e05a0f1)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 35d2c76f1 feat(csharp/src/Apache.Arrow.Adbc): Cleanup use of List in APIs and implementation (#1761) add 96e05a0f1 fix(go/adbc/driver/snowflake): comment format (#1768) No new revisions were added by this update. Summary of changes: go/adbc/driver/snowflake/connection.go | 95 +- 1 file changed, 48 insertions(+), 47 deletions(-)
(arrow) branch main updated (f8ef09a2b9 -> 7b62460551)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from f8ef09a2b9 GH-41263: [C#][Integration] Ensure offset is considered in all branches of the bitmap comparison (#41264) add 7b62460551 GH-40563: [Go] Unable to JSON marshal float64 arrays which contain a NaN value (#41109) No new revisions were added by this update. Summary of changes: go/arrow/array/float16.go | 16 - go/arrow/array/numeric.gen.go | 34 -- go/arrow/array/numeric.gen.go.tmpl | 34 ++ go/arrow/array/numeric_test.go | 91 +- go/arrow/array/numericbuilder.gen_test.go | 45 + go/arrow/array/numericbuilder.gen_test.go.tmpl | 23 +++ go/arrow/float16/float16.go| 2 + 7 files changed, 235 insertions(+), 10 deletions(-)
(arrow) branch main updated (48a9639bb0 -> ec2d7cbfb4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 48a9639bb0 GH-41140: [C#] Account for offset and length in union arrays (#41165) add ec2d7cbfb4 GH-41159: [Go][Parquet] Improvement Parquet BitWriter WriteVlqInt Performance (#41160) No new revisions were added by this update. Summary of changes: go/parquet/internal/utils/bit_reader_test.go | 17 + go/parquet/internal/utils/bit_writer.go | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-)
(arrow) branch main updated (835e218735 -> 433ceef8a2)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 835e218735 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go (#41082) add 433ceef8a2 MINOR: [Go] Bump golang.org/x/sync from 0.6.0 to 0.7.0 in /go (#41079) No new revisions were added by this update. Summary of changes:
(arrow) branch main updated (75a100a113 -> 835e218735)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 75a100a113 GH-38768: [Python] Empty slicing an array backwards beyond the start is now empty (#40682) add 835e218735 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go (#41082) No new revisions were added by this update. Summary of changes: go/go.mod | 6 +++--- go/go.sum | 12 ++-- 2 files changed, 9 insertions(+), 9 deletions(-)
(arrow) branch dependabot/go_modules/go/golang.org/x/sync-0.7.0 deleted (was fde47fd718)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/golang.org/x/sync-0.7.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was fde47fd718 MINOR: [Go] Bump golang.org/x/sync from 0.6.0 to 0.7.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch dependabot/go_modules/go/golang.org/x/tools-0.20.0 deleted (was 0f25267df5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/golang.org/x/tools-0.20.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 0f25267df5 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new e9f2122b4 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702) e9f2122b4 is described below commit e9f2122b449e6a5a5a22b3c8365cedb4b5e561fc Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Tue Apr 2 11:51:06 2024 -0400 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702) Bumps [github.com/snowflakedb/gosnowflake](https://github.com/snowflakedb/gosnowflake) from 1.8.0 to 1.9.0. Release notes Sourced from https://github.com/snowflakedb/gosnowflake/releases;>github.com/snowflakedb/gosnowflake's releases. Release Please check Snowflake https://docs.snowflake.com/en/release-notes/clients-drivers/golang;>community page for release notes. Commits https://github.com/snowflakedb/gosnowflake/commit/a0ec4810a8c299eb5ad663533bd915326af828be;>a0ec481 MINOR: Bumped up GoLang connector MINOR version from 1.8.0 to 1.9.0 (https://redirect.github.com/snowflakedb/gosnowflake/issues/1086;>#1086) https://github.com/snowflakedb/gosnowflake/commit/2141603917def9b5950561213845915edca14c44;>2141603 Allow enableHigherPrecision to be used in arrow batches (https://redirect.github.com/snowflakedb/gosnowflake/issues/1080;>#1080) https://github.com/snowflakedb/gosnowflake/commit/bd8b73b051adbe96b9d701da3b113f52db9e028b;>bd8b73b ArrowBatch high precision fails when using compute divide from int64 to bigDe... 
https://github.com/snowflakedb/gosnowflake/commit/5b174a341d4062bac62cc422ec90ec82538c58d4;>5b174a3 SNOW-920995 add CI configuration for regression tests (https://redirect.github.com/snowflakedb/gosnowflake/issues/1075;>#1075) https://github.com/snowflakedb/gosnowflake/commit/bcb26f9b7ff621db8b03ea6810b955d99f19b048;>bcb26f9 SNOW-1256926 Add converter from snowflake date/time format to go (https://redirect.github.com/snowflakedb/gosnowflake/issues/1077;>#1077) https://github.com/snowflakedb/gosnowflake/commit/656ba611df5006f1485a442c118efa3a065cbc69;>656ba61 SNOW-1259439 Extract lint and format to separate build (https://redirect.github.com/snowflakedb/gosnowflake/issues/1078;>#1078) https://github.com/snowflakedb/gosnowflake/commit/3a5605dd2651d269d27776e09e5f33f1982dd348;>3a5605d SNOW-1234152 Add timestamp to bulk array insert test (https://redirect.github.com/snowflakedb/gosnowflake/issues/1074;>#1074) https://github.com/snowflakedb/gosnowflake/commit/0722bc9d1248ce4b3596c4c27e7acf4cfc814d33;>0722bc9 SNOW-1230690 Add UnsupportedArg check when requesting S3 accelerated config (... 
https://github.com/snowflakedb/gosnowflake/commit/1cbc05fef4765be72466d5c8433a46a2ce1e454c;>1cbc05f fix: Fix data race when initializing logging (https://redirect.github.com/snowflakedb/gosnowflake/issues/1060;>#1060) https://github.com/snowflakedb/gosnowflake/commit/7c2634a31c378fad29764a0e2f34839cf1fae243;>7c2634a Upgrade to apache/arrow/go/v15 (https://redirect.github.com/snowflakedb/gosnowflake/issues/1062;>#1062) Additional commits viewable in https://github.com/snowflakedb/gosnowflake/compare/v1.8.0...v1.9.0;>compare view [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/snowflakedb/gosnowflake=go_modules=1.8.0=1.9.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` w
(arrow-adbc) branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.9.0 deleted (was 4467c5f8c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.9.0 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 4467c5f8c chore(go/adbc): bump github.com/snowflakedb/gosnowflake in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated: MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new e44dc29df9 MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922) e44dc29df9 is described below commit e44dc29df9587a139fe539069c3dafc771256b90 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Mon Apr 1 14:02:32 2024 -0400 MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922) Bumps [github.com/google/flatbuffers](https://github.com/google/flatbuffers) from 24.3.7+incompatible to 24.3.25+incompatible. Release notes Sourced from https://github.com/google/flatbuffers/releases;>github.com/google/flatbuffers's releases. v24.3.25 What's Changed Fix License by https://github.com/p0fi;>@p0fi in https://redirect.github.com/google/flatbuffers/pull/8253;>google/flatbuffers#8253 Fix handling non null-terminated string_views in LookupByKey by https://github.com/mpawlowski-eyeo;>@mpawlowski-eyeo in https://redirect.github.com/google/flatbuffers/pull/8203;>google/flatbuffers#8203 New Contributors https://github.com/p0fi;>@p0fi made their first contribution in https://redirect.github.com/google/flatbuffers/pull/8253;>google/flatbuffers#8253 https://github.com/mpawlowski-eyeo;>@mpawlowski-eyeo made their first contribution in https://redirect.github.com/google/flatbuffers/pull/8203;>google/flatbuffers#8203 Full Changelog: https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25;>https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25 Commits https://github.com/google/flatbuffers/commit/595bf0007ab1929570c7671f091313c8fc20644e;>595bf00 FlatBuffers Version v24.3.25 https://github.com/google/flatbuffers/commit/0cfb7eb80b05c058e19e50fb575263908e601469;>0cfb7eb Fix handling non 
null-terminated string_views in LookupByKey (https://redirect.github.com/google/flatbuffers/issues/8203;>#8203) https://github.com/google/flatbuffers/commit/67eb95de9281087ccbba9aafd6e8ab1958d12045;>67eb95d presubmit.yml: Use xcode 14.2 https://github.com/google/flatbuffers/commit/b1f617fcb2821f67453dc037cd0a6ebd8eb44de0;>b1f617f Fix License (https://redirect.github.com/google/flatbuffers/issues/8253;>#8253) https://github.com/google/flatbuffers/commit/960cd4d635b98fc5daeeafee8b0a5601d45c70ad;>960cd4d Lobster: Support required fields See full diff in https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25;>compare view [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/google/flatbuffers=go_modules=24.3.7+incompatible=24.3.25+incompatible)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. 
You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(arrow) branch main updated: GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 68241d8a86 GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889) 68241d8a86 is described below commit 68241d8a86e9923cda2b758d10176b8dfb1cfea7 Author: wayne AuthorDate: Mon Apr 1 12:01:49 2024 -0600 GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889) ### Rationale for this change To enable the use of the flightsql driver's implementation of golang sql interfaces. ### What changes are included in this PR? A new switch branch for handling `array.Duration`. ### Are these changes tested? I manually tested and didn't add new unit tests because none of the other types handled in the same switch block are unit tested. ### Are there any user-facing changes? Just a more complete set of types handled by the sql driver. 
* GitHub Issue: #40888 Authored-by: wayne warren Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/utils.go | 4 go/arrow/flight/flightsql/driver/utils_test.go | 12 2 files changed, 16 insertions(+) diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index a99c045e2e..84cf2110cc 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -104,6 +104,10 @@ func fromArrowType(arr arrow.Array, idx int) (interface{}, error) { return v.ToTime(ts.TimeUnit()), nil case *array.Date64: return c.Value(idx).ToTime(), nil + case *array.Duration: + dt := arr.DataType().(*arrow.DurationType) + duration := time.Duration(c.Value(idx)) * dt.Unit.Multiplier() + return duration, nil case *array.DayTimeInterval: durationDays := time.Duration(c.Value(idx).Days*24) * time.Hour duration := time.Duration(c.Value(idx).Milliseconds) * time.Millisecond diff --git a/go/arrow/flight/flightsql/driver/utils_test.go b/go/arrow/flight/flightsql/driver/utils_test.go index 6b1adfed47..8ea7921b64 100644 --- a/go/arrow/flight/flightsql/driver/utils_test.go +++ b/go/arrow/flight/flightsql/driver/utils_test.go @@ -50,6 +50,10 @@ func Test_fromArrowType(t *testing.T) { {Name: "f15-ts_us", Type: arrow.FixedWidthTypes.Timestamp_ns}, {Name: "f16-d64", Type: arrow.FixedWidthTypes.Date64}, {Name: "f17-dti", Type: arrow.FixedWidthTypes.DayTimeInterval}, + {Name: "f18-duration_s", Type: arrow.FixedWidthTypes.Duration_s}, + {Name: "f19-duration_ms", Type: arrow.FixedWidthTypes.Duration_ms}, + {Name: "f20-duration_us", Type: arrow.FixedWidthTypes.Duration_us}, + {Name: "f21-duration_ns", Type: arrow.FixedWidthTypes.Duration_ns}, } schema := arrow.NewSchema(fields, nil) @@ -90,6 +94,10 @@ func Test_fromArrowType(t *testing.T) { testTime := time.Now() b.Field(15).(*array.Date64Builder).Append(arrow.Date64FromTime(testTime)) b.Field(16).(*array.DayTimeIntervalBuilder).Append(arrow.DayTimeInterval{Days: 1, 
Milliseconds: 1000}) + b.Field(17).(*array.DurationBuilder).Append(1) + b.Field(18).(*array.DurationBuilder).Append(1) + b.Field(19).(*array.DurationBuilder).Append(1) + b.Field(20).(*array.DurationBuilder).Append(1) rec := b.NewRecord() defer rec.Release() @@ -123,4 +131,8 @@ func Test_fromArrowType(t *testing.T) { tf(t, 14, time.Date(1970, 1, 1, 12, 0, 0, 0, time.UTC)) // "f15-ts_us" tf(t, 15, testTime.In(time.UTC).Truncate(24*time.Hour)) // "f16-d64" tf(t, 16, time.Duration(24*time.Hour+time.Second)) // "f17-dti" + tf(t, 17, time.Duration(10)) // "f18-duration_s" + tf(t, 18, time.Duration(100))// "f19-duration_ms" + tf(t, 19, time.Duration(1000)) // "f20-duration_us" + tf(t, 20, time.Duration(1)) // "f21-duration_ns" }
(arrow) branch main updated: GH-40900: [Go] Fix Mallocator Weirdness (#40902)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 71321841eb GH-40900: [Go] Fix Mallocator Weirdness (#40902) 71321841eb is described below commit 71321841eb6d94946de43cccb7f04afe5cf2aa10 Author: Matt Topol AuthorDate: Mon Apr 1 11:15:59 2024 -0400 GH-40900: [Go] Fix Mallocator Weirdness (#40902) ### Rationale for this change With help from @ lidavidm and @ bkietz digging into the linked issue, we found the following: * Using `mtrace` and `strace` didn't produce much enlightenment to what was happening. * If the python adbc_driver_manager was built so that the cython lib is built using `CMAKE_BUILD_TYPE=Debug` then the crash/failure goes away * If the env var `MALLOC_MMAP_THRESHOLD_` is set to 128MB, the crash/failure goes away * It is only reproducible when calling through python, I haven't been able to reproduce it using pure Go * Calling `calloc` again after it fails, still fails * Calling `malloc` + `memset` immediately after the failing `calloc` works perfectly and doesn't fail anymore ### What changes are included in this PR? Adding a comment describing the situation and falling back to `malloc` + `memset` if `calloc` returns an error. If the pointer returned from `malloc` is `nil` then we surface the error. 
* GitHub Issue: #40900 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/memory/mallocator/mallocator.go | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/go/arrow/memory/mallocator/mallocator.go b/go/arrow/memory/mallocator/mallocator.go index 59d240a106..9483bdfc2a 100644 --- a/go/arrow/memory/mallocator/mallocator.go +++ b/go/arrow/memory/mallocator/mallocator.go @@ -60,10 +60,19 @@ func (alloc *Mallocator) Allocate(size int) []byte { } ptr, err := C.calloc(C.size_t(size), 1) if err != nil { - panic(err) + // under some circumstances and allocation patterns, we can end up in a scenario + // where for some reason calloc return ENOMEM even though there is definitely memory + // available for use. So we attempt to fallback to simply doing malloc + memset in + // this case. If malloc returns a nil pointer, then we know we're out of memory + // and will surface the error. + if ptr = C.malloc(C.size_t(size)); ptr == nil { + panic(err) + } + C.memset(ptr, 0, C.size_t(size)) } else if ptr == nil { panic("mallocator: out of memory") } + atomic.AddUint64(, uint64(size)) return unsafe.Slice((*byte)(ptr), size) }
(arrow-adbc) branch main updated: chore(dev/release): slight fix for non-conda verification and docs (#1682)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new a1deb83ad chore(dev/release): slight fix for non-conda verification and docs (#1682) a1deb83ad is described below commit a1deb83ad2e1e997004f18917eb0c1f1c40896cb Author: Matt Topol AuthorDate: Thu Mar 28 16:06:05 2024 -0400 chore(dev/release): slight fix for non-conda verification and docs (#1682) updating the docs and release verification script based on my experience for non-conda verification runs --- dev/release/verify-release-candidate.sh | 2 +- docs/source/development/releasing.rst | 4 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0e2f1f6e3..ca81e78ba 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -455,7 +455,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv cython duckdb pandas protobuf pyarrow pytest setuptools_scm setuptools || exit 1 + maybe_setup_virtualenv cython duckdb pandas protobuf pyarrow pytest setuptools_scm setuptools importlib_resources || exit 1 maybe_setup_conda --file "${ADBC_DIR}/ci/conda_env_python.txt" || exit 1 if [ "${USE_CONDA}" -gt 0 ]; then diff --git a/docs/source/development/releasing.rst b/docs/source/development/releasing.rst index b758c0d30..3b930ee79 100644 --- a/docs/source/development/releasing.rst +++ b/docs/source/development/releasing.rst @@ -213,9 +213,13 @@ How to Verify Release Candidates - C and C++ compilers (or the equivalent of ``build-essential`` for your platform) - Python 3 - Ruby with headers + - meson is required - bundler, rake, red-arrow, and test-unit Ruby gems - GLib and gobject-introspection with headers + - pkg-config or cmake must be able to 
find libarrow-glib.so + - GI_TYPELIB_PATH should be set to the path to the girepository-1.0 directory - Java JRE and JDK (Java 8+) + - the javadoc command must also be accessible - Go - CMake, ninja-build, libpq (with headers), SQLite (with headers)
(arrow) branch update-go-readme deleted (was 88484e638b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git was 88484e638b GH-40847: [Go] update readme The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (950fbb62ce -> 7d1111214d)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 950fbb62ce GH-40733: [Go] Require Go 1.21 or later (#40848) add 7d1111214d GH-40847: [Go] update readme (#40877) No new revisions were added by this update. Summary of changes: go/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow) branch main updated: GH-40733: [Go] Require Go 1.21 or later (#40848)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 950fbb62ce GH-40733: [Go] Require Go 1.21 or later (#40848) 950fbb62ce is described below commit 950fbb62ce7388aad926c5af5861bf07f7db6de1 Author: Matt Topol AuthorDate: Thu Mar 28 15:59:14 2024 -0400 GH-40733: [Go] Require Go 1.21 or later (#40848) ### Rationale for this change Bumping to require Go 1.21 or later as 1.20 is EOL * GitHub Issue: #40733 Authored-by: Matt Topol Signed-off-by: Matt Topol --- .env | 4 +- .github/workflows/go.yml | 28 ++- ci/docker/conda-integration.dockerfile | 2 +- ci/docker/debian-12-go.dockerfile | 4 +- dev/release/verify-release-candidate.sh| 6 +-- dev/tasks/tasks.yml| 2 +- go/arrow/bitutil/bitutil.go| 35 + .../bitutil/bitutil_bytes.go} | 26 +- go/arrow/cdata/cdata_allocate.go | 57 ++ go/arrow/cdata/cdata_exports.go| 55 - go/arrow/compute/exec/span.go | 17 --- .../compute/exec/span_offsets.go} | 20 ++-- go/arrow/compute/fieldref.go | 17 --- .../compute/fieldref_hash.go} | 23 +++-- go/arrow/doc.go| 2 - go/arrow/flight/flightsql/driver/driver_test.go| 1 + go/arrow/memory/mallocator/mallocator.go | 11 ++--- go/arrow/memory/mallocator/mallocator_util.go | 26 ++ go/go.mod | 2 +- go/internal/hashing/hash_string.go | 4 ++ go/internal/hashing/xxh3_memo_table.go | 9 +--- go/parquet/types.go| 44 +++-- 22 files changed, 177 insertions(+), 218 deletions(-) diff --git a/.env b/.env index b5c66563f5..298c100c09 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ CUDA=11.2.2 DASK=latest DOTNET=7.0 GCC_VERSION="" -GO=1.19.13 -STATICCHECK=v0.4.5 +GO=1.21.8 +STATICCHECK=v0.4.7 HDFS=3.2.1 JDK=8 KARTOTHEK=latest diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 47148d9568..7ff781d35e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -59,13 +59,13 @@ jobs: { "arch-label": "AMD64", 
"arch": "amd64", -"go": "1.19", +"go": "1.21", "runs-on": "ubuntu-latest" }, { "arch-label": "AMD64", "arch": "amd64", -"go": "1.20", +"go": "1.22", "runs-on": "ubuntu-latest" } JSON @@ -75,13 +75,13 @@ jobs: { "arch-label": "ARM64", "arch": "arm64v8", -"go": "1.19", +"go": "1.21", "runs-on": ["self-hosted", "arm", "linux"] }, { "arch-label": "ARM64", "arch": "arm64v8", -"go": "1.20", +"go": "1.22", "runs-on": ["self-hosted", "arm", "linux"] } JSON @@ -169,10 +169,13 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Get required Go version +run: | + (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Install Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: "${{ env.GO_VERSION }}" cache: true cache-dependency-path: go/go.sum - name: Run build @@ -188,7 +191,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] env: GO: ${{ matrix.go }} steps: @@ -229,7 +232,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] env: GO: ${{ matrix.go }} steps: @@ -268,7 +271,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] steps: - name: Checkout Arrow
(arrow) branch update-go-readme created (now 88484e638b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git at 88484e638b GH-40847: [Go] update readme This branch includes the following new commits: new 88484e638b GH-40847: [Go] update readme The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
(arrow) 01/01: GH-40847: [Go] update readme
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git commit 88484e638bfecbbad8d59094a90d9574ea3be4a7 Author: Matt Topol AuthorDate: Thu Mar 28 15:25:29 2024 -0400 GH-40847: [Go] update readme Remove reference to deleted internal package --- go/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/README.md b/go/README.md index 4a9e151ddf..20bd7cd775 100644 --- a/go/README.md +++ b/go/README.md @@ -87,8 +87,8 @@ advanced optimizer and generate PLAN9 assembly functions from C/C++ code. The arrow package can be compiled without these optimizations using the `noasm` build tag. Alternatively, by configuring an environment variable, it is possible to dynamically configure which architecture optimizations are used at -runtime. See the `cpu` package [README](arrow/internal/cpu/README.md) for a -description of this environment variable. +runtime. We use the (cpu)[https://pkg.go.dev/golang.org/x/sys/cpu] package to +check dynamically for these features. ### Example Usage
(arrow) branch main updated: GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f710ac52b0 GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802) f710ac52b0 is described below commit f710ac52b049806515a14445b242c3ec819fb99d Author: Alex Shcherbakov AuthorDate: Tue Mar 26 21:17:04 2024 +0200 GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802) ### Rationale for this change Currently creating a record with a `null` type via `new(arrow.NullType)` in the schema will fail the schema validation. ### What changes are included in this PR? Made `arrow.Null` a non-null value instead of just a declaration. ### Are these changes tested? Yes, see cd4253a24e6d828128fbb7854da3c37951d74885 ### Are there any user-facing changes? `arrow.Null` became non-null, but the type is the same. 
* GitHub Issue: #40719 Authored-by: Alex Shcherbakov Signed-off-by: Matt Topol --- go/arrow/compare_test.go | 3 +++ go/arrow/datatype_null.go | 6 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go index 62e30e634e..ca87621ead 100644 --- a/go/arrow/compare_test.go +++ b/go/arrow/compare_test.go @@ -42,6 +42,9 @@ func TestTypeEqual(t *testing.T) { { Null, Null, true, false, }, + { + Null, new(NullType), true, false, + }, { {}, {}, false, false, }, diff --git a/go/arrow/datatype_null.go b/go/arrow/datatype_null.go index 2d2454c652..c852b854a7 100644 --- a/go/arrow/datatype_null.go +++ b/go/arrow/datatype_null.go @@ -27,7 +27,5 @@ func (*NullType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull()}} } -var ( - Null *NullType - _DataType = Null -) +// Null gives us both the compile-time assertion of DataType interface as well as serving a good element for use in schemas. +var Null DataType = new(NullType)
(arrow) branch main updated: GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5fd6b44936 GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654) 5fd6b44936 is described below commit 5fd6b44936a19761e45a8e43d7e76a0a23c5a222 Author: Peter Newcomb AuthorDate: Mon Mar 25 16:48:50 2024 -0400 GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654) ### Rationale for this change See #40630 ### What changes are included in this PR? 1. Added `FlushWithFooter` method to *file.Writer 2. To support `FlushWithFooter`, refactored `Close` in a way that changes the order of operations in two ways: a. closure of open row group writers is now done after using `defer` to ensure closure of the sink, instead of before b. wiping out of encryption keys is now done by the same deferred function, ensuring that it happens even upon error ### Are these changes tested? `file_writer_test.go` has been extended to cover `FlushWithFooter` in a manner equivalent to the existing coverage. ### Are there any user-facing changes? Only the addition of a new public method as described above. No breaking changes to any existing public interfaces, unless the two minor order-of-operation changes described above are somehow a problem. I'm not sure it's a critical fix, but one of the minor changes described above may reduce the likelihood that an attack could inject an error (e.g., an I/O error) to prevent an encryption key from being wiped from memory. 
* GitHub Issue: #40630 Authored-by: Peter Newcomb Signed-off-by: Matt Topol --- go/parquet/file/file_writer.go | 62 +++-- go/parquet/file/file_writer_test.go | 17 +- go/parquet/metadata/file.go | 15 - 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index a2cf397cbc..57344b25cf 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -32,6 +32,7 @@ import ( type Writer struct { sink utils.WriteCloserTell open bool + footerFlushed bool props *parquet.WriterProperties rowGroups int nrows int @@ -125,6 +126,7 @@ func (fw *Writer) appendRowGroup(buffered bool) *rowGroupWriter { fw.rowGroupWriter.Close() } fw.rowGroups++ + fw.footerFlushed = false rgMeta := fw.metadata.AppendRowGroup() fw.rowGroupWriter = newRowGroupWriter(fw.sink, rgMeta, int16(fw.rowGroups)-1, fw.props, buffered, fw.fileEncryptor) return fw.rowGroupWriter @@ -172,12 +174,9 @@ func (fw *Writer) Close() (err error) { // if any functions here panic, we set open to be false so // that this doesn't get called again fw.open = false - if fw.rowGroupWriter != nil { - fw.nrows += fw.rowGroupWriter.nrows - fw.rowGroupWriter.Close() - } - fw.rowGroupWriter = nil + defer func() { + fw.closeEncryptor() ierr := fw.sink.Close() if err != nil { if ierr != nil { @@ -189,30 +188,48 @@ func (fw *Writer) Close() (err error) { err = ierr }() + err = fw.FlushWithFooter() + fw.metadata.Clear() + } + return nil +} + +// FlushWithFooter closes any open row group writer and writes the file footer, leaving +// the writer open for additional row groups. Additional footers written by later +// calls to FlushWithFooter or Close will be cumulative, so that only the last footer +// written need ever be read by a reader. 
+func (fw *Writer) FlushWithFooter() error { + if !fw.footerFlushed { + if fw.rowGroupWriter != nil { + fw.nrows += fw.rowGroupWriter.nrows + fw.rowGroupWriter.Close() + } + fw.rowGroupWriter = nil + + fileMetadata, err := fw.metadata.Snapshot() + if err != nil { + return err + } + fileEncryptProps := fw.props.FileEncryptionProperties() if fileEncryptProps == nil { // non encrypted file - fileMetadata, err := fw.metadata.Finish() - if err != nil { + if _, err = writeFileMetadata(fileMetadata, fw.sink); err != nil { + return err
(arrow) branch main updated (cc771a0133 -> 1781b32487)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from cc771a0133 GH-40634: [C#] ArrowStreamReader should not be null (#40765) add 1781b32487 GH-40693: [Go] Fix Decimal type precision loss on GetOneForMarshal (#40694) No new revisions were added by this update. Summary of changes: go/arrow/array/decimal128.go | 13 +--- go/arrow/array/decimal128_test.go | 59 - go/arrow/array/decimal256.go | 12 --- go/arrow/array/decimal256_test.go | 70 +-- 4 files changed, 142 insertions(+), 12 deletions(-)
(arrow) branch main updated (07e8aa2cae -> 1ee3da0064)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 07e8aa2cae GH-40568: [Java] Test failure in Dataset regarding TestAllTypes (#40662) add 1ee3da0064 GH-40672: [Go][Parquet] Add proper build tags for min_max (#40676) No new revisions were added by this update. Summary of changes: go/internal/utils/min_max_noasm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
(arrow-adbc) branch main updated: refactor(go/adbc/driver): driverbase implementation for connection (#1590)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 302242849 refactor(go/adbc/driver): driverbase implementation for connection (#1590) 302242849 is described below commit 302242849ba09dbb4f4b6d95155421dffafb6105 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue Mar 19 11:43:42 2024 -0400 refactor(go/adbc/driver): driverbase implementation for connection (#1590) Implementation of Connection driver base, along with a refactor of Driver and Database bases. The bases have been refactored in the following way: - The `*Impl` interface (e.g. `DatabaseImpl`) now explicitly implements the corresponding `adbc` interface (e.g. `adbc.Database`). - We now check to guarantee the `DatabaseImplBase` implements the entire `DatabaseImpl` interface with stub methods or default implementations. - A new interface has been added (e.g. `driverbase.Database`) which contains all methods the _output_ of driverbase constructor `NewDatabase()` should be. This helps document and guarantee the "extra" behavior provided by using the driverbase. This interface should be internal to the library. - By embedding `DatabaseImpl` in the `database` struct (and similarly for the other bases) it automatically inherits implementations coming from the `DatabaseImpl`. This way we don't need to write out all the implementations a second time, hence the deletes. - The Connection base uses a builder for its constructor to register any helper methods (see discussion in comments). The Driver and Database bases use simple function constructors because they don't have any helpers to register. This felt simpler but I can make those into trivial builders as well if we prefer to have consistency between them. 
A new `DriverInfo` type has been introduced to help consolidate the collection and validation of metadata for `GetInfo()`. There are more small changes such as refactors of the flightsql and snowflake drivers to make use of the added functionality, as well as a new set of tests for the driverbase. Please let me know if anything else could use clarification. Resolves #1105. --- go/adbc/adbc.go| 11 + go/adbc/driver/driverbase/driver.go| 66 --- go/adbc/driver/flightsql/flightsql_connection.go | 578 +++- go/adbc/driver/flightsql/flightsql_database.go | 25 +- go/adbc/driver/flightsql/flightsql_driver.go | 45 +- go/adbc/driver/flightsql/flightsql_statement.go| 12 +- go/adbc/driver/internal/driverbase/connection.go | 497 + .../driver/{ => internal}/driverbase/database.go | 111 ++-- go/adbc/driver/internal/driverbase/driver.go | 116 go/adbc/driver/internal/driverbase/driver_info.go | 176 ++ .../driver/internal/driverbase/driver_info_test.go | 88 +++ go/adbc/driver/internal/driverbase/driver_test.go | 595 + go/adbc/driver/{ => internal}/driverbase/error.go | 0 .../driver/{ => internal}/driverbase/logging.go| 0 go/adbc/driver/snowflake/connection.go | 293 +++--- go/adbc/driver/snowflake/driver.go | 45 +- go/adbc/driver/snowflake/driver_test.go| 4 + go/adbc/driver/snowflake/snowflake_database.go | 41 +- go/adbc/driver/snowflake/statement.go | 2 +- go/adbc/go.mod | 1 + go/adbc/go.sum | 1 + 21 files changed, 1861 insertions(+), 846 deletions(-) diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index f5514626a..6968faacf 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -355,6 +355,17 @@ const ( InfoDriverADBCVersion InfoCode = 103 // DriverADBCVersion ) +type InfoValueTypeCode = arrow.UnionTypeCode + +const ( + InfoValueStringType InfoValueTypeCode = 0 + InfoValueBooleanType InfoValueTypeCode = 1 + InfoValueInt64Type InfoValueTypeCode = 2 + InfoValueInt32BitmaskTypeInfoValueTypeCode = 3 + InfoValueStringListType InfoValueTypeCode = 4 + InfoValueInt32ToInt32ListMapType 
InfoValueTypeCode = 5 +) + type ObjectDepth int const ( diff --git a/go/adbc/driver/driverbase/driver.go b/go/adbc/driver/driverbase/driver.go deleted file mode 100644 index e4cfb9960..0 --- a/go/adbc/driver/driverbase/driver.go +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright own
(arrow) branch main updated (2e0d701029 -> 98a0fc8570)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 2e0d701029 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.0 to 2.20.1 in /go (#40637) add 98a0fc8570 MINOR: [Go] Bump google.golang.org/grpc from 1.58.3 to 1.62.1 in /go (#40638) No new revisions were added by this update. Summary of changes: go/go.mod | 4 ++-- go/go.sum | 10 +- 2 files changed, 7 insertions(+), 7 deletions(-)
(arrow) branch main updated (08401514a7 -> 2e0d701029)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 08401514a7 GH-40621: [C++] Add missing util/config.h in arrow/io/compressed_test.cc (#40625) add 2e0d701029 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.0 to 2.20.1 in /go (#40637) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch main updated (d10f468b06 -> 1dd0d45375)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from d10f468b06 GH-40395: [C++] Avoid simplifying expressions which call impure functions (#40396) add 1dd0d45375 MINOR: [Go] update go dependencies (#40511) No new revisions were added by this update. Summary of changes: go/go.mod | 28 ++-- go/go.sum | 58 +- 2 files changed, 43 insertions(+), 43 deletions(-)
(arrow) branch main updated: GH-40261: [Go] Don't export array functions with unexposed return types (#40272)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new fc48b8963d GH-40261: [Go] Don't export array functions with unexposed return types (#40272) fc48b8963d is described below commit fc48b8963d6486ac129a7c1365a35d02b28876e8 Author: Gabriel Tomitsuka AuthorDate: Thu Feb 29 19:00:33 2024 +0100 GH-40261: [Go] Don't export array functions with unexposed return types (#40272) ### Rationale for this change Exposing functions that return unexposed types in Go is considered poor practice. This approach complicates type handling, making it challenging for developers to utilize these return values in their functions. Developers must undertake the cumbersome process of identifying the applicable interface for the return type, a task that often results in significant time consumption and leads to confusing, non-informative types being suggested by godocs and IDEs. Consider the difficulty in discerning the relationship between two return types, `*simpleTable` and `arrow.Table`, at a glance. It is not immediately clear whether they implement the same interface or are distinct entities: https://github.com/apache/arrow/assets/10295671/463cd8a7-47f3-44ce-9871-2885025e5a5c;> https://github.com/apache/arrow/assets/10295671/4ffc049c-fb88-43fb-bd57-fc1ad5d4dc68;> Returning exposed interfaces is already commonly done in the Arrow package to ensure API consistency and usability, as evidenced in methods like `AddColumn() -> arrow.Table` and `RecordFromJSON() -> arrow.Record`. Extending this to all functions, including `NewTable`, `NewTableFromSlice`, and `NewRecord`, will standardize the codebase in line with these principles. The use of `*simpleTable` and similar types is restricted in explicit type declarations and function signatures. 
Therefore, transitioning to exposed return types is a backward-compatible improvement that will lead to enhanced documentation and better support in IDEs for Arrow users. ### What changes are included in this PR? * Change return signature of functions using the following unexposed return types: * `*simpleTable` --> `arrow.Table` * `*simpleRecord` --> `arrow.Record` * `*simpleRecords` --> `array.RecordReader` * Add the function `String()`, which is implemented by `*simpleTable`, to the `arrow.Table` interface. `*simpleTable` is the only implementation of `arrow.Table`, so this requires no further changes. ### Are these changes tested? Yes. The relevant code is already covered by tests in `arrow/array/table_test.go` (`TestTable`) and `arrow/array/record_test.go` (`TestRecord`, `TestRecordReader`). All tests pass (subpackages without tests omitted): ```bash ok github.com/apache/arrow/go/v16/arrow0.398s ok github.com/apache/arrow/go/v16/arrow/array 0.600s ok github.com/apache/arrow/go/v16/arrow/arrio 1.544s ok github.com/apache/arrow/go/v16/arrow/avro 0.629s ok github.com/apache/arrow/go/v16/arrow/bitutil1.001s ok github.com/apache/arrow/go/v16/arrow/compute2.147s ok github.com/apache/arrow/go/v16/arrow/compute/exec 0.813s ok github.com/apache/arrow/go/v16/arrow/compute/exprs 1.900s ok github.com/apache/arrow/go/v16/arrow/csv0.288s ok github.com/apache/arrow/go/v16/arrow/decimal128 1.356s ok github.com/apache/arrow/go/v16/arrow/decimal256 1.718s ok github.com/apache/arrow/go/v16/arrow/encoded0.493s ok github.com/apache/arrow/go/v16/arrow/flight 2.845s ok github.com/apache/arrow/go/v16/arrow/flight/flightsql 0.512s ok github.com/apache/arrow/go/v16/arrow/flight/flightsql/driver 7.386s ok github.com/apache/arrow/go/v16/arrow/float160.570s ok github.com/apache/arrow/go/v16/arrow/internal/arrjson 0.419s ok github.com/apache/arrow/go/v16/arrow/internal/dictutils 0.407s ok github.com/apache/arrow/go/v16/arrow/internal/testing/tools 0.247s ok 
github.com/apache/arrow/go/v16/arrow/ipc1.984s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-cat 0.530s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-file-to-stream 1.267s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-json-integration-test 1.074s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-ls 1.263s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-stream-to-file 0.935s ok github.com/apache/arrow/go/v16/arrow/math 0.616s ok github.com/apache/arrow/go/v16/arrow/memory 1.275s ok github.com/apache/arrow/go/v16/ar
(arrow-adbc) branch fix-snowflake-ci deleted (was 63c6985f)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 63c6985f updates from feedback The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: test(go/adbc/driver/snowflake): fix flaky integration tests (#1561)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new aeabeacd test(go/adbc/driver/snowflake): fix flaky integration tests (#1561) aeabeacd is described below commit aeabeacd18f559a44d6fe0bcdc3d29c991a9b271 Author: Matt Topol AuthorDate: Fri Feb 23 16:59:15 2024 -0500 test(go/adbc/driver/snowflake): fix flaky integration tests (#1561) --- c/driver/snowflake/snowflake_test.cc| 111 +++- c/validation/adbc_validation.h | 4 + c/validation/adbc_validation_connection.cc | 29 +++-- c/validation/adbc_validation_statement.cc | 165 +--- go/adbc/driver/flightsql/flightsql_adbc_test.go | 15 ++- go/adbc/driver/snowflake/bulk_ingestion.go | 31 - go/adbc/driver/snowflake/connection.go | 8 +- go/adbc/driver/snowflake/driver_test.go | 61 - go/adbc/driver/snowflake/statement.go | 4 +- go/adbc/go.mod | 54 go/adbc/go.sum | 65 +- go/adbc/validation/validation.go| 14 +- 12 files changed, 391 insertions(+), 170 deletions(-) diff --git a/c/driver/snowflake/snowflake_test.cc b/c/driver/snowflake/snowflake_test.cc index cdd92e2c..1c423711 100644 --- a/c/driver/snowflake/snowflake_test.cc +++ b/c/driver/snowflake/snowflake_test.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" @@ -35,6 +36,26 @@ using adbc_validation::IsOkStatus; } \ } while (false) +namespace { +std::string GetUuid() { + static std::random_device dev; + static std::mt19937 rng(dev()); + + std::uniform_int_distribution dist(0, 15); + + const char* v = "0123456789ABCDEF"; + const bool dash[] = {0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0}; + + std::string res; + for (int i = 0; i < 16; i++) { +if (dash[i]) res += "-"; +res += v[dist(rng)]; +res += v[dist(rng)]; + } + return res; +} +} // namespace + class SnowflakeQuirks : public 
adbc_validation::DriverQuirks { public: SnowflakeQuirks() { @@ -47,6 +68,13 @@ class SnowflakeQuirks : public adbc_validation::DriverQuirks { AdbcStatusCode SetupDatabase(struct AdbcDatabase* database, struct AdbcError* error) const override { EXPECT_THAT(AdbcDatabaseSetOption(database, "uri", uri_, error), IsOkStatus(error)); +EXPECT_THAT(AdbcDatabaseSetOption( +database, "adbc.snowflake.sql.client_option.use_high_precision", +"false", error), +IsOkStatus(error)); +EXPECT_THAT(AdbcDatabaseSetOption(database, "adbc.snowflake.sql.schema", + schema_.c_str(), error), +IsOkStatus(error)); return ADBC_STATUS_OK; } @@ -119,11 +147,13 @@ class SnowflakeQuirks : public adbc_validation::DriverQuirks { bool supports_metadata_current_db_schema() const override { return false; } bool supports_partitioned_data() const override { return false; } bool supports_dynamic_parameter_binding() const override { return false; } + bool supports_error_on_incompatible_schema() const override { return false; } bool ddl_implicit_commit_txn() const override { return true; } - std::string db_schema() const override { return "ADBC_TESTING"; } + std::string db_schema() const override { return schema_; } const char* uri_; bool skip_{false}; + std::string schema_{"ADBC_TESTING"}; }; class SnowflakeTest : public ::testing::Test, public adbc_validation::DatabaseTest { @@ -175,6 +205,7 @@ class SnowflakeStatementTest : public ::testing::Test, public adbc_validation::StatementTest { public: const adbc_validation::DriverQuirks* quirks() const override { return _; } + void SetUp() override { if (quirks_.skip_) { GTEST_SKIP(); @@ -192,6 +223,78 @@ class SnowflakeStatementTest : public ::testing::Test, void TestSqlIngestColumnEscaping() { GTEST_SKIP(); } + public: + // will need to be updated to SetUpTestSuite when gtest is upgraded + static void SetUpTestCase() { +struct AdbcError error; +struct AdbcDatabase db; +struct AdbcConnection connection; +struct AdbcStatement statement; + +std::memset(, 0, 
sizeof(error)); +std::memset(, 0, sizeof(db)); +std::memset(, 0, sizeof(connection)); +std::memset(, 0, sizeof(statement)); + +ASSERT_THAT(AdbcDatabaseNew(, ), IsOkStatus()); +ASSERT_THAT(quirks_.SetupDatabase(, ), IsOkStatus()); +
(arrow-adbc) branch fix-snowflake-ci updated (f6be85fb -> 63c6985f)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from f6be85fb forgot to add the schema option add 63c6985f updates from feedback No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 54 1 file changed, 30 insertions(+), 24 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (76f4d075 -> f6be85fb)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 76f4d075 forgot to check support test add f6be85fb forgot to add the schema option No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 3 +++ 1 file changed, 3 insertions(+)
(arrow-adbc) branch fix-snowflake-ci updated (0c95216a -> 76f4d075)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 0c95216a fix lints add 76f4d075 forgot to check support test No new revisions were added by this update. Summary of changes: go/adbc/validation/validation.go | 4 1 file changed, 4 insertions(+)
(arrow-adbc) branch fix-snowflake-ci updated (b56b2a14 -> 0c95216a)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from b56b2a14 yet another attempt add 0c95216a fix lints No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 6 -- c/validation/adbc_validation_statement.cc | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (c01d1dd0 -> b56b2a14)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from c01d1dd0 fix postgres/sqlite tests add b56b2a14 yet another attempt No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc| 93 - c/validation/adbc_validation_statement.cc | 4 +- go/adbc/driver/flightsql/flightsql_adbc_test.go | 15 ++-- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go | 33 + go/adbc/validation/validation.go| 2 + 6 files changed, 122 insertions(+), 27 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (6c56796b -> c01d1dd0)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 6c56796b pre-commit linting add c01d1dd0 fix postgres/sqlite tests No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_connection.cc | 21 +++-- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 2 +- 3 files changed, 13 insertions(+), 12 deletions(-)
(arrow) branch main updated (65c2b46c83 -> 036a22eaff)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 65c2b46c83 GH-40199: [R] dbplyr 2.5.0 forward compatibility (#40197) add 036a22eaff GH-40089: [Go] Concurrent Recordset for receiving huge recordset (#40090) No new revisions were added by this update. Summary of changes: go/arrow/flight/flightsql/driver/driver.go | 205 -- go/arrow/flight/flightsql/driver/driver_test.go | 937 2 files changed, 1076 insertions(+), 66 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (3c5c3b6e -> 6c56796b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 3c5c3b6e more test fixes add 6c56796b pre-commit linting No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_statement.cc | 65 +-- 1 file changed, 35 insertions(+), 30 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (7c2da50c -> 3c5c3b6e)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 7c2da50c pre-commit lint add 3c5c3b6e more test fixes No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 7 ++- c/validation/adbc_validation.h | 4 c/validation/adbc_validation_statement.cc | 22 -- go/adbc/driver/snowflake/bulk_ingestion.go | 20 +++- go/adbc/driver/snowflake/driver_test.go| 26 +- go/adbc/validation/validation.go | 8 6 files changed, 58 insertions(+), 29 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (6852eca4 -> 7c2da50c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 6852eca4 fix trailing whitespace add 07eacffb fix quoting table names add 7c2da50c pre-commit lint No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_statement.cc | 94 +- go/adbc/driver/snowflake/bulk_ingestion.go | 7 ++- go/adbc/driver/snowflake/statement.go | 4 +- 3 files changed, 58 insertions(+), 47 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (44da2bda -> 6852eca4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 44da2bda ci(driver/snowflake): fixing snowflake integration tests add 6852eca4 fix trailing whitespace No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_connection.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow-adbc) 01/01: ci(driver/snowflake): fixing snowflake integration tests
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git commit 44da2bdad54050cb134a15b932261404427c11be Author: Matt Topol AuthorDate: Thu Feb 22 13:16:31 2024 -0500 ci(driver/snowflake): fixing snowflake integration tests --- c/validation/adbc_validation_connection.cc | 12 -- go/adbc/driver/snowflake/connection.go | 8 ++-- go/adbc/go.mod | 54 - go/adbc/go.sum | 65 +- 4 files changed, 102 insertions(+), 37 deletions(-) diff --git a/c/validation/adbc_validation_connection.cc b/c/validation/adbc_validation_connection.cc index f9af084e..f828ef97 100644 --- a/c/validation/adbc_validation_connection.cc +++ b/c/validation/adbc_validation_connection.cc @@ -550,7 +550,7 @@ void ConnectionTest::TestMetadataGetObjectsDbSchemas() { ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); -ASSERT_GT(reader.array->length, 0); +ASSERT_GE(reader.array->length, 0); do { for (int64_t row = 0; row < reader.array->length; row++) { struct ArrowArrayView* catalog_db_schemas_list = reader.array_view->children[1]; @@ -595,8 +595,12 @@ void ConnectionTest::TestMetadataGetObjectsTables() { ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); -ASSERT_NE(nullptr, reader.array->release); -ASSERT_GT(reader.array->length, 0); +ASSERT_NE(nullptr, reader.array->release); +if (expected.second) { + ASSERT_GT(reader.array->length, 0); +} else { + ASSERT_EQ(reader.array->length, 0); +} bool found_expected_table = false; do { for (int64_t row = 0; row < reader.array->length; row++) { @@ -670,7 +674,7 @@ void ConnectionTest::TestMetadataGetObjectsTablesTypes() { ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); 
-ASSERT_GT(reader.array->length, 0); +ASSERT_GE(reader.array->length, 0); bool found_expected_table = false; do { for (int64_t row = 0; row < reader.array->length; row++) { diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go index 5855c875..1de2b6ae 100644 --- a/go/adbc/driver/snowflake/connection.go +++ b/go/adbc/driver/snowflake/connection.go @@ -968,14 +968,14 @@ func (c *cnxn) GetTableSchema(ctx context.Context, catalog *string, dbSchema *st defer rows.Close() var ( - name, typ, kind, isnull, primary, unique string - def, check, expr, comment, policyNamesql.NullString - fields = []arrow.Field{} + name, typ, kind, isnull, primary, unique string + def, check, expr, comment, policyName, privDomain sql.NullString + fields= []arrow.Field{} ) for rows.Next() { err := rows.Scan(, , , , , , , - , , , ) + , , , , ) if err != nil { return nil, errToAdbcErr(adbc.StatusIO, err) } diff --git a/go/adbc/go.mod b/go/adbc/go.mod index 6322f5f8..7e7b605e 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -23,13 +23,13 @@ require ( github.com/apache/arrow/go/v16 v16.0.0-20240129203910-c2ca9bcedeb0 github.com/bluele/gcache v0.0.2 github.com/golang/protobuf v1.5.3 - github.com/google/uuid v1.3.1 - github.com/snowflakedb/gosnowflake v1.7.2 + github.com/google/uuid v1.6.0 + github.com/snowflakedb/gosnowflake v1.8.0 github.com/stretchr/testify v1.8.4 github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a - golang.org/x/exp v0.0.0-20240119083558-1b970713d09a + golang.org/x/exp v0.0.0-20240213143201-ec583247a57a golang.org/x/sync v0.6.0 - golang.org/x/tools v0.17.0 + golang.org/x/tools v0.18.0 google.golang.org/grpc v1.58.3 google.golang.org/protobuf v1.31.0 ) @@ -37,26 +37,26 @@ require ( require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal 
v1.5.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.2 // indirect + github.com/Azure/azure-sdk-for
(arrow-adbc) branch fix-snowflake-ci created (now 44da2bda)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git at 44da2bda ci(driver/snowflake): fixing snowflake integration tests This branch includes the following new commits: new 44da2bda ci(driver/snowflake): fixing snowflake integration tests The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
(arrow) branch main updated: GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 29a0581f5b GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105) 29a0581f5b is described below commit 29a0581f5bfcad86a6493854f8be8fcb6ffe2fbc Author: Matthew McNew AuthorDate: Tue Feb 20 19:59:57 2024 -0600 GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105) ### Rationale for this change Currently, buffered data pages are not included in TotalBytesWritten this means that their is not an accurate estimate of the size of the current size. ### Are there any user-facing changes? `RowGroupTotalBytesWritten` will include the TotalBytes in buffered DataPages minus the buffered data pages headers. * Closes: #39870 Authored-by: Matthew McNew Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 7 ++- go/parquet/file/column_writer_test.go | 14 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 4d603c547c..91f5d18942 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -198,7 +198,12 @@ func (w *columnWriter) TotalCompressedBytes() int64 { } func (w *columnWriter) TotalBytesWritten() int64 { - return w.totalBytesWritten + bufferedPagesBytes := int64(0) + for _, p := range w.pages { + bufferedPagesBytes += int64(len(p.Data())) + } + + return w.totalBytesWritten + bufferedPagesBytes } func (w *columnWriter) RowsWritten() int { diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index dd597e280b..d78e1c6761 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -430,6 +430,11 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque } func (p *PrimitiveWriterTestSuite) 
testDictionaryFallbackAndCompressedSize(version parquet.Version) { + // skip boolean as dictionary encoding is not used + if p.Typ.Kind() == reflect.Bool { + return + } + p.GenerateData(SmallSize) props := parquet.DefaultColumnProperties() props.DictionaryEnabled = true @@ -440,13 +445,14 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(versi props.Encoding = parquet.Encodings.RLEDict } - writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version)) + writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version), parquet.WithDataPageSize(SmallSize-1)) p.WriteBatchValues(writer, nil, nil) + p.NotZero(writer.TotalBytesWritten()) writer.FallbackToPlain() - p.NotEqual(0, writer.TotalCompressedBytes()) + p.NotZero(writer.TotalCompressedBytes()) writer.Close() - p.NotEqual(0, writer.TotalCompressedBytes()) - p.NotEqual(0, writer.TotalBytesWritten()) + p.NotZero(writer.TotalCompressedBytes()) + p.NotZero(writer.TotalBytesWritten()) } func (p *PrimitiveWriterTestSuite) TestRequiredPlain() {
(arrow) branch main updated (a690088193 -> 47f15b0708)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from a690088193 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) add 47f15b0708 GH-40113 [Go][Parquet] New RegisterCodec function (#40114) No new revisions were added by this update. Summary of changes: go/parquet/compress/brotli.go | 2 +- go/parquet/compress/compress.go | 20 go/parquet/compress/gzip.go | 2 +- go/parquet/compress/snappy.go | 2 +- go/parquet/compress/zstd.go | 2 +- 5 files changed, 24 insertions(+), 4 deletions(-)
(arrow) branch main updated: GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new a690088193 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) a690088193 is described below commit a690088193711447aa4d526f2257027f9a459efa Author: wayne AuthorDate: Tue Feb 20 08:38:06 2024 -0700 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) See https://github.com/apache/arrow/issues/40097 for more in-depth description about the problem that led me to file this PR. ### Rationale for this change Because it's annoying to not be able to connect to a non-TLS flightsql endpoint in my development environment just because my development environment happens to still use token authentication. ### What changes are included in this PR? Thread the flightsql `DriverConfig.TLSEnabled` parameter into the `grpcCredentials` type so that `grpcCredentials.RequireTransportSecurity` can return false if TLS is not enabled on the driver config. One thing that occurred to me about the `DriverConfig.TLSEnabled` field is that its semantics seem very mildly dangerous since golang `bool` types are `false` by default and golang doesn't require fields on structs to be explicitly initialized. It seems to me that `DriverConfig.TLSDisabled` would be better (semantically speaking) because then the API user doesn't have to explicitly enable TLS. But I suppose it's probably undesirable to change the name of a public field on a public type. ### Are these changes tested? I haven't written any tests, mostly because there weren't already any tests for the `grpcCredentials` type but I have manually verified this fixes the problem I described in https://github.com/apache/arrow/issues/40097 by rebuilding my tool and running it against the non-TLS listening thing in my development environment. ### Are there any user-facing changes? 
* Closes: #40097 Authored-by: wayne warren Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/driver.go | 9 + go/arrow/flight/flightsql/driver/utils.go | 11 ++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index 852a97fb4d..65068048ab 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -364,10 +364,11 @@ func (c *Connector) Configure(config *DriverConfig) error { // Set authentication credentials rpcCreds := grpcCredentials{ - username: config.Username, - password: config.Password, - token:config.Token, - params: config.Params, + username: config.Username, + password: config.Password, + token: config.Token, + params: config.Params, + tlsEnabled: config.TLSEnabled, } c.options = append(c.options, grpc.WithPerRPCCredentials(rpcCreds)) diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index f7bd2a2e02..a99c045e2e 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -27,10 +27,11 @@ import ( // *** GRPC helpers *** type grpcCredentials struct { - username string - password string - tokenstring - params map[string]string + username string + password string + token string + params map[string]string + tlsEnabled bool } func (g grpcCredentials) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, error) { @@ -53,7 +54,7 @@ func (g grpcCredentials) GetRequestMetadata(ctx context.Context, uri ...string) } func (g grpcCredentials) RequireTransportSecurity() bool { - return g.token != "" || g.username != "" + return g.tlsEnabled && (g.token != "" || g.username != "") } // *** Type conversions ***
(arrow) branch main updated: GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f609bb171a GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913) f609bb171a is described below commit f609bb171a8bce973d7b040d8684b04a60e806ed Author: abandy AuthorDate: Wed Feb 7 16:01:55 2024 -0500 GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913) Currently, in order to create a PreparedStatement a DoAction call will always be made via the client. I need to be able to make a PreparedStatement from persisted data that will not trigger the DoAction call to the server. * Closes: #39910 Authored-by: Alva Bandy Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/client.go | 65 go/arrow/flight/flightsql/client_test.go | 30 +++ go/arrow/flight/flightsql/types.go | 2 + 3 files changed, 97 insertions(+) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 441f88f39f..068bfa84c3 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -450,6 +450,31 @@ func (c *Client) PrepareSubstrait(ctx context.Context, plan SubstraitPlan, opts return parsePreparedStatementResponse(c, c.Alloc, stream) } +func (c *Client) LoadPreparedStatementFromResult(result *CreatePreparedStatementResult) (*PreparedStatement, error) { + var ( + err error + dsSchema, paramSchema *arrow.Schema + ) + if result.DatasetSchema != nil { + dsSchema, err = flight.DeserializeSchema(result.DatasetSchema, c.Alloc) + if err != nil { + return nil, err + } + } + if result.ParameterSchema != nil { + paramSchema, err = flight.DeserializeSchema(result.ParameterSchema, c.Alloc) + if err != nil { + return nil, err + } + } + return { + client:c, + handle:result.PreparedStatementHandle, + datasetSchema: 
dsSchema, + paramSchema: paramSchema, + }, nil +} + func parsePreparedStatementResponse(c *Client, mem memory.Allocator, results pb.FlightService_DoActionClient) (*PreparedStatement, error) { if err := results.CloseSend(); err != nil { return nil, err @@ -1027,6 +1052,46 @@ func (p *PreparedStatement) Execute(ctx context.Context, opts ...grpc.CallOption return p.client.getFlightInfo(ctx, desc, opts...) } +// ExecutePut calls DoPut for the prepared statement on the server. If SetParameters +// has been called then the parameter bindings will be sent before execution. +// +// Will error if already closed. +func (p *PreparedStatement) ExecutePut(ctx context.Context, opts ...grpc.CallOption) error { + if p.closed { + return errors.New("arrow/flightsql: prepared statement already closed") + } + + cmd := {PreparedStatementHandle: p.handle} + + desc, err := descForCommand(cmd) + if err != nil { + return err + } + + if p.hasBindParameters() { + pstream, err := p.client.Client.DoPut(ctx, opts...) + if err != nil { + return err + } + + wr, err := p.writeBindParameters(pstream, desc) + if err != nil { + return err + } + if err = wr.Close(); err != nil { + return err + } + pstream.CloseSend() + + // wait for the server to ack the result + if _, err = pstream.Recv(); err != nil && err != io.EOF { + return err + } + } + + return nil +} + // ExecutePoll executes the prepared statement on the server and returns a PollInfo // indicating the progress of execution. 
// diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index c8b9f7f124..f35aeefcf4 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -665,6 +665,36 @@ func (s *FlightSqlClientSuite) TestRenewFlightEndpoint() { s.Equal(, renewedEndpoint) } +func (s *FlightSqlClientSuite) TestPreparedStatementLoadFromResult() { + const query = "query" + + result := { + PreparedStatementHandle: []byte(query), + } + + parameterSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "p_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}
(arrow-adbc) branch main updated: feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 21fba985 feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525) 21fba985 is described below commit 21fba985231990fa9639d217157f8f5d38df47cf Author: Matt Topol AuthorDate: Wed Feb 7 13:29:56 2024 -0500 feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525) To help Snowflake track adoption and usage of the ADBC driver, we can explicitly add a prefix to any client application name to indicate the ADBC driver is the source of the requests. - Co-authored-by: David Li --- go/adbc/driver/snowflake/snowflake_database.go | 8 python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py | 4 2 files changed, 12 insertions(+) diff --git a/go/adbc/driver/snowflake/snowflake_database.go b/go/adbc/driver/snowflake/snowflake_database.go index 7b76fa5a..76ab4684 100644 --- a/go/adbc/driver/snowflake/snowflake_database.go +++ b/go/adbc/driver/snowflake/snowflake_database.go @@ -176,6 +176,11 @@ func (d *databaseImpl) SetOptions(cnOptions map[string]string) error { } } + defaultAppName := "[ADBC][Go-" + infoDriverVersion + "]" + // set default application name to track + // unless user overrides it + d.cfg.Application = defaultAppName + var err error for k, v := range cnOptions { v := v // copy into loop scope @@ -265,6 +270,9 @@ func (d *databaseImpl) SetOptions(cnOptions map[string]string) error { } d.cfg.ClientTimeout = dur case OptionApplicationName: + if !strings.HasPrefix(v, "[ADBC]") { + v = defaultAppName + v + } d.cfg.Application = v case OptionSSLSkipVerify: switch v { diff --git a/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py b/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py index 701de23e..19b3bbc1 100644 --- 
a/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py +++ b/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py @@ -131,6 +131,10 @@ def connect( kwargs = (db_kwargs or {}).copy() if uri is not None: kwargs["uri"] = uri +appname = kwargs.get(DatabaseOptions.APPLICATION_NAME.value, "") +kwargs[ +DatabaseOptions.APPLICATION_NAME.value +] = f"[ADBC][Python-{__version__}]{appname}" return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), **kwargs)
(arrow-site) branch main updated: Add Powered By note for pantab (#471)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-site.git The following commit(s) were added to refs/heads/main by this push: new ce3bc0b161c Add Powered By note for pantab (#471) ce3bc0b161c is described below commit ce3bc0b161c5f80137096fbcd07ac52321056bb3 Author: William Ayd AuthorDate: Mon Feb 5 23:41:52 2024 -0500 Add Powered By note for pantab (#471) Hi - I built pantab about 6 years ago to write pandas DataFrames to a proprietary database owned by Tableau called Hyper. Over time hacking into pandas internals proved to be difficult, and I very recently swapped over to using nanoarrow and the Arrow PyCapsule interface with great results. No hard feelings if there is not an appetite for this PR, but I figured I'd put it out there to celebrate the awesomeness of Arrow --- powered_by.md | 5 + 1 file changed, 5 insertions(+) diff --git a/powered_by.md b/powered_by.md index e5018fbfe79..1ba4278d0ea 100644 --- a/powered_by.md +++ b/powered_by.md @@ -154,6 +154,10 @@ short description of your use case. * **[pandas][12]:** data analysis toolkit for Python programmers. pandas supports reading and writing Parquet files using pyarrow. Several pandas core developers are also contributors to Apache Arrow. +* **[pantab][52]:** Allows high performance read/writes of popular dataframe libraries + like pandas, polars pyarrow, etc... to/from Tableau's Hyper database. pantab uses nanoarrow + and the Arrow PyCapsule interface to make that exchange process seamless. + core developers are also contributors to Apache Arrow. * **[Parseable][51]:** Log analytics platform built for scale and usability. Ingest logs from anywhere and unify logs with Parseable. Parseable uses Arrow as the intermediary, in-memory data format for log data ingestion. 
* **[Perspective][23]:** Perspective is a streaming data visualization engine in JavaScript for building real-time & user-configurable analytics entirely in the browser. * **[Petastorm][28]:** Petastorm enables single machine or distributed training @@ -262,3 +266,4 @@ short description of your use case. [49]: https://kaskada.io [50]: https://openobserve.ai [51]: https://parseable.io +[52]: https://github.com/innobi/pantab
(arrow) branch main updated: GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 26801f147a GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770) 26801f147a is described below commit 26801f147a9e98bb6c5bc4e7131bdf1bc2794467 Author: Matt Topol AuthorDate: Mon Feb 5 15:29:06 2024 -0500 GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770) ### Rationale for this change In my testing with libcudf and other GPU data, I discovered a deficiency in ImportDeviceArray and thus ImportDeviceRecordBatch where the device type and memory manager aren't propagated to child importers and it fails to import offset-based types such as strings. ### What changes are included in this PR? These are relatively easily handled by first ensuring that `ImportChild` propagates the device_type and memory manager from the parent. Then for importing offset based values we merely need to use the memory manager to copy the final offset value to the CPU to use for the buffer size computation. This will work for any device which has implemented CopyBufferTo/From ### Are these changes tested? A new test is added to test these situations. 
* Closes: #39769 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/c/bridge.cc | 23 --- cpp/src/arrow/c/bridge_test.cc | 10 ++ cpp/src/arrow/device.cc| 14 ++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 9b165a10a6..119249da99 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1543,6 +1543,8 @@ struct ArrayImporter { if (recursion_level_ >= kMaxImportRecursionLevel) { return Status::Invalid("Recursion level in ArrowArray struct exceeded"); } +device_type_ = parent->device_type_; +memory_mgr_ = parent->memory_mgr_; // Child buffers will keep the entire parent import alive. // Perhaps we can move the child structs to an owned area // when the parent ImportedArrayData::Release() gets called, @@ -1857,10 +1859,25 @@ struct ArrayImporter { template Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id, int64_t byte_width = 1) { -auto offsets = data_->GetValues(offsets_buffer_id); +if (device_type_ == DeviceAllocationType::kCPU) { + auto offsets = data_->GetValues(offsets_buffer_id); + // Compute visible size of buffer + int64_t buffer_size = + (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; + return ImportBuffer(buffer_id, buffer_size); +} + +// we only need the value of the last offset so let's just copy that +// one value from device to host. +auto single_value_buf = +SliceBuffer(data_->buffers[offsets_buffer_id], +c_struct_->length * sizeof(OffsetType), sizeof(OffsetType)); +ARROW_ASSIGN_OR_RAISE( +auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager())); +auto offsets = cpubuf->data_as(); // Compute visible size of buffer -int64_t buffer_size = -(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; +int64_t buffer_size = (c_struct_->length > 0) ? 
byte_width * offsets[0] : 0; + return ImportBuffer(buffer_id, buffer_size); } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 8b67027454..b8d5e0fcd3 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4320,6 +4320,16 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) { TestWithJSON(mm, int32(), "[4, 5, null]"); } +TEST_F(TestDeviceArrayRoundtrip, Struct) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + auto type = struct_({field("ints", int16()), field("strs", utf8())}); + + TestWithJSON(mm, type, "[]"); + TestWithJSON(mm, type, R"([[4, "foo"], [5, "bar"]])"); + TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])"); +} + // Array stream export tests diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 616f89aae8..3736a4e018 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -195,6 +195,13 @@ Result> CPUMemoryManager::ViewBufferFrom( if (!from->is_cpu()) { return nullptr; } + // in this case the memory manager we're coming from is visible on the CPU, + // but uses
(arrow) branch main updated (5856421e31 -> 85e2a684b7)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 5856421e31 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) add 85e2a684b7 GH-39925: [Go][Parquet] Fix re-slicing in maybeReplaceValidity function (#39926) No new revisions were added by this update. Summary of changes: go/parquet/file/column_writer.go | 5 - go/parquet/file/column_writer_test.go | 38 +++ 2 files changed, 42 insertions(+), 1 deletion(-)
(arrow) branch main updated: GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5856421e31 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) 5856421e31 is described below commit 5856421e31b163104570d0305cb79f323cf488a6 Author: mwish AuthorDate: Mon Feb 5 23:14:48 2024 +0800 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) ### Rationale for this change See https://github.com/apache/arrow/issues/39921 ### What changes are included in this PR? Not clearing `totalCompressedBytes` when flush called ### Are these changes tested? Yes ### Are there any user-facing changes? Yes, it's a bugfix * Closes: #39921 Authored-by: mwish Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 5 +++-- go/parquet/file/column_writer_test.go | 28 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index ac857d17e6..36663b10b8 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -397,7 +397,6 @@ func (w *columnWriter) FlushBufferedDataPages() (err error) { } } w.pages = w.pages[:0] - w.totalCompressedBytes = 0 return } @@ -542,7 +541,9 @@ func (w *columnWriter) Close() (err error) { if !w.closed { w.closed = true if w.hasDict && !w.fallbackToNonDict { - w.WriteDictionaryPage() + if err = w.WriteDictionaryPage(); err != nil { + return err + } } if err = w.FlushBufferedDataPages(); err != nil { diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 8011ac2487..321e7b730d 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -426,6 +426,26 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque } } +func (p 
*PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(version parquet.Version) { + p.GenerateData(SmallSize) + props := parquet.DefaultColumnProperties() + props.DictionaryEnabled = true + + if version == parquet.V1_0 { + props.Encoding = parquet.Encodings.PlainDict + } else { + props.Encoding = parquet.Encodings.RLEDict + } + + writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version)) + p.WriteBatchValues(writer, nil, nil) + writer.FallbackToPlain() + p.NotEqual(0, writer.TotalCompressedBytes()) + writer.Close() + p.NotEqual(0, writer.TotalCompressedBytes()) + p.NotEqual(0, writer.TotalBytesWritten()) +} + func (p *PrimitiveWriterTestSuite) TestRequiredPlain() { p.testRequiredWithEncoding(parquet.Encodings.Plain) } @@ -575,6 +595,14 @@ func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackEncodingV2() { p.testDictionaryFallbackEncoding(parquet.V2_LATEST) } +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV1() { + p.testDictionaryFallbackAndCompressedSize(parquet.V1_0) +} + +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV2() { + p.testDictionaryFallbackAndCompressedSize(parquet.V2_LATEST) +} + func (p *PrimitiveWriterTestSuite) TestOptionalNullValueChunk() { // test case for NULL values p.SetupSchema(parquet.Repetitions.Optional, 1)
(arrow) branch main updated: GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 87b515e920 GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772) 87b515e920 is described below commit 87b515e9207509aa3f77e3e1c0122be314a77e6d Author: Matt Topol AuthorDate: Thu Feb 1 11:48:29 2024 -0500 GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772) ### Rationale for this change Right now our MemoryManager interfaces operate solely at the buffer level and we do not provide any higher level facilities to copy an entire array or record batch between memory types. We should implement CopyArrayTo and CopyBatchTo functions which recursively utilize the buffer level copying to create a new Array/RecordBatch whose buffers have been copied to the destination memory manager. ### What changes are included in this PR? Exposing a `CopyArrayTo` and `CopyBatchTo` function for copying entire Array or RecordBatches between memory types. ### Are these changes tested? Tests are still being written but will be added. 
* Closes: #39771 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.cc| 12 +++ cpp/src/arrow/array/array_base.h | 16 +++ cpp/src/arrow/array/data.cc | 39 cpp/src/arrow/array/data.h | 19 +++--- cpp/src/arrow/buffer.h | 2 +- cpp/src/arrow/c/bridge.cc| 2 +- cpp/src/arrow/c/bridge_test.cc | 4 +++- cpp/src/arrow/device.cc | 2 ++ cpp/src/arrow/gpu/cuda_context.cc| 5 + cpp/src/arrow/ipc/read_write_test.cc | 27 - cpp/src/arrow/record_batch.cc| 24 ++ cpp/src/arrow/record_batch.h | 19 ++ 12 files changed, 142 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index b483ec420c..6927f51283 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -307,6 +307,18 @@ Result> Array::View( return MakeArray(result); } +Result> Array::CopyTo( +const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto copied_data, data()->CopyTo(to)); + return MakeArray(copied_data); +} + +Result> Array::ViewOrCopyTo( +const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto new_data, data()->ViewOrCopyTo(to)); + return MakeArray(new_data); +} + // -- // NullArray diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 7e857bf205..6411aebf80 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -165,6 +165,22 @@ class ARROW_EXPORT Array { /// An error is returned if the types are not layout-compatible. Result> View(const std::shared_ptr& type) const; + /// \brief Construct a copy of the array with all buffers on destination + /// Memory Manager + /// + /// This method recursively copies the array's buffers and those of its children + /// onto the destination MemoryManager device and returns the new Array. + Result> CopyTo(const std::shared_ptr& to) const; + + /// \brief Construct a new array attempting to zero-copy view if possible. 
+ /// + /// Like CopyTo this method recursively goes through all of the array's buffers + /// and those of it's children and first attempts to create zero-copy + /// views on the destination MemoryManager device. If it can't, it falls back + /// to performing a copy. See Buffer::ViewOrCopy. + Result> ViewOrCopyTo( + const std::shared_ptr& to) const; + /// Construct a zero-copy slice of the array with the indicated offset and /// length /// diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 8454ac8f1d..80c411dfa6 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -27,6 +27,7 @@ #include "arrow/array/util.h" #include "arrow/buffer.h" +#include "arrow/device.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" @@ -36,6 +37,7 @@ #include "arrow/util/dict_util.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/range.h" #include "arrow/util/ree_util.h" #include "arrow/util/slice_util_internal.h" #include "arrow/util/union_util.h" @@ -140,6 +142,43 @@ std::shared_ptr ArrayData::Make(std::shared_ptr type, int64 return std::make_shared(std::move(type
(arrow) branch main updated: GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c2ca9bcede GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838) c2ca9bcede is described below commit c2ca9bcedeb004f9d7f5d3e1aafc7b83ce6c1e3f Author: Matt Topol AuthorDate: Mon Jan 29 15:39:10 2024 -0500 GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838) ### Rationale for this change This is needed for https://github.com/apache/arrow-adbc/issues/1194 to facilitate better connection handling for flight clients in ADBC by copying the existing cookies over when creating a sub-client. ### What changes are included in this PR? Creating a `Clone` method on the `CookieMiddleware` so that a user can create and hold a reference to a specific cookie middleware instance and then create new ones on the fly that copy over the existing cookies at that moment. ### Are these changes tested? Yes. ### Are there any user-facing changes? 
No * Closes: #39837 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/flight/cookie_middleware.go | 24 + go/arrow/flight/cookie_middleware_test.go | 60 +++ 2 files changed, 84 insertions(+) diff --git a/go/arrow/flight/cookie_middleware.go b/go/arrow/flight/cookie_middleware.go index 27754a13b8..39c86d8303 100644 --- a/go/arrow/flight/cookie_middleware.go +++ b/go/arrow/flight/cookie_middleware.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "golang.org/x/exp/maps" "google.golang.org/grpc/metadata" ) @@ -40,11 +41,34 @@ func NewClientCookieMiddleware() ClientMiddleware { return CreateClientMiddleware({jar: make(map[string]http.Cookie)}) } +func NewCookieMiddleware() CookieMiddleware { + return {jar: make(map[string]http.Cookie)} +} + +// CookieMiddleware is a go-routine safe middleware for flight clients +// which properly handles Set-Cookie headers for storing cookies. +// This can be passed into `CreateClientMiddleware` to create a new +// middleware object. You can also clone it to create middleware for a +// new client which starts with the same cookies. +type CookieMiddleware interface { + CustomClientMiddleware + // Clone creates a new CookieMiddleware that starts out with the same + // cookies that this one already has. This is useful when creating a + // new client connection for the same server. 
+ Clone() CookieMiddleware +} + type clientCookieMiddleware struct { jar map[string]http.Cookie mx sync.Mutex } +func (cc *clientCookieMiddleware) Clone() CookieMiddleware { + cc.mx.Lock() + defer cc.mx.Unlock() + return {jar: maps.Clone(cc.jar)} +} + func (cc *clientCookieMiddleware) StartCall(ctx context.Context) context.Context { cc.mx.Lock() defer cc.mx.Unlock() diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go index 0adf492765..4007d056b2 100644 --- a/go/arrow/flight/cookie_middleware_test.go +++ b/go/arrow/flight/cookie_middleware_test.go @@ -239,3 +239,63 @@ func TestCookieExpiration(t *testing.T) { cookieMiddleware.expectedCookies = map[string]string{} makeReq(client, t) } + +func TestCookiesClone(t *testing.T) { + cookieMiddleware := {} + + s := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(cookieMiddleware), + }) + s.Init("localhost:0") + f := {} + s.RegisterFlightService(f) + + go s.Serve() + defer s.Shutdown() + + makeReq := func(c flight.Client, t *testing.T) { + flightStream, err := c.ListFlights(context.Background(), {}) + assert.NoError(t, err) + + for { + _, err := flightStream.Recv() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + assert.NoError(t, err) + } + } + } + + credsOpt := grpc.WithTransportCredentials(insecure.NewCredentials()) + cookies := flight.NewCookieMiddleware() + client1, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, + []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies)}, credsOpt) + require.NoError(t, err) + defer client1.Close() + + // set cookies + cookieMiddleware.cookies = []*http.Cookie{ + {Name: "foo", Value: "bar"}, + {Name: &
(arrow-adbc) branch main updated: feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new b57e19b6 feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456) b57e19b6 is described below commit b57e19b6c3dfdc65230b4cf9e8689cab51e3e89c Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Fri Jan 26 15:01:40 2024 -0500 feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456) # What - Replace Snowflake bulk ingestion with Parquet-based approach with higher throughput and better type support - Previously: INSERT bind parameters were uploaded to a CSV-based stage, once per record batch - Now: Parquet files written concurrently to stage independently of record batch size. Parquet logical types are used to infer schema on COPY. - Tests to validate type support and consistency through Arrow -> Parquet -> Snowflake -> Arrow roundtrip - Improved type mapping between Arrow <-> Snowflake timestamps. [TIMESTAMP_LTZ](https://docs.snowflake.com/en/sql-reference/data-types-datetime#timestamp-ltz-timestamp-ntz-timestamp-tz) is more consistent with Arrow timestamp semantics than TIMESTAMP_TZ, which can lead to lossy roundtrips. - Minor bugfix where Snowflake local timestamps with timezone set to UTC were being interpreted as non-local. 
# Why - Implements #1327, which comes from improvement request #1322 - BindStream ingestion is significantly faster - Arrow type support is improved # Methodology The general approach for ingestion is most clearly demonstrated by the path taken when `stmt.Bind()` for a single record is used: ### IngestRecord ```mermaid flowchart LR A(Record) --> B(Write Parquet) B --> C(Upload File) C --> D(Execute COPY) D --> E(Check Row Count) ``` The Arrow record is written to a Parquet file due to its logical type support, compressibility, and native Snowflake support. The file is then uploaded to a temporary Snowflake stage via PUT query, and then loaded into the target table via COPY query. Once the COPY has finished, one more query to check the resulting row count is dispatched to accurately return the number of rows affected. This is used instead of counting the Arrow rows written in case there are any undetected losses when importing the uploaded file into Snowflake. A similar approach is taken when ingesting an arbitrarily large stream of records via `stmt.BindStream()`, but makes use of several opportunities to parallelize the work involved at different stages: ### IngestStream ```mermaid flowchart LR A(Read Records) --> B(Write Parquet) A --> C(Write Parquet) A --> D(Write Parquet) A --> E(Write Parquet) B --> J(Buffer Pool) C --> J D --> J E --> J J --> K(Upload File) J --> L(Upload File) K --> M(Finalize COPY) L --> M M --> N(Check Row Count) O(File Ready) --> P(Execute COPY) P --> O ``` The same steps are used, but the stream of records is now distributed among a pool of Parquet writers. This step is inherently CPU-bound, so it is desirable for it to scale independently with the availability of logical cores for writing/compression. These Parquet files are written to a buffer pool in memory to help decouple the upload stage from writing, and so that a writer can start working on the next file _while_ the last file it wrote is being uploaded. 
Uploads from the buffer pool also benefit from parallelism, but more so to maximize network utilization by limiting idle time between uploads and amortizing potential slowdown in any one upload. Technically, only a single COPY command is required after the last file is uploaded in order to load the Parquet files into the Snowflake table. However, on many warehouses this operation takes as long or even longer than the upload itself but can be made faster by paying for a larger warehouse. Given the batched approach taken and that the COPY command is idempotent, we can execute COPY repeatedly as files are uploaded to load them into the table on an ongoing basis. These COPY queries are executed asynchronously and listen for an upload-completed callback to ensure at least one file will be loaded by the query (otherwise it will no-op so this just prevents spamming Snowflake with a bunch of no-op COPYs). Empirically, ingestion works reasonably well on an XS warehouse. COPY speed i
(arrow) branch main updated: GH-39774: [Go] Add public access to PreparedStatement handle (#39775)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c97e6c46d9 GH-39774: [Go] Add public access to PreparedStatement handle (#39775) c97e6c46d9 is described below commit c97e6c46d969718e850d3fdeb7d77f998cc2342d Author: abandy AuthorDate: Thu Jan 25 10:20:54 2024 -0500 GH-39774: [Go] Add public access to PreparedStatement handle (#39775) * Closes: #39774 Authored-by: Alva Bandy Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/client.go | 3 +++ go/arrow/flight/flightsql/client_test.go | 10 ++ 2 files changed, 13 insertions(+) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 928118cf29..441f88f39f 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -1165,6 +1165,9 @@ func (p *PreparedStatement) DatasetSchema() *arrow.Schema { return p.datasetSche // the prepared statement. func (p *PreparedStatement) ParameterSchema() *arrow.Schema { return p.paramSchema } +// The handle associated with this PreparedStatement +func (p *PreparedStatement) Handle() []byte { return p.handle } + // GetSchema re-requests the schema of the result set of the prepared // statement from the server. It should otherwise be identical to DatasetSchema. // diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index a4fb83f984..c8b9f7f124 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -384,6 +384,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecute() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + info, err := prepared.Execute(context.TODO(), s.callOpts...) 
s.NoError(err) s.Equal(, info) @@ -445,11 +447,15 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + paramSchema := prepared.ParameterSchema() rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"id": 1}]`)) s.NoError(err) defer rec.Release() + s.Equal(string(prepared.Handle()), "query") + prepared.SetParameters(rec) info, err := prepared.Execute(context.TODO(), s.callOpts...) s.NoError(err) @@ -517,6 +523,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteReaderBinding() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + paramSchema := prepared.ParameterSchema() rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"id": 1}]`)) s.NoError(err) @@ -575,6 +583,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementClose() { err = prepared.Close(context.TODO(), s.callOpts...) s.NoError(err) + + s.Equal(string(prepared.Handle()), "query") } func (s *FlightSqlClientSuite) TestExecuteUpdate() {
(arrow-adbc) branch main updated: chore(deps): Update Snowflake dependency (#1474)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new d78ce71d chore(deps): Update Snowflake dependency (#1474) d78ce71d is described below commit d78ce71d4bd2932335d8f943be4817c47de43f81 Author: Matt Topol AuthorDate: Fri Jan 19 13:44:39 2024 -0500 chore(deps): Update Snowflake dependency (#1474) Updating snowflake Go driver and its dependencies, as mentioned in #1454 --- go/adbc/driver/flightsql/flightsql_adbc_test.go | 2 +- go/adbc/go.mod | 73 +- go/adbc/go.sum | 171 +++- 3 files changed, 117 insertions(+), 129 deletions(-) diff --git a/go/adbc/driver/flightsql/flightsql_adbc_test.go b/go/adbc/driver/flightsql/flightsql_adbc_test.go index dc7d207d..2fcc010d 100644 --- a/go/adbc/driver/flightsql/flightsql_adbc_test.go +++ b/go/adbc/driver/flightsql/flightsql_adbc_test.go @@ -260,7 +260,7 @@ func (s *FlightSQLQuirks) GetMetadata(code adbc.InfoCode) interface{} { case adbc.InfoVendorVersion: return "sqlite 3" case adbc.InfoVendorArrowVersion: - return "14.0.0" + return "14.0.2" } return nil diff --git a/go/adbc/go.mod b/go/adbc/go.mod index 69e9eaf5..2eff02d9 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -20,16 +20,16 @@ module github.com/apache/arrow-adbc/go/adbc go 1.19 require ( - github.com/apache/arrow/go/v14 v14.0.0 + github.com/apache/arrow/go/v14 v14.0.2 github.com/bluele/gcache v0.0.2 github.com/golang/protobuf v1.5.3 github.com/google/uuid v1.3.1 - github.com/snowflakedb/gosnowflake v1.6.22 + github.com/snowflakedb/gosnowflake v1.7.2 github.com/stretchr/testify v1.8.4 github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a - golang.org/x/exp v0.0.0-20231006140011-7918f672742d - golang.org/x/sync v0.4.0 - golang.org/x/tools v0.14.0 + golang.org/x/exp v0.0.0-20240119083558-1b970713d09a + golang.org/x/sync v0.6.0 + golang.org/x/tools v0.17.0 
google.golang.org/grpc v1.58.3 google.golang.org/protobuf v1.31.0 ) @@ -37,32 +37,31 @@ require ( require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.1 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect - github.com/andybalholm/brotli v1.0.5 // indirect - github.com/apache/arrow/go/v12 v12.0.1 // indirect - github.com/apache/thrift v0.17.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.19.0 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.13.27 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.72 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.35 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.29 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.27 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.30 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.29 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.4 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.37.0 // indirect - github.com/aws/smithy-go v1.13.5 // indirect - github.com/danieljoos/wincred v1.2.0 // indirect + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/apache/thrift v0.19.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.24.1 // indirect + 
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.16.16 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.13 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect + github.com/aws/aws-sdk
(arrow-adbc) branch main updated: feat(go/adbc)!: close database explicitly (#1460)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 3aa0d121 feat(go/adbc)!: close database explicitly (#1460) 3aa0d121 is described below commit 3aa0d12169764e2b0afabaf9b1f1f68c2d63aea8 Author: Anton Levakin <39916473+leva...@users.noreply.github.com> AuthorDate: Fri Jan 19 17:09:54 2024 +0100 feat(go/adbc)!: close database explicitly (#1460) Implicit database release behaves inconsistently on different OS, which leads to bugs. BREAKING CHANGE: adds Close to the Database interface. Closes #1306. - Co-authored-by: Matt Topol --- docs/source/driver/duckdb.rst | 1 + docs/source/driver/flight_sql.rst | 1 + docs/source/driver/postgresql.rst | 1 + docs/source/driver/snowflake.rst | 2 + docs/source/driver/sqlite.rst | 1 + go/adbc/adbc.go| 3 ++ go/adbc/driver/driverbase/database.go | 5 ++ go/adbc/driver/driverbase/driver.go| 4 +- .../driver/flightsql/flightsql_adbc_server_test.go | 1 + go/adbc/driver/flightsql/flightsql_adbc_test.go| 9 go/adbc/driver/flightsql/flightsql_database.go | 20 --- go/adbc/driver/flightsql/flightsql_driver.go | 1 + go/adbc/driver/panicdummy/panicdummy_adbc.go | 5 ++ go/adbc/driver/snowflake/connection.go | 6 +-- go/adbc/driver/snowflake/driver.go | 1 + go/adbc/driver/snowflake/driver_test.go| 62 -- go/adbc/driver/snowflake/snowflake_database.go | 4 ++ go/adbc/drivermgr/wrapper.go | 46 ++-- go/adbc/drivermgr/wrapper_sqlite_test.go | 5 ++ go/adbc/pkg/_tmpl/driver.go.tmpl | 11 ++-- go/adbc/pkg/flightsql/driver.go| 11 ++-- go/adbc/pkg/panicdummy/driver.go | 1 + go/adbc/pkg/snowflake/driver.go| 11 ++-- go/adbc/validation/validation.go | 3 ++ 24 files changed, 148 insertions(+), 67 deletions(-) diff --git a/docs/source/driver/duckdb.rst b/docs/source/driver/duckdb.rst index 410331c3..94460eb5 100644 --- a/docs/source/driver/duckdb.rst +++ 
b/docs/source/driver/duckdb.rst @@ -72,6 +72,7 @@ ADBC support in DuckDB requires the driver manager. if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst index aca95d86..7473a7cb 100644 --- a/docs/source/driver/flight_sql.rst +++ b/docs/source/driver/flight_sql.rst @@ -152,6 +152,7 @@ the :cpp:class:`AdbcDatabase`. if err != nil { // do something with the error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/postgresql.rst b/docs/source/driver/postgresql.rst index ddf9115d..c724a2c1 100644 --- a/docs/source/driver/postgresql.rst +++ b/docs/source/driver/postgresql.rst @@ -124,6 +124,7 @@ the :cpp:class:`AdbcDatabase`. This should be a `connection URI if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/snowflake.rst b/docs/source/driver/snowflake.rst index 04023a62..bf445349 100644 --- a/docs/source/driver/snowflake.rst +++ b/docs/source/driver/snowflake.rst @@ -127,6 +127,7 @@ constructing the :cpp::class:`AdbcDatabase`. if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { @@ -241,6 +242,7 @@ a listing). if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/sqlite.rst b/docs/source/driver/sqlite.rst index 30e7d32b..96bd7bbd 100644 --- a/docs/source/driver/sqlite.rst +++ b/docs/source/driver/sqlite.rst @@ -140,6 +140,7 @@ shared across all connections. 
if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index 3fb61d69..71a75daf 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -329,6 +329,9 @@ type Driver interface { type Database int
(arrow) branch main updated (858574d0bd -> 55afcf0450)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 858574d0bd GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) add 55afcf0450 GH-39672: [Go] Time to Date32/Date64 conversion issues for non-UTC timezones (#39674) No new revisions were added by this update. Summary of changes: go/arrow/compute/internal/kernels/cast_temporal.go | 8 go/arrow/datatype_fixedwidth.go| 10 -- go/arrow/datatype_fixedwidth_test.go | 10 ++ 3 files changed, 18 insertions(+), 10 deletions(-)
(arrow) branch main updated: GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 858574d0bd GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) 858574d0bd is described below commit 858574d0bd1f3ef4157d0446cfb05cef05aac96b Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Thu Jan 18 11:09:50 2024 -0500 GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) ### Rationale for this change Closes: #39466 ### What changes are included in this PR? - Update logic for determining whether an Arrow Timestamp should have `isAdjustedToUTC=true` on conversion to Parquet. - Update conversion from Parquet Timestamp to Arrow Timestamp to align with Parquet Format [backward-compatibilty](https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/LogicalTypes.md?plain=1#L480-L485) rules. - Refactor Timestamp serialization methods to reduce duplicated code ### Are these changes tested? Yes, - Logical type mapping in existing test updated. - New tests for roundtrip behavior of timestamps with various timezone settings, with/without store_schema enabled. - New test to clarify equality behavior of timestamps with instant semantics, as well as Go-related quirks with timezone-unaware timestamps. ### Are there any user-facing changes? Yes, users of `pqarrow.FileWriter` will produce Parquet files in which the `TIMESTAMP` type is normalized to UTC IFF the Arrow type provided has a timezone specified. This is different from the current Go behavior but aligned that of other implementations. 
The conversion from Parquet to Arrow has been updated as well to reflect the Parquet format [document](https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/LogicalTypes.md?plain=1#L480-L485). Rust already [implements](https://github.com/apache/arrow-rs/blob/a61e824abdd7b38ea214828480430ff2a13f2ead/parquet/src/arrow/schema/primitive.rs#L211-L239) the spec as described and #39489 has been reported due to a mismatch in the handling of convertedTypes in C++. * Closes: #39466 Authored-by: Joel Lubinitsky Signed-off-by: Matt Topol --- go/arrow/array/timestamp.go | 11 +++--- go/arrow/array/timestamp_test.go| 49 ++- go/arrow/datatype_fixedwidth.go | 19 +++-- go/parquet/pqarrow/encode_arrow_test.go | 70 + go/parquet/pqarrow/schema.go| 13 +++--- go/parquet/pqarrow/schema_test.go | 6 +-- 6 files changed, 140 insertions(+), 28 deletions(-) diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go index 6ffb43e067..0cc46a127f 100644 --- a/go/arrow/array/timestamp.go +++ b/go/arrow/array/timestamp.go @@ -91,16 +91,15 @@ func (a *Timestamp) ValueStr(i int) string { return NullValueStr } - dt := a.DataType().(*arrow.TimestampType) - z, _ := dt.GetZone() - return a.values[i].ToTime(dt.Unit).In(z).Format("2006-01-02 15:04:05.9Z0700") + toTime, _ := a.DataType().(*arrow.TimestampType).GetToTimeFunc() + return toTime(a.values[i]).Format("2006-01-02 15:04:05.9Z0700") } func (a *Timestamp) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil + if val := a.ValueStr(i); val != NullValueStr { + return val } - return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.9") + return nil } func (a *Timestamp) MarshalJSON() ([]byte, error) { diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go index acbad8b586..c172ad811d 100644 --- a/go/arrow/array/timestamp_test.go +++ b/go/arrow/array/timestamp_test.go @@ -234,7 +234,7 @@ func TestTimestampBuilder_Resize(t 
*testing.T) { assert.Equal(t, 5, ab.Len()) } -func TestTimestampValueStr(t *testing.T) { +func TestTimestampValueStr(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -251,3 +251,50 @@ func TestTimestampValueStr(t *testing.T) { assert.Equal(t, "1968-11-30 13:30:45-0700", arr.ValueStr(0)) assert.Equal(t, "2016-02-29 10:42:23-0700", arr.ValueStr(1)) } + +func TestTimestampEquality(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + tsDatatypes := []*arrow.TimestampType{ + {Unit: arrow.Second}, + {Unit: arrow.Secon
(arrow) branch main updated: GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c170af41ba GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576) c170af41ba is described below commit c170af41ba0c30b80aa4172da0b3637206368cf2 Author: Jânio AuthorDate: Wed Jan 17 14:00:39 2024 -0300 GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576) Rationale for this change Make it possible to remove unwanted characters from strings What changes are included in this PR? Add new function to optionally setup a replacer in csv Writer Write method Are these changes tested? Yes Are there any user-facing changes? Adds an optional methods. * Closes: #39552 Lead-authored-by: Jânio Co-authored-by: janiodev Signed-off-by: Matt Topol --- go/arrow/csv/common.go | 14 ++ go/arrow/csv/transformer.go | 12 ++-- go/arrow/csv/writer.go | 24 +--- go/arrow/csv/writer_test.go | 6 -- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 99dac29f4d..31ca61f323 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -21,6 +21,7 @@ package csv import ( "errors" "fmt" + "strings" "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/memory" @@ -223,6 +224,19 @@ func WithIncludeColumns(cols []string) Option { } } +// WithStringsReplacer receives a replacer to be applied in the string fields +// of the CSV. This is useful to remove unwanted characters from the string. 
+func WithStringsReplacer(replacer *strings.Replacer) Option { + return func(cfg config) { + switch cfg := cfg.(type) { + case *Writer: + cfg.stringReplacer = replacer.Replace + default: + panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) + } + } +} + func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { switch ft := f.Type.(type) { diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 0f0181520b..78b16446d4 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -29,7 +29,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/array" ) -func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) []string { +func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string)string) []string { res := make([]string, col.Len()) switch typ.(type) { case *arrow.BooleanType: @@ -144,7 +144,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] arr := col.(*array.String) for i := 0; i < arr.Len(); i++ { if arr.IsValid(i) { - res[i] = arr.Value(i) + res[i] = stringsReplacer(arr.Value(i)) } else { res[i] = w.nullValue } @@ -153,7 +153,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] arr := col.(*array.LargeString) for i := 0; i < arr.Len(); i++ { if arr.IsValid(i) { - res[i] = arr.Value(i) + res[i] = stringsReplacer(arr.Value(i)) } else { res[i] = w.nullValue } @@ -224,7 +224,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] var b bytes.Buffer b.Write([]byte{'{'}) writer := csv.NewWriter() - writer.Write(w.transformColToStringArr(list.DataType(), list)) + writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) writer.Flush() b.Truncate(b.Len() - 1) b.Write([]byte{'}'}) @@ -243,7 +243,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] var b bytes.Buffer b.Write([]byte{'{'}) writer := 
csv.NewWriter() - writer.Write(w.transformColToStr
(arrow) branch main updated: GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f0879ed354 GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497) f0879ed354 is described below commit f0879ed3544bb12ee306eae1fb5c6d924dde02ed Author: MagicBoost <39663879+magicbo...@users.noreply.github.com> AuthorDate: Wed Jan 10 06:02:04 2024 +0800 GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497) ### Rationale for this change closes: #35718 ### What changes are included in this PR? Fix panic writing with DeltaBinaryPacked or DeltaByteArray when column only has nulls ### Are these changes tested? Yes - add a test writing nulls to columns with DeltaBinaryPacked / DeltaByteArray / DeltaLengthByteArray encodings ### Are there any user-facing changes? No * Closes: #35718 Lead-authored-by: yufanmo Co-authored-by: Matt Topol Signed-off-by: Matt Topol --- go/parquet/internal/encoding/delta_byte_array.go | 10 +++- go/parquet/pqarrow/encode_arrow_test.go | 58 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 5e5002e34a..18bd12015a 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -40,7 +40,15 @@ type DeltaByteArrayEncoder struct { } func (enc *DeltaByteArrayEncoder) EstimatedDataEncodedSize() int64 { - return enc.prefixEncoder.EstimatedDataEncodedSize() + enc.suffixEncoder.EstimatedDataEncodedSize() + prefixEstimatedSize := int64(0) + if enc.prefixEncoder != nil { + prefixEstimatedSize = enc.prefixEncoder.EstimatedDataEncodedSize() + } + suffixEstimatedSize := int64(0) + if enc.suffixEncoder != nil { + suffixEstimatedSize = enc.suffixEncoder.EstimatedDataEncodedSize() + } + return prefixEstimatedSize + suffixEstimatedSize
} func (enc *DeltaByteArrayEncoder) initEncoders() { diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 3a8fef7e5a..75eb965d03 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -473,6 +473,64 @@ func TestWriteEmptyLists(t *testing.T) { require.NoError(t, err) } +func TestWriteAllNullsWithDeltaEncoding(t *testing.T) { + sc := arrow.NewSchema([]arrow.Field{ + {Name: "f1", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + {Name: "f2", Type: arrow.ListOf(arrow.FixedWidthTypes.Date32)}, + {Name: "f3", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "f4", Type: arrow.ListOf(arrow.BinaryTypes.String)}, + {Name: "f5", Type: arrow.BinaryTypes.LargeString, Nullable: true}, + {Name: "f6", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)}, + {Name: "f7", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + {Name: "f8", Type: arrow.ListOf(arrow.FixedWidthTypes.Date64)}, + {Name: "f9", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "f10", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)}, + {Name: "f11", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, + {Name: "f12", Type: arrow.ListOf(arrow.FixedWidthTypes.Boolean)}, + {Name: "f13", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "f14", Type: arrow.ListOf(arrow.PrimitiveTypes.Float32)}, + }, nil) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + defer bldr.Release() + for _, b := range bldr.Fields() { + b.AppendNull() + } + + rec := bldr.NewRecord() + defer rec.Release() + + props := parquet.NewWriterProperties( + parquet.WithVersion(parquet.V1_0), + parquet.WithDictionaryDefault(false), + parquet.WithDictionaryFor("f9", true), + parquet.WithDictionaryFor("f10", true), + parquet.WithDictionaryFor("f13", true), + parquet.WithDictionaryFor("f14", true), + parquet.WithEncodingFor("f1", parquet.Encodings.DeltaBinaryPacked), + parquet.WithEncodingFor("f2", 
parquet.Encodings.DeltaBinaryPacked), + parquet.WithEncodingFor("f3", parquet.Encodings.DeltaByteArra
(arrow) branch main updated: GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new eade9383fb GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460) eade9383fb is described below commit eade9383fb237006390c91cc90e52724262f7dd1 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue Jan 9 16:59:02 2024 -0500 GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460) ### Rationale for this change Closes: #39456 ### What changes are included in this PR? Update physical and logical type mapping from Arrow to Parquet for DATE64 type ### Are these changes tested? Yes, - Update expected schema mapping in existing test - Tests asserting new behavior - Arrow DATE64 will roundtrip -> Parquet -> Arrow as DATE32 - Arrow DATE64 _not aligned_ to exact date boundary will truncate to milliseconds at boundary of greatest full day on Parquet roundtrip ### Are there any user-facing changes? Yes, users of `pqarrow.FileWriter` will produce Parquet files containing `DATE` logical type instead of `TIMESTAMP[ms]` when writing Arrow data containing DATE64 field(s). The proposed implementation truncates `int64` values to be divisible by 8640 rather than validating that this is already the case, as some implementations do. I am happy to add this validation if it would be preferred, but the truncating behavior will likely break fewer existing users. I'm not sure whether this is technically considered a breaking change to a public API and if/how it should be communicated. Any direction regarding this would be appreciated. 
* Closes: #39456 Authored-by: Joel Lubinitsky Signed-off-by: Matt Topol --- go/parquet/pqarrow/encode_arrow_test.go | 84 + go/parquet/pqarrow/schema.go| 4 +- go/parquet/pqarrow/schema_test.go | 2 +- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 565fa3b6b2..3a8fef7e5a 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -125,6 +125,52 @@ func makeDateTimeTypesTable(mem memory.Allocator, expected bool, addFieldMeta bo return array.NewTable(arrsc, cols, int64(len(isValid))) } +func makeDateTypeTable(mem memory.Allocator, expected bool, partialDays bool) arrow.Table { + const ( + millisPerHour int64 = 1000 * 60 * 60 + millisPerDay int64 = millisPerHour * 24 + ) + isValid := []bool{true, true, true, false, true, true} + + var field arrow.Field + if expected { + field = arrow.Field{Name: "date", Type: arrow.FixedWidthTypes.Date32, Nullable: true} + } else { + field = arrow.Field{Name: "date", Type: arrow.FixedWidthTypes.Date64, Nullable: true} + } + + field.Metadata = arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"1"}) + + arrsc := arrow.NewSchema([]arrow.Field{field}, nil) + + d32Values := []arrow.Date32{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000} + + d64Values := make([]arrow.Date64, len(d32Values)) + for i := range d64Values { + // Calculate number of milliseconds at date boundary + d64Values[i] = arrow.Date64(int64(d32Values[i]) * millisPerDay) + if partialDays { + // Offset 1 or more hours past the date boundary + hoursIntoDay := int64(i) * millisPerHour + d64Values[i] += arrow.Date64(hoursIntoDay) + } + } + + bldr := array.NewRecordBuilder(mem, arrsc) + defer bldr.Release() + + if expected { + bldr.Field(0).(*array.Date32Builder).AppendValues(d32Values, isValid) + } else { + bldr.Field(0).(*array.Date64Builder).AppendValues(d64Values, isValid) + } + + rec := bldr.NewRecord() + 
defer rec.Release() + + return array.NewTableFromRecords(arrsc, []arrow.Record{rec}) +} + func TestWriteArrowCols(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) @@ -831,6 +877,44 @@ func (ps *ParquetIOTestSuite) TestDateTimeTypesWithInt96ReadWriteTable() { } } +func (ps *ParquetIOTestSuite) TestDate64ReadWriteTable() { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(ps.T(), 0) + + date64InputTable := makeDateTypeTable(mem, false, false) + defer date64In
(arrow) branch main updated: GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 92520c67b4 GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521) 92520c67b4 is described below commit 92520c67b4fbeddf5a0c4e829ce2ca0bf54adccd Author: ella-chao AuthorDate: Tue Jan 9 21:25:24 2024 + GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521) ### Rationale for this change Details are in https://github.com/apache/arrow/issues/38988 ### What changes are included in this PR? This adds a method to `DictionaryBuilder` that returns the current dictionary size. ### Are these changes tested? Updated an existing test to account for this new method. ### Are there any user-facing changes? Yes, a new method is added to `DictionaryBuilder`. * Closes: #38988 Authored-by: Ella Chao Signed-off-by: Matt Topol --- go/arrow/array/dictionary.go | 5 + go/arrow/array/dictionary_test.go | 2 ++ 2 files changed, 7 insertions(+) diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 125c02391f..bbde4e4f1e 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -412,6 +412,7 @@ type DictionaryBuilder interface { AppendArray(arrow.Array) error AppendIndices([]int, []bool) ResetFull() + DictionarySize() int } type dictionaryBuilder struct { @@ -1004,6 +1005,10 @@ func (b *dictionaryBuilder) AppendIndices(indices []int, valid []bool) { } } +func (b *dictionaryBuilder) DictionarySize() int { + return b.memoTable.Size() +} + type NullDictionaryBuilder struct { dictionaryBuilder } diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 5a3e0e10c2..f32cc9555f 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -92,6 +92,8 @@ func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() { 
p.EqualValues(4, bldr.Len()) p.EqualValues(1, bldr.NullN()) + p.EqualValues(2, bldr.DictionarySize()) + arr := bldr.NewArray().(*array.Dictionary) defer arr.Release()
(arrow) branch main updated: GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 0aadd5a0e7 GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347) 0aadd5a0e7 is described below commit 0aadd5a0e7fd58e286f2d0f03b8bdbf99a1d3108 Author: Matt Topol AuthorDate: Mon Jan 8 15:28:15 2024 -0500 GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347) ### Rationale for this change If using the DeltaBinaryPacked encoding, we end up with a nil pointer dereference if we end up with an empty column. ### What changes are included in this PR? Add a nil check in `EstimatedDataEncodedSize` for the base `deltaBitPackEncoder`. This should only ever occur if we have an empty column with this encoding when closing a row group. ### Are these changes tested? Yes a unit test was added to verify the fix. * Closes: #39309 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/parquet/internal/encoding/delta_bit_packing.go | 4 +++ go/parquet/pqarrow/encode_arrow_test.go | 37 +++ 2 files changed, 41 insertions(+) diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index 560b77f4c6..6ac799f1c1 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -466,6 +466,10 @@ func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) { // EstimatedDataEncodedSize returns the current amount of data actually flushed out and written func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 { + if enc.bitWriter == nil { + return 0 + } + return int64(enc.bitWriter.Written()) } diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 95ea644dd8..565fa3b6b2 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ 
b/go/parquet/pqarrow/encode_arrow_test.go @@ -1983,3 +1983,40 @@ func TestWriteTableMemoryAllocation(t *testing.T) { require.Zero(t, mem.CurrentAlloc()) } + +func TestEmptyListDeltaBinaryPacked(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "ts", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint64), + Metadata: arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})}}, nil) + builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer builder.Release() + + listBuilder := builder.Field(0).(*array.ListBuilder) + listBuilder.Append(true) + arrowRec := builder.NewRecord() + defer arrowRec.Release() + + var buf bytes.Buffer + wr, err := pqarrow.NewFileWriter(schema, &buf, + parquet.NewWriterProperties( + parquet.WithDictionaryFor("ts.list.element", false), + parquet.WithEncodingFor("ts.list.element", parquet.Encodings.DeltaBinaryPacked)), + pqarrow.DefaultWriterProps()) + require.NoError(t, err) + + require.NoError(t, wr.WriteBuffered(arrowRec)) + require.NoError(t, wr.Close()) + + rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + reader, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator) + require.NoError(t, err) + defer rdr.Close() + + tbl, err := reader.ReadTable(context.Background()) + require.NoError(t, err) + defer tbl.Release() + + assert.True(t, schema.Equal(tbl.Schema())) + assert.EqualValues(t, 1, tbl.NumRows()) +}
(arrow) branch main updated: GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 213cadbbc0 GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242) 213cadbbc0 is described below commit 213cadbbc080399b372291f9305fe0e67de1 Author: Matt Topol AuthorDate: Wed Jan 3 11:29:15 2024 -0500 GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242) ### Rationale for this change Adding `ValueLen` to the `BinaryLike` interface for easy convenience of determining the length of an individual value for a Binary/String like array. ### Are these changes tested? yes * Closes: #38458 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/array/binary.go | 9 + go/arrow/array/string.go | 17 + 2 files changed, 26 insertions(+) diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index c226297da0..9e26de7a6d 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -30,6 +30,7 @@ import ( type BinaryLike interface { arrow.Array + ValueLen(int) int ValueBytes() []byte ValueOffset64(int) int64 } @@ -367,6 +368,11 @@ func (a *BinaryView) Value(i int) []byte { return buf.Bytes()[start : start+int32(s.Len())] } +func (a *BinaryView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + // ValueString returns the value at index i as a string instead of // a byte slice, without copying the underlying data. 
func (a *BinaryView) ValueString(i int) string { @@ -441,4 +447,7 @@ var ( _ arrow.Array = (*Binary)(nil) _ arrow.Array = (*LargeBinary)(nil) _ arrow.Array = (*BinaryView)(nil) + + _ BinaryLike = (*Binary)(nil) + _ BinaryLike = (*LargeBinary)(nil) ) diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index 90a4628f0d..c8517ba305 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -31,6 +31,7 @@ import ( type StringLike interface { arrow.Array Value(int) string + ValueLen(int) int } // String represents an immutable sequence of variable-length UTF-8 strings. @@ -225,6 +226,14 @@ func (a *LargeString) ValueOffset64(i int) int64 { return a.ValueOffset(i) } +func (a *LargeString) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.offsets[beg+1] - a.offsets[beg]) +} + func (a *LargeString) ValueOffsets() []int64 { beg := a.array.data.offset end := beg + a.array.data.length + 1 @@ -364,6 +373,11 @@ func (a *StringView) Value(i int) string { return *(*string)(unsafe.Pointer()) } +func (a *StringView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + func (a *StringView) String() string { var o strings.Builder o.WriteString("[") @@ -698,4 +712,7 @@ var ( _ StringLikeBuilder = (*StringBuilder)(nil) _ StringLikeBuilder = (*LargeStringBuilder)(nil) _ StringLikeBuilder = (*StringViewBuilder)(nil) + _ StringLike= (*String)(nil) + _ StringLike= (*LargeString)(nil) + _ StringLike= (*StringView)(nil) )
(arrow) branch main updated: GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 75c6b642b5 GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239) 75c6b642b5 is described below commit 75c6b642b5ff1ed171bc1d1a758a70098539c48e Author: Miguel Pragier AuthorDate: Fri Dec 15 20:03:18 2023 +0100 GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239) ### Rationale for this change When having an empty resultset, the driver tries to include an empty record referece, that cannot be scanned. So, any operation that relies on the returned Row(s) will trigger a "Index out of Range" error. ### What changes are included in this PR? We're preventing to include an invalid record (that can't be scanned) in an empty resultset ### Are these changes tested? Yes, there's a new test included ### Are there any user-facing changes? 
No **This PR contains a "Critical Fix".** * Closes: #39238 Authored-by: miguel pragier Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/driver.go | 7 ++-- go/arrow/flight/flightsql/driver/driver_test.go | 44 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index e31e572586..f74bfa378a 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -487,9 +487,10 @@ func readEndpoint(ctx context.Context, client *flightsql.Client, endpoint *fligh schema := reader.Schema() var records []arrow.Record for reader.Next() { - record := reader.Record() - record.Retain() - records = append(records, record) + if record := reader.Record(); record.NumRows() > 0 { + record.Retain() + records = append(records, record) + } } if err := reader.Err(); err != nil && !errors.Is(err, io.EOF) { diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go index a388bf155e..24eb5ee681 100644 --- a/go/arrow/flight/flightsql/driver/driver_test.go +++ b/go/arrow/flight/flightsql/driver/driver_test.go @@ -273,6 +273,50 @@ func (s *SqlTestSuite) TestQuery() { wg.Wait() } +func (s *SqlTestSuite) TestQueryWithEmptyResultset() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + defer s.stopServer(server) + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + defer db.Close() + + // Create the table + _, err = db.Exec(fmt.Sprintf(s.Statements["create table"], s.TableName)) + require.NoError(t, err) + + rows, err := db.Query(fmt.Sprintf(s.Statements["query"], s.TableName)) + 
require.NoError(t, err) + require.False(t, rows.Next()) + + row := db.QueryRow(fmt.Sprintf(s.Statements["query"], s.TableName)) + require.NotNil(t, row) + require.NoError(t, row.Err()) + + target := make(map[string]any) + err = row.Scan(&target) + require.ErrorIs(t, err, sql.ErrNoRows) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + func (s *SqlTestSuite) TestPreparedQuery() { t := s.T()
(arrow) branch main updated: GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 140ae018f3 GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507) 140ae018f3 is described below commit 140ae018f372ee14c9ff19f3e4c2af1b1a579f49 Author: Tim Schaub AuthorDate: Fri Dec 8 20:06:32 2023 +0100 GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507) ### Rationale for this change When using a chunked column reader to read from one Parquet file and a chunked column writer to write to another Parquet file, it can be useful to keep track of the number of rows written. ### What changes are included in this PR? This branch adds a new `RowGroupNumRows` method to the `pqarrow.FileWriter`. This is somewhat similar to the existing `RowGroupTotalBytesWritten` function. ### Are these changes tested? A new `file_writer_test.go` file is added that adds a test for the new method. ### Are there any user-facing changes? The new method is exported and documented. * Closes: #38506 Authored-by: Tim Schaub Signed-off-by: Matt Topol --- go/parquet/pqarrow/file_writer.go | 17 +++ go/parquet/pqarrow/file_writer_test.go | 89 ++ 2 files changed, 106 insertions(+) diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index bc484ba243..1164cd690c 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -134,6 +134,23 @@ func (fw *FileWriter) RowGroupTotalBytesWritten() int64 { return 0 } +// RowGroupNumRows returns the number of rows written to the current row group. +// Returns an error if they are unequal between columns that have been written so far. 
+func (fw *FileWriter) RowGroupNumRows() (int, error) { + if fw.rgw != nil { + return fw.rgw.NumRows() + } + return 0, nil +} + +// NumRows returns the total number of rows that have been written so far. +func (fw *FileWriter) NumRows() int { + if fw.wr != nil { + return fw.wr.NumRows() + } + return 0 +} + // WriteBuffered will either append to an existing row group or create a new one // based on the record length and max row group length. // diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go new file mode 100644 index 00..0b76733a62 --- /dev/null +++ b/go/parquet/pqarrow/file_writer_test.go @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pqarrow_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/apache/arrow/go/v15/arrow" + "github.com/apache/arrow/go/v15/arrow/array" + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/parquet" + "github.com/apache/arrow/go/v15/parquet/pqarrow" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFileWriterRowGroupNumRows(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data)) + require.NoError(t, err) + + output := &bytes.Buffer{} + writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(100)) + writer, err := pqarrow.NewFileWriter(sc
(arrow-adbc) branch main updated: fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 491ab8d4 fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342) 491ab8d4 is described below commit 491ab8d4638391e5b60c32f1e29c5fe3bacbd0f5 Author: William AuthorDate: Tue Dec 5 20:20:34 2023 +0100 fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342) Thought I would contribute some fixes I've been using locally for the issues described in #1341 I have no previous experience with this repository or with the Arrow memory model so I would say it's likely I've gotten something wrong. Feel free to ask me to improve on my contributions or merely take them as inspiration for some other fix. Resolves #1341 --- go/adbc/sqldriver/driver.go| 11 +++- go/adbc/sqldriver/driver_internals_test.go | 95 ++ go/adbc/sqldriver/driver_test.go | 6 +- 3 files changed, 106 insertions(+), 6 deletions(-) diff --git a/go/adbc/sqldriver/driver.go b/go/adbc/sqldriver/driver.go index 4b83495f..775f3f78 100644 --- a/go/adbc/sqldriver/driver.go +++ b/go/adbc/sqldriver/driver.go @@ -22,6 +22,7 @@ import ( "database/sql" "database/sql/driver" "errors" + "fmt" "io" "reflect" "strconv" @@ -463,16 +464,20 @@ func arrFromVal(val any) arrow.Array { case []byte: dt = arrow.BinaryTypes.Binary buffers[1] = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, int32(len(v))})) - buffers[2] = memory.NewBufferBytes(v) + buffers = append(buffers, memory.NewBufferBytes(v)) case string: dt = arrow.BinaryTypes.String buffers[1] = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, int32(len(v))})) var buf = *(*[]byte)(unsafe.Pointer()) (*reflect.SliceHeader)(unsafe.Pointer()).Cap = len(v) - buffers[2] = memory.NewBufferBytes(buf) + buffers = append(buffers, memory.NewBufferBytes(buf)) + default: + 
panic(fmt.Sprintf("unsupported type %T", val)) } for _, b := range buffers { - defer b.Release() + if b != nil { + defer b.Release() + } } data := array.NewData(dt, 1, buffers, nil, 0, 0) defer data.Release() diff --git a/go/adbc/sqldriver/driver_internals_test.go b/go/adbc/sqldriver/driver_internals_test.go index 8e9ce565..9981a40d 100644 --- a/go/adbc/sqldriver/driver_internals_test.go +++ b/go/adbc/sqldriver/driver_internals_test.go @@ -19,6 +19,7 @@ package sqldriver import ( "database/sql/driver" + "encoding/base64" "fmt" "strings" "testing" @@ -273,3 +274,97 @@ func TestNextRowTypes(t *testing.T) { }) } } + +func TestArrFromVal(t *testing.T) { + tests := []struct { + value any + expectedDataTypearrow.DataType + expectedStringValue string + }{ + { + value: true, + expectedDataType:arrow.FixedWidthTypes.Boolean, + expectedStringValue: "true", + }, + { + value: int8(1), + expectedDataType:arrow.PrimitiveTypes.Int8, + expectedStringValue: "1", + }, + { + value: uint8(1), + expectedDataType:arrow.PrimitiveTypes.Uint8, + expectedStringValue: "1", + }, + { + value: int16(1), + expectedDataType:arrow.PrimitiveTypes.Int16, + expectedStringValue: "1", + }, + { + value: uint16(1), + expectedDataType:arrow.PrimitiveTypes.Uint16, + expectedStringValue: "1", + }, + { + value: int32(1), + expectedDataType:arrow.PrimitiveTypes.Int32, + expectedStringValue: "1", + }, + { + value: uint32(1), + expectedDataType:arrow.PrimitiveTypes.
(arrow) branch main updated: GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 82be2555ab GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919) 82be2555ab is described below commit 82be2555abc6b06085976548ea3b77ba3f888a35 Author: Alfonso Subiotto Marqués AuthorDate: Tue Nov 28 21:58:56 2023 +0100 GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919) ### Rationale for this change Unnecessary allocations. ### What changes are included in this PR? This PR is split into several commits. The first addresses allocations in the `dictutils` package, the second adds `NumFields` to `NestedType` so that the third commit, which is a purely mechanical change from `len(type.Fields())` to `type.NumFields` to avoid allocations in these specific cases can pass tests with no further changes. The last commit removes some Fields allocations that specifically hurt our project. Note that this is not an all-encompassing change (therefore this PR should probably not close the linked issue). ### Are these changes tested? These changes are implicitly tested by the existing test-suite. No functionality has been changed and they should be invisible to the user. ### Are there any user-facing changes? No. 
* Addresses: #38918 * Closes: #38918 Authored-by: Alfonso Subiotto Marques Signed-off-by: Matt Topol --- go/arrow/array/concat.go | 2 +- go/arrow/array/record.go | 12 ++-- go/arrow/array/struct.go | 2 +- go/arrow/array/table.go | 8 go/arrow/array/union.go | 4 ++-- go/arrow/array/util.go| 2 +- go/arrow/cdata/cdata_exports.go | 2 +- go/arrow/compute/cast.go | 4 ++-- go/arrow/compute/exec/span.go | 2 +- go/arrow/compute/exprs/builders.go| 2 +- go/arrow/compute/exprs/exec.go| 4 ++-- go/arrow/compute/exprs/types.go | 2 +- go/arrow/compute/fieldref_test.go | 4 ++-- go/arrow/datatype_encoded.go | 2 ++ go/arrow/datatype_extension.go| 7 +++ go/arrow/datatype_nested.go | 16 go/arrow/datatype_nested_test.go | 2 +- go/arrow/flight/flightsql/driver/driver.go| 2 +- go/arrow/flight/flightsql/example/sql_batch_reader.go | 2 +- go/arrow/internal/arrjson/arrjson.go | 8 go/arrow/internal/dictutils/dict.go | 11 --- go/arrow/ipc/file_reader.go | 10 +- go/arrow/ipc/metadata.go | 10 +- go/arrow/scalar/nested.go | 16 go/arrow/scalar/scalar.go | 8 go/arrow/scalar/scalar_test.go| 2 +- go/arrow/schema.go| 6 +++--- go/arrow/schema_test.go | 6 +++--- go/parquet/pqarrow/file_reader.go | 2 +- go/parquet/pqarrow/schema.go | 8 30 files changed, 99 insertions(+), 69 deletions(-) diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index fa3554c1c0..f0bc2855eb 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -695,7 +695,7 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, } out.childData = []arrow.ArrayData{children} case *arrow.StructType: - out.childData = make([]arrow.ArrayData, len(dt.Fields())) + out.childData = make([]arrow.ArrayData, dt.NumFields()) for i := range dt.Fields() { children := gatherChildren(data, i) for _, c := range children { diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index d080f726e4..f25e7c9a87 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -185,7 +185,7 @@ func 
(rec *simpleRecord) validate() error { return nil } - if len(rec.arrs) != len(rec.schema.Fields()) { + if len(rec.arrs) != rec.schema.NumFields() { return fmt.Errorf("arrow/array: number of columns/fields mismatch") } @@ -285,11 +285,11 @@ func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) *RecordBuilder refCount: 1,
(arrow) branch main updated (5ab60eaea3 -> b0e1f748f5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 5ab60eaea3 GH-36760: [Go] Add Avro OCF reader (#37115) add b0e1f748f5 GH-38728: [Go] ipc: put lz4 decompression buffers back into sync.Pool (#38729) No new revisions were added by this update. Summary of changes: go/arrow/ipc/compression.go | 4 +- go/arrow/ipc/file_reader.go | 1 + go/arrow/ipc/reader_test.go | 90 + 3 files changed, 94 insertions(+), 1 deletion(-)