(arrow) branch main updated (a44b5372c3 -> ac1eadb5e0)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from a44b5372c3 GH-41493: [C++][S3] Add a new option to check existence before CreateDir (#41822) add ac1eadb5e0 GH-40494: [Go] add support for protobuf messages (#40496) No new revisions were added by this update. Summary of changes: dev/release/rat_exclude_files.txt | 1 + go/arrow/datatype_nested.go| 2 +- .../array => go/arrow/util/messages}/README.md | 7 +- go/arrow/util/messages/types.proto | 56 ++ go/arrow/util/protobuf_reflect.go | 865 + go/arrow/util/protobuf_reflect_test.go | 311 go/arrow/util/util_message/types.pb.go | 539 + go/go.mod | 2 + go/go.sum | 2 + 9 files changed, 1783 insertions(+), 2 deletions(-) copy {cpp/src/arrow/array => go/arrow/util/messages}/README.md (87%) create mode 100644 go/arrow/util/messages/types.proto create mode 100644 go/arrow/util/protobuf_reflect.go create mode 100644 go/arrow/util/protobuf_reflect_test.go create mode 100644 go/arrow/util/util_message/types.pb.go
(arrow) branch main updated (54bece3d4c -> 99014abd19)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 54bece3d4c GH-41648: [Java] Memory Leak about splitAndTransfer (#41898) add 99014abd19 GH-41887: [Go] Run linter via pre-commit (#41888) No new revisions were added by this update. Summary of changes: .gitignore | 5 + ci/conda_env_archery.txt => .golangci.yaml |35 +- .pre-commit-config.yaml|14 + go/arrow/array/bufferbuilder_numeric_test.go | 2 +- go/arrow/array/numeric.gen.go | 2 +- go/arrow/array/numeric_test.go |12 +- go/arrow/array/numericbuilder.gen_test.go | 8 +- go/arrow/array/record_test.go | 2 +- go/arrow/datatype_nested_test.go |16 +- go/arrow/flight/gen/flight/Flight.pb.go| 5 +- go/arrow/flight/gen/flight/FlightSql.pb.go | 5 +- go/arrow/flight/gen/flight/Flight_grpc.pb.go | 1 + go/arrow/float16/float16.go| 2 +- go/arrow/gen-flatbuffers.go| 1 + go/arrow/internal/debug/assert_off.go | 1 + go/arrow/internal/debug/assert_on.go | 1 + go/arrow/internal/debug/doc.go | 6 +- go/arrow/internal/debug/log_off.go | 1 + go/arrow/internal/debug/log_on.go | 1 + go/arrow/internal/debug/util.go| 1 + go/arrow/internal/flatbuf/Binary.go| 2 +- go/arrow/internal/flatbuf/BinaryView.go|14 +- go/arrow/internal/flatbuf/Block.go |19 +- go/arrow/internal/flatbuf/BodyCompression.go |18 +- go/arrow/internal/flatbuf/BodyCompressionMethod.go | 6 +- go/arrow/internal/flatbuf/Buffer.go|34 +- go/arrow/internal/flatbuf/Date.go |12 +- go/arrow/internal/flatbuf/Decimal.go |24 +- go/arrow/internal/flatbuf/DictionaryBatch.go |24 +- go/arrow/internal/flatbuf/DictionaryEncoding.go|48 +- go/arrow/internal/flatbuf/DictionaryKind.go|10 +- go/arrow/internal/flatbuf/Endianness.go| 4 +- go/arrow/internal/flatbuf/Feature.go |38 +- go/arrow/internal/flatbuf/Field.go |34 +- go/arrow/internal/flatbuf/FieldNode.go |40 +- go/arrow/internal/flatbuf/FixedSizeBinary.go | 4 +- go/arrow/internal/flatbuf/FixedSizeList.go | 4 +- 
go/arrow/internal/flatbuf/Footer.go|10 +- go/arrow/internal/flatbuf/KeyValue.go | 6 +- go/arrow/internal/flatbuf/LargeBinary.go | 4 +- go/arrow/internal/flatbuf/LargeList.go | 4 +- go/arrow/internal/flatbuf/LargeListView.go | 4 +- go/arrow/internal/flatbuf/LargeUtf8.go | 4 +- go/arrow/internal/flatbuf/ListView.go | 6 +- go/arrow/internal/flatbuf/Map.go |54 +- go/arrow/internal/flatbuf/MessageHeader.go |16 +- go/arrow/internal/flatbuf/Null.go | 2 +- go/arrow/internal/flatbuf/RecordBatch.go | 102 +- go/arrow/internal/flatbuf/RunEndEncoded.go |10 +- go/arrow/internal/flatbuf/Schema.go|20 +- go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go | 134 +- go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go | 142 +- go/arrow/internal/flatbuf/SparseTensor.go |28 +- go/arrow/internal/flatbuf/SparseTensorIndexCOO.go | 100 +- go/arrow/internal/flatbuf/SparseTensorIndexCSF.go | 254 +- go/arrow/internal/flatbuf/Struct_.go | 6 +- go/arrow/internal/flatbuf/Tensor.go|24 +- go/arrow/internal/flatbuf/TensorDim.go |14 +- go/arrow/internal/flatbuf/Time.go |28 +- go/arrow/internal/flatbuf/Timestamp.go | 250 +- go/arrow/internal/flatbuf/Type.go | 6 +- go/arrow/internal/flatbuf/Union.go | 8 +- go/arrow/internal/flatbuf/Utf8.go | 2 +- go/arrow/internal/flatbuf/Utf8View.go |14 +- go/arrow/internal/flight_integration/scenario.go | 2 +- go/arrow/ipc/cmd/arrow-cat/main.go |66 +- go/arrow/ipc/cmd/arrow-ls/main.go |62 +- go/arrow/math/math_amd64.go| 1 + go/arrow/math/math_arm64.go| 5 +- go/arrow/math/math_noasm.go| 1 + go/arrow/math/math_ppc64le.go | 1 + go/arrow/math/math_s390x.go| 1 + go/arrow/memory/cgo_allocator.go | 4 +- go/arrow/mem
(arrow) branch main updated (8f3bf67cca -> 235608beb6)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 8f3bf67cca GH-41841: [R][CI] Remove more defunct rhub containers (#41828) add 235608beb6 MINOR: [C++] Slight improvement for ArrayData device_type (#41814) No new revisions were added by this update. Summary of changes: cpp/src/arrow/array/data.cc | 12 cpp/src/arrow/array/data.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-)
(arrow-adbc) branch main updated: chore(go/adbc): bump arrow from v16 to v17 in template (#1880)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 179d02885 chore(go/adbc): bump arrow from v16 to v17 in template (#1880) 179d02885 is described below commit 179d02885548895297fe5e1adda8c835fbfe8fd2 Author: Cocoa AuthorDate: Wed May 22 15:18:23 2024 +0100 chore(go/adbc): bump arrow from v16 to v17 in template (#1880) Hi, this PR should be a minor update for the go driver template which bumps arrow from v16 to v17. --- go/adbc/pkg/_tmpl/driver.go.tmpl | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/adbc/pkg/_tmpl/driver.go.tmpl b/go/adbc/pkg/_tmpl/driver.go.tmpl index 21a9e0919..5a071ea83 100644 --- a/go/adbc/pkg/_tmpl/driver.go.tmpl +++ b/go/adbc/pkg/_tmpl/driver.go.tmpl @@ -59,10 +59,10 @@ import ( "unsafe" "github.com/apache/arrow-adbc/go/adbc" - "github.com/apache/arrow/go/v16/arrow/array" - "github.com/apache/arrow/go/v16/arrow/cdata" - "github.com/apache/arrow/go/v16/arrow/memory" - "github.com/apache/arrow/go/v16/arrow/memory/mallocator" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/cdata" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v17/arrow/memory/mallocator" ) // Must use malloc() to respect CGO rules
(arrow) branch main updated: GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 8169d6e719 GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807) 8169d6e719 is described below commit 8169d6e719453acd0e7ca1b6f784d800cca4f113 Author: Matt Topol AuthorDate: Tue May 21 15:40:16 2024 -0400 GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807) ### Rationale for this change The original PRs for adding support for importing and exporting the new C Device interface (#36488 / #36489) only added support for the Arrays themselves, not for the stream structure. We should support both. ### What changes are included in this PR? Adding parallel functions for Import/Export of streams that accept `ArrowDeviceArrayStream`. ### Are these changes tested? Test writing in progress, wanted to get this up for review while I write tests. ### Are there any user-facing changes? No, only new functions have been added. 
* GitHub Issue: #40078 Lead-authored-by: Matt Topol Co-authored-by: Felipe Oliveira Carvalho Co-authored-by: Benjamin Kietzman Co-authored-by: Antoine Pitrou Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.h | 8 + cpp/src/arrow/array/array_test.cc | 5 + cpp/src/arrow/array/data.cc | 36 +++ cpp/src/arrow/array/data.h| 21 ++ cpp/src/arrow/array/util.cc | 2 +- cpp/src/arrow/c/bridge.cc | 278 +++- cpp/src/arrow/c/bridge.h | 61 + cpp/src/arrow/c/bridge_test.cc| 516 ++ cpp/src/arrow/c/helpers.h | 49 cpp/src/arrow/c/util_internal.h | 22 ++ cpp/src/arrow/record_batch.cc | 107 ++-- cpp/src/arrow/record_batch.h | 43 +++- python/pyarrow/tests/test_cffi.py | 2 +- 13 files changed, 1051 insertions(+), 99 deletions(-) diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 6411aebf80..716ae07220 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -224,6 +224,14 @@ class ARROW_EXPORT Array { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this array's data is allocated on + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. 
+ /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return data_->device_type(); } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 7e25ad61fa..32806d9d2e 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_EQ(array->type(), type); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); if (is_union(type->id())) { ASSERT_EQ(array->null_count(), 0); ASSERT_EQ(array->ComputeLogicalNullCount(), length); @@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); // test case for ARROW-13321 for (int64_t i : {int64_t{0}, length / 2, length - 1}) { @@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) { auto sliced = array->Slice(1, 4); ASSERT_EQ(sliced->length(), 4); ASSERT_EQ(sliced->null_count(), 0); +ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); ARROW_EXPECT_OK(sliced->ValidateFull()); } } @@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 4); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); for (int i = 0; i < 4; i++) { ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i)); @@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) { ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type)); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 0); + CheckSpanRoundTrip(*array); } } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index ac828a9c35..76a4352139 100644 --- 
a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -224,6 +224,42 @@ int64_t ArrayData::ComputeLogicalNullCount() const { return ArraySpan(*this).ComputeLogicalNullCount
(arrow-adbc) branch main updated: fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 75e392744 fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866) 75e392744 is described below commit 75e3927444cb48f90769f198af118a8b20c0fae2 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue May 21 11:54:14 2024 -0400 fix(go/adbc/driver/snowflake): Records dropped on ingestion when empty batch is present (#1866) Reproduces and fixes: #1847 Parquet files with empty row groups are valid per the spec, but Snowflake does not currently handle them properly. To mitigate this we buffer writes to the parquet file so that a row group is not written until some amount of data has been received. The CheckedAllocator was enabled for all tests as part of this fix, which detected a leak in the BufferWriter that was fixed in: [https://github.com/apache/arrow/pull/41698](https://github.com/apache/arrow/pull/41698). There was an unrelated test failure that surfaced once the CheckedAllocator was enabled which had to do with casting decimals of certain precision. The fix is included in this PR as well. 
--- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 111 +++-- go/adbc/driver/snowflake/record_reader.go | 1 + go/adbc/go.mod | 2 +- go/adbc/go.sum | 4 +- 5 files changed, 64 insertions(+), 56 deletions(-) diff --git a/go/adbc/driver/snowflake/bulk_ingestion.go b/go/adbc/driver/snowflake/bulk_ingestion.go index 17d76195d..2e18428bd 100644 --- a/go/adbc/driver/snowflake/bulk_ingestion.go +++ b/go/adbc/driver/snowflake/bulk_ingestion.go @@ -342,7 +342,7 @@ func writeParquet( defer pqWriter.Close() for rec := range in { - err = pqWriter.Write(rec) + err = pqWriter.WriteBuffered(rec) rec.Release() if err != nil { return err diff --git a/go/adbc/driver/snowflake/driver_test.go b/go/adbc/driver/snowflake/driver_test.go index af94e6108..abc738306 100644 --- a/go/adbc/driver/snowflake/driver_test.go +++ b/go/adbc/driver/snowflake/driver_test.go @@ -325,19 +325,14 @@ type SnowflakeTests struct { stmt adbc.Statement } -func (suite *SnowflakeTests) SetupSuite() { +func (suite *SnowflakeTests) SetupTest() { var err error suite.ctx = context.Background() suite.driver = suite.Quirks.SetupDriver(suite.T()) suite.db, err = suite.driver.NewDatabase(suite.Quirks.DatabaseOptions()) suite.NoError(err) -} - -func (suite *SnowflakeTests) SetupTest() { - var err error suite.cnxn, err = suite.db.Open(suite.ctx) suite.NoError(err) - suite.stmt, err = suite.cnxn.NewStatement() suite.NoError(err) } @@ -345,11 +340,11 @@ func (suite *SnowflakeTests) SetupTest() { func (suite *SnowflakeTests) TearDownTest() { suite.NoError(suite.stmt.Close()) suite.NoError(suite.cnxn.Close()) -} - -func (suite *SnowflakeTests) TearDownSuite() { + suite.Quirks.TearDownDriver(suite.T(), suite.driver) + suite.cnxn = nil suite.NoError(suite.db.Close()) suite.db = nil + suite.driver = nil } func (suite *SnowflakeTests) TestSqlIngestTimestamp() { @@ -409,9 +404,6 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { 
suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_bind")) suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_bind_stream")) - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(suite.T(), 0) - sc := arrow.NewSchema([]arrow.Field{ { Name: "col_int64", Type: arrow.PrimitiveTypes.Int64, @@ -467,7 +459,7 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { }, }, nil) - bldr := array.NewRecordBuilder(mem, sc) + bldr := array.NewRecordBuilder(suite.Quirks.Alloc(), sc) defer bldr.Release() bldr.Field(0).(*array.Int64Builder).AppendValues([]int64{-1, 0, 25}, nil) @@ -538,9 +530,6 @@ func (suite *SnowflakeTests) TestSqlIngestRecordAndStreamAreEquivalent() { func (suite *SnowflakeTests) TestSqlIngestRoundtripTypes() { suite.Require().NoError(suite.Quirks.DropTable(suite.cnxn, "bulk_ingest_roundtrip")) - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(suite.T(), 0) - sc := arrow.NewSchema([]arrow.Field{ {
(arrow) branch main updated (e254c43c09 -> 34f0427620)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from e254c43c09 GH-41389: [Python] Expose byte_width and bit_width of ExtensionType in terms of the storage type (#41413) add 34f0427620 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.21.1 to 2.22.0 in /go (#41743) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch dependabot/go_modules/go/adbc/google.golang.org/protobuf-1.34.1 deleted (was 426d3be47)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/google.golang.org/protobuf-1.34.1 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 426d3be47 chore(go/adbc): bump google.golang.org/protobuf in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 987cd193e chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827) 987cd193e is described below commit 987cd193e25fd314a7eb21d31d705a602b0413be Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Tue May 21 11:24:26 2024 -0400 chore(go/adbc): bump google.golang.org/protobuf from 1.33.0 to 1.34.1 in /go/adbc (#1827) Bumps google.golang.org/protobuf from 1.33.0 to 1.34.1. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=google.golang.org/protobuf=go_modules=1.33.0=1.34.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. 
You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go/adbc/go.mod | 2 +- go/adbc/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/adbc/go.mod b/go/adbc/go.mod index aed680c33..8a1fecb60 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -31,7 +31,7 @@ require ( golang.org/x/sync v0.7.0 golang.org/x/tools v0.21.0 google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.34.0 + google.golang.org/protobuf v1.34.1 ) require ( diff --git a/go/adbc/go.sum b/go/adbc/go.sum index 971cb267e..1f6db2e83 100644 --- a/go/adbc/go.sum +++ b/go/adbc/go.sum @@ -192,8 +192,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= -google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= 
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
(arrow-site) branch main updated: Add Dane Pitkin to committers list (#519)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-site.git The following commit(s) were added to refs/heads/main by this push: new 3c31678f6e1 Add Dane Pitkin to committers list (#519) 3c31678f6e1 is described below commit 3c31678f6e1303e623f4511e061ffc3dea20f6bc Author: Dane Pitkin AuthorDate: Thu May 16 15:43:53 2024 -0400 Add Dane Pitkin to committers list (#519) I was accepted as a committer to the Arrow project on May 7, 2024. See https://lists.apache.org/thread/9ysqj4qbmhhl8lp101ltq62ndf8vgsq3. --- _data/committers.yml | 4 1 file changed, 4 insertions(+) diff --git a/_data/committers.yml b/_data/committers.yml index 5705ca33fa6..1740e4f20dd 100644 --- a/_data/committers.yml +++ b/_data/committers.yml @@ -276,6 +276,10 @@ role: Committer alias: thinkharderdev affiliation: Coralogix +- name: Dane Pitkin + role: Committer + alias: dpitkin + affiliation: Voltron Data - name: David Alves role: Committer alias: dralves
(arrow-adbc) branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.10.0 deleted (was 840e6633c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.10.0 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 840e6633c chore(go/adbc): bump github.com/snowflakedb/gosnowflake in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated (e184bce52 -> abe6d6aa4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from e184bce52 fix(csharp): Fix packing process (#1862) add abe6d6aa4 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.9.0 to 1.10.0 in /go/adbc (#1857) No new revisions were added by this update. Summary of changes: go/adbc/go.mod | 4 ++-- go/adbc/go.sum | 10 -- 2 files changed, 6 insertions(+), 8 deletions(-)
(arrow) branch main updated (63fddd7b2f -> e1de9c52d5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 63fddd7b2f GH-41660: [CI][Java] Restore devtoolset relatead GANDIVA_CXX_FLAGS (#41661) add e1de9c52d5 GH-41541: [Go][Parquet] Fix writer performance regression (#41638) No new revisions were added by this update. Summary of changes: go/parquet/internal/encoding/types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
(arrow) branch main updated: GH-34484: [Substrait] add an option to disable augmented fields (#41583)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new a4a5cf1fbe GH-34484: [Substrait] add an option to disable augmented fields (#41583) a4a5cf1fbe is described below commit a4a5cf1fbe804f5b47184afe91b3c243e0487ab2 Author: David Sisson AuthorDate: Tue May 14 09:28:50 2024 -0700 GH-34484: [Substrait] add an option to disable augmented fields (#41583) ### Rationale for this change Augmented fields interfere with the schema passing between nodes. When enabled they cause names/schema mismatching at the end of the plan. ### What changes are included in this PR? Adds an option to disable augmented fields (defaulting to adding them), connects it everywhere it is called, and disables it in ReadRel conversion. ### Are these changes tested? Yes. ### Are there any user-facing changes? There are no API related changes however this will allow Substrait plans that consume local files to work without requiring a project/emit relation after the read relation to remove the unexpected fields. 
* GitHub Issue: #34484 Authored-by: David Sisson Signed-off-by: Matt Topol --- cpp/src/arrow/acero/sink_node.cc | 1 + cpp/src/arrow/dataset/discovery_test.cc| 3 +- cpp/src/arrow/dataset/file_parquet_test.cc | 5 +- cpp/src/arrow/dataset/scanner.cc | 35 ++ cpp/src/arrow/dataset/scanner.h| 9 ++- cpp/src/arrow/dataset/scanner_test.cc | 12 ++-- cpp/src/arrow/dataset/test_util_internal.h | 18 +++-- .../arrow/engine/substrait/relation_internal.cc| 1 + cpp/src/arrow/engine/substrait/serde_test.cc | 81 ++ 9 files changed, 138 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc index 4ab6b4537d..66f447aa87 100644 --- a/cpp/src/arrow/acero/sink_node.cc +++ b/cpp/src/arrow/acero/sink_node.cc @@ -423,6 +423,7 @@ class ConsumingSinkNode : public ExecNode, std::atomic backpressure_counter_ = 0; std::unique_ptr sequencer_; }; + static Result MakeTableConsumingSinkNode(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc index 92cec7f324..981146b799 100644 --- a/cpp/src/arrow/dataset/discovery_test.cc +++ b/cpp/src/arrow/dataset/discovery_test.cc @@ -144,7 +144,8 @@ class FileSystemDatasetFactoryTest : public DatasetFactoryTest { } options_ = std::make_shared(); options_->dataset_schema = schema; -ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(*schema)); +ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default( + *schema, options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); ASSERT_OK_AND_ASSIGN(dataset_, factory_->Finish(schema)); ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset_->GetFragments()); diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 76cd0af3b8..bf626826d4 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -330,8 +330,9 @@ 
TEST_F(TestParquetFileFormat, CachedMetadata) { // Read the file the first time, will read metadata auto options = std::make_shared(); options->filter = literal(true); - ASSERT_OK_AND_ASSIGN(auto projection_descr, - ProjectionDescr::FromNames({"x"}, *test_schema)); + ASSERT_OK_AND_ASSIGN( + auto projection_descr, + ProjectionDescr::FromNames({"x"}, *test_schema, options->add_augmented_fields)); options->projected_schema = projection_descr.schema; options->projection = projection_descr.expression; ASSERT_OK_AND_ASSIGN(auto generator, fragment->ScanBatchesAsync(options)); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 18981d1451..a856a792a2 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -211,7 +211,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // create the projected schema only if the provided expressions // produces valid set of fields. ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*projected_schema)); + ProjectionDescr::Default( + *projected_schema, scan_options->add_augmented_fie
(arrow) branch main updated (bd444106af -> 1c62df5255)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from bd444106af GH-39645: [Python] Fix read_table for encrypted parquet (#39438) add 1c62df5255 GH-41179: [Docs] Documentation for Dissociated IPC Protocol (#41180) No new revisions were added by this update. Summary of changes: docs/source/format/Columnar.rst| 2 + docs/source/format/DissociatedIPC.rst | 403 + .../ClientFlowchart.mmd} | 34 +- .../SequenceDiagramSame.mmd} | 38 +- .../DissociatedIPC/SequenceDiagramSeparate.mmd | 44 +++ docs/source/format/Flight.rst | 2 + docs/source/format/index.rst | 1 + 7 files changed, 499 insertions(+), 25 deletions(-) create mode 100644 docs/source/format/DissociatedIPC.rst copy docs/source/format/{Flight/DoExchange.mmd => DissociatedIPC/ClientFlowchart.mmd} (52%) copy docs/source/format/{Flight/DoExchange.mmd => DissociatedIPC/SequenceDiagramSame.mmd} (50%) create mode 100644 docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd
(arrow) branch main updated: GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5252c6ce13 GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595) 5252c6ce13 is described below commit 5252c6ce13694fa31dbcb2623d1629cd8fe53a47 Author: Alex Shcherbakov AuthorDate: Wed May 8 22:46:45 2024 +0300 GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595) This PR includes 2 fixes: 1. support reading `date64` columns (as write is supported) 2. properly validate list-like data types (list of unsupported is unsupported) ### Rationale for this change See #41594 ### What changes are included in this PR? 1. Added `date64` reading & conversion funcs similar to `date32` 2. Refactored date type validation ### Are these changes tested? a55cd5324d2c47932410b0c7a9c46075386645d2 ### Are there any user-facing changes? No. 
* GitHub Issue: #41594 Authored-by: candiduslynx Signed-off-by: Matt Topol --- go/arrow/csv/common.go | 40 ++ go/arrow/csv/reader.go | 74 go/arrow/csv/reader_test.go | 8 + go/arrow/csv/testdata/header.csv | 8 ++--- go/arrow/csv/testdata/types.csv | 8 ++--- go/arrow/csv/transformer.go | 69 + 6 files changed, 86 insertions(+), 121 deletions(-) diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 4455c8b782..06fed69a77 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -239,21 +239,31 @@ func WithStringsReplacer(replacer *strings.Replacer) Option { func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { - switch ft := f.Type.(type) { - case *arrow.BooleanType: - case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: - case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: - case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: - case *arrow.StringType, *arrow.LargeStringType: - case *arrow.TimestampType: - case *arrow.Date32Type, *arrow.Date64Type: - case *arrow.Decimal128Type, *arrow.Decimal256Type: - case *arrow.ListType, *arrow.LargeListType, *arrow.FixedSizeListType: - case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: - case arrow.ExtensionType: - case *arrow.NullType: - default: - panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) + if !typeSupported(f.Type) { + panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, f.Type)) } } } + +func typeSupported(dt arrow.DataType) bool { + switch dt := dt.(type) { + case *arrow.BooleanType: + case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: + case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: + case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: + case *arrow.StringType, *arrow.LargeStringType: + case *arrow.TimestampType: + case *arrow.Date32Type, 
*arrow.Date64Type: + case *arrow.Decimal128Type, *arrow.Decimal256Type: + case *arrow.MapType: + return false + case arrow.ListLikeType: + return typeSupported(dt.Elem()) + case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: + case arrow.ExtensionType: + case *arrow.NullType: + default: + return false + } + return true +} diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 18f1083e6a..46591a9a5a 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -474,6 +474,10 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDate32(bldr, str) } + case *arrow.Date64Type: + return func(str string) { + r.parseDate64(bldr, str) + } case *arrow.Time32Type: return func(str string) { r.parseTime32(bldr, str, dt.Unit) @@ -486,17 +490,13 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDecimal256(bldr, str, dt.Precision, dt.Scale)
(arrow) branch main updated (f462ec7e6b -> f672027654)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from f462ec7e6b MINOR: [Go] Bump golang.org/x/sys from 0.19.0 to 0.20.0 in /go (#41554) add f672027654 MINOR: [Go] Bump google.golang.org/protobuf from 1.34.0 to 1.34.1 in /go (#41553) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch main updated (e21952f969 -> f462ec7e6b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from e21952f969 GH-40750: [C++][Python] Map child Array constructed from keys and items shouldn't have offset (#40871) add f462ec7e6b MINOR: [Go] Bump golang.org/x/sys from 0.19.0 to 0.20.0 in /go (#41554) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch dependabot/go_modules/go/google.golang.org/protobuf-1.34.0 deleted (was 95d38e2794)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/google.golang.org/protobuf-1.34.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 95d38e2794 MINOR: [Go] Bump google.golang.org/protobuf from 1.33.0 to 1.34.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (56437409d1 -> 2b06472305)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 56437409d1 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go (#41512) add 2b06472305 MINOR: [Go] Bump google.golang.org/protobuf from 1.33.0 to 1.34.0 in /go (#41513) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch dependabot/go_modules/go/github.com/hamba/avro/v2-2.21.1 deleted (was 154bdb8655)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/github.com/hamba/avro/v2-2.21.1 in repository https://gitbox.apache.org/repos/asf/arrow.git was 154bdb8655 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (cc9e65fb80 -> 56437409d1)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from cc9e65fb80 MINOR: [C#] Bump Grpc.Tools from 2.62.0 to 2.63.0 in /csharp (#41523) add 56437409d1 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go (#41512) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch main updated: feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new b64b73aa0 feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796) b64b73aa0 is described below commit b64b73aa0cb24a0179e3e101f50e52a830d57d95 Author: David Li AuthorDate: Wed May 1 01:06:01 2024 +0900 feat(go/adbc/driver/flightsql): support stateless prepared statements (#1796) Fixes #1657. --- go/adbc/adbc.go| 4 ++-- go/adbc/driver/flightsql/cmd/testserver/main.go| 23 +- .../driver/flightsql/flightsql_adbc_server_test.go | 14 ++--- go/adbc/driver/flightsql/flightsql_adbc_test.go| 14 ++--- go/adbc/driver/flightsql/flightsql_connection.go | 14 ++--- go/adbc/driver/flightsql/flightsql_database.go | 6 +++--- go/adbc/driver/flightsql/flightsql_driver.go | 2 +- go/adbc/driver/flightsql/flightsql_statement.go| 10 +- go/adbc/driver/flightsql/record_reader.go | 10 +- go/adbc/driver/flightsql/record_reader_test.go | 12 +-- go/adbc/driver/internal/driverbase/connection.go | 6 +++--- go/adbc/driver/internal/driverbase/database.go | 2 +- go/adbc/driver/internal/driverbase/driver.go | 2 +- go/adbc/driver/internal/driverbase/driver_test.go | 6 +++--- go/adbc/driver/internal/shared_utils.go| 6 +++--- go/adbc/driver/panicdummy/panicdummy_adbc.go | 6 +++--- go/adbc/driver/snowflake/bulk_ingestion.go | 12 +-- go/adbc/driver/snowflake/connection.go | 4 ++-- go/adbc/driver/snowflake/driver.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 8 go/adbc/driver/snowflake/record_reader.go | 10 +- go/adbc/driver/snowflake/statement.go | 6 +++--- go/adbc/drivermgr/wrapper.go | 6 +++--- go/adbc/drivermgr/wrapper_sqlite_test.go | 6 +++--- go/adbc/go.mod | 8 go/adbc/go.sum | 16 +++ go/adbc/pkg/flightsql/driver.go| 8 go/adbc/pkg/panicdummy/driver.go | 8 go/adbc/pkg/snowflake/driver.go| 8 go/adbc/sqldriver/driver.go| 10 +- 
go/adbc/sqldriver/driver_internals_test.go | 10 +- go/adbc/sqldriver/flightsql/flightsql.go | 2 +- go/adbc/sqldriver/flightsql/flightsql_test.go | 8 go/adbc/standard_schemas.go| 2 +- go/adbc/utils/utils.go | 2 +- go/adbc/validation/validation.go | 6 +++--- python/adbc_driver_flightsql/tests/test_errors.py | 6 ++ 37 files changed, 148 insertions(+), 137 deletions(-) diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index 8622e71cb..b47f946f0 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -40,8 +40,8 @@ import ( "context" "fmt" - "github.com/apache/arrow/go/v16/arrow" - "github.com/apache/arrow/go/v16/arrow/array" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) diff --git a/go/adbc/driver/flightsql/cmd/testserver/main.go b/go/adbc/driver/flightsql/cmd/testserver/main.go index 8ce65c9f7..9951df235 100644 --- a/go/adbc/driver/flightsql/cmd/testserver/main.go +++ b/go/adbc/driver/flightsql/cmd/testserver/main.go @@ -32,11 +32,11 @@ import ( "strings" "sync" - "github.com/apache/arrow/go/v16/arrow" - "github.com/apache/arrow/go/v16/arrow/array" - "github.com/apache/arrow/go/v16/arrow/flight" - "github.com/apache/arrow/go/v16/arrow/flight/flightsql" - "github.com/apache/arrow/go/v16/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/flight" + "github.com/apache/arrow/go/v17/arrow/flight/flightsql" + "github.com/apache/arrow/go/v17/arrow/memory" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" @@ -268,6 +268,9 @@ func (srv *ExampleServer) DoGetPreparedStatement(ctx context.Context, cmd flight }() out = ch return + case "stateless_prepared_statement":
(arrow-adbc) branch main updated: docs: update driver status table (#1797)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 25456bf55 docs: update driver status table (#1797) 25456bf55 is described below commit 25456bf554c0d4748d41f7dac2634746f666dce0 Author: David Li AuthorDate: Tue Apr 30 23:56:05 2024 +0900 docs: update driver status table (#1797) Fixes #1786. --- docs/source/driver/postgresql.rst | 9 + docs/source/driver/status.rst | 22 ++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/docs/source/driver/postgresql.rst b/docs/source/driver/postgresql.rst index c76534d20..8ba54a013 100644 --- a/docs/source/driver/postgresql.rst +++ b/docs/source/driver/postgresql.rst @@ -165,6 +165,15 @@ The PostgreSQL driver mostly supports features defined in the ADBC API specification 1.0.0, but not all cases are fully implemented (particularly around bind parameters and prepared statements). +Bind Parameters and Prepared Statements +--- + +The PostgreSQL driver only supports executing prepared statements with +parameters that do not return result sets (basically, an INSERT with +parameters). Queries that return result sets are difficult with prepared +statements because the driver is built around using COPY for best +performance, which is not supported in this context. + Bulk Ingestion -- diff --git a/docs/source/driver/status.rst b/docs/source/driver/status.rst index 7337dd4e8..b25f2f492 100644 --- a/docs/source/driver/status.rst +++ b/docs/source/driver/status.rst @@ -24,7 +24,7 @@ Implementation Status **Experimental** drivers are not feature-complete and the implementation is still progressing. **Beta** drivers are (mostly) feature-complete but have only been available for a short time. -**Stable** drivers are feature-complete (as much as possible for the underlying database) and have been available/tested for a while. 
+**Stable** drivers are (mostly) feature-complete (as much as possible for the underlying database) and have been available/tested for a while. .. list-table:: :header-rows: 1 @@ -42,7 +42,7 @@ Implementation Status * - Flight SQL (Go) - C, Go - Go - - Beta + - Stable * - Flight SQL (Java) - Java @@ -57,17 +57,17 @@ Implementation Status * - PostgreSQL - C - C++ - - Beta + - Stable * - SQLite - C - C - - Beta + - Stable * - Snowflake - C, Go - Go - - Experimental + - Stable .. [#supported-languages] C drivers are usable from Go, Python, and Ruby as well. @@ -183,7 +183,7 @@ Update Queries * - PostgreSQL - N/A - N/A - - Y + - Y [#postgresql-prepared]_ - Y - Y - Y @@ -196,6 +196,12 @@ Update Queries - Y - Y +.. [#postgresql-prepared] The PostgreSQL driver only supports executing + prepared statements with parameters that do not return result sets + (basically, an INSERT with parameters). Queries that return result sets + are difficult with prepared statements because the driver is built around + using COPY for best performance, which is not supported in this context. + .. list-table:: Connection/database-level features :header-rows: 1 @@ -207,7 +213,7 @@ Update Queries * - Flight SQL (Go) - N - Y - - N + - Y * - Flight SQL (Java) - Y @@ -222,7 +228,7 @@ Update Queries * - PostgreSQL - Y - Y - - N + - Y * - SQLite - Y
(arrow-adbc) branch main updated: ci: disallow pings in PR body text (#1798)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 02aecca50 ci: disallow pings in PR body text (#1798) 02aecca50 is described below commit 02aecca50a9792d6087076b9fe44856615de173b Author: David Li AuthorDate: Tue Apr 30 23:55:04 2024 +0900 ci: disallow pings in PR body text (#1798) Fixes #1739. --- .github/workflows/dev_pr.yml | 8 1 file changed, 8 insertions(+) diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index a001695cf..805677cfb 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -57,3 +57,11 @@ jobs: PR_TITLE: ${{ github.event.pull_request.title }} run: | python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE" + + # Pings make it into the commit message where they annoy the user every + # time the commit gets pushed somewhere + - name: Check PR body for pings +env: + PR_BODY: ${{ github.event.pull_request.body }} +run: | + [[ "${PR_BODY}" =~ @[a-zA-Z0-9]+ ]] && exit 1 || true
(arrow) branch dependabot/go_modules/go/github.com/apache/thrift-0.20.0 deleted (was 82b45881df)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/github.com/apache/thrift-0.20.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 82b45881df MINOR: [Go] Bump github.com/apache/thrift from 0.19.0 to 0.20.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (c87073737b -> e3db586eb3)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from c87073737b MINOR: [R] refactor arrow_mask to include aggregations list (#41414) add e3db586eb3 MINOR: [Go] Bump github.com/apache/thrift from 0.19.0 to 0.20.0 in /go (#40777) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow-adbc) branch main updated (71072e06c -> 59eede462)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 71072e06c feat(csharp): Implement remaining functions in 1.0 spec (#1773) add 59eede462 fix(go/adbc/driver/flightsql): should use `ctx.Err().Error()` (#1769) No new revisions were added by this update. Summary of changes: go/adbc/driver/flightsql/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow-adbc) branch main updated: fix(go/adbc/driver/snowflake): handle quotes properly (#1738)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 8bd0e9bd3 fix(go/adbc/driver/snowflake): handle quotes properly (#1738) 8bd0e9bd3 is described below commit 8bd0e9bd308a4a1d8222d4c8317e98fd39e8d5ee Author: Matt Topol AuthorDate: Thu Apr 25 16:12:05 2024 -0400 fix(go/adbc/driver/snowflake): handle quotes properly (#1738) fixes #1721 --- go/adbc/driver/snowflake/bulk_ingestion.go | 24 +++- go/adbc/driver/snowflake/connection.go | 6 +++--- go/adbc/driver/snowflake/driver.go | 5 + go/adbc/driver/snowflake/driver_test.go| 12 go/adbc/driver/snowflake/statement.go | 6 +++--- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/go/adbc/driver/snowflake/bulk_ingestion.go b/go/adbc/driver/snowflake/bulk_ingestion.go index 5e1f1314f..9ec64f6e5 100644 --- a/go/adbc/driver/snowflake/bulk_ingestion.go +++ b/go/adbc/driver/snowflake/bulk_ingestion.go @@ -29,7 +29,6 @@ import ( "io" "math" "runtime" - "strconv" "strings" "sync" @@ -130,10 +129,13 @@ func (st *statement) ingestRecord(ctx context.Context) (nrows int64, err error) st.bound = nil }() - var initialRows int64 + var ( + initialRows int64 + target = quoteTblName(st.targetTable) + ) // Check final row count of target table to get definitive rows affected - initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) if err != nil { st.bound.Release() return @@ -182,13 +184,13 @@ func (st *statement) ingestRecord(ctx context.Context) (nrows int64, err error) } // Load the uploaded file into the target table - _, err = st.cnxn.cn.ExecContext(ctx, copyQuery, []driver.NamedValue{{Value: strconv.Quote(st.targetTable)}}) + _, err = st.cnxn.cn.ExecContext(ctx, copyQuery, []driver.NamedValue{{Value: target}}) if err != nil { return } // 
Check final row count of target table to get definitive rows affected - nrows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + nrows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) nrows = nrows - initialRows return } @@ -204,9 +206,13 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) st.streamBind = nil }() - var initialRows int64 + var ( + initialRows int64 + target = quoteTblName(st.targetTable) + ) + // Check final row count of target table to get definitive rows affected - initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + initialRows, err = countRowsInTable(ctx, st.cnxn.sqldb, target) if err != nil { return } @@ -214,7 +220,7 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) defer func() { // Always check the resulting row count, even in the case of an error. We may have ingested part of the data. ctx := context.Background() // TODO(joellubi): switch to context.WithoutCancel(ctx) once we're on Go 1.21 - n, countErr := countRowsInTable(ctx, st.cnxn.sqldb, strconv.Quote(st.targetTable)) + n, countErr := countRowsInTable(ctx, st.cnxn.sqldb, target) nrows = n - initialRows // Ingestion, row-count check, or both could have failed @@ -268,7 +274,7 @@ func (st *statement) ingestStream(ctx context.Context) (nrows int64, err error) } // Kickoff background tasks to COPY Parquet files into Snowflake table as they are uploaded - fileReady, finishCopy, cancelCopy := runCopyTasks(ctx, st.cnxn.cn, strconv.Quote(st.targetTable), int(st.ingestOptions.copyConcurrency)) + fileReady, finishCopy, cancelCopy := runCopyTasks(ctx, st.cnxn.cn, target, int(st.ingestOptions.copyConcurrency)) // Read Parquet files from buffer pool and upload to Snowflake stage in parallel g.Go(func() error { diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go index 41a8c1665..94223bb92 100644 --- 
a/go/adbc/driver/snowflake/connection.go +++ b/go/adbc/driver/snowflake/connection.go @@ -1212,12 +1212,12 @@ func (c *connectionImpl) getStringQuery(query string) (string, error) { func (c *connectionImpl) GetTableSchema(ctx context.Context, catalog *string, dbSchema *string, tableNa
(arrow-experiments) branch main updated: add cudf-flight-ucx example (#28)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-experiments.git The following commit(s) were added to refs/heads/main by this push: new 05e4e88 add cudf-flight-ucx example (#28) 05e4e88 is described below commit 05e4e888b19dbd98b95d8984a8d1f97fb6570d00 Author: Matt Topol AuthorDate: Thu Apr 25 13:25:09 2024 -0400 add cudf-flight-ucx example (#28) * add cudf-flight-ucx example * Apply suggestions from code review Co-authored-by: Sutou Kouhei * Update dissociated-ipc/cudf-flight-poc.cc Co-authored-by: Sutou Kouhei * ran linting * Apply suggestions from code review * split poc file for readability * Update dissociated-ipc/README.md Co-authored-by: Sutou Kouhei * rename files - Co-authored-by: Sutou Kouhei Co-authored-by: Ian Cook --- .clang-format | 21 ++ .gitignore| 21 ++ data/taxi-data/README.md | 22 ++ data/taxi-data/train.parquet | 3 + dissociated-ipc/CMakeLists.txt| 112 ++ dissociated-ipc/README.md | 55 + dissociated-ipc/cudf-flight-client.cc | 384 dissociated-ipc/cudf-flight-server.cc | 408 ++ dissociated-ipc/cudf-flight-ucx.cc| 39 dissociated-ipc/cudf-flight-ucx.h | 38 dissociated-ipc/ucx_client.cc | 73 ++ dissociated-ipc/ucx_client.h | 40 dissociated-ipc/ucx_conn.cc | 355 + dissociated-ipc/ucx_conn.h| 90 dissociated-ipc/ucx_server.cc | 280 +++ dissociated-ipc/ucx_server.h | 88 dissociated-ipc/ucx_utils.cc | 287 dissociated-ipc/ucx_utils.h | 122 ++ 18 files changed, 2438 insertions(+) diff --git a/.clang-format b/.clang-format new file mode 100644 index 000..9448dc8 --- /dev/null +++ b/.clang-format @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +BasedOnStyle: Google +ColumnLimit: 90 +DerivePointerAlignment: false +IncludeBlocks: Preserve diff --git a/.gitignore b/.gitignore new file mode 100644 index 000..d997483 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +vendored +build +.vscode +cufile.log diff --git a/data/taxi-data/README.md b/data/taxi-data/README.md new file mode 100644 index 000..6a7416e --- /dev/null +++ b/data/taxi-data/README.md @@ -0,0 +1,22 @@ + + +# taxi-data + +A small subset of the public [NYC Taxi Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page) used in the dissociated-ipc example. 
diff --git a/data/taxi-data/train.parquet b/data/taxi-data/train.parquet new file mode 100755 index 000..7bf702b --- /dev/null +++ b/data/taxi-data/train.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854cf53ab8669aa260a8ae65beafe880ab1a0232dbdac09705fb9b6f3f84eacd +size 38521857 diff --git a/dissociated-ipc/CMakeLists.txt b/dissociated-ipc/CMakeLists.txt new file mode 100644 index 000..fa46397 --- /dev/null +++ b/dissociated-ipc/CMakeLists.txt @@ -0,0 +1,112 @@ +# Licensed to the Ap
(arrow-adbc) branch main updated (35d2c76f1 -> 96e05a0f1)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 35d2c76f1 feat(csharp/src/Apache.Arrow.Adbc): Cleanup use of List in APIs and implementation (#1761) add 96e05a0f1 fix(go/adbc/driver/snowflake): comment format (#1768) No new revisions were added by this update. Summary of changes: go/adbc/driver/snowflake/connection.go | 95 +- 1 file changed, 48 insertions(+), 47 deletions(-)
(arrow) branch main updated (f8ef09a2b9 -> 7b62460551)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from f8ef09a2b9 GH-41263: [C#][Integration] Ensure offset is considered in all branches of the bitmap comparison (#41264) add 7b62460551 GH-40563: [Go] Unable to JSON marshal float64 arrays which contain a NaN value (#41109) No new revisions were added by this update. Summary of changes: go/arrow/array/float16.go | 16 - go/arrow/array/numeric.gen.go | 34 -- go/arrow/array/numeric.gen.go.tmpl | 34 ++ go/arrow/array/numeric_test.go | 91 +- go/arrow/array/numericbuilder.gen_test.go | 45 + go/arrow/array/numericbuilder.gen_test.go.tmpl | 23 +++ go/arrow/float16/float16.go| 2 + 7 files changed, 235 insertions(+), 10 deletions(-)
(arrow) branch main updated (48a9639bb0 -> ec2d7cbfb4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 48a9639bb0 GH-41140: [C#] Account for offset and length in union arrays (#41165) add ec2d7cbfb4 GH-41159: [Go][Parquet] Improvement Parquet BitWriter WriteVlqInt Performance (#41160) No new revisions were added by this update. Summary of changes: go/parquet/internal/utils/bit_reader_test.go | 17 + go/parquet/internal/utils/bit_writer.go | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-)
(arrow) branch main updated (835e218735 -> 433ceef8a2)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 835e218735 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go (#41082) add 433ceef8a2 MINOR: [Go] Bump golang.org/x/sync from 0.6.0 to 0.7.0 in /go (#41079) No new revisions were added by this update. Summary of changes:
(arrow) branch main updated (75a100a113 -> 835e218735)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 75a100a113 GH-38768: [Python] Empty slicing an array backwards beyond the start is now empty (#40682) add 835e218735 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go (#41082) No new revisions were added by this update. Summary of changes: go/go.mod | 6 +++--- go/go.sum | 12 ++-- 2 files changed, 9 insertions(+), 9 deletions(-)
(arrow) branch dependabot/go_modules/go/golang.org/x/sync-0.7.0 deleted (was fde47fd718)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/golang.org/x/sync-0.7.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was fde47fd718 MINOR: [Go] Bump golang.org/x/sync from 0.6.0 to 0.7.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch dependabot/go_modules/go/golang.org/x/tools-0.20.0 deleted (was 0f25267df5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/golang.org/x/tools-0.20.0 in repository https://gitbox.apache.org/repos/asf/arrow.git was 0f25267df5 MINOR: [Go] Bump golang.org/x/tools from 0.19.0 to 0.20.0 in /go The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new e9f2122b4 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702) e9f2122b4 is described below commit e9f2122b449e6a5a5a22b3c8365cedb4b5e561fc Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Tue Apr 2 11:51:06 2024 -0400 chore(go/adbc): bump github.com/snowflakedb/gosnowflake from 1.8.0 to 1.9.0 in /go/adbc (#1702) Bumps [github.com/snowflakedb/gosnowflake](https://github.com/snowflakedb/gosnowflake) from 1.8.0 to 1.9.0. Release notes Sourced from https://github.com/snowflakedb/gosnowflake/releases;>github.com/snowflakedb/gosnowflake's releases. Release Please check Snowflake https://docs.snowflake.com/en/release-notes/clients-drivers/golang;>community page for release notes. Commits https://github.com/snowflakedb/gosnowflake/commit/a0ec4810a8c299eb5ad663533bd915326af828be;>a0ec481 MINOR: Bumped up GoLang connector MINOR version from 1.8.0 to 1.9.0 (https://redirect.github.com/snowflakedb/gosnowflake/issues/1086;>#1086) https://github.com/snowflakedb/gosnowflake/commit/2141603917def9b5950561213845915edca14c44;>2141603 Allow enableHigherPrecision to be used in arrow batches (https://redirect.github.com/snowflakedb/gosnowflake/issues/1080;>#1080) https://github.com/snowflakedb/gosnowflake/commit/bd8b73b051adbe96b9d701da3b113f52db9e028b;>bd8b73b ArrowBatch high precision fails when using compute divide from int64 to bigDe... 
https://github.com/snowflakedb/gosnowflake/commit/5b174a341d4062bac62cc422ec90ec82538c58d4;>5b174a3 SNOW-920995 add CI configuration for regression tests (https://redirect.github.com/snowflakedb/gosnowflake/issues/1075;>#1075) https://github.com/snowflakedb/gosnowflake/commit/bcb26f9b7ff621db8b03ea6810b955d99f19b048;>bcb26f9 SNOW-1256926 Add converter from snowflake date/time format to go (https://redirect.github.com/snowflakedb/gosnowflake/issues/1077;>#1077) https://github.com/snowflakedb/gosnowflake/commit/656ba611df5006f1485a442c118efa3a065cbc69;>656ba61 SNOW-1259439 Extract lint and format to separate build (https://redirect.github.com/snowflakedb/gosnowflake/issues/1078;>#1078) https://github.com/snowflakedb/gosnowflake/commit/3a5605dd2651d269d27776e09e5f33f1982dd348;>3a5605d SNOW-1234152 Add timestamp to bulk array insert test (https://redirect.github.com/snowflakedb/gosnowflake/issues/1074;>#1074) https://github.com/snowflakedb/gosnowflake/commit/0722bc9d1248ce4b3596c4c27e7acf4cfc814d33;>0722bc9 SNOW-1230690 Add UnsupportedArg check when requesting S3 accelerated config (... 
https://github.com/snowflakedb/gosnowflake/commit/1cbc05fef4765be72466d5c8433a46a2ce1e454c;>1cbc05f fix: Fix data race when initializing logging (https://redirect.github.com/snowflakedb/gosnowflake/issues/1060;>#1060) https://github.com/snowflakedb/gosnowflake/commit/7c2634a31c378fad29764a0e2f34839cf1fae243;>7c2634a Upgrade to apache/arrow/go/v15 (https://redirect.github.com/snowflakedb/gosnowflake/issues/1062;>#1062) Additional commits viewable in https://github.com/snowflakedb/gosnowflake/compare/v1.8.0...v1.9.0;>compare view [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/snowflakedb/gosnowflake=go_modules=1.8.0=1.9.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` w
(arrow-adbc) branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.9.0 deleted (was 4467c5f8c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch dependabot/go_modules/go/adbc/github.com/snowflakedb/gosnowflake-1.9.0 in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 4467c5f8c chore(go/adbc): bump github.com/snowflakedb/gosnowflake in /go/adbc The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated: MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new e44dc29df9 MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922) e44dc29df9 is described below commit e44dc29df9587a139fe539069c3dafc771256b90 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Mon Apr 1 14:02:32 2024 -0400 MINOR: [Go] Bump github.com/google/flatbuffers from 24.3.7+incompatible to 24.3.25+incompatible in /go (#40922) Bumps [github.com/google/flatbuffers](https://github.com/google/flatbuffers) from 24.3.7+incompatible to 24.3.25+incompatible. Release notes Sourced from https://github.com/google/flatbuffers/releases;>github.com/google/flatbuffers's releases. v24.3.25 What's Changed Fix License by https://github.com/p0fi;>@p0fi in https://redirect.github.com/google/flatbuffers/pull/8253;>google/flatbuffers#8253 Fix handling non null-terminated string_views in LookupByKey by https://github.com/mpawlowski-eyeo;>@mpawlowski-eyeo in https://redirect.github.com/google/flatbuffers/pull/8203;>google/flatbuffers#8203 New Contributors https://github.com/p0fi;>@p0fi made their first contribution in https://redirect.github.com/google/flatbuffers/pull/8253;>google/flatbuffers#8253 https://github.com/mpawlowski-eyeo;>@mpawlowski-eyeo made their first contribution in https://redirect.github.com/google/flatbuffers/pull/8203;>google/flatbuffers#8203 Full Changelog: https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25;>https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25 Commits https://github.com/google/flatbuffers/commit/595bf0007ab1929570c7671f091313c8fc20644e;>595bf00 FlatBuffers Version v24.3.25 https://github.com/google/flatbuffers/commit/0cfb7eb80b05c058e19e50fb575263908e601469;>0cfb7eb Fix handling non 
null-terminated string_views in LookupByKey (https://redirect.github.com/google/flatbuffers/issues/8203;>#8203) https://github.com/google/flatbuffers/commit/67eb95de9281087ccbba9aafd6e8ab1958d12045;>67eb95d presubmit.yml: Use xcode 14.2 https://github.com/google/flatbuffers/commit/b1f617fcb2821f67453dc037cd0a6ebd8eb44de0;>b1f617f Fix License (https://redirect.github.com/google/flatbuffers/issues/8253;>#8253) https://github.com/google/flatbuffers/commit/960cd4d635b98fc5daeeafee8b0a5601d45c70ad;>960cd4d Lobster: Support required fields See full diff in https://github.com/google/flatbuffers/compare/v24.3.7...v24.3.25;>compare view [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/google/flatbuffers=go_modules=24.3.7+incompatible=24.3.25+incompatible)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- Dependabot commands and options You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. 
You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(arrow) branch main updated: GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 68241d8a86 GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889) 68241d8a86 is described below commit 68241d8a86e9923cda2b758d10176b8dfb1cfea7 Author: wayne AuthorDate: Mon Apr 1 12:01:49 2024 -0600 GH-40888: [Go][FlightRPC] support conversion from array.Duration in FlightSQL driver (#40889) ### Rationale for this change To enable the use of the flightsql driver's implementation of golang sql interfaces. ### What changes are included in this PR? A new switch branch for handling `array.Duration`. ### Are these changes tested? I manually tested and didn't add new unit tests because none of the other types handled in the same switch block are unit tested. ### Are there any user-facing changes? Just a more complete set of types handled by the sql driver. 
* GitHub Issue: #40888 Authored-by: wayne warren Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/utils.go | 4 go/arrow/flight/flightsql/driver/utils_test.go | 12 2 files changed, 16 insertions(+) diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index a99c045e2e..84cf2110cc 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -104,6 +104,10 @@ func fromArrowType(arr arrow.Array, idx int) (interface{}, error) { return v.ToTime(ts.TimeUnit()), nil case *array.Date64: return c.Value(idx).ToTime(), nil + case *array.Duration: + dt := arr.DataType().(*arrow.DurationType) + duration := time.Duration(c.Value(idx)) * dt.Unit.Multiplier() + return duration, nil case *array.DayTimeInterval: durationDays := time.Duration(c.Value(idx).Days*24) * time.Hour duration := time.Duration(c.Value(idx).Milliseconds) * time.Millisecond diff --git a/go/arrow/flight/flightsql/driver/utils_test.go b/go/arrow/flight/flightsql/driver/utils_test.go index 6b1adfed47..8ea7921b64 100644 --- a/go/arrow/flight/flightsql/driver/utils_test.go +++ b/go/arrow/flight/flightsql/driver/utils_test.go @@ -50,6 +50,10 @@ func Test_fromArrowType(t *testing.T) { {Name: "f15-ts_us", Type: arrow.FixedWidthTypes.Timestamp_ns}, {Name: "f16-d64", Type: arrow.FixedWidthTypes.Date64}, {Name: "f17-dti", Type: arrow.FixedWidthTypes.DayTimeInterval}, + {Name: "f18-duration_s", Type: arrow.FixedWidthTypes.Duration_s}, + {Name: "f19-duration_ms", Type: arrow.FixedWidthTypes.Duration_ms}, + {Name: "f20-duration_us", Type: arrow.FixedWidthTypes.Duration_us}, + {Name: "f21-duration_ns", Type: arrow.FixedWidthTypes.Duration_ns}, } schema := arrow.NewSchema(fields, nil) @@ -90,6 +94,10 @@ func Test_fromArrowType(t *testing.T) { testTime := time.Now() b.Field(15).(*array.Date64Builder).Append(arrow.Date64FromTime(testTime)) b.Field(16).(*array.DayTimeIntervalBuilder).Append(arrow.DayTimeInterval{Days: 1, 
Milliseconds: 1000}) + b.Field(17).(*array.DurationBuilder).Append(1) + b.Field(18).(*array.DurationBuilder).Append(1) + b.Field(19).(*array.DurationBuilder).Append(1) + b.Field(20).(*array.DurationBuilder).Append(1) rec := b.NewRecord() defer rec.Release() @@ -123,4 +131,8 @@ func Test_fromArrowType(t *testing.T) { tf(t, 14, time.Date(1970, 1, 1, 12, 0, 0, 0, time.UTC)) // "f15-ts_us" tf(t, 15, testTime.In(time.UTC).Truncate(24*time.Hour)) // "f16-d64" tf(t, 16, time.Duration(24*time.Hour+time.Second)) // "f17-dti" + tf(t, 17, time.Duration(10)) // "f18-duration_s" + tf(t, 18, time.Duration(100))// "f19-duration_ms" + tf(t, 19, time.Duration(1000)) // "f20-duration_us" + tf(t, 20, time.Duration(1)) // "f21-duration_ns" }
(arrow) branch main updated: GH-40900: [Go] Fix Mallocator Weirdness (#40902)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 71321841eb GH-40900: [Go] Fix Mallocator Weirdness (#40902) 71321841eb is described below commit 71321841eb6d94946de43cccb7f04afe5cf2aa10 Author: Matt Topol AuthorDate: Mon Apr 1 11:15:59 2024 -0400 GH-40900: [Go] Fix Mallocator Weirdness (#40902) ### Rationale for this change With help from @ lidavidm and @ bkietz digging into the linked issue, we found the following: * Using `mtrace` and `strace` didn't produce much enlightenment to what was happening. * If the python adbc_driver_manager was built so that the cython lib is built using `CMAKE_BUILD_TYPE=Debug` then the crash/failure goes away * If the env var `MALLOC_MMAP_THRESHOLD_` is set to 128MB, the crash/failure goes away * It is only reproducible when calling through python, I haven't been able to reproduce it using pure Go * Calling `calloc` again after it fails, still fails * Calling `malloc` + `memset` immediately after the failing `calloc` works perfectly and doesn't fail anymore ### What changes are included in this PR? Adding a comment describing the situation and falling back to `malloc` + `memset` if `calloc` returns an error. If the pointer returned from `malloc` is `nil` then we surface the error. 
* GitHub Issue: #40900 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/memory/mallocator/mallocator.go | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/go/arrow/memory/mallocator/mallocator.go b/go/arrow/memory/mallocator/mallocator.go index 59d240a106..9483bdfc2a 100644 --- a/go/arrow/memory/mallocator/mallocator.go +++ b/go/arrow/memory/mallocator/mallocator.go @@ -60,10 +60,19 @@ func (alloc *Mallocator) Allocate(size int) []byte { } ptr, err := C.calloc(C.size_t(size), 1) if err != nil { - panic(err) + // under some circumstances and allocation patterns, we can end up in a scenario + // where for some reason calloc return ENOMEM even though there is definitely memory + // available for use. So we attempt to fallback to simply doing malloc + memset in + // this case. If malloc returns a nil pointer, then we know we're out of memory + // and will surface the error. + if ptr = C.malloc(C.size_t(size)); ptr == nil { + panic(err) + } + C.memset(ptr, 0, C.size_t(size)) } else if ptr == nil { panic("mallocator: out of memory") } + atomic.AddUint64(, uint64(size)) return unsafe.Slice((*byte)(ptr), size) }
(arrow-adbc) branch main updated: chore(dev/release): slight fix for non-conda verification and docs (#1682)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new a1deb83ad chore(dev/release): slight fix for non-conda verification and docs (#1682) a1deb83ad is described below commit a1deb83ad2e1e997004f18917eb0c1f1c40896cb Author: Matt Topol AuthorDate: Thu Mar 28 16:06:05 2024 -0400 chore(dev/release): slight fix for non-conda verification and docs (#1682) updating the docs and release verification script based on my experience for non-conda verification runs --- dev/release/verify-release-candidate.sh | 2 +- docs/source/development/releasing.rst | 4 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0e2f1f6e3..ca81e78ba 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -455,7 +455,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv cython duckdb pandas protobuf pyarrow pytest setuptools_scm setuptools || exit 1 + maybe_setup_virtualenv cython duckdb pandas protobuf pyarrow pytest setuptools_scm setuptools importlib_resources || exit 1 maybe_setup_conda --file "${ADBC_DIR}/ci/conda_env_python.txt" || exit 1 if [ "${USE_CONDA}" -gt 0 ]; then diff --git a/docs/source/development/releasing.rst b/docs/source/development/releasing.rst index b758c0d30..3b930ee79 100644 --- a/docs/source/development/releasing.rst +++ b/docs/source/development/releasing.rst @@ -213,9 +213,13 @@ How to Verify Release Candidates - C and C++ compilers (or the equivalent of ``build-essential`` for your platform) - Python 3 - Ruby with headers + - meson is required - bundler, rake, red-arrow, and test-unit Ruby gems - GLib and gobject-introspection with headers + - pkg-config or cmake must be able to 
find libarrow-glib.so + - GI_TYPELIB_PATH should be set to the path to the girepository-1.0 directory - Java JRE and JDK (Java 8+) + - the javadoc command must also be accessible - Go - CMake, ninja-build, libpq (with headers), SQLite (with headers)
(arrow) branch update-go-readme deleted (was 88484e638b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git was 88484e638b GH-40847: [Go] update readme The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow) branch main updated (950fbb62ce -> 7d1111214d)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 950fbb62ce GH-40733: [Go] Require Go 1.21 or later (#40848) add 7d1111214d GH-40847: [Go] update readme (#40877) No new revisions were added by this update. Summary of changes: go/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow) branch main updated: GH-40733: [Go] Require Go 1.21 or later (#40848)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 950fbb62ce GH-40733: [Go] Require Go 1.21 or later (#40848) 950fbb62ce is described below commit 950fbb62ce7388aad926c5af5861bf07f7db6de1 Author: Matt Topol AuthorDate: Thu Mar 28 15:59:14 2024 -0400 GH-40733: [Go] Require Go 1.21 or later (#40848) ### Rationale for this change Bumping to require Go 1.21 or later as 1.20 is EOL * GitHub Issue: #40733 Authored-by: Matt Topol Signed-off-by: Matt Topol --- .env | 4 +- .github/workflows/go.yml | 28 ++- ci/docker/conda-integration.dockerfile | 2 +- ci/docker/debian-12-go.dockerfile | 4 +- dev/release/verify-release-candidate.sh| 6 +-- dev/tasks/tasks.yml| 2 +- go/arrow/bitutil/bitutil.go| 35 + .../bitutil/bitutil_bytes.go} | 26 +- go/arrow/cdata/cdata_allocate.go | 57 ++ go/arrow/cdata/cdata_exports.go| 55 - go/arrow/compute/exec/span.go | 17 --- .../compute/exec/span_offsets.go} | 20 ++-- go/arrow/compute/fieldref.go | 17 --- .../compute/fieldref_hash.go} | 23 +++-- go/arrow/doc.go| 2 - go/arrow/flight/flightsql/driver/driver_test.go| 1 + go/arrow/memory/mallocator/mallocator.go | 11 ++--- go/arrow/memory/mallocator/mallocator_util.go | 26 ++ go/go.mod | 2 +- go/internal/hashing/hash_string.go | 4 ++ go/internal/hashing/xxh3_memo_table.go | 9 +--- go/parquet/types.go| 44 +++-- 22 files changed, 177 insertions(+), 218 deletions(-) diff --git a/.env b/.env index b5c66563f5..298c100c09 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ CUDA=11.2.2 DASK=latest DOTNET=7.0 GCC_VERSION="" -GO=1.19.13 -STATICCHECK=v0.4.5 +GO=1.21.8 +STATICCHECK=v0.4.7 HDFS=3.2.1 JDK=8 KARTOTHEK=latest diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 47148d9568..7ff781d35e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -59,13 +59,13 @@ jobs: { "arch-label": "AMD64", 
"arch": "amd64", -"go": "1.19", +"go": "1.21", "runs-on": "ubuntu-latest" }, { "arch-label": "AMD64", "arch": "amd64", -"go": "1.20", +"go": "1.22", "runs-on": "ubuntu-latest" } JSON @@ -75,13 +75,13 @@ jobs: { "arch-label": "ARM64", "arch": "arm64v8", -"go": "1.19", +"go": "1.21", "runs-on": ["self-hosted", "arm", "linux"] }, { "arch-label": "ARM64", "arch": "arm64v8", -"go": "1.20", +"go": "1.22", "runs-on": ["self-hosted", "arm", "linux"] } JSON @@ -169,10 +169,13 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Get required Go version +run: | + (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - name: Install Go uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: "${{ env.GO_VERSION }}" cache: true cache-dependency-path: go/go.sum - name: Run build @@ -188,7 +191,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] env: GO: ${{ matrix.go }} steps: @@ -229,7 +232,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] env: GO: ${{ matrix.go }} steps: @@ -268,7 +271,7 @@ jobs: strategy: fail-fast: false matrix: -go: [1.19, '1.20'] +go: ['1.21', '1.22'] steps: - name: Checkout Arrow
(arrow) branch update-go-readme created (now 88484e638b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git at 88484e638b GH-40847: [Go] update readme This branch includes the following new commits: new 88484e638b GH-40847: [Go] update readme The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
(arrow) 01/01: GH-40847: [Go] update readme
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch update-go-readme in repository https://gitbox.apache.org/repos/asf/arrow.git commit 88484e638bfecbbad8d59094a90d9574ea3be4a7 Author: Matt Topol AuthorDate: Thu Mar 28 15:25:29 2024 -0400 GH-40847: [Go] update readme Remove reference to deleted internal package --- go/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/README.md b/go/README.md index 4a9e151ddf..20bd7cd775 100644 --- a/go/README.md +++ b/go/README.md @@ -87,8 +87,8 @@ advanced optimizer and generate PLAN9 assembly functions from C/C++ code. The arrow package can be compiled without these optimizations using the `noasm` build tag. Alternatively, by configuring an environment variable, it is possible to dynamically configure which architecture optimizations are used at -runtime. See the `cpu` package [README](arrow/internal/cpu/README.md) for a -description of this environment variable. +runtime. We use the (cpu)[https://pkg.go.dev/golang.org/x/sys/cpu] package to +check dynamically for these features. ### Example Usage
(arrow) branch main updated: GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f710ac52b0 GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802) f710ac52b0 is described below commit f710ac52b049806515a14445b242c3ec819fb99d Author: Alex Shcherbakov AuthorDate: Tue Mar 26 21:17:04 2024 +0200 GH-40719: [Go] Make `arrow.Null` non-null for `arrow.TypeEqual` to work properly with `new(arrow.NullType)` (#40802) ### Rationale for this change Currently creating a record with a `null` type via `new(arrow.NullType)` in the schema will fail the schema validation. ### What changes are included in this PR? Made `arrow.Null` a non-null value instead of just a declaration. ### Are these changes tested? Yes, see cd4253a24e6d828128fbb7854da3c37951d74885 ### Are there any user-facing changes? `arrow.Null` became non-null, but the type is the same. 
* GitHub Issue: #40719 Authored-by: Alex Shcherbakov Signed-off-by: Matt Topol --- go/arrow/compare_test.go | 3 +++ go/arrow/datatype_null.go | 6 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go index 62e30e634e..ca87621ead 100644 --- a/go/arrow/compare_test.go +++ b/go/arrow/compare_test.go @@ -42,6 +42,9 @@ func TestTypeEqual(t *testing.T) { { Null, Null, true, false, }, + { + Null, new(NullType), true, false, + }, { {}, {}, false, false, }, diff --git a/go/arrow/datatype_null.go b/go/arrow/datatype_null.go index 2d2454c652..c852b854a7 100644 --- a/go/arrow/datatype_null.go +++ b/go/arrow/datatype_null.go @@ -27,7 +27,5 @@ func (*NullType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull()}} } -var ( - Null *NullType - _DataType = Null -) +// Null gives us both the compile-time assertion of DataType interface as well as serving a good element for use in schemas. +var Null DataType = new(NullType)
(arrow) branch main updated: GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5fd6b44936 GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654) 5fd6b44936 is described below commit 5fd6b44936a19761e45a8e43d7e76a0a23c5a222 Author: Peter Newcomb AuthorDate: Mon Mar 25 16:48:50 2024 -0400 GH-40630: [Go][Parquet] Enable writing of Parquet footer without closing file (#40654) ### Rationale for this change See #40630 ### What changes are included in this PR? 1. Added `FlushWithFooter` method to *file.Writer 2. To support `FlushWithFooter`, refactored `Close` in a way that changes the order of operations in two ways: a. closure of open row group writers is now done after using `defer` to ensure closure of the sink, instead of before b. wiping out of encryption keys is now done by the same deferred function, ensuring that it happens even upon error ### Are these changes tested? `file_writer_test.go` has been extended to cover `FlushWithFooter` in a manner equivalent to the existing coverage. ### Are there any user-facing changes? Only the addition of a new public method as described above. No breaking changes to any existing public interfaces, unless the two minor order-of-operation changes described above are somehow a problem. I'm not sure it's a critical fix, but one of the minor changes described above may reduce the likelihood that an attack could inject an error (e.g., an I/O error) to prevent an encryption key from being wiped from memory. 
* GitHub Issue: #40630 Authored-by: Peter Newcomb Signed-off-by: Matt Topol --- go/parquet/file/file_writer.go | 62 +++-- go/parquet/file/file_writer_test.go | 17 +- go/parquet/metadata/file.go | 15 - 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index a2cf397cbc..57344b25cf 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -32,6 +32,7 @@ import ( type Writer struct { sink utils.WriteCloserTell open bool + footerFlushed bool props *parquet.WriterProperties rowGroups int nrows int @@ -125,6 +126,7 @@ func (fw *Writer) appendRowGroup(buffered bool) *rowGroupWriter { fw.rowGroupWriter.Close() } fw.rowGroups++ + fw.footerFlushed = false rgMeta := fw.metadata.AppendRowGroup() fw.rowGroupWriter = newRowGroupWriter(fw.sink, rgMeta, int16(fw.rowGroups)-1, fw.props, buffered, fw.fileEncryptor) return fw.rowGroupWriter @@ -172,12 +174,9 @@ func (fw *Writer) Close() (err error) { // if any functions here panic, we set open to be false so // that this doesn't get called again fw.open = false - if fw.rowGroupWriter != nil { - fw.nrows += fw.rowGroupWriter.nrows - fw.rowGroupWriter.Close() - } - fw.rowGroupWriter = nil + defer func() { + fw.closeEncryptor() ierr := fw.sink.Close() if err != nil { if ierr != nil { @@ -189,30 +188,48 @@ func (fw *Writer) Close() (err error) { err = ierr }() + err = fw.FlushWithFooter() + fw.metadata.Clear() + } + return nil +} + +// FlushWithFooter closes any open row group writer and writes the file footer, leaving +// the writer open for additional row groups. Additional footers written by later +// calls to FlushWithFooter or Close will be cumulative, so that only the last footer +// written need ever be read by a reader. 
+func (fw *Writer) FlushWithFooter() error { + if !fw.footerFlushed { + if fw.rowGroupWriter != nil { + fw.nrows += fw.rowGroupWriter.nrows + fw.rowGroupWriter.Close() + } + fw.rowGroupWriter = nil + + fileMetadata, err := fw.metadata.Snapshot() + if err != nil { + return err + } + fileEncryptProps := fw.props.FileEncryptionProperties() if fileEncryptProps == nil { // non encrypted file - fileMetadata, err := fw.metadata.Finish() - if err != nil { + if _, err = writeFileMetadata(fileMetadata, fw.sink); err != nil { + return err
(arrow) branch main updated (cc771a0133 -> 1781b32487)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from cc771a0133 GH-40634: [C#] ArrowStreamReader should not be null (#40765) add 1781b32487 GH-40693: [Go] Fix Decimal type precision loss on GetOneForMarshal (#40694) No new revisions were added by this update. Summary of changes: go/arrow/array/decimal128.go | 13 +--- go/arrow/array/decimal128_test.go | 59 - go/arrow/array/decimal256.go | 12 --- go/arrow/array/decimal256_test.go | 70 +-- 4 files changed, 142 insertions(+), 12 deletions(-)
(arrow) branch main updated (07e8aa2cae -> 1ee3da0064)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 07e8aa2cae GH-40568: [Java] Test failure in Dataset regarding TestAllTypes (#40662) add 1ee3da0064 GH-40672: [Go][Parquet] Add proper build tags for min_max (#40676) No new revisions were added by this update. Summary of changes: go/internal/utils/min_max_noasm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
(arrow-adbc) branch main updated: refactor(go/adbc/driver): driverbase implementation for connection (#1590)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 302242849 refactor(go/adbc/driver): driverbase implementation for connection (#1590) 302242849 is described below commit 302242849ba09dbb4f4b6d95155421dffafb6105 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue Mar 19 11:43:42 2024 -0400 refactor(go/adbc/driver): driverbase implementation for connection (#1590) Implementation of Connection driver base, along with a refactor of Driver and Database bases. The bases have been refactored in the following way: - The `*Impl` interface (e.g. `DatabaseImpl`) now explicitly implements the corresponding `adbc` interface (e.g. `adbc.Database`). - We now check to guarantee the `DatabaseImplBase` implements the entire `DatabaseImpl` interface with stub methods or default implementations. - A new interface has been added (e.g. `driverbase.Database`) which contains all methods the _output_ of driverbase constructor `NewDatabase()` should be. This helps document and guarantee the "extra" behavior provided by using the driverbase. This interface should be internal to the library. - By embedding `DatabaseImpl` in the `database` struct (and similarly for the other bases) it automatically inherits implementations coming from the `DatabaseImpl`. This way we don't need to write out all the implementations a second time, hence the deletes. - The Connection base uses a builder for its constructor to register any helper methods (see discussion in comments). The Driver and Database bases use simple function constructors because they don't have any helpers to register. This felt simpler but I can make those into trivial builders as well if we prefer to have consistency between them. 
A new `DriverInfo` type has been introduced to help consolidate the collection and validation of metadata for `GetInfo()`. There are more small changes such as refactors of the flightsql and snowflake drivers to make use of the added functionality, as well as a new set of tests for the driverbase. Please let me know if anything else could use clarification. Resolves #1105. --- go/adbc/adbc.go| 11 + go/adbc/driver/driverbase/driver.go| 66 --- go/adbc/driver/flightsql/flightsql_connection.go | 578 +++- go/adbc/driver/flightsql/flightsql_database.go | 25 +- go/adbc/driver/flightsql/flightsql_driver.go | 45 +- go/adbc/driver/flightsql/flightsql_statement.go| 12 +- go/adbc/driver/internal/driverbase/connection.go | 497 + .../driver/{ => internal}/driverbase/database.go | 111 ++-- go/adbc/driver/internal/driverbase/driver.go | 116 go/adbc/driver/internal/driverbase/driver_info.go | 176 ++ .../driver/internal/driverbase/driver_info_test.go | 88 +++ go/adbc/driver/internal/driverbase/driver_test.go | 595 + go/adbc/driver/{ => internal}/driverbase/error.go | 0 .../driver/{ => internal}/driverbase/logging.go| 0 go/adbc/driver/snowflake/connection.go | 293 +++--- go/adbc/driver/snowflake/driver.go | 45 +- go/adbc/driver/snowflake/driver_test.go| 4 + go/adbc/driver/snowflake/snowflake_database.go | 41 +- go/adbc/driver/snowflake/statement.go | 2 +- go/adbc/go.mod | 1 + go/adbc/go.sum | 1 + 21 files changed, 1861 insertions(+), 846 deletions(-) diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index f5514626a..6968faacf 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -355,6 +355,17 @@ const ( InfoDriverADBCVersion InfoCode = 103 // DriverADBCVersion ) +type InfoValueTypeCode = arrow.UnionTypeCode + +const ( + InfoValueStringType InfoValueTypeCode = 0 + InfoValueBooleanType InfoValueTypeCode = 1 + InfoValueInt64Type InfoValueTypeCode = 2 + InfoValueInt32BitmaskTypeInfoValueTypeCode = 3 + InfoValueStringListType InfoValueTypeCode = 4 + InfoValueInt32ToInt32ListMapType 
InfoValueTypeCode = 5 +) + type ObjectDepth int const ( diff --git a/go/adbc/driver/driverbase/driver.go b/go/adbc/driver/driverbase/driver.go deleted file mode 100644 index e4cfb9960..0 --- a/go/adbc/driver/driverbase/driver.go +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright own
(arrow) branch main updated (2e0d701029 -> 98a0fc8570)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 2e0d701029 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.0 to 2.20.1 in /go (#40637) add 98a0fc8570 MINOR: [Go] Bump google.golang.org/grpc from 1.58.3 to 1.62.1 in /go (#40638) No new revisions were added by this update. Summary of changes: go/go.mod | 4 ++-- go/go.sum | 10 +- 2 files changed, 7 insertions(+), 7 deletions(-)
(arrow) branch main updated (08401514a7 -> 2e0d701029)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 08401514a7 GH-40621: [C++] Add missing util/config.h in arrow/io/compressed_test.cc (#40625) add 2e0d701029 MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.0 to 2.20.1 in /go (#40637) No new revisions were added by this update. Summary of changes: go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-)
(arrow) branch main updated (d10f468b06 -> 1dd0d45375)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from d10f468b06 GH-40395: [C++] Avoid simplifying expressions which call impure functions (#40396) add 1dd0d45375 MINOR: [Go] update go dependencies (#40511) No new revisions were added by this update. Summary of changes: go/go.mod | 28 ++-- go/go.sum | 58 +- 2 files changed, 43 insertions(+), 43 deletions(-)
(arrow) branch main updated: GH-40261: [Go] Don't export array functions with unexposed return types (#40272)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new fc48b8963d GH-40261: [Go] Don't export array functions with unexposed return types (#40272) fc48b8963d is described below commit fc48b8963d6486ac129a7c1365a35d02b28876e8 Author: Gabriel Tomitsuka AuthorDate: Thu Feb 29 19:00:33 2024 +0100 GH-40261: [Go] Don't export array functions with unexposed return types (#40272) ### Rationale for this change Exposing functions that return unexposed types in Go is considered poor practice. This approach complicates type handling, making it challenging for developers to utilize these return values in their functions. Developers must undertake the cumbersome process of identifying the applicable interface for the return type, a task that often results in significant time consumption and leads to confusing, non-informative types being suggested by godocs and IDEs. Consider the difficulty in discerning the relationship between two return types, `*simpleTable` and `arrow.Table`, at a glance. It is not immediately clear whether they implement the same interface or are distinct entities: https://github.com/apache/arrow/assets/10295671/463cd8a7-47f3-44ce-9871-2885025e5a5c;> https://github.com/apache/arrow/assets/10295671/4ffc049c-fb88-43fb-bd57-fc1ad5d4dc68;> Returning exposed interfaces is already commonly done in the Arrow package to ensure API consistency and usability, as evidenced in methods like `AddColumn() -> arrow.Table` and `RecordFromJSON() -> arrow.Record`. Extending this to all functions, including `NewTable`, `NewTableFromSlice`, and `NewRecord`, will standardize the codebase in line with these principles. The use of `*simpleTable` and similar types is restricted in explicit type declarations and function signatures. 
Therefore, transitioning to exposed return types is a backward-compatible improvement that will lead to enhanced documentation and better support in IDEs for Arrow users. ### What changes are included in this PR? * Change return signature of functions using the following unexposed return types: * `*simpleTable` --> `arrow.Table` * `*simpleRecord` --> `arrow.Record` * `*simpleRecords` --> `array.RecordReader` * Add the function `String()`, which is implemented by `*simpleTable`, to the `arrow.Table` interface. `*simpleTable` is the only implementation of `arrow.Table`, so this requires no further changes. ### Are these changes tested? Yes. The relevant code is already covered by tests in `arrow/array/table_test.go` (`TestTable`) and `arrow/array/record_test.go` (`TestRecord`, `TestRecordReader`). All tests pass (subpackages without tests omitted): ```bash ok github.com/apache/arrow/go/v16/arrow0.398s ok github.com/apache/arrow/go/v16/arrow/array 0.600s ok github.com/apache/arrow/go/v16/arrow/arrio 1.544s ok github.com/apache/arrow/go/v16/arrow/avro 0.629s ok github.com/apache/arrow/go/v16/arrow/bitutil1.001s ok github.com/apache/arrow/go/v16/arrow/compute2.147s ok github.com/apache/arrow/go/v16/arrow/compute/exec 0.813s ok github.com/apache/arrow/go/v16/arrow/compute/exprs 1.900s ok github.com/apache/arrow/go/v16/arrow/csv0.288s ok github.com/apache/arrow/go/v16/arrow/decimal128 1.356s ok github.com/apache/arrow/go/v16/arrow/decimal256 1.718s ok github.com/apache/arrow/go/v16/arrow/encoded0.493s ok github.com/apache/arrow/go/v16/arrow/flight 2.845s ok github.com/apache/arrow/go/v16/arrow/flight/flightsql 0.512s ok github.com/apache/arrow/go/v16/arrow/flight/flightsql/driver 7.386s ok github.com/apache/arrow/go/v16/arrow/float160.570s ok github.com/apache/arrow/go/v16/arrow/internal/arrjson 0.419s ok github.com/apache/arrow/go/v16/arrow/internal/dictutils 0.407s ok github.com/apache/arrow/go/v16/arrow/internal/testing/tools 0.247s ok 
github.com/apache/arrow/go/v16/arrow/ipc1.984s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-cat 0.530s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-file-to-stream 1.267s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-json-integration-test 1.074s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-ls 1.263s ok github.com/apache/arrow/go/v16/arrow/ipc/cmd/arrow-stream-to-file 0.935s ok github.com/apache/arrow/go/v16/arrow/math 0.616s ok github.com/apache/arrow/go/v16/arrow/memory 1.275s ok github.com/apache/arrow/go/v16/ar
(arrow-adbc) branch fix-snowflake-ci deleted (was 63c6985f)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git was 63c6985f updates from feedback The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(arrow-adbc) branch main updated: test(go/adbc/driver/snowflake): fix flaky integration tests (#1561)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new aeabeacd test(go/adbc/driver/snowflake): fix flaky integration tests (#1561) aeabeacd is described below commit aeabeacd18f559a44d6fe0bcdc3d29c991a9b271 Author: Matt Topol AuthorDate: Fri Feb 23 16:59:15 2024 -0500 test(go/adbc/driver/snowflake): fix flaky integration tests (#1561) --- c/driver/snowflake/snowflake_test.cc| 111 +++- c/validation/adbc_validation.h | 4 + c/validation/adbc_validation_connection.cc | 29 +++-- c/validation/adbc_validation_statement.cc | 165 +--- go/adbc/driver/flightsql/flightsql_adbc_test.go | 15 ++- go/adbc/driver/snowflake/bulk_ingestion.go | 31 - go/adbc/driver/snowflake/connection.go | 8 +- go/adbc/driver/snowflake/driver_test.go | 61 - go/adbc/driver/snowflake/statement.go | 4 +- go/adbc/go.mod | 54 go/adbc/go.sum | 65 +- go/adbc/validation/validation.go| 14 +- 12 files changed, 391 insertions(+), 170 deletions(-) diff --git a/c/driver/snowflake/snowflake_test.cc b/c/driver/snowflake/snowflake_test.cc index cdd92e2c..1c423711 100644 --- a/c/driver/snowflake/snowflake_test.cc +++ b/c/driver/snowflake/snowflake_test.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include "validation/adbc_validation.h" #include "validation/adbc_validation_util.h" @@ -35,6 +36,26 @@ using adbc_validation::IsOkStatus; } \ } while (false) +namespace { +std::string GetUuid() { + static std::random_device dev; + static std::mt19937 rng(dev()); + + std::uniform_int_distribution dist(0, 15); + + const char* v = "0123456789ABCDEF"; + const bool dash[] = {0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0}; + + std::string res; + for (int i = 0; i < 16; i++) { +if (dash[i]) res += "-"; +res += v[dist(rng)]; +res += v[dist(rng)]; + } + return res; +} +} // namespace + class SnowflakeQuirks : public 
adbc_validation::DriverQuirks { public: SnowflakeQuirks() { @@ -47,6 +68,13 @@ class SnowflakeQuirks : public adbc_validation::DriverQuirks { AdbcStatusCode SetupDatabase(struct AdbcDatabase* database, struct AdbcError* error) const override { EXPECT_THAT(AdbcDatabaseSetOption(database, "uri", uri_, error), IsOkStatus(error)); +EXPECT_THAT(AdbcDatabaseSetOption( +database, "adbc.snowflake.sql.client_option.use_high_precision", +"false", error), +IsOkStatus(error)); +EXPECT_THAT(AdbcDatabaseSetOption(database, "adbc.snowflake.sql.schema", + schema_.c_str(), error), +IsOkStatus(error)); return ADBC_STATUS_OK; } @@ -119,11 +147,13 @@ class SnowflakeQuirks : public adbc_validation::DriverQuirks { bool supports_metadata_current_db_schema() const override { return false; } bool supports_partitioned_data() const override { return false; } bool supports_dynamic_parameter_binding() const override { return false; } + bool supports_error_on_incompatible_schema() const override { return false; } bool ddl_implicit_commit_txn() const override { return true; } - std::string db_schema() const override { return "ADBC_TESTING"; } + std::string db_schema() const override { return schema_; } const char* uri_; bool skip_{false}; + std::string schema_{"ADBC_TESTING"}; }; class SnowflakeTest : public ::testing::Test, public adbc_validation::DatabaseTest { @@ -175,6 +205,7 @@ class SnowflakeStatementTest : public ::testing::Test, public adbc_validation::StatementTest { public: const adbc_validation::DriverQuirks* quirks() const override { return _; } + void SetUp() override { if (quirks_.skip_) { GTEST_SKIP(); @@ -192,6 +223,78 @@ class SnowflakeStatementTest : public ::testing::Test, void TestSqlIngestColumnEscaping() { GTEST_SKIP(); } + public: + // will need to be updated to SetUpTestSuite when gtest is upgraded + static void SetUpTestCase() { +struct AdbcError error; +struct AdbcDatabase db; +struct AdbcConnection connection; +struct AdbcStatement statement; + +std::memset(, 0, 
sizeof(error)); +std::memset(, 0, sizeof(db)); +std::memset(, 0, sizeof(connection)); +std::memset(, 0, sizeof(statement)); + +ASSERT_THAT(AdbcDatabaseNew(, ), IsOkStatus()); +ASSERT_THAT(quirks_.SetupDatabase(, ), IsOkStatus()); +
(arrow-adbc) branch fix-snowflake-ci updated (f6be85fb -> 63c6985f)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from f6be85fb forgot to add the schema option add 63c6985f updates from feedback No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 54 1 file changed, 30 insertions(+), 24 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (76f4d075 -> f6be85fb)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 76f4d075 forgot to check support test add f6be85fb forgot to add the schema option No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 3 +++ 1 file changed, 3 insertions(+)
(arrow-adbc) branch fix-snowflake-ci updated (0c95216a -> 76f4d075)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 0c95216a fix lints add 76f4d075 forgot to check support test No new revisions were added by this update. Summary of changes: go/adbc/validation/validation.go | 4 1 file changed, 4 insertions(+)
(arrow-adbc) branch fix-snowflake-ci updated (b56b2a14 -> 0c95216a)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from b56b2a14 yet another attempt add 0c95216a fix lints No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 6 -- c/validation/adbc_validation_statement.cc | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (c01d1dd0 -> b56b2a14)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from c01d1dd0 fix postgres/sqlite tests add b56b2a14 yet another attempt No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc| 93 - c/validation/adbc_validation_statement.cc | 4 +- go/adbc/driver/flightsql/flightsql_adbc_test.go | 15 ++-- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go | 33 + go/adbc/validation/validation.go| 2 + 6 files changed, 122 insertions(+), 27 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (6c56796b -> c01d1dd0)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 6c56796b pre-commit linting add c01d1dd0 fix postgres/sqlite tests No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_connection.cc | 21 +++-- go/adbc/driver/snowflake/bulk_ingestion.go | 2 +- go/adbc/driver/snowflake/driver_test.go| 2 +- 3 files changed, 13 insertions(+), 12 deletions(-)
(arrow) branch main updated (65c2b46c83 -> 036a22eaff)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 65c2b46c83 GH-40199: [R] dbplyr 2.5.0 forward compatibility (#40197) add 036a22eaff GH-40089: [Go] Concurrent Recordset for receiving huge recordset (#40090) No new revisions were added by this update. Summary of changes: go/arrow/flight/flightsql/driver/driver.go | 205 -- go/arrow/flight/flightsql/driver/driver_test.go | 937 2 files changed, 1076 insertions(+), 66 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (3c5c3b6e -> 6c56796b)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 3c5c3b6e more test fixes add 6c56796b pre-commit linting No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_statement.cc | 65 +-- 1 file changed, 35 insertions(+), 30 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (7c2da50c -> 3c5c3b6e)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 7c2da50c pre-commit lint add 3c5c3b6e more test fixes No new revisions were added by this update. Summary of changes: c/driver/snowflake/snowflake_test.cc | 7 ++- c/validation/adbc_validation.h | 4 c/validation/adbc_validation_statement.cc | 22 -- go/adbc/driver/snowflake/bulk_ingestion.go | 20 +++- go/adbc/driver/snowflake/driver_test.go| 26 +- go/adbc/validation/validation.go | 8 6 files changed, 58 insertions(+), 29 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (6852eca4 -> 7c2da50c)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 6852eca4 fix trailing whitespace add 07eacffb fix quoting table names add 7c2da50c pre-commit lint No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_statement.cc | 94 +- go/adbc/driver/snowflake/bulk_ingestion.go | 7 ++- go/adbc/driver/snowflake/statement.go | 4 +- 3 files changed, 58 insertions(+), 47 deletions(-)
(arrow-adbc) branch fix-snowflake-ci updated (44da2bda -> 6852eca4)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git from 44da2bda ci(driver/snowflake): fixing snowflake integration tests add 6852eca4 fix trailing whitespace No new revisions were added by this update. Summary of changes: c/validation/adbc_validation_connection.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
(arrow-adbc) 01/01: ci(driver/snowflake): fixing snowflake integration tests
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git commit 44da2bdad54050cb134a15b932261404427c11be Author: Matt Topol AuthorDate: Thu Feb 22 13:16:31 2024 -0500 ci(driver/snowflake): fixing snowflake integration tests --- c/validation/adbc_validation_connection.cc | 12 -- go/adbc/driver/snowflake/connection.go | 8 ++-- go/adbc/go.mod | 54 - go/adbc/go.sum | 65 +- 4 files changed, 102 insertions(+), 37 deletions(-) diff --git a/c/validation/adbc_validation_connection.cc b/c/validation/adbc_validation_connection.cc index f9af084e..f828ef97 100644 --- a/c/validation/adbc_validation_connection.cc +++ b/c/validation/adbc_validation_connection.cc @@ -550,7 +550,7 @@ void ConnectionTest::TestMetadataGetObjectsDbSchemas() { ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); -ASSERT_GT(reader.array->length, 0); +ASSERT_GE(reader.array->length, 0); do { for (int64_t row = 0; row < reader.array->length; row++) { struct ArrowArrayView* catalog_db_schemas_list = reader.array_view->children[1]; @@ -595,8 +595,12 @@ void ConnectionTest::TestMetadataGetObjectsTables() { ASSERT_NO_FATAL_FAILURE(reader.GetSchema()); ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); -ASSERT_NE(nullptr, reader.array->release); -ASSERT_GT(reader.array->length, 0); +ASSERT_NE(nullptr, reader.array->release); +if (expected.second) { + ASSERT_GT(reader.array->length, 0); +} else { + ASSERT_EQ(reader.array->length, 0); +} bool found_expected_table = false; do { for (int64_t row = 0; row < reader.array->length; row++) { @@ -670,7 +674,7 @@ void ConnectionTest::TestMetadataGetObjectsTablesTypes() { ASSERT_NO_FATAL_FAILURE(CheckGetObjectsSchema()); ASSERT_NO_FATAL_FAILURE(reader.Next()); ASSERT_NE(nullptr, reader.array->release); 
-ASSERT_GT(reader.array->length, 0); +ASSERT_GE(reader.array->length, 0); bool found_expected_table = false; do { for (int64_t row = 0; row < reader.array->length; row++) { diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go index 5855c875..1de2b6ae 100644 --- a/go/adbc/driver/snowflake/connection.go +++ b/go/adbc/driver/snowflake/connection.go @@ -968,14 +968,14 @@ func (c *cnxn) GetTableSchema(ctx context.Context, catalog *string, dbSchema *st defer rows.Close() var ( - name, typ, kind, isnull, primary, unique string - def, check, expr, comment, policyNamesql.NullString - fields = []arrow.Field{} + name, typ, kind, isnull, primary, unique string + def, check, expr, comment, policyName, privDomain sql.NullString + fields= []arrow.Field{} ) for rows.Next() { err := rows.Scan(, , , , , , , - , , , ) + , , , , ) if err != nil { return nil, errToAdbcErr(adbc.StatusIO, err) } diff --git a/go/adbc/go.mod b/go/adbc/go.mod index 6322f5f8..7e7b605e 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -23,13 +23,13 @@ require ( github.com/apache/arrow/go/v16 v16.0.0-20240129203910-c2ca9bcedeb0 github.com/bluele/gcache v0.0.2 github.com/golang/protobuf v1.5.3 - github.com/google/uuid v1.3.1 - github.com/snowflakedb/gosnowflake v1.7.2 + github.com/google/uuid v1.6.0 + github.com/snowflakedb/gosnowflake v1.8.0 github.com/stretchr/testify v1.8.4 github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a - golang.org/x/exp v0.0.0-20240119083558-1b970713d09a + golang.org/x/exp v0.0.0-20240213143201-ec583247a57a golang.org/x/sync v0.6.0 - golang.org/x/tools v0.17.0 + golang.org/x/tools v0.18.0 google.golang.org/grpc v1.58.3 google.golang.org/protobuf v1.31.0 ) @@ -37,26 +37,26 @@ require ( require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal 
v1.5.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.2 // indirect + github.com/Azure/azure-sdk-for
(arrow-adbc) branch fix-snowflake-ci created (now 44da2bda)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch fix-snowflake-ci in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git at 44da2bda ci(driver/snowflake): fixing snowflake integration tests This branch includes the following new commits: new 44da2bda ci(driver/snowflake): fixing snowflake integration tests The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
(arrow) branch main updated: GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 29a0581f5b GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105) 29a0581f5b is described below commit 29a0581f5bfcad86a6493854f8be8fcb6ffe2fbc Author: Matthew McNew AuthorDate: Tue Feb 20 19:59:57 2024 -0600 GH-39870: [Go] Include buffered pages in TotalBytesWritten (#40105) ### Rationale for this change Currently, buffered data pages are not included in TotalBytesWritten this means that their is not an accurate estimate of the size of the current size. ### Are there any user-facing changes? `RowGroupTotalBytesWritten` will include the TotalBytes in buffered DataPages minus the buffered data pages headers. * Closes: #39870 Authored-by: Matthew McNew Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 7 ++- go/parquet/file/column_writer_test.go | 14 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 4d603c547c..91f5d18942 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -198,7 +198,12 @@ func (w *columnWriter) TotalCompressedBytes() int64 { } func (w *columnWriter) TotalBytesWritten() int64 { - return w.totalBytesWritten + bufferedPagesBytes := int64(0) + for _, p := range w.pages { + bufferedPagesBytes += int64(len(p.Data())) + } + + return w.totalBytesWritten + bufferedPagesBytes } func (w *columnWriter) RowsWritten() int { diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index dd597e280b..d78e1c6761 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -430,6 +430,11 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque } func (p *PrimitiveWriterTestSuite) 
testDictionaryFallbackAndCompressedSize(version parquet.Version) { + // skip boolean as dictionary encoding is not used + if p.Typ.Kind() == reflect.Bool { + return + } + p.GenerateData(SmallSize) props := parquet.DefaultColumnProperties() props.DictionaryEnabled = true @@ -440,13 +445,14 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(versi props.Encoding = parquet.Encodings.RLEDict } - writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version)) + writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version), parquet.WithDataPageSize(SmallSize-1)) p.WriteBatchValues(writer, nil, nil) + p.NotZero(writer.TotalBytesWritten()) writer.FallbackToPlain() - p.NotEqual(0, writer.TotalCompressedBytes()) + p.NotZero(writer.TotalCompressedBytes()) writer.Close() - p.NotEqual(0, writer.TotalCompressedBytes()) - p.NotEqual(0, writer.TotalBytesWritten()) + p.NotZero(writer.TotalCompressedBytes()) + p.NotZero(writer.TotalBytesWritten()) } func (p *PrimitiveWriterTestSuite) TestRequiredPlain() {
(arrow) branch main updated (a690088193 -> 47f15b0708)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from a690088193 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) add 47f15b0708 GH-40113 [Go][Parquet] New RegisterCodec function (#40114) No new revisions were added by this update. Summary of changes: go/parquet/compress/brotli.go | 2 +- go/parquet/compress/compress.go | 20 go/parquet/compress/gzip.go | 2 +- go/parquet/compress/snappy.go | 2 +- go/parquet/compress/zstd.go | 2 +- 5 files changed, 24 insertions(+), 4 deletions(-)
(arrow) branch main updated: GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new a690088193 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) a690088193 is described below commit a690088193711447aa4d526f2257027f9a459efa Author: wayne AuthorDate: Tue Feb 20 08:38:06 2024 -0700 GH-40097: [Go][FlightRPC] Enable disabling TLS (#40098) See https://github.com/apache/arrow/issues/40097 for more in-depth description about the problem that led me to file this PR. ### Rationale for this change Because it's annoying to not be able to connect to a non-TLS flightsql endpoint in my development environment just because my development environment happens to still use token authentication. ### What changes are included in this PR? Thread the flightsql `DriverConfig.TLSEnabled` parameter into the `grpcCredentials` type so that `grpcCredentials.RequireTransportSecurity` can return false if TLS is not enabled on the driver config. One thing that occurred to me about the `DriverConfig.TLSEnabled` field is that its semantics seem very mildly dangerous since golang `bool` types are `false` by default and golang doesn't require fields on structs to be explicitly initialized. It seems to me that `DriverConfig.TLSDisabled` would be better (semantically speaking) because then the API user doesn't have to explicitly enable TLS. But I suppose it's probably undesirable to change the name of a public field on a public type. ### Are these changes tested? I haven't written any tests, mostly because there weren't already any tests for the `grpcCredentials` type but I have manually verified this fixes the problem I described in https://github.com/apache/arrow/issues/40097 by rebuilding my tool and running it against the non-TLS listening thing in my development environment. ### Are there any user-facing changes? 
* Closes: #40097 Authored-by: wayne warren Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/driver.go | 9 + go/arrow/flight/flightsql/driver/utils.go | 11 ++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index 852a97fb4d..65068048ab 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -364,10 +364,11 @@ func (c *Connector) Configure(config *DriverConfig) error { // Set authentication credentials rpcCreds := grpcCredentials{ - username: config.Username, - password: config.Password, - token:config.Token, - params: config.Params, + username: config.Username, + password: config.Password, + token: config.Token, + params: config.Params, + tlsEnabled: config.TLSEnabled, } c.options = append(c.options, grpc.WithPerRPCCredentials(rpcCreds)) diff --git a/go/arrow/flight/flightsql/driver/utils.go b/go/arrow/flight/flightsql/driver/utils.go index f7bd2a2e02..a99c045e2e 100644 --- a/go/arrow/flight/flightsql/driver/utils.go +++ b/go/arrow/flight/flightsql/driver/utils.go @@ -27,10 +27,11 @@ import ( // *** GRPC helpers *** type grpcCredentials struct { - username string - password string - tokenstring - params map[string]string + username string + password string + token string + params map[string]string + tlsEnabled bool } func (g grpcCredentials) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, error) { @@ -53,7 +54,7 @@ func (g grpcCredentials) GetRequestMetadata(ctx context.Context, uri ...string) } func (g grpcCredentials) RequireTransportSecurity() bool { - return g.token != "" || g.username != "" + return g.tlsEnabled && (g.token != "" || g.username != "") } // *** Type conversions ***
(arrow) branch main updated: GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f609bb171a GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913) f609bb171a is described below commit f609bb171a8bce973d7b040d8684b04a60e806ed Author: abandy AuthorDate: Wed Feb 7 16:01:55 2024 -0500 GH-39910: [Go] Add func to load prepared statement from ActionCreatePreparedStatementResult (#39913) Currently, in order to create a PreparedStatement a DoAction call will always be made via the client. I need to be able to make a PreparedStatement from persisted data that will not trigger the DoAction call to the server. * Closes: #39910 Authored-by: Alva Bandy Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/client.go | 65 go/arrow/flight/flightsql/client_test.go | 30 +++ go/arrow/flight/flightsql/types.go | 2 + 3 files changed, 97 insertions(+) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 441f88f39f..068bfa84c3 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -450,6 +450,31 @@ func (c *Client) PrepareSubstrait(ctx context.Context, plan SubstraitPlan, opts return parsePreparedStatementResponse(c, c.Alloc, stream) } +func (c *Client) LoadPreparedStatementFromResult(result *CreatePreparedStatementResult) (*PreparedStatement, error) { + var ( + err error + dsSchema, paramSchema *arrow.Schema + ) + if result.DatasetSchema != nil { + dsSchema, err = flight.DeserializeSchema(result.DatasetSchema, c.Alloc) + if err != nil { + return nil, err + } + } + if result.ParameterSchema != nil { + paramSchema, err = flight.DeserializeSchema(result.ParameterSchema, c.Alloc) + if err != nil { + return nil, err + } + } + return { + client:c, + handle:result.PreparedStatementHandle, + datasetSchema: 
dsSchema, + paramSchema: paramSchema, + }, nil +} + func parsePreparedStatementResponse(c *Client, mem memory.Allocator, results pb.FlightService_DoActionClient) (*PreparedStatement, error) { if err := results.CloseSend(); err != nil { return nil, err @@ -1027,6 +1052,46 @@ func (p *PreparedStatement) Execute(ctx context.Context, opts ...grpc.CallOption return p.client.getFlightInfo(ctx, desc, opts...) } +// ExecutePut calls DoPut for the prepared statement on the server. If SetParameters +// has been called then the parameter bindings will be sent before execution. +// +// Will error if already closed. +func (p *PreparedStatement) ExecutePut(ctx context.Context, opts ...grpc.CallOption) error { + if p.closed { + return errors.New("arrow/flightsql: prepared statement already closed") + } + + cmd := {PreparedStatementHandle: p.handle} + + desc, err := descForCommand(cmd) + if err != nil { + return err + } + + if p.hasBindParameters() { + pstream, err := p.client.Client.DoPut(ctx, opts...) + if err != nil { + return err + } + + wr, err := p.writeBindParameters(pstream, desc) + if err != nil { + return err + } + if err = wr.Close(); err != nil { + return err + } + pstream.CloseSend() + + // wait for the server to ack the result + if _, err = pstream.Recv(); err != nil && err != io.EOF { + return err + } + } + + return nil +} + // ExecutePoll executes the prepared statement on the server and returns a PollInfo // indicating the progress of execution. 
// diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index c8b9f7f124..f35aeefcf4 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -665,6 +665,36 @@ func (s *FlightSqlClientSuite) TestRenewFlightEndpoint() { s.Equal(, renewedEndpoint) } +func (s *FlightSqlClientSuite) TestPreparedStatementLoadFromResult() { + const query = "query" + + result := { + PreparedStatementHandle: []byte(query), + } + + parameterSchemaResult := arrow.NewSchema([]arrow.Field{{Name: "p_id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}
(arrow-adbc) branch main updated: feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 21fba985 feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525) 21fba985 is described below commit 21fba985231990fa9639d217157f8f5d38df47cf Author: Matt Topol AuthorDate: Wed Feb 7 13:29:56 2024 -0500 feat(go/adbc/driver/snowflake): add '[ADBC]' to snowflake application name (#1525) To help Snowflake track adoption and usage of the ADBC driver, we can explicitly add a prefix to any client application name to indicate the ADBC driver is the source of the requests. - Co-authored-by: David Li --- go/adbc/driver/snowflake/snowflake_database.go | 8 python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py | 4 2 files changed, 12 insertions(+) diff --git a/go/adbc/driver/snowflake/snowflake_database.go b/go/adbc/driver/snowflake/snowflake_database.go index 7b76fa5a..76ab4684 100644 --- a/go/adbc/driver/snowflake/snowflake_database.go +++ b/go/adbc/driver/snowflake/snowflake_database.go @@ -176,6 +176,11 @@ func (d *databaseImpl) SetOptions(cnOptions map[string]string) error { } } + defaultAppName := "[ADBC][Go-" + infoDriverVersion + "]" + // set default application name to track + // unless user overrides it + d.cfg.Application = defaultAppName + var err error for k, v := range cnOptions { v := v // copy into loop scope @@ -265,6 +270,9 @@ func (d *databaseImpl) SetOptions(cnOptions map[string]string) error { } d.cfg.ClientTimeout = dur case OptionApplicationName: + if !strings.HasPrefix(v, "[ADBC]") { + v = defaultAppName + v + } d.cfg.Application = v case OptionSSLSkipVerify: switch v { diff --git a/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py b/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py index 701de23e..19b3bbc1 100644 --- 
a/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py +++ b/python/adbc_driver_snowflake/adbc_driver_snowflake/__init__.py @@ -131,6 +131,10 @@ def connect( kwargs = (db_kwargs or {}).copy() if uri is not None: kwargs["uri"] = uri +appname = kwargs.get(DatabaseOptions.APPLICATION_NAME.value, "") +kwargs[ +DatabaseOptions.APPLICATION_NAME.value +] = f"[ADBC][Python-{__version__}]{appname}" return adbc_driver_manager.AdbcDatabase(driver=_driver_path(), **kwargs)
(arrow-site) branch main updated: Add Powered By note for pantab (#471)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-site.git The following commit(s) were added to refs/heads/main by this push: new ce3bc0b161c Add Powered By note for pantab (#471) ce3bc0b161c is described below commit ce3bc0b161c5f80137096fbcd07ac52321056bb3 Author: William Ayd AuthorDate: Mon Feb 5 23:41:52 2024 -0500 Add Powered By note for pantab (#471) Hi - I built pantab about 6 years ago to write pandas DataFrames to a proprietary database owned by Tableau called Hyper. Over time hacking into pandas internals proved to be difficult, and I very recently swapped over to using nanoarrow and the Arrow PyCapsule interface with great results. No hard feelings if there is not an appetite for this PR, but I figured I'd put it out there to celebrate the awesomeness of Arrow --- powered_by.md | 5 + 1 file changed, 5 insertions(+) diff --git a/powered_by.md b/powered_by.md index e5018fbfe79..1ba4278d0ea 100644 --- a/powered_by.md +++ b/powered_by.md @@ -154,6 +154,10 @@ short description of your use case. * **[pandas][12]:** data analysis toolkit for Python programmers. pandas supports reading and writing Parquet files using pyarrow. Several pandas core developers are also contributors to Apache Arrow. +* **[pantab][52]:** Allows high performance read/writes of popular dataframe libraries + like pandas, polars pyarrow, etc... to/from Tableau's Hyper database. pantab uses nanoarrow + and the Arrow PyCapsule interface to make that exchange process seamless. + core developers are also contributors to Apache Arrow. * **[Parseable][51]:** Log analytics platform built for scale and usability. Ingest logs from anywhere and unify logs with Parseable. Parseable uses Arrow as the intermediary, in-memory data format for log data ingestion. 
* **[Perspective][23]:** Perspective is a streaming data visualization engine in JavaScript for building real-time & user-configurable analytics entirely in the browser. * **[Petastorm][28]:** Petastorm enables single machine or distributed training @@ -262,3 +266,4 @@ short description of your use case. [49]: https://kaskada.io [50]: https://openobserve.ai [51]: https://parseable.io +[52]: https://github.com/innobi/pantab
(arrow) branch main updated: GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 26801f147a GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770) 26801f147a is described below commit 26801f147a9e98bb6c5bc4e7131bdf1bc2794467 Author: Matt Topol AuthorDate: Mon Feb 5 15:29:06 2024 -0500 GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770) ### Rationale for this change In my testing with libcudf and other GPU data, I discovered a deficiency in ImportDeviceArray and thus ImportDeviceRecordBatch where the device type and memory manager aren't propagated to child importers and it fails to import offset-based types such as strings. ### What changes are included in this PR? These are relatively easily handled by first ensuring that `ImportChild` propagates the device_type and memory manager from the parent. Then for importing offset based values we merely need to use the memory manager to copy the final offset value to the CPU to use for the buffer size computation. This will work for any device which has implemented CopyBufferTo/From ### Are these changes tested? A new test is added to test these situations. 
* Closes: #39769 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/c/bridge.cc | 23 --- cpp/src/arrow/c/bridge_test.cc | 10 ++ cpp/src/arrow/device.cc| 14 ++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 9b165a10a6..119249da99 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1543,6 +1543,8 @@ struct ArrayImporter { if (recursion_level_ >= kMaxImportRecursionLevel) { return Status::Invalid("Recursion level in ArrowArray struct exceeded"); } +device_type_ = parent->device_type_; +memory_mgr_ = parent->memory_mgr_; // Child buffers will keep the entire parent import alive. // Perhaps we can move the child structs to an owned area // when the parent ImportedArrayData::Release() gets called, @@ -1857,10 +1859,25 @@ struct ArrayImporter { template Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id, int64_t byte_width = 1) { -auto offsets = data_->GetValues(offsets_buffer_id); +if (device_type_ == DeviceAllocationType::kCPU) { + auto offsets = data_->GetValues(offsets_buffer_id); + // Compute visible size of buffer + int64_t buffer_size = + (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; + return ImportBuffer(buffer_id, buffer_size); +} + +// we only need the value of the last offset so let's just copy that +// one value from device to host. +auto single_value_buf = +SliceBuffer(data_->buffers[offsets_buffer_id], +c_struct_->length * sizeof(OffsetType), sizeof(OffsetType)); +ARROW_ASSIGN_OR_RAISE( +auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager())); +auto offsets = cpubuf->data_as(); // Compute visible size of buffer -int64_t buffer_size = -(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; +int64_t buffer_size = (c_struct_->length > 0) ? 
byte_width * offsets[0] : 0; + return ImportBuffer(buffer_id, buffer_size); } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 8b67027454..b8d5e0fcd3 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4320,6 +4320,16 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) { TestWithJSON(mm, int32(), "[4, 5, null]"); } +TEST_F(TestDeviceArrayRoundtrip, Struct) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + auto type = struct_({field("ints", int16()), field("strs", utf8())}); + + TestWithJSON(mm, type, "[]"); + TestWithJSON(mm, type, R"([[4, "foo"], [5, "bar"]])"); + TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])"); +} + // Array stream export tests diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 616f89aae8..3736a4e018 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -195,6 +195,13 @@ Result> CPUMemoryManager::ViewBufferFrom( if (!from->is_cpu()) { return nullptr; } + // in this case the memory manager we're coming from is visible on the CPU, + // but uses
(arrow) branch main updated (5856421e31 -> 85e2a684b7)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 5856421e31 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) add 85e2a684b7 GH-39925: [Go][Parquet] Fix re-slicing in maybeReplaceValidity function (#39926) No new revisions were added by this update. Summary of changes: go/parquet/file/column_writer.go | 5 - go/parquet/file/column_writer_test.go | 38 +++ 2 files changed, 42 insertions(+), 1 deletion(-)
(arrow) branch main updated: GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 5856421e31 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) 5856421e31 is described below commit 5856421e31b163104570d0305cb79f323cf488a6 Author: mwish AuthorDate: Mon Feb 5 23:14:48 2024 +0800 GH-39921: [Go][Parquet] ColumnWriter not reset TotalCompressedBytes after Flush (#39922) ### Rationale for this change See https://github.com/apache/arrow/issues/39921 ### What changes are included in this PR? Not clearing `totalCompressedBytes` when flush called ### Are these changes tested? Yes ### Are there any user-facing changes? Yes, it's a bugfix * Closes: #39921 Authored-by: mwish Signed-off-by: Matt Topol --- go/parquet/file/column_writer.go | 5 +++-- go/parquet/file/column_writer_test.go | 28 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index ac857d17e6..36663b10b8 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -397,7 +397,6 @@ func (w *columnWriter) FlushBufferedDataPages() (err error) { } } w.pages = w.pages[:0] - w.totalCompressedBytes = 0 return } @@ -542,7 +541,9 @@ func (w *columnWriter) Close() (err error) { if !w.closed { w.closed = true if w.hasDict && !w.fallbackToNonDict { - w.WriteDictionaryPage() + if err = w.WriteDictionaryPage(); err != nil { + return err + } } if err = w.FlushBufferedDataPages(); err != nil { diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 8011ac2487..321e7b730d 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -426,6 +426,26 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque } } +func (p 
*PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(version parquet.Version) { + p.GenerateData(SmallSize) + props := parquet.DefaultColumnProperties() + props.DictionaryEnabled = true + + if version == parquet.V1_0 { + props.Encoding = parquet.Encodings.PlainDict + } else { + props.Encoding = parquet.Encodings.RLEDict + } + + writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version)) + p.WriteBatchValues(writer, nil, nil) + writer.FallbackToPlain() + p.NotEqual(0, writer.TotalCompressedBytes()) + writer.Close() + p.NotEqual(0, writer.TotalCompressedBytes()) + p.NotEqual(0, writer.TotalBytesWritten()) +} + func (p *PrimitiveWriterTestSuite) TestRequiredPlain() { p.testRequiredWithEncoding(parquet.Encodings.Plain) } @@ -575,6 +595,14 @@ func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackEncodingV2() { p.testDictionaryFallbackEncoding(parquet.V2_LATEST) } +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV1() { + p.testDictionaryFallbackAndCompressedSize(parquet.V1_0) +} + +func (p *PrimitiveWriterTestSuite) TestDictionaryFallbackStatsV2() { + p.testDictionaryFallbackAndCompressedSize(parquet.V2_LATEST) +} + func (p *PrimitiveWriterTestSuite) TestOptionalNullValueChunk() { // test case for NULL values p.SetupSchema(parquet.Repetitions.Optional, 1)
(arrow) branch main updated: GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 87b515e920 GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772) 87b515e920 is described below commit 87b515e9207509aa3f77e3e1c0122be314a77e6d Author: Matt Topol AuthorDate: Thu Feb 1 11:48:29 2024 -0500 GH-39771: [C++][Device] Generic CopyBatchTo/CopyArrayTo memory types (#39772) ### Rationale for this change Right now our MemoryManager interfaces operate solely at the buffer level and we do not provide any higher level facilities to copy an entire array or record batch between memory types. We should implement CopyArrayTo and CopyBatchTo functions which recursively utilize the buffer level copying to create a new Array/RecordBatch whose buffers have been copied to the destination memory manager. ### What changes are included in this PR? Exposing a `CopyArrayTo` and `CopyBatchTo` function for copying entire Array or RecordBatches between memory types. ### Are these changes tested? Tests are still being written but will be added. 
* Closes: #39771 Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.cc| 12 +++ cpp/src/arrow/array/array_base.h | 16 +++ cpp/src/arrow/array/data.cc | 39 cpp/src/arrow/array/data.h | 19 +++--- cpp/src/arrow/buffer.h | 2 +- cpp/src/arrow/c/bridge.cc| 2 +- cpp/src/arrow/c/bridge_test.cc | 4 +++- cpp/src/arrow/device.cc | 2 ++ cpp/src/arrow/gpu/cuda_context.cc| 5 + cpp/src/arrow/ipc/read_write_test.cc | 27 - cpp/src/arrow/record_batch.cc| 24 ++ cpp/src/arrow/record_batch.h | 19 ++ 12 files changed, 142 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index b483ec420c..6927f51283 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -307,6 +307,18 @@ Result> Array::View( return MakeArray(result); } +Result> Array::CopyTo( +const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto copied_data, data()->CopyTo(to)); + return MakeArray(copied_data); +} + +Result> Array::ViewOrCopyTo( +const std::shared_ptr& to) const { + ARROW_ASSIGN_OR_RAISE(auto new_data, data()->ViewOrCopyTo(to)); + return MakeArray(new_data); +} + // -- // NullArray diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 7e857bf205..6411aebf80 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -165,6 +165,22 @@ class ARROW_EXPORT Array { /// An error is returned if the types are not layout-compatible. Result> View(const std::shared_ptr& type) const; + /// \brief Construct a copy of the array with all buffers on destination + /// Memory Manager + /// + /// This method recursively copies the array's buffers and those of its children + /// onto the destination MemoryManager device and returns the new Array. + Result> CopyTo(const std::shared_ptr& to) const; + + /// \brief Construct a new array attempting to zero-copy view if possible. 
+ /// + /// Like CopyTo this method recursively goes through all of the array's buffers + /// and those of it's children and first attempts to create zero-copy + /// views on the destination MemoryManager device. If it can't, it falls back + /// to performing a copy. See Buffer::ViewOrCopy. + Result> ViewOrCopyTo( + const std::shared_ptr& to) const; + /// Construct a zero-copy slice of the array with the indicated offset and /// length /// diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 8454ac8f1d..80c411dfa6 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -27,6 +27,7 @@ #include "arrow/array/util.h" #include "arrow/buffer.h" +#include "arrow/device.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" @@ -36,6 +37,7 @@ #include "arrow/util/dict_util.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/range.h" #include "arrow/util/ree_util.h" #include "arrow/util/slice_util_internal.h" #include "arrow/util/union_util.h" @@ -140,6 +142,43 @@ std::shared_ptr ArrayData::Make(std::shared_ptr type, int64 return std::make_shared(std::move(type
(arrow) branch main updated: GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c2ca9bcede GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838) c2ca9bcede is described below commit c2ca9bcedeb004f9d7f5d3e1aafc7b83ce6c1e3f Author: Matt Topol AuthorDate: Mon Jan 29 15:39:10 2024 -0500 GH-39837: [Go][Flight] Allow cloning existing cookies in middleware (#39838) ### Rationale for this change This is needed for https://github.com/apache/arrow-adbc/issues/1194 to facilitate better connection handling for flight clients in ADBC by copying the existing cookies over when creating a sub-client. ### What changes are included in this PR? Creating a `Clone` method on the `CookieMiddleware` so that a user can create and hold a reference to a specific cookie middleware instance and then create new ones on the fly that copy over the existing cookies at that moment. ### Are these changes tested? Yes. ### Are there any user-facing changes? 
No * Closes: #39837 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/flight/cookie_middleware.go | 24 + go/arrow/flight/cookie_middleware_test.go | 60 +++ 2 files changed, 84 insertions(+) diff --git a/go/arrow/flight/cookie_middleware.go b/go/arrow/flight/cookie_middleware.go index 27754a13b8..39c86d8303 100644 --- a/go/arrow/flight/cookie_middleware.go +++ b/go/arrow/flight/cookie_middleware.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "golang.org/x/exp/maps" "google.golang.org/grpc/metadata" ) @@ -40,11 +41,34 @@ func NewClientCookieMiddleware() ClientMiddleware { return CreateClientMiddleware({jar: make(map[string]http.Cookie)}) } +func NewCookieMiddleware() CookieMiddleware { + return {jar: make(map[string]http.Cookie)} +} + +// CookieMiddleware is a go-routine safe middleware for flight clients +// which properly handles Set-Cookie headers for storing cookies. +// This can be passed into `CreateClientMiddleware` to create a new +// middleware object. You can also clone it to create middleware for a +// new client which starts with the same cookies. +type CookieMiddleware interface { + CustomClientMiddleware + // Clone creates a new CookieMiddleware that starts out with the same + // cookies that this one already has. This is useful when creating a + // new client connection for the same server. 
+ Clone() CookieMiddleware +} + type clientCookieMiddleware struct { jar map[string]http.Cookie mx sync.Mutex } +func (cc *clientCookieMiddleware) Clone() CookieMiddleware { + cc.mx.Lock() + defer cc.mx.Unlock() + return {jar: maps.Clone(cc.jar)} +} + func (cc *clientCookieMiddleware) StartCall(ctx context.Context) context.Context { cc.mx.Lock() defer cc.mx.Unlock() diff --git a/go/arrow/flight/cookie_middleware_test.go b/go/arrow/flight/cookie_middleware_test.go index 0adf492765..4007d056b2 100644 --- a/go/arrow/flight/cookie_middleware_test.go +++ b/go/arrow/flight/cookie_middleware_test.go @@ -239,3 +239,63 @@ func TestCookieExpiration(t *testing.T) { cookieMiddleware.expectedCookies = map[string]string{} makeReq(client, t) } + +func TestCookiesClone(t *testing.T) { + cookieMiddleware := {} + + s := flight.NewServerWithMiddleware([]flight.ServerMiddleware{ + flight.CreateServerMiddleware(cookieMiddleware), + }) + s.Init("localhost:0") + f := {} + s.RegisterFlightService(f) + + go s.Serve() + defer s.Shutdown() + + makeReq := func(c flight.Client, t *testing.T) { + flightStream, err := c.ListFlights(context.Background(), {}) + assert.NoError(t, err) + + for { + _, err := flightStream.Recv() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + assert.NoError(t, err) + } + } + } + + credsOpt := grpc.WithTransportCredentials(insecure.NewCredentials()) + cookies := flight.NewCookieMiddleware() + client1, err := flight.NewClientWithMiddleware(s.Addr().String(), nil, + []flight.ClientMiddleware{flight.CreateClientMiddleware(cookies)}, credsOpt) + require.NoError(t, err) + defer client1.Close() + + // set cookies + cookieMiddleware.cookies = []*http.Cookie{ + {Name: "foo", Value: "bar"}, + {Name: &
(arrow-adbc) branch main updated: feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new b57e19b6 feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456) b57e19b6 is described below commit b57e19b6c3dfdc65230b4cf9e8689cab51e3e89c Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Fri Jan 26 15:01:40 2024 -0500 feat(go/adbc/driver/snowflake): improve bulk ingestion speed (#1456) # What - Replace Snowflake bulk ingestion with Parquet-based approach with higher throughput and better type support - Previously: INSERT bind parameters were uploaded to a CSV-based stage, once per record batch - Now: Parquet files written concurrently to stage independently of record batch size. Parquet logical types are used to infer schema on COPY. - Tests to validate type support and consistency through Arrow -> Parquet -> Snowflake -> Arrow roundtrip - Improved type mapping between Arrow <-> Snowflake timestamps. [TIMESTAMP_LTZ](https://docs.snowflake.com/en/sql-reference/data-types-datetime#timestamp-ltz-timestamp-ntz-timestamp-tz) is more consistent with Arrow timestamp semantics than TIMESTAMP_TZ, which can lead to lossy roundtrips. - Minor bugfix where Snowflake local timestamps with timezone set to UTC were being interpreted as non-local. 
# Why - Implements #1327, which comes from improvement request #1322 - BindStream ingestion is significantly faster - Arrow type support is improved # Methodology The general approach for ingestion is most clearly demonstrated by the path taken when `stmt.Bind()` for a single record is used: ### IngestRecord ```mermaid flowchart LR A(Record) --> B(Write Parquet) B --> C(Upload File) C --> D(Execute COPY) D --> E(Check Row Count) ``` The Arrow record is written to a Parquet file due to its logical type support, compressibility, and native Snowflake support. The file is then uploaded to a temporary Snowflake stage via PUT query, and then loaded into the target table via COPY query. Once the COPY has finished, one more query to check the resulting row count is dispatched to accurately return the number of rows affected. This is used instead of counting the Arrow rows written in case there are any undetected losses when importing the uploaded file into Snowflake. A similar approach is taken when ingesting an arbitrarily large stream of records via `stmt.BindStream()`, but makes use of several opportunities to parallelize the work involved at different stages: ### IngestStream ```mermaid flowchart LR A(Read Records) --> B(Write Parquet) A --> C(Write Parquet) A --> D(Write Parquet) A --> E(Write Parquet) B --> J(Buffer Pool) C --> J D --> J E --> J J --> K(Upload File) J --> L(Upload File) K --> M(Finalize COPY) L --> M M --> N(Check Row Count) O(File Ready) --> P(Execute COPY) P --> O ``` The same steps are used, but the stream of records is now distributed among a pool of Parquet writers. This step is inherently CPU-bound, so it is desirable for it to scale independently with the availability of logical cores for writing/compression. These Parquet files are written to a buffer pool in memory to help decouple the upload stage from writing, and so that a writer can start working on the next file _while_ the last file it wrote is being uploaded. 
Uploads from the buffer pool also benefit from parallelism, but more so to maximize network utilization by limiting idle time between uploads and amortizing potential slowdown in any one upload. Technically, only a single COPY command is required after the last file is uploaded in order to load the Parquet files into the Snowflake table. However, on many warehouses this operation takes as long or even longer than the upload itself but can be made faster by paying for a larger warehouse. Given the batched approach taken and that the COPY command is idempotent, we can execute COPY repeatedly as files are uploaded to load them into the table on an ongoing basis. These COPY queries are executed asynchronously and listen for an upload-completed callback to ensure at least one file will be loaded by the query (otherwise it will no-op so this just prevents spamming Snowflake with a bunch of no-op COPYs). Empirically, ingestion works reasonably well on an XS warehouse. COPY speed i
(arrow) branch main updated: GH-39774: [Go] Add public access to PreparedStatement handle (#39775)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c97e6c46d9 GH-39774: [Go] Add public access to PreparedStatement handle (#39775) c97e6c46d9 is described below commit c97e6c46d969718e850d3fdeb7d77f998cc2342d Author: abandy AuthorDate: Thu Jan 25 10:20:54 2024 -0500 GH-39774: [Go] Add public access to PreparedStatement handle (#39775) * Closes: #39774 Authored-by: Alva Bandy Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/client.go | 3 +++ go/arrow/flight/flightsql/client_test.go | 10 ++ 2 files changed, 13 insertions(+) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index 928118cf29..441f88f39f 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -1165,6 +1165,9 @@ func (p *PreparedStatement) DatasetSchema() *arrow.Schema { return p.datasetSche // the prepared statement. func (p *PreparedStatement) ParameterSchema() *arrow.Schema { return p.paramSchema } +// The handle associated with this PreparedStatement +func (p *PreparedStatement) Handle() []byte { return p.handle } + // GetSchema re-requests the schema of the result set of the prepared // statement from the server. It should otherwise be identical to DatasetSchema. // diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index a4fb83f984..c8b9f7f124 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -384,6 +384,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecute() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + info, err := prepared.Execute(context.TODO(), s.callOpts...) 
s.NoError(err) s.Equal(, info) @@ -445,11 +447,15 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + paramSchema := prepared.ParameterSchema() rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"id": 1}]`)) s.NoError(err) defer rec.Release() + s.Equal(string(prepared.Handle()), "query") + prepared.SetParameters(rec) info, err := prepared.Execute(context.TODO(), s.callOpts...) s.NoError(err) @@ -517,6 +523,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteReaderBinding() { s.NoError(err) defer prepared.Close(context.TODO(), s.callOpts...) + s.Equal(string(prepared.Handle()), "query") + paramSchema := prepared.ParameterSchema() rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"id": 1}]`)) s.NoError(err) @@ -575,6 +583,8 @@ func (s *FlightSqlClientSuite) TestPreparedStatementClose() { err = prepared.Close(context.TODO(), s.callOpts...) s.NoError(err) + + s.Equal(string(prepared.Handle()), "query") } func (s *FlightSqlClientSuite) TestExecuteUpdate() {
(arrow-adbc) branch main updated: chore(deps): Update Snowflake dependency (#1474)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new d78ce71d chore(deps): Update Snowflake dependency (#1474) d78ce71d is described below commit d78ce71d4bd2932335d8f943be4817c47de43f81 Author: Matt Topol AuthorDate: Fri Jan 19 13:44:39 2024 -0500 chore(deps): Update Snowflake dependency (#1474) Updating snowflake Go driver and its dependencies, as mentioned in #1454 --- go/adbc/driver/flightsql/flightsql_adbc_test.go | 2 +- go/adbc/go.mod | 73 +- go/adbc/go.sum | 171 +++- 3 files changed, 117 insertions(+), 129 deletions(-) diff --git a/go/adbc/driver/flightsql/flightsql_adbc_test.go b/go/adbc/driver/flightsql/flightsql_adbc_test.go index dc7d207d..2fcc010d 100644 --- a/go/adbc/driver/flightsql/flightsql_adbc_test.go +++ b/go/adbc/driver/flightsql/flightsql_adbc_test.go @@ -260,7 +260,7 @@ func (s *FlightSQLQuirks) GetMetadata(code adbc.InfoCode) interface{} { case adbc.InfoVendorVersion: return "sqlite 3" case adbc.InfoVendorArrowVersion: - return "14.0.0" + return "14.0.2" } return nil diff --git a/go/adbc/go.mod b/go/adbc/go.mod index 69e9eaf5..2eff02d9 100644 --- a/go/adbc/go.mod +++ b/go/adbc/go.mod @@ -20,16 +20,16 @@ module github.com/apache/arrow-adbc/go/adbc go 1.19 require ( - github.com/apache/arrow/go/v14 v14.0.0 + github.com/apache/arrow/go/v14 v14.0.2 github.com/bluele/gcache v0.0.2 github.com/golang/protobuf v1.5.3 github.com/google/uuid v1.3.1 - github.com/snowflakedb/gosnowflake v1.6.22 + github.com/snowflakedb/gosnowflake v1.7.2 github.com/stretchr/testify v1.8.4 github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a - golang.org/x/exp v0.0.0-20231006140011-7918f672742d - golang.org/x/sync v0.4.0 - golang.org/x/tools v0.14.0 + golang.org/x/exp v0.0.0-20240119083558-1b970713d09a + golang.org/x/sync v0.6.0 + golang.org/x/tools v0.17.0 
google.golang.org/grpc v1.58.3 google.golang.org/protobuf v1.31.0 ) @@ -37,32 +37,31 @@ require ( require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect - github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.2.1 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect - github.com/andybalholm/brotli v1.0.5 // indirect - github.com/apache/arrow/go/v12 v12.0.1 // indirect - github.com/apache/thrift v0.17.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.19.0 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.13.27 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.72 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.35 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.29 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.27 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.30 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.29 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.4 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.37.0 // indirect - github.com/aws/smithy-go v1.13.5 // indirect - github.com/danieljoos/wincred v1.2.0 // indirect + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/apache/thrift v0.19.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.24.1 // indirect + 
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.16.16 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.15.13 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.2.10 // indirect + github.com/aws/aws-sdk
(arrow-adbc) branch main updated: feat(go/adbc)!: close database explicitly (#1460)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 3aa0d121 feat(go/adbc)!: close database explicitly (#1460) 3aa0d121 is described below commit 3aa0d12169764e2b0afabaf9b1f1f68c2d63aea8 Author: Anton Levakin <39916473+leva...@users.noreply.github.com> AuthorDate: Fri Jan 19 17:09:54 2024 +0100 feat(go/adbc)!: close database explicitly (#1460) Implicit database release behaves inconsistently on different OS, which leads to bugs. BREAKING CHANGE: adds Close to the Database interface. Closes #1306. - Co-authored-by: Matt Topol --- docs/source/driver/duckdb.rst | 1 + docs/source/driver/flight_sql.rst | 1 + docs/source/driver/postgresql.rst | 1 + docs/source/driver/snowflake.rst | 2 + docs/source/driver/sqlite.rst | 1 + go/adbc/adbc.go| 3 ++ go/adbc/driver/driverbase/database.go | 5 ++ go/adbc/driver/driverbase/driver.go| 4 +- .../driver/flightsql/flightsql_adbc_server_test.go | 1 + go/adbc/driver/flightsql/flightsql_adbc_test.go| 9 go/adbc/driver/flightsql/flightsql_database.go | 20 --- go/adbc/driver/flightsql/flightsql_driver.go | 1 + go/adbc/driver/panicdummy/panicdummy_adbc.go | 5 ++ go/adbc/driver/snowflake/connection.go | 6 +-- go/adbc/driver/snowflake/driver.go | 1 + go/adbc/driver/snowflake/driver_test.go| 62 -- go/adbc/driver/snowflake/snowflake_database.go | 4 ++ go/adbc/drivermgr/wrapper.go | 46 ++-- go/adbc/drivermgr/wrapper_sqlite_test.go | 5 ++ go/adbc/pkg/_tmpl/driver.go.tmpl | 11 ++-- go/adbc/pkg/flightsql/driver.go| 11 ++-- go/adbc/pkg/panicdummy/driver.go | 1 + go/adbc/pkg/snowflake/driver.go| 11 ++-- go/adbc/validation/validation.go | 3 ++ 24 files changed, 148 insertions(+), 67 deletions(-) diff --git a/docs/source/driver/duckdb.rst b/docs/source/driver/duckdb.rst index 410331c3..94460eb5 100644 --- a/docs/source/driver/duckdb.rst +++ 
b/docs/source/driver/duckdb.rst @@ -72,6 +72,7 @@ ADBC support in DuckDB requires the driver manager. if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/flight_sql.rst b/docs/source/driver/flight_sql.rst index aca95d86..7473a7cb 100644 --- a/docs/source/driver/flight_sql.rst +++ b/docs/source/driver/flight_sql.rst @@ -152,6 +152,7 @@ the :cpp:class:`AdbcDatabase`. if err != nil { // do something with the error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/postgresql.rst b/docs/source/driver/postgresql.rst index ddf9115d..c724a2c1 100644 --- a/docs/source/driver/postgresql.rst +++ b/docs/source/driver/postgresql.rst @@ -124,6 +124,7 @@ the :cpp:class:`AdbcDatabase`. This should be a `connection URI if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/snowflake.rst b/docs/source/driver/snowflake.rst index 04023a62..bf445349 100644 --- a/docs/source/driver/snowflake.rst +++ b/docs/source/driver/snowflake.rst @@ -127,6 +127,7 @@ constructing the :cpp::class:`AdbcDatabase`. if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { @@ -241,6 +242,7 @@ a listing). if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/docs/source/driver/sqlite.rst b/docs/source/driver/sqlite.rst index 30e7d32b..96bd7bbd 100644 --- a/docs/source/driver/sqlite.rst +++ b/docs/source/driver/sqlite.rst @@ -140,6 +140,7 @@ shared across all connections. 
if err != nil { // handle error } +defer db.Close() cnxn, err := db.Open(context.Background()) if err != nil { diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go index 3fb61d69..71a75daf 100644 --- a/go/adbc/adbc.go +++ b/go/adbc/adbc.go @@ -329,6 +329,9 @@ type Driver interface { type Database int
(arrow) branch main updated (858574d0bd -> 55afcf0450)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 858574d0bd GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) add 55afcf0450 GH-39672: [Go] Time to Date32/Date64 conversion issues for non-UTC timezones (#39674) No new revisions were added by this update. Summary of changes: go/arrow/compute/internal/kernels/cast_temporal.go | 8 go/arrow/datatype_fixedwidth.go| 10 -- go/arrow/datatype_fixedwidth_test.go | 10 ++ 3 files changed, 18 insertions(+), 10 deletions(-)
(arrow) branch main updated: GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 858574d0bd GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) 858574d0bd is described below commit 858574d0bd1f3ef4157d0446cfb05cef05aac96b Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Thu Jan 18 11:09:50 2024 -0500 GH-39466: [Go][Parquet] Align Arrow and Parquet Timestamp Instant/Local Semantics (#39467) ### Rationale for this change Closes: #39466 ### What changes are included in this PR? - Update logic for determining whether an Arrow Timestamp should have `isAdjustedToUTC=true` on conversion to Parquet. - Update conversion from Parquet Timestamp to Arrow Timestamp to align with Parquet Format [backward-compatibilty](https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/LogicalTypes.md?plain=1#L480-L485) rules. - Refactor Timestamp serialization methods to reduce duplicated code ### Are these changes tested? Yes, - Logical type mapping in existing test updated. - New tests for roundtrip behavior of timestamps with various timezone settings, with/without store_schema enabled. - New test to clarify equality behavior of timestamps with instant semantics, as well as Go-related quirks with timezone-unaware timestamps. ### Are there any user-facing changes? Yes, users of `pqarrow.FileWriter` will produce Parquet files in which the `TIMESTAMP` type is normalized to UTC IFF the Arrow type provided has a timezone specified. This is different from the current Go behavior but aligned that of other implementations. 
The conversion from Parquet to Arrow has been updated as well to reflect the Parquet format [document](https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/LogicalTypes.md?plain=1#L480-L485). Rust already [implements](https://github.com/apache/arrow-rs/blob/a61e824abdd7b38ea214828480430ff2a13f2ead/parquet/src/arrow/schema/primitive.rs#L211-L239) the spec as described and #39489 has been reported due to a mismatch in the handling of convertedTypes in C++. * Closes: #39466 Authored-by: Joel Lubinitsky Signed-off-by: Matt Topol --- go/arrow/array/timestamp.go | 11 +++--- go/arrow/array/timestamp_test.go| 49 ++- go/arrow/datatype_fixedwidth.go | 19 +++-- go/parquet/pqarrow/encode_arrow_test.go | 70 + go/parquet/pqarrow/schema.go| 13 +++--- go/parquet/pqarrow/schema_test.go | 6 +-- 6 files changed, 140 insertions(+), 28 deletions(-) diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go index 6ffb43e067..0cc46a127f 100644 --- a/go/arrow/array/timestamp.go +++ b/go/arrow/array/timestamp.go @@ -91,16 +91,15 @@ func (a *Timestamp) ValueStr(i int) string { return NullValueStr } - dt := a.DataType().(*arrow.TimestampType) - z, _ := dt.GetZone() - return a.values[i].ToTime(dt.Unit).In(z).Format("2006-01-02 15:04:05.9Z0700") + toTime, _ := a.DataType().(*arrow.TimestampType).GetToTimeFunc() + return toTime(a.values[i]).Format("2006-01-02 15:04:05.9Z0700") } func (a *Timestamp) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil + if val := a.ValueStr(i); val != NullValueStr { + return val } - return a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 15:04:05.9") + return nil } func (a *Timestamp) MarshalJSON() ([]byte, error) { diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go index acbad8b586..c172ad811d 100644 --- a/go/arrow/array/timestamp_test.go +++ b/go/arrow/array/timestamp_test.go @@ -234,7 +234,7 @@ func TestTimestampBuilder_Resize(t 
*testing.T) { assert.Equal(t, 5, ab.Len()) } -func TestTimestampValueStr(t *testing.T) { +func TestTimestampValueStr(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -251,3 +251,50 @@ func TestTimestampValueStr(t *testing.T) { assert.Equal(t, "1968-11-30 13:30:45-0700", arr.ValueStr(0)) assert.Equal(t, "2016-02-29 10:42:23-0700", arr.ValueStr(1)) } + +func TestTimestampEquality(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + tsDatatypes := []*arrow.TimestampType{ + {Unit: arrow.Second}, + {Unit: arrow.Secon
(arrow) branch main updated: GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new c170af41ba GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576) c170af41ba is described below commit c170af41ba0c30b80aa4172da0b3637206368cf2 Author: Jânio AuthorDate: Wed Jan 17 14:00:39 2024 -0300 GH-39552: [Go] inclusion of option to use replacer when creating csv strings with go library (#39576) Rationale for this change Make it possible to remove unwanted characters from strings What changes are included in this PR? Add new function to optionally setup a replacer in csv Writer Write method Are these changes tested? Yes Are there any user-facing changes? Adds an optional methods. * Closes: #39552 Lead-authored-by: Jânio Co-authored-by: janiodev Signed-off-by: Matt Topol --- go/arrow/csv/common.go | 14 ++ go/arrow/csv/transformer.go | 12 ++-- go/arrow/csv/writer.go | 24 +--- go/arrow/csv/writer_test.go | 6 -- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 99dac29f4d..31ca61f323 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -21,6 +21,7 @@ package csv import ( "errors" "fmt" + "strings" "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/memory" @@ -223,6 +224,19 @@ func WithIncludeColumns(cols []string) Option { } } +// WithStringsReplacer receives a replacer to be applied in the string fields +// of the CSV. This is useful to remove unwanted characters from the string. 
+func WithStringsReplacer(replacer *strings.Replacer) Option { + return func(cfg config) { + switch cfg := cfg.(type) { + case *Writer: + cfg.stringReplacer = replacer.Replace + default: + panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) + } + } +} + func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { switch ft := f.Type.(type) { diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 0f0181520b..78b16446d4 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -29,7 +29,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/array" ) -func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) []string { +func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string)string) []string { res := make([]string, col.Len()) switch typ.(type) { case *arrow.BooleanType: @@ -144,7 +144,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] arr := col.(*array.String) for i := 0; i < arr.Len(); i++ { if arr.IsValid(i) { - res[i] = arr.Value(i) + res[i] = stringsReplacer(arr.Value(i)) } else { res[i] = w.nullValue } @@ -153,7 +153,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] arr := col.(*array.LargeString) for i := 0; i < arr.Len(); i++ { if arr.IsValid(i) { - res[i] = arr.Value(i) + res[i] = stringsReplacer(arr.Value(i)) } else { res[i] = w.nullValue } @@ -224,7 +224,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] var b bytes.Buffer b.Write([]byte{'{'}) writer := csv.NewWriter() - writer.Write(w.transformColToStringArr(list.DataType(), list)) + writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) writer.Flush() b.Truncate(b.Len() - 1) b.Write([]byte{'}'}) @@ -243,7 +243,7 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) [] var b bytes.Buffer b.Write([]byte{'{'}) writer := 
csv.NewWriter() - writer.Write(w.transformColToStr
(arrow) branch main updated: GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new f0879ed354 GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497) f0879ed354 is described below commit f0879ed3544bb12ee306eae1fb5c6d924dde02ed Author: MagicBoost <39663879+magicbo...@users.noreply.github.com> AuthorDate: Wed Jan 10 06:02:04 2024 +0800 GH-35718: [Go][Parquet] Fix for null-only encoding panic (#39497) ### Rationale for this change closes: #35718 ### What changes are included in this PR? Fix panic writing with DeltaBinaryPacked or DeltaByteArray when column only has nulls ### Are these changes tested? Yes - add a test writing nulls to columns with DeltaBinaryPacked / DeltaByteArray / DeltaLengthByteArray encodings ### Are there any user-facing changes? No * Closes: #35718 Lead-authored-by: yufanmo Co-authored-by: Matt Topol Signed-off-by: Matt Topol --- go/parquet/internal/encoding/delta_byte_array.go | 10 +++- go/parquet/pqarrow/encode_arrow_test.go | 58 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 5e5002e34a..18bd12015a 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -40,7 +40,15 @@ type DeltaByteArrayEncoder struct { } func (enc *DeltaByteArrayEncoder) EstimatedDataEncodedSize() int64 { - return enc.prefixEncoder.EstimatedDataEncodedSize() + enc.suffixEncoder.EstimatedDataEncodedSize() + prefixEstimatedSize := int64(0) + if enc.prefixEncoder != nil { + prefixEstimatedSize = enc.prefixEncoder.EstimatedDataEncodedSize() + } + suffixEstimatedSize := int64(0) + if enc.suffixEncoder != nil { + suffixEstimatedSize = enc.suffixEncoder.EstimatedDataEncodedSize() + } + return prefixEstimatedSize + suffixEstimatedSize
} func (enc *DeltaByteArrayEncoder) initEncoders() { diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 3a8fef7e5a..75eb965d03 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -473,6 +473,64 @@ func TestWriteEmptyLists(t *testing.T) { require.NoError(t, err) } +func TestWriteAllNullsWithDeltaEncoding(t *testing.T) { + sc := arrow.NewSchema([]arrow.Field{ + {Name: "f1", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + {Name: "f2", Type: arrow.ListOf(arrow.FixedWidthTypes.Date32)}, + {Name: "f3", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "f4", Type: arrow.ListOf(arrow.BinaryTypes.String)}, + {Name: "f5", Type: arrow.BinaryTypes.LargeString, Nullable: true}, + {Name: "f6", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)}, + {Name: "f7", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + {Name: "f8", Type: arrow.ListOf(arrow.FixedWidthTypes.Date64)}, + {Name: "f9", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "f10", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)}, + {Name: "f11", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, + {Name: "f12", Type: arrow.ListOf(arrow.FixedWidthTypes.Boolean)}, + {Name: "f13", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "f14", Type: arrow.ListOf(arrow.PrimitiveTypes.Float32)}, + }, nil) + bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc) + defer bldr.Release() + for _, b := range bldr.Fields() { + b.AppendNull() + } + + rec := bldr.NewRecord() + defer rec.Release() + + props := parquet.NewWriterProperties( + parquet.WithVersion(parquet.V1_0), + parquet.WithDictionaryDefault(false), + parquet.WithDictionaryFor("f9", true), + parquet.WithDictionaryFor("f10", true), + parquet.WithDictionaryFor("f13", true), + parquet.WithDictionaryFor("f14", true), + parquet.WithEncodingFor("f1", parquet.Encodings.DeltaBinaryPacked), + parquet.WithEncodingFor("f2", 
parquet.Encodings.DeltaBinaryPacked), + parquet.WithEncodingFor("f3", parquet.Encodings.DeltaByteArra
(arrow) branch main updated: GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new eade9383fb GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460) eade9383fb is described below commit eade9383fb237006390c91cc90e52724262f7dd1 Author: Joel Lubinitsky <33523178+joell...@users.noreply.github.com> AuthorDate: Tue Jan 9 16:59:02 2024 -0500 GH-39456: [Go][Parquet] Arrow DATE64 Type Coerced to Parquet DATE Logical Type (#39460) ### Rationale for this change Closes: #39456 ### What changes are included in this PR? Update physical and logical type mapping from Arrow to Parquet for DATE64 type ### Are these changes tested? Yes, - Update expected schema mapping in existing test - Tests asserting new behavior - Arrow DATE64 will roundtrip -> Parquet -> Arrow as DATE32 - Arrow DATE64 _not aligned_ to exact date boundary will truncate to milliseconds at boundary of greatest full day on Parquet roundtrip ### Are there any user-facing changes? Yes, users of `pqarrow.FileWriter` will produce Parquet files containing `DATE` logical type instead of `TIMESTAMP[ms]` when writing Arrow data containing DATE64 field(s). The proposed implementation truncates `int64` values to be divisible by 8640 rather than validating that this is already the case, as some implementations do. I am happy to add this validation if it would be preferred, but the truncating behavior will likely break fewer existing users. I'm not sure whether this is technically considered a breaking change to a public API and if/how it should be communicated. Any direction regarding this would be appreciated. 
* Closes: #39456 Authored-by: Joel Lubinitsky Signed-off-by: Matt Topol --- go/parquet/pqarrow/encode_arrow_test.go | 84 + go/parquet/pqarrow/schema.go| 4 +- go/parquet/pqarrow/schema_test.go | 2 +- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 565fa3b6b2..3a8fef7e5a 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -125,6 +125,52 @@ func makeDateTimeTypesTable(mem memory.Allocator, expected bool, addFieldMeta bo return array.NewTable(arrsc, cols, int64(len(isValid))) } +func makeDateTypeTable(mem memory.Allocator, expected bool, partialDays bool) arrow.Table { + const ( + millisPerHour int64 = 1000 * 60 * 60 + millisPerDay int64 = millisPerHour * 24 + ) + isValid := []bool{true, true, true, false, true, true} + + var field arrow.Field + if expected { + field = arrow.Field{Name: "date", Type: arrow.FixedWidthTypes.Date32, Nullable: true} + } else { + field = arrow.Field{Name: "date", Type: arrow.FixedWidthTypes.Date64, Nullable: true} + } + + field.Metadata = arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"1"}) + + arrsc := arrow.NewSchema([]arrow.Field{field}, nil) + + d32Values := []arrow.Date32{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000} + + d64Values := make([]arrow.Date64, len(d32Values)) + for i := range d64Values { + // Calculate number of milliseconds at date boundary + d64Values[i] = arrow.Date64(int64(d32Values[i]) * millisPerDay) + if partialDays { + // Offset 1 or more hours past the date boundary + hoursIntoDay := int64(i) * millisPerHour + d64Values[i] += arrow.Date64(hoursIntoDay) + } + } + + bldr := array.NewRecordBuilder(mem, arrsc) + defer bldr.Release() + + if expected { + bldr.Field(0).(*array.Date32Builder).AppendValues(d32Values, isValid) + } else { + bldr.Field(0).(*array.Date64Builder).AppendValues(d64Values, isValid) + } + + rec := bldr.NewRecord() + 
defer rec.Release() + + return array.NewTableFromRecords(arrsc, []arrow.Record{rec}) +} + func TestWriteArrowCols(t *testing.T) { mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) @@ -831,6 +877,44 @@ func (ps *ParquetIOTestSuite) TestDateTimeTypesWithInt96ReadWriteTable() { } } +func (ps *ParquetIOTestSuite) TestDate64ReadWriteTable() { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(ps.T(), 0) + + date64InputTable := makeDateTypeTable(mem, false, false) + defer date64In
(arrow) branch main updated: GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 92520c67b4 GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521) 92520c67b4 is described below commit 92520c67b4fbeddf5a0c4e829ce2ca0bf54adccd Author: ella-chao AuthorDate: Tue Jan 9 21:25:24 2024 + GH-38988: [Go] Expose dictionary size from DictionaryBuilder (#39521) ### Rationale for this change Details are in https://github.com/apache/arrow/issues/38988 ### What changes are included in this PR? This adds a method to `DictionaryBuilder` that returns the current dictionary size. ### Are these changes tested? Updated an existing test to account for this new method. ### Are there any user-facing changes? Yes, a new method is added to `DictionaryBuilder`. * Closes: #38988 Authored-by: Ella Chao Signed-off-by: Matt Topol --- go/arrow/array/dictionary.go | 5 + go/arrow/array/dictionary_test.go | 2 ++ 2 files changed, 7 insertions(+) diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 125c02391f..bbde4e4f1e 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -412,6 +412,7 @@ type DictionaryBuilder interface { AppendArray(arrow.Array) error AppendIndices([]int, []bool) ResetFull() + DictionarySize() int } type dictionaryBuilder struct { @@ -1004,6 +1005,10 @@ func (b *dictionaryBuilder) AppendIndices(indices []int, valid []bool) { } } +func (b *dictionaryBuilder) DictionarySize() int { + return b.memoTable.Size() +} + type NullDictionaryBuilder struct { dictionaryBuilder } diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 5a3e0e10c2..f32cc9555f 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -92,6 +92,8 @@ func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() { 
p.EqualValues(4, bldr.Len()) p.EqualValues(1, bldr.NullN()) + p.EqualValues(2, bldr.DictionarySize()) + arr := bldr.NewArray().(*array.Dictionary) defer arr.Release()
(arrow) branch main updated: GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 0aadd5a0e7 GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347) 0aadd5a0e7 is described below commit 0aadd5a0e7fd58e286f2d0f03b8bdbf99a1d3108 Author: Matt Topol AuthorDate: Mon Jan 8 15:28:15 2024 -0500 GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347) ### Rationale for this change If using the DeltaBinaryPacked encoding, we end up with a nil pointer dereference if we end up with an empty column. ### What changes are included in this PR? Add a nil check in `EstimatedDataEncodedSize` for the base `deltaBitPackEncoder`. This should only ever occur if we have an empty column with this encoding when closing a row group. ### Are these changes tested? Yes a unit test was added to verify the fix. * Closes: #39309 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/parquet/internal/encoding/delta_bit_packing.go | 4 +++ go/parquet/pqarrow/encode_arrow_test.go | 37 +++ 2 files changed, 41 insertions(+) diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index 560b77f4c6..6ac799f1c1 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -466,6 +466,10 @@ func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) { // EstimatedDataEncodedSize returns the current amount of data actually flushed out and written func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 { + if enc.bitWriter == nil { + return 0 + } + return int64(enc.bitWriter.Written()) } diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 95ea644dd8..565fa3b6b2 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ 
b/go/parquet/pqarrow/encode_arrow_test.go @@ -1983,3 +1983,40 @@ func TestWriteTableMemoryAllocation(t *testing.T) { require.Zero(t, mem.CurrentAlloc()) } + +func TestEmptyListDeltaBinaryPacked(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "ts", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint64), + Metadata: arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})}}, nil) + builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer builder.Release() + + listBuilder := builder.Field(0).(*array.ListBuilder) + listBuilder.Append(true) + arrowRec := builder.NewRecord() + defer arrowRec.Release() + + var buf bytes.Buffer + wr, err := pqarrow.NewFileWriter(schema, &buf, + parquet.NewWriterProperties( + parquet.WithDictionaryFor("ts.list.element", false), + parquet.WithEncodingFor("ts.list.element", parquet.Encodings.DeltaBinaryPacked)), + pqarrow.DefaultWriterProps()) + require.NoError(t, err) + + require.NoError(t, wr.WriteBuffered(arrowRec)) + require.NoError(t, wr.Close()) + + rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + reader, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator) + require.NoError(t, err) + defer rdr.Close() + + tbl, err := reader.ReadTable(context.Background()) + require.NoError(t, err) + defer tbl.Release() + + assert.True(t, schema.Equal(tbl.Schema())) + assert.EqualValues(t, 1, tbl.NumRows()) +}
(arrow) branch main updated: GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 213cadbbc0 GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242) 213cadbbc0 is described below commit 213cadbbc080399b372291f9305fe0e67de1 Author: Matt Topol AuthorDate: Wed Jan 3 11:29:15 2024 -0500 GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242) ### Rationale for this change Adding `ValueLen` to the `BinaryLike` interface for easy convenience of determining the length of an individual value for a Binary/String like array. ### Are these changes tested? yes * Closes: #38458 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/array/binary.go | 9 + go/arrow/array/string.go | 17 + 2 files changed, 26 insertions(+) diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index c226297da0..9e26de7a6d 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -30,6 +30,7 @@ import ( type BinaryLike interface { arrow.Array + ValueLen(int) int ValueBytes() []byte ValueOffset64(int) int64 } @@ -367,6 +368,11 @@ func (a *BinaryView) Value(i int) []byte { return buf.Bytes()[start : start+int32(s.Len())] } +func (a *BinaryView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + // ValueString returns the value at index i as a string instead of // a byte slice, without copying the underlying data. 
func (a *BinaryView) ValueString(i int) string { @@ -441,4 +447,7 @@ var ( _ arrow.Array = (*Binary)(nil) _ arrow.Array = (*LargeBinary)(nil) _ arrow.Array = (*BinaryView)(nil) + + _ BinaryLike = (*Binary)(nil) + _ BinaryLike = (*LargeBinary)(nil) ) diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index 90a4628f0d..c8517ba305 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -31,6 +31,7 @@ import ( type StringLike interface { arrow.Array Value(int) string + ValueLen(int) int } // String represents an immutable sequence of variable-length UTF-8 strings. @@ -225,6 +226,14 @@ func (a *LargeString) ValueOffset64(i int) int64 { return a.ValueOffset(i) } +func (a *LargeString) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.offsets[beg+1] - a.offsets[beg]) +} + func (a *LargeString) ValueOffsets() []int64 { beg := a.array.data.offset end := beg + a.array.data.length + 1 @@ -364,6 +373,11 @@ func (a *StringView) Value(i int) string { return *(*string)(unsafe.Pointer()) } +func (a *StringView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + func (a *StringView) String() string { var o strings.Builder o.WriteString("[") @@ -698,4 +712,7 @@ var ( _ StringLikeBuilder = (*StringBuilder)(nil) _ StringLikeBuilder = (*LargeStringBuilder)(nil) _ StringLikeBuilder = (*StringViewBuilder)(nil) + _ StringLike= (*String)(nil) + _ StringLike= (*LargeString)(nil) + _ StringLike= (*StringView)(nil) )
(arrow) branch main updated: GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 75c6b642b5 GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239) 75c6b642b5 is described below commit 75c6b642b5ff1ed171bc1d1a758a70098539c48e Author: Miguel Pragier AuthorDate: Fri Dec 15 20:03:18 2023 +0100 GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239) ### Rationale for this change When having an empty resultset, the driver tries to include an empty record referece, that cannot be scanned. So, any operation that relies on the returned Row(s) will trigger a "Index out of Range" error. ### What changes are included in this PR? We're preventing to include an invalid record (that can't be scanned) in an empty resultset ### Are these changes tested? Yes, there's a new test included ### Are there any user-facing changes? 
No **This PR contains a "Critical Fix".** * Closes: #39238 Authored-by: miguel pragier Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/driver.go | 7 ++-- go/arrow/flight/flightsql/driver/driver_test.go | 44 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index e31e572586..f74bfa378a 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -487,9 +487,10 @@ func readEndpoint(ctx context.Context, client *flightsql.Client, endpoint *fligh schema := reader.Schema() var records []arrow.Record for reader.Next() { - record := reader.Record() - record.Retain() - records = append(records, record) + if record := reader.Record(); record.NumRows() > 0 { + record.Retain() + records = append(records, record) + } } if err := reader.Err(); err != nil && !errors.Is(err, io.EOF) { diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go index a388bf155e..24eb5ee681 100644 --- a/go/arrow/flight/flightsql/driver/driver_test.go +++ b/go/arrow/flight/flightsql/driver/driver_test.go @@ -273,6 +273,50 @@ func (s *SqlTestSuite) TestQuery() { wg.Wait() } +func (s *SqlTestSuite) TestQueryWithEmptyResultset() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + defer s.stopServer(server) + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, err) + defer db.Close() + + // Create the table + _, err = db.Exec(fmt.Sprintf(s.Statements["create table"], s.TableName)) + require.NoError(t, err) + + rows, err := db.Query(fmt.Sprintf(s.Statements["query"], s.TableName)) + 
require.NoError(t, err) + require.False(t, rows.Next()) + + row := db.QueryRow(fmt.Sprintf(s.Statements["query"], s.TableName)) + require.NotNil(t, row) + require.NoError(t, row.Err()) + + target := make(map[string]any) + err = row.Scan(&target) + require.ErrorIs(t, err, sql.ErrNoRows) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + func (s *SqlTestSuite) TestPreparedQuery() { t := s.T()
(arrow) branch main updated: GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 140ae018f3 GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507) 140ae018f3 is described below commit 140ae018f372ee14c9ff19f3e4c2af1b1a579f49 Author: Tim Schaub AuthorDate: Fri Dec 8 20:06:32 2023 +0100 GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507) ### Rationale for this change When using a chunked column reader to read from one Parquet file and a chunked column writer to write to another Parquet file, it can be useful to keep track of the number of rows written. ### What changes are included in this PR? This branch adds a new `RowGroupNumRows` method to the `pqarrow.FileWriter`. This is somewhat similar to the existing `RowGroupTotalBytesWritten` function. ### Are these changes tested? A new `file_writer_test.go` file is added that adds a test for the new method. ### Are there any user-facing changes? The new method is exported and documented. * Closes: #38506 Authored-by: Tim Schaub Signed-off-by: Matt Topol --- go/parquet/pqarrow/file_writer.go | 17 +++ go/parquet/pqarrow/file_writer_test.go | 89 ++ 2 files changed, 106 insertions(+) diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index bc484ba243..1164cd690c 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -134,6 +134,23 @@ func (fw *FileWriter) RowGroupTotalBytesWritten() int64 { return 0 } +// RowGroupNumRows returns the number of rows written to the current row group. +// Returns an error if they are unequal between columns that have been written so far. 
+func (fw *FileWriter) RowGroupNumRows() (int, error) { + if fw.rgw != nil { + return fw.rgw.NumRows() + } + return 0, nil +} + +// NumRows returns the total number of rows that have been written so far. +func (fw *FileWriter) NumRows() int { + if fw.wr != nil { + return fw.wr.NumRows() + } + return 0 +} + // WriteBuffered will either append to an existing row group or create a new one // based on the record length and max row group length. // diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go new file mode 100644 index 00..0b76733a62 --- /dev/null +++ b/go/parquet/pqarrow/file_writer_test.go @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pqarrow_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/apache/arrow/go/v15/arrow" + "github.com/apache/arrow/go/v15/arrow/array" + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/parquet" + "github.com/apache/arrow/go/v15/parquet/pqarrow" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFileWriterRowGroupNumRows(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data)) + require.NoError(t, err) + + output := &bytes.Buffer{} + writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(100)) + writer, err := pqarrow.NewFileWriter(sc
(arrow-adbc) branch main updated: fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git The following commit(s) were added to refs/heads/main by this push: new 491ab8d4 fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342) 491ab8d4 is described below commit 491ab8d4638391e5b60c32f1e29c5fe3bacbd0f5 Author: William AuthorDate: Tue Dec 5 20:20:34 2023 +0100 fix(go/adbc/sqldriver): Fix nil pointer panics for query parameters (#1342) Thought I would contribute some fixes I've been using locally for the issues described in #1341 I have no previous experience with this repository or with the Arrow memory model so I would say it's likely I've gotten something wrong. Feel free to ask me to improve on my contributions or merely take them as inspiration for some other fix. Resolves #1341 --- go/adbc/sqldriver/driver.go| 11 +++- go/adbc/sqldriver/driver_internals_test.go | 95 ++ go/adbc/sqldriver/driver_test.go | 6 +- 3 files changed, 106 insertions(+), 6 deletions(-) diff --git a/go/adbc/sqldriver/driver.go b/go/adbc/sqldriver/driver.go index 4b83495f..775f3f78 100644 --- a/go/adbc/sqldriver/driver.go +++ b/go/adbc/sqldriver/driver.go @@ -22,6 +22,7 @@ import ( "database/sql" "database/sql/driver" "errors" + "fmt" "io" "reflect" "strconv" @@ -463,16 +464,20 @@ func arrFromVal(val any) arrow.Array { case []byte: dt = arrow.BinaryTypes.Binary buffers[1] = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, int32(len(v))})) - buffers[2] = memory.NewBufferBytes(v) + buffers = append(buffers, memory.NewBufferBytes(v)) case string: dt = arrow.BinaryTypes.String buffers[1] = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, int32(len(v))})) var buf = *(*[]byte)(unsafe.Pointer()) (*reflect.SliceHeader)(unsafe.Pointer()).Cap = len(v) - buffers[2] = memory.NewBufferBytes(buf) + buffers = append(buffers, memory.NewBufferBytes(buf)) + default: + 
panic(fmt.Sprintf("unsupported type %T", val)) } for _, b := range buffers { - defer b.Release() + if b != nil { + defer b.Release() + } } data := array.NewData(dt, 1, buffers, nil, 0, 0) defer data.Release() diff --git a/go/adbc/sqldriver/driver_internals_test.go b/go/adbc/sqldriver/driver_internals_test.go index 8e9ce565..9981a40d 100644 --- a/go/adbc/sqldriver/driver_internals_test.go +++ b/go/adbc/sqldriver/driver_internals_test.go @@ -19,6 +19,7 @@ package sqldriver import ( "database/sql/driver" + "encoding/base64" "fmt" "strings" "testing" @@ -273,3 +274,97 @@ func TestNextRowTypes(t *testing.T) { }) } } + +func TestArrFromVal(t *testing.T) { + tests := []struct { + value any + expectedDataTypearrow.DataType + expectedStringValue string + }{ + { + value: true, + expectedDataType:arrow.FixedWidthTypes.Boolean, + expectedStringValue: "true", + }, + { + value: int8(1), + expectedDataType:arrow.PrimitiveTypes.Int8, + expectedStringValue: "1", + }, + { + value: uint8(1), + expectedDataType:arrow.PrimitiveTypes.Uint8, + expectedStringValue: "1", + }, + { + value: int16(1), + expectedDataType:arrow.PrimitiveTypes.Int16, + expectedStringValue: "1", + }, + { + value: uint16(1), + expectedDataType:arrow.PrimitiveTypes.Uint16, + expectedStringValue: "1", + }, + { + value: int32(1), + expectedDataType:arrow.PrimitiveTypes.Int32, + expectedStringValue: "1", + }, + { + value: uint32(1), + expectedDataType:arrow.PrimitiveTypes.
(arrow) branch main updated: GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git The following commit(s) were added to refs/heads/main by this push: new 82be2555ab GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919) 82be2555ab is described below commit 82be2555abc6b06085976548ea3b77ba3f888a35 Author: Alfonso Subiotto Marqués AuthorDate: Tue Nov 28 21:58:56 2023 +0100 GH-38918: [Go] Avoid schema.Fields allocations in some places (#38919) ### Rationale for this change Unnecessary allocations. ### What changes are included in this PR? This PR is split into several commits. The first addresses allocations in the `dictutils` package, the second adds `NumFields` to `NestedType` so that the third commit, which is a purely mechanical change from `len(type.Fields())` to `type.NumFields` to avoid allocations in these specific cases can pass tests with no further changes. The last commit removes some Fields allocations that specifically hurt our project. Note that this is not an all-encompassing change (therefore this PR should probably not close the linked issue). ### Are these changes tested? These changes are implicitly tested by the existing test-suite. No functionality has been changed and they should be invisible to the user. ### Are there any user-facing changes? No. 
* Addresses: #38918 * Closes: #38918 Authored-by: Alfonso Subiotto Marques Signed-off-by: Matt Topol --- go/arrow/array/concat.go | 2 +- go/arrow/array/record.go | 12 ++-- go/arrow/array/struct.go | 2 +- go/arrow/array/table.go | 8 go/arrow/array/union.go | 4 ++-- go/arrow/array/util.go| 2 +- go/arrow/cdata/cdata_exports.go | 2 +- go/arrow/compute/cast.go | 4 ++-- go/arrow/compute/exec/span.go | 2 +- go/arrow/compute/exprs/builders.go| 2 +- go/arrow/compute/exprs/exec.go| 4 ++-- go/arrow/compute/exprs/types.go | 2 +- go/arrow/compute/fieldref_test.go | 4 ++-- go/arrow/datatype_encoded.go | 2 ++ go/arrow/datatype_extension.go| 7 +++ go/arrow/datatype_nested.go | 16 go/arrow/datatype_nested_test.go | 2 +- go/arrow/flight/flightsql/driver/driver.go| 2 +- go/arrow/flight/flightsql/example/sql_batch_reader.go | 2 +- go/arrow/internal/arrjson/arrjson.go | 8 go/arrow/internal/dictutils/dict.go | 11 --- go/arrow/ipc/file_reader.go | 10 +- go/arrow/ipc/metadata.go | 10 +- go/arrow/scalar/nested.go | 16 go/arrow/scalar/scalar.go | 8 go/arrow/scalar/scalar_test.go| 2 +- go/arrow/schema.go| 6 +++--- go/arrow/schema_test.go | 6 +++--- go/parquet/pqarrow/file_reader.go | 2 +- go/parquet/pqarrow/schema.go | 8 30 files changed, 99 insertions(+), 69 deletions(-) diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index fa3554c1c0..f0bc2855eb 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -695,7 +695,7 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, } out.childData = []arrow.ArrayData{children} case *arrow.StructType: - out.childData = make([]arrow.ArrayData, len(dt.Fields())) + out.childData = make([]arrow.ArrayData, dt.NumFields()) for i := range dt.Fields() { children := gatherChildren(data, i) for _, c := range children { diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index d080f726e4..f25e7c9a87 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -185,7 +185,7 @@ func 
(rec *simpleRecord) validate() error { return nil } - if len(rec.arrs) != len(rec.schema.Fields()) { + if len(rec.arrs) != rec.schema.NumFields() { return fmt.Errorf("arrow/array: number of columns/fields mismatch") } @@ -285,11 +285,11 @@ func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) *RecordBuilder refCount: 1,
(arrow) branch main updated (5ab60eaea3 -> b0e1f748f5)
This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git from 5ab60eaea3 GH-36760: [Go] Add Avro OCF reader (#37115) add b0e1f748f5 GH-38728: [Go] ipc: put lz4 decompression buffers back into sync.Pool (#38729) No new revisions were added by this update. Summary of changes: go/arrow/ipc/compression.go | 4 +- go/arrow/ipc/file_reader.go | 1 + go/arrow/ipc/reader_test.go | 90 + 3 files changed, 94 insertions(+), 1 deletion(-)