(arrow-adbc) branch asf-site updated: publish documentation

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 7581c1a48 publish documentation
7581c1a48 is described below

commit 7581c1a48cfb07ab6acaf7d66171153230a80178
Author: github-actions[bot] 
AuthorDate: Fri Jun 14 05:41:58 2024 +0000

publish documentation
---
 main/java/api/allclasses-index.html | 2 +-
 main/java/api/allpackages-index.html| 2 +-
 main/java/api/constant-values.html  | 2 +-
 main/java/api/deprecated-list.html  | 2 +-
 main/java/api/help-doc.html | 2 +-
 main/java/api/index-all.html| 2 +-
 main/java/api/index.html| 2 +-
 .../org/apache/arrow/adbc/core/AdbcConnection.GetObjectsDepth.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcConnection.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcDatabase.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcDriver.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcException.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcInfoCode.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcOptions.html   | 2 +-
 .../org/apache/arrow/adbc/core/AdbcStatement.PartitionResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/AdbcStatement.QueryResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/AdbcStatement.UpdateResult.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcStatement.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcStatusCode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/BulkIngestMode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/ErrorDetail.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/IsolationLevel.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/PartitionDescriptor.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/StandardSchemas.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/StandardStatistics.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/TypedKey.html  | 2 +-
 .../arrow/adbc/core/class-use/AdbcConnection.GetObjectsDepth.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcConnection.html| 2 +-
 .../java/api/org/apache/arrow/adbc/core/class-use/AdbcDatabase.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/AdbcDriver.html  | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcException.html | 2 +-
 .../java/api/org/apache/arrow/adbc/core/class-use/AdbcInfoCode.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/AdbcOptions.html | 2 +-
 .../arrow/adbc/core/class-use/AdbcStatement.PartitionResult.html| 2 +-
 .../apache/arrow/adbc/core/class-use/AdbcStatement.QueryResult.html | 2 +-
 .../arrow/adbc/core/class-use/AdbcStatement.UpdateResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcStatement.html | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcStatusCode.html| 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/BulkIngestMode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/ErrorDetail.html | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/IsolationLevel.html| 2 +-
 .../org/apache/arrow/adbc/core/class-use/PartitionDescriptor.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/StandardSchemas.html   | 2 +-
 .../org/apache/arrow/adbc/core/class-use/StandardStatistics.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/TypedKey.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-summary.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-tree.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-use.html   | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/BaseFlightReader.html| 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightInfoReader.html| 2 +-
 .../arrow/adbc/driver/flightsql/FlightSqlClientWithCallOptions.html | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlConnection.html | 2 +-
 .../arrow/adbc/driver/flightsql/FlightSqlConnectionProperties.html  | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlDatabase.html   | 2 +-
 .../api/org/apache/arrow/adbc/driver/flightsql/FlightSqlDriver.html | 2 +-
 .../apache/arrow/adbc/driver/flightsql/FlightSqlDriverFactory.html  | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.html  | 2 +-
 .../arrow/adbc/driver/flightsql/class-use/BaseFlightReader.html | 2 +-
 .../arrow/adbc/driver/flightsql/class-use/FlightInfoReader.html | 2 +-
 

(arrow-adbc) branch main updated: chore(dev/release): test latest Python in verify-release-candidate (#1918)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
 new f121f12a8 chore(dev/release): test latest Python in 
verify-release-candidate (#1918)
f121f12a8 is described below

commit f121f12a8c6f536390af5b3765c46954116f0b42
Author: David Li 
AuthorDate: Fri Jun 14 14:31:35 2024 +0900

chore(dev/release): test latest Python in verify-release-candidate (#1918)

Fixes #1251.
---
 dev/release/verify-release-candidate.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dev/release/verify-release-candidate.sh 
b/dev/release/verify-release-candidate.sh
index 825a450d4..4fc335e2f 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -318,9 +318,7 @@ install_conda() {
 maybe_setup_conda() {
   # Optionally setup conda environment with the passed dependencies
   local env="conda-${CONDA_ENV:-source}"
-  # XXX(https://github.com/apache/arrow-adbc/issues/1247): no duckdb for
-  # python 3.12 on conda-forge right now
-  local pyver=${PYTHON_VERSION:-3.11}
+  local pyver=${PYTHON_VERSION:-3}
 
   if [ "${USE_CONDA}" -gt 0 ]; then
 show_info "Configuring Conda environment..."
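For context, the script drives its Conda setup entirely through the environment variables visible in this hunk (USE_CONDA, CONDA_ENV, PYTHON_VERSION). A minimal sketch of both invocation styles under the new default, with the positional release-candidate arguments elided:

```
# Verify with the new default: whatever Python 3 conda-forge resolves.
USE_CONDA=1 ./dev/release/verify-release-candidate.sh ...

# Pin an interpreter explicitly, e.g. if a dependency lags a new Python.
USE_CONDA=1 PYTHON_VERSION=3.11 ./dev/release/verify-release-candidate.sh ...
```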



(arrow) branch main updated: GH-42128: [Packaging][CentOS] Migrate CentOS 7 and CentOS Stream 8 packaging jobs to use vault.centos.org (#42129)

2024-06-13 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 69e8a78c01 GH-42128: [Packaging][CentOS] Migrate CentOS 7 and CentOS 
Stream 8 packaging jobs to use vault.centos.org (#42129)
69e8a78c01 is described below

commit 69e8a78c018da88b60f9eb2b3b45703f81f3c93d
Author: Bryce Mecum 
AuthorDate: Thu Jun 13 20:45:31 2024 -0800

GH-42128: [Packaging][CentOS] Migrate CentOS 7 and CentOS Stream 8 
packaging jobs to use vault.centos.org (#42129)

### Rationale for this change

Fixes https://github.com/apache/arrow/issues/42128.

### What changes are included in this PR?

- Updates to the CentOS 7 and CentOS Stream 8 Dockerfiles we use in the 
Crossbow packaging job to point to vault.centos.org so they continue to run now 
that both distros are EOL

### Are these changes tested?

Yes, I successfully built both updated Dockerfiles locally.

### Are there any user-facing changes?

No.
* GitHub Issue: #42128

Lead-authored-by: Bryce Mecum 
Co-authored-by: Sutou Kouhei 
Signed-off-by: Sutou Kouhei 
---
 dev/release/verify-yum.sh| 12 
 .../apache-arrow-release/yum/centos-7/Dockerfile |  8 
 .../apache-arrow-release/yum/centos-8-stream/Dockerfile  |  8 
 .../linux-packages/apache-arrow/yum/centos-7/Dockerfile  |  8 
 .../apache-arrow/yum/centos-8-stream/Dockerfile  |  8 
 5 files changed, 44 insertions(+)

diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh
index a3904169bb..aad82af21f 100755
--- a/dev/release/verify-yum.sh
+++ b/dev/release/verify-yum.sh
@@ -64,6 +64,16 @@ uninstall_command="dnf remove -y"
 clean_command="dnf clean"
 info_command="dnf info --enablerepo=crb"
 
+# GH-42128
+# Switch all repos to point to vault.centos.org; used for EOL distros
+fix_eol_repositories() {
+  sed -i \
+-e 's/^mirrorlist/#mirrorlist/' \
+-e 's/^#baseurl/baseurl/' \
+-e 's/mirror\.centos\.org/vault.centos.org/' \
+/etc/yum.repos.d/*.repo
+}
+
 echo "::group::Prepare repository"
 
 case "${distribution}-${distribution_version}" in
@@ -99,6 +109,7 @@ case "${distribution}-${distribution_version}" in
 uninstall_command="yum remove -y"
 clean_command="yum clean"
 info_command="yum info"
+fix_eol_repositories
 ;;
   centos-8)
 distribution_prefix="centos"
@@ -106,6 +117,7 @@ case "${distribution}-${distribution_version}" in
 ruby_devel_packages+=(redhat-rpm-config)
 install_command="dnf install -y --enablerepo=powertools"
 info_command="dnf info --enablerepo=powertools"
+fix_eol_repositories
 ;;
   centos-*)
 distribution_prefix="centos"
diff --git 
a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile 
b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
index 236b0e2972..2634e7a0d3 100644
--- a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-7/Dockerfile
@@ -19,6 +19,14 @@ FROM centos:7
 
 ARG DEBUG
 
+# GH-42128
+# Switch repos to point to vault.centos.org because CentOS 7 is EOL
+RUN sed -i \
+  -e 's/^mirrorlist/#mirrorlist/' \
+  -e 's/^#baseurl/baseurl/' \
+  -e 's/mirror\.centos\.org/vault.centos.org/' \
+  /etc/yum.repos.d/*.repo
+
 RUN \
   quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
   yum install -y ${quiet} \
diff --git 
a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8-stream/Dockerfile 
b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8-stream/Dockerfile
index 11d7ad0ee7..65b6cb0ffb 100644
--- 
a/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8-stream/Dockerfile
+++ 
b/dev/tasks/linux-packages/apache-arrow-release/yum/centos-8-stream/Dockerfile
@@ -19,6 +19,14 @@ FROM quay.io/centos/centos:stream8
 
 ARG DEBUG
 
+# GH-42128
+# Switch repos to point to vault.centos.org because CentOS Stream 8 is EOL
+RUN sed -i \
+  -e 's/^mirrorlist/#mirrorlist/' \
+  -e 's/^#baseurl/baseurl/' \
+  -e 's/mirror\.centos\.org/vault.centos.org/' \
+  /etc/yum.repos.d/*.repo
+
 RUN \
   quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \
   dnf install --enablerepo=powertools -y ${quiet} \
diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile 
b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
index 3c52a5663f..8c029d87c2 100644
--- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
+++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile
@@ -23,6 +23,14 @@ ENV \
 
 ARG DEBUG
 
+# GH-42128
+# Switch repos to point to vault.centos.org because CentOS 7 is EOL
+RUN sed -i \
+  -e 's/^mirrorlist/#mirrorlist/' \
+  -e 's/^#baseurl/baseurl/' \
+  
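The same three-expression sed rewrite recurs in each of these Dockerfiles and in verify-yum.sh's fix_eol_repositories; as a standalone sketch (taken directly from the hunks above):

```
# For every repo definition under /etc/yum.repos.d/: comment out the
# mirrorlist entry, re-enable the commented-out baseurl, and point it
# at the vault.centos.org archive instead of the retired mirror network.
sed -i \
  -e 's/^mirrorlist/#mirrorlist/' \
  -e 's/^#baseurl/baseurl/' \
  -e 's/mirror\.centos\.org/vault.centos.org/' \
  /etc/yum.repos.d/*.repo
```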

(arrow) branch main updated (3333648e78 -> a7a46b2480)

2024-06-13 Thread curth
This is an automated email from the ASF dual-hosted git repository.

curth pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


 from 3333648e78 MINOR: [C#] Handle Empty Schema (#42132)
 add a7a46b2480 GH-41349: [C#] Optimize DecimalUtility.GetBytes(SqlDecimal) 
on .NET 7+ (#42150)

No new revisions were added by this update.

Summary of changes:
 csharp/src/Apache.Arrow/Apache.Arrow.csproj   | 4 ++--
 csharp/src/Apache.Arrow/DecimalUtility.cs | 9 ++---
 csharp/src/Apache.Arrow/Scalars/BinaryView.cs | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)



(arrow) branch main updated: MINOR: [C#] Handle Empty Schema (#42132)

2024-06-13 Thread curth
This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 3333648e78 MINOR: [C#] Handle Empty Schema (#42132)
3333648e78 is described below

commit 3333648e780d18dda8fbb5f95394736117e33cfe
Author: Kirill Khramkov 
AuthorDate: Fri Jun 14 06:45:42 2024 +0400

MINOR: [C#] Handle Empty Schema (#42132)



### Rationale for this change

While developing an SDK, we encountered this error:
```
System.ArgumentOutOfRangeException : Specified argument was out of the 
range of valid values. (Parameter 'length')
   at 
Apache.Arrow.Flight.FlightMessageSerializer.DecodeSchema(ReadOnlyMemory`1 
buffer)
```
The issue is that the schema buffer is empty, and libraries for other
languages don't throw an error in this case. This PR makes the C#
implementation consistent with them.

### What changes are included in this PR?

Return null if the schema buffer is empty.

### Are these changes tested?

Tested against our Flight API.

### Are there any user-facing changes?

Fixes a critical bug: the inability to run a request against a Flight
service that doesn't return a schema in GetFlightInfo.

Authored-by: Kirill Khramkov 
Signed-off-by: Curt Hagenlocher 
---
 csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs 
b/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs
index 9df28b5033..47ffe43d24 100644
--- a/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs
+++ b/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs
@@ -27,6 +27,7 @@ namespace Apache.Arrow.Flight
 {
 public static Schema DecodeSchema(ReadOnlyMemory buffer)
 {
+if (buffer.IsEmpty) return null;
 int bufferPosition = 0;
 int schemaMessageLength = 
BinaryPrimitives.ReadInt32LittleEndian(buffer.Span.Slice(bufferPosition));
 bufferPosition += sizeof(int);
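A caller-side sketch of the new contract (hypothetical surrounding code; FlightMessageSerializer lives in the library's internal namespace, and buffer acquisition is elided):

```
// After this change an empty buffer yields null rather than an
// ArgumentOutOfRangeException, so callers must tolerate a null schema.
Schema schema = FlightMessageSerializer.DecodeSchema(buffer);
if (schema == null)
{
    // The service returned no schema in GetFlightInfo; proceed without one.
}
```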



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 13166b5f update documentation for tag main
13166b5f is described below

commit 13166b5f2868441e60e59c4f52057117f5d8ee82
Author: GitHub Actions 
AuthorDate: Fri Jun 14 02:21:07 2024 +0000

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 97ddbebc..ff926b81 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-13T18:38Z
+last_built: 2024-06-14T02:20Z
 



(arrow-nanoarrow) branch main updated: refactor: Simplify Meson test generation (#525)

2024-06-13 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 397cfc5f refactor: Simplify Meson test generation (#525)
397cfc5f is described below

commit 397cfc5fccffb1ef71693b76069983cc5c311842
Author: William Ayd 
AuthorDate: Thu Jun 13 22:19:05 2024 -0400

refactor: Simplify Meson test generation (#525)
---
 src/nanoarrow/meson.build | 93 +++
 1 file changed, 38 insertions(+), 55 deletions(-)

diff --git a/src/nanoarrow/meson.build b/src/nanoarrow/meson.build
index e96d98b1..cdbd6ed0 100644
--- a/src/nanoarrow/meson.build
+++ b/src/nanoarrow/meson.build
@@ -104,64 +104,47 @@ if get_option('tests')
   gmock_dep = dependency('gmock')
   nlohmann_json_dep = dependency('nlohmann_json')
 
-  utils_test = executable('utils_test', 'utils_test.cc',
-  link_with: nanoarrow_lib,
-  dependencies: [
-  arrow_dep,
-  gtest_dep,
-  gmock_dep,
-  nlohmann_json_dep,
-  ],
-  include_directories: incdir)
-  test('utils test', utils_test)
-
-  buffer_test = executable('buffer_test', 'buffer_test.cc',
-   dependencies: [arrow_dep, gtest_dep],
-  link_with: nanoarrow_lib,
-   include_directories: incdir)
-  test('buffer test', buffer_test)
-
-  array_test = executable('array_test', 'array_test.cc',
-  dependencies: [arrow_dep, gtest_dep, gmock_dep],
-  link_with: nanoarrow_lib,
-  include_directories: incdir)
-  test('array test', array_test)
-
-  schema_test = executable('schema_test', 'schema_test.cc',
-   dependencies: [arrow_dep, gtest_dep],
-  link_with: nanoarrow_lib,
-   include_directories: incdir)
-  test('schema test', schema_test)
-
-  array_stream_test = executable('array_stream_test', 'array_stream_test.cc',
- dependencies: [arrow_dep, gtest_dep, 
gmock_dep],
- link_with: nanoarrow_lib,
- include_directories: incdir)
-  test('array_stream test', array_stream_test)
-
-  nanoarrow_hpp_test = executable('nanoarrow_hpp_test', 
'nanoarrow_hpp_test.cc',
-  dependencies: [
-  arrow_dep,
-  gtest_dep,
-  gmock_dep,
-  nlohmann_json_dep,
-  ],
-  link_with: nanoarrow_lib,
-  include_directories: incdir)
-  test('nanoarrow_hpp test', nanoarrow_hpp_test)
-
-  nanoarrow_testing_test = executable('nanoarrow_testing_test', 
'nanoarrow_testing_test.cc',
-  dependencies: [arrow_dep, gtest_dep, 
nlohmann_json_dep],
-  link_with: nanoarrow_lib,
-  include_directories: incdir)
-  test('nanoarrow_testing test', nanoarrow_testing_test)
-
-
-  c_data_integration_test = executable('c_data_integration_test', 
'integration/c_data_integration_test.cc',
+  nanoarrow_tests = {
+  'utils': {
+  'deps': [arrow_dep, gtest_dep, gmock_dep, nlohmann_json_dep],
+  },
+  'buffer': {
+  'deps': [arrow_dep, gtest_dep],
+  },
+  'array': {
+  'deps': [arrow_dep, gtest_dep, gmock_dep],
+  },
+  'schema': {
+  'deps': [arrow_dep, gtest_dep],
+  },
+  'array-stream': {
+  'deps': [arrow_dep, gtest_dep, gmock_dep],
+  },
+  'nanoarrow-hpp': {
+  'deps': [arrow_dep, gtest_dep, gmock_dep, nlohmann_json_dep],
+  },
+  'nanoarrow-testing': {
+  'deps': [arrow_dep, gtest_dep, nlohmann_json_dep],
+  },
+  }
+
+  foreach name, config : nanoarrow_tests
+exc = executable(
+name + '-test',
+sources: name.replace('-', '_') + '_test.cc',
+link_with: nanoarrow_lib,
+include_directories: incdir,
+dependencies: config['deps'],
+)
+test(name, exc)
+  endforeach
+
+  c_data_integration_test = executable('c-data-integration-test',
+   
'integration/c_data_integration_test.cc',
link_with: c_data_integration_lib,
dependencies: [arrow_dep, gtest_dep],
include_directories: incdir)
-  test('c_data_integration test', 
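The core of the refactor is Meson's dictionary-plus-foreach idiom; a minimal self-contained sketch of the pattern (the dependency variables are placeholders):

```
# Map each test name to its configuration, then generate one
# executable/test pair per entry.
example_tests = {
    'utils': {'deps': [gtest_dep]},
    'array-stream': {'deps': [gtest_dep, gmock_dep]},
}

foreach name, config : example_tests
    exc = executable(
        name + '-test',
        sources: name.replace('-', '_') + '_test.cc',
        dependencies: config['deps']
    )
    test(name, exc)
endforeach
```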

(arrow-adbc) branch asf-site updated: publish documentation

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 70c40c5e0 publish documentation
70c40c5e0 is described below

commit 70c40c5e0e622516c1787f5a4aa94e37b308e88b
Author: github-actions[bot] 
AuthorDate: Fri Jun 14 01:49:13 2024 +0000

publish documentation
---
 main/java/api/allclasses-index.html | 2 +-
 main/java/api/allpackages-index.html| 2 +-
 main/java/api/constant-values.html  | 2 +-
 main/java/api/deprecated-list.html  | 2 +-
 main/java/api/help-doc.html | 2 +-
 main/java/api/index-all.html| 2 +-
 main/java/api/index.html| 2 +-
 .../org/apache/arrow/adbc/core/AdbcConnection.GetObjectsDepth.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcConnection.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcDatabase.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcDriver.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcException.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcInfoCode.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcOptions.html   | 2 +-
 .../org/apache/arrow/adbc/core/AdbcStatement.PartitionResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/AdbcStatement.QueryResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/AdbcStatement.UpdateResult.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcStatement.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/AdbcStatusCode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/BulkIngestMode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/ErrorDetail.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/IsolationLevel.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/PartitionDescriptor.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/StandardSchemas.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/StandardStatistics.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/TypedKey.html  | 2 +-
 .../arrow/adbc/core/class-use/AdbcConnection.GetObjectsDepth.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcConnection.html| 2 +-
 .../java/api/org/apache/arrow/adbc/core/class-use/AdbcDatabase.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/AdbcDriver.html  | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcException.html | 2 +-
 .../java/api/org/apache/arrow/adbc/core/class-use/AdbcInfoCode.html | 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/AdbcOptions.html | 2 +-
 .../arrow/adbc/core/class-use/AdbcStatement.PartitionResult.html| 2 +-
 .../apache/arrow/adbc/core/class-use/AdbcStatement.QueryResult.html | 2 +-
 .../arrow/adbc/core/class-use/AdbcStatement.UpdateResult.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcStatement.html | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/AdbcStatusCode.html| 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/BulkIngestMode.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/ErrorDetail.html | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/IsolationLevel.html| 2 +-
 .../org/apache/arrow/adbc/core/class-use/PartitionDescriptor.html   | 2 +-
 .../api/org/apache/arrow/adbc/core/class-use/StandardSchemas.html   | 2 +-
 .../org/apache/arrow/adbc/core/class-use/StandardStatistics.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/class-use/TypedKey.html| 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-summary.html   | 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-tree.html  | 2 +-
 main/java/api/org/apache/arrow/adbc/core/package-use.html   | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/BaseFlightReader.html| 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightInfoReader.html| 2 +-
 .../arrow/adbc/driver/flightsql/FlightSqlClientWithCallOptions.html | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlConnection.html | 2 +-
 .../arrow/adbc/driver/flightsql/FlightSqlConnectionProperties.html  | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlDatabase.html   | 2 +-
 .../api/org/apache/arrow/adbc/driver/flightsql/FlightSqlDriver.html | 2 +-
 .../apache/arrow/adbc/driver/flightsql/FlightSqlDriverFactory.html  | 2 +-
 .../org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.html  | 2 +-
 .../arrow/adbc/driver/flightsql/class-use/BaseFlightReader.html | 2 +-
 .../arrow/adbc/driver/flightsql/class-use/FlightInfoReader.html | 2 +-
 

(arrow) branch main updated (02f461f0c5 -> d078d5cafc)

2024-06-13 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 02f461f0c5 GH-42101: [Java] Create File for Output Validation in 
FileRoundtrip (#42115)
 add d078d5cafc GH-42134: [C++][FS][Azure] Validate 
AzureOptions::{blob,dfs}_storage_scheme (#42135)

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/filesystem/azurefs.cc  |  8 ++
 cpp/src/arrow/filesystem/azurefs_test.cc | 44 
 python/pyarrow/tests/test_fs.py  |  4 +--
 3 files changed, 54 insertions(+), 2 deletions(-)



(arrow-adbc) branch dependabot/maven/java/com.uber.nullaway-nullaway-0.11.0 deleted (was 8eef5813a)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch 
dependabot/maven/java/com.uber.nullaway-nullaway-0.11.0
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


 was 8eef5813a chore(java): bump com.uber.nullaway:nullaway in /java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow-adbc) branch main updated (2296b2a8a -> 7fb31cdf9)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


from 2296b2a8a chore(java): bump com.google.errorprone:error_prone_core 
from 2.27.1 to 2.28.0 in /java (#1897)
 add 7fb31cdf9 chore(java): bump com.uber.nullaway:nullaway from 0.10.26 to 
0.11.0 in /java (#1898)

No new revisions were added by this update.

Summary of changes:
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



(arrow-adbc) branch dependabot/maven/java/com.google.errorprone-error_prone_core-2.28.0 deleted (was ab862df85)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch 
dependabot/maven/java/com.google.errorprone-error_prone_core-2.28.0
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


 was ab862df85 chore(java): bump com.google.errorprone:error_prone_core in 
/java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow-adbc) branch main updated: chore(java): bump com.google.errorprone:error_prone_core from 2.27.1 to 2.28.0 in /java (#1897)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
 new 2296b2a8a chore(java): bump com.google.errorprone:error_prone_core 
from 2.27.1 to 2.28.0 in /java (#1897)
2296b2a8a is described below

commit 2296b2a8ac1683708532efbb75c2ff3c6295a5f4
Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
AuthorDate: Thu Jun 13 21:29:16 2024 -0400

chore(java): bump com.google.errorprone:error_prone_core from 2.27.1 to 
2.28.0 in /java (#1897)

Bumps [com.google.errorprone:error_prone_core](https://github.com/google/error-prone)
from 2.27.1 to 2.28.0.

Release notes
Sourced from [com.google.errorprone:error_prone_core's
releases](https://github.com/google/error-prone/releases).

Error Prone 2.28.0
Error Prone now supports the latest JDK 23 EA builds
([#4412](https://redirect.github.com/google/error-prone/issues/4412),
[#4415](https://redirect.github.com/google/error-prone/issues/4415)).
Closed issues:

- Improved errors for invalid check severities
  ([#4306](https://redirect.github.com/google/error-prone/issues/4306)).
- Fix a crash with nested instanceof patterns
  ([#4349](https://redirect.github.com/google/error-prone/issues/4349)).
- Fix a crash in JUnitIncompatibleType
  ([#4377](https://redirect.github.com/google/error-prone/issues/4377)).
- In ObjectEqualsForPrimitives, don't suggest replacing equal with ==
  for floating-point values
  ([#4392](https://redirect.github.com/google/error-prone/issues/4392)).

New checks:

- [DeeplyNested](https://errorprone.info/bugpattern/DeeplyNested)
  discourages very deeply nested code that can cause StackOverflowErrors
  during compilation.
- [GuiceNestedCombine](https://errorprone.info/bugpattern/GuiceNestedCombine)
  discourages nesting of Modules.combine(), which is unnecessary.
- [PatternMatchingInstanceof](https://errorprone.info/bugpattern/PatternMatchingInstanceof)
  migrates code to use [pattern matching for
  instanceof](https://openjdk.org/jeps/394).
- [SunApi](https://errorprone.info/bugpattern/SunApi) discourages use of
  internal proprietary JDK APIs which may be removed from future releases.

Full Changelog: https://github.com/google/error-prone/compare/v2.27.1...v2.28.0

Commits

- [c71fd4e](https://github.com/google/error-prone/commit/c71fd4ef6aea290a74fa641de498cb351fd325e3)
  Release Error Prone 2.28.0
- [32997f7](https://github.com/google/error-prone/commit/32997f77a4bfc79c10c1ddcbfb52b03a046d2e41)
  Bugfix assignment switch analysis in StatementSwitchToExpressionSwitch:
  if an...
- [2dde254](https://github.com/google/error-prone/commit/2dde25434696cc2f87e76b64bf60d8875da36a09)
  Update references to javadoc APIs after the introduction of Markdown doc
  comm...
- [5fef6e0](https://github.com/google/error-prone/commit/5fef6e0ab3414a21c3274059ec2df5579bf76dd7)
  Yet another JUnitIncompatibleType crash fix.
- [c2df1b6](https://github.com/google/error-prone/commit/c2df1b6546ddf3d778a8ab7781a6b45560b1ff9e)
  Refactor comment handling in tokenization to use a new ErrorProneComment
  clas...
- [3fff610](https://github.com/google/error-prone/commit/3fff6105290f69b72641de4ea8cefc7d76996f74)
  Update hamcrest to v2.2
- [6f265dd](https://github.com/google/error-prone/commit/6f265ddb8bcbe225b90f83d1d276b3fc65aee09b)
  Add a disabled regression test for an UnusedVariable bug
- [5eded87](https://github.com/google/error-prone/commit/5eded877e844326b3e00a8e0dd4c225ab8f03617)
  Add an Error Prone check that reimplements javac sunapi warnings
- [9e0fbf7](https://github.com/google/error-prone/commit/9e0fbf705dc98faf2a8ac88cbdb1facc0ba18696)
  Prepare for a change to the return type of JCCompilationUnit#getImports
  in ...
- [13be411](https://github.com/google/error-prone/commit/13be411511ab367bf44c1a100085925255a3751e)
  Handle null != CONST_CASE in YodaCondition
- Additional commits viewable in [compare
  view](https://github.com/google/error-prone/compare/v2.27.1...v2.28.0)

[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.errorprone:error_prone_core&package-manager=maven&previous-version=2.27.1&new-version=2.28.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---


Dependabot commands and options


You can 

(arrow-adbc) branch main updated: chore(java): bump org.assertj:assertj-core from 3.25.3 to 3.26.0 in /java (#1885)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
 new 6a1a11774 chore(java): bump org.assertj:assertj-core from 3.25.3 to 
3.26.0 in /java (#1885)
6a1a11774 is described below

commit 6a1a11774932c467a43115a60bd91975d12a4d27
Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
AuthorDate: Thu Jun 13 21:28:16 2024 -0400

chore(java): bump org.assertj:assertj-core from 3.25.3 to 3.26.0 in /java 
(#1885)

Bumps [org.assertj:assertj-core](https://github.com/assertj/assertj)
from 3.25.3 to 3.26.0.

Release notes
Sourced from [org.assertj:assertj-core's
releases](https://github.com/assertj/assertj/releases).

v.3.26.0
:boom: Breaking Changes
Core

- Delegate OptionalDouble value comparison to Double.compare in hasValue
  assertion [#3411](https://redirect.github.com/assertj/assertj/issues/3411)

  This fixes the comparison of NaN values which wasn't working the way
  the hasValue Javadoc describes.
  The previous behavior can be obtained with getAsDouble:
  assertThat(OptionalDouble.of(Double.NaN).getAsDouble()).isSameAs(Double.NaN);

:no_entry_sign: Deprecated
Core

- Deprecate ObjectAssertFactory in favor of Assertions.assertThat(Object)
- Deprecate AssertionErrorFactory in favor of AssertionErrorCreator
- Deprecate catchThrowableOfType(ThrowingCallable, Class) in favor of
  catchThrowableOfType(Class, ThrowingCallable)
  [#2823](https://redirect.github.com/assertj/assertj/issues/2823)
- Deprecate assertThat(Iterable, AssertFactory), assertThat(Iterable,
  Class) and their respective then variants
  [#3453](https://redirect.github.com/assertj/assertj/issues/3453)

:sparkles: New Features
Core

- Support multiple AfterAssertionErrorCollected callbacks
  [#3313](https://redirect.github.com/assertj/assertj/issues/3313)
- Add InstanceOfAssertFactory for Set instances
  [#3325](https://redirect.github.com/assertj/assertj/issues/3325)
- Add doesNotContainKey and doesNotContainKeys to Guava Multimap
  assertions [#3334](https://redirect.github.com/assertj/assertj/issues/3334)
- Add assertions for JDK YearMonth type
  [#3142](https://redirect.github.com/assertj/assertj/issues/3142)
- Add TemporalAssert type
  [#3404](https://redirect.github.com/assertj/assertj/issues/3404)
- Add ignoringFieldsOfTypesMatchingRegexes
  [#3369](https://redirect.github.com/assertj/assertj/issues/3369)
- Add fail(Throwable) and fail() variants
  [#3204](https://redirect.github.com/assertj/assertj/issues/3204)
- Add isPrivate to Class assertions
- Add doesNot[Start/End]WithWhitespace methods to CharSequence assertions
  [#3441](https://redirect.github.com/assertj/assertj/issues/3441)
- Add createAssert(ValueProvider) to AssertFactory
  [#3377](https://redirect.github.com/assertj/assertj/issues/3377)
- Add values() navigation method to AbstractMapAssert
  [#3297](https://redirect.github.com/assertj/assertj/issues/3297)
- Add bytes()/bytes(Charset)/bytes(String) navigation methods to
  AbstractStringAssert
  [#3232](https://redirect.github.com/assertj/assertj/issues/3232)
- Add doesNotThrowAnyExceptionExcept to AbstractThrowableAssert
  [#3261](https://redirect.github.com/assertj/assertj/issues/3261)
- Add hasPermittedSubclasses to Class assertions
  [#3316](https://redirect.github.com/assertj/assertj/issues/3316)
- Add isUnmodifiable to Iterator assertions
  [#3477](https://redirect.github.com/assertj/assertj/issues/3477)

:bug: Bug Fixes
Core

... (truncated)

Commits

- [b7aae03](https://github.com/assertj/assertj/commit/b7aae0383d4a06b9f7e3ee82aa94039e4ce49711)
  [maven-release-plugin] prepare release assertj-build-3.26.0
- [be4cf7c](https://github.com/assertj/assertj/commit/be4cf7cea4d703a76107217afc2083dbe015b7a4)
  Add isUnmodifiable to Iterator assertions
  ([#3477](https://redirect.github.com/assertj/assertj/issues/3477))
- [6e760db](https://github.com/assertj/assertj/commit/6e760db4f2288f9de0f88a874273bdcfda072cb4)
  chore(deps-dev): bump org.springframework:spring-core from 5.3.35 to
  5.3.36 (...
- [29d2bbe](https://github.com/assertj/assertj/commit/29d2bbeb5c5ba640ce73f63f717719044a5efeff)
  chore(deps): bump byte-buddy.version from 1.14.15 to 1.14.16
  ([#3487](https://redirect.github.com/assertj/assertj/issues/3487))
- [97b642a](https://github.com/assertj/assertj/commit/97b642a0b462fe0ad25255800f9531eaa44c4c15)
  Rebuild default date formats used to parse string as dates when the
  default t...
- [9eeb352](https://github.com/assertj/assertj/commit/9eeb352e1188b8398a952fc9819a5173d44f8fe0)
  Refactor tests covering date string conversion
(arrow-adbc) branch dependabot/maven/java/org.assertj-assertj-core-3.26.0 deleted (was 997aa373a)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch 
dependabot/maven/java/org.assertj-assertj-core-3.26.0
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


 was 997aa373a chore(java): bump org.assertj:assertj-core in /java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow) branch main updated: GH-42101: [Java] Create File for Output Validation in FileRoundtrip (#42115)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 02f461f0c5 GH-42101: [Java] Create File for Output Validation in 
FileRoundtrip (#42115)
02f461f0c5 is described below

commit 02f461f0c51ddb50388edc227e8ec436156094a1
Author: Hyunseok Seo 
AuthorDate: Fri Jun 14 09:49:10 2024 +0900

GH-42101: [Java] Create File for Output Validation in FileRoundtrip (#42115)



### Rationale for this change

Enhance the logic to ensure that the output directory and file are created 
if they do not exist. While the input directory and file are mandatory, the 
output directory and file might not exist.

### What changes are included in this PR?

- [x] Create file if output directory or file does not exist.
- [x] Add unit tests
  - Test with different directories to validate creation of non-existent 
directory
  - Test for non-existent input file

### Are these changes tested?

Yes. Additional unit tests have been added.

### Are there any user-facing changes?

Maybe. Yes.

* GitHub Issue: #42101

Authored-by: Hyunseok Seo 
Signed-off-by: David Li 
---
 .../java/org/apache/arrow/tools/FileRoundtrip.java | 16 --
 .../org/apache/arrow/tools/TestFileRoundtrip.java  | 35 ++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java 
b/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java
index 70e13c518b..45205bab54 100644
--- a/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java
+++ b/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java
@@ -52,13 +52,23 @@ public class FileRoundtrip {
 System.exit(new FileRoundtrip(System.err).run(args));
   }
 
-  private File validateFile(String type, String fileName) {
+  private File validateFile(String type, String fileName) throws IOException {
 if (fileName == null) {
   throw new IllegalArgumentException("missing " + type + " file 
parameter");
 }
 File f = new File(fileName);
-if (!f.exists() || f.isDirectory()) {
-  throw new IllegalArgumentException(type + " file not found: " + 
f.getAbsolutePath());
+if (type.equals("input")) {
+  if (!f.exists() || f.isDirectory()) {
+throw new IllegalArgumentException(type + " file not found: " + 
f.getAbsolutePath());
+  }
+} else if (type.equals("output")) {
+  File parentDir = f.getParentFile();
+  if (parentDir != null && !parentDir.exists()) {
+if (!parentDir.mkdirs()) {
+  throw new IOException(
+  "Failed to create parent directory: " + 
parentDir.getAbsolutePath());
+}
+  }
 }
 return f;
   }
diff --git 
a/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java 
b/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java
index 69b0b48079..a98fefeea7 100644
--- a/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java
+++ b/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java
@@ -19,10 +19,12 @@ package org.apache.arrow.tools;
 import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput;
 import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
 
 import java.io.File;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.InvalidArrowFileException;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Rule;
@@ -32,6 +34,7 @@ import org.junit.rules.TemporaryFolder;
 public class TestFileRoundtrip {
 
   @Rule public TemporaryFolder testFolder = new TemporaryFolder();
+  @Rule public TemporaryFolder testAnotherFolder = new TemporaryFolder();
 
   private BufferAllocator allocator;
 
@@ -58,4 +61,36 @@ public class TestFileRoundtrip {
 
 validateOutput(testOutFile, allocator);
   }
+
+  @Test
+  public void testDiffFolder() throws Exception {
+File testInFile = testFolder.newFile("testIn.arrow");
+File testOutFile = testAnotherFolder.newFile("testOut.arrow");
+
+writeInput(testInFile, allocator);
+
+String[] args = {"-i", testInFile.getAbsolutePath(), "-o", 
testOutFile.getAbsolutePath()};
+int result = new FileRoundtrip(System.err).run(args);
+assertEquals(0, result);
+
+validateOutput(testOutFile, allocator);
+  }
+
+  @Test
+  public void testNotPreparedInput() throws Exception {
+File testInFile = testFolder.newFile("testIn.arrow");
+File testOutFile = testFolder.newFile("testOut.arrow");
+
+String[] args = {"-i", testInFile.getAbsolutePath(), "-o", 
testOutFile.getAbsolutePath()};

(arrow) branch main updated: GH-42030: [Java] Update Unit Tests for Adapter Module (#42038)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 870b315d4b GH-42030: [Java] Update Unit Tests for Adapter Module 
(#42038)
870b315d4b is described below

commit 870b315d4b766a671cd5e478e5b9776beb00ff59
Author: Hyunseok Seo 
AuthorDate: Fri Jun 14 09:48:38 2024 +0900

GH-42030: [Java] Update Unit Tests for Adapter Module (#42038)



### Rationale for this change

Update package from JUnit 4(`org.junit`) to JUnit 5(`org.junit.jupiter`).

### What changes are included in this PR?

- `avro` and `jdbc` module
  - [x] Replacing `org.junit` with `org.junit.jupiter.api`.
  - [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports.
  - [x] Updating annotations such as `@ Before`, `@ After`.
- `@ Before` -> `@ BeforeEach`
- `@ After` -> `@ AfterEach`
- `@ Test` -> `@ Test` with `org.junit.jupiter`
- `@ ClassRule` -> `@ TempDir` and `@ BeforeAll`
  - [x] Updating `Parameterized` test
  - [x] Doing self review for avro
  - [x] Dealing with `java.io.IOException: Failed to delete temp directory` 
on Windows with JDK 11
  - [x] Exploring a more effective structure for `ParameterizedTest` in 
JDBC tests.
  - [x] Doing self review for jdbc
- `orc` module
  - [x] Reviewing the build method
  - [x] Updating annotations such as `@ BeforeAll`, `@ Rule`, `@ 
TemporaryFolder`
  - [x] Doing self review

### Are these changes tested?

Yes, existing tests have passed.

### Are there any user-facing changes?

No.

* GitHub Issue: #42030

Authored-by: Hyunseok Seo 
Signed-off-by: David Li 
---
 .../arrow/adapter/avro/AvroLogicalTypesTest.java   |  2 +-
 .../arrow/adapter/avro/AvroSkipFieldTest.java  |  4 +-
 .../apache/arrow/adapter/avro/AvroTestBase.java| 37 +-
 .../adapter/avro/AvroToArrowIteratorTest.java  | 52 +-
 .../apache/arrow/adapter/avro/AvroToArrowTest.java |  4 +-
 .../adapter/avro/TestWriteReadAvroRecord.java  | 34 -
 .../adapter/jdbc/AbstractJdbcToArrowTest.java  | 16 ++---
 .../arrow/adapter/jdbc/JdbcFieldInfoTest.java  |  4 +-
 .../jdbc/JdbcToArrowCommentMetadataTest.java   | 10 +--
 .../arrow/adapter/jdbc/JdbcToArrowConfigTest.java  | 55 ++-
 .../arrow/adapter/jdbc/JdbcToArrowTestHelper.java  |  8 +--
 .../arrow/adapter/jdbc/ResultSetUtilityTest.java   | 20 +++---
 .../arrow/adapter/jdbc/UnreliableMetaDataTest.java | 81 +++---
 .../jdbc/consumer/AbstractConsumerTest.java|  8 +--
 .../adapter/jdbc/consumer/BinaryConsumerTest.java  |  8 +--
 .../adapter/jdbc/h2/JdbcAliasToArrowTest.java  | 12 ++--
 .../adapter/jdbc/h2/JdbcToArrowArrayTest.java  | 16 ++---
 .../adapter/jdbc/h2/JdbcToArrowCharSetTest.java| 51 ++
 .../adapter/jdbc/h2/JdbcToArrowDataTypesTest.java  | 41 +--
 .../jdbc/h2/JdbcToArrowMapDataTypeTest.java| 20 --
 .../arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java | 40 +--
 .../jdbc/h2/JdbcToArrowOptionalColumnsTest.java| 35 --
 .../arrow/adapter/jdbc/h2/JdbcToArrowTest.java | 53 +++---
 .../adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java   | 41 +--
 .../jdbc/h2/JdbcToArrowVectorIteratorTest.java | 64 +
 .../apache/arrow/adapter/orc/OrcReaderTest.java| 19 +++--
 26 files changed, 373 insertions(+), 362 deletions(-)

diff --git 
a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java
 
b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java
index d8eefc715f..b114f2b00c 100644
--- 
a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java
+++ 
b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java
@@ -32,7 +32,7 @@ import org.apache.arrow.vector.util.DateUtility;
 import org.apache.avro.Conversions;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericFixed;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class AvroLogicalTypesTest extends AvroTestBase {
 
diff --git 
a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java
 
b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java
index 3335ee5a8f..3120c26638 100644
--- 
a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java
+++ 
b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java
@@ -16,7 +16,7 @@
  */
 package org.apache.arrow.adapter.avro;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.nio.ByteBuffer;
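Distilled, the mechanical shape of the migration looks like the sketch below (class and method names here are hypothetical):

```
// JUnit 4 -> JUnit 5: jupiter imports, statically imported assertions,
// and renamed lifecycle annotations.
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class ExampleAdapterTest {
  @BeforeEach
  void setUp() {} // was @Before (org.junit)

  @AfterEach
  void tearDown() {} // was @After (org.junit)

  @Test
  void convertsOneRow() {
    assertEquals(1, 1); // was org.junit.Assert.assertEquals
  }
}
```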
 

(arrow) branch main updated: MINOR: [Dev][Docs] Fix references to archery docker run command (#42148)

2024-06-13 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 377506ce7c MINOR: [Dev][Docs] Fix references to archery docker run 
command (#42148)
377506ce7c is described below

commit 377506ce7ce962987d320d760b208d96e121cf8c
Author: Bryce Mecum 
AuthorDate: Thu Jun 13 16:16:04 2024 -0800

MINOR: [Dev][Docs] Fix references to archery docker run command (#42148)

### Rationale for this change

I noticed two instances where we refer to "archery run" but I think the 
command is "archery docker run".

```
❯ archery run
Usage: archery [OPTIONS] COMMAND [ARGS]...
Try 'archery -h' for help.

Error: No such command 'run'.
❯ archery docker run --help
Usage: archery docker run [OPTIONS] IMAGE [COMMAND]
```

### What changes are included in this PR?

Fixed two places we refer to "archery run".

### Are these changes tested?

No.

### Are there any user-facing changes?

No.

Authored-by: Bryce Mecum 
Signed-off-by: Sutou Kouhei 
---
 docker-compose.yml | 2 +-
 docs/source/developers/continuous_integration/overview.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 9f575e2030..533a656660 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -99,7 +99,7 @@ x-with-gpus:
 x-hierarchy:
   # This section is used by the archery tool to enable building nested images,
   # so it is enough to call:
-  #   archery run debian-ruby
+  #   archery docker run debian-ruby
   # instead of a sequence of docker-compose commands:
   #   docker-compose build debian-cpp
   #   docker-compose build debian-c-glib
diff --git a/docs/source/developers/continuous_integration/overview.rst 
b/docs/source/developers/continuous_integration/overview.rst
index 93e74f269d..9d39c90cfd 100644
--- a/docs/source/developers/continuous_integration/overview.rst
+++ b/docs/source/developers/continuous_integration/overview.rst
@@ -30,7 +30,7 @@ Some files central to Arrow CI are:
 
 We use :ref:`Docker` in order to have portable and reproducible 
Linux builds, as well as running Windows builds in Windows containers.  We use 
:ref:`Archery` and :ref:`Crossbow` to help co-ordinate the 
various CI tasks.
 
-One thing to note is that some of the services defined in 
``docker-compose.yml`` are interdependent.  When running services locally, you 
must either manually build its dependencies first, or build it via the use of 
``archery run ...`` which automatically finds and builds dependencies.
+One thing to note is that some of the services defined in 
``docker-compose.yml`` are interdependent.  When running services locally, you 
must either manually build its dependencies first, or build it via the use of 
``archery docker run ...`` which automatically finds and builds dependencies.
 
 There are numerous important directories in the Arrow project which relate to 
CI:
 



(arrow) branch main updated: GH-41947: [Java] Support catalog in JDBC driver with session options (#42035)

2024-06-13 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new cecd7710f3 GH-41947: [Java] Support catalog in JDBC driver with 
session options (#42035)
cecd7710f3 is described below

commit cecd7710f31ae99758c58bcff42ec824b4291156
Author: Steve Lord <72518652+stevelorddre...@users.noreply.github.com>
AuthorDate: Thu Jun 13 16:40:48 2024 -0700

GH-41947: [Java] Support catalog in JDBC driver with session options 
(#42035)



### Rationale for this change

See Issue https://github.com/apache/arrow/issues/41947

### What changes are included in this PR?

### Are these changes tested?

Yes

### Are there any user-facing changes?

Introduction of an optional catalog query parameter in the JDBC URL string.

* GitHub Issue: #41947

Authored-by: Steve Lord 
Signed-off-by: David Li 
---
 java/flight/flight-sql-jdbc-core/pom.xml   |  7 ++
 .../arrow/driver/jdbc/ArrowFlightConnection.java   |  2 +
 .../jdbc/client/ArrowFlightSqlClientHandler.java   | 74 --
 .../utils/ArrowFlightConnectionConfigImpl.java | 12 +++-
 .../ArrowFlightSqlClientHandlerBuilderTest.java| 27 
 .../utils/ArrowFlightConnectionConfigImplTest.java |  7 ++
 6 files changed, 123 insertions(+), 6 deletions(-)

diff --git a/java/flight/flight-sql-jdbc-core/pom.xml 
b/java/flight/flight-sql-jdbc-core/pom.xml
index 7fe4e7f18c..7ea96b3e55 100644
--- a/java/flight/flight-sql-jdbc-core/pom.xml
+++ b/java/flight/flight-sql-jdbc-core/pom.xml
@@ -137,6 +137,13 @@ under the License.
   bcpkix-jdk18on
   1.78.1
 
+
+
+  com.google.code.findbugs
+  jsr305
+  3.0.2
+  compile
+
   
 
   
diff --git 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java
 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java
index 24d72eb3f0..c1b1c8f8e6 100644
--- 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java
+++ 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java
@@ -112,6 +112,7 @@ public final class ArrowFlightConnection extends 
AvaticaConnection {
   .withCallOptions(config.toCallOption())
   .withRetainCookies(config.retainCookies())
   .withRetainAuth(config.retainAuth())
+  .withCatalog(config.getCatalog())
   .build();
 } catch (final SQLException e) {
   try {
@@ -171,6 +172,7 @@ public final class ArrowFlightConnection extends 
AvaticaConnection {
 
   @Override
   public void close() throws SQLException {
+clientHandler.close();
 if (executorService != null) {
   executorService.shutdown();
 }
diff --git 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java
 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java
index f3553ae2f0..845f5372d3 100644
--- 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java
+++ 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java
@@ -16,6 +16,7 @@
  */
 package org.apache.arrow.driver.jdbc.client;
 
+import com.google.common.collect.ImmutableMap;
 import java.io.IOException;
 import java.net.URI;
 import java.security.GeneralSecurityException;
@@ -25,9 +26,14 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
+import javax.annotation.Nullable;
 import org.apache.arrow.driver.jdbc.client.utils.ClientAuthenticationUtils;
 import org.apache.arrow.flight.CallOption;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.CloseSessionRequest;
 import org.apache.arrow.flight.FlightClient;
 import org.apache.arrow.flight.FlightClientMiddleware;
 import org.apache.arrow.flight.FlightEndpoint;
@@ -36,6 +42,10 @@ import org.apache.arrow.flight.FlightRuntimeException;
 import org.apache.arrow.flight.FlightStatusCode;
 import org.apache.arrow.flight.Location;
 import org.apache.arrow.flight.LocationSchemes;
+import org.apache.arrow.flight.SessionOptionValue;
+import org.apache.arrow.flight.SessionOptionValueFactory;
+import org.apache.arrow.flight.SetSessionOptionsRequest;
+import org.apache.arrow.flight.SetSessionOptionsResult;
 import org.apache.arrow.flight.auth2.BearerCredentialWriter;
 import org.apache.arrow.flight.auth2.ClientBearerHeaderHandler;
 import 
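A hedged sketch of the user-facing side: the catalog travels as a query parameter in the JDBC URL (the parameter name is inferred from config.getCatalog() above; host, port, and the other options are illustrative):

```
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class CatalogExample {
  public static void main(String[] args) throws SQLException {
    // On connect, the client handler forwards the catalog to the server
    // as a session option (SetSessionOptionsRequest, per the imports above).
    String url = "jdbc:arrow-flight-sql://localhost:32010/"
        + "?useEncryption=false&catalog=my_catalog";
    try (Connection connection = DriverManager.getConnection(url)) {
      System.out.println("connected; catalog session option set");
    }
  }
}
```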

(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new a764e466 update documentation for tag main
a764e466 is described below

commit a764e46656b2bece65b438cf1d5f8f305cb4ca24
Author: GitHub Actions 
AuthorDate: Thu Jun 13 18:39:05 2024 +0000

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 489f5ac7..97ddbebc 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-13T01:17Z
+last_built: 2024-06-13T18:38Z
 



(arrow) branch main updated: GH-42126: [C++] Move TakeXXX free functions into TakeMetaFunction and make them private (#42127)

2024-06-13 Thread felipecrv
This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 7d84c1e05b GH-42126: [C++] Move TakeXXX free functions into 
TakeMetaFunction and make them private (#42127)
7d84c1e05b is described below

commit 7d84c1e05bc358441da8bc8b214777e41868a101
Author: Felipe Oliveira Carvalho 
AuthorDate: Thu Jun 13 18:52:21 2024 -0300

GH-42126: [C++] Move TakeXXX free functions into TakeMetaFunction and make 
them private (#42127)

### Rationale for this change

Move TakeXXX free functions into `TakeMetaFunction` and make them private

### What changes are included in this PR?

Code move and some small refactorings in preparation for #41700.

### Are these changes tested?

By existing tests.
* GitHub Issue: #42126

Authored-by: Felipe Oliveira Carvalho 
Signed-off-by: Felipe Oliveira Carvalho 
---
 .../kernels/vector_selection_take_internal.cc  | 252 +++--
 1 file changed, 128 insertions(+), 124 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc 
b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
index dee80e9d25..8b3f0431e6 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
@@ -31,6 +31,7 @@
 #include "arrow/compute/kernels/gather_internal.h"
 #include "arrow/compute/kernels/vector_selection_internal.h"
 #include "arrow/compute/kernels/vector_selection_take_internal.h"
+#include "arrow/compute/registry.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
@@ -536,142 +537,144 @@ Status ExtensionTake(KernelContext* ctx, const 
ExecSpan& batch, ExecResult* out)
 // R -> RecordBatch
 // T -> Table
 
-Result<std::shared_ptr<ArrayData>> TakeAAA(const std::shared_ptr<ArrayData>& values,
-                                           const std::shared_ptr<ArrayData>& indices,
-                                           const TakeOptions& options, ExecContext* ctx) {
-  ARROW_ASSIGN_OR_RAISE(Datum result,
-                        CallFunction("array_take", {values, indices}, &options, ctx));
-  return result.array();
-}
+const FunctionDoc take_doc(
+    "Select values from an input based on indices from another array",
+    ("The output is populated with values from the input at positions\n"
+     "given by `indices`.  Nulls in `indices` emit null in the output."),
+    {"input", "indices"}, "TakeOptions");
 
-Result<std::shared_ptr<ChunkedArray>> TakeCAC(const ChunkedArray& values,
-                                              const Array& indices,
-                                              const TakeOptions& options,
-                                              ExecContext* ctx) {
-  std::shared_ptr<Array> values_array;
-  if (values.num_chunks() == 1) {
-    // Case 1: `values` has a single chunk, so just use it
-    values_array = values.chunk(0);
-  } else {
-    // TODO Case 2: See if all `indices` fall in the same chunk and call Array Take on it
-    // See
-    // https://github.com/apache/arrow/blob/6f2c9041137001f7a9212f244b51bc004efc29af/r/src/compute.cpp#L123-L151
-    // TODO Case 3: If indices are sorted, can slice them and call Array Take
-    // (these are relevant to TakeCCC as well)
-
-    // Case 4: Else, concatenate chunks and call Array Take
-    if (values.chunks().empty()) {
-      ARROW_ASSIGN_OR_RAISE(
-          values_array, MakeArrayOfNull(values.type(), /*length=*/0, ctx->memory_pool()));
-    } else {
-      ARROW_ASSIGN_OR_RAISE(values_array,
-                            Concatenate(values.chunks(), ctx->memory_pool()));
-    }
+// Metafunction for dispatching to different Take implementations other than
+// Array-Array.
+class TakeMetaFunction : public MetaFunction {
+ public:
+  TakeMetaFunction()
+      : MetaFunction("take", Arity::Binary(), take_doc, GetDefaultTakeOptions()) {}
+
+  static Result<Datum> CallArrayTake(const std::vector<Datum>& args,
+                                     const TakeOptions& options, ExecContext* ctx) {
+    ARROW_ASSIGN_OR_RAISE(auto array_take_func,
+                          ctx->func_registry()->GetFunction("array_take"));
+    return array_take_func->Execute(args, &options, ctx);
   }
-  // Call Array Take on our single chunk
-  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> new_chunk,
-                        TakeAAA(values_array->data(), indices.data(), options, ctx));
-  std::vector<std::shared_ptr<Array>> chunks = {MakeArray(new_chunk)};
-  return std::make_shared<ChunkedArray>(std::move(chunks));
-}
 
-Result<std::shared_ptr<ChunkedArray>> TakeCCC(const ChunkedArray& values,
-                                              const ChunkedArray& indices,
-                                              const TakeOptions& options,
-                                              ExecContext* ctx) {
-  // XXX: for every chunk in indices, values are gathered from all chunks

(arrow-nanoarrow) branch main updated: chore(c): Add UBSAN / ASAN to Meson CI job (#523)

2024-06-13 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 4ed06316 chore(c): Add UBSAN / ASAN to Meson CI job (#523)
4ed06316 is described below

commit 4ed0631649d0fe61a0befb048bb8037b9abde99d
Author: William Ayd 
AuthorDate: Thu Jun 13 14:37:07 2024 -0400

chore(c): Add UBSAN / ASAN to Meson CI job (#523)

Figured this is a pretty easy addition, and might help catch more
mistakes up front if this becomes a standard CI job
---
 ci/scripts/build-with-meson.sh  | 44 ++---
 src/nanoarrow/nanoarrow_ipc_decoder_test.cc |  2 +-
 src/nanoarrow/nanoarrow_ipc_reader_test.cc  |  4 +--
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/ci/scripts/build-with-meson.sh b/ci/scripts/build-with-meson.sh
index 85bff22b..80fbed3e 100755
--- a/ci/scripts/build-with-meson.sh
+++ b/ci/scripts/build-with-meson.sh
@@ -65,16 +65,52 @@ function main() {
 
 pushd "${SANDBOX_DIR}"
 
-show_header "Run test suite"
-meson configure -Dtests=true -Db_coverage=true -Dipc=true
+show_header "Run ASAN/UBSAN test suite"
+meson configure \
+  -Dbuildtype=debugoptimized \
+  -Db_sanitize="address,undefined" \
+  -Dtests=true \
+  -Dipc=true \
+  -Dbenchmarks=false \
+  -Db_coverage=false
 meson compile
-meson test --wrap='valgrind --track-origins=yes --leak-check=full' 
--print-errorlogs
+export ASAN_OPTIONS=allocator_may_return_null=1  # allow ENOMEM tests
+meson test --print-errorlogs
+
+show_header "Run valgrind test suite"
+meson configure \
+  -Dbuildtype=debugoptimized \
+  -Db_sanitize=none \
+  -Dtests=true \
+  -Dipc=true \
+  -Dbenchmarks=false \
+  -Db_coverage=false
+meson compile
meson test --wrap='valgrind --track-origins=yes --leak-check=full' --print-errorlogs
 
 show_header "Run benchmarks"
-meson configure -Dbenchmarks=true
+meson configure \
+  -Dbuildtype=release \
+  -Db_sanitize=none \
+  -Dtests=false \
+  -Dipc=true \
+  -Dbenchmarks=true \
+  -Db_coverage=false
 meson compile
 meson test --benchmark --print-errorlogs
 
+show_header "Run coverage test suite"
+meson configure \
+  -Dbuildtype=release \
+  -Db_sanitize=none \
+  -Dtests=true \
+  -Dipc=true \
+  -Dbenchmarks=false \
+  -Db_coverage=true
+
+meson compile
+meson test --print-errorlogs
+
 show_header "Generate coverage reports"
 ninja coverage
 lcov --list meson-logs/coverage.info
diff --git a/src/nanoarrow/nanoarrow_ipc_decoder_test.cc 
b/src/nanoarrow/nanoarrow_ipc_decoder_test.cc
index 4141b9b0..6f4e4093 100644
--- a/src/nanoarrow/nanoarrow_ipc_decoder_test.cc
+++ b/src/nanoarrow/nanoarrow_ipc_decoder_test.cc
@@ -252,7 +252,7 @@ TEST(NanoarrowIpcTest, NanoarrowIpcVerifyInvalid) {
 
 memcpy(simple_schema_invalid, kSimpleSchema, i);
 memcpy(simple_schema_invalid + i, kSimpleSchema + (i + 1),
-   (sizeof(simple_schema_invalid) - i));
+   (sizeof(simple_schema_invalid) - i - 1));
 
 ArrowErrorInit();
 ASSERT_NE(ArrowIpcDecoderVerifyHeader(, data, ), 
NANOARROW_OK);
diff --git a/src/nanoarrow/nanoarrow_ipc_reader_test.cc 
b/src/nanoarrow/nanoarrow_ipc_reader_test.cc
index 95dd54d8..8010b3bd 100644
--- a/src/nanoarrow/nanoarrow_ipc_reader_test.cc
+++ b/src/nanoarrow/nanoarrow_ipc_reader_test.cc
@@ -360,7 +360,7 @@ TEST(NanoarrowIpcTest, StreamReaderInvalidBuffer) {
 
 memcpy(simple_stream_invalid, kSimpleSchema, i);
 memcpy(simple_stream_invalid + i, kSimpleSchema + (i + 1),
-   (sizeof(kSimpleSchema) - i));
+   (sizeof(kSimpleSchema) - i - 1));
 
 ArrowBufferInit(_buffer);
 ASSERT_EQ(ArrowBufferAppendBufferView(_buffer, data), NANOARROW_OK);
@@ -382,7 +382,7 @@ TEST(NanoarrowIpcTest, StreamReaderInvalidBuffer) {
 
 memcpy(simple_stream_invalid + sizeof(kSimpleSchema), kSimpleRecordBatch, 
i);
 memcpy(simple_stream_invalid + sizeof(kSimpleSchema) + i,
-   kSimpleRecordBatch + (i + 1), (sizeof(kSimpleRecordBatch) - i));
+   kSimpleRecordBatch + (i + 1), (sizeof(kSimpleRecordBatch) - i - 1));
 
 ArrowBufferInit(_buffer);
 ASSERT_EQ(ArrowBufferAppendBufferView(_buffer, data), NANOARROW_OK);



(arrow-rs) branch master updated: Fix clippy for object_store (#5883)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new d32f68d6ec7 Fix clippy for object_store (#5883)
d32f68d6ec7 is described below

commit d32f68d6ec737673068f5b2f410a2febbff90ba1
Author: Andrew Lamb 
AuthorDate: Thu Jun 13 14:52:18 2024 -0400

Fix clippy for object_store (#5883)

* Fix clippy for object_store

* Update object_store/src/aws/credential.rs
---
 object_store/src/aws/credential.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/object_store/src/aws/credential.rs 
b/object_store/src/aws/credential.rs
index 08831fd5123..01cfb342745 100644
--- a/object_store/src/aws/credential.rs
+++ b/object_store/src/aws/credential.rs
@@ -424,6 +424,8 @@ fn canonicalize_headers(header_map: &HeaderMap) -> (String, String) {
 /// 
 
 #[derive(Debug)]
 pub struct InstanceCredentialProvider {
+    // https://github.com/apache/arrow-rs/issues/5884
+    #[allow(dead_code)]
     pub cache: TokenCache<Arc<AwsCredential>>,
     pub imdsv1_fallback: bool,
     pub metadata_endpoint: String,



(arrow) branch main updated: GH-42140: [C++] Avoid invalid accesses in parquet-encoding-benchmark (#42141)

2024-06-13 Thread apitrou
This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new fc7b800a0e GH-42140: [C++] Avoid invalid accesses in 
parquet-encoding-benchmark (#42141)
fc7b800a0e is described below

commit fc7b800a0ea38713391dd8772bb77583b6a56e6b
Author: Antoine Pitrou 
AuthorDate: Thu Jun 13 21:11:42 2024 +0200

GH-42140: [C++] Avoid invalid accesses in parquet-encoding-benchmark 
(#42141)

### Rationale for this change

`parquet-encoding-benchmark`  would make invalid memory accesses if more 
than one repetition per benchmark is requested by the user.

This was initially noticed in https://github.com/apache/arrow/pull/41205 : 
switching to jemalloc would crash the specific benchmark(s) on a macOS ARM 
machine.

### What changes are included in this PR?

Make sure benchmark data initialization is idempotent.
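
The bug class is language-agnostic. A minimal Rust analogue (not the benchmark's actual C++): `reserve` plus `push` appends again on every repetition, while `resize` plus indexed writes makes setup idempotent.

    fn setup_non_idempotent(values: &mut Vec<u32>, n: usize) {
        values.reserve(n); // only capacity; a second call appends n more items
        for i in 0..n {
            values.push(i as u32);
        }
    }

    fn setup_idempotent(values: &mut Vec<u32>, n: usize) {
        values.resize(n, 0); // fixes the length up front
        for i in 0..n {
            values[i] = i as u32; // overwrites in place; safe to call repeatedly
        }
    }

    fn main() {
        let mut v = Vec::new();
        setup_non_idempotent(&mut v, 4);
        setup_non_idempotent(&mut v, 4);
        assert_eq!(v.len(), 8); // state leaked across "repetitions"

        let mut w = Vec::new();
        setup_idempotent(&mut w, 4);
        setup_idempotent(&mut w, 4);
        assert_eq!(w.len(), 4); // idempotent
    }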

### Are these changes tested?

Locally using Valgrind.

### Are there any user-facing changes?

No.
* GitHub Issue: #42140

Authored-by: Antoine Pitrou 
Signed-off-by: Antoine Pitrou 
---
 cpp/src/parquet/encoding_benchmark.cc | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/cpp/src/parquet/encoding_benchmark.cc 
b/cpp/src/parquet/encoding_benchmark.cc
index e74a9f55b1..34d12a624f 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -1233,12 +1233,10 @@ class BenchmarkDecodeArrowByteArray : public BenchmarkDecodeArrowBase
     valid_bits_ = input_array_->null_bitmap_data();
     total_size_ = input_array_->data()->buffers[2]->size();
 
-    values_.reserve(num_values_);
+    values_.resize(num_values_);
     const auto& binary_array = static_cast<const BinaryArray&>(*input_array_);
     for (int64_t i = 0; i < binary_array.length(); i++) {
-      auto view = binary_array.GetView(i);
-      values_.emplace_back(static_cast<uint32_t>(view.length()),
-                           reinterpret_cast<const uint8_t*>(view.data()));
+      values_[i] = binary_array.GetView(i);
     }
   }
 
@@ -1431,10 +1429,10 @@ class BenchmarkDecodeArrowBoolean : public BenchmarkDecodeArrowBase
     // so, we uses this as "total_size" for the benchmark.
     total_size_ = ::arrow::bit_util::BytesForBits(num_values_);
 
-    values_.reserve(num_values_);
+    values_.resize(num_values_);
     const auto& boolean_array = static_cast<const BooleanArray&>(*input_array_);
     for (int64_t i = 0; i < boolean_array.length(); i++) {
-      values_.push_back(boolean_array.Value(i));
+      values_[i] = boolean_array.Value(i);
     }
   }
 



(arrow-rs) branch asf-site updated: deploy: e8285bea0eca99316590fcefc8816e1df6581a5c

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 00e3dc2c2d0 deploy: e8285bea0eca99316590fcefc8816e1df6581a5c
00e3dc2c2d0 is described below

commit 00e3dc2c2d0f041646279f5c341658d89b4a854e
Author: alamb 
AuthorDate: Thu Jun 13 20:12:30 2024 +

deploy: e8285bea0eca99316590fcefc8816e1df6581a5c
---
 arrow_cast/cast/fn.cast_byte_to_view.html  |  2 +-
 arrow_cast/cast/fn.cast_view_to_byte.html  |  2 +-
 arrow_cast/cast/index.html |  2 +-
 parquet/arrow/buffer/offset_buffer/index.html  |  2 +-
 .../buffer/offset_buffer/struct.OffsetBuffer.html  |  4 +-
 .../record_reader/buffer/trait.ValuesBuffer.html   |  2 +-
 src/arrow_cast/cast/mod.rs.html| 62 +-
 src/parquet/arrow/buffer/offset_buffer.rs.html |  8 +--
 8 files changed, 25 insertions(+), 59 deletions(-)


(arrow-rs) branch master updated: clean up ByteView construction (#5879)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new e8285bea0ec clean up ByteView construction (#5879)
e8285bea0ec is described below

commit e8285bea0eca99316590fcefc8816e1df6581a5c
Author: Xiangpeng Hao 
AuthorDate: Thu Jun 13 16:09:33 2024 -0400

clean up ByteView construction (#5879)

Co-authored-by: Andrew Lamb 
---
 arrow-cast/src/cast/mod.rs| 44 +--
 parquet/src/arrow/buffer/offset_buffer.rs |  7 ++---
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 354c31af695..55f2ed72836 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -46,7 +46,7 @@ use crate::cast::dictionary::*;
 use crate::cast::list::*;
 use crate::cast::string::*;
 
-use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer};
+use arrow_buffer::IntervalMonthDayNano;
 use arrow_data::ByteView;
 use chrono::{NaiveTime, Offset, TimeZone, Utc};
 use std::cmp::Ordering;
@@ -2341,47 +2341,29 @@ where
 FROM::Offset: OffsetSizeTrait + ToPrimitive,
 V: ByteViewType,
 {
-    let data = array.to_data();
-    assert_eq!(data.data_type(), &FROM::DATA_TYPE);
-
+    let byte_array: &GenericByteArray<FROM> = array.as_bytes();
     let len = array.len();
-    let str_values_buf = data.buffers()[1].clone();
-    let offsets = data.buffers()[0].typed_data::<FROM::Offset>();
+    let str_values_buf = byte_array.values().clone();
+    let offsets = byte_array.offsets();
 
-    let mut views_builder = BufferBuilder::<u128>::new(len);
-    for w in offsets.windows(2) {
+    let mut views_builder = GenericByteViewBuilder::<V>::with_capacity(len);
+    let block = views_builder.append_block(str_values_buf);
+    for (i, w) in offsets.windows(2).enumerate() {
         let offset = w[0].to_u32().unwrap();
         let end = w[1].to_u32().unwrap();
-        let value_buf = &str_values_buf[offset as usize..end as usize];
         let length = end - offset;
 
-        if length <= 12 {
-            let mut view_buffer = [0; 16];
-            view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
-            view_buffer[4..4 + value_buf.len()].copy_from_slice(value_buf);
-            views_builder.append(u128::from_le_bytes(view_buffer));
+        if byte_array.is_null(i) {
+            views_builder.append_null();
         } else {
-            let view = ByteView {
-                length,
-                prefix: u32::from_le_bytes(value_buf[0..4].try_into().unwrap()),
-                buffer_index: 0,
-                offset,
-            };
-            views_builder.append(view.into());
+            // Safety: the input was a valid array so it valid UTF8 (if string). And
+            // all offsets were valid and we created the views correctly
+            unsafe { views_builder.append_view_unchecked(block, offset, length) }
         }
     }
 
     assert_eq!(views_builder.len(), len);
-
-    // Safety: the input was a valid array so it valid UTF8 (if string). And
-    // all offsets were valid and we created the views correctly
-    Ok(Arc::new(unsafe {
-        GenericByteViewArray::<V>::new_unchecked(
-            ScalarBuffer::new(views_builder.finish(), 0, len),
-            vec![str_values_buf],
-            data.nulls().cloned(),
-        )
-    }))
+    Ok(Arc::new(views_builder.finish()))
 }
 
 /// Helper function to cast from one `ByteViewType` array to `ByteArrayType` 
array.
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs 
b/parquet/src/arrow/buffer/offset_buffer.rs
index 181e69c669a..806f144d966 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -164,9 +164,10 @@ impl OffsetBuffer {
 let len = (end - start).to_usize().unwrap();
 
 if len != 0 {
-                builder
-                    .try_append_view(block, start.as_usize() as u32, len as u32)
-                    .unwrap();
+                // Safety: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType
+                unsafe {
+                    builder.append_view_unchecked(block, start.as_usize() as u32, len as u32);
+                }
 } else {
 builder.append_null();
 }



(arrow-adbc) branch main updated: feat(csharp/src/Drivers/Apache/Spark): extend SQL type name parsing for all types (#1911)

2024-06-13 Thread curth
This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
 new 319ba01b6 feat(csharp/src/Drivers/Apache/Spark): extend SQL type name 
parsing for all types (#1911)
319ba01b6 is described below

commit 319ba01b60109e824fdf87b30dc2a9f394767adf
Author: Bruce Irschick 
AuthorDate: Thu Jun 13 09:53:40 2024 -0700

feat(csharp/src/Drivers/Apache/Spark): extend SQL type name parsing for all 
types (#1911)

Extend SQL type name parsing to all possible types for Spark.

Additional support for:
* ARRAY
* BIGINT
* BINARY
* BOOLEAN
* DATE
* DOUBLE
* FLOAT
* INTEGER
* JAVA_OBJECT
* SMALLINT
* STRUCT
* TIMESTAMP
* TIMESTAMP_WITH_TIMEZONE
* TINYINT

Add extensive tests for SQL type name parsing
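
For intuition, a small Rust sketch — hypothetical, not the driver's C# implementation — of the core task such a parser performs: splitting a SQL type name like DECIMAL(10,2) into a base type name and its numeric arguments.

    /// Split a SQL type name such as "DECIMAL(10,2)" or "VARCHAR(255)" into
    /// an upper-cased base name and its numeric arguments, if any.
    fn parse_sql_type_name(name: &str) -> (String, Vec<i32>) {
        let trimmed = name.trim();
        if let (Some(open), true) = (trimmed.find('('), trimmed.ends_with(')')) {
            let base = trimmed[..open].trim().to_uppercase();
            let args = trimmed[open + 1..trimmed.len() - 1]
                .split(',')
                .filter_map(|a| a.trim().parse::<i32>().ok())
                .collect();
            (base, args)
        } else {
            (trimmed.to_uppercase(), Vec::new())
        }
    }

    fn main() {
        assert_eq!(
            parse_sql_type_name("decimal(10,2)"),
            ("DECIMAL".to_string(), vec![10, 2])
        );
        assert_eq!(
            parse_sql_type_name("TIMESTAMP"),
            ("TIMESTAMP".to_string(), vec![])
        );
    }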
---
 csharp/src/Drivers/Apache/Spark/SparkConnection.cs |  42 +--
 .../src/Drivers/Apache/Spark/SqlTypeNameParser.cs  | 387 +++--
 csharp/test/Drivers/Apache/Spark/DriverTests.cs|   3 +
 .../Drivers/Apache/Spark/SqlTypeNameParserTests.cs | 316 +
 4 files changed, 698 insertions(+), 50 deletions(-)

diff --git a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs 
b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
index 798fdeec2..45111446c 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkConnection.cs
@@ -708,7 +708,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
 case (short)ColumnTypeId.DECIMAL:
 case (short)ColumnTypeId.NUMERIC:
 {
-SqlDecimalParserResult result = new 
SqlDecimalTypeParser().ParseOrDefault(typeName, new 
SqlDecimalParserResult(typeName));
+SqlDecimalParserResult result = 
SqlTypeNameParser.Parse(typeName, colType);
 tableInfo?.Precision.Add(result.Precision);
 tableInfo?.Scale.Add((short)result.Scale);
 tableInfo?.BaseTypeName.Add(result.BaseTypeName);
@@ -717,30 +717,26 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
 
 case (short)ColumnTypeId.CHAR:
 case (short)ColumnTypeId.NCHAR:
-{
-bool success = new 
SqlCharTypeParser().TryParse(typeName, out SqlCharVarcharParserResult? result);
-tableInfo?.Precision.Add(success ? result!.ColumnSize 
: SqlVarcharTypeParser.VarcharColumnSizeDefault);
-tableInfo?.Scale.Add(null);
-tableInfo?.BaseTypeName.Add(success ? 
result!.BaseTypeName : "CHAR");
-break;
-}
 case (short)ColumnTypeId.VARCHAR:
 case (short)ColumnTypeId.LONGVARCHAR:
 case (short)ColumnTypeId.LONGNVARCHAR:
 case (short)ColumnTypeId.NVARCHAR:
 {
-bool success = new 
SqlVarcharTypeParser().TryParse(typeName, out SqlCharVarcharParserResult? 
result);
-tableInfo?.Precision.Add(success ? result!.ColumnSize 
: SqlVarcharTypeParser.VarcharColumnSizeDefault);
+SqlCharVarcharParserResult result = 
SqlTypeNameParser.Parse(typeName, colType);
+tableInfo?.Precision.Add(result.ColumnSize);
 tableInfo?.Scale.Add(null);
-tableInfo?.BaseTypeName.Add(success ? 
result!.BaseTypeName : "STRING");
+tableInfo?.BaseTypeName.Add(result.BaseTypeName);
 break;
 }
 
 default:
-tableInfo?.Precision.Add(null);
-tableInfo?.Scale.Add(null);
-tableInfo?.BaseTypeName.Add(typeName);
-break;
+{
+SqlTypeNameParserResult result = 
SqlTypeNameParser.Parse(typeName, colType);
+tableInfo?.Precision.Add(null);
+tableInfo?.Scale.Add(null);
+tableInfo?.BaseTypeName.Add(result.BaseTypeName);
+break;
+}
 }
 }
 
@@ -783,8 +779,8 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
 case (int)ColumnTypeId.NUMERIC:
 // Note: parsing the type name for SQL DECIMAL types as 
the precision and scale values
 // are not returned in the Thrift call to GetColumns
-return new SqlDecimalTypeParser()
-.ParseOrDefault(typeName, new 
SqlDecimalParserResult(typeName))
+return SqlTypeNameParser
+.Parse(typeName, columnTypeId)
   

(arrow-rs) branch asf-site updated: deploy: d32f68d6ec737673068f5b2f410a2febbff90ba1

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 75b9bde3d19 deploy: d32f68d6ec737673068f5b2f410a2febbff90ba1
75b9bde3d19 is described below

commit 75b9bde3d193dec8d98c8b392a1edef981ed5842
Author: alamb 
AuthorDate: Thu Jun 13 18:54:48 2024 +

deploy: d32f68d6ec737673068f5b2f410a2febbff90ba1
---
 arrow/pyarrow/struct.ArrowException.html  | 2 +-
 arrow_flight/error/enum.FlightError.html  | 2 +-
 arrow_flight/gen/struct.FlightData.html   | 2 +-
 arrow_flight/gen/struct.FlightEndpoint.html   | 2 +-
 arrow_flight/gen/struct.PollInfo.html | 2 +-
 arrow_flight/gen/struct.SchemaResult.html | 2 +-
 arrow_flight/gen/struct.Ticket.html   | 2 +-
 arrow_flight/sql/metadata/sql_info/enum.SqlInfoValue.html | 2 +-
 arrow_flight/struct.FlightData.html   | 2 +-
 arrow_flight/struct.FlightEndpoint.html   | 2 +-
 arrow_flight/struct.PollInfo.html | 2 +-
 arrow_flight/struct.SchemaResult.html | 2 +-
 arrow_flight/struct.Ticket.html   | 2 +-
 arrow_integration_test/struct.ArrowJsonField.html | 2 +-
 help.html | 2 +-
 index.html| 2 +-
 parquet_derive_test/struct.ACompleteRecord.html   | 2 +-
 parquet_derive_test/struct.APartiallyCompleteRecord.html  | 2 +-
 parquet_fromcsv/enum.ParquetFromCsvError.html | 2 +-
 search-index.js   | 8 
 settings.html | 2 +-
 21 files changed, 24 insertions(+), 24 deletions(-)


(arrow-rs) branch master updated: feat: implement take for dense union array (#5873)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 8e476aee95a feat: implement take for dense union array (#5873)
8e476aee95a is described below

commit 8e476aee95affa20122bc72fc7a8b701763a26ad
Author: gstvg <28798827+gs...@users.noreply.github.com>
AuthorDate: Thu Jun 13 12:44:03 2024 -0300

feat: implement take for dense union array (#5873)
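
The core trick, visible in the diff below, is that after the type ids are themselves taken, each row's new offset is just a running per-type counter. A standalone Rust sketch of that remapping step (simplified from the PR; assumes non-negative type ids, as Arrow unions use):

    // After `take` has reordered the dense union's type ids, the offset of a
    // row is the number of earlier rows that share its type id.
    fn remap_dense_offsets(taken_type_ids: &[i8]) -> Vec<i32> {
        let mut child_offsets = [0i32; 128]; // one counter per possible type id
        taken_type_ids
            .iter()
            .map(|&t| {
                let offset = child_offsets[t as usize];
                child_offsets[t as usize] += 1;
                offset
            })
            .collect()
    }

    fn main() {
        // Type ids 0,0,1,0,1,0 -> offsets 0,1,0,2,1,3
        assert_eq!(remap_dense_offsets(&[0, 0, 1, 0, 1, 0]), vec![0, 1, 0, 2, 1, 3]);
    }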
---
 arrow-select/src/filter.rs |   5 +-
 arrow-select/src/take.rs   | 128 -
 2 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 8e06b07f5ef..65ccbe1e01a 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -552,7 +552,10 @@ fn filter_native(values: &[T], 
predicate: )
 }
 
 /// `filter` implementation for primitive arrays
-fn filter_primitive(array: , predicate: ) 
-> PrimitiveArray
+pub(crate) fn filter_primitive(
+array: ,
+predicate: ,
+) -> PrimitiveArray
 where
 T: ArrowPrimitiveType,
 {
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index b8d59142db7..d6892eb0a9e 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -31,6 +31,8 @@ use arrow_schema::{ArrowError, DataType, FieldRef, UnionMode};
 
 use num::{One, Zero};
 
+use crate::filter::{filter_primitive, FilterBuilder};
+
 /// Take elements by index from [Array], creating a new [Array] from those 
indexes.
 ///
 /// ```text
@@ -240,6 +242,44 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
             let array = UnionArray::try_new(fields.clone(), type_ids, None, children)?;
             Ok(Arc::new(array))
         }
+        DataType::Union(fields, UnionMode::Dense) => {
+            let values = values.as_any().downcast_ref::<UnionArray>().unwrap();
+
+            let type_ids = <PrimitiveArray<Int8Type>>::new(take_native(values.type_ids(), indices), None);
+            let offsets = <PrimitiveArray<Int32Type>>::new(take_native(values.offsets().unwrap(), indices), None);
+
+            let children = fields.iter()
+                .map(|(field_type_id, _)| {
+                    let mask = BooleanArray::from_unary(&type_ids, |value_type_id| value_type_id == field_type_id);
+                    let predicate = FilterBuilder::new(&mask).build();
+
+                    let indices = filter_primitive(&offsets, &predicate);
+
+                    let values = values.child(field_type_id);
+
+                    take_impl(values, &indices)
+                })
+                .collect::<Result<_, _>>()?;
+
+            let mut child_offsets = [0; 128];
+
+            let offsets = type_ids.values()
+                .iter()
+                .map(|&i| {
+                    let offset = child_offsets[i as usize];
+
+                    child_offsets[i as usize] += 1;
+
+                    offset
+                })
+                .collect();
+
+            let (_, type_ids, _) = type_ids.into_parts();
+
+            let array = UnionArray::try_new(fields.clone(), type_ids, Some(offsets), children)?;
+
+            Ok(Arc::new(array))
+        }
 t => unimplemented!("Take not supported for data type {:?}", t)
 }
 }
@@ -2146,7 +2186,7 @@ mod tests {
 }
 
 #[test]
-fn test_take_union() {
+fn test_take_union_sparse() {
 let structs = create_test_struct(vec![
 Some((Some(true), Some(42))),
 Some((Some(false), Some(28))),
@@ -2183,4 +2223,90 @@ mod tests {
 let expected = vec![Some("a"), None, None, Some("a"), Some("c"), 
Some("d")];
 assert_eq!(expected, actual);
 }
+
+#[test]
+fn test_take_union_dense() {
+let type_ids = vec![0, 1, 1, 0, 0, 1, 0];
+let offsets = vec![0, 0, 1, 1, 2, 2, 3];
+let ints = vec![10, 20, 30, 40];
+let strings = vec![Some("a"), None, Some("c"), Some("d")];
+
+let indices = vec![0, 3, 1, 0, 2, 4];
+
+let taken_type_ids = vec![0, 0, 1, 0, 1, 0];
+let taken_offsets = vec![0, 1, 0, 2, 1, 3];
+let taken_ints = vec![10, 20, 10, 30];
+let taken_strings = vec![Some("a"), None];
+
+        let type_ids = <ScalarBuffer<i8>>::from(type_ids);
+        let offsets = <ScalarBuffer<i32>>::from(offsets);
+let ints = UInt32Array::from(ints);
+let strings = StringArray::from(strings);
+
+let union_fields = [
+(
+0,
+Arc::new(Field::new("f1", ints.data_type().clone(), true)),
+),
+(
+1,
+Arc::new(Field::new("f2", strings.data_type().clone(), true)),
+),
+]
+.into_iter()
+.collect();
+
+let array = UnionArray::try_new(
+union_fields,
+type_ids,
+Some(offsets),
+vec![Arc::new(ints), Arc::new(strings)],
+)
+.unwrap();
+
+let index = UInt32Array::from(indices);
+
+        let actual = take(&array, &index, 

(arrow) branch main updated: GH-41955: [C++] Follow up of adding null_bitmap to MapArray::FromArrays (#41956)

2024-06-13 Thread felipecrv
This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new fe1f7c0de4 GH-41955: [C++] Follow up of adding null_bitmap to 
MapArray::FromArrays (#41956)
fe1f7c0de4 is described below

commit fe1f7c0de4557784b20d3936f22d2efaed01a9d0
Author: Alenka Frim 
AuthorDate: Thu Jun 13 19:06:28 2024 +0200

GH-41955: [C++] Follow up of adding null_bitmap to MapArray::FromArrays 
(#41956)

### Rationale for this change

There have been some new comments regarding the work done in 
https://github.com/apache/arrow/pull/41757.

### What changes are included in this PR?

This PR addresses the comments from 
https://github.com/apache/arrow/pull/41757#pullrequestreview-2094287563

### Are these changes tested?

Yes. Existing tests should pass.

### Are there any user-facing changes?

No.
* GitHub Issue: #41955

Lead-authored-by: AlenkaF 
Co-authored-by: Alenka Frim 
Co-authored-by: Sutou Kouhei 
Signed-off-by: Felipe Oliveira Carvalho 
---
 cpp/src/arrow/array/array_list_test.cc | 20 +
 cpp/src/arrow/array/array_nested.cc| 41 ++
 cpp/src/arrow/array/array_nested.h |  6 ++---
 python/pyarrow/tests/test_array.py | 14 
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/array/array_list_test.cc 
b/cpp/src/arrow/array/array_list_test.cc
index 063b68706b..3d18d5f967 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -1369,14 +1369,26 @@ TEST_F(TestMapArray, FromArrays) {
   ASSERT_RAISES(Invalid,
 MapArray::FromArrays(offsets1, keys_with_null, tmp_items, 
pool_));
 
-  // With null_bitmap
-  ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, 
pool_,
-   
offsets3->data()->buffers[0]));
+  // With null_bitmap and null_count=1
+  auto null_bitmap_1 = ArrayFromJSON(boolean(), "[1, 0, 
1]")->data()->buffers[1];
+  ASSERT_OK_AND_ASSIGN(auto map7,
+   MapArray::FromArrays(offsets1, keys, items, pool_, 
null_bitmap_1));
   ASSERT_OK(map7->Validate());
   MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, 
items,
- offsets3->data()->buffers[0], 1);
+ null_bitmap_1, 1);
+  ASSERT_EQ(map7->null_count(), 1);
   AssertArraysEqual(expected7, *map7);
 
+  // With null_bitmap and null_count=2
+  auto null_bitmap_2 = ArrayFromJSON(boolean(), "[0, 1, 
0]")->data()->buffers[1];
+  ASSERT_OK_AND_ASSIGN(auto map8,
+   MapArray::FromArrays(offsets1, keys, items, pool_, 
null_bitmap_2));
+  ASSERT_OK(map8->Validate());
+  MapArray expected8(map_type, length, offsets1->data()->buffers[1], keys, 
items,
+ null_bitmap_2, 2);
+  ASSERT_EQ(map8->null_count(), 2);
+  AssertArraysEqual(expected8, *map8);
+
   // Null bitmap and offset with null
   ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_,
   offsets3->data()->buffers[0]));
diff --git a/cpp/src/arrow/array/array_nested.cc 
b/cpp/src/arrow/array/array_nested.cc
index 2f6bca3d57..47c0fd3582 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
 Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
     const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
-    MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
+    MemoryPool* pool, std::shared_ptr<Buffer> null_bitmap) {
   using offset_type = typename MapType::offset_type;
   using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
 
@@ -836,7 +836,7 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
     return Status::NotImplemented("Null bitmap with offsets slice not supported.");
   }
 
-  if (offsets->null_count() > 0) {
+  if (offsets->data()->MayHaveNulls()) {
     ARROW_ASSIGN_OR_RAISE(auto buffers,
                           CleanListOffsets<MapType>(NULLPTR, *offsets, pool));
     return std::make_shared<MapArray>(type, offsets->length() - 1, std::move(buffers),
@@ -847,30 +847,32 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
   const auto& typed_offsets = checked_cast<const OffsetArrayType&>(*offsets);
 
   BufferVector buffers;
-  int64_t null_count;
-  if (null_bitmap != nullptr) {
-    buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()});
-    null_count = null_bitmap->size();
-  } else {
-    buffers = BufferVector({null_bitmap, typed_offsets.values()});
-    null_count = 0;
-  }
+  buffers.resize(2);
+  int64_t null_count = 0;
+  if (null_bitmap) {
+    buffers[0] = std::move(null_bitmap);
+    null_count = 

(arrow) branch main updated: GH-41662: [Python] Ensure Buffer methods don't crash with non-CPU data (#41889)

2024-06-13 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new f43cf0d614 GH-41662: [Python] Ensure Buffer methods don't crash with 
non-CPU data (#41889)
f43cf0d614 is described below

commit f43cf0d6146068b8a1a1f61be248dc7fa6fc6591
Author: Alenka Frim 
AuthorDate: Thu Jun 13 17:25:20 2024 +0200

GH-41662: [Python] Ensure Buffer methods don't crash with non-CPU data 
(#41889)

### Rationale for this change

`hex()` and `__getitem__` currently segfault if the data is not located on 
the CPU.

### What changes are included in this PR?

This PR adds a check and returns `NotImplementedError` if the data is not 
on CPU.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No.
* GitHub Issue: #41662

Lead-authored-by: AlenkaF 
Co-authored-by: Alenka Frim 
Co-authored-by: Joris Van den Bossche 
Signed-off-by: Joris Van den Bossche 
---
 python/pyarrow/io.pxi   | 24 ++
 python/pyarrow/tests/test_io.py | 70 +
 2 files changed, 94 insertions(+)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 48b7934209..3a0125e957 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1286,6 +1286,10 @@ cdef class Buffer(_Weakrefable):
 f"is_cpu={self.is_cpu} "
 f"is_mutable={self.is_mutable}>")
 
+def _assert_cpu(self):
+if not self.is_cpu:
+raise NotImplementedError("Implemented only for data on CPU 
device")
+
 @property
 def size(self):
 """
@@ -1311,6 +1315,7 @@ cdef class Buffer(_Weakrefable):
 ---
 : bytes
 """
+self._assert_cpu()
 return self.buffer.get().ToHexString()
 
 @property
@@ -1378,6 +1383,7 @@ cdef class Buffer(_Weakrefable):
 return self.getitem(_normalize_index(key, self.size))
 
 cdef getitem(self, int64_t i):
+self._assert_cpu()
 return self.buffer.get().data()[i]
 
 def slice(self, offset=0, length=None):
@@ -1424,6 +1430,18 @@ cdef class Buffer(_Weakrefable):
 are_equal : bool
 True if buffer contents and size are equal
 """
+if self.device != other.device:
+raise ValueError(
+"Device on which the data resides differs between buffers: "
+f"{self.device.type_name} and {other.device.type_name}."
+)
+if not self.is_cpu:
+if self.address != other.address:
+raise NotImplementedError(
+"Implemented only for data on CPU device or data with 
equal "
+"addresses"
+)
+
 cdef c_bool result = False
 with nogil:
 result = self.buffer.get().Equals(deref(other.buffer.get()))
@@ -1436,6 +1454,8 @@ cdef class Buffer(_Weakrefable):
 return self.equals(py_buffer(other))
 
 def __reduce_ex__(self, protocol):
+self._assert_cpu()
+
 if protocol >= 5:
 bufobj = pickle.PickleBuffer(self)
 elif self.buffer.get().is_mutable():
@@ -1452,11 +1472,15 @@ cdef class Buffer(_Weakrefable):
 """
 Return this buffer as a Python bytes object. Memory is copied.
 """
+self._assert_cpu()
+
 return cp.PyBytes_FromStringAndSize(
 self.buffer.get().data(),
 self.buffer.get().size())
 
 def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+self._assert_cpu()
+
 if self.buffer.get().is_mutable():
 buffer.readonly = 0
 else:
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 17eab871a2..2306014c41 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -669,6 +669,76 @@ def test_allocate_buffer_resizable():
 assert buf.size == 200
 
 
+def test_non_cpu_buffer(pickle_module):
+cuda = pytest.importorskip("pyarrow.cuda")
+ctx = cuda.Context(0)
+
+data = np.array([b'testing'])
+cuda_buf = ctx.buffer_from_data(data)
+arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(7), 1, [None, 
cuda_buf])
+buf_on_gpu = arr.buffers()[1]
+
+assert buf_on_gpu.size == cuda_buf.size
+assert buf_on_gpu.address == cuda_buf.address
+assert buf_on_gpu.is_cpu == cuda_buf.is_cpu
+assert buf_on_gpu.is_mutable
+
+repr1 = ""
+assert repr1 in repr(buf_on_gpu)
+assert repr2 in repr(buf_on_gpu)
+
+buf_on_gpu_sliced = buf_on_gpu.slice(2)
+cuda_sliced = cuda.CudaBuffer.from_buffer(buf_on_gpu_sliced)
+assert cuda_sliced.to_pybytes() == b'sting'
+
+buf_on_gpu_sliced = buf_on_gpu[2:4]
+cuda_sliced = 

(arrow-rs) branch master updated: new clippy rules (#5881)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 3715d5447e4 new clippy rules (#5881)
3715d5447e4 is described below

commit 3715d5447e468a5a4dc631ae9aafec706c57aa20
Author: Xiangpeng Hao 
AuthorDate: Thu Jun 13 11:44:55 2024 -0400

new clippy rules (#5881)
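
Most of the churn below comes from one lint: the legacy `std::i32::MAX`-style module constants are deprecated in favor of the associated constants on the primitive types (the lint involved appears to be clippy's legacy_numeric_constants). A minimal before/after in Rust:

    fn main() {
        // Old style, now flagged by clippy:
        // let max = std::i32::MAX;
        // New style, using the associated constant on the primitive type:
        let max = i32::MAX;
        assert_eq!(max, 2_147_483_647);
    }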
---
 arrow-data/src/data.rs|  2 +-
 arrow-ipc/src/writer.rs   |  2 +-
 arrow-json/src/reader/mod.rs  |  8 ++---
 parquet/src/column/reader.rs  | 68 +++
 parquet/src/encodings/decoding.rs | 16 -
 parquet_derive_test/src/lib.rs|  6 ++--
 6 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 5ee96639488..12fe4968d5a 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -24,9 +24,9 @@ use arrow_buffer::{
 bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, 
IntervalMonthDayNano, MutableBuffer,
 };
 use arrow_schema::{ArrowError, DataType, UnionMode};
+use std::mem;
 use std::ops::Range;
 use std::sync::Arc;
-use std::{mem, usize};
 
 use crate::{equal, validate_binary_view, validate_string_view};
 
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index ef08a6130e3..f74a86e013c 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -1742,7 +1742,7 @@ mod tests {
 let array1 = NullArray::new(32);
 let array2 = Int32Array::from(vec![1; 32]);
 let array3 = NullArray::new(32);
-let array4 = Float64Array::from(vec![std::f64::NAN; 32]);
+let array4 = Float64Array::from(vec![f64::NAN; 32]);
 let batch = RecordBatch::try_new(
 Arc::new(schema.clone()),
 vec![
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 9a113ee2bd7..0a50cfac65f 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -2282,8 +2282,8 @@ mod tests {
 fn test_coercing_primitive_into_string_decoder() {
         let buf = format!(
             r#"[{{"a": 1, "b": "A", "c": "T"}}, {{"a": 2, "b": "BB", "c": "F"}}, {{"a": {}, "b": 123, "c": false}}, {{"a": {}, "b": 789, "c": true}}]"#,
-(std::i32::MAX as i64 + 10),
-std::i64::MAX - 10
+(i32::MAX as i64 + 10),
+i64::MAX - 10
 );
 let schema = Schema::new(vec![
 Field::new("a", DataType::Float64, true),
@@ -2306,8 +2306,8 @@ mod tests {
 Arc::new(Float64Array::from(vec![
 1.0,
 2.0,
-(std::i32::MAX as i64 + 10) as f64,
-(std::i64::MAX - 10) as f64
+(i32::MAX as i64 + 10) as f64,
+(i64::MAX - 10) as f64
 ])),
 Arc::new(StringArray::from(vec!["A", "BB", "123", "789"])),
 Arc::new(StringArray::from(vec!["T", "F", "false", 
"true"])),
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index 53d56dd6b9f..b40ca2b7829 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -672,8 +672,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 16,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 test!(
 test_read_plain_v2_int32,
@@ -684,8 +684,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 16,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 
 test!(
@@ -697,8 +697,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 17,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 test!(
 test_read_plain_v2_int32_uneven,
@@ -709,8 +709,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 17,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 
 test!(
@@ -722,8 +722,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 512,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 test!(
 test_read_plain_v2_int32_multi_page,
@@ -734,8 +734,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 512,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 
 // test cases when column descriptor has MAX_DEF_LEVEL = 0 and 
MAX_REP_LEVEL = 0
@@ -748,8 +748,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 16,
-std::i32::MIN,
-std::i32::MAX
+i32::MIN,
+i32::MAX
 );
 test!(
 test_read_plain_v2_int32_required_non_repeated,
@@ -760,8 +760,8 @@ mod tests {
 NUM_PAGES,
 NUM_LEVELS,
 

(arrow-rs) branch asf-site updated: deploy: 8752e01be642bce205984e16b44e06078413dc68

2024-06-13 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new f0d4f74ee97 deploy: 8752e01be642bce205984e16b44e06078413dc68
f0d4f74ee97 is described below

commit f0d4f74ee97d24719c6b5856f139ac5819b1ea01
Author: alamb 
AuthorDate: Thu Jun 13 14:06:32 2024 +

deploy: 8752e01be642bce205984e16b44e06078413dc68
---
 arrow_cast/all.html|   2 +-
 .../dictionary/fn.binary_view_to_dictionary.html   |   4 +
 .../cast/dictionary/fn.cast_to_dictionary.html |   2 +-
 .../dictionary/fn.pack_byte_to_dictionary.html |   2 +-
 .../dictionary/fn.pack_numeric_to_dictionary.html  |   2 +-
 .../dictionary/fn.string_view_to_dictionary.html   |   4 +
 arrow_cast/cast/dictionary/index.html  |   4 +-
 arrow_cast/cast/dictionary/sidebar-items.js|   2 +-
 arrow_cast/cast/index.html |   2 +-
 search-index.js|   2 +-
 src/arrow_cast/cast/dictionary.rs.html | 160 -
 src/arrow_cast/cast/mod.rs.html|  48 +--
 12 files changed, 209 insertions(+), 25 deletions(-)


(arrow-rs) branch master updated: Improve performance of casting `StringView`/`BinaryView` to `DictionaryArray` (#5872)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 8752e01be64 Improve performance of casting `StringView`/`BinaryView` 
to `DictionaryArray` (#5872)
8752e01be64 is described below

commit 8752e01be642bce205984e16b44e06078413dc68
Author: Xiangpeng Hao 
AuthorDate: Thu Jun 13 10:03:03 2024 -0400

Improve performance of casting `StringView`/`BinaryView` to 
`DictionaryArray` (#5872)
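
A hedged usage sketch (not from the PR, assuming an arrow-rs version with view arrays) of the path this optimizes: casting a `StringViewArray` to a dictionary type through `arrow_cast::cast::cast`, which after this change builds the dictionary directly from the views instead of materializing an intermediate byte array.

    use std::sync::Arc;
    use arrow_array::{Array, ArrayRef, StringViewArray};
    use arrow_cast::cast::cast;
    use arrow_schema::DataType;

    fn main() -> Result<(), arrow_schema::ArrowError> {
        // Repeated values are what make dictionary encoding worthwhile.
        let values: ArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("hello"),
            Some("repeated"),
            None,
            Some("repeated"),
        ]));
        let dict_type =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        let dict = cast(&values, &dict_type)?;
        assert_eq!(dict.data_type(), &dict_type);
        Ok(())
    }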

* zero-copy dict to view

* view to dict

* refactor to use try_append_view

* unchecked view

* make fmt happy

* update test

* add comments

-

Co-authored-by: Andrew Lamb 
---
 arrow-cast/src/cast/dictionary.rs | 84 +--
 arrow-cast/src/cast/mod.rs| 36 +++--
 2 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/arrow-cast/src/cast/dictionary.rs 
b/arrow-cast/src/cast/dictionary.rs
index d929277a4da..ee2021d15b6 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -188,10 +188,34 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
         Decimal256(_, _) => {
             pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
         }
-        Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options),
-        LargeUtf8 => pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options),
-        Binary => pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options),
-        LargeBinary => pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options),
+        Utf8 => {
+            // If the input is a view type, we can avoid casting (thus copying) the data
+            if array.data_type() == &DataType::Utf8View {
+                return string_view_to_dictionary::<K, i32>(array);
+            }
+            pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
+        }
+        LargeUtf8 => {
+            // If the input is a view type, we can avoid casting (thus copying) the data
+            if array.data_type() == &DataType::Utf8View {
+                return string_view_to_dictionary::<K, i64>(array);
+            }
+            pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
+        }
+        Binary => {
+            // If the input is a view type, we can avoid casting (thus copying) the data
+            if array.data_type() == &DataType::BinaryView {
+                return binary_view_to_dictionary::<K, i32>(array);
+            }
+            pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
+        }
+        LargeBinary => {
+            // If the input is a view type, we can avoid casting (thus copying) the data
+            if array.data_type() == &DataType::BinaryView {
+                return binary_view_to_dictionary::<K, i64>(array);
+            }
+            pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
+        }
         _ => Err(ArrowError::CastError(format!(
             "Unsupported output type for dictionary packing: {dict_value_type:?}"
         ))),
@@ -226,6 +250,58 @@ where
     Ok(Arc::new(b.finish()))
 }
 
+pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
+    array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
+where
+    K: ArrowDictionaryKeyType,
+{
+    let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
+        array.len(),
+        1024,
+        1024,
+    );
+    let string_view = array.as_any().downcast_ref::<StringViewArray>().unwrap();
+    for v in string_view.iter() {
+        match v {
+            Some(v) => {
+                b.append(v)?;
+            }
+            None => {
+                b.append_null();
+            }
+        }
+    }
+
+    Ok(Arc::new(b.finish()))
+}
+
+pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
+    array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
+where
+    K: ArrowDictionaryKeyType,
+{
+    let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
+        array.len(),
+        1024,
+        1024,
+    );
+    let binary_view = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
+    for v in binary_view.iter() {
+        match v {
+            Some(v) => {
+                b.append(v)?;
+            }
+            None => {
+                b.append_null();
+            }
+        }
+    }
+
+    Ok(Arc::new(b.finish()))
+}
+
 // Packs the data as a GenericByteDictionaryBuilder, if possible, with the
 // key types of K
 pub(crate) fn pack_byte_to_dictionary<K, T>(
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index e073e34cb6e..354c31af695 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -5205,10 +5205,10 @@ mod tests {
 
    const VIEW_TEST_DATA: [Option<&str>; 5] = [
 Some("hello"),
-Some("world"),
+Some("repeated"),
 None,
 Some("large payload over 12 bytes"),
-Some("lulu"),
+Some("repeated"),
 ];
 
 fn _test_string_to_view()
@@ -5291,6 +5291,26 @@ mod tests {
 assert_eq!(casted_binary_array.as_ref(), 

(arrow-rs) branch master updated: Avoid copy/allocation when read view types from parquet (#5877)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new c6359bf78da Avoid copy/allocation when read view types from parquet 
(#5877)
c6359bf78da is described below

commit c6359bf78dab76c16112cfbe22c83af680ba7376
Author: Xiangpeng Hao 
AuthorDate: Thu Jun 13 09:22:03 2024 -0400

Avoid copy/allocation when read view types from parquet (#5877)

* avoid copy/allocation when build from offset buffer

* avoid hard code block id
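
A hedged sketch of the builder API the fix relies on (assuming a current arrow-rs with view builders): register the whole decoded buffer once with `append_block`, then append `(offset, length)` views into it rather than copying each value out.

    use arrow_array::builder::StringViewBuilder;
    use arrow_array::Array;
    use arrow_buffer::Buffer;

    fn main() {
        let data = Buffer::from(b"helloworld".as_ref());
        let mut builder = StringViewBuilder::with_capacity(2);
        // The buffer is registered once; views then reference it by block id.
        let block = builder.append_block(data);
        builder.try_append_view(block, 0, 5).unwrap(); // "hello"
        builder.try_append_view(block, 5, 5).unwrap(); // "world"
        let array = builder.finish();
        assert_eq!(array.value(0), "hello");
        assert_eq!(array.len(), 2);
    }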
---
 parquet/src/arrow/buffer/offset_buffer.rs | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/buffer/offset_buffer.rs 
b/parquet/src/arrow/buffer/offset_buffer.rs
index 32bb9d0862b..181e69c669a 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -156,16 +156,19 @@ impl OffsetBuffer {
 
     fn build_generic_byte_view<T: ByteViewType>(self) -> GenericByteViewBuilder<T> {
         let mut builder = GenericByteViewBuilder::<T>::with_capacity(self.len());
-        let mut values = self.values;
+        let buffer = self.values.into();
+        let block = builder.append_block(buffer);
 for window in self.offsets.windows(2) {
 let start = window[0];
 let end = window[1];
 let len = (end - start).to_usize().unwrap();
-let b = values.drain(..len).collect::>();
-if b.is_empty() {
-builder.append_null();
+
+if len != 0 {
+builder
+.try_append_view(block, start.as_usize() as u32, len as 
u32)
+.unwrap();
 } else {
-builder.append_value(b);
+builder.append_null();
 }
 }
 builder



(arrow) branch main updated: GH-41102: [Packaging][Release] Create unique git tags for release candidates (e.g. apache-arrow-{MAJOR}.{MINOR}.{PATCH}-rc{RC_NUM}) (#41131)

2024-06-13 Thread sgilmore
This is an automated email from the ASF dual-hosted git repository.

sgilmore pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 6ec2f222ff GH-41102: [Packaging][Release] Create unique git tags for 
release candidates (e.g. apache-arrow-{MAJOR}.{MINOR}.{PATCH}-rc{RC_NUM}) 
(#41131)
6ec2f222ff is described below

commit 6ec2f222ff7a6ad9dcb3b5b7edc38c414259aee2
Author: Sarah Gilmore <74676073+sgilmor...@users.noreply.github.com>
AuthorDate: Thu Jun 13 09:06:29 2024 -0400

GH-41102: [Packaging][Release] Create unique git tags for release 
candidates (e.g. apache-arrow-{MAJOR}.{MINOR}.{PATCH}-rc{RC_NUM}) (#41131)


### Rationale for this change

As per @ kou's 
[suggestion](https://github.com/apache/arrow/pull/40956#discussion_r1556447060) 
in #40956, we should create unique git tags (e.g. 
`apache-arrow-{MAJOR}.{MINOR}.{VERSION}-rc{RC_NUM}`) instead of re-using the same 
git tag (`apache-arrow-{MAJOR}.{MINOR}.{VERSION}`) for each release candidate. 
The official release candidate tag (`apache-arrow-{MAJOR}.{MINOR}.{VERSION}`) 
should be created **only** after a release candidate is voted on and accepted. 
This "official" release tag s [...]

The new release workflow could look like the following:

> 1. Create a apache-arrow-X.Y.Z-rc0 tag for X.Y.Z RC0
> 2. (Found a problem for X.Y.Z RC0)
> 3. Create a apache-arrow-X.Y.Z-rc1 tag for X.Y.Z RC1
> 4. Vote
> 5. Passed
> 6. Create a apache-arrow-X.Y.Z tag from apache-arrow-X.Y.Z-rc1 like apache/arrow-adbc and apache/arrow-flight-sql-postgresql do

See @ kou's 
[comment](https://github.com/apache/arrow/pull/40956#discussion_r1556447060) 
for more details.

### What changes are included in this PR?

1. Updated `dev/release/01-prepare.sh` to create release-candidate-specific 
git tags (e.g. `apache-arrow-{MAJOR}.{MINOR}.{PATCH}-rc{RC_NUM}`).
2. Updated scripts in `dev/release` to use the new git tag name.
3. Added GitHub Workflow file  `publish_release_candidate.yml`. This 
workflow is triggered when a release candidate git tag is pushed and creates a 
Prerelease GitHub Release.
4. Added logic to `dev/release/02-post-binary.sh` to create and push the 
release git tag (i.e. `apache-arrow-{MAJOR}.{MINOR}.{PATCH}`).
5. Added GitHub Workflow `publish_release.yml`. This workflow is triggered 
when the release tag is pushed and creates a GitHub Release for the approved 
release (i.e. the voted upon release).
6. Added `dev/release/post-16-delete-release-candidates.sh` to delete the 
release candidate git tags and their associated GitHub Releases.
7. Updated `docs/developers/release.rst` with the new steps.

### Are these changes tested?

1. We were not able to verify the changes made to the scripts in 
`dev/release`. Any suggestions on how we can verify these scripts would be much 
appreciated :)
2. We did test the new GitHub Workflows (`publish_release_candidate.yml` 
and `publish_release.yml`) work as intended by pushing git tags to 
[`mathworks/arrow`](https://github.com/mathworks/arrow).

### Are there any user-facing changes?

No.

### Open Questions

1. We noticed that 
[apache/arrow-flight-sql-postgresql](https://github.com/apache/arrow-flight-sql-postgresql/releases)
 does **not** delete the release candidate Prereleases from their GitHub 
Releases area. Should we be doing the same? Or would it be preferable to just 
delete the release candidates **without** deleting the release candidate 
tags.
2. We're not that familiar with ruby, so we're not sure if the changes we 
made to `dev/release/02-source-test.rb` make sense.

### Future Directions

1.  Continue working on #40956
2. Add logic to auto-sign release artifacts in GitHub Actions Workflows.

* GitHub Issue: #41102

Lead-authored-by: Sarah Gilmore 
Co-authored-by: Sarah Gilmore <74676073+sgilmor...@users.noreply.github.com>
Co-authored-by: Sutou Kouhei 
Signed-off-by: Sarah Gilmore 
---
 .github/workflows/release.yml  | 74 
 .github/workflows/release_candidate.yml| 70 +++
 dev/release/01-prepare.sh  | 12 +--
 dev/release/02-source-test.rb  |  7 +-
 dev/release/02-source.sh   | 59 +
 dev/release/03-binary-submit.sh|  2 +-
 dev/release/07-matlab-upload.sh| 89 +++
 .../{07-binary-verify.sh => 08-binary-verify.sh}   |  0
 .../{07-binary-verify.sh => post-01-tag.sh}| 20 ++---
 .../{post-01-upload.sh => post-02-upload.sh}   |  0
 .../{post-02-binary.sh => post-03-binary.sh}   |  0
 .../{post-03-website.sh => post-04-website.sh} |  0
 ...10-go.sh => 

(arrow-rs) branch master updated: feat: recursively merge Field when datatype is DataType::List and DataType::LargeList (#5852)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 2d17bf09d06 feat: recursively merge Field when datatype is 
DataType::List and DataType::LargeList (#5852)
2d17bf09d06 is described below

commit 2d17bf09d068d2b36792361498af2aa31f541a39
Author: Mrinal Paliwal 
AuthorDate: Thu Jun 13 18:42:45 2024 +0530

feat: recursively merge Field when datatype is DataType::List and 
DataType::LargeList (#5852)
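
A hedged example of the new behavior (assuming a version of arrow-schema with this change): merging a `List` field whose counterpart's child is `Null`-typed now recurses into the child and widens it, mirroring the test added in the diff below.

    use std::sync::Arc;
    use arrow_schema::{ArrowError, DataType, Field};

    fn main() -> Result<(), ArrowError> {
        let mut target = Field::new(
            "l",
            DataType::List(Arc::new(Field::new("inner", DataType::Float32, false))),
            false,
        );
        let from = Field::new(
            "l",
            DataType::List(Arc::new(Field::new("inner", DataType::Null, false))),
            true,
        );
        // Recursively widens the child: Float32 stays, nullability is merged.
        target.try_merge(&from)?;
        assert_eq!(
            target.data_type(),
            &DataType::List(Arc::new(Field::new("inner", DataType::Float32, true)))
        );
        assert!(target.is_nullable());
        Ok(())
    }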
---
 arrow-schema/src/field.rs | 113 +-
 1 file changed, 111 insertions(+), 2 deletions(-)

diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index b84a2568ed8..a84a6ada334 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -483,6 +483,30 @@ impl Field {
 ));
 }
 },
+            DataType::List(field) => match &from.data_type {
+                DataType::List(from_field) => {
+                    let mut f = (**field).clone();
+                    f.try_merge(from_field)?;
+                    (*field) = Arc::new(f);
+                },
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
+                        self.name, from.data_type)
+                ))}
+            },
+            DataType::LargeList(field) => match &from.data_type {
+                DataType::LargeList(from_field) => {
+                    let mut f = (**field).clone();
+                    f.try_merge(from_field)?;
+                    (*field) = Arc::new(f);
+                },
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
+                        self.name, from.data_type)
+                ))}
+            },
 DataType::Null => {
 self.nullable = true;
 self.data_type = from.data_type.clone();
@@ -509,9 +533,7 @@ impl Field {
 | DataType::LargeBinary
 | DataType::BinaryView
 | DataType::Interval(_)
-| DataType::LargeList(_)
 | DataType::LargeListView(_)
-| DataType::List(_)
 | DataType::ListView(_)
 | DataType::Map(_, _)
 | DataType::Dictionary(_, _)
@@ -623,6 +645,93 @@ mod test {
 assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
 }
 
+#[test]
+fn test_merge_with_nested_null() {
+let mut struct1 = Field::new(
+"s1",
+DataType::Struct(Fields::from(vec![Field::new(
+"inner",
+DataType::Float32,
+false,
+)])),
+false,
+);
+
+let struct2 = Field::new(
+"s2",
+DataType::Struct(Fields::from(vec![Field::new(
+"inner",
+DataType::Null,
+false,
+)])),
+true,
+);
+
+        struct1
+            .try_merge(&struct2)
+            .expect("should widen inner field's type to nullable float");
+assert_eq!(
+Field::new(
+"s1",
+DataType::Struct(Fields::from(vec![Field::new(
+"inner",
+DataType::Float32,
+true,
+)])),
+true,
+),
+struct1
+);
+
+let mut list1 = Field::new(
+"l1",
+DataType::List(Field::new("inner", DataType::Float32, 
false).into()),
+false,
+);
+
+let list2 = Field::new(
+"l2",
+DataType::List(Field::new("inner", DataType::Null, false).into()),
+true,
+);
+
+        list1
+            .try_merge(&list2)
+            .expect("should widen inner field's type to nullable float");
+assert_eq!(
+Field::new(
+"l1",
+DataType::List(Field::new("inner", DataType::Float32, 
true).into()),
+true,
+),
+list1
+);
+
+let mut large_list1 = Field::new(
+"ll1",
+DataType::LargeList(Field::new("inner", DataType::Float32, 
false).into()),
+false,
+);
+
+let large_list2 = Field::new(
+"ll2",
+DataType::LargeList(Field::new("inner", DataType::Null, 
false).into()),
+true,
+);
+
+        large_list1
+            .try_merge(&large_list2)
+            .expect("should widen inner field's type to nullable float");
+assert_eq!(
+Field::new(
+

(arrow) branch main updated: GH-41924: [Python] Fix tests when using NumPy 2.0 on Windows (#42099)

2024-06-13 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 680980e8bd GH-41924: [Python] Fix tests when using NumPy 2.0 on 
Windows (#42099)
680980e8bd is described below

commit 680980e8bd70393e95f65fa880914aa6c32aedaa
Author: Joris Van den Bossche 
AuthorDate: Thu Jun 13 14:45:10 2024 +0200

GH-41924: [Python] Fix tests when using NumPy 2.0 on Windows (#42099)

### Rationale for this change

The tests are failing on Windows when using the NumPy 2.0 RC, probably related 
to the default integer bitwidth.

### What changes are included in this PR?

### Are these changes tested?

### Are there any user-facing changes?

* GitHub issue: #41319
* GitHub Issue: #41924

Authored-by: Joris Van den Bossche 
Signed-off-by: Joris Van den Bossche 
---
 python/pyarrow/tests/parquet/common.py | 2 +-
 python/pyarrow/tests/test_array.py | 2 +-
 python/pyarrow/tests/test_pandas.py| 2 +-
 python/pyarrow/tests/test_table.py | 2 +-
 python/scripts/test_leak.py| 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/tests/parquet/common.py 
b/python/pyarrow/tests/parquet/common.py
index 8365ed5b28..c3094ee20b 100644
--- a/python/pyarrow/tests/parquet/common.py
+++ b/python/pyarrow/tests/parquet/common.py
@@ -83,7 +83,7 @@ def _random_integers(size, dtype):
 iinfo = np.iinfo(dtype)
 return np.random.randint(max(iinfo.min, platform_int_info.min),
  min(iinfo.max, platform_int_info.max),
- size=size).astype(dtype)
+ size=size, dtype=dtype)
 
 
 def _range_integers(size, dtype):
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index 88394c77e4..1032ab9add 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -3398,7 +3398,7 @@ def test_numpy_array_protocol():
 result = np.asarray(arr)
 np.testing.assert_array_equal(result, expected)
 
-if Version(np.__version__) < Version("2.0"):
+if Version(np.__version__) < Version("2.0.0.dev0"):
 # copy keyword is not strict and not passed down to __array__
 result = np.array(arr, copy=False)
 np.testing.assert_array_equal(result, expected)
diff --git a/python/pyarrow/tests/test_pandas.py 
b/python/pyarrow/tests/test_pandas.py
index be2c5b14e6..ba9d6a3c01 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -780,7 +780,7 @@ class TestConvertPrimitiveTypes:
 info = np.iinfo(dtype)
 values = np.random.randint(max(info.min, np.iinfo(np.int_).min),
min(info.max, np.iinfo(np.int_).max),
-   size=num_values)
+   size=num_values, dtype=dtype)
 data[dtype] = values.astype(dtype)
 fields.append(pa.field(dtype, arrow_dtype))
 
diff --git a/python/pyarrow/tests/test_table.py 
b/python/pyarrow/tests/test_table.py
index a58010d083..f40759de50 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -3281,7 +3281,7 @@ def test_numpy_array_protocol(constructor):
 table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
 expected = np.array([[1, 4], [2, 5], [3, 6]], dtype="float64")
 
-if Version(np.__version__) < Version("2.0"):
+if Version(np.__version__) < Version("2.0.0.dev0"):
 # copy keyword is not strict and not passed down to __array__
 result = np.array(table, copy=False)
 np.testing.assert_array_equal(result, expected)
diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
index f2bbe8d051..86a87f5e74 100644
--- a/python/scripts/test_leak.py
+++ b/python/scripts/test_leak.py
@@ -98,7 +98,7 @@ def test_leak3():
 
 
 def test_ARROW_8801():
-x = pd.to_datetime(np.random.randint(0, 2**32, size=2**20),
+x = pd.to_datetime(np.random.randint(0, 2**32, size=2**20, dtype=np.int64),
unit='ms', utc=True)
 table = pa.table(pd.DataFrame({'x': x}))
 



(arrow) branch main updated: GH-41480: [Python] Update Python development guide about components being enabled by default based on Arrow C++ (#41705)

2024-06-13 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new aea10c2b59 GH-41480: [Python] Update Python development guide about 
components being enabled by default based on Arrow C++ (#41705)
aea10c2b59 is described below

commit aea10c2b59043397639a80c7582a1d3e5c588125
Author: Joris Van den Bossche 
AuthorDate: Thu Jun 13 14:44:04 2024 +0200

GH-41480: [Python] Update Python development guide about components being 
enabled by default based on Arrow C++ (#41705)

### Rationale for this change

Follow-up on https://github.com/apache/arrow/pull/41494 to update the 
Python development guide to reflect the change in how PyArrow is built 
(defaults for the various `PYARROW_BUILD_$COMPONENT` flags are now set based 
on the `ARROW_$COMPONENT` setting. The current `PYARROW_WITH_$COMPONENT` 
environment variables keep working, so this default can still be overridden)

* GitHub Issue: #41480

Authored-by: Joris Van den Bossche 
Signed-off-by: Joris Van den Bossche 
---
 docs/source/developers/python.rst | 95 ---
 1 file changed, 49 insertions(+), 46 deletions(-)

diff --git a/docs/source/developers/python.rst 
b/docs/source/developers/python.rst
index e84cd25201..2f3e892ce8 100644
--- a/docs/source/developers/python.rst
+++ b/docs/source/developers/python.rst
@@ -397,18 +397,14 @@ Now, build pyarrow:
 .. code-block::
 
$ pushd arrow/python
-   $ export PYARROW_WITH_PARQUET=1
-   $ export PYARROW_WITH_DATASET=1
$ export PYARROW_PARALLEL=4
$ python setup.py build_ext --inplace
$ popd
 
-If you did build one of the optional components (in C++), you need to set the
-corresponding ``PYARROW_WITH_$COMPONENT`` environment variable to 1.
-
-Similarly, if you built with ``PARQUET_REQUIRE_ENCRYPTION`` (in C++), you
-need to set the corresponding ``PYARROW_WITH_PARQUET_ENCRYPTION`` environment
-variable to 1.
+If you did build one of the optional components in C++, the equivalent 
components
+will be enabled by default for building pyarrow. This default can be overridden
+by setting the corresponding ``PYARROW_WITH_$COMPONENT`` environment variable
+to 0 or 1, see :ref:`python-dev-env-variables` below.
 
 To set the number of threads used to compile PyArrow's C++/Cython components,
 set the ``PYARROW_PARALLEL`` environment variable.
@@ -551,7 +547,6 @@ Now, we can build pyarrow:
 .. code-block::
 
$ pushd arrow\python
-   $ set PYARROW_WITH_PARQUET=1
$ set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1
$ python setup.py build_ext --inplace
$ popd
@@ -601,46 +596,12 @@ Then run the unit tests with:
 Caveats
 ---
 
+.. _python-dev-env-variables:
+
 Relevant components and environment variables
 =
 
-List of relevant Arrow CMake flags and corresponding environment variables
-to be used when building PyArrow are:
-
-.. list-table::
-   :widths: 30 30
-   :header-rows: 1
-
-   * - Arrow flags/options
- - Corresponding environment variables for PyArrow
-   * - ``CMAKE_BUILD_TYPE``
- - ``PYARROW_BUILD_TYPE`` (release, debug or relwithdebinfo)
-   * - ``ARROW_GCS``
- - ``PYARROW_WITH_GCS``
-   * - ``ARROW_S3``
- - ``PYARROW_WITH_S3``
-   * - ``ARROW_HDFS``
- - ``PYARROW_WITH_HDFS``
-   * - ``ARROW_CUDA``
- - ``PYARROW_WITH_CUDA``
-   * - ``ARROW_SUBSTRAIT``
- - ``PYARROW_WITH_SUBSTRAIT``
-   * - ``ARROW_FLIGHT``
- - ``PYARROW_WITH_FLIGHT``
-   * - ``ARROW_DATASET``
- - ``PYARROW_WITH_DATASET``
-   * - ``ARROW_PARQUET``
- - ``PYARROW_WITH_PARQUET``
-   * - ``PARQUET_REQUIRE_ENCRYPTION``
- - ``PYARROW_WITH_PARQUET_ENCRYPTION``
-   * - ``ARROW_TENSORFLOW``
- - ``PYARROW_WITH_TENSORFLOW``
-   * - ``ARROW_ORC``
- - ``PYARROW_WITH_ORC``
-   * - ``ARROW_GANDIVA``
- - ``PYARROW_WITH_GANDIVA``
-
-List of relevant environment variables that can also be used to build
+List of relevant environment variables that can be used to build
 PyArrow are:
 
 .. list-table::
@@ -650,6 +611,9 @@ PyArrow are:
* - PyArrow environment variable
  - Description
  - Default value
+   * - ``PYARROW_BUILD_TYPE``
+ - Build type for PyArrow (release, debug or relwithdebinfo), sets 
``CMAKE_BUILD_TYPE``
+ - ``release``
* - ``PYARROW_CMAKE_GENERATOR``
  - Example: ``'Visual Studio 15 2017 Win64'``
  - ``''``
@@ -678,6 +642,45 @@ PyArrow are:
  - Number of processes used to compile PyArrow’s C++/Cython components
  - ``''``
 
+Whether components are enabled or disabled when building PyArrow is by default
+based on how Arrow C++ is built (i.e. it follows the ``ARROW_$COMPONENT`` 
flags).
+However, the ``PYARROW_WITH_$COMPONENT`` environment variables can still be 
used
+to override this when building PyArrow (e.g. to disable components, or to 
enforce
+certain components to 

(arrow) branch main updated: GH-41758: [Python] Disallow direct pa.RecordBatchReader() construction to avoid segfaults (#41773)

2024-06-13 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 2ae6d11167 GH-41758: [Python] Disallow direct pa.RecordBatchReader() 
construction to avoid segfaults (#41773)
2ae6d11167 is described below

commit 2ae6d11167a9800b2a3d4cfdd968f2bd69f98183
Author: Joris Van den Bossche 
AuthorDate: Thu Jun 13 14:43:22 2024 +0200

GH-41758: [Python] Disallow direct pa.RecordBatchReader() construction to 
avoid segfaults (#41773)

### Rationale for this change

A user should never do this, but if they do, they can easily get segfaults. We 
should raise an error with an informative message like we do for other classes.

### Are these changes tested?
Yes

* GitHub Issue: #41758

Authored-by: Joris Van den Bossche 
Signed-off-by: Joris Van den Bossche 
---
 python/pyarrow/ipc.pxi| 5 +
 python/pyarrow/tests/test_misc.py | 1 +
 2 files changed, 6 insertions(+)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 617e25a142..e15b0ea40e 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -659,6 +659,11 @@ cdef class RecordBatchReader(_Weakrefable):
 
 # cdef block is in lib.pxd
 
+def __init__(self):
+raise TypeError("Do not call {}'s constructor directly, "
+"use one of the RecordBatchReader.from_* functions 
instead."
+.format(self.__class__.__name__))
+
 def __iter__(self):
 return self
 
diff --git a/python/pyarrow/tests/test_misc.py 
b/python/pyarrow/tests/test_misc.py
index 308c37fd0d..3d8ab2999e 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -237,6 +237,7 @@ def test_set_timezone_db_path_non_windows():
 pa.StructScalar,
 pa.DictionaryScalar,
 pa.RunEndEncodedScalar,
+pa.RecordBatchReader,
 pa.ipc.Message,
 pa.ipc.MessageReader,
 pa.MemoryPool,



(arrow-rs) branch master updated: Improve performance of casting `DictionaryArray` to `StringViewArray` (#5871)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 77271c4984b Improve performance of casting `DictionaryArray` to 
`StringViewArray` (#5871)
77271c4984b is described below

commit 77271c4984b3917e0d3d3bcf26215d1ab70f29f5
Author: Xiangpeng Hao 
AuthorDate: Thu Jun 13 07:46:26 2024 -0400

Improve performance of casting `DictionaryArray` to `StringViewArray` 
(#5871)

* zero-copy dict to view

* refactor to use try_append_view

* unchecked view

* make fmt happy

-

Co-authored-by: Andrew Lamb 
---
 .../src/builder/generic_bytes_view_builder.rs  | 47 
 arrow-cast/src/cast/dictionary.rs  | 59 +++
 arrow-cast/src/cast/mod.rs | 84 +-
 3 files changed, 143 insertions(+), 47 deletions(-)
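
A minimal sketch of the user-facing effect, assuming the arrow-array,
arrow-cast and arrow-schema crates with this change (the sample strings and
key values are illustrative):

    use std::sync::Arc;
    use arrow_array::{Array, DictionaryArray, StringArray, UInt32Array};
    use arrow_cast::cast;
    use arrow_schema::DataType;

    fn main() {
        // Dictionary-encoded strings: each key indexes into a shared value buffer.
        let values = StringArray::from(vec!["small", "a string longer than twelve bytes"]);
        let keys = UInt32Array::from(vec![0, 1, 0, 1, 0]);
        let dict = DictionaryArray::new(keys, Arc::new(values));

        // The cast kernel now builds views over the existing value buffer
        // instead of copying every string out of the dictionary.
        let view = cast(&dict, &DataType::Utf8View).unwrap();
        assert_eq!(view.len(), 5);
        assert_eq!(view.data_type(), &DataType::Utf8View);
    }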

diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs 
b/arrow-array/src/builder/generic_bytes_view_builder.rs
index e7f13a68288..6ec34bf5a91 100644
--- a/arrow-array/src/builder/generic_bytes_view_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_view_builder.rs
@@ -116,6 +116,36 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
 offset as u32
 }
 
+/// Append a view of the given `block`, `offset` and `length`
+///
+/// # Safety
+/// (1) The block must have been added using [`Self::append_block`]
+/// (2) The range `offset..offset+length` must be within the bounds of the 
block
+/// (3) The data in the block must be valid of type `T`
+pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, 
len: u32) {
+let b = self.completed.get_unchecked(block as usize);
+let start = offset as usize;
+let end = start.saturating_add(len as usize);
+let b = b.get_unchecked(start..end);
+
+if len <= 12 {
+let mut view_buffer = [0; 16];
+view_buffer[0..4].copy_from_slice(&len.to_le_bytes());
+view_buffer[4..4 + b.len()].copy_from_slice(b);
+self.views_builder.append(u128::from_le_bytes(view_buffer));
+} else {
+let view = ByteView {
+length: len,
+prefix: u32::from_le_bytes(b[0..4].try_into().unwrap()),
+buffer_index: block,
+offset,
+};
+self.views_builder.append(view.into());
+}
+
+self.null_buffer_builder.append_non_null();
+}
+
 /// Try to append a view of the given `block`, `offset` and `length`
 ///
 /// See [`Self::append_block`]
@@ -139,22 +169,9 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
 ));
 }
 
-if len <= 12 {
-let mut view_buffer = [0; 16];
-view_buffer[0..4].copy_from_slice(&len.to_le_bytes());
-view_buffer[4..4 + b.len()].copy_from_slice(b);
-self.views_builder.append(u128::from_le_bytes(view_buffer));
-} else {
-let view = ByteView {
-length: len,
-prefix: u32::from_le_bytes(b[0..4].try_into().unwrap()),
-buffer_index: block,
-offset,
-};
-self.views_builder.append(view.into());
+unsafe {
+self.append_view_unchecked(block, offset, len);
 }
-
-self.null_buffer_builder.append_non_null();
 Ok(())
 }
 
diff --git a/arrow-cast/src/cast/dictionary.rs 
b/arrow-cast/src/cast/dictionary.rs
index 244e101f1d8..d929277a4da 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -85,10 +85,69 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
 
 Ok(new_array)
 }
+Utf8View => {
+// `unpack_dictionary` can handle Utf8View/BinaryView types, but 
incurs unnecessary data copy of the value buffer.
+// we handle it here to avoid the copy.
+let dict_array = array
+.as_dictionary::<K>()
+.downcast_dict::<StringArray>()
+.unwrap();
+
+let string_view = view_from_dict_values::<K, StringViewType>(
+dict_array.values(),
+dict_array.keys(),
+);
+Ok(Arc::new(string_view))
+}
+BinaryView => {
+// `unpack_dictionary` can handle Utf8View/BinaryView types, but 
incurs unnecessary data copy of the value buffer.
+// we handle it here to avoid the copy.
+let dict_array = array
+.as_dictionary::<K>()
+.downcast_dict::<BinaryArray>()
+.unwrap();
+
+let binary_view = view_from_dict_values::<K, BinaryViewType>(
+dict_array.values(),
+dict_array.keys(),
+);
+Ok(Arc::new(binary_view))
+}
_ => unpack_dictionary::<K>(array, to_type, 

(arrow-rs) branch master updated: feat(object_store): Add `put` API for buffered::BufWriter (#5835)

2024-06-13 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 601a7222069 feat(object_store): Add `put` API for buffered::BufWriter 
(#5835)
601a7222069 is described below

commit 601a722206963b44753a6dd53cfb0ad53643851e
Author: Xuanwo 
AuthorDate: Thu Jun 13 19:23:22 2024 +0800

feat(object_store): Add `put` API for buffered::BufWriter (#5835)

* feat(object_store): Add buffered::BufUploader

Signed-off-by: Xuanwo 

* Polish tests

Signed-off-by: Xuanwo 

* Merge BufUploader into BufWriter

Signed-off-by: Xuanwo 

* Fix docs

Signed-off-by: Xuanwo 

* Add comment

Signed-off-by: Xuanwo 

-

Signed-off-by: Xuanwo 
---
 object_store/src/buffered.rs | 113 +--
 1 file changed, 110 insertions(+), 3 deletions(-)
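
A minimal sketch of the new API, assuming the object_store, bytes and tokio
crates (the in-memory store and path are illustrative):

    use std::sync::Arc;

    use bytes::Bytes;
    use object_store::{buffered::BufWriter, memory::InMemory, path::Path, ObjectStore};
    use tokio::io::AsyncWriteExt;

    #[tokio::main]
    async fn main() -> Result<(), Box<dyn std::error::Error>> {
        let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
        let path = Path::from("data/example.bin");

        // `put` hands an owned `Bytes` straight to the writer, avoiding the
        // extra copy that the `AsyncWrite` byte-slice interface requires.
        let mut writer = BufWriter::new(Arc::clone(&store), path);
        writer.put(Bytes::from(vec![0u8; 1024])).await?;
        writer.shutdown().await?;
        Ok(())
    }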

diff --git a/object_store/src/buffered.rs b/object_store/src/buffered.rs
index feb84d4d0bc..c7b71aa1cc2 100644
--- a/object_store/src/buffered.rs
+++ b/object_store/src/buffered.rs
@@ -238,11 +238,11 @@ enum BufWriterState {
 /// Buffer up to capacity bytes
 Buffer(Path, PutPayloadMut),
 /// [`ObjectStore::put_multipart`]
-Prepare(BoxFuture<'static, std::io::Result<WriteMultipart>>),
+Prepare(BoxFuture<'static, crate::Result<WriteMultipart>>),
 /// Write to a multipart upload
Write(Option<WriteMultipart>),
 /// [`ObjectStore::put`]
-Flush(BoxFuture<'static, std::io::Result<()>>),
+Flush(BoxFuture<'static, crate::Result<()>>),
 }
 
 impl BufWriter {
@@ -289,6 +289,58 @@ impl BufWriter {
 }
 }
 
+/// Write data to the writer in [`Bytes`].
+///
+/// Unlike [`AsyncWrite::poll_write`], `put` can write data without extra 
copying.
+///
+/// This API is recommended while the data source generates [`Bytes`].
+pub async fn put(&mut self, bytes: Bytes) -> crate::Result<()> {
+loop {
+return match &mut self.state {
+BufWriterState::Write(Some(write)) => {
+write.wait_for_capacity(self.max_concurrency).await?;
+write.put(bytes);
+Ok(())
+}
+BufWriterState::Write(None) | BufWriterState::Flush(_) => {
+panic!("Already shut down")
+}
+// NOTE
+//
+// This case should never happen in practice, but rust async 
API does
+// make it possible for users to call `put` before 
`poll_write` returns `Ready`.
+//
+// We allow such usage by `await` the future and continue the 
loop.
+BufWriterState::Prepare(f) => {
+self.state = BufWriterState::Write(f.await?.into());
+continue;
+}
+BufWriterState::Buffer(path, b) => {
+if b.content_length().saturating_add(bytes.len()) < 
self.capacity {
+b.push(bytes);
+Ok(())
+} else {
+let buffer = std::mem::take(b);
+let path = std::mem::take(path);
+let opts = PutMultipartOpts {
+attributes: 
self.attributes.take().unwrap_or_default(),
+tags: self.tags.take().unwrap_or_default(),
+};
+let upload = self.store.put_multipart_opts(&path, 
opts).await?;
+let mut chunked =
+WriteMultipart::new_with_chunk_size(upload, 
self.capacity);
+for chunk in buffer.freeze() {
+chunked.put(chunk);
+}
+chunked.put(bytes);
+self.state = BufWriterState::Write(Some(chunked));
+Ok(())
+}
+}
+};
+}
+}
+
 /// Abort this writer, cleaning up any partially uploaded state
 ///
 /// # Panic
@@ -384,7 +436,7 @@ impl AsyncWrite for BufWriter {
 Ok(())
 }));
 }
-BufWriterState::Flush(f) => return f.poll_unpin(cx),
+BufWriterState::Flush(f) => return 
f.poll_unpin(cx).map_err(std::io::Error::from),
 BufWriterState::Write(x) => {
 let upload = x.take().unwrap();
 self.state = BufWriterState::Flush(
@@ -416,6 +468,7 @@ mod tests {
 use crate::memory::InMemory;
 use crate::path::Path;
 use crate::{Attribute, GetOptions};
+use itertools::Itertools;
 use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, 
AsyncWriteExt};
 
 #[tokio::test]

(arrow) branch main updated: GH-42109: [C++][CMake] Add preset for Valgrind (#42110)

2024-06-13 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 40f9c2616b GH-42109: [C++][CMake] Add preset for Valgrind (#42110)
40f9c2616b is described below

commit 40f9c2616b6fb7868cdd6a26ddd4a59b80ce52ac
Author: Sutou Kouhei 
AuthorDate: Thu Jun 13 18:04:31 2024 +0900

GH-42109: [C++][CMake] Add preset for Valgrind (#42110)

### Rationale for this change

If we want to use Valgrind for our tests, we need to specify at least the 
following CMake options:

* `ARROW_RUNTIME_SIMD_LEVEL=AVX2`
* `ARROW_TEST_MEMCHECK=ON`

If we have a CMake preset for Valgrind, we don't need to remember them.

### What changes are included in this PR?

Add `features-valgrind` and `ninja-debug-valgrind` presets. 
`features-valgrind` is a hidden preset.

### Are these changes tested?

Yes. I used this to reproduce #42107 on local.

### Are there any user-facing changes?

No.
* GitHub Issue: #42109

Authored-by: Sutou Kouhei 
Signed-off-by: Sutou Kouhei 
---
 cpp/CMakePresets.json | 50 +-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json
index 13d1241990..cb4cdfc03a 100644
--- a/cpp/CMakePresets.json
+++ b/cpp/CMakePresets.json
@@ -220,6 +220,14 @@
 "PARQUET_REQUIRE_ENCRYPTION": "ON"
   }
 },
+{
+  "name": "features-valgrind",
+  "hidden": true,
+  "cacheVariables": {
+"ARROW_RUNTIME_SIMD_LEVEL": "AVX2",
+"ARROW_TEST_MEMCHECK": "ON"
+  }
+},
 {
   "name": "ninja-debug-minimal",
   "inherits": [
@@ -331,6 +339,46 @@
   "displayName": "Debug build with everything enabled (except benchmarks)",
   "cacheVariables": {}
 },
+{
+  "name": "ninja-debug-valgrind-basic",
+  "inherits": [
+"base-debug",
+"features-basic",
+"features-valgrind"
+  ],
+  "displayName": "Debug build for Valgrind with reduced dependencies",
+  "cacheVariables": {}
+},
+{
+  "name": "ninja-debug-valgrind",
+  "inherits": [
+"base-debug",
+"features-main",
+"features-valgrind"
+  ],
+  "displayName": "Debug build for Valgrind with more optional components",
+  "cacheVariables": {}
+},
+{
+  "name": "ninja-debug-valgrind-minimal",
+  "inherits": [
+"base-debug",
+"features-minimal",
+"features-valgrind"
+  ],
+  "displayName": "Debug build for Valgrind without anything enabled",
+  "cacheVariables": {}
+},
+{
+  "name": "ninja-debug-valgrind-maximal",
+  "inherits": [
+"base-debug",
+"features-maximal",
+"features-valgrind"
+  ],
+  "displayName": "Debug build for Valgrind with everything enabled",
+  "cacheVariables": {}
+},
 {
   "name": "ninja-release-minimal",
   "inherits": [
@@ -491,4 +539,4 @@
   }
 }
   ]
-}
\ No newline at end of file
+}



(arrow) branch main updated: GH-42107: [C++][FS][Azure] Ensure setting BlobSasBuilder::Protocol (#42108)

2024-06-13 Thread kou
This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 787a6ab32e GH-42107: [C++][FS][Azure] Ensure setting 
BlobSasBuilder::Protocol (#42108)
787a6ab32e is described below

commit 787a6ab32e04fe259504a74c20b9e1135914490f
Author: Sutou Kouhei 
AuthorDate: Thu Jun 13 18:02:39 2024 +0900

GH-42107: [C++][FS][Azure] Ensure setting BlobSasBuilder::Protocol (#42108)

### Rationale for this change

`BlobSasBuilder::Protocol` is used in `BlobSasBuilder::GenerateSasToken()` 
and it doesn't have a default value. If we don't specify it explicitly, the 
`GenerateSasToken()` result may be unexpected.

### What changes are included in this PR?

Set `BlobSasBuilder::Protocol` explicitly based on 
`AzureOptions::blob_storage_scheme`. It's `https` by default but can be `http` 
when `enable_tls=false` query parameter is specified.

If it's `http`, both of `https` and `http` are accepted as protocol.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.
* GitHub Issue: #42107

Authored-by: Sutou Kouhei 
Signed-off-by: Sutou Kouhei 
---
 cpp/src/arrow/filesystem/azurefs.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index 809aef32b3..ca64486724 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -418,6 +418,9 @@ AzureOptions::MakeDataLakeServiceClient() const {
 
Result<std::string> AzureOptions::GenerateSASToken(
 Storage::Sas::BlobSasBuilder* builder, Blobs::BlobServiceClient* client) 
const {
+  using SasProtocol = Storage::Sas::SasProtocol;
+  builder->Protocol =
+  blob_storage_scheme == "http" ? SasProtocol::HttpsAndHttp : 
SasProtocol::HttpsOnly;
   if (storage_shared_key_credential_) {
 return builder->GenerateSasToken(*storage_shared_key_credential_);
   } else {



(arrow) branch main updated (512d2458ff -> 8ae1edbc61)

2024-06-12 Thread maplefu
This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 512d2458ff GH-42015: [MATLAB] Executing `tfeather.m` test class causes 
MATLAB to crash on `windows-2022` after MSVC update from 14.39.33519 to 
14.40.33807 (#42123)
 add 8ae1edbc61 GH-41923: [C++] Fix ExecuteScalar deduce all_scalar with 
chunked_array (#41925)

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/compute/expression.cc  |  4 +---
 cpp/src/arrow/compute/expression_test.cc | 35 
 2 files changed, 36 insertions(+), 3 deletions(-)



(arrow-nanoarrow) branch main updated (d96edacc -> 9c4aba3f)

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


from d96edacc fix: Check for offset + length > int64_max before using the 
value to calculate buffer sizes (#524)
 add 9c4aba3f Update dist/ for commit 
d96edacc04412f333f2287b67faa6d273f86aad7

No new revisions were added by this update.

Summary of changes:
 dist/nanoarrow.c   | 286 +
 dist/nanoarrow_device.c|   6 +-
 dist/nanoarrow_ipc.c   |  97 ---
 dist/nanoarrow_testing.hpp |  24 +---
 4 files changed, 216 insertions(+), 197 deletions(-)



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 6d40a046 update documentation for tag main
6d40a046 is described below

commit 6d40a046f4de0295ed6cbe619b3704856626df07
Author: GitHub Actions 
AuthorDate: Thu Jun 13 01:17:50 2024 +

update documentation for tag main
---
 main/genindex.html  |  58 +-
 main/objects.inv| Bin 21095 -> 22601 bytes
 main/r/pkgdown.yml  |   2 +-
 main/reference/testing.html | 254 +++-
 main/searchindex.js |   2 +-
 5 files changed, 308 insertions(+), 8 deletions(-)

diff --git a/main/genindex.html b/main/genindex.html
index ff083398..d85db9cf 100644
--- a/main/genindex.html
+++ b/main/genindex.html
@@ -1545,8 +1545,6 @@ document.write(`
 
   nanoarrow::EmptyArrayStream
 (C++ class)
 
-  
-  
   nanoarrow::EmptyArrayStream::EmptyArrayStream
 (C++ function)
 
   nanoarrow::EmptyArrayStream::MakeUnique
 (C++ function)
@@ -1554,6 +1552,62 @@ document.write(`
   nanoarrow::EmptyArrayStream::ToArrayStream
 (C++ function)
 
   nanoarrow::Exception 
(C++ class)
+
+  nanoarrow::testing::TestingJSONComparison
 (C++ class)
+
+  nanoarrow::testing::TestingJSONComparison::ClearDifferences
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::CompareArrayStream
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::CompareBatch
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::CompareSchema
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::num_differences
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::set_compare_batch_flags
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::set_compare_float_precision
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::set_compare_metadata_order
 (C++ function)
+
+  nanoarrow::testing::TestingJSONComparison::SetSchema
 (C++ function)
+
+  
+  
+  nanoarrow::testing::TestingJSONComparison::WriteDifferences
 (C++ function)
+
+  nanoarrow::testing::TestingJSONReader
 (C++ class)
+
+  nanoarrow::testing::TestingJSONReader::ReadBatch
 (C++ function)
+
+  nanoarrow::testing::TestingJSONReader::ReadColumn
 (C++ function)
+
+  nanoarrow::testing::TestingJSONReader::ReadDataFile
 (C++ function)
+
+  nanoarrow::testing::TestingJSONReader::ReadField
 (C++ function)
+
+  nanoarrow::testing::TestingJSONReader::ReadSchema
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter
 (C++ class)
+
+  nanoarrow::testing::TestingJSONWriter::set_float_precision
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::set_include_metadata
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteBatch
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteColumn
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteDataFile
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteField
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteMetadata
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteSchema
 (C++ function)
+
+  nanoarrow::testing::TestingJSONWriter::WriteType
 (C++ function)
 
   nanoarrow::VectorArrayStream
 (C++ class)
 
diff --git a/main/objects.inv b/main/objects.inv
index 85c77f25..6e4ed32b 100644
Binary files a/main/objects.inv and b/main/objects.inv differ
diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 85f416f1..489f5ac7 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-12T16:50Z
+last_built: 2024-06-13T01:17Z
 
diff --git a/main/reference/testing.html b/main/reference/testing.html
index 8a41e2b7..6f36d272 100644
--- a/main/reference/testing.html
+++ b/main/reference/testing.html
@@ -413,10 +413,221 @@ document.write(`
 
 
 Integration testing JSON#
-
-Warning
-doxygengroup: Cannot find group “nanoarrow_testing-json” in doxygen xml 
output for project “nanoarrow_c” from directory: ../../src/apidoc/xml
+
+
+group nanoarrow_testing-json
+See testing format documentation for details of the JSON 
representation. This representation is not canonical but can be used to 
implement integration tests with other implementations. 
+
+
+class TestingJSONWriter
+#include nanoarrow_testing.hpp
+Writer for the Arrow integration testing JSON format. 
+
+Public Functions
+
+
+inline void set_float_precision(int precision): Set the floating point precision of the writer. 
+The floating point precision by default is -1, which uses the JSON 
serializer to encode the value in the output. When writing files specifically 
for integration tests, floating point values should be rounded to 3 decimal 
places to avoid 

(arrow-nanoarrow) branch main updated (d6eb52bb -> d96edacc)

2024-06-12 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


from d6eb52bb refactor: Remove CMake requirement from Meson IPC config 
(#522)
 add d96edacc fix: Check for offset + length > int64_max before using the 
value to calculate buffer sizes (#524)

No new revisions were added by this update.

Summary of changes:
 src/nanoarrow/array.c   | 53 -
 src/nanoarrow/array_test.cc | 29 +++-
 src/nanoarrow/buffer_test.cc|  2 --
 src/nanoarrow/nanoarrow_testing.hpp | 24 ++---
 4 files changed, 53 insertions(+), 55 deletions(-)
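
The underlying idea, sketched here in Rust rather than the project's C (a
conceptual illustration, not the commit's code): compute offset + length with
an overflow check before deriving any buffer size from it.

    /// Returns the exclusive end index, or an error if offset + length
    /// would overflow a signed 64-bit integer.
    fn checked_end(offset: i64, length: i64) -> Result<i64, String> {
        offset
            .checked_add(length)
            .ok_or_else(|| format!("offset ({offset}) + length ({length}) overflows int64"))
    }

    fn main() {
        assert_eq!(checked_end(10, 5), Ok(15));
        // Without the check, the sum wraps around and buffer sizes computed
        // from it would be nonsense (or worse, lead to out-of-bounds reads).
        assert!(checked_end(i64::MAX, 1).is_err());
    }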



(arrow-rs) branch master updated: Benchmark for casting view to dict arrays (and the reverse) (#5874)

2024-06-12 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 4fc9ad126ff Benchmark for casting view to dict arrays (and the 
reverse) (#5874)
4fc9ad126ff is described below

commit 4fc9ad126ff26d2ad81d3b35f0c11d262cb104f4
Author: Xiangpeng Hao 
AuthorDate: Wed Jun 12 18:31:06 2024 -0400

Benchmark for casting view to dict arrays (and the reverse) (#5874)

* add benchmark

* make clippy happy

* move to arrow workspace
---
 arrow/benches/cast_kernels.rs | 25 +
 1 file changed, 25 insertions(+)

diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 228408e5711..8803e8eea87 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -114,6 +114,17 @@ fn build_decimal256_array(size: usize, precision: u8, 
scale: i8) -> ArrayRef {
 )
 }
 
+fn build_dict_array(size: usize) -> ArrayRef {
+let values = StringArray::from_iter([
+Some("small"),
+Some("larger string more than 12 bytes"),
+None,
+]);
+let keys = UInt64Array::from_iter((0..size as u64).map(|v| v % 3));
+
+Arc::new(DictionaryArray::new(keys, Arc::new(values)))
+}
+
 // cast array from specified primitive array type to desired data type
fn cast_array(array: &ArrayRef, to_type: DataType) {
criterion::black_box(cast(array, &to_type).unwrap());
@@ -138,6 +149,9 @@ fn add_benchmark(c: &mut Criterion) {
 let decimal128_array = build_decimal128_array(512, 10, 3);
 let decimal256_array = build_decimal256_array(512, 50, 3);
 
+let dict_array = build_dict_array(10_000);
+let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();
+
 c.bench_function("cast int32 to int32 512", |b| {
b.iter(|| cast_array(&i32_array, DataType::Int32))
 });
@@ -237,6 +251,17 @@ fn add_benchmark(c:  Criterion) {
 c.bench_function("cast decimal256 to decimal256 512 with same scale", |b| {
b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(60, 3)))
 });
+c.bench_function("cast dict to string view", |b| {
+b.iter(|| cast_array(&dict_array, DataType::Utf8View))
+});
+c.bench_function("cast string view to dict", |b| {
+b.iter(|| {
+cast_array(
+&string_view_array,
+DataType::Dictionary(Box::new(DataType::UInt64), 
Box::new(DataType::Utf8)),
+)
+})
+});
 }
 
 criterion_group!(benches, add_benchmark);



(arrow) branch main updated: GH-42015: [MATLAB] Executing `tfeather.m` test class causes MATLAB to crash on `windows-2022` after MSVC update from 14.39.33519 to 14.40.33807 (#42123)

2024-06-12 Thread sgilmore
This is an automated email from the ASF dual-hosted git repository.

sgilmore pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 512d2458ff GH-42015: [MATLAB] Executing `tfeather.m` test class causes 
MATLAB to crash on `windows-2022` after MSVC update from 14.39.33519 to 
14.40.33807 (#42123)
512d2458ff is described below

commit 512d2458ff777f15354ee2a88bde7723ec3d5035
Author: Sarah Gilmore <74676073+sgilmor...@users.noreply.github.com>
AuthorDate: Wed Jun 12 13:22:19 2024 -0400

GH-42015: [MATLAB] Executing `tfeather.m` test class causes MATLAB to crash 
on `windows-2022` after MSVC update from 14.39.33519 to 14.40.33807 (#42123)

### Rationale for this change

After the `windows-2022` GitHub runner image was updated last week, MATLAB 
began crashing when running the unit tests in `arrow/matlab/test/tfeather.m` on 
Windows. As part of the update, VS 2022 was updated from
`17.9.34902.65` to `17.10.34928.147` and MSVC was updated from 
`14.39.33519` to `14.40.33807`.

It looks like many other projects have run into this issue as well:

1. https://github.com/actions/runner-images/issues/10004
2. https://github.com/actions/runner-images/issues/10020

The suggested workaround for this crash is to supply the flag 
`_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR` when building.

### What changes are included in this PR?

1. Supply `_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR` flag when building  Arrow 
C++.

### Are these changes tested?

N/A. Existing tests used.

### Are there any user-facing changes?

No.
* GitHub Issue: #42015

Authored-by: Sarah Gilmore 
Signed-off-by: Sarah Gilmore 
---
 matlab/CMakeLists.txt | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index d8ed65ce92..4daca3a5bc 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -34,9 +34,15 @@ function(build_arrow)
 
   set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix")
   set(ARROW_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-build")
+
+  # Supply -DARROW_CXXFLAGS=-D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR to fix
+  # a segfault on windows. See https://github.com/apache/arrow/issues/42015.
   set(ARROW_CMAKE_ARGS
-  "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}" "-DCMAKE_INSTALL_LIBDIR=lib"
-  "-DARROW_BUILD_STATIC=OFF" "-DARROW_CSV=ON")
+  "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}"
+  "-DCMAKE_INSTALL_LIBDIR=lib"
+  "-DARROW_BUILD_STATIC=OFF"
+  "-DARROW_CSV=ON"
+  "-DARROW_CXXFLAGS=-D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR")
 
   add_library(arrow_shared SHARED IMPORTED)
   set(ARROW_LIBRARY_TARGET arrow_shared)



(arrow) branch main updated (e6e37b2c54 -> 09d81c4252)

2024-06-12 Thread apitrou
This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from e6e37b2c54 MINOR: [DEV] Add sgilmore10 to CODEOWNERS for `/matlab/` 
(#42125)
 add 09d81c4252 GH-42065: [C++] Support list-views on list_slice (#42067)

No new revisions were added by this update.

Summary of changes:
 cpp/src/arrow/compute/kernels/CMakeLists.txt   |   1 +
 .../arrow/compute/kernels/scalar_list_benchmark.cc | 153 +
 cpp/src/arrow/compute/kernels/scalar_nested.cc | 346 +
 .../arrow/compute/kernels/scalar_nested_test.cc|  86 +++--
 python/pyarrow/tests/test_compute.py   |   2 +-
 5 files changed, 425 insertions(+), 163 deletions(-)
 create mode 100644 cpp/src/arrow/compute/kernels/scalar_list_benchmark.cc



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 1855017f update documentation for tag main
1855017f is described below

commit 1855017fb037387157823474edad7b42ee38febb
Author: GitHub Actions 
AuthorDate: Wed Jun 12 16:50:29 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 205a5db6..85f416f1 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-12T14:57Z
+last_built: 2024-06-12T16:50Z
 



(arrow-nanoarrow) branch main updated: refactor: Remove CMake requirement from Meson IPC config (#522)

2024-06-12 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new d6eb52bb refactor: Remove CMake requirement from Meson IPC config 
(#522)
d6eb52bb is described below

commit d6eb52bb0f18c2b2a49456a95d97d646ab54d3b4
Author: William Ayd 
AuthorDate: Wed Jun 12 12:48:33 2024 -0400

refactor: Remove CMake requirement from Meson IPC config (#522)

Follow up to https://github.com/apache/arrow-nanoarrow/pull/483
---
 ci/scripts/build-with-meson.sh |  2 +-
 src/nanoarrow/meson.build  | 15 +--
 subprojects/flatcc.wrap|  3 +++
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/ci/scripts/build-with-meson.sh b/ci/scripts/build-with-meson.sh
index cd76c6db..85bff22b 100755
--- a/ci/scripts/build-with-meson.sh
+++ b/ci/scripts/build-with-meson.sh
@@ -66,7 +66,7 @@ function main() {
 pushd "${SANDBOX_DIR}"
 
 show_header "Run test suite"
-meson configure -Dtests=true -Db_coverage=true
+meson configure -Dtests=true -Db_coverage=true -Dipc=true
 meson compile
 meson test --wrap='valgrind --track-origins=yes --leak-check=full' 
--print-errorlogs
 
diff --git a/src/nanoarrow/meson.build b/src/nanoarrow/meson.build
index ccf59c99..e96d98b1 100644
--- a/src/nanoarrow/meson.build
+++ b/src/nanoarrow/meson.build
@@ -65,11 +65,7 @@ nanoarrow_dep = declare_dependency(include_directories: 
[curdir, incdir],
link_with: nanoarrow_lib)
 
 if get_option('ipc')
-cmake = import('cmake')
-cmake_opts = cmake.subproject_options()
-cmake_opts.add_cmake_defines({'CMAKE_POSITION_INDEPENDENT_CODE': true})
-flatcc_subproj = cmake.subproject('flatcc', options: cmake_opts)
-flatcc_dep = flatcc_subproj.dependency('flatccrt')
+flatcc_dep = dependency('flatcc')
 
 nanoarrow_ipc_lib = build_target(
 'nanoarrow_ipc',
@@ -172,9 +168,14 @@ if get_option('tests')
   ipc_test_files = {
   'nanoarrow-ipc-decoder': {
   'deps': [nanoarrow_ipc_dep, arrow_dep, gtest_dep],
+  'timeout': 30,
   },
   'nanoarrow-ipc-reader': {
   'deps': [nanoarrow_ipc_dep, arrow_dep, gtest_dep],
+  # the ipc reader test can take longer when executed
+  # under valgrind, hence the increased timeout
+  'timeout': 90,
+
   },
   'nanoarrow-ipc-files': {
   'deps': [
@@ -184,9 +185,11 @@ if get_option('tests')
   gtest_dep,
   nlohmann_json_dep
   ],
+  'timeout': 30,
   },
   'nanoarrow-ipc-hpp': {
   'deps': [nanoarrow_ipc_dep, gtest_dep],
+  'timeout': 30,
   },
   }
 
@@ -196,7 +199,7 @@ if get_option('tests')
   name.replace('-', '_') + '_test.cc',
   dependencies: config['deps']
   )
-  test(name, exc)
+  test(name, exc, timeout: config['timeout'])
   endforeach
   endif
 endif
diff --git a/subprojects/flatcc.wrap b/subprojects/flatcc.wrap
index 311425ae..811cf38c 100644
--- a/subprojects/flatcc.wrap
+++ b/subprojects/flatcc.wrap
@@ -21,3 +21,6 @@ source_url = 
https://github.com/dvidelabs/flatcc/archive/refs/tags/v0.6.1.tar.gz
 source_filename = flatcc-0.6.1.tar.gz
 source_hash = 2533c2f1061498499f15acc7e0937dcf35bc68e685d237325124ae0d6c600c2b
 patch_directory = flatcc
+
+[provide]
+flatcc = flatcc_dep



(arrow) branch main updated (383ffe0607 -> e6e37b2c54)

2024-06-12 Thread assignuser
This is an automated email from the ASF dual-hosted git repository.

assignuser pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 383ffe0607 GH-37333: [Python] Replace pandas.util.testing.rands with 
vendored version (#42089)
 add e6e37b2c54 MINOR: [DEV] Add sgilmore10 to CODEOWNERS for `/matlab/` 
(#42125)

No new revisions were added by this update.

Summary of changes:
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 68bf9d5c update documentation for tag main
68bf9d5c is described below

commit 68bf9d5c4609d7a80987c2ebda772e8a9781fe3c
Author: GitHub Actions 
AuthorDate: Wed Jun 12 14:57:24 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 2bb81267..205a5db6 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T21:39Z
+last_built: 2024-06-12T14:57Z
 



(arrow-nanoarrow) branch main updated: refactor: Use inttypes.h macros instead of casts to print fixed-width integers (#520)

2024-06-12 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new f6a6b407 refactor: Use inttypes.h macros instead of casts to print 
fixed-width integers (#520)
f6a6b407 is described below

commit f6a6b407e62484bef3cee88d0a7bf2a168231da4
Author: William Ayd 
AuthorDate: Wed Jun 12 10:55:17 2024 -0400

refactor: Use inttypes.h macros instead of casts to print fixed-width 
integers (#520)
---
 src/apps/dump_stream.c|   5 +-
 src/nanoarrow/array.c | 193 ++
 src/nanoarrow/nanoarrow_device.c  |   6 +-
 src/nanoarrow/nanoarrow_device_cuda.c |   4 +-
 src/nanoarrow/nanoarrow_ipc_decoder.c |  91 
 src/nanoarrow/nanoarrow_ipc_reader.c  |   6 +-
 src/nanoarrow/schema.c|  58 +-
 7 files changed, 195 insertions(+), 168 deletions(-)

diff --git a/src/apps/dump_stream.c b/src/apps/dump_stream.c
index ba1baec4..e01cfd7c 100644
--- a/src/apps/dump_stream.c
+++ b/src/apps/dump_stream.c
@@ -17,6 +17,7 @@
 
 #include "nanoarrow/nanoarrow_ipc.h"
 
+#include <inttypes.h>
 #include 
 #include 
 #include 
@@ -121,8 +122,8 @@ int main(int argc, char* argv[]) {
 
   end = clock();
   elapsed = (end - begin) / ((double)CLOCKS_PER_SEC);
-  fprintf(stdout, "Read %ld rows in %ld batch(es) <%.06f seconds>\n", 
(long)row_count,
-  (long)batch_count, elapsed);
+  fprintf(stdout, "Read %" PRId64 " rows in %" PRId64 " batch(es) <%.06f 
seconds>\n",
+  row_count, batch_count, elapsed);
 
   ArrowArrayStreamRelease();
   fclose(file_ptr);
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index ebd95df1..0143d4af 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -16,6 +16,7 @@
 // under the License.
 
 #include 
+#include <inttypes.h>
 #include 
 #include 
 
@@ -696,15 +697,17 @@ static int ArrowArrayViewSetArrayInternal(struct 
ArrowArrayView* array_view,
 
   // Check the number of buffers
   if (buffers_required != array->n_buffers) {
-ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d 
buffer(s)",
-  (int)buffers_required, (int)array->n_buffers);
+ArrowErrorSet(error,
+  "Expected array with %" PRId64 " buffer(s) but found %" 
PRId64
+  " buffer(s)",
+  buffers_required, array->n_buffers);
 return EINVAL;
   }
 
   // Check number of children
   if (array_view->n_children != array->n_children) {
-ArrowErrorSet(error, "Expected %ld children but found %ld children",
-  (long)array_view->n_children, (long)array->n_children);
+ArrowErrorSet(error, "Expected %" PRId64 " children but found %" PRId64 " 
children",
+  array_view->n_children, array->n_children);
 return EINVAL;
   }
 
@@ -736,14 +739,14 @@ static int ArrowArrayViewSetArrayInternal(struct 
ArrowArrayView* array_view,
 static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
  struct ArrowError* error) {
   if (array_view->length < 0) {
-ArrowErrorSet(error, "Expected length >= 0 but found length %ld",
-  (long)array_view->length);
+ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64,
+  array_view->length);
 return EINVAL;
   }
 
   if (array_view->offset < 0) {
-ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld",
-  (long)array_view->offset);
+ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64,
+  array_view->offset);
 return EINVAL;
   }
 
@@ -791,11 +794,11 @@ static int ArrowArrayViewValidateMinimal(struct 
ArrowArrayView* array_view,
   array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
 } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) 
{
   ArrowErrorSet(error,
-"Expected %s array buffer %d to have size >= %ld bytes but 
found "
-"buffer with %ld bytes",
-ArrowTypeString(array_view->storage_type), (int)i,
-(long)min_buffer_size_bytes,
-(long)array_view->buffer_views[i].size_bytes);
+"Expected %s array buffer %d to have size >= %" PRId64
+" bytes but found "
+"buffer with %" PRId64 " bytes",
+ArrowTypeString(array_view->storage_type), i, 
min_buffer_size_bytes,
+array_view->buffer_views[i].size_bytes);
   return EINVAL;
 }
   }
@@ -807,17 +810,17 @@ static int ArrowArrayViewValidateMinimal(struct 
ArrowArrayView* array_view,
 case NANOARROW_TYPE_FIXED_SIZE_LIST:
 case NANOARROW_TYPE_MAP:
   if (array_view->n_children != 1) {
-ArrowErrorSet(error, 

(arrow-rs) branch asf-site updated: deploy: 0cc14168000e1e41fc5f63929d34d13dda6e5873

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new ddff770688c deploy: 0cc14168000e1e41fc5f63929d34d13dda6e5873
ddff770688c is described below

commit ddff770688cf8931d176d0211b14d15964251b8c
Author: tustvold 
AuthorDate: Wed Jun 12 10:22:38 2024 +

deploy: 0cc14168000e1e41fc5f63929d34d13dda6e5873
---
 help.html|  2 +-
 index.html   |  2 +-
 parquet/basic/enum.Compression.html  |  2 +-
 parquet/basic/enum.ConvertedType.html|  2 +-
 parquet/basic/enum.SortOrder.html|  2 +-
 parquet/data_type/struct.ByteArray.html  | 14 +++---
 parquet/data_type/struct.FixedLenByteArray.html  | 16 
 parquet/data_type/struct.Int96.html  |  2 +-
 parquet/data_type/trait.AsBytes.html |  2 +-
 parquet/errors/enum.ParquetError.html|  2 +-
 .../serialized_reader/struct.SerializedFileReader.html   |  2 +-
 parquet/file/statistics/enum.Statistics.html |  2 +-
 parquet/format/struct.BoundaryOrder.html |  2 +-
 parquet/format/struct.CompressionCodec.html  |  2 +-
 parquet/format/struct.ConvertedType.html |  2 +-
 parquet/format/struct.Encoding.html  |  2 +-
 parquet/format/struct.FieldRepetitionType.html   |  2 +-
 parquet/format/struct.PageType.html  |  2 +-
 parquet/format/struct.Type.html  |  2 +-
 parquet/record/api/enum.Field.html   |  2 +-
 parquet/record/enum.Field.html   |  2 +-
 parquet/schema/types/struct.ColumnPath.html  |  2 +-
 parquet_derive_test/struct.ACompleteRecord.html  |  2 +-
 parquet_derive_test/struct.APartiallyCompleteRecord.html |  2 +-
 search-index.js  |  2 +-
 settings.html|  2 +-
 26 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/help.html b/help.html
index 2079bb54bf5..fe27b595408 100644
--- a/help.html
+++ b/help.html
@@ -1 +1 @@

(arrow-rs) branch dependabot/cargo/object_store/master/quick-xml-0.32.0 deleted (was 4c51e68d1fe)

2024-06-12 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch 
dependabot/cargo/object_store/master/quick-xml-0.32.0
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


 was 4c51e68d1fe Update quick-xml requirement from 0.31.0 to 0.32.0 in 
/object_store

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow-rs) branch master updated: Update quick-xml requirement from 0.31.0 to 0.32.0 in /object_store (#5870)

2024-06-12 Thread tustvold
This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 0cc14168000 Update quick-xml requirement from 0.31.0 to 0.32.0 in 
/object_store (#5870)
0cc14168000 is described below

commit 0cc14168000e1e41fc5f63929d34d13dda6e5873
Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
AuthorDate: Wed Jun 12 11:19:22 2024 +0100

Update quick-xml requirement from 0.31.0 to 0.32.0 in /object_store (#5870)

Updates the requirements on [quick-xml](https://github.com/tafia/quick-xml) 
to permit the latest version.
- [Release notes](https://github.com/tafia/quick-xml/releases)
- [Changelog](https://github.com/tafia/quick-xml/blob/master/Changelog.md)
- [Commits](https://github.com/tafia/quick-xml/compare/v0.31.0...v0.32.0)

---
updated-dependencies:
- dependency-name: quick-xml
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] 
Co-authored-by: dependabot[bot] 
<49699333+dependabot[bot]@users.noreply.github.com>
---
 object_store/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml
index 203e48d1f42..13da3a8c7ab 100644
--- a/object_store/Cargo.toml
+++ b/object_store/Cargo.toml
@@ -46,7 +46,7 @@ walkdir = "2"
 # Cloud storage support
 base64 = { version = "0.22", default-features = false, features = ["std"], 
optional = true }
 hyper = { version = "1.2", default-features = false, optional = true }
-quick-xml = { version = "0.31.0", features = ["serialize", 
"overlapped-lists"], optional = true }
+quick-xml = { version = "0.32.0", features = ["serialize", 
"overlapped-lists"], optional = true }
 serde = { version = "1.0", default-features = false, features = ["derive"], 
optional = true }
 serde_json = { version = "1.0", default-features = false, optional = true }
 rand = { version = "0.8", default-features = false, features = ["std", 
"std_rng"], optional = true }



(arrow-rs) branch master updated: Improve Parquet reader/writer properties docs (#5863)

2024-06-12 Thread tustvold
This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 8bee08bf3c6 Improve Parquet reader/writer properties docs (#5863)
8bee08bf3c6 is described below

commit 8bee08bf3c68ba6b8cb933b5b230ede2ff6f11ef
Author: Andrew Lamb 
AuthorDate: Wed Jun 12 06:18:59 2024 -0400

Improve Parquet reader/writer properties docs (#5863)

* Improve Parquet reader/writer properties docs

* fix

* Apply suggestions from code review

Co-authored-by: Val Lorentz 

* Apply suggestions from code review

Co-authored-by: Raphael Taylor-Davies 
<1781103+tustv...@users.noreply.github.com>

-

Co-authored-by: Val Lorentz 
Co-authored-by: Raphael Taylor-Davies 
<1781103+tustv...@users.noreply.github.com>
---
 parquet/src/arrow/arrow_reader/mod.rs |  18 ++--
 parquet/src/arrow/arrow_writer/mod.rs |   8 +-
 parquet/src/file/properties.rs| 175 +++---
 3 files changed, 132 insertions(+), 69 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs 
b/parquet/src/arrow/arrow_reader/mod.rs
index 793f79272c1..6b95324bee3 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -258,12 +258,12 @@ impl ArrowReaderOptions {
 Self::default()
 }
 
-/// Parquet files generated by some writers may contain embedded arrow
-/// schema and metadata. This may not be correct or compatible with your 
system.
-///
-/// For 
example:[ARROW-16184](https://issues.apache.org/jira/browse/ARROW-16184)
+/// Skip decoding the embedded arrow metadata (defaults to `false`)
 ///
-/// Set `skip_arrow_metadata` to true, to skip decoding this
+/// Parquet files generated by some writers may contain embedded arrow
+/// schema and metadata.
+/// This may not be correct or compatible with your system,
+/// for example: 
[ARROW-16184](https://issues.apache.org/jira/browse/ARROW-16184)
 pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self {
 Self {
 skip_arrow_metadata,
@@ -271,10 +271,12 @@ impl ArrowReaderOptions {
 }
 }
 
-/// Set this true to enable decoding of the [PageIndex] if present. This 
can be used
-/// to push down predicates to the parquet scan, potentially eliminating 
unnecessary IO
+/// Enable decoding of the [`PageIndex`], if present (defaults to `false`)
+///
+/// The `PageIndex` can be used to push down predicates to the parquet 
scan,
+/// potentially eliminating unnecessary IO, by some query engines.
 ///
-/// [PageIndex]: 
https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`PageIndex`]: 
https://github.com/apache/parquet-format/blob/master/PageIndex.md
 pub fn with_page_index(self, page_index: bool) -> Self {
 Self { page_index, ..self }
 }
diff --git a/parquet/src/arrow/arrow_writer/mod.rs 
b/parquet/src/arrow/arrow_writer/mod.rs
index fd3f9591718..53287dec572 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -341,10 +341,12 @@ impl ArrowWriterOptions {
 Self { properties, ..self }
 }
 
+/// Skip encoding the embedded arrow metadata (defaults to `false`)
+///
 /// Parquet files generated by the [`ArrowWriter`] contain embedded arrow 
schema
 /// by default.
 ///
-/// Set `skip_arrow_metadata` to true, to skip encoding this.
+/// Set `skip_arrow_metadata` to true, to skip encoding the embedded 
metadata.
 pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self {
 Self {
 skip_arrow_metadata,
@@ -352,9 +354,7 @@ impl ArrowWriterOptions {
 }
 }
 
-/// Overrides the name of the root parquet schema element
-///
-/// Defaults to `"arrow_schema"`
+/// Set the name of the root parquet schema element (defaults to 
`"arrow_schema"`)
 pub fn with_schema_root(self, name: String) -> Self {
 Self {
 schema_root: Some(name),
diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 59e29440ae0..87d84cef80a 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -91,18 +91,17 @@ pub type WriterPropertiesPtr = Arc;
 
 /// Configuration settings for writing parquet files.
 ///
-/// All properties except the key-value metadata are immutable,
-/// use [`WriterPropertiesBuilder`] to assemble these properties.
+/// Use [`Self::builder`] to create a [`WriterPropertiesBuilder`] to change 
settings.
 ///
 /// # Example
 ///
 /// ```rust
-/// use parquet::{
-/// basic::{Compression, Encoding},
-/// file::properties::*,
-/// schema::types::ColumnPath,
-/// };
-///
+/// # use parquet::{
+/// #
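
The archive truncates the rewritten doc example at this point; a minimal sketch of the builder pattern it documents, under the same `parquet`-crate assumption (the column name is hypothetical):

```rust
use parquet::{
    basic::{Compression, Encoding},
    file::properties::WriterProperties,
    schema::types::ColumnPath,
};

// Assemble an immutable WriterProperties via the builder.
let props = WriterProperties::builder()
    .set_compression(Compression::SNAPPY)
    .set_encoding(Encoding::PLAIN)
    .set_column_encoding(ColumnPath::from("col1"), Encoding::DELTA_BINARY_PACKED)
    .build();
```
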

(arrow) branch main updated (832880a6ed -> 383ffe0607)

2024-06-12 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 832880a6ed GH-40824: [Java] Adding Spotless to Dataset module (#42062)
 add 383ffe0607 GH-37333: [Python] Replace pandas.util.testing.rands with 
vendored version (#42089)

No new revisions were added by this update.

Summary of changes:
 docs/source/python/benchmarks.rst   | 3 +--
 python/benchmarks/common.py | 1 -
 python/benchmarks/convert_pandas.py | 4 ++--
 python/benchmarks/parquet.py| 2 +-
 4 files changed, 4 insertions(+), 6 deletions(-)



(arrow) branch main updated (23e1f4c2db -> 832880a6ed)

2024-06-12 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 23e1f4c2db GH-42025: [Java] Update Unit Tests for Algorithm Module 
(#42029)
 add 832880a6ed GH-40824: [Java] Adding Spotless to Dataset module (#42062)

No new revisions were added by this update.

Summary of changes:
 java/dataset/pom.xml   |   3 +
 java/dataset/src/main/java/module-info.java|  30 +-
 .../arrow/dataset/file/DatasetFileWriter.java  |  28 +-
 .../org/apache/arrow/dataset/file/FileFormat.java  |   5 +-
 .../dataset/file/FileSystemDatasetFactory.java |  14 +-
 .../org/apache/arrow/dataset/file/JniWrapper.java  |  22 +-
 .../dataset/jni/DirectReservationListener.java |  38 +-
 .../arrow/dataset/jni/JniExceptionDescriber.java   |  12 +-
 .../org/apache/arrow/dataset/jni/JniLoader.java|  29 +-
 .../org/apache/arrow/dataset/jni/JniWrapper.java   |  30 +-
 .../apache/arrow/dataset/jni/NativeContext.java|  13 +-
 .../apache/arrow/dataset/jni/NativeDataset.java|  18 +-
 .../arrow/dataset/jni/NativeDatasetFactory.java|  33 +-
 .../jni/NativeInstanceReleasedException.java   |   5 +-
 .../apache/arrow/dataset/jni/NativeMemoryPool.java |  23 +-
 .../apache/arrow/dataset/jni/NativeScanTask.java   |  12 +-
 .../apache/arrow/dataset/jni/NativeScanner.java|  24 +-
 .../arrow/dataset/jni/ReservationListener.java |   9 +-
 .../arrow/dataset/scanner/ArrowScannerReader.java  |  10 +-
 .../apache/arrow/dataset/scanner/ScanOptions.java  |  46 +-
 .../org/apache/arrow/dataset/scanner/ScanTask.java |  11 +-
 .../org/apache/arrow/dataset/scanner/Scanner.java  |   9 +-
 .../org/apache/arrow/dataset/source/Dataset.java   |   5 +-
 .../arrow/dataset/source/DatasetFactory.java   |  10 +-
 .../dataset/substrait/AceroSubstraitConsumer.java  |  61 +-
 .../apache/arrow/dataset/substrait/JniWrapper.java |  42 +-
 .../org/apache/arrow/dataset/OrcWriteSupport.java  |   8 +-
 .../apache/arrow/dataset/ParquetWriteSupport.java  |  41 +-
 .../org/apache/arrow/dataset/TestAllTypes.java | 140 +++--
 .../java/org/apache/arrow/dataset/TestDataset.java |  34 +-
 .../arrow/dataset/TextBasedWriteSupport.java   |  15 +-
 .../arrow/dataset/file/TestDatasetFileWriter.java  |  76 +--
 .../arrow/dataset/file/TestFileSystemDataset.java  | 339 +++
 .../dataset/file/TestFileSystemDatasetFactory.java |  32 +-
 .../arrow/dataset/jni/TestNativeDataset.java   |   1 -
 .../arrow/dataset/jni/TestReservationListener.java |  81 +--
 .../substrait/TestAceroSubstraitConsumer.java  | 652 +
 37 files changed, 1115 insertions(+), 846 deletions(-)



(arrow-adbc) branch dependabot/go_modules/go/adbc/google.golang.org/grpc-1.64.0 updated (4d639eefe -> 0992913bf)

2024-06-12 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch 
dependabot/go_modules/go/adbc/google.golang.org/grpc-1.64.0
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


from 4d639eefe --- updated-dependencies: - dependency-name: 
google.golang.org/grpc   dependency-type: direct:production   update-type: 
version-update:semver-minor ...
 add 0992913bf Remove deprecated option

No new revisions were added by this update.

Summary of changes:
 docs/source/driver/flight_sql.rst  |  9 +
 go/adbc/driver/flightsql/flightsql_database.go | 19 ++-
 go/adbc/driver/flightsql/flightsql_driver.go   |  3 ---
 3 files changed, 7 insertions(+), 24 deletions(-)



(arrow) branch main updated: GH-42025: [Java] Update Unit Tests for Algorithm Module (#42029)

2024-06-11 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 23e1f4c2db GH-42025: [Java] Update Unit Tests for Algorithm Module 
(#42029)
23e1f4c2db is described below

commit 23e1f4c2db3501173d1e9b3a77bdc36b4c7b5bf4
Author: Hyunseok Seo 
AuthorDate: Wed Jun 12 14:19:33 2024 +0900

GH-42025: [Java] Update Unit Tests for Algorithm Module (#42029)



### Rationale for this change

Update package from JUnit 4 (`org.junit`) to JUnit 5 (`org.junit.jupiter`).

### What changes are included in this PR?

- [x] Replacing `org.junit` and `junit.framework.TestCase` with 
`org.junit.jupiter.api`.
- [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports.
- [x] Updating annotations such as `@Before`, `@After`.
  - `@Before` -> `@BeforeEach`
  - `@After` -> `@AfterEach`
  - `@Test` -> `@Test` with `org.junit.jupiter`
- [x] Updating `Parameterized` test
- [x] Doing self review round 1
- [x] Resolving conflict
- [x] Doing self review round 2

### Are these changes tested?

Yes, existing tests have passed.

### Are there any user-facing changes?

No.

* GitHub Issue: #42025

Authored-by: Hyunseok Seo 
Signed-off-by: David Li 
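
To make the migration pattern concrete, here is a hedged, hypothetical test class (not taken from the Arrow codebase) showing the JUnit 4 -> JUnit 5 changes the checklist describes:

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class ExampleMigratedTest {
  private StringBuilder state;

  @BeforeEach // was @Before (org.junit) in JUnit 4
  void prepare() {
    state = new StringBuilder("arrow");
  }

  @AfterEach // was @After (org.junit) in JUnit 4
  void shutdown() {
    state = null;
  }

  @Test // now org.junit.jupiter.api.Test
  void appendsExclamationMark() {
    state.append("!");
    assertEquals("arrow!", state.toString()); // static import, no Assertions. prefix
  }
}
```
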
---
 .../deduplicate/TestDeduplicationUtils.java|  12 +-
 .../deduplicate/TestVectorRunDeduplicator.java |  12 +-
 .../TestHashTableBasedDictionaryBuilder.java   |  14 +-
 .../dictionary/TestHashTableDictionaryEncoder.java |  12 +-
 .../dictionary/TestLinearDictionaryEncoder.java|  19 ++-
 .../dictionary/TestSearchDictionaryEncoder.java|  19 ++-
 .../TestSearchTreeBasedDictionaryBuilder.java  |  14 +-
 .../arrow/algorithm/misc/TestPartialSumUtils.java  |  12 +-
 .../arrow/algorithm/rank/TestVectorRank.java   |  12 +-
 .../algorithm/search/TestParallelSearcher.java |  54 +++
 .../algorithm/search/TestVectorRangeSearcher.java  |  51 +++---
 .../arrow/algorithm/search/TestVectorSearcher.java |  12 +-
 .../sort/TestCompositeVectorComparator.java|  10 +-
 .../sort/TestDefaultVectorComparator.java  |  17 +-
 .../sort/TestFixedWidthInPlaceVectorSorter.java|  37 +++--
 .../sort/TestFixedWidthOutOfPlaceVectorSorter.java | 178 +++--
 .../algorithm/sort/TestFixedWidthSorting.java  | 168 +--
 .../sort/TestGeneralOutOfPlaceVectorSorter.java|  12 +-
 .../arrow/algorithm/sort/TestIndexSorter.java  |  14 +-
 .../arrow/algorithm/sort/TestInsertionSorter.java  |  12 +-
 .../arrow/algorithm/sort/TestOffHeapIntStack.java  |  14 +-
 .../algorithm/sort/TestOutOfPlaceVectorSorter.java |  24 +--
 .../algorithm/sort/TestStableVectorComparator.java |  15 +-
 .../TestVariableWidthOutOfPlaceVectorSorter.java   |  35 ++--
 .../algorithm/sort/TestVariableWidthSorting.java   |  67 +++-
 25 files changed, 396 insertions(+), 450 deletions(-)

diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
index 537189013a..49a719bafa 100644
--- 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
+++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
@@ -16,8 +16,8 @@
  */
 package org.apache.arrow.algorithm.deduplicate;
 
-import static org.junit.Assert.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.nio.charset.StandardCharsets;
 import org.apache.arrow.memory.ArrowBuf;
@@ -27,9 +27,9 @@ import org.apache.arrow.vector.BitVectorHelper;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /** Test cases for {@link DeduplicationUtils}. */
 public class TestDeduplicationUtils {
@@ -40,12 +40,12 @@ public class TestDeduplicationUtils {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
 allocator = new RootAllocator(1024 * 1024);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
 allocator.close();
   }
diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
index 820cadccae..7fd816ee9f 100644
--- 

(arrow) branch main updated (fd11b7a042 -> 17d0685cc3)

2024-06-11 Thread brycemecum
This is an automated email from the ASF dual-hosted git repository.

brycemecum pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from fd11b7a042 GH-23576: [JS] Support DictionaryMessage replacement 
(#41965)
 add 17d0685cc3 GH-41791: [CI][Conda] Update azure.linux.yml task, replace 
CondaEnvironment@1 with Bash@3 (#41883)

No new revisions were added by this update.

Summary of changes:
 dev/tasks/conda-recipes/azure.linux.yml |  6 --
 dev/tasks/macros.jinja  | 12 
 2 files changed, 4 insertions(+), 14 deletions(-)



(arrow-nanoarrow) branch main updated: Update dist/ for commit 3540b8578bd78fc77787babe06b9931e2634d600

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 2fd50f7c Update dist/ for commit 
3540b8578bd78fc77787babe06b9931e2634d600
2fd50f7c is described below

commit 2fd50f7c3a87eddf951a3904e442475665bed414
Author: GitHub Actions 
AuthorDate: Wed Jun 12 01:31:04 2024 +

Update dist/ for commit 3540b8578bd78fc77787babe06b9931e2634d600
---
 dist/nanoarrow.c   | 38 +-
 dist/nanoarrow.hpp |  2 +-
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index c4897cfa..0c5e2492 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -3163,6 +3163,7 @@ static int ArrowArrayViewValidateDefault(struct 
ArrowArrayView* array_view,
 
 case NANOARROW_TYPE_RUN_END_ENCODED: {
   struct ArrowArrayView* run_ends_view = array_view->children[0];
+  if (run_ends_view->length == 0) break;
   int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
   if (last_run_end < 1) {
 ArrowErrorSet(error,
@@ -3342,25 +3343,28 @@ static int ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
 
   if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) {
 struct ArrowArrayView* run_ends_view = array_view->children[0];
-int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
-for (int64_t i = 1; i < run_ends_view->length; i++) {
-  const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
-  if (run_end <= last_run_end) {
-ArrowErrorSet(error,
-  "Every run end must be strictly greater than the 
previous run end, "
-  "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
-  (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+if (run_ends_view->length > 0) {
+  int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+  for (int64_t i = 1; i < run_ends_view->length; i++) {
+const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
+if (run_end <= last_run_end) {
+  ArrowErrorSet(
+  error,
+  "Every run end must be strictly greater than the previous run 
end, "
+  "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
+  (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+  return EINVAL;
+}
+last_run_end = run_end;
+  }
+  last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 
run_ends_view->length - 1);
+  if (last_run_end < (array_view->offset + array_view->length)) {
+ArrowErrorSet(
+error, "Last run end is %ld but it should >= %ld (offset: %ld, 
length: %ld)",
+(long)last_run_end, (long)(array_view->offset + 
array_view->length),
+(long)array_view->offset, (long)array_view->length);
 return EINVAL;
   }
-  last_run_end = run_end;
-}
-last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 
run_ends_view->length - 1);
-if (last_run_end < (array_view->offset + array_view->length)) {
-  ArrowErrorSet(error,
-"Last run end is %ld but it should >= %ld (offset: %ld, 
length: %ld)",
-(long)last_run_end, (long)(array_view->offset + 
array_view->length),
-(long)array_view->offset, (long)array_view->length);
-  return EINVAL;
 }
   }
 
diff --git a/dist/nanoarrow.hpp b/dist/nanoarrow.hpp
index 0de2371b..5f8aabba 100644
--- a/dist/nanoarrow.hpp
+++ b/dist/nanoarrow.hpp
@@ -864,8 +864,8 @@ class ViewArrayStream {
   };
 
   internal::InputRange range_;
-  ArrowError* error_;
   ArrowErrorCode* code_;
+  ArrowError* error_;
   ArrowError internal_error_ = {};
   ArrowErrorCode internal_code_;
   bool code_was_accessed_ = false;



(arrow) branch main updated (65974672a3 -> fd11b7a042)

2024-06-11 Thread ptaylor
This is an automated email from the ASF dual-hosted git repository.

ptaylor pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 65974672a3 GH-42100: [C++][Parquet] ParquetFilePrinter::JSONPrint 
print length of FLBA (#41981)
 add fd11b7a042 GH-23576: [JS] Support DictionaryMessage replacement 
(#41965)

No new revisions were added by this update.

Summary of changes:
 js/gulp/argv.js|  1 +
 js/gulp/test-task.js   | 17 ++-
 js/src/ipc/reader.ts   | 13 +++---
 js/src/ipc/writer.ts   | 44 +++---
 js/test/unit/ipc/message-reader-tests.ts   | 15 ---
 js/test/unit/ipc/writer/file-writer-tests.ts   | 62 +-
 js/test/unit/ipc/writer/stream-writer-tests.ts | 43 --
 7 files changed, 149 insertions(+), 46 deletions(-)



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 99ebed0d update documentation for tag main
99ebed0d is described below

commit 99ebed0dd8e24d2e0e41df67cded14ec28adec68
Author: GitHub Actions 
AuthorDate: Tue Jun 11 21:39:23 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 11bfc8de..2bb81267 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T20:26Z
+last_built: 2024-06-11T21:39Z
 



(arrow-nanoarrow) branch main updated: chore: Fix compiler warnings on Windows (#521)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 3540b857 chore: Fix compiler warnings on Windows (#521)
3540b857 is described below

commit 3540b8578bd78fc77787babe06b9931e2634d600
Author: Dewey Dunnington 
AuthorDate: Tue Jun 11 21:37:08 2024 +

chore: Fix compiler warnings on Windows (#521)

Some minor changes to correct usage that produced compiler warnings
when opening the project and doing a naive build in VSCode on Windows.
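
A hedged illustration of one warning class being fixed here (implicit double-to-float narrowing in a test expectation); the function names are hypothetical:

```cpp
// Before: a double literal implicitly narrowed to float triggers a
// conversion warning on MSVC.
float before() { return 3.138672; }

// After: the cast makes the narrowing explicit, silencing the warning.
float after() { return static_cast<float>(3.138672); }
```
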
---
 src/nanoarrow/array_test.cc | 43 +--
 src/nanoarrow/utils_test.cc |  6 +++---
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index e6b0364f..90f29e8c 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -19,7 +19,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -821,7 +820,7 @@ TEST(ArrayTest, ArrayTestAppendToHalfFloatArray) {
   EXPECT_EQ(data_buffer[1], 0);
   EXPECT_EQ(data_buffer[2], 0);
   EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[3]), 3.0);
-  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[4]), 3.138672);
+  EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[4]), 
static_cast(3.138672));
   EXPECT_FLOAT_EQ(ArrowHalfFloatToFloat(data_buffer[5]),
   std::numeric_limits::max());
   EXPECT_TRUE(std::isnan(ArrowHalfFloatToFloat(data_buffer[6])));
@@ -2585,6 +2584,20 @@ TEST(ArrayTest, ArrayViewTestSparseUnionGet) {
   ArrowArrayRelease();
 }
 
+// In Arrow C++, HalfFloatType::ctype gives uint16_t; however, this is not
+// the "value type" that would correspond to what 
ArrowArrayViewGetDoubleUnsafe()
+// or ArrowArrayAppendDouble() do since they operate on the logical/represented
+// value.
+template 
+BuilderValueT logical_value_to_builder_value(int64_t value) {
+  return static_cast(value);
+}
+
+template <>
+uint16_t logical_value_to_builder_value(int64_t 
value) {
+  return ArrowFloatToHalfFloat(static_cast(value));
+}
+
 template 
 void TestGetFromNumericArrayView() {
   struct ArrowArray array;
@@ -2593,19 +2606,16 @@ void TestGetFromNumericArrayView() {
   struct ArrowError error;
 
   auto type = TypeTraits::type_singleton();
+  using value_type = typename TypeClass::c_type;
 
   // Array with nulls
   auto builder = NumericBuilder();
 
-  if (type->id() == Type::HALF_FLOAT) {
-ARROW_EXPECT_OK(builder.Append(ArrowFloatToHalfFloat(1)));
-ARROW_EXPECT_OK(builder.AppendNulls(2));
-ARROW_EXPECT_OK(builder.Append(ArrowFloatToHalfFloat(4)));
-  } else {
-ARROW_EXPECT_OK(builder.Append(1));
-ARROW_EXPECT_OK(builder.AppendNulls(2));
-ARROW_EXPECT_OK(builder.Append(4));
-  }
+  ARROW_EXPECT_OK(
+  builder.Append(logical_value_to_builder_value(1)));
+  ARROW_EXPECT_OK(builder.AppendNulls(2));
+  ARROW_EXPECT_OK(
+  builder.Append(logical_value_to_builder_value(4)));
 
   auto maybe_arrow_array = builder.Finish();
   ARROW_EXPECT_OK(maybe_arrow_array);
@@ -2638,13 +2648,10 @@ void TestGetFromNumericArrayView() {
   // Array without nulls (Arrow does not allocate the validity buffer)
   builder = NumericBuilder();
 
-  if (type->id() == Type::HALF_FLOAT) {
-ARROW_EXPECT_OK(builder.Append(ArrowFloatToHalfFloat(1)));
-ARROW_EXPECT_OK(builder.Append(ArrowFloatToHalfFloat(2)));
-  } else {
-ARROW_EXPECT_OK(builder.Append(1));
-ARROW_EXPECT_OK(builder.Append(2));
-  }
+  ARROW_EXPECT_OK(
+  builder.Append(logical_value_to_builder_value(1)));
+  ARROW_EXPECT_OK(
+  builder.Append(logical_value_to_builder_value(2)));
 
   maybe_arrow_array = builder.Finish();
   ARROW_EXPECT_OK(maybe_arrow_array);
diff --git a/src/nanoarrow/utils_test.cc b/src/nanoarrow/utils_test.cc
index 24892a6e..3a63df4d 100644
--- a/src/nanoarrow/utils_test.cc
+++ b/src/nanoarrow/utils_test.cc
@@ -618,7 +618,7 @@ TEST(MaybeTest, ConstructionAndConversion) {
 }
 
 TEST(RandomAccessRangeTest, ConstructionAndPrinting) {
-  auto square = [](int i) { return i * i; };
+  auto square = [](int64_t i) { return i * i; };
 
   // the range is usable as a constant
   const nanoarrow::internal::RandomAccessRange 
squares{square, 4};
@@ -628,8 +628,8 @@ TEST(RandomAccessRangeTest, ConstructionAndPrinting) {
 
   // since the range is usable as a constant, we can iterate through it 
multiple times and
   // it will work
-  int sum = 0;
-  for (int i : squares) {
+  int64_t sum = 0;
+  for (int64_t i : squares) {
 sum += i;
   }
   EXPECT_EQ(sum, 1 + 4 + 9);



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 8df664e2 update documentation for tag main
8df664e2 is described below

commit 8df664e2d52b326fc76b2ba972d9dcea4cfa3faa
Author: GitHub Actions 
AuthorDate: Tue Jun 11 20:26:17 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index e123798b..11bfc8de 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T16:40Z
+last_built: 2024-06-11T20:26Z
 



(arrow-nanoarrow) branch main updated: feat: Meson build system for nanoarrow-ipc extension (#483)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 41e9e718 feat: Meson build system for nanoarrow-ipc extension (#483)
41e9e718 is described below

commit 41e9e71838669964f62d17d8547fa9bcc1c3a5af
Author: William Ayd 
AuthorDate: Tue Jun 11 16:23:58 2024 -0400

feat: Meson build system for nanoarrow-ipc extension (#483)
---
 .gitignore |  1 +
 ci/scripts/build-with-meson.sh |  4 +-
 dev/release/rat_exclude_files.txt  |  3 --
 meson.build| 11 +
 meson.options  |  2 +
 src/apps/dump_stream.c |  2 +-
 src/nanoarrow/meson.build  | 52 ++
 src/nanoarrow/nanoarrow.hpp|  2 +-
 src/nanoarrow/nanoarrow_ipc_decoder_test.cc| 11 +++--
 src/nanoarrow/nanoarrow_testing_test.cc|  2 +-
 .gitignore => subprojects/flatcc.wrap  | 25 +++
 subprojects/google-benchmark.wrap  | 17 +++
 subprojects/gtest.wrap | 17 +++
 subprojects/nlohmann_json.wrap | 17 +++
 .../packagefiles/flatcc/meson.build| 38 +---
 .gitignore => subprojects/zlib.wrap| 30 +
 16 files changed, 166 insertions(+), 68 deletions(-)

diff --git a/.gitignore b/.gitignore
index 79709860..89fb9fe2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,7 @@ __pycache__
 # meson subprojects - wrap files need to be kept to let meson download
 # dependencies as needed, but dependencies themselves should not be versioned
 subprojects/*
+!subprojects/packagefiles
 !subprojects/*.wrap
 
 compile_commands.json
diff --git a/ci/scripts/build-with-meson.sh b/ci/scripts/build-with-meson.sh
index 41592121..cd76c6db 100755
--- a/ci/scripts/build-with-meson.sh
+++ b/ci/scripts/build-with-meson.sh
@@ -68,12 +68,12 @@ function main() {
 show_header "Run test suite"
 meson configure -Dtests=true -Db_coverage=true
 meson compile
-meson test --wrap valgrind
+meson test --wrap='valgrind --track-origins=yes --leak-check=full' 
--print-errorlogs
 
 show_header "Run benchmarks"
 meson configure -Dbenchmarks=true
 meson compile
-meson test --benchmark
+meson test --benchmark --print-errorlogs
 
 show_header "Generate coverage reports"
 ninja coverage
diff --git a/dev/release/rat_exclude_files.txt 
b/dev/release/rat_exclude_files.txt
index 26a8b0f6..6372a56a 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -15,6 +15,3 @@ dist/flatcc.c
 src/nanoarrow/nanoarrow_ipc_flatcc_generated.h
 thirdparty/*
 python/src/nanoarrow/dlpack_abi.h
-subprojects/google-benchmark.wrap
-subprojects/gtest.wrap
-subprojects/nlohmann_json.wrap
diff --git a/meson.build b/meson.build
index c56bf4bb..bd918d01 100644
--- a/meson.build
+++ b/meson.build
@@ -34,3 +34,14 @@ subdir('src/nanoarrow')
 if get_option('benchmarks')
   subdir('dev/benchmarks')
 endif
+
+
+if get_option('apps')
+  if get_option('ipc')
+  executable(
+  'dump_stream',
+  'src/apps/dump_stream.c',
+  dependencies: [nanoarrow_dep, nanoarrow_ipc_dep]
+  )
+  endif
+endif
diff --git a/meson.options b/meson.options
index 73cbccd0..b9150982 100644
--- a/meson.options
+++ b/meson.options
@@ -17,6 +17,8 @@
 
 option('tests', type: 'boolean', description: 'Build tests', value: false)
 option('benchmarks', type: 'boolean', description: 'Build benchmarks', value: 
false)
+option('apps', type: 'boolean', description: 'Build utility applications', 
value: false)
+option('ipc', type: 'boolean', description: 'Build IPC libraries', value: 
false)
 option('integration_tests', type: 'boolean',
description: 'Build cross-implementation Arrow integration tests',
value: false)
diff --git a/src/apps/dump_stream.c b/src/apps/dump_stream.c
index f26e566f..ba1baec4 100644
--- a/src/apps/dump_stream.c
+++ b/src/apps/dump_stream.c
@@ -23,7 +23,7 @@
 
 void dump_schema_to_stdout(struct ArrowSchema* schema, int level, char* buf,
int buf_size) {
-  int n_chars = ArrowSchemaToString(schema, buf, buf_size, 0);
+  ArrowSchemaToString(schema, buf, buf_size, 0);
 
   for (int i = 0; i < level; i++) {
 fprintf(stdout, "  ");
diff --git a/src/nanoarrow/meson.build b/src/nanoarrow/meson.build
index f6f25716..ccf59c99 100644
--- a/src/nanoarrow/meson.build
+++ b/src/nanoarrow/meson.build
@@ -64,6 +64,26 @@ incdir = include_directories('..')
 nanoarrow_dep = declare_dependency(include_directories: [curdir, incdir],
link_with: nanoarrow_lib)
 
+if get_option('ipc')
+cmake = 

(arrow-rs) branch asf-site updated: deploy: a20116ec36f8c0c959aa9e6c547dc7e5625ebb1b

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 3f0aee23437 deploy: a20116ec36f8c0c959aa9e6c547dc7e5625ebb1b
3f0aee23437 is described below

commit 3f0aee234371c0d1746fdb54deea50f0860071d7
Author: alamb 
AuthorDate: Tue Jun 11 18:50:23 2024 +

deploy: a20116ec36f8c0c959aa9e6c547dc7e5625ebb1b
---
 index.html |  2 +-
 .../selection/fn.intersect_row_selections.html |  2 +-
 parquet/arrow/arrow_reader/selection/index.html|  2 +-
 .../selection/struct.RowSelection.html | 37 +-
 .../arrow_reader/selection/struct.RowSelector.html |  2 +-
 .../arrow/arrow_reader/struct.RowSelection.html| 37 +-
 parquet/arrow/arrow_reader/struct.RowSelector.html |  2 +-
 src/parquet/arrow/arrow_reader/selection.rs.html   | 32 +--
 8 files changed, 78 insertions(+), 38 deletions(-)

diff --git a/index.html b/index.html
index 5affe225a19..775dc9a163e 100644
--- a/index.html
+++ b/index.html
@@ -1 +1 @@
-Index of 

(arrow-rs) branch master updated: Make RowSelection's from_consecutive_ranges public (#5848)

2024-06-11 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new a20116ec36f Make RowSelection's from_consecutive_ranges public (#5848)
a20116ec36f is described below

commit a20116ec36f8c0c959aa9e6c547dc7e5625ebb1b
Author: advancedxy 
AuthorDate: Wed Jun 12 03:47:56 2024 +0900

Make RowSelection's from_consecutive_ranges public (#5848)

* Make RowSelection's from_consecutive_ranges public

This constructor method should be easier to use.

* Address reviewers' comments

-

Co-authored-by: Andrew Lamb 
---
 parquet/src/arrow/arrow_reader/selection.rs | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/selection.rs 
b/parquet/src/arrow/arrow_reader/selection.rs
index 82f21461290..0287e5b4215 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -81,6 +81,13 @@ impl RowSelector {
 ///
 /// let actual: Vec = selection.into();
 /// assert_eq!(actual, expected);
+///
+/// // you can also create a selection from consecutive ranges
+/// let ranges = vec![5..10, 10..15];
+/// let selection =
+///   RowSelection::from_consecutive_ranges(ranges.into_iter(), 20);
+/// let actual: Vec = selection.into();
+/// assert_eq!(actual, expected);
 /// ```
 ///
 /// A [`RowSelection`] maintains the following invariants:
@@ -115,7 +122,7 @@ impl RowSelection {
 }
 
 /// Creates a [`RowSelection`] from an iterator of consecutive ranges to 
keep
-pub(crate) fn from_consecutive_ranges>>(
+pub fn from_consecutive_ranges>>(
 ranges: I,
 total_rows: usize,
 ) -> Self {
@@ -1136,7 +1143,7 @@ mod tests {
 }
 
 #[test]
-fn test_empty_ranges() {
+fn test_from_ranges() {
 let ranges = [1..3, 4..6, 6..6, 8..8, 9..10];
 let selection = 
RowSelection::from_consecutive_ranges(ranges.into_iter(), 10);
 assert_eq!(
@@ -1149,7 +1156,13 @@ mod tests {
 RowSelector::skip(3),
 RowSelector::select(1)
 ]
-)
+);
+
+let out_of_order_ranges = [1..3, 8..10, 4..7];
+let result = std::panic::catch_unwind(|| {
+
RowSelection::from_consecutive_ranges(out_of_order_ranges.into_iter(), 10)
+});
+assert!(result.is_err());
 }
 
 #[test]
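
A short usage sketch of the now-public constructor, assuming the `parquet` crate:

```rust
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};

fn main() {
    // Keep rows 5..10 and 10..15 out of 20 total rows.
    let selection = RowSelection::from_consecutive_ranges([5..10, 10..15].into_iter(), 20);
    let selectors: Vec<RowSelector> = selection.into();
    // Adjacent ranges are merged and the tail is padded with a skip.
    assert_eq!(
        selectors,
        vec![RowSelector::skip(5), RowSelector::select(10), RowSelector::skip(5)]
    );
}
```
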



(arrow-rs) branch master updated: Minor: refine row selection example more (#5850)

2024-06-11 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new b8c2741491b Minor: refine row selection example more (#5850)
b8c2741491b is described below

commit b8c2741491ba5c3464010e023d390a34d037cd2d
Author: Andrew Lamb 
AuthorDate: Tue Jun 11 14:28:41 2024 -0400

Minor: refine row selection example more (#5850)
---
 parquet/src/arrow/arrow_reader/mod.rs | 29 -
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/mod.rs 
b/parquet/src/arrow/arrow_reader/mod.rs
index bbb1c079b16..793f79272c1 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -155,24 +155,43 @@ impl ArrowReaderBuilder {
 ///
 /// # Example
 ///
-/// Given a parquet file with 3 row groups, and a row group filter of
-/// `[0, 2]`, in order to only scan rows 50-100 in row group 2:
+/// Given a parquet file with 4 row groups, and a row group filter of `[0,
+/// 2, 3]`, in order to scan rows 50-100 in row group 2 and rows 200-300 in
+/// row group 3:
 ///
 /// ```text
 ///   Row Group 0, 1000 rows (selected)
 ///   Row Group 1, 1000 rows (skipped)
 ///   Row Group 2, 1000 rows (selected, but want to only scan rows 50-100)
+///   Row Group 3, 1000 rows (selected, but want to only scan rows 200-300)
 /// ```
 ///
-/// You would pass the following [`RowSelection`]:
+/// You could pass the following [`RowSelection`]:
 ///
 /// ```text
 ///  Select 1000(scan all rows in row group 0)
-///  Select 50-100 (scan rows 50-100 in row group 2)
+///  Skip 50(skip the first 50 rows in row group 2)
+///  Select 50  (scan rows 50-100 in row group 2)
+///  Skip 900   (skip the remaining rows in row group 2)
+///  Skip 200   (skip the first 200 rows in row group 3)
+///  Select 100 (scan rows 200-300 in row group 3)
+///  Skip 700   (skip the remaining rows in row group 3)
 /// ```
-///
 /// Note there is no entry for the (entirely) skipped row group 1.
 ///
+/// Note you can represent the same selection with fewer entries. Instead 
of
+///
+/// ```text
+///  Skip 900   (skip the remaining rows in row group 2)
+///  Skip 200   (skip the first 200 rows in row group 3)
+/// ```
+///
+/// you could use
+///
+/// ```text
+/// Skip 1100  (skip the remaining 900 rows in row group 2 and the 
first 200 rows in row group 3)
+/// ```
+///
 /// [`Index`]: crate::file::page_index::index::Index
 pub fn with_row_selection(self, selection: RowSelection) -> Self {
 Self {



(arrow) branch main updated (64b110942d -> 65974672a3)

2024-06-11 Thread maplefu
This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 64b110942d GH-41899: [C++]  IPC: Minor enhance the code of writer 
(#41900)
 add 65974672a3 GH-42100: [C++][Parquet] ParquetFilePrinter::JSONPrint 
print length of FLBA (#41981)

No new revisions were added by this update.

Summary of changes:
 cpp/src/parquet/printer.cc |  3 ++-
 cpp/src/parquet/reader_test.cc | 47 ++
 2 files changed, 40 insertions(+), 10 deletions(-)



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 12883bda update documentation for tag main
12883bda is described below

commit 12883bda08e8553630bb3a3590bb69e325a571b6
Author: GitHub Actions 
AuthorDate: Tue Jun 11 16:40:15 2024 +

update documentation for tag main
---
 main/_sources/reference/device.rst.txt |  6 +++---
 main/_sources/reference/ipc.rst.txt|  4 ++--
 main/r/pkgdown.yml |  2 +-
 main/reference/device.html | 16 
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/main/_sources/reference/device.rst.txt 
b/main/_sources/reference/device.rst.txt
index 8313d5fc..56cc5428 100644
--- a/main/_sources/reference/device.rst.txt
+++ b/main/_sources/reference/device.rst.txt
@@ -22,20 +22,20 @@ C API
 
 
 .. doxygengroup:: nanoarrow_device
-   :project: nanoarrow_device
+   :project: nanoarrow_c
:members:
 
 C++ Helpers
 
 
 .. doxygengroup:: nanoarrow_device_hpp-unique
-   :project: nanoarrow_device
+   :project: nanoarrow_c
:members:
 
 Arrow C Device Interface
 
 
 .. doxygengroup:: nanoarrow_device-arrow-cdata
-   :project: nanoarrow_device
+   :project: nanoarrow_c
:members:
:undoc-members:
diff --git a/main/_sources/reference/ipc.rst.txt 
b/main/_sources/reference/ipc.rst.txt
index fb009c1a..7d3ae1e1 100644
--- a/main/_sources/reference/ipc.rst.txt
+++ b/main/_sources/reference/ipc.rst.txt
@@ -22,12 +22,12 @@ C API
 
 
 .. doxygengroup:: nanoarrow_ipc
-   :project: nanoarrow_ipc
+   :project: nanoarrow_c
:members:
 
 C++ Helpers
 
 
 .. doxygengroup:: nanoarrow_ipc_hpp-unique
-   :project: nanoarrow_ipc
+   :project: nanoarrow_c
:members:
diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index b3059f7a..e123798b 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T13:25Z
+last_built: 2024-06-11T16:40Z
 
diff --git a/main/reference/device.html b/main/reference/device.html
index cf3d8120..d74e2368 100644
--- a/main/reference/device.html
+++ b/main/reference/device.html
[diff hunks omitted: the archive strips the markup from this rendered-HTML diff, leaving garbled text; the visible change updates the generated Doxygen descriptions of ArrowDeviceArray, ArrowDeviceArrayViewReset, ArrowDeviceArrayViewSetArrayMinimal, and ArrowDeviceArrayStream in main/reference/device.html]



(arrow-nanoarrow) branch main updated: refactor: Consolidate device extension into main project (#517)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new e92c364f refactor: Consolidate device extension into main project 
(#517)
e92c364f is described below

commit e92c364f65f9d6fb029d918b9ff31a8a39b3a1df
Author: William Ayd 
AuthorDate: Tue Jun 11 12:37:52 2024 -0400

refactor: Consolidate device extension into main project (#517)
---
 .github/workflows/build-and-test-device.yaml   |  33 +-
 .github/workflows/bundle.yaml  |   6 +-
 CMakeLists.txt | 166 -
 ci/scripts/build-docs.sh   |   5 -
 ci/scripts/coverage.sh |  17 +-
 docs/source/conf.py|   2 -
 docs/source/reference/device.rst   |   6 +-
 docs/source/reference/ipc.rst  |   4 +-
 extensions/nanoarrow_device/.gitignore |  18 -
 extensions/nanoarrow_device/CMakeLists.txt | 226 
 extensions/nanoarrow_device/CMakePresets.json  |  26 --
 .../nanoarrow_device/CMakeUserPresets.json.example |  29 --
 extensions/nanoarrow_device/README.md  |  68 
 extensions/nanoarrow_device/src/apidoc/.gitignore  |  18 -
 extensions/nanoarrow_device/src/apidoc/Doxyfile| 407 -
 python/bootstrap.py|  16 +-
 .../src => src}/nanoarrow/nanoarrow_device.c   |   0
 .../src => src}/nanoarrow/nanoarrow_device.h   |   0
 .../src => src}/nanoarrow/nanoarrow_device.hpp |   0
 .../src => src}/nanoarrow/nanoarrow_device_cuda.c  |   0
 .../src => src}/nanoarrow/nanoarrow_device_cuda.h  |   0
 .../nanoarrow/nanoarrow_device_cuda_test.cc|   0
 .../nanoarrow/nanoarrow_device_hpp_test.cc |   0
 .../nanoarrow/nanoarrow_device_metal.cc|   0
 .../src => src}/nanoarrow/nanoarrow_device_metal.h |   0
 .../nanoarrow/nanoarrow_device_metal_test.cc   |   0
 .../src => src}/nanoarrow/nanoarrow_device_test.cc |   0
 27 files changed, 200 insertions(+), 847 deletions(-)

diff --git a/.github/workflows/build-and-test-device.yaml 
b/.github/workflows/build-and-test-device.yaml
index 47345289..d1247c30 100644
--- a/.github/workflows/build-and-test-device.yaml
+++ b/.github/workflows/build-and-test-device.yaml
@@ -28,7 +28,6 @@ on:
   - 'CMakeLists.txt'
   - '.github/workflows/build-and-test-device.yaml'
   - 'src/nanoarrow/**'
-  - 'extensions/nanoarrow_device/**'
 
 permissions:
   contents: read
@@ -46,28 +45,52 @@ jobs:
 config:
   - {label: default-build}
   - {label: namespaced-build, cmake_args: 
"-DNANOARROW_NAMESPACE=SomeUserNamespace"}
-  - {label: bundled-build, cmake_args: "-DNANOARROW_DEVICE_BUNDLE=ON"}
+  - {label: bundled-build, cmake_args: "-DNANOARROW_BUNDLE=ON"}
 
 env:
-  SUBDIR: 'extensions/nanoarrow_device'
+  SUBDIR: '${{ github.workspace }}'
   NANOARROW_ARROW_TESTING_DIR: '${{ github.workspace }}/arrow-testing'
 
 steps:
   - uses: actions/checkout@v4
 
+  - name: Checkout arrow-testing
+uses: actions/checkout@v4
+with:
+  repository: apache/arrow-testing
+  path: arrow-testing
+
   - name: Install memcheck dependencies
 if: matrix.config.label == 'default-build'
 run: |
   sudo apt-get update && sudo apt-get install -y valgrind
 
+  - name: Cache Arrow C++ Build
+id: cache-arrow-build
+uses: actions/cache@v4
+with:
+  path: arrow
+  # Bump the number at the end of this line to force a new Arrow C++ 
build
+  key: arrow-${{ runner.os }}-${{ runner.arch }}-1
+
+  - name: Build Arrow C++
+if: steps.cache-arrow-build.outputs.cache-hit != 'true'
+shell: bash
+run: |
+  ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
+
   - name: Build
 run: |
+  ARROW_PATH="$(pwd)/arrow"
   cd $SUBDIR
   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
   sudo ldconfig
   mkdir build
   cd build
-  cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE_BUILD_TESTS=ON 
${{ matrix.config.cmake_args }}
+  cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE=ON \
+-DNANOARROW_BUILD_TESTS=ON -DCMAKE_PREFIX_PATH="${ARROW_PATH}" \
+${{ matrix.config.cmake_args }}
+
   cmake --build .
 
   - name: Check for non-namespaced symbols in namespaced build
@@ -112,4 +135,4 @@ jobs:
 uses: actions/upload-artifact@main
 with:
   name: nanoarrow-device-memcheck
-  path: 
extensions/nanoarrow_device/build/Testing/Temporary/MemoryChecker.*.log
+  path: build/Testing/Temporary/MemoryChecker.*.log
diff 

(arrow) branch main updated: GH-41899: [C++] IPC: Minor enhance the code of writer (#41900)

2024-06-11 Thread maplefu
This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 64b110942d GH-41899: [C++]  IPC: Minor enhance the code of writer 
(#41900)
64b110942d is described below

commit 64b110942d0c88bd7e16f72cddd060d2bcff4f3f
Author: mwish 
AuthorDate: Wed Jun 12 00:33:11 2024 +0800

GH-41899: [C++]  IPC: Minor enhance the code of writer (#41900)



### Rationale for this change

Enhance the code of the IPC writer

### What changes are included in this PR?

1. memcpy rather than reinterpret_cast
2. move the buffer rather than copying

### Are these changes tested?

Covered by existing

### Are there any user-facing changes?

No

* GitHub Issue: #41899

Authored-by: mwish 
Signed-off-by: mwish 
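
A hedged illustration of the first change (a memcpy-based store instead of reinterpret_cast); these free functions are hypothetical stand-ins for the pattern behind `util::SafeStore`:

```cpp
#include <cstdint>
#include <cstring>

// Before: type-punning through reinterpret_cast assumes suitable alignment.
void StoreUnsafe(uint8_t* out, int64_t value) {
  *reinterpret_cast<int64_t*>(out) = value;
}

// After: memcpy is well-defined regardless of the destination's alignment.
void StoreSafe(uint8_t* out, int64_t value) {
  std::memcpy(out, &value, sizeof(value));
}
```
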
---
 cpp/src/arrow/ipc/writer.cc | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index e3dd36efe0..549fb34a2e 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -164,7 +164,7 @@ class RecordBatchSerializer {
 std::shared_ptr bitmap;
 RETURN_NOT_OK(GetTruncatedBitmap(arr.offset(), arr.length(), 
arr.null_bitmap(),
  options_.memory_pool, ));
-out_->body_buffers.emplace_back(bitmap);
+out_->body_buffers.emplace_back(std::move(bitmap));
   } else {
 // Push a dummy zero-length buffer, not to be copied
 out_->body_buffers.emplace_back(kNullBuffer);
@@ -222,8 +222,9 @@ class RecordBatchSerializer {
   RETURN_NOT_OK(
   result->Resize(actual_length + sizeof(int64_t), /* shrink_to_fit= */ 
true));
 }
-*reinterpret_cast(result->mutable_data()) =
-bit_util::ToLittleEndian(prefixed_length);
+int64_t prefixed_length_little_endian = 
bit_util::ToLittleEndian(prefixed_length);
+util::SafeStore(result->mutable_data(), prefixed_length_little_endian);
+
 *out = SliceBuffer(std::move(result), /*offset=*/0, actual_length + 
sizeof(int64_t));
 
 return Status::OK();
@@ -415,7 +416,7 @@ class RecordBatchSerializer {
 std::shared_ptr data;
 RETURN_NOT_OK(GetTruncatedBitmap(array.offset(), array.length(), 
array.values(),
  options_.memory_pool, ));
-out_->body_buffers.emplace_back(data);
+out_->body_buffers.emplace_back(std::move(data));
 return Status::OK();
   }
 
@@ -442,7 +443,7 @@ class RecordBatchSerializer {
data->size() - byte_offset);
   data = SliceBuffer(data, byte_offset, buffer_length);
 }
-out_->body_buffers.emplace_back(data);
+out_->body_buffers.emplace_back(std::move(data));
 return Status::OK();
   }
 
@@ -464,8 +465,8 @@ class RecordBatchSerializer {
   data = SliceBuffer(data, start_offset, slice_length);
 }
 
-out_->body_buffers.emplace_back(value_offsets);
-out_->body_buffers.emplace_back(data);
+out_->body_buffers.emplace_back(std::move(value_offsets));
+out_->body_buffers.emplace_back(std::move(data));
 return Status::OK();
   }
 
@@ -566,7 +567,7 @@ class RecordBatchSerializer {
 RETURN_NOT_OK(GetTruncatedBuffer(
 offset, length, static_cast(sizeof(UnionArray::type_code_t)),
 array.type_codes(), options_.memory_pool, _codes));
-out_->body_buffers.emplace_back(type_codes);
+out_->body_buffers.emplace_back(std::move(type_codes));
 
 --max_recursion_depth_;
 for (int i = 0; i < array.num_fields(); ++i) {
@@ -585,7 +586,7 @@ class RecordBatchSerializer {
 RETURN_NOT_OK(GetTruncatedBuffer(
 offset, length, static_cast(sizeof(UnionArray::type_code_t)),
 array.type_codes(), options_.memory_pool, _codes));
-out_->body_buffers.emplace_back(type_codes);
+out_->body_buffers.emplace_back(std::move(type_codes));
 
 --max_recursion_depth_;
 const auto& type = checked_cast(*array.type());
@@ -640,7 +641,7 @@ class RecordBatchSerializer {
 
   value_offsets = std::move(shifted_offsets_buffer);
 }
-out_->body_buffers.emplace_back(value_offsets);
+out_->body_buffers.emplace_back(std::move(value_offsets));
 
 // Visit children and slice accordingly
 for (int i = 0; i < type.num_fields(); ++i) {



(arrow-rs) branch dependabot/cargo/object_store/master/quick-xml-0.32.0 created (now 4c51e68d1fe)

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch 
dependabot/cargo/object_store/master/quick-xml-0.32.0
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


  at 4c51e68d1fe Update quick-xml requirement from 0.31.0 to 0.32.0 in 
/object_store

No new revisions were added by this update.



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 9505744a update documentation for tag main
9505744a is described below

commit 9505744ae16d5ac3b46e78d5ed2928154ef18574
Author: GitHub Actions 
AuthorDate: Tue Jun 11 13:25:24 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 8aae314f..b3059f7a 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T12:11Z
+last_built: 2024-06-11T13:25Z
 



(arrow-nanoarrow) branch main updated: chore(r): Bump minimum supported R version to 4.0 (#519)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 8911a01c chore(r): Bump minimum supported R version to 4.0 (#519)
8911a01c is described below

commit 8911a01cc1fe3ee1a0269f0568fbf0356411a127
Author: Dewey Dunnington 
AuthorDate: Tue Jun 11 13:23:20 2024 +

chore(r): Bump minimum supported R version to 4.0 (#519)

We (vaguely) follow the [tidyverse R version support
policy](https://www.tidyverse.org/blog/2019/04/r-version-support/) and
support the current release plus the last four releases. Relatedly, our
test dependencies no longer support R 3.6 and this CI job is failing.
---
 .github/workflows/r-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml
index c3a559b5..e42a69d5 100644
--- a/.github/workflows/r-check.yaml
+++ b/.github/workflows/r-check.yaml
@@ -40,7 +40,7 @@ jobs:
   matrix:
 config:
   - {os: macOS-latest,   r: 'release'}
-  - {os: windows-latest, r: '3.6'}
+  - {os: windows-latest, r: '4.0'}
   - {os: windows-latest, r: 'release'}
   - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
   - {os: ubuntu-latest,   r: 'release'}



(arrow) branch main updated: GH-42013 [Python] Allow Array.filter() to take general array input (#42051)

2024-06-11 Thread jorisvandenbossche
This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 03a960de86 GH-42013 [Python] Allow Array.filter() to take general 
array input (#42051)
03a960de86 is described below

commit 03a960de86d0c3ce8bf299724c94be5b291d85b4
Author: Kelvin Wu 
AuthorDate: Tue Jun 11 20:41:24 2024 +0800

GH-42013 [Python] Allow Array.filter() to take general array input (#42051)



### What changes are included in this PR?
Allow Array.filter() to take general array input.

### Are these changes tested?
Unit test added, via CI.

### Are there any user-facing changes?

No

* GitHub Issue: #42013

Authored-by: Kelvin Wu 
Signed-off-by: Joris Van den Bossche 
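
A short sketch of the behavior this enables (passing a NumPy boolean mask directly instead of a `pyarrow.Array`):

```python
import numpy as np
import pyarrow as pa

arr = pa.array([1, 2, 3, 4, 5])
mask = np.array([True, False, False, True, False])
print(arr.filter(mask))  # -> [1, 4]
```
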
---
 python/pyarrow/array.pxi | 2 +-
 python/pyarrow/tests/test_compute.py | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 3c26e85887..efa9b814ed 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1444,7 +1444,7 @@ cdef class Array(_PandasConvertible):
 """
 return _pc().drop_null(self)
 
-def filter(self, Array mask, *, null_selection_behavior='drop'):
+def filter(self, object mask, *, null_selection_behavior='drop'):
 """
 Select values from an array.
 
diff --git a/python/pyarrow/tests/test_compute.py 
b/python/pyarrow/tests/test_compute.py
index d7dee1ad05..4c8ec893e2 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1305,6 +1305,12 @@ def test_filter(ty, values):
 result.validate()
 assert result.equals(pa.array([values[0], values[3], None], type=ty))
 
+# same test with different array type
+mask = np.array([True, False, False, True, None])
+result = arr.filter(mask, null_selection_behavior='drop')
+result.validate()
+assert result.equals(pa.array([values[0], values[3]], type=ty))
+
 # non-boolean dtype
 mask = pa.array([0, 1, 0, 1, 0])
 with pytest.raises(NotImplementedError):



(arrow-rs) branch master updated: parquet: Fix warning about unused import (#5865)

2024-06-11 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 298bf481adf parquet: Fix warning about unused import (#5865)
298bf481adf is described below

commit 298bf481adf898ece699fda25cb61191d7bfb29c
Author: Val Lorentz 
AuthorDate: Tue Jun 11 14:26:55 2024 +0200

parquet: Fix warning about unused import (#5865)
---
 parquet/src/arrow/array_reader/mod.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parquet/src/arrow/array_reader/mod.rs 
b/parquet/src/arrow/array_reader/mod.rs
index f662156543e..8febe41e688 100644
--- a/parquet/src/arrow/array_reader/mod.rs
+++ b/parquet/src/arrow/array_reader/mod.rs
@@ -46,6 +46,7 @@ mod test_util;
 
 pub use builder::build_array_reader;
 pub use byte_array::make_byte_array_reader;
+#[allow(unused_imports)] // Only used for benchmarks
 pub use byte_array::make_byte_view_array_reader;
 pub use byte_array_dictionary::make_byte_array_dictionary_reader;
 #[allow(unused_imports)] // Only used for benchmarks



(arrow-rs) branch master updated: fix: msrv CI for object_store (#5866)

2024-06-11 Thread alamb
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
 new 7f66b77af94 fix: msrv CI for object_store (#5866)
7f66b77af94 is described below

commit 7f66b77af945680c12e13392936013bdc68541df
Author: Eduard Karacharov 
AuthorDate: Tue Jun 11 15:25:23 2024 +0300

fix: msrv CI for object_store (#5866)

* fix: msrv CI for object_store

* fixed url crate version
---
 .github/workflows/rust.yml | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index d8d892ea940..a1644ee49b8 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -119,17 +119,20 @@ jobs:
 run: cargo update -p ahash --precise 0.8.7
   - name: Check arrow
 working-directory: arrow
-run: cargo msrv verify
+run: cargo msrv --log-target stdout verify
   - name: Check parquet
 working-directory: parquet
-run: cargo msrv verify
+run: cargo msrv --log-target stdout verify
   - name: Check arrow-flight
 working-directory: arrow-flight
-run: cargo msrv verify
+run: cargo msrv --log-target stdout verify
   - name: Downgrade object_store dependencies
 working-directory: object_store
-# Necessary because 1.30.0 updates MSRV to 1.63
-run: cargo update -p tokio --precise 1.29.1
+# Necessary because tokio 1.30.0 updates MSRV to 1.63
+# and url 2.5.1, updates to 1.67
+run: |
+  cargo update -p tokio --precise 1.29.1
+  cargo update -p url --precise 2.5.0
   - name: Check object_store
 working-directory: object_store
-run: cargo msrv verify
+run: cargo msrv --log-target stdout verify



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 91013e2b update documentation for tag main
91013e2b is described below

commit 91013e2b9528a848a119da21bf08a3b6a440ef6e
Author: GitHub Actions 
AuthorDate: Tue Jun 11 12:11:51 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index 6b32e97b..8aae314f 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-11T12:04Z
+last_built: 2024-06-11T12:11Z
 



(arrow-nanoarrow) branch main updated: fix: check `run_ends_view->length` before accessing its values (#518)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new ec1a6927 fix: check `run_ends_view->length` before accessing its 
values (#518)
ec1a6927 is described below

commit ec1a6927283a33a97232047179b6abc77c374de3
Author: Cocoa 
AuthorDate: Tue Jun 11 05:09:48 2024 -0700

fix: check `run_ends_view->length` before accessing its values (#518)

This PR fixes an issue where `run_ends_view->length` was not checked
against 0 before attempting to access `run_ends_view`'s values. Many
thanks to @WillAyd.
---
 src/nanoarrow/array.c | 38 +-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 36df998f..ebd95df1 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -1067,6 +1067,7 @@ static int ArrowArrayViewValidateDefault(struct 
ArrowArrayView* array_view,
 
 case NANOARROW_TYPE_RUN_END_ENCODED: {
   struct ArrowArrayView* run_ends_view = array_view->children[0];
+  if (run_ends_view->length == 0) break;
   int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
   if (last_run_end < 1) {
 ArrowErrorSet(error,
@@ -1246,25 +1247,28 @@ static int ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
 
   if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) {
 struct ArrowArrayView* run_ends_view = array_view->children[0];
-int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
-for (int64_t i = 1; i < run_ends_view->length; i++) {
-  const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
-  if (run_end <= last_run_end) {
-ArrowErrorSet(error,
-  "Every run end must be strictly greater than the 
previous run end, "
-  "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
-  (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+if (run_ends_view->length > 0) {
+  int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+  for (int64_t i = 1; i < run_ends_view->length; i++) {
+const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
+if (run_end <= last_run_end) {
+  ArrowErrorSet(
+  error,
+  "Every run end must be strictly greater than the previous run 
end, "
+  "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
+  (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+  return EINVAL;
+}
+last_run_end = run_end;
+  }
+  last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 
run_ends_view->length - 1);
+  if (last_run_end < (array_view->offset + array_view->length)) {
+ArrowErrorSet(
+error, "Last run end is %ld but it should >= %ld (offset: %ld, 
length: %ld)",
+(long)last_run_end, (long)(array_view->offset + 
array_view->length),
+(long)array_view->offset, (long)array_view->length);
 return EINVAL;
   }
-  last_run_end = run_end;
-}
-last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 
run_ends_view->length - 1);
-if (last_run_end < (array_view->offset + array_view->length)) {
-  ArrowErrorSet(error,
-"Last run end is %ld but it should >= %ld (offset: %ld, 
length: %ld)",
-(long)last_run_end, (long)(array_view->offset + 
array_view->length),
-(long)array_view->offset, (long)array_view->length);
-  return EINVAL;
 }
   }
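
For context, the fix above reduces to a single guard: when the run-ends
child is empty, skip run-end validation entirely instead of reading its
first value. Below is a minimal standalone C sketch of that logic; the
struct and function names are illustrative stand-ins, not the nanoarrow
API, and the sketch merges the "first run end >= 1", strict-increase,
and coverage checks that the real validator performs in separate passes.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the run-ends child: just its decoded values and length. */
struct RunEnds {
  const int64_t* values;
  int64_t length;
};

/* Returns 0 if the run ends are valid for a window of offset + length
 * elements, EINVAL otherwise. The first line is the essence of #518:
 * an empty run-ends child is accepted without touching values[0]. */
static int ValidateRunEnds(const struct RunEnds* re, int64_t offset,
                           int64_t length) {
  if (re->length == 0) return 0; /* the new guard */
  int64_t last = re->values[0];
  if (last < 1) return EINVAL; /* the first run end must be >= 1 */
  for (int64_t i = 1; i < re->length; i++) {
    if (re->values[i] <= last) return EINVAL; /* must strictly increase */
    last = re->values[i];
  }
  /* the final run end must cover the logical window */
  return last >= offset + length ? 0 : EINVAL;
}

int main(void) {
  const int64_t ends[] = {3, 5, 9};
  struct RunEnds re = {ends, 3};
  struct RunEnds empty = {NULL, 0};
  printf("%d\n", ValidateRunEnds(&re, 0, 9));    /* 0: valid */
  printf("%d\n", ValidateRunEnds(&empty, 0, 0)); /* 0: empty now passes */
  return 0;
}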
 



(arrow-nanoarrow) branch asf-site updated: update documentation for tag main

2024-06-11 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new e182ecd1 update documentation for tag main
e182ecd1 is described below

commit e182ecd1c28418be95bb9238b7a5e02c4316160c
Author: GitHub Actions 
AuthorDate: Tue Jun 11 12:04:18 2024 +

update documentation for tag main
---
 main/r/pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/r/pkgdown.yml b/main/r/pkgdown.yml
index bd9e30d6..6b32e97b 100644
--- a/main/r/pkgdown.yml
+++ b/main/r/pkgdown.yml
@@ -2,5 +2,5 @@ pandoc: 3.1.3
 pkgdown: 2.0.9
 pkgdown_sha: ~
 articles: {}
-last_built: 2024-06-10T19:42Z
+last_built: 2024-06-11T12:04Z
 



(arrow-nanoarrow) branch main updated: chore(dev/benchmarks): Add 0.5.0 release to benchmark run + enable build against refactored CMake (#516)

2024-06-11 Thread paleolimbot
This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 8680f6dd chore(dev/benchmarks): Add 0.5.0 release to benchmark run + 
enable build against refactored CMake (#516)
8680f6dd is described below

commit 8680f6dd36f97c9821a11bf62162f46eab3f691f
Author: Dewey Dunnington 
AuthorDate: Tue Jun 11 12:02:03 2024 +

chore(dev/benchmarks): Add 0.5.0 release to benchmark run + enable build 
against refactored CMake (#516)

After #511 there is a slightly different way to build nanoarrow with
IPC support (required by the benchmarks).
---
 dev/benchmarks/CMakeLists.txt| 10 +-
 dev/benchmarks/CMakePresets.json | 11 +++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/dev/benchmarks/CMakeLists.txt b/dev/benchmarks/CMakeLists.txt
index bb452c17..edacad32 100644
--- a/dev/benchmarks/CMakeLists.txt
+++ b/dev/benchmarks/CMakeLists.txt
@@ -53,13 +53,13 @@ fetchcontent_declare(benchmark
 fetchcontent_makeavailable(benchmark)
 
 if(IS_DIRECTORY "${NANOARROW_BENCHMARK_SOURCE_URL}")
+  # In nanoarrow >= 0.6.0, optional features use NANOARROW_XXX=ON instead
+  # of being packaged as separate projects.
+  set(NANOARROW_IPC
+  ON
+  CACHE INTERNAL "")
   fetchcontent_declare(nanoarrow SOURCE_DIR 
"${NANOARROW_BENCHMARK_SOURCE_URL}")
-  fetchcontent_declare(nanoarrow_ipc
-   SOURCE_DIR
-   
"${NANOARROW_BENCHMARK_SOURCE_URL}/extensions/nanoarrow_ipc")
-
   fetchcontent_makeavailable(nanoarrow)
-  fetchcontent_makeavailable(nanoarrow_ipc)
 elseif(NOT "${NANOARROW_BENCHMARK_SOURCE_URL}" STREQUAL "")
   fetchcontent_declare(nanoarrow URL "${NANOARROW_BENCHMARK_SOURCE_URL}")
   fetchcontent_declare(nanoarrow_ipc URL "${NANOARROW_BENCHMARK_SOURCE_URL}"
diff --git a/dev/benchmarks/CMakePresets.json b/dev/benchmarks/CMakePresets.json
index d64be290..60ca1cc5 100644
--- a/dev/benchmarks/CMakePresets.json
+++ b/dev/benchmarks/CMakePresets.json
@@ -25,6 +25,17 @@
 "NANOARROW_BENCHMARK_SOURCE_URL": "${sourceDir}/../.."
 }
 },
+{
+"name": "v0.5.0",
+"displayName": "v0.5.0",
+"description": "Uses the nanoarrow C sources the 0.5.0 release.",
+"inherits": [
+"base"
+],
+"cacheVariables": {
+"NANOARROW_BENCHMARK_SOURCE_URL": 
"https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.5.0.zip;
+}
+},
 {
 "name": "v0.4.0",
 "displayName": "v0.4.0",



(arrow) branch main updated: GH-40749: [Python][Packaging] Strip unnecessary symbols when building wheels (#42028)

2024-06-11 Thread raulcd
This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new baf4089eaa GH-40749: [Python][Packaging] Strip unnecessary symbols 
when building wheels (#42028)
baf4089eaa is described below

commit baf4089eaa84b2a5c2cb437f3a249f83577dbc29
Author: Raúl Cumplido 
AuthorDate: Tue Jun 11 11:41:07 2024 +0200

GH-40749: [Python][Packaging] Strip unnecessary symbols when building 
wheels (#42028)

### Rationale for this change

Removing unnecessary symbols will allow us to reduce the size of 
the wheels considerably.

### What changes are included in this PR?

Running `strip --strip-debug` on Linux wheels for all *.so files.

### Are these changes tested?

Yes, via Archery.

### Are there any user-facing changes?

No
* GitHub Issue: #40749

Lead-authored-by: Raúl Cumplido 
Co-authored-by: Antoine Pitrou 
Signed-off-by: Raúl Cumplido 
---
 ci/scripts/python_wheel_manylinux_build.sh | 20 
 1 file changed, 20 insertions(+)

diff --git a/ci/scripts/python_wheel_manylinux_build.sh 
b/ci/scripts/python_wheel_manylinux_build.sh
index 6e29ef58d2..aa86494a9d 100755
--- a/ci/scripts/python_wheel_manylinux_build.sh
+++ b/ci/scripts/python_wheel_manylinux_build.sh
@@ -160,6 +160,26 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist
 pushd /arrow/python
 python setup.py bdist_wheel
 
+echo "=== Strip symbols from wheel ==="
+mkdir -p dist/temp-fix-wheel
+mv dist/pyarrow-*.whl dist/temp-fix-wheel
+
+pushd dist/temp-fix-wheel
+wheel_name=$(ls pyarrow-*.whl)
+# Unzip and remove old wheel
+unzip $wheel_name
+rm $wheel_name
+for filename in $(ls pyarrow/*.so pyarrow/*.so.*); do
+echo "Stripping debug symbols from: $filename";
+strip --strip-debug $filename
+done
+# Zip wheel again after stripping symbols
+zip -r $wheel_name .
+mv $wheel_name ..
+popd
+
+rm -rf dist/temp-fix-wheel
+
 echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} 
==="
 auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
 popd



(arrow) branch main updated: GH-41818: [C++][Parquet] normalize dictionary encoding to use RLE_DICTIONARY (#41819)

2024-06-11 Thread apitrou
This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new a81b7effd1 GH-41818: [C++][Parquet] normalize dictionary encoding to 
use RLE_DICTIONARY (#41819)
a81b7effd1 is described below

commit a81b7effd1c849953e2935f58ae6cceb18315221
Author: mwish 
AuthorDate: Tue Jun 11 16:33:46 2024 +0800

GH-41818: [C++][Parquet] normalize dictionary encoding to use 
RLE_DICTIONARY (#41819)



### Rationale for this change

There are a few points worth noting:

1. 
https://github.com/apache/arrow/blob/main/cpp/src/parquet/encoding.cc#L444-L445 
: the encoding is not passed in the Encoder
2. But it's RLE in the decoder: 
https://github.com/apache/arrow/blob/main/cpp/src/parquet/encoding.cc#L1607 ; it 
is detected and normalized elsewhere, e.g.:
3. 
https://github.com/apache/arrow/blob/main/cpp/src/parquet/column_reader.cc#L876

We'd better unify them.

### What changes are included in this PR?

Unify dict encoding to `RLE_DICTIONARY`.

### Are these changes tested?

No need

### Are there any user-facing changes?

No

* GitHub Issue: #41818

Authored-by: mwish 
Signed-off-by: Antoine Pitrou 
---
 cpp/src/parquet/column_reader.cc | 11 +++
 cpp/src/parquet/column_writer.cc |  8 ++--
 cpp/src/parquet/encoding.cc  |  4 ++--
 cpp/src/parquet/file_reader.cc   |  3 +--
 cpp/src/parquet/types.h  |  6 ++
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 407201a89e..52add8f339 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -642,12 +642,6 @@ namespace {
 // --
 // Impl base class for TypedColumnReader and RecordReader
 
-// PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index
-// encoding.
-static bool IsDictionaryIndexEncoding(const Encoding::type& e) {
-  return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY;
-}
-
 template 
 class ColumnReaderImplBase {
  public:
@@ -876,8 +870,9 @@ class ColumnReaderImplBase {
 }
 
 Encoding::type encoding = page.encoding();
-
 if (IsDictionaryIndexEncoding(encoding)) {
+  // Normalize the deprecated PLAIN_DICTIONARY encoding to RLE_DICTIONARY
+  // in the decoder.
   encoding = Encoding::RLE_DICTIONARY;
 }
 
@@ -950,7 +945,7 @@ class ColumnReaderImplBase {
 
   /// Flag to signal when a new dictionary has been set, for the benefit of
   /// DictionaryRecordReader
-  bool new_dictionary_;
+  bool new_dictionary_ = false;
 
   // The exposed encoding
   ExposedEncoding exposed_encoding_ = ExposedEncoding::NO_ENCODING;
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 9059cd1641..ac1c3ea2e3 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1205,10 +1205,6 @@ Status ConvertDictionaryToDense(const ::arrow::Array& 
array, MemoryPool* pool,
   return Status::OK();
 }
 
-static inline bool IsDictionaryEncoding(Encoding::type encoding) {
-  return encoding == Encoding::PLAIN_DICTIONARY;
-}
-
 template 
 class TypedColumnWriterImpl : public ColumnWriterImpl, public 
TypedColumnWriter {
  public:
@@ -1565,7 +1561,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, 
public TypedColumnWriter<
   }
 
   void FallbackToPlainEncoding() {
-if (IsDictionaryEncoding(current_encoder_->encoding())) {
+if (IsDictionaryIndexEncoding(current_encoder_->encoding())) {
   WriteDictionaryPage();
   // Serialize the buffered Dictionary Indices
   FlushBufferedDataPages();
@@ -1661,7 +1657,7 @@ Status TypedColumnWriterImpl::WriteArrowDictionary(
maybe_parent_nulls);
   };
 
-  if (!IsDictionaryEncoding(current_encoder_->encoding()) ||
+  if (!IsDictionaryIndexEncoding(current_encoder_->encoding()) ||
   !DictionaryDirectWriteSupported(array)) {
 // No longer dictionary-encoding for whatever reason, maybe we never were
 // or we decided to stop. Note that WriteArrow can be invoked multiple
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 903faa92b6..54e1e4 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -442,12 +442,12 @@ class DictEncoderImpl : public EncoderImpl, virtual 
public DictEncoder {
   constexpr static int32_t kDataPageBitWidthBytes = 1;
 
   explicit DictEncoderImpl(const ColumnDescriptor* desc, MemoryPool* pool)
-  : EncoderImpl(desc, Encoding::PLAIN_DICTIONARY, pool),
+  : EncoderImpl(desc, Encoding::RLE_DICTIONARY, pool),
 buffered_indices_(::arrow::stl::allocator(pool)),
 dict_encoded_size_(0),
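
The gist of the change: PLAIN_DICTIONARY is only a deprecated spelling
of the dictionary index encoding, so both the writer and the reader now
normalize it to RLE_DICTIONARY via the shared IsDictionaryIndexEncoding
helper. A rough standalone C sketch of that normalization follows; the
enum here is a simplified stand-in for parquet::Encoding, not the real
type.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for parquet::Encoding. */
enum Encoding { PLAIN, RLE, PLAIN_DICTIONARY, RLE_DICTIONARY };

/* Both spellings denote a dictionary index encoding. */
static bool IsDictionaryIndexEncoding(enum Encoding e) {
  return e == RLE_DICTIONARY || e == PLAIN_DICTIONARY;
}

/* Normalize the deprecated spelling to the canonical one. */
static enum Encoding NormalizeEncoding(enum Encoding e) {
  return IsDictionaryIndexEncoding(e) ? RLE_DICTIONARY : e;
}

int main(void) {
  printf("%d\n", NormalizeEncoding(PLAIN_DICTIONARY) == RLE_DICTIONARY); /* 1 */
  printf("%d\n", NormalizeEncoding(PLAIN) == PLAIN);                     /* 1 */
  return 0;
}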
 

(arrow) branch main updated (c7bfd2e996 -> ee62d97033)

2024-06-11 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from c7bfd2e996 GH-40832: [Java] Adding Spotless to Vector module (#42061)
 add ee62d97033 GH-40829: [Java] Adding Spotless to Memory modules (#42056)

No new revisions were added by this update.

Summary of changes:
 java/dev/checkstyle/checkstyle-spotless.xml|   3 +
 .../memory-core/src/main/java/module-info.java |   1 +
 .../java/org/apache/arrow/memory/Accountant.java   | 118 +++--
 .../apache/arrow/memory/AllocationListener.java|  28 +-
 .../org/apache/arrow/memory/AllocationManager.java | 105 +++--
 .../org/apache/arrow/memory/AllocationOutcome.java |  30 +-
 .../arrow/memory/AllocationOutcomeDetails.java |  58 ++-
 .../apache/arrow/memory/AllocationReservation.java |  26 +-
 .../arrow/memory/AllocatorClosedException.java |   6 +-
 .../java/org/apache/arrow/memory/ArrowBuf.java | 498 +++--
 .../org/apache/arrow/memory/BaseAllocator.java | 299 +++--
 .../org/apache/arrow/memory/BoundsChecking.java|  25 +-
 .../org/apache/arrow/memory/BufferAllocator.java   |  96 ++--
 .../java/org/apache/arrow/memory/BufferLedger.java | 221 +
 .../org/apache/arrow/memory/BufferManager.java |  13 +-
 .../org/apache/arrow/memory/CheckAllocator.java|  26 +-
 .../org/apache/arrow/memory/ChildAllocator.java|  21 +-
 .../memory/DefaultAllocationManagerOption.java |  58 +--
 .../org/apache/arrow/memory/ForeignAllocation.java |  16 +-
 .../arrow/memory/ForeignAllocationManager.java |   8 +-
 .../arrow/memory/LowCostIdentityHashMap.java   |  68 ++-
 .../apache/arrow/memory/OutOfMemoryException.java  |  17 +-
 .../apache/arrow/memory/OwnershipTransferNOOP.java |   5 +-
 .../arrow/memory/OwnershipTransferResult.java  |   5 +-
 .../org/apache/arrow/memory/ReferenceManager.java  | 169 +++
 .../org/apache/arrow/memory/ReusableBuffer.java|  17 +-
 .../org/apache/arrow/memory/RootAllocator.java |  33 +-
 .../apache/arrow/memory/ValueWithKeyIncluded.java  |   4 +-
 .../java/org/apache/arrow/memory/package-info.java |   6 +-
 .../memory/rounding/DefaultRoundingPolicy.java |  34 +-
 .../arrow/memory/rounding/RoundingPolicy.java  |   1 -
 .../memory/rounding/SegmentRoundingPolicy.java |  28 +-
 .../apache/arrow/memory/util/ArrowBufPointer.java  |  46 +-
 .../apache/arrow/memory/util/AssertionUtil.java|   7 +-
 .../arrow/memory/util/AutoCloseableLock.java   |   6 +-
 .../arrow/memory/util/ByteFunctionHelpers.java |  85 ++--
 .../org/apache/arrow/memory/util/CommonUtil.java   |   9 +-
 .../java/org/apache/arrow/memory/util/Float16.java | 103 ++---
 .../apache/arrow/memory/util/HistoricalLog.java|  78 ++--
 .../apache/arrow/memory/util/LargeMemoryUtil.java  |  13 +-
 .../org/apache/arrow/memory/util/MemoryUtil.java   | 131 +++---
 .../org/apache/arrow/memory/util/StackTrace.java   |  15 +-
 .../arrow/memory/util/hash/ArrowBufHasher.java |  12 +-
 .../arrow/memory/util/hash/MurmurHasher.java   |  32 +-
 .../arrow/memory/util/hash/SimpleHasher.java   |  35 +-
 .../java/org/apache/arrow/util/AutoCloseables.java |  76 ++--
 .../java/org/apache/arrow/util/Collections2.java   |  29 +-
 .../java/org/apache/arrow/util/Preconditions.java  | 264 ---
 .../org/apache/arrow/util/VisibleForTesting.java   |   9 +-
 .../arrow/memory/CountingAllocationListener.java   |  18 +-
 .../memory/DefaultAllocationManagerFactory.java|  10 +-
 .../org/apache/arrow/memory/TestAccountant.java|  29 +-
 .../apache/arrow/memory/TestAllocationManager.java |   5 +-
 .../java/org/apache/arrow/memory/TestArrowBuf.java |  33 +-
 .../org/apache/arrow/memory/TestBaseAllocator.java | 347 +++---
 .../apache/arrow/memory/TestBoundaryChecking.java  |  21 +-
 .../apache/arrow/memory/TestForeignAllocation.java |  27 +-
 .../arrow/memory/TestLowCostIdentityHashMap.java   |  13 +-
 .../java/org/apache/arrow/memory/TestOpens.java|  15 +-
 .../arrow/memory/util/TestArrowBufPointer.java |  13 +-
 .../arrow/memory/util/TestByteFunctionHelpers.java |  74 ++-
 .../arrow/memory/util/TestLargeMemoryUtil.java | 215 -
 .../arrow/memory/util/hash/TestArrowBufHasher.java |  55 ++-
 .../org/apache/arrow/util/TestCollections2.java|   8 +-
 .../java/org/apache/arrow/util/TestStackTrace.java |  12 +-
 .../java/io/netty/buffer/ExpandableByteBuf.java|   5 +-
 .../src/main/java/io/netty/buffer/LargeBuffer.java |   1 -
 .../io/netty/buffer/MutableWrappedByteBuf.java |  28 +-
 .../main/java/io/netty/buffer/NettyArrowBuf.java   |  91 ++--
 .../io/netty/buffer/PooledByteBufAllocatorL.java   |  47 +-
 .../io/netty/buffer/UnsafeDirectLittleEndian.java  |   9 +-
 .../arrow/memory/patch/ArrowByteBufAllocator.java  |  16 +-
 .../netty/buffer/TestUnsafeDirectLittleEndian.java |  10 +-
 .../netty/DefaultAllocationManagerFactory.java |   

(arrow) branch dependabot/maven/java/maven.version-3.9.7 deleted (was 8990d582b0)

2024-06-11 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch dependabot/maven/java/maven.version-3.9.7
in repository https://gitbox.apache.org/repos/asf/arrow.git


 was 8990d582b0 MINOR: [Java] Bump maven.version from 3.8.7 to 3.9.7 in 
/java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow) branch main updated: GH-40823: [Java] Adding Spotless to Compression module (#42060)

2024-06-11 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 1a48366924 GH-40823: [Java] Adding Spotless to Compression module 
(#42060)
1a48366924 is described below

commit 1a48366924a1ca77df62d7406b44b8eb76bfc54b
Author: Vibhatha Lakmal Abeykoon 
AuthorDate: Tue Jun 11 12:22:04 2024 +0530

GH-40823: [Java] Adding Spotless to Compression module (#42060)

### Rationale for this change

Adding code style and formatting options for the Compression module.

### What changes are included in this PR?

The spotless code-formatting plugin has been added.

### Are these changes tested?

Yes, but this doesn't involve test cases; the plugin itself applies the corrections.
* GitHub Issue: #40823

Authored-by: Vibhatha Abeykoon 
Signed-off-by: David Li 
---
 java/compression/pom.xml   |   5 +
 .../compression/CommonsCompressionFactory.java |   3 +-
 .../arrow/compression/Lz4CompressionCodec.java |  31 ++--
 .../arrow/compression/ZstdCompressionCodec.java|  36 +++--
 .../TestArrowReaderWriterWithCompression.java  | 133 +--
 .../arrow/compression/TestCompressionCodec.java| 179 +
 6 files changed, 234 insertions(+), 153 deletions(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 561877bd5c..3b4449c33f 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -28,6 +28,11 @@ under the License.
   Arrow Compression
   (Experimental/Contrib) A library for working with the 
compression/decompression of Arrow data.
 
+  
+
dev/checkstyle/checkstyle-spotless.xml
+none
+  
+
   
 
   org.apache.arrow
diff --git 
a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
 
b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
index 45d8c7d443..f15c139df1 100644
--- 
a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
+++ 
b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.compression;
 
 import org.apache.arrow.vector.compression.CompressionCodec;
@@ -23,7 +22,7 @@ import org.apache.arrow.vector.compression.CompressionUtil;
 /**
  * Default implementation of factory supported LZ4 and ZSTD compression.
  *
- * // TODO(ARROW-12115): Rename this class.
+ * // TODO(ARROW-12115): Rename this class.
  */
 public class CommonsCompressionFactory implements CompressionCodec.Factory {
 
diff --git 
a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
 
b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
index e8b780638e..edd52604bc 100644
--- 
a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
+++ 
b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.compression;
 
 import java.io.ByteArrayInputStream;
@@ -22,7 +21,6 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.util.Preconditions;
@@ -32,21 +30,21 @@ import 
org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStrea
 import 
org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
 import org.apache.commons.compress.utils.IOUtils;
 
-/**
- * Compression codec for the LZ4 algorithm.
- */
+/** Compression codec for the LZ4 algorithm. */
 public class Lz4CompressionCodec extends AbstractCompressionCodec {
 
   @Override
   protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf 
uncompressedBuffer) {
-Preconditions.checkArgument(uncompressedBuffer.writerIndex() <= 
Integer.MAX_VALUE,
-"The uncompressed buffer size exceeds the integer limit %s.", 
Integer.MAX_VALUE);
+Preconditions.checkArgument(
+uncompressedBuffer.writerIndex() <= Integer.MAX_VALUE,
+"The uncompressed buffer size exceeds the integer limit %s.",
+Integer.MAX_VALUE);
 
 byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()];
-uncompressedBuffer.getBytes(/*index=*/0, inBytes);
+uncompressedBuffer.getBytes(/*index=*/ 0, inBytes);
 ByteArrayOutputStream baos = new ByteArrayOutputStream();
 try (InputStream in = new ByteArrayInputStream(inBytes);
- 

(arrow) branch dependabot/maven/java/org.apache.orc-orc-core-2.0.1 deleted (was bbee978225)

2024-06-11 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch 
dependabot/maven/java/org.apache.orc-orc-core-2.0.1
in repository https://gitbox.apache.org/repos/asf/arrow.git


 was bbee978225 MINOR: [Java] Bump org.apache.orc:orc-core from 1.9.2 to 
2.0.1 in /java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow) branch main updated (ee6fcf3c46 -> 4df00fa477)

2024-06-10 Thread zeroshade
This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from ee6fcf3c46 GH-40827: [Java] Adding Spotless to Gandiva module (#42055)
 add 4df00fa477 GH-41988: [Go] Add FormatRecoveredError to consistently 
handle recovery with wrapped errors (#41989)

No new revisions were added by this update.

Summary of changes:
 go/arrow/array/concat.go   |   7 +-
 go/arrow/avro/schema.go|  10 +-
 go/arrow/flight/record_batch_reader.go |   5 +-
 go/arrow/internal/cdata_integration/entrypoints.go |   3 +-
 go/arrow/ipc/reader.go |   5 +-
 go/arrow/ipc/writer.go |   2 +-
 .../utils/recovery.go} |  21 ++-
 .../utils/recovery_test.go}|  71 +-
 go/parquet/file/column_writer_types.gen.go | 149 +++--
 go/parquet/file/column_writer_types.gen.go.tmpl|  19 +--
 go/parquet/metadata/statistics_types.gen.go|  82 ++--
 go/parquet/metadata/statistics_types.gen.go.tmpl   |   9 +-
 go/parquet/schema/reflection.go|  20 +--
 13 files changed, 91 insertions(+), 312 deletions(-)
 copy go/{parquet/internal/utils/bit_packing_amd64.go => 
internal/utils/recovery.go} (58%)
 copy go/{arrow/memory/util_test.go => internal/utils/recovery_test.go} (50%)



(arrow-nanoarrow) branch main updated: Update dist/ for commit f443e46163ded43ddb5e024c1a2f1fddd3b43c3f

2024-06-10 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 784325be Update dist/ for commit 
f443e46163ded43ddb5e024c1a2f1fddd3b43c3f
784325be is described below

commit 784325be6c3ac85fca46e894af9d0fdfd0bc324e
Author: GitHub Actions 
AuthorDate: Tue Jun 11 01:31:10 2024 +

Update dist/ for commit f443e46163ded43ddb5e024c1a2f1fddd3b43c3f
---
 dist/nanoarrow.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 37d14305..c4897cfa 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -2984,14 +2984,11 @@ static int ArrowArrayViewValidateMinimal(struct 
ArrowArrayView* array_view,
   // uint64_t is used here to avoid overflow when adding the offset and 
length
   if ((uint64_t)array_view->offset + (uint64_t)array_view->length >
   (uint64_t)max_length) {
-ArrowErrorSet(
-error,
-"Offset + length of a run-end encoded array must fit in a value"
-" of the run end type %s, but offset + length is %lu while the "
-"allowed maximum is %lu",
-ArrowTypeString(run_ends_view->storage_type),
-(unsigned long)array_view->offset + (unsigned 
long)array_view->length,
-(unsigned long)max_length);
+ArrowErrorSet(error,
+  "Offset + length of a run-end encoded array must fit in 
a value"
+  " of the run end type %s, but offset + length is %ld",
+  ArrowTypeString(run_ends_view->storage_type),
+  (long)array_view->offset + (long)array_view->length);
 return EINVAL;
   }
   if (run_ends_view->length > values_view->length) {
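
The message being reworded above belongs to an overflow guard: offset
and length are widened to uint64_t before the addition so the sum cannot
wrap around before it is compared with the run-end type's maximum. A
minimal C sketch of that pattern; the function name is illustrative,
not part of nanoarrow.

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if offset + length fits in the run-end type's maximum
 * (e.g. INT32_MAX for int32 run ends). Widening to uint64_t makes the
 * addition safe: even two values near INT64_MAX cannot wrap a uint64_t. */
static int FitsInRunEndType(int64_t offset, int64_t length,
                            int64_t max_length) {
  return (uint64_t)offset + (uint64_t)length <= (uint64_t)max_length;
}

int main(void) {
  printf("%d\n", FitsInRunEndType(1, INT32_MAX, INT32_MAX)); /* 0: does not fit */
  printf("%d\n", FitsInRunEndType(0, 100, INT32_MAX));       /* 1: fits */
  return 0;
}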



(arrow) branch dependabot/maven/java/com.google.protobuf-protobuf-bom-4.27.1 deleted (was 33947dc7b8)

2024-06-10 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch 
dependabot/maven/java/com.google.protobuf-protobuf-bom-4.27.1
in repository https://gitbox.apache.org/repos/asf/arrow.git


 was 33947dc7b8 MINOR: [Java] Bump com.google.protobuf:protobuf-bom in /java

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.



(arrow) branch main updated (12e32f5347 -> ee6fcf3c46)

2024-06-10 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


from 12e32f5347 GH-40830: [Java] Adding Spotless to Performance module 
(#42057)
 add ee6fcf3c46 GH-40827: [Java] Adding Spotless to Gandiva module (#42055)

No new revisions were added by this update.

Summary of changes:
 java/gandiva/pom.xml   |   2 +
 java/gandiva/src/main/java/module-info.java|   2 +-
 .../gandiva/evaluator/ConfigurationBuilder.java|  16 +-
 .../arrow/gandiva/evaluator/DecimalTypeUtil.java   |  42 +-
 .../gandiva/evaluator/ExpressionRegistry.java  |  45 +-
 .../evaluator/ExpressionRegistryJniHelper.java |   6 +-
 .../org/apache/arrow/gandiva/evaluator/Filter.java |  70 +-
 .../arrow/gandiva/evaluator/FunctionSignature.java |  30 +-
 .../apache/arrow/gandiva/evaluator/JniLoader.java  |  55 +-
 .../apache/arrow/gandiva/evaluator/JniWrapper.java | 121 +--
 .../apache/arrow/gandiva/evaluator/Projector.java  | 277 +++---
 .../arrow/gandiva/evaluator/SelectionVector.java   |  15 +-
 .../gandiva/evaluator/SelectionVectorInt16.java|   5 +-
 .../gandiva/evaluator/SelectionVectorInt32.java|   5 +-
 .../arrow/gandiva/evaluator/VectorExpander.java|  16 +-
 .../exceptions/EvaluatorClosedException.java   |   1 -
 .../arrow/gandiva/exceptions/GandivaException.java |   1 -
 .../exceptions/UnsupportedTypeException.java   |   5 +-
 .../apache/arrow/gandiva/expression/AndNode.java   |   6 +-
 .../arrow/gandiva/expression/ArrowTypeHelper.java  | 463 +-
 .../arrow/gandiva/expression/BinaryNode.java   |  17 +-
 .../arrow/gandiva/expression/BooleanNode.java  |   5 +-
 .../apache/arrow/gandiva/expression/Condition.java |   6 +-
 .../arrow/gandiva/expression/DecimalNode.java  |   5 +-
 .../arrow/gandiva/expression/DoubleNode.java   |   5 +-
 .../arrow/gandiva/expression/ExpressionTree.java   |   6 +-
 .../apache/arrow/gandiva/expression/FieldNode.java |   5 +-
 .../apache/arrow/gandiva/expression/FloatNode.java |   5 +-
 .../arrow/gandiva/expression/FunctionNode.java |   6 +-
 .../apache/arrow/gandiva/expression/IfNode.java|   5 +-
 .../apache/arrow/gandiva/expression/InNode.java| 129 +--
 .../apache/arrow/gandiva/expression/IntNode.java   |   5 +-
 .../apache/arrow/gandiva/expression/LongNode.java  |   5 +-
 .../apache/arrow/gandiva/expression/NullNode.java  |   1 -
 .../apache/arrow/gandiva/expression/OrNode.java|   6 +-
 .../arrow/gandiva/expression/StringNode.java   |  20 +-
 .../arrow/gandiva/expression/TreeBuilder.java  |  88 +-
 .../apache/arrow/gandiva/expression/TreeNode.java  |   5 +-
 .../arrow/gandiva/evaluator/BaseEvaluatorTest.java |  51 +-
 .../gandiva/evaluator/DecimalTypeUtilTest.java |  44 +-
 .../gandiva/evaluator/ExpressionRegistryTest.java  |   5 +-
 .../arrow/gandiva/evaluator/FilterProjectTest.java |  28 +-
 .../apache/arrow/gandiva/evaluator/FilterTest.java |  49 +-
 .../gandiva/evaluator/MicroBenchmarkTest.java  |  54 +-
 .../gandiva/evaluator/ProjectorDecimalTest.java| 652 +++---
 .../arrow/gandiva/evaluator/ProjectorTest.java | 932 ++---
 .../arrow/gandiva/evaluator/TestJniLoader.java |  42 +-
 .../gandiva/expression/ArrowTypeHelperTest.java|   2 -
 .../arrow/gandiva/expression/TreeBuilderTest.java  |   2 -
 49 files changed, 1754 insertions(+), 1614 deletions(-)



(arrow) branch main updated: GH-40830: [Java] Adding Spotless to Performance module (#42057)

2024-06-10 Thread lidavidm
This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
 new 12e32f5347 GH-40830: [Java] Adding Spotless to Performance module 
(#42057)
12e32f5347 is described below

commit 12e32f53470098e14996730fa7fe9165159419d6
Author: Laurent Goujon 
AuthorDate: Mon Jun 10 17:40:54 2024 -0700

GH-40830: [Java] Adding Spotless to Performance module (#42057)

### Rationale for this change

Applying Java code style and formatting options to the Performance module.

### What changes are included in this PR?

Java code formatting via the spotless plugin has been enabled.

### Are these changes tested?

Yes, but this doesn't involve test cases; the plugin itself applies the corrections.

### Are there any user-facing changes?

No

* GitHub Issue: #40830

Authored-by: Laurent Goujon 
Signed-off-by: David Li 
---
 java/performance/pom.xml   |  2 +
 .../arrow/adapter/AvroAdapterBenchmarks.java   | 47 +
 .../arrow/adapter/jdbc/JdbcAdapterBenchmarks.java  | 45 -
 .../search/ParallelSearcherBenchmarks.java | 13 ++---
 .../apache/arrow/memory/AllocatorBenchmarks.java   | 24 +++--
 .../apache/arrow/memory/ArrowBufBenchmarks.java| 20 ++--
 .../memory/util/ArrowBufPointerBenchmarks.java | 19 ++-
 .../memory/util/ByteFunctionHelpersBenchmarks.java | 38 --
 .../arrow/vector/BaseValueVectorBenchmarks.java| 28 --
 .../arrow/vector/BitVectorHelperBenchmarks.java| 59 --
 .../arrow/vector/DecimalVectorBenchmarks.java  | 33 
 .../org/apache/arrow/vector/Float8Benchmarks.java  | 31 
 .../arrow/vector/FloatingPointBenchmarks.java  | 20 +++-
 .../org/apache/arrow/vector/IntBenchmarks.java | 14 ++---
 .../org/apache/arrow/vector/VarCharBenchmarks.java | 22 +++-
 .../vector/VariableWidthVectorBenchmarks.java  | 21 +++-
 .../apache/arrow/vector/VectorLoaderBenchmark.java | 24 +++--
 .../arrow/vector/VectorUnloaderBenchmark.java  | 19 +++
 .../dictionary/DictionaryEncoderBenchmarks.java| 29 ---
 .../arrow/vector/ipc/WriteChannelBenchmark.java| 16 ++
 .../ipc/message/ArrowRecordBatchBenchmarks.java| 22 +++-
 .../arrow/vector/util/TransferPairBenchmarks.java  | 22 +++-
 22 files changed, 205 insertions(+), 363 deletions(-)

diff --git a/java/performance/pom.xml b/java/performance/pom.xml
index 83b0a88da0..1db900c81e 100644
--- a/java/performance/pom.xml
+++ b/java/performance/pom.xml
@@ -30,6 +30,8 @@ under the License.
   JMH Performance benchmarks for other Arrow 
libraries.
 
   
+
dev/checkstyle/checkstyle-spotless.xml
+none
 1.37
 benchmarks
 true
diff --git 
a/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
 
b/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
index c07aeffafb..6032a79ece 100644
--- 
a/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
+++ 
b/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
@@ -14,13 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.util.concurrent.TimeUnit;
-
 import org.apache.arrow.adapter.avro.AvroToArrow;
 import org.apache.arrow.adapter.avro.AvroToArrowConfig;
 import org.apache.arrow.adapter.avro.AvroToArrowConfigBuilder;
@@ -52,9 +50,7 @@ import org.openjdk.jmh.runner.RunnerException;
 import org.openjdk.jmh.runner.options.Options;
 import org.openjdk.jmh.runner.options.OptionsBuilder;
 
-/**
- * Benchmarks for avro adapter.
- */
+/** Benchmarks for avro adapter. */
 @State(Scope.Benchmark)
 public class AvroAdapterBenchmarks {
 
@@ -65,21 +61,25 @@ public class AvroAdapterBenchmarks {
   private Schema schema;
   private BinaryDecoder decoder;
 
-  /**
-   * Setup benchmarks.
-   */
+  /** Setup benchmarks. */
   @Setup
   public void prepare() throws Exception {
 BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
 config = new AvroToArrowConfigBuilder(allocator).build();
 
-String schemaStr = "{\n" + " \"namespace\": \"org.apache.arrow.avro\",\n" +
- " \"type\": \"record\",\n" + " \"name\": \"testBenchmark\",\n" + " 
\"fields\": [\n" +
- "{\"name\": \"f0\", \"type\": \"string\"},\n" +
- "{\"name\": \"f1\", \"type\": \"int\"},\n" +
- "{\"name\": \"f2\", \"type\": \"long\"},\n" +
- "{\"name\": \"f3\", \"type\": \"boolean\"},\n" +
- "{\"name\": \"f4\", \"type\": \"float\"}\n" + "  ]\n" + 

(arrow) branch dependabot/maven/java/com.google.api.grpc-proto-google-common-protos-2.40.0 deleted (was 74bdf373ac)

2024-06-10 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch 
dependabot/maven/java/com.google.api.grpc-proto-google-common-protos-2.40.0
in repository https://gitbox.apache.org/repos/asf/arrow.git


 was 74bdf373ac MINOR: [Java] Bump 
com.google.api.grpc:proto-google-common-protos

The revisions that were on this branch are still contained in
other references; therefore, this change does not discard any commits
from the repository.


