This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9a36c42fb5 GH-38798: [Integration] Enable C Data Interface integration
testing on Rust (#38799)
9a36c42fb5 is described below
commit 9a36c42fb5f34499018ba40a38d63e1edc50d9a2
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Nov 21 14:17:41 2023 +0100
GH-38798: [Integration] Enable C Data Interface integration testing on Rust
(#38799)
### Rationale for this change
Arrow Rust has added entrypoints for C Data Interface integration testing,
so this can now be enabled on our side:
https://github.com/apache/arrow-rs/pull/5080
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #38798
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
ci/scripts/integration_arrow.sh | 1 +
ci/scripts/rust_build.sh | 3 +-
dev/archery/archery/integration/cdata.py | 9 ++
dev/archery/archery/integration/tester_cpp.py | 10 +--
dev/archery/archery/integration/tester_go.py | 10 +--
dev/archery/archery/integration/tester_rust.py | 114 ++++++++++++++++++++++++-
docker-compose.yml | 3 +-
7 files changed, 128 insertions(+), 22 deletions(-)
diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh
index b5a38f0141..6d1d4befa6 100755
--- a/ci/scripts/integration_arrow.sh
+++ b/ci/scripts/integration_arrow.sh
@@ -43,6 +43,7 @@ fi
# Get more detailed context on crashes
export PYTHONFAULTHANDLER=1
+
# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
time archery integration \
--run-c-data \
diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh
index 2dfc0f1b18..5fc21d454b 100755
--- a/ci/scripts/rust_build.sh
+++ b/ci/scripts/rust_build.sh
@@ -21,6 +21,7 @@ set -e
arrow_dir=${1}
source_dir=${1}/rust
+build_dir=${2}/rust
# This file is used to build the rust binaries needed for the archery
# integration tests. Testing of the rust implementation in normal CI is handled
@@ -54,7 +55,7 @@ rustup show
pushd ${source_dir}
# build only the integration testing binaries
-cargo build -p arrow-integration-testing
+cargo build -p arrow-integration-testing --target-dir ${build_dir}
# Save disk space by removing large temporary build products
rm -rf target/debug/deps
diff --git a/dev/archery/archery/integration/cdata.py
b/dev/archery/archery/integration/cdata.py
index 8e5550fcdb..a5dbbe29d8 100644
--- a/dev/archery/archery/integration/cdata.py
+++ b/dev/archery/archery/integration/cdata.py
@@ -18,10 +18,19 @@
import cffi
from contextlib import contextmanager
import functools
+import os
+import sys
from .tester import CDataExporter, CDataImporter
+if sys.platform == "darwin":
+ dll_suffix = ".dylib"
+elif os.name == "nt":
+ dll_suffix = ".dll"
+else:
+ dll_suffix = ".so"
+
_c_data_decls = """
struct ArrowSchema {
// Array type description
diff --git a/dev/archery/archery/integration/tester_cpp.py
b/dev/archery/archery/integration/tester_cpp.py
index 658e713301..02c110c0e2 100644
--- a/dev/archery/archery/integration/tester_cpp.py
+++ b/dev/archery/archery/integration/tester_cpp.py
@@ -18,7 +18,6 @@
import contextlib
import functools
import os
-import sys
import subprocess
from . import cdata
@@ -42,15 +41,8 @@ _FLIGHT_CLIENT_CMD = [
"localhost",
]
-if sys.platform == "darwin":
- _dll_suffix = ".dylib"
-elif os.name == "nt":
- _dll_suffix = ".dll"
-else:
- _dll_suffix = ".so"
-
_DLL_PATH = _EXE_PATH
-_ARROW_DLL = os.path.join(_DLL_PATH, "libarrow" + _dll_suffix)
+_ARROW_DLL = os.path.join(_DLL_PATH, "libarrow" + cdata.dll_suffix)
class CppTester(Tester):
diff --git a/dev/archery/archery/integration/tester_go.py
b/dev/archery/archery/integration/tester_go.py
index 2b3dc3a1be..5368f06a31 100644
--- a/dev/archery/archery/integration/tester_go.py
+++ b/dev/archery/archery/integration/tester_go.py
@@ -18,7 +18,6 @@
import contextlib
import functools
import os
-import sys
import subprocess
from . import cdata
@@ -43,17 +42,10 @@ _FLIGHT_CLIENT_CMD = [
"localhost",
]
-if sys.platform == "darwin":
- _dll_suffix = ".dylib"
-elif os.name == "nt":
- _dll_suffix = ".dll"
-else:
- _dll_suffix = ".so"
-
_DLL_PATH = os.path.join(
ARROW_ROOT_DEFAULT,
"go/arrow/internal/cdata_integration")
-_INTEGRATION_DLL = os.path.join(_DLL_PATH, "arrow_go_integration" +
_dll_suffix)
+_INTEGRATION_DLL = os.path.join(_DLL_PATH, "arrow_go_integration" +
cdata.dll_suffix)
class GoTester(Tester):
diff --git a/dev/archery/archery/integration/tester_rust.py
b/dev/archery/archery/integration/tester_rust.py
index c7a94de219..56b07859dc 100644
--- a/dev/archery/archery/integration/tester_rust.py
+++ b/dev/archery/archery/integration/tester_rust.py
@@ -16,15 +16,19 @@
# under the License.
import contextlib
+import functools
import os
import subprocess
-from .tester import Tester
+from . import cdata
+from .tester import Tester, CDataExporter, CDataImporter
from .util import run_cmd, log
from ..utils.source import ARROW_ROOT_DEFAULT
-_EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, "rust/target/debug")
+_EXE_PATH = os.environ.get(
+ "ARROW_RUST_EXE_PATH", os.path.join(ARROW_ROOT_DEFAULT,
"rust/target/debug")
+)
_INTEGRATION_EXE = os.path.join(_EXE_PATH, "arrow-json-integration-test")
_STREAM_TO_FILE = os.path.join(_EXE_PATH, "arrow-stream-to-file")
_FILE_TO_STREAM = os.path.join(_EXE_PATH, "arrow-file-to-stream")
@@ -37,12 +41,19 @@ _FLIGHT_CLIENT_CMD = [
"localhost",
]
+_INTEGRATION_DLL = os.path.join(_EXE_PATH,
+ "libarrow_integration_testing" +
cdata.dll_suffix)
+
class RustTester(Tester):
PRODUCER = True
CONSUMER = True
FLIGHT_SERVER = True
FLIGHT_CLIENT = True
+ C_DATA_SCHEMA_EXPORTER = True
+ C_DATA_ARRAY_EXPORTER = True
+ C_DATA_SCHEMA_IMPORTER = True
+ C_DATA_ARRAY_IMPORTER = True
name = 'Rust'
@@ -117,3 +128,102 @@ class RustTester(Tester):
if self.debug:
log(' '.join(cmd))
run_cmd(cmd)
+
+ def make_c_data_exporter(self):
+ return RustCDataExporter(self.debug, self.args)
+
+ def make_c_data_importer(self):
+ return RustCDataImporter(self.debug, self.args)
+
+
+_rust_c_data_entrypoints = """
+ const char* arrow_rs_cdata_integration_export_schema_from_json(
+ const char* json_path, uintptr_t out);
+ const char* arrow_rs_cdata_integration_import_schema_and_compare_to_json(
+ const char* json_path, uintptr_t c_schema);
+
+ const char* arrow_rs_cdata_integration_export_batch_from_json(
+ const char* json_path, int num_batch, uintptr_t out);
+ const char* arrow_rs_cdata_integration_import_batch_and_compare_to_json(
+ const char* json_path, int num_batch, uintptr_t c_array);
+
+ void arrow_rs_free_error(const char*);
+ """
+
+
[email protected]_cache
+def _load_ffi(ffi, lib_path=_INTEGRATION_DLL):
+ ffi.cdef(_rust_c_data_entrypoints)
+ dll = ffi.dlopen(lib_path)
+ return dll
+
+
+class _CDataBase:
+
+ def __init__(self, debug, args):
+ self.debug = debug
+ self.args = args
+ self.ffi = cdata.ffi()
+ self.dll = _load_ffi(self.ffi)
+
+ def _pointer_to_int(self, c_ptr):
+ return self.ffi.cast('uintptr_t', c_ptr)
+
+ def _check_rust_error(self, rs_error):
+ """
+ Check a `const char*` error return from an integration entrypoint.
+
+ A null means success, a non-empty string is an error message.
+ The string is dynamically allocated on the Rust side.
+ """
+ assert self.ffi.typeof(rs_error) is self.ffi.typeof("const char*")
+ if rs_error != self.ffi.NULL:
+ try:
+ error = self.ffi.string(rs_error).decode(
+ 'utf8', errors='replace')
+ raise RuntimeError(
+ f"Rust C Data Integration call failed: {error}")
+ finally:
+ self.dll.arrow_rs_free_error(rs_error)
+
+
+class RustCDataExporter(CDataExporter, _CDataBase):
+
+ def export_schema_from_json(self, json_path, c_schema_ptr):
+ rs_error = self.dll.arrow_rs_cdata_integration_export_schema_from_json(
+ str(json_path).encode(), self._pointer_to_int(c_schema_ptr))
+ self._check_rust_error(rs_error)
+
+ def export_batch_from_json(self, json_path, num_batch, c_array_ptr):
+ rs_error = self.dll.arrow_rs_cdata_integration_export_batch_from_json(
+ str(json_path).encode(), num_batch,
+ self._pointer_to_int(c_array_ptr))
+ self._check_rust_error(rs_error)
+
+ @property
+ def supports_releasing_memory(self):
+ return True
+
+ def record_allocation_state(self):
+ # FIXME we should track the amount of Rust-allocated memory (GH-38822)
+ return 0
+
+
+class RustCDataImporter(CDataImporter, _CDataBase):
+
+ def import_schema_and_compare_to_json(self, json_path, c_schema_ptr):
+ rs_error = \
+
self.dll.arrow_rs_cdata_integration_import_schema_and_compare_to_json(
+ str(json_path).encode(), self._pointer_to_int(c_schema_ptr))
+ self._check_rust_error(rs_error)
+
+ def import_batch_and_compare_to_json(self, json_path, num_batch,
+ c_array_ptr):
+ rs_error = \
+
self.dll.arrow_rs_cdata_integration_import_batch_and_compare_to_json(
+ str(json_path).encode(), num_batch,
self._pointer_to_int(c_array_ptr))
+ self._check_rust_error(rs_error)
+
+ @property
+ def supports_releasing_memory(self):
+ return True
diff --git a/docker-compose.yml b/docker-compose.yml
index e2c993ee9e..8cc05903e9 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1716,8 +1716,9 @@ services:
environment:
<<: [*common, *ccache]
ARCHERY_INTEGRATION_WITH_RUST: 0
- # Tell Archery where the arrow C++ binaries are located
+ # Tell Archery where Arrow binaries are located
ARROW_CPP_EXE_PATH: /build/cpp/debug
+ ARROW_RUST_EXE_PATH: /build/rust/debug
command:
["/arrow/ci/scripts/integration_arrow_build.sh /arrow /build &&
/arrow/ci/scripts/integration_arrow.sh /arrow /build"]