This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new fd73986 chore(ci): Fix verification builds for Windows and centos7
(#341)
fd73986 is described below
commit fd73986cd1e6742e8f5e3d1ebb307fcf5060a734
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Dec 21 11:13:27 2023 -0400
chore(ci): Fix verification builds for Windows and centos7 (#341)
This PR contains a few updates so that the verification job passes on
Windows, centos7, and fedora.
On centos7/fedora, the builds were failing because the new test that was
added needs to be compiled with `-fPIC`. I updated the verification
script to always set `CMAKE_POSITION_INDEPENDENT_CODE=ON` from the
top-level `cmake` invocation. This was easier than trying to get
`target_compile_options()` working on the versions of cmake available by
default in all the runners.
On the GitHub Actions-based verifications, the builds were failing
because the cached Arrow C++ build was not built with zlib (the gzip
codec). Simply adding `ARROW_WITH_ZLIB=ON` did not work on Windows
(something about a missing `.lib` file), so I just switched to using
zlib directly. When the IPC reader supports compression we will need it
anyway, and it was a fairly minimal change.
There were also a few compiler warnings that I fixed while I had a real
live Windows/MSVC session going.
---
.github/workflows/verify.yaml | 6 +--
dev/release/README.md | 8 +--
dev/release/verify-release-candidate.sh | 1 +
extensions/nanoarrow_ipc/CMakeLists.txt | 11 ++++
.../src/nanoarrow/nanoarrow_ipc_files_test.cc | 60 ++++++++++------------
.../nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt | 25 +++++++++
src/nanoarrow/integration/c_data_integration.cc | 19 ++++---
src/nanoarrow/integration/c_data_integration.h | 23 ++++++---
src/nanoarrow/nanoarrow_testing.hpp | 10 ++--
9 files changed, 101 insertions(+), 62 deletions(-)
diff --git a/.github/workflows/verify.yaml b/.github/workflows/verify.yaml
index a4b1bb5..c5d1236 100644
--- a/.github/workflows/verify.yaml
+++ b/.github/workflows/verify.yaml
@@ -81,16 +81,16 @@ jobs:
uses: actions/cache@v3
with:
path: arrow
- key: arrow-${{ runner.os }}-5
+ key: arrow-${{ runner.os }}-6
- name: Build Arrow C++
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
- curl
https://dlcdn.apache.org/arrow/arrow-12.0.1/apache-arrow-12.0.1.tar.gz | \
+ curl
https://dlcdn.apache.org/arrow/arrow-14.0.2/apache-arrow-14.0.2.tar.gz | \
tar -zxf -
mkdir arrow-build && cd arrow-build
- cmake ../apache-arrow-12.0.1/cpp -DCMAKE_INSTALL_PREFIX=../arrow
+ cmake ../apache-arrow-14.0.2/cpp -DCMAKE_INSTALL_PREFIX=../arrow
cmake --build .
cmake --install . --prefix=../arrow ${{
matrix.config.extra_cmake_install }}
cd ..
diff --git a/dev/release/README.md b/dev/release/README.md
index 4ecc67e..628e1d0 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -80,10 +80,10 @@ Command Line Tools (i.e., `xcode-select --install`),
```bash
# Download + build Arrow C++
-curl https://dlcdn.apache.org/arrow/arrow-11.0.0/apache-arrow-11.0.0.tar.gz | \
+curl
https://github.com/apache/arrow/archive/refs/tags/apache-arrow-14.0.2.tar.gz | \
tar -zxf -
mkdir arrow-build && cd arrow-build
-cmake ../apache-arrow-11.0.0/cpp \
+cmake ../apache-arrow-14.0.2/cpp \
-DARROW_JEMALLOC=OFF -DARROW_SIMD_LEVEL=NONE \
# Required for Arrow on old MacOS
-DCMAKE_CXX_FLAGS="-D_LIBCPP_DISABLE_AVAILABILITY" \
@@ -139,10 +139,10 @@ the verification script.
```bash
# Build Arrow C++ from source
-curl https://dlcdn.apache.org/arrow/arrow-12.0.1/apache-arrow-12.0.1.tar.gz | \
+curl
https://github.com/apache/arrow/archive/refs/tags/apache-arrow-14.0.2.tar.gz | \
tar -zxf -
mkdir arrow-build && cd arrow-build
-cmake ../apache-arrow-12.0.1/cpp -DCMAKE_INSTALL_PREFIX=../arrow
+cmake ../apache-arrow-14.0.2/cpp -DCMAKE_INSTALL_PREFIX=../arrow
cmake --build .
cmake --install . --prefix=../arrow --config=Debug
cd ..
diff --git a/dev/release/verify-release-candidate.sh
b/dev/release/verify-release-candidate.sh
index 321100f..250bbe6 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -217,6 +217,7 @@ test_cmake_project() {
show_info "Configure CMake Project"
${CMAKE_BIN} "${NANOARROW_SOURCE_DIR}/${2}" \
"${@:3}" \
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
${NANOARROW_CMAKE_OPTIONS:-}
show_info "Build CMake Project"
diff --git a/extensions/nanoarrow_ipc/CMakeLists.txt
b/extensions/nanoarrow_ipc/CMakeLists.txt
index 2635f24..7a724b1 100644
--- a/extensions/nanoarrow_ipc/CMakeLists.txt
+++ b/extensions/nanoarrow_ipc/CMakeLists.txt
@@ -215,6 +215,16 @@ if(NANOARROW_IPC_BUILD_TESTS)
fetchcontent_makeavailable(nlohmann_json)
endif()
+ # zlib to decode gzipped integration testing JSON files
+ # We don't use Arrow C++ for this because building Arrow C++ with zlib
+ # is not trivial on Windows.
+ find_package(ZLIB)
+ if(NOT ZLIB_FOUND)
+ # Wrapper around FetchContent that better isolates the zlib CMakeLists.txt
+ message(STATUS "Using FetchContent to build a static zlib")
+ add_subdirectory(thirdparty/zlib)
+ endif()
+
enable_testing()
add_executable(nanoarrow_ipc_decoder_test
src/nanoarrow/nanoarrow_ipc_decoder_test.cc)
@@ -251,6 +261,7 @@ if(NANOARROW_IPC_BUILD_TESTS)
nanoarrow
${NANOARROW_IPC_ARROW_TARGET}
nlohmann_json
+ ZLIB::ZLIB
gtest_main
ipc_coverage_config)
target_link_libraries(nanoarrow_ipc_hpp_test
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
index ccb677f..4b4428e 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
@@ -19,6 +19,8 @@
#include <fstream>
#include <sstream>
+#include <zlib.h>
+
#include <arrow/buffer.h>
#include <arrow/c/bridge.h>
#include <arrow/io/api.h>
@@ -109,13 +111,9 @@ class TestFile {
path_builder << dir_prefix << "/" << CheckJSONGzFile();
// Read .json.gz file into a buffer
- nanoarrow::UniqueBuffer json_gz_content;
- NANOARROW_RETURN_NOT_OK(
- ReadFileBuffer(path_builder.str(), json_gz_content.get(), error));
-
- // Decompress into a JSON string
nanoarrow::UniqueBuffer json_content;
- NANOARROW_RETURN_NOT_OK(UnGZIP(json_gz_content.get(), json_content.get(),
error));
+ NANOARROW_RETURN_NOT_OK(
+ ReadGzFileBuffer(path_builder.str(), json_content.get(), error));
std::string json_string(reinterpret_cast<char*>(json_content->data),
json_content->size_bytes);
@@ -142,39 +140,37 @@ class TestFile {
return NANOARROW_OK;
}
- // Create an arrow::io::InputStream wrapper around an ArrowBuffer
- static std::shared_ptr<io::InputStream> BufferInputStream(ArrowBuffer* src) {
- auto content_copy_wrapped = Buffer::Wrap<uint8_t>(src->data,
src->size_bytes);
- return std::make_shared<io::BufferReader>(content_copy_wrapped);
- }
-
- // Decompress gzipped buffer content (currently uses Arrow C++)
- static ArrowErrorCode UnGZIP(ArrowBuffer* src, ArrowBuffer* dst, ArrowError*
error) {
- auto maybe_gzip = arrow::util::Codec::Create(arrow::Compression::GZIP);
- NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_gzip, error);
-
- std::shared_ptr<io::InputStream> gz_input_stream = BufferInputStream(src);
-
- auto maybe_input =
- io::CompressedInputStream::Make(maybe_gzip->get(), gz_input_stream);
- NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_input, error);
+ static ArrowErrorCode ReadGzFileBuffer(const std::string& path, ArrowBuffer*
dst,
+ ArrowError* error) {
+ gzFile file = gzopen(path.c_str(), "rb");
+ if (file == NULL) {
+ ArrowErrorSet(error, "Failed to open '%s'", path.c_str());
+ return EINVAL;
+ }
- std::stringstream testing_json;
- auto input = *maybe_input;
- int64_t bytes_read = 0;
+ char buf[8096];
+ int out_len = 0;
do {
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(dst, 8096), error);
-
- auto maybe_bytes_read = input->Read(8096, dst->data + dst->size_bytes);
- NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_bytes_read, error);
+ out_len = gzread(file, buf, sizeof(buf));
+ if (out_len < 0) {
+ gzclose(file);
+ ArrowErrorSet(error, "gzread() returned %d", out_len);
+ return EIO;
+ }
- bytes_read = *maybe_bytes_read;
- dst->size_bytes += bytes_read;
- } while (bytes_read > 0);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferAppend(dst, buf, out_len),
error);
+ } while (out_len > 0);
+ gzclose(file);
return NANOARROW_OK;
}
+ // Create an arrow::io::InputStream wrapper around an ArrowBuffer
+ static std::shared_ptr<io::InputStream> BufferInputStream(ArrowBuffer* src) {
+ auto content_copy_wrapped = Buffer::Wrap<uint8_t>(src->data,
src->size_bytes);
+ return std::make_shared<io::BufferReader>(content_copy_wrapped);
+ }
+
void TestEqualsArrowCpp(const std::string& dir_prefix) {
std::stringstream path_builder;
path_builder << dir_prefix << "/" << path_;
diff --git a/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt
b/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt
new file mode 100644
index 0000000..bd95b12
--- /dev/null
+++ b/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+fetchcontent_declare(nanoarrow_zlib
+ URL
https://github.com/madler/zlib/releases/download/v1.3/zlib13.zip
+ URL_HASH
SHA256=c561d09347f674f0d72692e7c75d9898919326c532aab7f8c07bb43b07efeb38
+)
+fetchcontent_makeavailable(nanoarrow_zlib)
+
+add_library(ZLIB::ZLIB ALIAS zlibstatic)
+target_include_directories(zlibstatic INTERFACE ${zlib_BINARY_DIR}
${zlib_SOURCE_DIR})
diff --git a/src/nanoarrow/integration/c_data_integration.cc
b/src/nanoarrow/integration/c_data_integration.cc
index 6c391ec..14a92b3 100644
--- a/src/nanoarrow/integration/c_data_integration.cc
+++ b/src/nanoarrow/integration/c_data_integration.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <cstdint>
#include <fstream>
#include <sstream>
#include <string>
@@ -196,28 +197,26 @@ static const char* ConvertError(ArrowErrorCode
errno_code) {
int64_t nanoarrow_BytesAllocated() { return kBytesAllocated; }
-const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char*
json_path,
- ArrowSchema* out) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
+ const char* json_path, ArrowSchema* out) {
ArrowErrorInit(&global_error);
return ConvertError(ExportSchemaFromJson(json_path, out, &global_error));
}
-const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(const
char* json_path,
-
ArrowSchema* schema) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
+ const char* json_path, ArrowSchema* schema) {
ArrowErrorInit(&global_error);
return ConvertError(ImportSchemaAndCompareToJson(json_path, schema,
&global_error));
}
-const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char*
json_path,
- int num_batch,
- ArrowArray* out) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
+ const char* json_path, int num_batch, ArrowArray* out) {
ArrowErrorInit(&global_error);
return ConvertError(ExportBatchFromJson(json_path, num_batch, out,
&global_error));
}
-const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(const char*
json_path,
- int
num_batch,
- ArrowArray*
batch) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
+ const char* json_path, int num_batch, ArrowArray* batch) {
ArrowErrorInit(&global_error);
return ConvertError(
ImportBatchAndCompareToJson(json_path, num_batch, batch, &global_error));
diff --git a/src/nanoarrow/integration/c_data_integration.h
b/src/nanoarrow/integration/c_data_integration.h
index cf76c1b..b596244 100644
--- a/src/nanoarrow/integration/c_data_integration.h
+++ b/src/nanoarrow/integration/c_data_integration.h
@@ -18,6 +18,14 @@
#ifndef NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
#define NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+#define DLL_EXPORT __declspec(dllexport)
+#else
+#define DLL_EXPORT
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -68,20 +76,19 @@ struct ArrowArray {
#endif // ARROW_C_DATA_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED
-const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char*
json_path,
- struct
ArrowSchema* out);
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
+ const char* json_path, struct ArrowSchema* out);
-const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
const char* json_path, struct ArrowSchema* schema);
-const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char*
json_path,
- int num_batch,
- struct ArrowArray*
out);
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
+ const char* json_path, int num_batch, struct ArrowArray* out);
-const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
const char* json_path, int num_batch, struct ArrowArray* batch);
-int64_t nanoarrow_BytesAllocated(void);
+DLL_EXPORT int64_t nanoarrow_BytesAllocated(void);
#ifdef __cplusplus
}
diff --git a/src/nanoarrow/nanoarrow_testing.hpp
b/src/nanoarrow/nanoarrow_testing.hpp
index dbcc4f4..b62c3ca 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -1574,7 +1574,7 @@ class TestingJSONReader {
// The JSON parser here can handle up to 2^64 - 1
auto item_int = json::parse(item.get<std::string>());
return SetBufferIntItem<T, BiggerT>(item_int, buffer, error);
- } catch (json::parse_error& e) {
+ } catch (json::parse_error&) {
ArrowErrorSet(error,
"integer buffer item encoded as string must parse as
integer: %s",
item.dump().c_str());
@@ -1597,7 +1597,7 @@ class TestingJSONReader {
item_int <= std::numeric_limits<T>::max(),
error, "integer buffer item '" + item.dump() + "' outside type
limits"));
- T buffer_value = item_int;
+ T buffer_value = static_cast<T>(item_int);
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
@@ -1620,7 +1620,7 @@ class TestingJSONReader {
item_dbl <= std::numeric_limits<T>::max(),
error, "floatingpoint buffer item '" + item.dump() + "' outside type
limits"));
- T buffer_value = item_dbl;
+ T buffer_value = static_cast<T>(item_dbl);
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
}
@@ -1725,10 +1725,10 @@ class TestingJSONReader {
"binary data buffer item must have even
size"));
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(data,
item_size_bytes), error);
- for (int64_t i = 0; i < item_str.size(); i += 2) {
+ for (size_t i = 0; i < item_str.size(); i += 2) {
std::string byte_hex = item_str.substr(i, 2);
char* end_ptr;
- uint8_t byte = std::strtoul(byte_hex.data(), &end_ptr, 16);
+ uint8_t byte = static_cast<uint8_t>(std::strtoul(byte_hex.data(),
&end_ptr, 16));
NANOARROW_RETURN_NOT_OK(
Check(end_ptr == (byte_hex.data() + 2), error,
"binary data buffer item must contain a valid hex-encoded byte
string"));