This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fd73986  chore(ci): Fix verification builds for Windows and centos7 
(#341)
fd73986 is described below

commit fd73986cd1e6742e8f5e3d1ebb307fcf5060a734
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Dec 21 11:13:27 2023 -0400

    chore(ci): Fix verification builds for Windows and centos7 (#341)
    
    This PR contains a few updates so that the verification job passes on
    Windows, centos7, and fedora.
    
    On centos7/fedora, the builds were failing because the new test that was
    added needs to be compiled with `-fPIC`. I updated the verification
    script to always set `CMAKE_POSITION_INDEPENDENT_CODE=ON` from the
    top-level `cmake` invocation. This was easier than trying to get
    `target_compile_options()` working on the versions of cmake available by
    default in all the runners.
    
    On the GitHub Actions-based verifications, the builds were failing
    because the cached Arrow C++ build was not built with zlib (the gzip
    codec). Simply adding `ARROW_WITH_ZLIB=ON` did not work on Windows
    (something about a missing `.lib` file), so I just switched to using
    zlib directly. When the IPC reader supports compression we will need it
    anyway and it was a fairly minimal change.
    
    There were also a few compiler warnings that I fixed while I had a real
    live Windows/MSVC session going.
---
 .github/workflows/verify.yaml                      |  6 +--
 dev/release/README.md                              |  8 +--
 dev/release/verify-release-candidate.sh            |  1 +
 extensions/nanoarrow_ipc/CMakeLists.txt            | 11 ++++
 .../src/nanoarrow/nanoarrow_ipc_files_test.cc      | 60 ++++++++++------------
 .../nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt   | 25 +++++++++
 src/nanoarrow/integration/c_data_integration.cc    | 19 ++++---
 src/nanoarrow/integration/c_data_integration.h     | 23 ++++++---
 src/nanoarrow/nanoarrow_testing.hpp                | 10 ++--
 9 files changed, 101 insertions(+), 62 deletions(-)

diff --git a/.github/workflows/verify.yaml b/.github/workflows/verify.yaml
index a4b1bb5..c5d1236 100644
--- a/.github/workflows/verify.yaml
+++ b/.github/workflows/verify.yaml
@@ -81,16 +81,16 @@ jobs:
         uses: actions/cache@v3
         with:
           path: arrow
-          key: arrow-${{ runner.os }}-5
+          key: arrow-${{ runner.os }}-6
 
       - name: Build Arrow C++
         if: steps.cache-arrow-build.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          curl 
https://dlcdn.apache.org/arrow/arrow-12.0.1/apache-arrow-12.0.1.tar.gz | \
+          curl 
https://dlcdn.apache.org/arrow/arrow-14.0.2/apache-arrow-14.0.2.tar.gz | \
             tar -zxf -
           mkdir arrow-build && cd arrow-build
-          cmake ../apache-arrow-12.0.1/cpp -DCMAKE_INSTALL_PREFIX=../arrow
+          cmake ../apache-arrow-14.0.2/cpp -DCMAKE_INSTALL_PREFIX=../arrow
           cmake --build .
           cmake --install . --prefix=../arrow ${{ 
matrix.config.extra_cmake_install }}
           cd ..
diff --git a/dev/release/README.md b/dev/release/README.md
index 4ecc67e..628e1d0 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -80,10 +80,10 @@ Command Line Tools (i.e., `xcode-select --install`),
 
 ```bash
 # Download + build Arrow C++
-curl https://dlcdn.apache.org/arrow/arrow-11.0.0/apache-arrow-11.0.0.tar.gz | \
+curl 
https://github.com/apache/arrow/archive/refs/tags/apache-arrow-14.0.2.tar.gz | \
   tar -zxf -
 mkdir arrow-build && cd arrow-build
-cmake ../apache-arrow-11.0.0/cpp \
+cmake ../apache-arrow-14.0.2/cpp \
     -DARROW_JEMALLOC=OFF -DARROW_SIMD_LEVEL=NONE \
     # Required for Arrow on old MacOS
     -DCMAKE_CXX_FLAGS="-D_LIBCPP_DISABLE_AVAILABILITY" \
@@ -139,10 +139,10 @@ the verification script.
 
 ```bash
 # Build Arrow C++ from source
-curl https://dlcdn.apache.org/arrow/arrow-12.0.1/apache-arrow-12.0.1.tar.gz | \
+curl 
https://github.com/apache/arrow/archive/refs/tags/apache-arrow-14.0.2.tar.gz | \
   tar -zxf -
 mkdir arrow-build && cd arrow-build
-cmake ../apache-arrow-12.0.1/cpp -DCMAKE_INSTALL_PREFIX=../arrow
+cmake ../apache-arrow-14.0.2/cpp -DCMAKE_INSTALL_PREFIX=../arrow
 cmake --build .
 cmake --install . --prefix=../arrow --config=Debug
 cd ..
diff --git a/dev/release/verify-release-candidate.sh 
b/dev/release/verify-release-candidate.sh
index 321100f..250bbe6 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -217,6 +217,7 @@ test_cmake_project() {
   show_info "Configure CMake Project"
   ${CMAKE_BIN} "${NANOARROW_SOURCE_DIR}/${2}" \
     "${@:3}" \
+    -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
     ${NANOARROW_CMAKE_OPTIONS:-}
 
   show_info "Build CMake Project"
diff --git a/extensions/nanoarrow_ipc/CMakeLists.txt 
b/extensions/nanoarrow_ipc/CMakeLists.txt
index 2635f24..7a724b1 100644
--- a/extensions/nanoarrow_ipc/CMakeLists.txt
+++ b/extensions/nanoarrow_ipc/CMakeLists.txt
@@ -215,6 +215,16 @@ if(NANOARROW_IPC_BUILD_TESTS)
     fetchcontent_makeavailable(nlohmann_json)
   endif()
 
+  # zlib to decode gzipped integration testing JSON files
+  # We don't use Arrow C++ for this because building Arrow C++ with zlib
+  # is not trivial on Windows.
+  find_package(ZLIB)
+  if(NOT ZLIB_FOUND)
+    # Wrapper around FetchContent that better isolates the zlib CMakeLists.txt
+    message(STATUS "Using FetchContent to build a static zlib")
+    add_subdirectory(thirdparty/zlib)
+  endif()
+
   enable_testing()
 
   add_executable(nanoarrow_ipc_decoder_test 
src/nanoarrow/nanoarrow_ipc_decoder_test.cc)
@@ -251,6 +261,7 @@ if(NANOARROW_IPC_BUILD_TESTS)
                         nanoarrow
                         ${NANOARROW_IPC_ARROW_TARGET}
                         nlohmann_json
+                        ZLIB::ZLIB
                         gtest_main
                         ipc_coverage_config)
   target_link_libraries(nanoarrow_ipc_hpp_test
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc 
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
index ccb677f..4b4428e 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <sstream>
 
+#include <zlib.h>
+
 #include <arrow/buffer.h>
 #include <arrow/c/bridge.h>
 #include <arrow/io/api.h>
@@ -109,13 +111,9 @@ class TestFile {
     path_builder << dir_prefix << "/" << CheckJSONGzFile();
 
     // Read .json.gz file into a buffer
-    nanoarrow::UniqueBuffer json_gz_content;
-    NANOARROW_RETURN_NOT_OK(
-        ReadFileBuffer(path_builder.str(), json_gz_content.get(), error));
-
-    // Decompress into a JSON string
     nanoarrow::UniqueBuffer json_content;
-    NANOARROW_RETURN_NOT_OK(UnGZIP(json_gz_content.get(), json_content.get(), 
error));
+    NANOARROW_RETURN_NOT_OK(
+        ReadGzFileBuffer(path_builder.str(), json_content.get(), error));
 
     std::string json_string(reinterpret_cast<char*>(json_content->data),
                             json_content->size_bytes);
@@ -142,39 +140,37 @@ class TestFile {
     return NANOARROW_OK;
   }
 
-  // Create an arrow::io::InputStream wrapper around an ArrowBuffer
-  static std::shared_ptr<io::InputStream> BufferInputStream(ArrowBuffer* src) {
-    auto content_copy_wrapped = Buffer::Wrap<uint8_t>(src->data, 
src->size_bytes);
-    return std::make_shared<io::BufferReader>(content_copy_wrapped);
-  }
-
-  // Decompress gzipped buffer content (currently uses Arrow C++)
-  static ArrowErrorCode UnGZIP(ArrowBuffer* src, ArrowBuffer* dst, ArrowError* 
error) {
-    auto maybe_gzip = arrow::util::Codec::Create(arrow::Compression::GZIP);
-    NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_gzip, error);
-
-    std::shared_ptr<io::InputStream> gz_input_stream = BufferInputStream(src);
-
-    auto maybe_input =
-        io::CompressedInputStream::Make(maybe_gzip->get(), gz_input_stream);
-    NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_input, error);
+  static ArrowErrorCode ReadGzFileBuffer(const std::string& path, ArrowBuffer* 
dst,
+                                         ArrowError* error) {
+    gzFile file = gzopen(path.c_str(), "rb");
+    if (file == NULL) {
+      ArrowErrorSet(error, "Failed to open '%s'", path.c_str());
+      return EINVAL;
+    }
 
-    std::stringstream testing_json;
-    auto input = *maybe_input;
-    int64_t bytes_read = 0;
+    char buf[8096];
+    int out_len = 0;
     do {
-      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(dst, 8096), error);
-
-      auto maybe_bytes_read = input->Read(8096, dst->data + dst->size_bytes);
-      NANOARROW_RETURN_ARROW_RESULT_NOT_OK(maybe_bytes_read, error);
+      out_len = gzread(file, buf, sizeof(buf));
+      if (out_len < 0) {
+        gzclose(file);
+        ArrowErrorSet(error, "gzread() returned %d", out_len);
+        return EIO;
+      }
 
-      bytes_read = *maybe_bytes_read;
-      dst->size_bytes += bytes_read;
-    } while (bytes_read > 0);
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferAppend(dst, buf, out_len), 
error);
+    } while (out_len > 0);
 
+    gzclose(file);
     return NANOARROW_OK;
   }
 
+  // Create an arrow::io::InputStream wrapper around an ArrowBuffer
+  static std::shared_ptr<io::InputStream> BufferInputStream(ArrowBuffer* src) {
+    auto content_copy_wrapped = Buffer::Wrap<uint8_t>(src->data, 
src->size_bytes);
+    return std::make_shared<io::BufferReader>(content_copy_wrapped);
+  }
+
   void TestEqualsArrowCpp(const std::string& dir_prefix) {
     std::stringstream path_builder;
     path_builder << dir_prefix << "/" << path_;
diff --git a/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt 
b/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt
new file mode 100644
index 0000000..bd95b12
--- /dev/null
+++ b/extensions/nanoarrow_ipc/thirdparty/zlib/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+fetchcontent_declare(nanoarrow_zlib
+                     URL 
https://github.com/madler/zlib/releases/download/v1.3/zlib13.zip
+                     URL_HASH 
SHA256=c561d09347f674f0d72692e7c75d9898919326c532aab7f8c07bb43b07efeb38
+)
+fetchcontent_makeavailable(nanoarrow_zlib)
+
+add_library(ZLIB::ZLIB ALIAS zlibstatic)
+target_include_directories(zlibstatic INTERFACE ${zlib_BINARY_DIR} 
${zlib_SOURCE_DIR})
diff --git a/src/nanoarrow/integration/c_data_integration.cc 
b/src/nanoarrow/integration/c_data_integration.cc
index 6c391ec..14a92b3 100644
--- a/src/nanoarrow/integration/c_data_integration.cc
+++ b/src/nanoarrow/integration/c_data_integration.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <cstdint>
 #include <fstream>
 #include <sstream>
 #include <string>
@@ -196,28 +197,26 @@ static const char* ConvertError(ArrowErrorCode 
errno_code) {
 
 int64_t nanoarrow_BytesAllocated() { return kBytesAllocated; }
 
-const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char* 
json_path,
-                                                            ArrowSchema* out) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
+    const char* json_path, ArrowSchema* out) {
   ArrowErrorInit(&global_error);
   return ConvertError(ExportSchemaFromJson(json_path, out, &global_error));
 }
 
-const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(const 
char* json_path,
-                                                                    
ArrowSchema* schema) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
+    const char* json_path, ArrowSchema* schema) {
   ArrowErrorInit(&global_error);
   return ConvertError(ImportSchemaAndCompareToJson(json_path, schema, 
&global_error));
 }
 
-const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char* 
json_path,
-                                                           int num_batch,
-                                                           ArrowArray* out) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
+    const char* json_path, int num_batch, ArrowArray* out) {
   ArrowErrorInit(&global_error);
   return ConvertError(ExportBatchFromJson(json_path, num_batch, out, 
&global_error));
 }
 
-const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(const char* 
json_path,
-                                                                   int 
num_batch,
-                                                                   ArrowArray* 
batch) {
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
+    const char* json_path, int num_batch, ArrowArray* batch) {
   ArrowErrorInit(&global_error);
   return ConvertError(
       ImportBatchAndCompareToJson(json_path, num_batch, batch, &global_error));
diff --git a/src/nanoarrow/integration/c_data_integration.h 
b/src/nanoarrow/integration/c_data_integration.h
index cf76c1b..b596244 100644
--- a/src/nanoarrow/integration/c_data_integration.h
+++ b/src/nanoarrow/integration/c_data_integration.h
@@ -18,6 +18,14 @@
 #ifndef NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
 #define NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
 
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+#define DLL_EXPORT __declspec(dllexport)
+#else
+#define DLL_EXPORT
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -68,20 +76,19 @@ struct ArrowArray {
 #endif  // ARROW_C_DATA_INTERFACE
 #endif  // ARROW_FLAG_DICTIONARY_ORDERED
 
-const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char* 
json_path,
-                                                            struct 
ArrowSchema* out);
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
+    const char* json_path, struct ArrowSchema* out);
 
-const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
     const char* json_path, struct ArrowSchema* schema);
 
-const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char* 
json_path,
-                                                           int num_batch,
-                                                           struct ArrowArray* 
out);
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
+    const char* json_path, int num_batch, struct ArrowArray* out);
 
-const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
+DLL_EXPORT const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
     const char* json_path, int num_batch, struct ArrowArray* batch);
 
-int64_t nanoarrow_BytesAllocated(void);
+DLL_EXPORT int64_t nanoarrow_BytesAllocated(void);
 
 #ifdef __cplusplus
 }
diff --git a/src/nanoarrow/nanoarrow_testing.hpp 
b/src/nanoarrow/nanoarrow_testing.hpp
index dbcc4f4..b62c3ca 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -1574,7 +1574,7 @@ class TestingJSONReader {
         // The JSON parser here can handle up to 2^64 - 1
         auto item_int = json::parse(item.get<std::string>());
         return SetBufferIntItem<T, BiggerT>(item_int, buffer, error);
-      } catch (json::parse_error& e) {
+      } catch (json::parse_error&) {
         ArrowErrorSet(error,
                       "integer buffer item encoded as string must parse as 
integer: %s",
                       item.dump().c_str());
@@ -1597,7 +1597,7 @@ class TestingJSONReader {
                   item_int <= std::numeric_limits<T>::max(),
               error, "integer buffer item '" + item.dump() + "' outside type 
limits"));
 
-    T buffer_value = item_int;
+    T buffer_value = static_cast<T>(item_int);
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
         ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
 
@@ -1620,7 +1620,7 @@ class TestingJSONReader {
               item_dbl <= std::numeric_limits<T>::max(),
           error, "floatingpoint buffer item '" + item.dump() + "' outside type 
limits"));
 
-      T buffer_value = item_dbl;
+      T buffer_value = static_cast<T>(item_dbl);
       NANOARROW_RETURN_NOT_OK_WITH_ERROR(
           ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
     }
@@ -1725,10 +1725,10 @@ class TestingJSONReader {
                                   "binary data buffer item must have even 
size"));
 
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(data, 
item_size_bytes), error);
-    for (int64_t i = 0; i < item_str.size(); i += 2) {
+    for (size_t i = 0; i < item_str.size(); i += 2) {
       std::string byte_hex = item_str.substr(i, 2);
       char* end_ptr;
-      uint8_t byte = std::strtoul(byte_hex.data(), &end_ptr, 16);
+      uint8_t byte = static_cast<uint8_t>(std::strtoul(byte_hex.data(), 
&end_ptr, 16));
       NANOARROW_RETURN_NOT_OK(
           Check(end_ptr == (byte_hex.data() + 2), error,
                 "binary data buffer item must contain a valid hex-encoded byte 
string"));

Reply via email to