This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5c0015d34b GH-43587: [Python] Remove no longer used
serialize/deserialize PyArrow C++ code (#45743)
5c0015d34b is described below
commit 5c0015d34b35b7cb6b5275f7b450f1e580f74d4f
Author: Alenka Frim <[email protected]>
AuthorDate: Mon Mar 24 15:53:10 2025 +0100
GH-43587: [Python] Remove no longer used serialize/deserialize PyArrow C++
code (#45743)
### Rationale for this change
The serialization functionality in the PyArrow C++ code was deprecated in
18.0.0 (https://github.com/apache/arrow/pull/44064) and marked for removal
in 20.0.0. We can now remove it.
### What changes are included in this PR?
Serialization functionality is removed from PyArrow C++ code.
### Are these changes tested?
No new tests are added because this change only removes code; the existing tests should continue to pass.
### Are there any user-facing changes?
Serialization functionality is removed from PyArrow C++ code.
* GitHub Issue: #43587
Authored-by: AlenkaF <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
python/CMakeLists.txt | 2 -
python/pyarrow/includes/libarrow_python.pxd | 35 --
python/pyarrow/src/arrow/python/api.h | 2 -
python/pyarrow/src/arrow/python/deserialize.cc | 503 ----------------
python/pyarrow/src/arrow/python/deserialize.h | 112 ----
python/pyarrow/src/arrow/python/serialize.cc | 798 -------------------------
python/pyarrow/src/arrow/python/serialize.h | 149 -----
7 files changed, 1601 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ac497e844e..d07edaa6ff 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -356,7 +356,6 @@ set(PYARROW_CPP_SRCS
${PYARROW_CPP_SOURCE_DIR}/common.cc
${PYARROW_CPP_SOURCE_DIR}/datetime.cc
${PYARROW_CPP_SOURCE_DIR}/decimal.cc
- ${PYARROW_CPP_SOURCE_DIR}/deserialize.cc
${PYARROW_CPP_SOURCE_DIR}/extension_type.cc
${PYARROW_CPP_SOURCE_DIR}/gdb.cc
${PYARROW_CPP_SOURCE_DIR}/helpers.cc
@@ -369,7 +368,6 @@ set(PYARROW_CPP_SRCS
${PYARROW_CPP_SOURCE_DIR}/python_test.cc
${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
- ${PYARROW_CPP_SOURCE_DIR}/serialize.cc
${PYARROW_CPP_SOURCE_DIR}/udf.cc)
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
PROPERTIES SKIP_PRECOMPILE_HEADERS ON
diff --git a/python/pyarrow/includes/libarrow_python.pxd
b/python/pyarrow/includes/libarrow_python.pxd
index 96725c9c38..cd4474f834 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -199,41 +199,6 @@ cdef extern from "arrow/python/api.h" namespace
"arrow::py" nogil:
unordered_set[c_string] extension_columns
c_bool to_numpy
- cdef cppclass CSerializedPyObject" arrow::py::SerializedPyObject":
- shared_ptr[CRecordBatch] batch
- vector[shared_ptr[CTensor]] tensors
-
- CStatus WriteTo(COutputStream* dst)
- CStatus GetComponents(CMemoryPool* pool, PyObject** dst)
-
- CStatus SerializeObject(object context, object sequence,
- CSerializedPyObject* out)
-
- CStatus DeserializeObject(object context,
- const CSerializedPyObject& obj,
- PyObject* base, PyObject** out)
-
- CStatus ReadSerializedObject(CRandomAccessFile* src,
- CSerializedPyObject* out)
-
- cdef cppclass SparseTensorCounts:
- SparseTensorCounts()
- int coo
- int csr
- int csc
- int csf
- int ndim_csf
- int num_total_tensors() const
- int num_total_buffers() const
-
- CStatus GetSerializedFromComponents(
- int num_tensors,
- const SparseTensorCounts& num_sparse_tensors,
- int num_ndarrays,
- int num_buffers,
- object buffers,
- CSerializedPyObject* out)
-
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
diff --git a/python/pyarrow/src/arrow/python/api.h
b/python/pyarrow/src/arrow/python/api.h
index a0b13d6d13..e66bf49dfe 100644
--- a/python/pyarrow/src/arrow/python/api.h
+++ b/python/pyarrow/src/arrow/python/api.h
@@ -20,11 +20,9 @@
#include "arrow/python/arrow_to_pandas.h"
#include "arrow/python/common.h"
#include "arrow/python/datetime.h"
-#include "arrow/python/deserialize.h"
#include "arrow/python/helpers.h"
#include "arrow/python/inference.h"
#include "arrow/python/io.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/numpy_to_arrow.h"
#include "arrow/python/python_to_arrow.h"
-#include "arrow/python/serialize.h"
diff --git a/python/pyarrow/src/arrow/python/deserialize.cc
b/python/pyarrow/src/arrow/python/deserialize.cc
deleted file mode 100644
index ab300a182f..0000000000
--- a/python/pyarrow/src/arrow/python/deserialize.cc
+++ /dev/null
@@ -1,503 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/python/deserialize.h"
-
-#include "arrow/python/numpy_interop.h"
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <numpy/arrayobject.h>
-#include <numpy/arrayscalars.h>
-
-#include "arrow/array.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/options.h"
-#include "arrow/ipc/reader.h"
-#include "arrow/ipc/util.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/table.h"
-#include "arrow/util/checked_cast.h"
-#include "arrow/util/logging.h"
-#include "arrow/util/value_parsing.h"
-
-#include "arrow/python/common.h"
-#include "arrow/python/datetime.h"
-#include "arrow/python/helpers.h"
-#include "arrow/python/numpy_convert.h"
-#include "arrow/python/pyarrow.h"
-#include "arrow/python/serialize.h"
-#include "arrow/python/vendored/pythoncapi_compat.h"
-
-namespace arrow {
-
-using internal::checked_cast;
-using internal::ParseValue;
-
-namespace py {
-
-Status CallDeserializeCallback(PyObject* context, PyObject* value,
- PyObject** deserialized_object);
-
-Status DeserializeTuple(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out);
-
-Status DeserializeList(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out);
-
-Status DeserializeSet(PyObject* context, const Array& array, int64_t start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out);
-
-Status DeserializeDict(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out) {
- const auto& data = checked_cast<const StructArray&>(array);
- OwnedRef keys, vals;
- OwnedRef result(PyDict_New());
- RETURN_IF_PYERROR();
-
- DCHECK_EQ(2, data.num_fields());
-
- RETURN_NOT_OK(DeserializeList(context, *data.field(0), start_idx, stop_idx,
base, blobs,
- keys.ref()));
- RETURN_NOT_OK(DeserializeList(context, *data.field(1), start_idx, stop_idx,
base, blobs,
- vals.ref()));
- for (int64_t i = start_idx; i < stop_idx; ++i) {
- // PyDict_SetItem behaves differently from PyList_SetItem and
PyTuple_SetItem.
- // The latter two steal references whereas PyDict_SetItem does not. So we
need
- // to make sure the reference count is decremented by letting the OwnedRef
- // go out of scope at the end.
- PyObject* key = PyList_GetItemRef(keys.obj(), i - start_idx);
- RETURN_IF_PYERROR();
- OwnedRef keyref(key);
- PyObject* val = PyList_GetItemRef(vals.obj(), i - start_idx);
- RETURN_IF_PYERROR();
- OwnedRef valref(val);
- int ret = PyDict_SetItem(result.obj(), key, val);
- if (ret != 0) {
- return ConvertPyError();
- }
- }
- static PyObject* py_type = PyUnicode_FromString("_pytype_");
- if (PyDict_Contains(result.obj(), py_type)) {
- RETURN_NOT_OK(CallDeserializeCallback(context, result.obj(), out));
- } else {
- *out = result.detach();
- }
- return Status::OK();
-}
-
-Status DeserializeArray(int32_t index, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out) {
- RETURN_NOT_OK(py::TensorToNdarray(blobs.ndarrays[index], base, out));
- // Mark the array as immutable
- OwnedRef flags(PyObject_GetAttrString(*out, "flags"));
- if (flags.obj() == NULL) {
- return ConvertPyError();
- }
- if (PyObject_SetAttrString(flags.obj(), "writeable", Py_False) < 0) {
- return ConvertPyError();
- }
- return Status::OK();
-}
-
-Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t
type,
- PyObject* base, const SerializedPyObject& blobs, PyObject**
result) {
- switch (type) {
- case PythonType::NONE:
- Py_INCREF(Py_None);
- *result = Py_None;
- return Status::OK();
- case PythonType::BOOL:
- *result = PyBool_FromLong(checked_cast<const
BooleanArray&>(arr).Value(index));
- return Status::OK();
- case PythonType::PY2INT:
- case PythonType::INT: {
- *result = PyLong_FromSsize_t(checked_cast<const
Int64Array&>(arr).Value(index));
- return Status::OK();
- }
- case PythonType::BYTES: {
- auto view = checked_cast<const BinaryArray&>(arr).GetView(index);
- *result = PyBytes_FromStringAndSize(view.data(), view.length());
- return CheckPyError();
- }
- case PythonType::STRING: {
- auto view = checked_cast<const StringArray&>(arr).GetView(index);
- *result = PyUnicode_FromStringAndSize(view.data(), view.length());
- return CheckPyError();
- }
- case PythonType::HALF_FLOAT: {
- *result = PyHalf_FromHalf(checked_cast<const
HalfFloatArray&>(arr).Value(index));
- RETURN_IF_PYERROR();
- return Status::OK();
- }
- case PythonType::FLOAT:
- *result = PyFloat_FromDouble(checked_cast<const
FloatArray&>(arr).Value(index));
- return Status::OK();
- case PythonType::DOUBLE:
- *result = PyFloat_FromDouble(checked_cast<const
DoubleArray&>(arr).Value(index));
- return Status::OK();
- case PythonType::DATE64: {
- RETURN_NOT_OK(internal::PyDateTime_from_int(
- checked_cast<const Date64Array&>(arr).Value(index), TimeUnit::MICRO,
result));
- RETURN_IF_PYERROR();
- return Status::OK();
- }
- case PythonType::LIST: {
- const auto& l = checked_cast<const ListArray&>(arr);
- return DeserializeList(context, *l.values(), l.value_offset(index),
- l.value_offset(index + 1), base, blobs, result);
- }
- case PythonType::DICT: {
- const auto& l = checked_cast<const ListArray&>(arr);
- return DeserializeDict(context, *l.values(), l.value_offset(index),
- l.value_offset(index + 1), base, blobs, result);
- }
- case PythonType::TUPLE: {
- const auto& l = checked_cast<const ListArray&>(arr);
- return DeserializeTuple(context, *l.values(), l.value_offset(index),
- l.value_offset(index + 1), base, blobs, result);
- }
- case PythonType::SET: {
- const auto& l = checked_cast<const ListArray&>(arr);
- return DeserializeSet(context, *l.values(), l.value_offset(index),
- l.value_offset(index + 1), base, blobs, result);
- }
- case PythonType::TENSOR: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- *result = wrap_tensor(blobs.tensors[ref]);
- return Status::OK();
- }
- case PythonType::SPARSECOOTENSOR: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- const std::shared_ptr<SparseCOOTensor>& sparse_coo_tensor =
- arrow::internal::checked_pointer_cast<SparseCOOTensor>(
- blobs.sparse_tensors[ref]);
- *result = wrap_sparse_coo_tensor(sparse_coo_tensor);
- return Status::OK();
- }
- case PythonType::SPARSECSRMATRIX: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- const std::shared_ptr<SparseCSRMatrix>& sparse_csr_matrix =
- arrow::internal::checked_pointer_cast<SparseCSRMatrix>(
- blobs.sparse_tensors[ref]);
- *result = wrap_sparse_csr_matrix(sparse_csr_matrix);
- return Status::OK();
- }
- case PythonType::SPARSECSCMATRIX: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- const std::shared_ptr<SparseCSCMatrix>& sparse_csc_matrix =
- arrow::internal::checked_pointer_cast<SparseCSCMatrix>(
- blobs.sparse_tensors[ref]);
- *result = wrap_sparse_csc_matrix(sparse_csc_matrix);
- return Status::OK();
- }
- case PythonType::SPARSECSFTENSOR: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- const std::shared_ptr<SparseCSFTensor>& sparse_csf_tensor =
- arrow::internal::checked_pointer_cast<SparseCSFTensor>(
- blobs.sparse_tensors[ref]);
- *result = wrap_sparse_csf_tensor(sparse_csf_tensor);
- return Status::OK();
- }
- case PythonType::NDARRAY: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- return DeserializeArray(ref, base, blobs, result);
- }
- case PythonType::BUFFER: {
- int32_t ref = checked_cast<const Int32Array&>(arr).Value(index);
- *result = wrap_buffer(blobs.buffers[ref]);
- return Status::OK();
- }
- default: {
- ARROW_CHECK(false) << "union tag " << type << "' not recognized";
- }
- }
- return Status::OK();
-}
-
-Status GetPythonTypes(const UnionArray& data, std::vector<int8_t>* result) {
- ARROW_CHECK(result != nullptr);
- auto type = data.type();
- for (int i = 0; i < type->num_fields(); ++i) {
- int8_t tag = 0;
- const std::string& data = type->field(i)->name();
- if (!ParseValue<Int8Type>(data.c_str(), data.size(), &tag)) {
- return Status::SerializationError("Cannot convert string: \"",
- type->field(i)->name(), "\" to
int8_t");
- }
- result->push_back(tag);
- }
- return Status::OK();
-}
-
-template <typename CreateSequenceFn, typename SetItemFn>
-Status DeserializeSequence(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base,
- const SerializedPyObject& blobs,
- CreateSequenceFn&& create_sequence, SetItemFn&&
set_item,
- PyObject** out) {
- const auto& data = checked_cast<const DenseUnionArray&>(array);
- OwnedRef result(create_sequence(stop_idx - start_idx));
- RETURN_IF_PYERROR();
- const int8_t* type_codes = data.raw_type_codes();
- const int32_t* value_offsets = data.raw_value_offsets();
- std::vector<int8_t> python_types;
- RETURN_NOT_OK(GetPythonTypes(data, &python_types));
- for (int64_t i = start_idx; i < stop_idx; ++i) {
- const int64_t offset = value_offsets[i];
- const uint8_t type = type_codes[i];
- PyObject* value;
- RETURN_NOT_OK(GetValue(context, *data.field(type), offset,
python_types[type], base,
- blobs, &value));
- RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value));
- }
- *out = result.detach();
- return Status::OK();
-}
-
-Status DeserializeList(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out) {
- return DeserializeSequence(
- context, array, start_idx, stop_idx, base, blobs,
- [](int64_t size) { return PyList_New(size); },
- [](PyObject* seq, int64_t index, PyObject* item) {
- PyList_SET_ITEM(seq, index, item);
- return Status::OK();
- },
- out);
-}
-
-Status DeserializeTuple(PyObject* context, const Array& array, int64_t
start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out) {
- return DeserializeSequence(
- context, array, start_idx, stop_idx, base, blobs,
- [](int64_t size) { return PyTuple_New(size); },
- [](PyObject* seq, int64_t index, PyObject* item) {
- PyTuple_SET_ITEM(seq, index, item);
- return Status::OK();
- },
- out);
-}
-
-Status DeserializeSet(PyObject* context, const Array& array, int64_t start_idx,
- int64_t stop_idx, PyObject* base, const
SerializedPyObject& blobs,
- PyObject** out) {
- return DeserializeSequence(
- context, array, start_idx, stop_idx, base, blobs,
- [](int64_t size) { return PySet_New(nullptr); },
- [](PyObject* seq, int64_t index, PyObject* item) {
- int err = PySet_Add(seq, item);
- Py_DECREF(item);
- if (err < 0) {
- RETURN_IF_PYERROR();
- }
- return Status::OK();
- },
- out);
-}
-
-Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject*
out) {
- int32_t num_tensors;
- int32_t num_sparse_tensors;
- int32_t num_ndarrays;
- int32_t num_buffers;
-
- // Read number of tensors
- RETURN_NOT_OK(src->Read(sizeof(int32_t),
reinterpret_cast<uint8_t*>(&num_tensors)));
- RETURN_NOT_OK(
- src->Read(sizeof(int32_t),
reinterpret_cast<uint8_t*>(&num_sparse_tensors)));
- RETURN_NOT_OK(src->Read(sizeof(int32_t),
reinterpret_cast<uint8_t*>(&num_ndarrays)));
- RETURN_NOT_OK(src->Read(sizeof(int32_t),
reinterpret_cast<uint8_t*>(&num_buffers)));
-
- // Align stream to 8-byte offset
- RETURN_NOT_OK(ipc::AlignStream(src, ipc::kArrowIpcAlignment));
- std::shared_ptr<RecordBatchReader> reader;
- ARROW_ASSIGN_OR_RAISE(reader, ipc::RecordBatchStreamReader::Open(src));
- RETURN_NOT_OK(reader->ReadNext(&out->batch));
-
- /// Skip EOS marker
- RETURN_NOT_OK(src->Advance(4));
-
- /// Align stream so tensor bodies are 64-byte aligned
- RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment));
-
- for (int i = 0; i < num_tensors; ++i) {
- std::shared_ptr<Tensor> tensor;
- ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(src));
- RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment));
- out->tensors.push_back(tensor);
- }
-
- for (int i = 0; i < num_sparse_tensors; ++i) {
- std::shared_ptr<SparseTensor> sparse_tensor;
- ARROW_ASSIGN_OR_RAISE(sparse_tensor, ipc::ReadSparseTensor(src));
- RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment));
- out->sparse_tensors.push_back(sparse_tensor);
- }
-
- for (int i = 0; i < num_ndarrays; ++i) {
- std::shared_ptr<Tensor> ndarray;
- ARROW_ASSIGN_OR_RAISE(ndarray, ipc::ReadTensor(src));
- RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment));
- out->ndarrays.push_back(ndarray);
- }
-
- ARROW_ASSIGN_OR_RAISE(int64_t offset, src->Tell());
- for (int i = 0; i < num_buffers; ++i) {
- int64_t size;
- RETURN_NOT_OK(src->ReadAt(offset, sizeof(int64_t), &size));
- offset += sizeof(int64_t);
- ARROW_ASSIGN_OR_RAISE(auto buffer, src->ReadAt(offset, size));
- out->buffers.push_back(buffer);
- offset += size;
- }
-
- return Status::OK();
-}
-
-Status DeserializeObject(PyObject* context, const SerializedPyObject& obj,
PyObject* base,
- PyObject** out) {
- PyAcquireGIL lock;
- return DeserializeList(context, *obj.batch->column(0), 0,
obj.batch->num_rows(), base,
- obj, out);
-}
-
-Status GetSerializedFromComponents(int num_tensors,
- const SparseTensorCounts&
num_sparse_tensors,
- int num_ndarrays, int num_buffers,
PyObject* data,
- SerializedPyObject* out) {
- PyAcquireGIL gil;
- const Py_ssize_t data_length = PyList_Size(data);
- RETURN_IF_PYERROR();
-
- const Py_ssize_t expected_data_length = 1 + num_tensors * 2 +
-
num_sparse_tensors.num_total_buffers() +
- num_ndarrays * 2 + num_buffers;
- if (data_length != expected_data_length) {
- return Status::Invalid("Invalid number of buffers in data");
- }
-
- auto GetBuffer = [&data](Py_ssize_t index, std::shared_ptr<Buffer>* out) {
- ARROW_CHECK_LE(index, PyList_Size(data));
- PyObject* py_buf = PyList_GetItemRef(data, index);
- RETURN_IF_PYERROR();
- OwnedRef py_buf_ref(py_buf);
- return unwrap_buffer(py_buf).Value(out);
- };
-
- Py_ssize_t buffer_index = 0;
-
- // Read the union batch describing object structure
- {
- std::shared_ptr<Buffer> data_buffer;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &data_buffer));
- gil.release();
- io::BufferReader buf_reader(data_buffer);
- std::shared_ptr<RecordBatchReader> reader;
- ARROW_ASSIGN_OR_RAISE(reader,
ipc::RecordBatchStreamReader::Open(&buf_reader));
- RETURN_NOT_OK(reader->ReadNext(&out->batch));
- gil.acquire();
- }
-
- // Zero-copy reconstruct tensors
- for (int i = 0; i < num_tensors; ++i) {
- std::shared_ptr<Buffer> metadata;
- std::shared_ptr<Buffer> body;
- std::shared_ptr<Tensor> tensor;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &metadata));
- RETURN_NOT_OK(GetBuffer(buffer_index++, &body));
-
- ipc::Message message(metadata, body);
-
- ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(message));
- out->tensors.emplace_back(std::move(tensor));
- }
-
- // Zero-copy reconstruct sparse tensors
- for (int i = 0, n = num_sparse_tensors.num_total_tensors(); i < n; ++i) {
- ipc::IpcPayload payload;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &payload.metadata));
-
- ARROW_ASSIGN_OR_RAISE(
- size_t num_bodies,
- ipc::internal::ReadSparseTensorBodyBufferCount(*payload.metadata));
-
- payload.body_buffers.reserve(num_bodies);
- for (size_t i = 0; i < num_bodies; ++i) {
- std::shared_ptr<Buffer> body;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &body));
- payload.body_buffers.emplace_back(body);
- }
-
- std::shared_ptr<SparseTensor> sparse_tensor;
- ARROW_ASSIGN_OR_RAISE(sparse_tensor,
ipc::internal::ReadSparseTensorPayload(payload));
- out->sparse_tensors.emplace_back(std::move(sparse_tensor));
- }
-
- // Zero-copy reconstruct tensors for numpy ndarrays
- for (int i = 0; i < num_ndarrays; ++i) {
- std::shared_ptr<Buffer> metadata;
- std::shared_ptr<Buffer> body;
- std::shared_ptr<Tensor> tensor;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &metadata));
- RETURN_NOT_OK(GetBuffer(buffer_index++, &body));
-
- ipc::Message message(metadata, body);
-
- ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(message));
- out->ndarrays.emplace_back(std::move(tensor));
- }
-
- // Unwrap and append buffers
- for (int i = 0; i < num_buffers; ++i) {
- std::shared_ptr<Buffer> buffer;
- RETURN_NOT_OK(GetBuffer(buffer_index++, &buffer));
- out->buffers.emplace_back(std::move(buffer));
- }
-
- return Status::OK();
-}
-
-Status DeserializeNdarray(const SerializedPyObject& object,
- std::shared_ptr<Tensor>* out) {
- if (object.ndarrays.size() != 1) {
- return Status::Invalid("Object is not an Ndarray");
- }
- *out = object.ndarrays[0];
- return Status::OK();
-}
-
-Status NdarrayFromBuffer(std::shared_ptr<Buffer> src, std::shared_ptr<Tensor>*
out) {
- io::BufferReader reader(src);
- SerializedPyObject object;
- RETURN_NOT_OK(ReadSerializedObject(&reader, &object));
- return DeserializeNdarray(object, out);
-}
-
-} // namespace py
-} // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/deserialize.h
b/python/pyarrow/src/arrow/python/deserialize.h
deleted file mode 100644
index fe1d73622a..0000000000
--- a/python/pyarrow/src/arrow/python/deserialize.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "arrow/python/serialize.h"
-#include "arrow/python/visibility.h"
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-
-namespace arrow {
-
-class RecordBatch;
-class Tensor;
-
-namespace io {
-
-class RandomAccessFile;
-
-} // namespace io
-
-namespace py {
-
-struct ARROW_PYTHON_EXPORT SparseTensorCounts {
- int coo;
- int csr;
- int csc;
- int csf;
- int ndim_csf;
-
- int num_total_tensors() const { return coo + csr + csc + csf; }
- int num_total_buffers() const {
- return coo * 3 + csr * 4 + csc * 4 + 2 * ndim_csf + csf;
- }
-};
-
-/// \brief Read serialized Python sequence from file interface using Arrow IPC
-/// \param[in] src a RandomAccessFile
-/// \param[out] out the reconstructed data
-/// \return Status
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject*
out);
-
-/// \brief Reconstruct SerializedPyObject from representation produced by
-/// SerializedPyObject::GetComponents.
-///
-/// \param[in] num_tensors number of tensors in the object
-/// \param[in] num_sparse_tensors number of sparse tensors in the object
-/// \param[in] num_ndarrays number of numpy Ndarrays in the object
-/// \param[in] num_buffers number of buffers in the object
-/// \param[in] data a list containing pyarrow.Buffer instances. It must be 1 +
-/// num_tensors * 2 + num_coo_tensors * 3 + num_csr_tensors * 4 +
num_csc_tensors * 4 +
-/// num_csf_tensors * (2 * ndim_csf + 3) + num_buffers in length
-/// \param[out] out the reconstructed object
-/// \return Status
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status GetSerializedFromComponents(int num_tensors,
- const SparseTensorCounts&
num_sparse_tensors,
- int num_ndarrays, int num_buffers,
PyObject* data,
- SerializedPyObject* out);
-
-/// \brief Reconstruct Python object from Arrow-serialized representation
-/// \param[in] context Serialization context which contains custom
serialization
-/// and deserialization callbacks. Can be any Python object with a
-/// _serialize_callback method for serialization and a _deserialize_callback
-/// method for deserialization. If context is None, no custom serialization
-/// will be attempted.
-/// \param[in] object Object to deserialize
-/// \param[in] base a Python object holding the underlying data that any NumPy
-/// arrays will reference, to avoid premature deallocation
-/// \param[out] out The returned object
-/// \return Status
-/// This acquires the GIL
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status DeserializeObject(PyObject* context, const SerializedPyObject& object,
- PyObject* base, PyObject** out);
-
-/// \brief Reconstruct Ndarray from Arrow-serialized representation
-/// \param[in] object Object to deserialize
-/// \param[out] out The deserialized tensor
-/// \return Status
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status DeserializeNdarray(const SerializedPyObject& object,
std::shared_ptr<Tensor>* out);
-
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status NdarrayFromBuffer(std::shared_ptr<Buffer> src, std::shared_ptr<Tensor>*
out);
-
-} // namespace py
-} // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/serialize.cc
b/python/pyarrow/src/arrow/python/serialize.cc
deleted file mode 100644
index ad079cbd9c..0000000000
--- a/python/pyarrow/src/arrow/python/serialize.cc
+++ /dev/null
@@ -1,798 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/python/serialize.h"
-#include "arrow/python/numpy_interop.h"
-
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <numpy/arrayobject.h>
-#include <numpy/arrayscalars.h>
-
-#include "arrow/array.h"
-#include "arrow/array/builder_binary.h"
-#include "arrow/array/builder_nested.h"
-#include "arrow/array/builder_primitive.h"
-#include "arrow/array/builder_union.h"
-#include "arrow/io/interfaces.h"
-#include "arrow/io/memory.h"
-#include "arrow/ipc/util.h"
-#include "arrow/ipc/writer.h"
-#include "arrow/record_batch.h"
-#include "arrow/result.h"
-#include "arrow/tensor.h"
-#include "arrow/util/logging.h"
-
-#include "arrow/python/common.h"
-#include "arrow/python/datetime.h"
-#include "arrow/python/helpers.h"
-#include "arrow/python/iterators.h"
-#include "arrow/python/numpy_convert.h"
-#include "arrow/python/platform.h"
-#include "arrow/python/pyarrow.h"
-
-constexpr int32_t kMaxRecursionDepth = 100;
-
-namespace arrow {
-
-using internal::checked_cast;
-
-namespace py {
-
-class SequenceBuilder;
-class DictBuilder;
-
-Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
- int32_t recursion_depth, SerializedPyObject* blobs_out);
-
-// A Sequence is a heterogeneous collections of elements. It can contain
-// scalar Python types, lists, tuples, dictionaries, tensors and sparse
tensors.
-class SequenceBuilder {
- public:
- explicit SequenceBuilder(MemoryPool* pool = default_memory_pool())
- : pool_(pool),
- types_(::arrow::int8(), pool),
- offsets_(::arrow::int32(), pool),
- type_map_(PythonType::NUM_PYTHON_TYPES, -1) {
- auto null_builder = std::make_shared<NullBuilder>(pool);
- auto initial_ty = dense_union({field("0", null())});
- builder_.reset(new DenseUnionBuilder(pool, {null_builder}, initial_ty));
- }
-
- // Appending a none to the sequence
- Status AppendNone() { return builder_->AppendNull(); }
-
- template <typename BuilderType, typename MakeBuilderFn>
- Status CreateAndUpdate(std::shared_ptr<BuilderType>* child_builder, int8_t
tag,
- MakeBuilderFn make_builder) {
- if (!*child_builder) {
- child_builder->reset(make_builder());
- std::ostringstream convert;
- convert.imbue(std::locale::classic());
- convert << static_cast<int>(tag);
- type_map_[tag] = builder_->AppendChild(*child_builder, convert.str());
- }
- return builder_->Append(type_map_[tag]);
- }
-
- template <typename BuilderType, typename T>
- Status AppendPrimitive(std::shared_ptr<BuilderType>* child_builder, const T
val,
- int8_t tag) {
- RETURN_NOT_OK(
- CreateAndUpdate(child_builder, tag, [this]() { return new
BuilderType(pool_); }));
- return (*child_builder)->Append(val);
- }
-
- // Appending a boolean to the sequence
- Status AppendBool(const bool data) {
- return AppendPrimitive(&bools_, data, PythonType::BOOL);
- }
-
- // Appending an int64_t to the sequence
- Status AppendInt64(const int64_t data) {
- return AppendPrimitive(&ints_, data, PythonType::INT);
- }
-
- // Append a list of bytes to the sequence
- Status AppendBytes(const uint8_t* data, int32_t length) {
- RETURN_NOT_OK(CreateAndUpdate(&bytes_, PythonType::BYTES,
- [this]() { return new BinaryBuilder(pool_);
}));
- return bytes_->Append(data, length);
- }
-
- // Appending a string to the sequence
- Status AppendString(const char* data, int32_t length) {
- RETURN_NOT_OK(CreateAndUpdate(&strings_, PythonType::STRING,
- [this]() { return new StringBuilder(pool_);
}));
- return strings_->Append(data, length);
- }
-
- // Appending a half_float to the sequence
- Status AppendHalfFloat(const npy_half data) {
- return AppendPrimitive(&half_floats_, data, PythonType::HALF_FLOAT);
- }
-
- // Appending a float to the sequence
- Status AppendFloat(const float data) {
- return AppendPrimitive(&floats_, data, PythonType::FLOAT);
- }
-
- // Appending a double to the sequence
- Status AppendDouble(const double data) {
- return AppendPrimitive(&doubles_, data, PythonType::DOUBLE);
- }
-
- // Appending a Date64 timestamp to the sequence
- Status AppendDate64(const int64_t timestamp) {
- return AppendPrimitive(&date64s_, timestamp, PythonType::DATE64);
- }
-
- // Appending a tensor to the sequence
- //
- // \param tensor_index Index of the tensor in the object.
- Status AppendTensor(const int32_t tensor_index) {
- RETURN_NOT_OK(CreateAndUpdate(&tensor_indices_, PythonType::TENSOR,
- [this]() { return new Int32Builder(pool_);
}));
- return tensor_indices_->Append(tensor_index);
- }
-
- // Appending a sparse coo tensor to the sequence
- //
- // \param sparse_coo_tensor_index Index of the sparse coo tensor in the
object.
- Status AppendSparseCOOTensor(const int32_t sparse_coo_tensor_index) {
- RETURN_NOT_OK(CreateAndUpdate(&sparse_coo_tensor_indices_,
- PythonType::SPARSECOOTENSOR,
- [this]() { return new Int32Builder(pool_);
}));
- return sparse_coo_tensor_indices_->Append(sparse_coo_tensor_index);
- }
-
- // Appending a sparse csr matrix to the sequence
- //
- // \param sparse_csr_matrix_index Index of the sparse csr matrix in the
object.
- Status AppendSparseCSRMatrix(const int32_t sparse_csr_matrix_index) {
- RETURN_NOT_OK(CreateAndUpdate(&sparse_csr_matrix_indices_,
- PythonType::SPARSECSRMATRIX,
- [this]() { return new Int32Builder(pool_);
}));
- return sparse_csr_matrix_indices_->Append(sparse_csr_matrix_index);
- }
-
- // Appending a sparse csc matrix to the sequence
- //
- // \param sparse_csc_matrix_index Index of the sparse csc matrix in the
object.
- Status AppendSparseCSCMatrix(const int32_t sparse_csc_matrix_index) {
- RETURN_NOT_OK(CreateAndUpdate(&sparse_csc_matrix_indices_,
- PythonType::SPARSECSCMATRIX,
- [this]() { return new Int32Builder(pool_);
}));
- return sparse_csc_matrix_indices_->Append(sparse_csc_matrix_index);
- }
-
- // Appending a sparse csf tensor to the sequence
- //
- // \param sparse_csf_tensor_index Index of the sparse csf tensor in the
object.
- Status AppendSparseCSFTensor(const int32_t sparse_csf_tensor_index) {
- RETURN_NOT_OK(CreateAndUpdate(&sparse_csf_tensor_indices_,
- PythonType::SPARSECSFTENSOR,
- [this]() { return new Int32Builder(pool_);
}));
- return sparse_csf_tensor_indices_->Append(sparse_csf_tensor_index);
- }
-
- // Appending a numpy ndarray to the sequence
- //
- // \param tensor_index Index of the tensor in the object.
- Status AppendNdarray(const int32_t ndarray_index) {
- RETURN_NOT_OK(CreateAndUpdate(&ndarray_indices_, PythonType::NDARRAY,
- [this]() { return new Int32Builder(pool_);
}));
- return ndarray_indices_->Append(ndarray_index);
- }
-
- // Appending a buffer to the sequence
- //
- // \param buffer_index Index of the buffer in the object.
- Status AppendBuffer(const int32_t buffer_index) {
- RETURN_NOT_OK(CreateAndUpdate(&buffer_indices_, PythonType::BUFFER,
- [this]() { return new Int32Builder(pool_);
}));
- return buffer_indices_->Append(buffer_index);
- }
-
- Status AppendSequence(PyObject* context, PyObject* sequence, int8_t tag,
- std::shared_ptr<ListBuilder>& target_sequence,
- std::unique_ptr<SequenceBuilder>& values, int32_t
recursion_depth,
- SerializedPyObject* blobs_out) {
- if (recursion_depth >= kMaxRecursionDepth) {
- return Status::NotImplemented(
- "This object exceeds the maximum recursion depth. It may contain
itself "
- "recursively.");
- }
- RETURN_NOT_OK(CreateAndUpdate(&target_sequence, tag, [this, &values]() {
- values.reset(new SequenceBuilder(pool_));
- return new ListBuilder(pool_, values->builder());
- }));
- RETURN_NOT_OK(target_sequence->Append());
- return internal::VisitIterable(
- sequence, [&](PyObject* obj, bool* keep_going /* unused */) {
- return Append(context, obj, values.get(), recursion_depth,
blobs_out);
- });
- }
-
- Status AppendList(PyObject* context, PyObject* list, int32_t recursion_depth,
- SerializedPyObject* blobs_out) {
- return AppendSequence(context, list, PythonType::LIST, lists_,
list_values_,
- recursion_depth + 1, blobs_out);
- }
-
- Status AppendTuple(PyObject* context, PyObject* tuple, int32_t
recursion_depth,
- SerializedPyObject* blobs_out) {
- return AppendSequence(context, tuple, PythonType::TUPLE, tuples_,
tuple_values_,
- recursion_depth + 1, blobs_out);
- }
-
- Status AppendSet(PyObject* context, PyObject* set, int32_t recursion_depth,
- SerializedPyObject* blobs_out) {
- return AppendSequence(context, set, PythonType::SET, sets_, set_values_,
- recursion_depth + 1, blobs_out);
- }
-
- Status AppendDict(PyObject* context, PyObject* dict, int32_t recursion_depth,
- SerializedPyObject* blobs_out);
-
- // Finish building the sequence and return the result.
- // Input arrays may be nullptr
- Status Finish(std::shared_ptr<Array>* out) { return builder_->Finish(out); }
-
- std::shared_ptr<DenseUnionBuilder> builder() { return builder_; }
-
- private:
- MemoryPool* pool_;
-
- Int8Builder types_;
- Int32Builder offsets_;
-
- /// Mapping from PythonType to child index
- std::vector<int8_t> type_map_;
-
- std::shared_ptr<BooleanBuilder> bools_;
- std::shared_ptr<Int64Builder> ints_;
- std::shared_ptr<BinaryBuilder> bytes_;
- std::shared_ptr<StringBuilder> strings_;
- std::shared_ptr<HalfFloatBuilder> half_floats_;
- std::shared_ptr<FloatBuilder> floats_;
- std::shared_ptr<DoubleBuilder> doubles_;
- std::shared_ptr<Date64Builder> date64s_;
-
- std::unique_ptr<SequenceBuilder> list_values_;
- std::shared_ptr<ListBuilder> lists_;
- std::unique_ptr<DictBuilder> dict_values_;
- std::shared_ptr<ListBuilder> dicts_;
- std::unique_ptr<SequenceBuilder> tuple_values_;
- std::shared_ptr<ListBuilder> tuples_;
- std::unique_ptr<SequenceBuilder> set_values_;
- std::shared_ptr<ListBuilder> sets_;
-
- std::shared_ptr<Int32Builder> tensor_indices_;
- std::shared_ptr<Int32Builder> sparse_coo_tensor_indices_;
- std::shared_ptr<Int32Builder> sparse_csr_matrix_indices_;
- std::shared_ptr<Int32Builder> sparse_csc_matrix_indices_;
- std::shared_ptr<Int32Builder> sparse_csf_tensor_indices_;
- std::shared_ptr<Int32Builder> ndarray_indices_;
- std::shared_ptr<Int32Builder> buffer_indices_;
-
- std::shared_ptr<DenseUnionBuilder> builder_;
-};
-
-// Constructing dictionaries of key/value pairs. Sequences of
-// keys and values are built separately using a pair of
-// SequenceBuilders. The resulting Arrow representation
-// can be obtained via the Finish method.
-class DictBuilder {
- public:
- explicit DictBuilder(MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) {
- builder_.reset(new StructBuilder(struct_({field("keys",
dense_union(FieldVector{})),
- field("vals",
dense_union(FieldVector{}))}),
- pool, {keys_.builder(),
vals_.builder()}));
- }
-
- // Builder for the keys of the dictionary
- SequenceBuilder& keys() { return keys_; }
- // Builder for the values of the dictionary
- SequenceBuilder& vals() { return vals_; }
-
- // Construct an Arrow StructArray representing the dictionary.
- // Contains a field "keys" for the keys and "vals" for the values.
- Status Finish(std::shared_ptr<Array>* out) { return builder_->Finish(out); }
-
- std::shared_ptr<StructBuilder> builder() { return builder_; }
-
- private:
- SequenceBuilder keys_;
- SequenceBuilder vals_;
- std::shared_ptr<StructBuilder> builder_;
-};
-
-Status SequenceBuilder::AppendDict(PyObject* context, PyObject* dict,
- int32_t recursion_depth,
- SerializedPyObject* blobs_out) {
- if (recursion_depth >= kMaxRecursionDepth) {
- return Status::NotImplemented(
- "This object exceeds the maximum recursion depth. It may contain
itself "
- "recursively.");
- }
- RETURN_NOT_OK(CreateAndUpdate(&dicts_, PythonType::DICT, [this]() {
- dict_values_.reset(new DictBuilder(pool_));
- return new ListBuilder(pool_, dict_values_->builder());
- }));
- RETURN_NOT_OK(dicts_->Append());
- PyObject* key;
- PyObject* value;
- Py_ssize_t pos = 0;
- while (PyDict_Next(dict, &pos, &key, &value)) {
- RETURN_NOT_OK(dict_values_->builder()->Append());
- RETURN_NOT_OK(
- Append(context, key, &dict_values_->keys(), recursion_depth + 1,
blobs_out));
- RETURN_NOT_OK(
- Append(context, value, &dict_values_->vals(), recursion_depth + 1,
blobs_out));
- }
-
- // This block is used to decrement the reference counts of the results
- // returned by the serialization callback, which is called in AppendArray,
- // in DeserializeDict and in Append
- static PyObject* py_type = PyUnicode_FromString("_pytype_");
- if (PyDict_Contains(dict, py_type)) {
- // If the dictionary contains the key "_pytype_", then the user has to
- // have registered a callback.
- if (context == Py_None) {
- return Status::Invalid("No serialization callback set");
- }
- Py_XDECREF(dict);
- }
- return Status::OK();
-}
-
-Status CallCustomCallback(PyObject* context, PyObject* method_name, PyObject*
elem,
- PyObject** result) {
- if (context == Py_None) {
- *result = NULL;
- return Status::SerializationError("error while calling callback on ",
- internal::PyObject_StdStringRepr(elem),
- ": handler not registered");
- } else {
- *result = PyObject_CallMethodObjArgs(context, method_name, elem, NULL);
- return CheckPyError();
- }
-}
-
-Status CallSerializeCallback(PyObject* context, PyObject* value,
- PyObject** serialized_object) {
- OwnedRef method_name(PyUnicode_FromString("_serialize_callback"));
- RETURN_NOT_OK(CallCustomCallback(context, method_name.obj(), value,
serialized_object));
- if (!PyDict_Check(*serialized_object)) {
- return Status::TypeError("serialization callback must return a valid
dictionary");
- }
- return Status::OK();
-}
-
-Status CallDeserializeCallback(PyObject* context, PyObject* value,
- PyObject** deserialized_object) {
- OwnedRef method_name(PyUnicode_FromString("_deserialize_callback"));
- return CallCustomCallback(context, method_name.obj(), value,
deserialized_object);
-}
-
-Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder*
builder,
- int32_t recursion_depth, SerializedPyObject* blobs_out);
-
-template <typename NumpyScalarObject>
-Status AppendIntegerScalar(PyObject* obj, SequenceBuilder* builder) {
- int64_t value = reinterpret_cast<NumpyScalarObject*>(obj)->obval;
- return builder->AppendInt64(value);
-}
-
-// Append a potentially 64-bit wide unsigned Numpy scalar.
-// Must check for overflow as we reinterpret it as signed int64.
-template <typename NumpyScalarObject>
-Status AppendLargeUnsignedScalar(PyObject* obj, SequenceBuilder* builder) {
- constexpr uint64_t max_value = std::numeric_limits<int64_t>::max();
-
- uint64_t value = reinterpret_cast<NumpyScalarObject*>(obj)->obval;
- if (value > max_value) {
- return Status::Invalid("cannot serialize Numpy uint64 scalar >= 2**63");
- }
- return builder->AppendInt64(static_cast<int64_t>(value));
-}
-
-Status AppendScalar(PyObject* obj, SequenceBuilder* builder) {
- if (PyArray_IsScalar(obj, Bool)) {
- return
builder->AppendBool(reinterpret_cast<PyBoolScalarObject*>(obj)->obval != 0);
- } else if (PyArray_IsScalar(obj, Half)) {
- return
builder->AppendHalfFloat(reinterpret_cast<PyHalfScalarObject*>(obj)->obval);
- } else if (PyArray_IsScalar(obj, Float)) {
- return
builder->AppendFloat(reinterpret_cast<PyFloatScalarObject*>(obj)->obval);
- } else if (PyArray_IsScalar(obj, Double)) {
- return
builder->AppendDouble(reinterpret_cast<PyDoubleScalarObject*>(obj)->obval);
- }
- if (PyArray_IsScalar(obj, Byte)) {
- return AppendIntegerScalar<PyByteScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, Short)) {
- return AppendIntegerScalar<PyShortScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, Int)) {
- return AppendIntegerScalar<PyIntScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, Long)) {
- return AppendIntegerScalar<PyLongScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, LongLong)) {
- return AppendIntegerScalar<PyLongLongScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, Int64)) {
- return AppendIntegerScalar<PyInt64ScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, UByte)) {
- return AppendIntegerScalar<PyUByteScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, UShort)) {
- return AppendIntegerScalar<PyUShortScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, UInt)) {
- return AppendIntegerScalar<PyUIntScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, ULong)) {
- return AppendLargeUnsignedScalar<PyULongScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, ULongLong)) {
- return AppendLargeUnsignedScalar<PyULongLongScalarObject>(obj, builder);
- } else if (PyArray_IsScalar(obj, UInt64)) {
- return AppendLargeUnsignedScalar<PyUInt64ScalarObject>(obj, builder);
- }
- return Status::NotImplemented("Numpy scalar type not recognized");
-}
-
-Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
- int32_t recursion_depth, SerializedPyObject* blobs_out) {
- // The bool case must precede the int case (PyInt_Check passes for bools)
- if (PyBool_Check(elem)) {
- RETURN_NOT_OK(builder->AppendBool(elem == Py_True));
- } else if (PyArray_DescrFromScalar(elem)->type_num == NPY_HALF) {
- npy_half halffloat = reinterpret_cast<PyHalfScalarObject*>(elem)->obval;
- RETURN_NOT_OK(builder->AppendHalfFloat(halffloat));
- } else if (PyFloat_Check(elem)) {
- RETURN_NOT_OK(builder->AppendDouble(PyFloat_AS_DOUBLE(elem)));
- } else if (PyLong_Check(elem)) {
- int overflow = 0;
- int64_t data = PyLong_AsLongLongAndOverflow(elem, &overflow);
- if (!overflow) {
- RETURN_NOT_OK(builder->AppendInt64(data));
- } else {
- // Attempt to serialize the object using the custom callback.
- PyObject* serialized_object;
- // The reference count of serialized_object will be decremented in
SerializeDict
- RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object));
- RETURN_NOT_OK(
- builder->AppendDict(context, serialized_object, recursion_depth,
blobs_out));
- }
- } else if (PyBytes_Check(elem)) {
- auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));
- int32_t size = -1;
- RETURN_NOT_OK(internal::CastSize(PyBytes_GET_SIZE(elem), &size));
- RETURN_NOT_OK(builder->AppendBytes(data, size));
- } else if (PyUnicode_Check(elem)) {
- ARROW_ASSIGN_OR_RAISE(auto view, PyBytesView::FromUnicode(elem));
- int32_t size = -1;
- RETURN_NOT_OK(internal::CastSize(view.size, &size));
- RETURN_NOT_OK(builder->AppendString(view.bytes, size));
- } else if (PyList_CheckExact(elem)) {
- RETURN_NOT_OK(builder->AppendList(context, elem, recursion_depth,
blobs_out));
- } else if (PyDict_CheckExact(elem)) {
- RETURN_NOT_OK(builder->AppendDict(context, elem, recursion_depth,
blobs_out));
- } else if (PyTuple_CheckExact(elem)) {
- RETURN_NOT_OK(builder->AppendTuple(context, elem, recursion_depth,
blobs_out));
- } else if (PySet_Check(elem)) {
- RETURN_NOT_OK(builder->AppendSet(context, elem, recursion_depth,
blobs_out));
- } else if (PyArray_IsScalar(elem, Generic)) {
- RETURN_NOT_OK(AppendScalar(elem, builder));
- } else if (PyArray_CheckExact(elem)) {
- RETURN_NOT_OK(AppendArray(context, reinterpret_cast<PyArrayObject*>(elem),
builder,
- recursion_depth, blobs_out));
- } else if (elem == Py_None) {
- RETURN_NOT_OK(builder->AppendNone());
- } else if (PyDateTime_Check(elem)) {
- PyDateTime_DateTime* datetime =
reinterpret_cast<PyDateTime_DateTime*>(elem);
- RETURN_NOT_OK(builder->AppendDate64(internal::PyDateTime_to_us(datetime)));
- } else if (is_buffer(elem)) {
-
RETURN_NOT_OK(builder->AppendBuffer(static_cast<int32_t>(blobs_out->buffers.size())));
- ARROW_ASSIGN_OR_RAISE(auto buffer, unwrap_buffer(elem));
- blobs_out->buffers.push_back(buffer);
- } else if (is_tensor(elem)) {
-
RETURN_NOT_OK(builder->AppendTensor(static_cast<int32_t>(blobs_out->tensors.size())));
- ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_tensor(elem));
- blobs_out->tensors.push_back(tensor);
- } else if (is_sparse_coo_tensor(elem)) {
- RETURN_NOT_OK(builder->AppendSparseCOOTensor(
- static_cast<int32_t>(blobs_out->sparse_tensors.size())));
- ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_sparse_coo_tensor(elem));
- blobs_out->sparse_tensors.push_back(tensor);
- } else if (is_sparse_csr_matrix(elem)) {
- RETURN_NOT_OK(builder->AppendSparseCSRMatrix(
- static_cast<int32_t>(blobs_out->sparse_tensors.size())));
- ARROW_ASSIGN_OR_RAISE(auto matrix, unwrap_sparse_csr_matrix(elem));
- blobs_out->sparse_tensors.push_back(matrix);
- } else if (is_sparse_csc_matrix(elem)) {
- RETURN_NOT_OK(builder->AppendSparseCSCMatrix(
- static_cast<int32_t>(blobs_out->sparse_tensors.size())));
- ARROW_ASSIGN_OR_RAISE(auto matrix, unwrap_sparse_csc_matrix(elem));
- blobs_out->sparse_tensors.push_back(matrix);
- } else if (is_sparse_csf_tensor(elem)) {
- RETURN_NOT_OK(builder->AppendSparseCSFTensor(
- static_cast<int32_t>(blobs_out->sparse_tensors.size())));
- ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_sparse_csf_tensor(elem));
- blobs_out->sparse_tensors.push_back(tensor);
- } else {
- // Attempt to serialize the object using the custom callback.
- PyObject* serialized_object;
- // The reference count of serialized_object will be decremented in
SerializeDict
- RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object));
- RETURN_NOT_OK(
- builder->AppendDict(context, serialized_object, recursion_depth,
blobs_out));
- }
- return Status::OK();
-}
-
-Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder*
builder,
- int32_t recursion_depth, SerializedPyObject* blobs_out) {
- int dtype = PyArray_TYPE(array);
- switch (dtype) {
- case NPY_UINT8:
- case NPY_INT8:
- case NPY_UINT16:
- case NPY_INT16:
- case NPY_UINT32:
- case NPY_INT32:
- case NPY_UINT64:
- case NPY_INT64:
- case NPY_HALF:
- case NPY_FLOAT:
- case NPY_DOUBLE: {
- RETURN_NOT_OK(
-
builder->AppendNdarray(static_cast<int32_t>(blobs_out->ndarrays.size())));
- std::shared_ptr<Tensor> tensor;
- RETURN_NOT_OK(NdarrayToTensor(default_memory_pool(),
- reinterpret_cast<PyObject*>(array), {},
&tensor));
- blobs_out->ndarrays.push_back(tensor);
- } break;
- default: {
- PyObject* serialized_object;
- // The reference count of serialized_object will be decremented in
SerializeDict
- RETURN_NOT_OK(CallSerializeCallback(context,
reinterpret_cast<PyObject*>(array),
- &serialized_object));
- RETURN_NOT_OK(builder->AppendDict(context, serialized_object,
recursion_depth + 1,
- blobs_out));
- }
- }
- return Status::OK();
-}
-
-std::shared_ptr<RecordBatch> MakeBatch(std::shared_ptr<Array> data) {
- auto field = std::make_shared<Field>("list", data->type());
- auto schema = ::arrow::schema({field});
- return RecordBatch::Make(schema, data->length(), {data});
-}
-
-Status SerializeObject(PyObject* context, PyObject* sequence,
SerializedPyObject* out) {
- PyAcquireGIL lock;
- SequenceBuilder builder;
- RETURN_NOT_OK(internal::VisitIterable(
- sequence, [&](PyObject* obj, bool* keep_going /* unused */) {
- return Append(context, obj, &builder, 0, out);
- }));
- std::shared_ptr<Array> array;
- RETURN_NOT_OK(builder.Finish(&array));
- out->batch = MakeBatch(array);
- return Status::OK();
-}
-
-Status SerializeNdarray(std::shared_ptr<Tensor> tensor, SerializedPyObject*
out) {
- std::shared_ptr<Array> array;
- SequenceBuilder builder;
-
RETURN_NOT_OK(builder.AppendNdarray(static_cast<int32_t>(out->ndarrays.size())));
- out->ndarrays.push_back(tensor);
- RETURN_NOT_OK(builder.Finish(&array));
- out->batch = MakeBatch(array);
- return Status::OK();
-}
-
-Status WriteNdarrayHeader(std::shared_ptr<DataType> dtype,
- const std::vector<int64_t>& shape, int64_t
tensor_num_bytes,
- io::OutputStream* dst) {
- auto empty_tensor = std::make_shared<Tensor>(
- dtype, std::make_shared<Buffer>(nullptr, tensor_num_bytes), shape);
- SerializedPyObject serialized_tensor;
- RETURN_NOT_OK(SerializeNdarray(empty_tensor, &serialized_tensor));
- return serialized_tensor.WriteTo(dst);
-}
-
-SerializedPyObject::SerializedPyObject()
- : ipc_options(ipc::IpcWriteOptions::Defaults()) {}
-
-Status SerializedPyObject::WriteTo(io::OutputStream* dst) {
- int32_t num_tensors = static_cast<int32_t>(this->tensors.size());
- int32_t num_sparse_tensors =
static_cast<int32_t>(this->sparse_tensors.size());
- int32_t num_ndarrays = static_cast<int32_t>(this->ndarrays.size());
- int32_t num_buffers = static_cast<int32_t>(this->buffers.size());
- RETURN_NOT_OK(
- dst->Write(reinterpret_cast<const uint8_t*>(&num_tensors),
sizeof(int32_t)));
- RETURN_NOT_OK(
- dst->Write(reinterpret_cast<const uint8_t*>(&num_sparse_tensors),
sizeof(int32_t)));
- RETURN_NOT_OK(
- dst->Write(reinterpret_cast<const uint8_t*>(&num_ndarrays),
sizeof(int32_t)));
- RETURN_NOT_OK(
- dst->Write(reinterpret_cast<const uint8_t*>(&num_buffers),
sizeof(int32_t)));
-
- // Align stream to 8-byte offset
- RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kArrowIpcAlignment));
- RETURN_NOT_OK(ipc::WriteRecordBatchStream({this->batch}, this->ipc_options,
dst));
-
- // Align stream to 64-byte offset so tensor bodies are 64-byte aligned
- RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
-
- int32_t metadata_length;
- int64_t body_length;
- for (const auto& tensor : this->tensors) {
- RETURN_NOT_OK(ipc::WriteTensor(*tensor, dst, &metadata_length,
&body_length));
- RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
- }
-
- for (const auto& sparse_tensor : this->sparse_tensors) {
- RETURN_NOT_OK(
- ipc::WriteSparseTensor(*sparse_tensor, dst, &metadata_length,
&body_length));
- RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
- }
-
- for (const auto& tensor : this->ndarrays) {
- RETURN_NOT_OK(ipc::WriteTensor(*tensor, dst, &metadata_length,
&body_length));
- RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
- }
-
- for (const auto& buffer : this->buffers) {
- int64_t size = buffer->size();
- RETURN_NOT_OK(dst->Write(reinterpret_cast<const uint8_t*>(&size),
sizeof(int64_t)));
- RETURN_NOT_OK(dst->Write(buffer->data(), size));
- }
-
- return Status::OK();
-}
-
-namespace {
-
-Status CountSparseTensors(
- const std::vector<std::shared_ptr<SparseTensor>>& sparse_tensors,
PyObject** out) {
- OwnedRef num_sparse_tensors(PyDict_New());
- size_t num_coo = 0;
- size_t num_csr = 0;
- size_t num_csc = 0;
- size_t num_csf = 0;
- size_t ndim_csf = 0;
-
- for (const auto& sparse_tensor : sparse_tensors) {
- switch (sparse_tensor->format_id()) {
- case SparseTensorFormat::COO:
- ++num_coo;
- break;
- case SparseTensorFormat::CSR:
- ++num_csr;
- break;
- case SparseTensorFormat::CSC:
- ++num_csc;
- break;
- case SparseTensorFormat::CSF:
- ++num_csf;
- ndim_csf += sparse_tensor->ndim();
- break;
- }
- }
-
- PyDict_SetItemString(num_sparse_tensors.obj(), "coo",
PyLong_FromSize_t(num_coo));
- PyDict_SetItemString(num_sparse_tensors.obj(), "csr",
PyLong_FromSize_t(num_csr));
- PyDict_SetItemString(num_sparse_tensors.obj(), "csc",
PyLong_FromSize_t(num_csc));
- PyDict_SetItemString(num_sparse_tensors.obj(), "csf",
PyLong_FromSize_t(num_csf));
- PyDict_SetItemString(num_sparse_tensors.obj(), "ndim_csf",
PyLong_FromSize_t(ndim_csf));
- RETURN_IF_PYERROR();
-
- *out = num_sparse_tensors.detach();
- return Status::OK();
-}
-
-} // namespace
-
-Status SerializedPyObject::GetComponents(MemoryPool* memory_pool, PyObject**
out) {
- PyAcquireGIL py_gil;
-
- OwnedRef result(PyDict_New());
- PyObject* buffers = PyList_New(0);
- PyObject* num_sparse_tensors = nullptr;
-
- // TODO(wesm): Not sure how pedantic we need to be about checking the return
- // values of these functions. There are other places where we do not check
- // PyDict_SetItem/SetItemString return value, but these failures would be
- // quite esoteric
- PyDict_SetItemString(result.obj(), "num_tensors",
- PyLong_FromSize_t(this->tensors.size()));
- RETURN_NOT_OK(CountSparseTensors(this->sparse_tensors, &num_sparse_tensors));
- PyDict_SetItemString(result.obj(), "num_sparse_tensors", num_sparse_tensors);
- PyDict_SetItemString(result.obj(), "ndim_csf", num_sparse_tensors);
- PyDict_SetItemString(result.obj(), "num_ndarrays",
- PyLong_FromSize_t(this->ndarrays.size()));
- PyDict_SetItemString(result.obj(), "num_buffers",
- PyLong_FromSize_t(this->buffers.size()));
- PyDict_SetItemString(result.obj(), "data", buffers);
- RETURN_IF_PYERROR();
-
- Py_DECREF(buffers);
-
- auto PushBuffer = [&buffers](const std::shared_ptr<Buffer>& buffer) {
- PyObject* wrapped_buffer = wrap_buffer(buffer);
- RETURN_IF_PYERROR();
- if (PyList_Append(buffers, wrapped_buffer) < 0) {
- Py_DECREF(wrapped_buffer);
- RETURN_IF_PYERROR();
- }
- Py_DECREF(wrapped_buffer);
- return Status::OK();
- };
-
- constexpr int64_t kInitialCapacity = 1024;
-
- // Write the record batch describing the object structure
- py_gil.release();
- ARROW_ASSIGN_OR_RAISE(auto stream,
- io::BufferOutputStream::Create(kInitialCapacity,
memory_pool));
- RETURN_NOT_OK(
- ipc::WriteRecordBatchStream({this->batch}, this->ipc_options,
stream.get()));
- ARROW_ASSIGN_OR_RAISE(auto buffer, stream->Finish());
- py_gil.acquire();
-
- RETURN_NOT_OK(PushBuffer(buffer));
-
- // For each tensor, get a metadata buffer and a buffer for the body
- for (const auto& tensor : this->tensors) {
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ipc::Message> message,
- ipc::GetTensorMessage(*tensor, memory_pool));
- RETURN_NOT_OK(PushBuffer(message->metadata()));
- RETURN_NOT_OK(PushBuffer(message->body()));
- }
-
- // For each sparse tensor, get a metadata buffer and buffers containing
index and data
- for (const auto& sparse_tensor : this->sparse_tensors) {
- ipc::IpcPayload payload;
- RETURN_NOT_OK(ipc::GetSparseTensorPayload(*sparse_tensor, memory_pool,
&payload));
- RETURN_NOT_OK(PushBuffer(payload.metadata));
- for (const auto& body : payload.body_buffers) {
- RETURN_NOT_OK(PushBuffer(body));
- }
- }
-
- // For each ndarray, get a metadata buffer and a buffer for the body
- for (const auto& ndarray : this->ndarrays) {
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ipc::Message> message,
- ipc::GetTensorMessage(*ndarray, memory_pool));
- RETURN_NOT_OK(PushBuffer(message->metadata()));
- RETURN_NOT_OK(PushBuffer(message->body()));
- }
-
- for (const auto& buf : this->buffers) {
- RETURN_NOT_OK(PushBuffer(buf));
- }
-
- *out = result.detach();
- return Status::OK();
-}
-
-} // namespace py
-} // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/serialize.h
b/python/pyarrow/src/arrow/python/serialize.h
deleted file mode 100644
index af6d2d81a6..0000000000
--- a/python/pyarrow/src/arrow/python/serialize.h
+++ /dev/null
@@ -1,149 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "arrow/ipc/options.h"
-#include "arrow/python/visibility.h"
-#include "arrow/sparse_tensor.h"
-#include "arrow/status.h"
-#include "arrow/util/macros.h"
-
-// Forward declaring PyObject, see
-// https://mail.python.org/pipermail/python-dev/2003-August/037601.html
-#ifndef PyObject_HEAD
-struct _object;
-typedef _object PyObject;
-#endif
-
-namespace arrow {
-
-class Buffer;
-class DataType;
-class MemoryPool;
-class RecordBatch;
-class Tensor;
-
-namespace io {
-
-class OutputStream;
-
-} // namespace io
-
-namespace py {
-
-struct ARROW_PYTHON_EXPORT SerializedPyObject {
- std::shared_ptr<RecordBatch> batch;
- std::vector<std::shared_ptr<Tensor>> tensors;
- std::vector<std::shared_ptr<SparseTensor>> sparse_tensors;
- std::vector<std::shared_ptr<Tensor>> ndarrays;
- std::vector<std::shared_ptr<Buffer>> buffers;
- ipc::IpcWriteOptions ipc_options;
-
- SerializedPyObject();
-
- /// \brief Write serialized Python object to OutputStream
- /// \param[in,out] dst an OutputStream
- /// \return Status
- Status WriteTo(io::OutputStream* dst);
-
- /// \brief Convert SerializedPyObject to a dict containing the message
- /// components as Buffer instances with minimal memory allocation
- ///
- /// {
- /// 'num_tensors': M,
- /// 'num_sparse_tensors': N,
- /// 'num_buffers': K,
- /// 'data': [Buffer]
- /// }
- ///
- /// Each tensor is written as two buffers, one for the metadata and one for
- /// the body. Therefore, the number of buffers in 'data' is 2 * M + 2 * N + K + 1,
- /// with the first buffer containing the serialized record batch containing
- /// the UnionArray that describes the whole object
- Status GetComponents(MemoryPool* pool, PyObject** out);
-};
-
-/// \brief Serialize Python sequence as a SerializedPyObject.
-/// \param[in] context Serialization context which contains custom serialization
-/// and deserialization callbacks. Can be any Python object with a
-/// _serialize_callback method for serialization and a _deserialize_callback
-/// method for deserialization. If context is None, no custom serialization
-/// will be attempted.
-/// \param[in] sequence A Python sequence object to serialize to Arrow data
-/// structures
-/// \param[out] out The serialized representation
-/// \return Status
-///
-/// Release GIL before calling
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject* out);
-
-/// \brief Serialize an Arrow Tensor as a SerializedPyObject.
-/// \param[in] tensor Tensor to be serialized
-/// \param[out] out The serialized representation
-/// \return Status
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status SerializeTensor(std::shared_ptr<Tensor> tensor, py::SerializedPyObject* out);
-
-/// \brief Write the Tensor metadata header to an OutputStream.
-/// \param[in] dtype DataType of the Tensor
-/// \param[in] shape The shape of the tensor
-/// \param[in] tensor_num_bytes The length of the Tensor data in bytes
-/// \param[in] dst The OutputStream to write the Tensor header to
-/// \return Status
-ARROW_DEPRECATED("Deprecated in 18.0.0. Will be removed in 20.0.0")
-ARROW_PYTHON_EXPORT
-Status WriteNdarrayHeader(std::shared_ptr<DataType> dtype,
- const std::vector<int64_t>& shape, int64_t tensor_num_bytes,
- io::OutputStream* dst);
-
-struct PythonType {
- enum type {
- NONE,
- BOOL,
- INT,
- PY2INT, // Kept for compatibility
- BYTES,
- STRING,
- HALF_FLOAT,
- FLOAT,
- DOUBLE,
- DATE64,
- LIST,
- DICT,
- TUPLE,
- SET,
- TENSOR,
- NDARRAY,
- BUFFER,
- SPARSECOOTENSOR,
- SPARSECSRMATRIX,
- SPARSECSCMATRIX,
- SPARSECSFTENSOR,
- NUM_PYTHON_TYPES
- };
-};
-
-} // namespace py
-
-} // namespace arrow