[
https://issues.apache.org/jira/browse/ARROW-2145?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16382770#comment-16382770
]
ASF GitHub Bot commented on ARROW-2145:
---------------------------------------
wesm closed pull request #1651:
ARROW-2145/ARROW-2153/ARROW-2157/ARROW-2160/ARROW-2177: [Python] Decimal
conversion not working for NaN values
URL: https://github.com/apache/arrow/pull/1651
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/.travis.yml b/.travis.yml
index a4c74657e..b1241e793 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -174,7 +174,7 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
- # [OS X] C++ & glib w/ XCode 8.3 & autotools
+ # [OS X] C++ & glib w/ XCode 8.3 & autotools & homebrew
- compiler: clang
osx_image: xcode8.3
os: osx
@@ -185,7 +185,8 @@ matrix:
- BUILD_SYSTEM=autotools
before_script:
- if [ $ARROW_CI_C_GLIB_AFFECTED != "1" ]; then exit; fi
- - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
+ - $TRAVIS_BUILD_DIR/ci/travis_install_osx.sh
+ - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
--homebrew
- $TRAVIS_BUILD_DIR/ci/travis_before_script_c_glib.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_c_glib.sh
diff --git a/c_glib/Brewfile b/c_glib/Brewfile
index 9fe5c3b61..955072e1e 100644
--- a/c_glib/Brewfile
+++ b/c_glib/Brewfile
@@ -16,7 +16,7 @@
# under the License.
brew "autoconf-archive"
-brew "boost"
+brew "boost", args: ["1.65.0"]
brew "ccache"
brew "cmake"
brew "git"
diff --git a/ci/travis_before_script_c_glib.sh
b/ci/travis_before_script_c_glib.sh
index 27d1e86fd..033fbd7c6 100755
--- a/ci/travis_before_script_c_glib.sh
+++ b/ci/travis_before_script_c_glib.sh
@@ -21,9 +21,7 @@ set -ex
source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-if [ $TRAVIS_OS_NAME = "osx" ]; then
- brew update && brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile
-else # Linux
+if [ $TRAVIS_OS_NAME = "linux" ]; then
sudo apt-get install -y -q gtk-doc-tools autoconf-archive
libgirepository1.0-dev
fi
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 17b5deb36..b9afbee78 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -22,10 +22,22 @@ set -ex
source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-if [ "$1" == "--only-library" ]; then
- only_library_mode=yes
-else
- only_library_mode=no
+only_library_mode=no
+using_homebrew=no
+
+while true; do
+ case "$1" in
+ --only-library)
+ only_library_mode=yes
+ shift ;;
+ --homebrew)
+ using_homebrew=yes
+ shift ;;
+ *) break ;;
+ esac
+done
+
+if [ "$only_library_mode" == "no" ]; then
source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
fi
@@ -78,6 +90,10 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then
-DBUILD_WARNING_LEVEL=$ARROW_BUILD_WARNING_LEVEL \
$ARROW_CPP_DIR
else
+ if [ "$using_homebrew" = "yes" ]; then
+ # build against homebrew's boost if we're using it
+ export BOOST_ROOT=/usr/local/opt/boost
+ fi
cmake $CMAKE_COMMON_FLAGS \
$CMAKE_OSX_FLAGS \
-DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
diff --git a/ci/travis_build_parquet_cpp.sh b/ci/travis_build_parquet_cpp.sh
index 7d2e3ab73..f64a85d62 100755
--- a/ci/travis_build_parquet_cpp.sh
+++ b/ci/travis_build_parquet_cpp.sh
@@ -38,7 +38,7 @@ cmake \
-GNinja \
-DCMAKE_BUILD_TYPE=debug \
-DCMAKE_INSTALL_PREFIX=$ARROW_PYTHON_PARQUET_HOME \
- -DPARQUET_BOOST_USE_SHARED=off \
+ -DPARQUET_BOOST_USE_SHARED=on \
-DPARQUET_BUILD_BENCHMARKS=off \
-DPARQUET_BUILD_EXECUTABLES=off \
-DPARQUET_BUILD_TESTS=off \
diff --git a/ci/travis_install_linux.sh b/ci/travis_install_linux.sh
index acee9ebcb..74fde2774 100755
--- a/ci/travis_install_linux.sh
+++ b/ci/travis_install_linux.sh
@@ -19,7 +19,7 @@
sudo apt-get install -y -q \
gdb ccache libboost-dev libboost-filesystem-dev \
- libboost-system-dev libjemalloc-dev
+ libboost-system-dev libboost-regex-dev libjemalloc-dev
if [ "$ARROW_TRAVIS_VALGRIND" == "1" ]; then
sudo apt-get install -y -q valgrind
diff --git a/ci/travis_install_osx.sh b/ci/travis_install_osx.sh
new file mode 100755
index 000000000..b03a5f16a
--- /dev/null
+++ b/ci/travis_install_osx.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if [ "$ARROW_CI_C_GLIB_AFFECTED" = "1" ]; then
+ brew update
+ brew bundle --file=$TRAVIS_BUILD_DIR/c_glib/Brewfile
+fi
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8c0e95634..47692a831 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -574,11 +574,13 @@ set(ARROW_LINK_LIBS
set(ARROW_SHARED_PRIVATE_LINK_LIBS
${BOOST_SYSTEM_LIBRARY}
- ${BOOST_FILESYSTEM_LIBRARY})
+ ${BOOST_FILESYSTEM_LIBRARY}
+ ${BOOST_REGEX_LIBRARY})
set(ARROW_STATIC_PRIVATE_LINK_LIBS
${BOOST_SYSTEM_LIBRARY}
- ${BOOST_FILESYSTEM_LIBRARY})
+ ${BOOST_FILESYSTEM_LIBRARY}
+ ${BOOST_REGEX_LIBRARY})
if (NOT MSVC)
set(ARROW_LINK_LIBS
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 944ca1d3b..4103af41b 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -157,8 +157,11 @@ if (ARROW_BOOST_VENDORED)
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_system${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(BOOST_STATIC_FILESYSTEM_LIBRARY
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_filesystem${CMAKE_STATIC_LIBRARY_SUFFIX}")
+ set(BOOST_STATIC_REGEX_LIBRARY
+
"${BOOST_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}boost_regex${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(BOOST_SYSTEM_LIBRARY "${BOOST_STATIC_SYSTEM_LIBRARY}")
set(BOOST_FILESYSTEM_LIBRARY "${BOOST_STATIC_FILESYSTEM_LIBRARY}")
+ set(BOOST_REGEX_LIBRARY "${BOOST_STATIC_REGEX_LIBRARY}")
if (ARROW_BOOST_HEADER_ONLY)
set(BOOST_BUILD_PRODUCTS)
set(BOOST_CONFIGURE_COMMAND "")
@@ -166,7 +169,8 @@ if (ARROW_BOOST_VENDORED)
else()
set(BOOST_BUILD_PRODUCTS
${BOOST_SYSTEM_LIBRARY}
- ${BOOST_FILESYSTEM_LIBRARY})
+ ${BOOST_FILESYSTEM_LIBRARY}
+ ${BOOST_REGEX_LIBRARY})
set(BOOST_CONFIGURE_COMMAND
"./bootstrap.sh"
"--prefix=${BOOST_PREFIX}"
@@ -210,16 +214,19 @@ else()
if (ARROW_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
- find_package(Boost COMPONENTS system filesystem REQUIRED)
+ find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+ set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_SHARED_FILESYSTEM_LIBRARY
${Boost_FILESYSTEM_LIBRARY_RELEASE})
+ set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_shared)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
+ set(BOOST_REGEX_LIBRARY boost_regex_shared)
endif()
else()
# Find static boost headers and libs
@@ -228,16 +235,19 @@ else()
if (ARROW_BOOST_HEADER_ONLY)
find_package(Boost REQUIRED)
else()
- find_package(Boost COMPONENTS system filesystem REQUIRED)
+ find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+ set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_STATIC_FILESYSTEM_LIBRARY
${Boost_FILESYSTEM_LIBRARY_RELEASE})
+ set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_static)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
+ set(BOOST_REGEX_LIBRARY boost_regex_static)
endif()
endif()
endif()
@@ -254,7 +264,11 @@ if (NOT ARROW_BOOST_HEADER_ONLY)
STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
- SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
+ ADD_THIRDPARTY_LIB(boost_regex
+ STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}"
+ SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}")
+
+ SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex)
endif()
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc
b/cpp/src/arrow/python/arrow_to_pandas.cc
index aefd4d76d..17b87bf4e 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -640,11 +640,11 @@ static Status ConvertTimes(PandasOptions options, const
ChunkedArray& data,
static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
PyObject** out_values) {
PyAcquireGIL lock;
- OwnedRef decimal_ref;
- OwnedRef Decimal_ref;
- RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref));
- RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal",
&Decimal_ref));
- PyObject* Decimal = Decimal_ref.obj();
+ OwnedRef decimal;
+ OwnedRef Decimal;
+ RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
+ RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
+ PyObject* decimal_constructor = Decimal.obj();
for (int c = 0; c < data.num_chunks(); c++) {
const auto& arr = static_cast<const
arrow::Decimal128Array&>(*data.chunk(c));
@@ -654,7 +654,8 @@ static Status ConvertDecimals(PandasOptions options, const
ChunkedArray& data,
Py_INCREF(Py_None);
*out_values++ = Py_None;
} else {
- *out_values++ = internal::DecimalFromString(Decimal,
arr.FormatValue(i));
+ *out_values++ =
+ internal::DecimalFromString(decimal_constructor,
arr.FormatValue(i));
RETURN_IF_PYERROR();
}
}
diff --git a/cpp/src/arrow/python/builtin_convert.cc
b/cpp/src/arrow/python/builtin_convert.cc
index a286c6bd5..d2f900f6a 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -76,7 +76,15 @@ class ScalarVisitor {
timestamp_count_(0),
float_count_(0),
binary_count_(0),
- unicode_count_(0) {}
+ unicode_count_(0),
+ decimal_count_(0),
+ max_decimal_metadata_(std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int32_t>::min()),
+ decimal_type_() {
+ PyAcquireGIL lock;
+ Status status = internal::ImportDecimalType(&decimal_type_);
+ DCHECK_OK(status);
+ }
Status Visit(PyObject* obj) {
++total_count_;
@@ -111,10 +119,13 @@ class ScalarVisitor {
ss << type->ToString();
return Status::Invalid(ss.str());
}
+ } else if (PyObject_IsInstance(obj, decimal_type_.obj())) {
+ RETURN_NOT_OK(max_decimal_metadata_.Update(obj));
+ ++decimal_count_;
} else {
// TODO(wesm): accumulate error information somewhere
static std::string supported_types =
- "bool, float, integer, date, datetime, bytes, unicode";
+ "bool, float, integer, date, datetime, bytes, unicode, decimal";
std::stringstream ss;
ss << "Error inferring Arrow data type for collection of Python objects.
";
RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss));
@@ -125,7 +136,9 @@ class ScalarVisitor {
std::shared_ptr<DataType> GetType() {
// TODO(wesm): handling mixed-type cases
- if (float_count_) {
+ if (decimal_count_) {
+ return decimal(max_decimal_metadata_.precision(),
max_decimal_metadata_.scale());
+ } else if (float_count_) {
return float64();
} else if (int_count_) {
// TODO(wesm): tighter type later
@@ -157,8 +170,13 @@ class ScalarVisitor {
int64_t float_count_;
int64_t binary_count_;
int64_t unicode_count_;
+ int64_t decimal_count_;
+
+ internal::DecimalMetadata max_decimal_metadata_;
+
// Place to accumulate errors
// std::vector<Status> errors_;
+ OwnedRefNoGIL decimal_type_;
};
static constexpr int MAX_NESTING_LEVELS = 32;
@@ -379,17 +397,14 @@ class TypedConverter : public SeqConverter {
BuilderType* typed_builder_;
};
-// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull()
+// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(),
and IsNull()
// method calls.
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
Status AppendSingle(PyObject* obj) override {
- if (obj == Py_None) {
- return static_cast<Derived*>(this)->AppendNull();
- } else {
- return static_cast<Derived*>(this)->AppendItem(obj);
- }
+ auto self = static_cast<Derived*>(this);
+ return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj);
}
Status AppendMultiple(PyObject* obj, int64_t size) override {
@@ -409,6 +424,7 @@ class TypedConverterVisitor : public
TypedConverter<BuilderType> {
// Append a missing item (default implementation)
Status AppendNull() { return this->typed_builder_->AppendNull(); }
+ bool IsNull(PyObject* obj) const { return obj == Py_None; }
};
class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter>
{
@@ -830,12 +846,16 @@ class DecimalConverter
public:
// Append a non-missing item
Status AppendItem(PyObject* obj) {
- /// TODO(phillipc): Check for nan?
Decimal128 value;
const auto& type = static_cast<const
DecimalType&>(*typed_builder_->type());
RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value));
return typed_builder_->Append(value);
}
+
+ bool IsNull(PyObject* obj) const {
+ return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj)
||
+ (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
+ }
};
// Dynamic constructor for sequence converters
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index df1db9991..429068dd1 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+#include <algorithm>
+#include <limits>
#include <sstream>
#include "arrow/python/common.h"
@@ -61,6 +63,7 @@ namespace internal {
Status ImportModule(const std::string& module_name, OwnedRef* ref) {
PyObject* module = PyImport_ImportModule(module_name.c_str());
RETURN_IF_PYERROR();
+ DCHECK_NE(module, nullptr) << "unable to import the " << module_name << "
module";
ref->reset(module);
return Status::OK();
}
@@ -71,10 +74,18 @@ Status ImportFromModule(const OwnedRef& module, const
std::string& name, OwnedRe
PyObject* attr = PyObject_GetAttrString(module.obj(), name.c_str());
RETURN_IF_PYERROR();
+ DCHECK_NE(attr, nullptr) << "unable to import the " << name << " object";
ref->reset(attr);
return Status::OK();
}
+Status ImportDecimalType(OwnedRef* decimal_type) {
+ OwnedRef decimal_module;
+ RETURN_NOT_OK(ImportModule("decimal", &decimal_module));
+ RETURN_NOT_OK(ImportFromModule(decimal_module, "Decimal", decimal_type));
+ return Status::OK();
+}
+
Status PythonDecimalToString(PyObject* python_decimal, std::string* out) {
// Call Python's str(decimal_object)
OwnedRef str_obj(PyObject_Str(python_decimal));
@@ -93,13 +104,19 @@ Status PythonDecimalToString(PyObject* python_decimal,
std::string* out) {
return Status::OK();
}
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t*
precision,
- int32_t* scale) {
+// \brief Infer the precision and scale of a Python decimal.Decimal instance
+// \param python_decimal[in] An instance of decimal.Decimal
+// \param precision[out] The value of the inferred precision
+// \param scale[out] The value of the inferred scale
+// \return The status of the operation
+static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t*
precision,
+ int32_t* scale) {
DCHECK_NE(python_decimal, NULLPTR);
DCHECK_NE(precision, NULLPTR);
DCHECK_NE(scale, NULLPTR);
- OwnedRef as_tuple(PyObject_CallMethod(python_decimal, "as_tuple", "()"));
+ // TODO(phillipc): Make sure we perform PyDecimal_Check(python_decimal) as a
DCHECK
+ OwnedRef as_tuple(PyObject_CallMethod(python_decimal, "as_tuple", ""));
RETURN_IF_PYERROR();
DCHECK(PyTuple_Check(as_tuple.obj()));
@@ -117,8 +134,23 @@ Status InferDecimalPrecisionAndScale(PyObject*
python_decimal, int32_t* precisio
const auto exponent = static_cast<int32_t>(PyLong_AsLong(py_exponent.obj()));
RETURN_IF_PYERROR();
- *precision = num_digits;
- *scale = -exponent;
+ const int32_t abs_exponent = std::abs(exponent);
+
+ int32_t num_additional_zeros;
+
+ if (num_digits <= abs_exponent) {
+ DCHECK_NE(exponent, 0) << "exponent should never be zero here";
+
+ // we have leading/trailing zeros, leading if exponent is negative
+ num_additional_zeros = exponent < 0 ? abs_exponent - num_digits : exponent;
+ *scale = static_cast<int32_t>(exponent < 0) * -exponent;
+ } else {
+ // we can use the number of digits as the precision
+ num_additional_zeros = 0;
+ *scale = -exponent;
+ }
+
+ *precision = num_digits + num_additional_zeros;
return Status::OK();
}
@@ -193,6 +225,62 @@ Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
return Status::OK();
}
+bool PyFloat_isnan(PyObject* obj) {
+ return PyFloat_Check(obj) && std::isnan(PyFloat_AS_DOUBLE(obj));
+}
+
+bool PyDecimal_Check(PyObject* obj) {
+ // TODO(phillipc): Is this expensive?
+ OwnedRef Decimal;
+ Status status = ImportDecimalType(&Decimal);
+ DCHECK_OK(status);
+ const int32_t result = PyObject_IsInstance(obj, Decimal.obj());
+ DCHECK_NE(result, -1) << " error during PyObject_IsInstance check";
+ return result == 1;
+}
+
+bool PyDecimal_ISNAN(PyObject* obj) {
+ DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal";
+ OwnedRef is_nan(PyObject_CallMethod(obj, "is_nan", ""));
+ return PyObject_IsTrue(is_nan.obj()) == 1;
+}
+
+DecimalMetadata::DecimalMetadata()
+ : DecimalMetadata(std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int32_t>::min()) {}
+
+DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale)
+ : precision_(precision), scale_(scale) {}
+
+Status DecimalMetadata::Update(int32_t suggested_precision, int32_t
suggested_scale) {
+ const int32_t current_precision = precision_;
+ precision_ = std::max(current_precision, suggested_precision);
+
+ const int32_t current_scale = scale_;
+ scale_ = std::max(current_scale, suggested_scale);
+
+ // if our suggested scale is zero and we don't yet have enough precision
then we need to
+ // add whatever the current scale is to the precision
+ if (suggested_scale == 0 && suggested_precision > current_precision) {
+ precision_ += scale_;
+ }
+
+ return Status::OK();
+}
+
+Status DecimalMetadata::Update(PyObject* object) {
+ DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
+
+ if (ARROW_PREDICT_FALSE(PyDecimal_ISNAN(object))) {
+ return Status::OK();
+ }
+
+ int32_t precision;
+ int32_t scale;
+ RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
+ return Update(precision, scale);
+}
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c0171aa2f..6be0e49b1 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -36,29 +36,92 @@ namespace py {
class OwnedRef;
-ARROW_EXPORT
-std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+// \brief Get an arrow DataType instance from Arrow's Type::type enum
+// \param[in] type One of the values of Arrow's Type::type enum
+// \return A shared pointer to DataType
+ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
namespace internal {
+// \brief Import a Python module
+// \param[in] module_name The name of the module
+// \param[out] ref The OwnedRef containing the module PyObject*
Status ImportModule(const std::string& module_name, OwnedRef* ref);
-Status ImportFromModule(const OwnedRef& module, const std::string& module_name,
- OwnedRef* ref);
-Status PythonDecimalToString(PyObject* python_decimal, std::string* out);
+// \brief Import an object from a Python module
+// \param[in] module A Python module
+// \param[in] name The name of the object to import
+// \param[out] ref The OwnedRef containing the \c name attribute of the Python
module \c
+// module
+Status ImportFromModule(const OwnedRef& module, const std::string& name,
OwnedRef* ref);
+
+// \brief Import
+Status ImportDecimalType(OwnedRef* decimal_type);
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal,
- int32_t* precision = NULLPTR,
- int32_t* scale = NULLPTR);
+// \brief Convert a Python Decimal object to a C++ string
+// \param[in] python_decimal A Python decimal.Decimal instance
+// \param[out] The string representation of the Python Decimal instance
+// \return The status of the operation
+Status PythonDecimalToString(PyObject* python_decimal, std::string* out);
+// \brief Convert a C++ std::string to a Python Decimal instance
+// \param[in] decimal_constructor The decimal type object
+// \param[in] decimal_string A decimal string
+// \return An instance of decimal.Decimal
PyObject* DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string);
+
+// \brief Convert a Python decimal to an Arrow Decimal128 object
+// \param[in] python_decimal A Python decimal.Decimal instance
+// \param[in] arrow_type An instance of arrow::DecimalType
+// \param[out] out A pointer to a Decimal128
+// \return The status of the operation
Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType&
arrow_type,
Decimal128* out);
+
+// \brief Check whether obj is an integer, independent of Python versions.
bool IsPyInteger(PyObject* obj);
+// \brief Check whether obj is nan
+bool PyFloat_isnan(PyObject* obj);
+
+// \brief Check whether obj is an instance of Decimal
+bool PyDecimal_Check(PyObject* obj);
+
+// \brief Check whether obj is nan. This function will abort the program if
the argument
+// is not a Decimal instance
+bool PyDecimal_ISNAN(PyObject* obj);
+
+// \brief Convert a Python integer into an unsigned 64-bit integer
+// \param[in] obj A Python integer
+// \param[out] out A pointer to a C uint64_t to hold the result of the
conversion
+// \return The status of the operation
Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
+// \brief Helper class to track and update the precision and scale of a decimal
+class DecimalMetadata {
+ public:
+ DecimalMetadata();
+ DecimalMetadata(int32_t precision, int32_t scale);
+
+ // \brief Adjust the precision and scale of a decimal type given a new
precision and a
+ // new scale \param[in] suggested_precision A candidate precision \param[in]
+ // suggested_scale A candidate scale \return The status of the operation
+ Status Update(int32_t suggested_precision, int32_t suggested_scale);
+
+ // \brief A convenient interface for updating the precision and scale based
on a Python
+ // Decimal object \param object A Python Decimal object \return The status
of the
+ // operation
+ Status Update(PyObject* object);
+
+ int32_t precision() const { return precision_; }
+ int32_t scale() const { return scale_; }
+
+ private:
+ int32_t precision_;
+ int32_t scale_;
+};
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/numpy-internal.h
b/cpp/src/arrow/python/numpy-internal.h
index 6c9c871a1..8d4308065 100644
--- a/cpp/src/arrow/python/numpy-internal.h
+++ b/cpp/src/arrow/python/numpy-internal.h
@@ -54,6 +54,9 @@ class Ndarray1DIndexer {
T* data() const { return data_; }
+ T* begin() const { return data(); }
+ T* end() const { return begin() + size() * stride_; }
+
bool is_strided() const { return stride_ == 1; }
T& operator[](size_type index) { return data_[index * stride_]; }
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc
b/cpp/src/arrow/python/numpy_to_arrow.cc
index 23418ad92..04a71c1f6 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -67,17 +67,9 @@ constexpr int64_t kBinaryMemoryLimit =
std::numeric_limits<int32_t>::max();
namespace {
-inline bool PyFloat_isnan(PyObject* obj) {
- if (PyFloat_Check(obj)) {
- double val = PyFloat_AS_DOUBLE(obj);
- return val != val;
- } else {
- return false;
- }
-}
-
inline bool PandasObjectIsNull(PyObject* obj) {
- return obj == Py_None || obj == numpy_nan || PyFloat_isnan(obj);
+ return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) ||
+ (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
}
inline bool PyObject_is_string(PyObject* obj) {
@@ -88,10 +80,8 @@ inline bool PyObject_is_string(PyObject* obj) {
#endif
}
-inline bool PyObject_is_float(PyObject* obj) { return PyFloat_Check(obj); }
-
inline bool PyObject_is_integer(PyObject* obj) {
- return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj);
+ return !PyBool_Check(obj) && PyArray_IsIntegerScalar(obj);
}
template <int TYPE>
@@ -310,13 +300,18 @@ class NumPyConverter {
arr_(reinterpret_cast<PyArrayObject*>(ao)),
dtype_(PyArray_DESCR(arr_)),
mask_(nullptr),
- use_pandas_null_sentinels_(use_pandas_null_sentinels) {
+ use_pandas_null_sentinels_(use_pandas_null_sentinels),
+ decimal_type_() {
if (mo != nullptr && mo != Py_None) {
mask_ = reinterpret_cast<PyArrayObject*>(mo);
}
length_ = static_cast<int64_t>(PyArray_SIZE(arr_));
itemsize_ = static_cast<int>(PyArray_DESCR(arr_)->elsize);
stride_ = static_cast<int64_t>(PyArray_STRIDES(arr_)[0]);
+
+ PyAcquireGIL lock;
+ Status status = internal::ImportDecimalType(&decimal_type_);
+ DCHECK_OK(status);
}
bool is_strided() const { return itemsize_ != stride_; }
@@ -491,6 +486,8 @@ class NumPyConverter {
bool use_pandas_null_sentinels_;
+ OwnedRefNoGIL decimal_type_;
+
// Used in visitor pattern
std::vector<std::shared_ptr<Array>> out_arrays_;
@@ -743,58 +740,42 @@ Status NumPyConverter::ConvertDates() {
Status NumPyConverter::ConvertDecimals() {
PyAcquireGIL lock;
- // Import the decimal module and Decimal class
- OwnedRef decimal;
- OwnedRef Decimal;
- RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
- RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
-
+ internal::DecimalMetadata max_decimal_metadata;
Ndarray1DIndexer<PyObject*> objects(arr_);
- PyObject* object = objects[0];
if (type_ == NULLPTR) {
- int32_t precision;
- int32_t desired_scale;
-
- int32_t tmp_precision;
- int32_t tmp_scale;
-
- RETURN_NOT_OK(
- internal::InferDecimalPrecisionAndScale(objects[0], &precision,
&desired_scale));
-
- for (int64_t i = 1; i < length_; ++i) {
- RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(objects[i],
&tmp_precision,
- &tmp_scale));
- precision = std::max(precision, tmp_precision);
-
- if (std::abs(desired_scale) < std::abs(tmp_scale)) {
- desired_scale = tmp_scale;
- }
+ for (PyObject* object : objects) {
+ RETURN_NOT_OK(max_decimal_metadata.Update(object));
}
- type_ = ::arrow::decimal(precision, desired_scale);
+ type_ =
+ ::arrow::decimal(max_decimal_metadata.precision(),
max_decimal_metadata.scale());
}
Decimal128Builder builder(type_, pool_);
RETURN_NOT_OK(builder.Resize(length_));
const auto& decimal_type = static_cast<const DecimalType&>(*type_);
- PyObject* Decimal_type_object = Decimal.obj();
- for (int64_t i = 0; i < length_; ++i) {
- object = objects[i];
+ for (PyObject* object : objects) {
+ const int is_decimal = PyObject_IsInstance(object, decimal_type_.obj());
- if (PyObject_IsInstance(object, Decimal_type_object)) {
- Decimal128 value;
- RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type,
&value));
- RETURN_NOT_OK(builder.Append(value));
- } else if (PandasObjectIsNull(object)) {
- RETURN_NOT_OK(builder.AppendNull());
- } else {
+ if (ARROW_PREDICT_FALSE(is_decimal == 0)) {
std::stringstream ss;
ss << "Error converting from Python objects to Decimal: ";
RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
return Status::Invalid(ss.str());
+ } else if (ARROW_PREDICT_FALSE(is_decimal == -1)) {
+ DCHECK_NE(PyErr_Occurred(), nullptr);
+ RETURN_IF_PYERROR();
+ }
+
+ if (PandasObjectIsNull(object)) {
+ RETURN_NOT_OK(builder.AppendNull());
+ } else {
+ Decimal128 value;
+ RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type,
&value));
+ RETURN_NOT_OK(builder.Append(value));
}
}
return PushBuilderResult(&builder);
@@ -1045,18 +1026,13 @@ Status NumPyConverter::ConvertObjectsInfer() {
objects.Init(arr_);
PyDateTime_IMPORT;
- OwnedRef decimal;
- OwnedRef Decimal;
- RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
- RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
-
for (int64_t i = 0; i < length_; ++i) {
PyObject* obj = objects[i];
if (PandasObjectIsNull(obj)) {
continue;
} else if (PyObject_is_string(obj)) {
return ConvertObjectStrings();
- } else if (PyObject_is_float(obj)) {
+ } else if (PyFloat_Check(obj)) {
return ConvertObjectFloats();
} else if (PyBool_Check(obj)) {
return ConvertBooleans();
@@ -1069,7 +1045,7 @@ Status NumPyConverter::ConvertObjectsInfer() {
return ConvertDateTimes();
} else if (PyTime_Check(obj)) {
return ConvertTimes();
- } else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj()))
{
+ } else if (PyObject_IsInstance(obj, decimal_type_.obj()) == 1) {
return ConvertDecimals();
} else if (PyList_Check(obj)) {
std::shared_ptr<DataType> inferred_type;
diff --git a/cpp/src/arrow/python/python-test.cc
b/cpp/src/arrow/python/python-test.cc
index b76caaece..16ac1e332 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -78,15 +78,14 @@ TEST(OwnedRefNoGIL, TestMoves) {
class DecimalTest : public ::testing::Test {
public:
- DecimalTest() : lock_(), decimal_module_(), decimal_constructor_() {
- auto s = internal::ImportModule("decimal", &decimal_module_);
- DCHECK(s.ok()) << s.message();
- DCHECK_NE(decimal_module_.obj(), NULLPTR);
+ DecimalTest() : lock_(), decimal_constructor_() {
+ OwnedRef decimal_module;
- s = internal::ImportFromModule(decimal_module_, "Decimal",
&decimal_constructor_);
- DCHECK(s.ok()) << s.message();
+ Status status = internal::ImportModule("decimal", &decimal_module);
+ DCHECK_OK(status);
- DCHECK_NE(decimal_constructor_.obj(), NULLPTR);
+ status = internal::ImportFromModule(decimal_module, "Decimal",
&decimal_constructor_);
+ DCHECK_OK(status);
}
OwnedRef CreatePythonDecimal(const std::string& string_value) {
@@ -94,16 +93,17 @@ class DecimalTest : public ::testing::Test {
return ref;
}
+ PyObject* decimal_constructor() const { return decimal_constructor_.obj(); }
+
private:
PyAcquireGIL lock_;
- OwnedRef decimal_module_;
OwnedRef decimal_constructor_;
};
TEST_F(DecimalTest, TestPythonDecimalToString) {
std::string decimal_string("-39402950693754869342983");
- OwnedRef python_object = this->CreatePythonDecimal(decimal_string);
+ OwnedRef python_object(this->CreatePythonDecimal(decimal_string));
ASSERT_NE(python_object.obj(), nullptr);
std::string string_result;
@@ -114,35 +114,57 @@ TEST_F(DecimalTest, TestInferPrecisionAndScale) {
std::string decimal_string("-394029506937548693.42983");
OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
- int32_t precision;
- int32_t scale;
-
- ASSERT_OK(
- internal::InferDecimalPrecisionAndScale(python_decimal.obj(),
&precision, &scale));
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
const auto expected_precision =
static_cast<int32_t>(decimal_string.size() - 2); // 1 for -, 1 for .
const int32_t expected_scale = 5;
- ASSERT_EQ(expected_precision, precision);
- ASSERT_EQ(expected_scale, scale);
+ ASSERT_EQ(expected_precision, metadata.precision());
+ ASSERT_EQ(expected_scale, metadata.scale());
}
TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) {
std::string decimal_string("-3.94042983E+10");
OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
- int32_t precision;
- int32_t scale;
-
- ASSERT_OK(
- internal::InferDecimalPrecisionAndScale(python_decimal.obj(),
&precision, &scale));
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
const auto expected_precision = 9;
const int32_t expected_scale = -2;
- ASSERT_EQ(expected_precision, precision);
- ASSERT_EQ(expected_scale, scale);
+ ASSERT_EQ(expected_precision, metadata.precision());
+ ASSERT_EQ(expected_scale, metadata.scale());
+}
+
+TEST_F(DecimalTest, TestInferAllLeadingZeros) {
+ std::string decimal_string("0.001");
+ OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
+
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
+ ASSERT_EQ(3, metadata.precision());
+ ASSERT_EQ(3, metadata.scale());
+}
+
+TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationPositive) {
+ std::string decimal_string("0.01E5");
+ OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
+ ASSERT_EQ(4, metadata.precision());
+ ASSERT_EQ(0, metadata.scale());
+}
+
+TEST_F(DecimalTest, TestInferAllLeadingZerosExponentialNotationNegative) {
+ std::string decimal_string("0.01E3");
+ OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
+ ASSERT_EQ(2, metadata.precision());
+ ASSERT_EQ(0, metadata.scale());
}
TEST(PandasConversionTest, TestObjectBlockWriteFails) {
@@ -226,14 +248,12 @@ TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable)
{
TEST_F(DecimalTest, TestOverflowFails) {
Decimal128 value;
- int32_t precision;
- int32_t scale;
OwnedRef python_decimal(
this->CreatePythonDecimal("9999999999999999999999999999999999999.9"));
- ASSERT_OK(
- internal::InferDecimalPrecisionAndScale(python_decimal.obj(),
&precision, &scale));
- ASSERT_EQ(38, precision);
- ASSERT_EQ(1, scale);
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(python_decimal.obj()));
+ ASSERT_EQ(38, metadata.precision());
+ ASSERT_EQ(1, metadata.scale());
auto type = ::arrow::decimal(38, 38);
const auto& decimal_type = static_cast<const DecimalType&>(*type);
@@ -241,5 +261,111 @@ TEST_F(DecimalTest, TestOverflowFails) {
decimal_type,
&value));
}
+TEST_F(DecimalTest, TestNoneAndNaN) {
+ OwnedRef list_ref(PyList_New(4));
+ PyObject* list = list_ref.obj();
+
+ ASSERT_NE(list, nullptr);
+
+ PyObject* constructor = this->decimal_constructor();
+ PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234");
+ ASSERT_NE(decimal_value, nullptr);
+
+ Py_INCREF(Py_None);
+ PyObject* missing_value1 = Py_None;
+ ASSERT_NE(missing_value1, nullptr);
+
+ PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN);
+ ASSERT_NE(missing_value2, nullptr);
+
+ PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan");
+ ASSERT_NE(missing_value3, nullptr);
+
+ // This steals a reference to each object, so we don't need to decref them
later,
+ // just the list
+ ASSERT_EQ(0, PyList_SetItem(list, 0, decimal_value));
+ ASSERT_EQ(0, PyList_SetItem(list, 1, missing_value1));
+ ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2));
+ ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3));
+
+ MemoryPool* pool = default_memory_pool();
+ std::shared_ptr<Array> arr;
+ ASSERT_OK(ConvertPySequence(list, pool, &arr));
+ ASSERT_TRUE(arr->IsValid(0));
+ ASSERT_TRUE(arr->IsNull(1));
+ ASSERT_TRUE(arr->IsNull(2));
+ ASSERT_TRUE(arr->IsNull(3));
+}
+
+TEST_F(DecimalTest, TestMixedPrecisionAndScale) {
+ std::vector<std::string> strings{{"0.001", "1.01E5", "1.01E5"}};
+
+ OwnedRef list_ref(PyList_New(static_cast<Py_ssize_t>(strings.size())));
+ PyObject* list = list_ref.obj();
+
+ ASSERT_NE(list, nullptr);
+
+ // PyList_SetItem steals a reference to the item so we don't decref it later
+ PyObject* decimal_constructor = this->decimal_constructor();
+ for (Py_ssize_t i = 0; i < static_cast<Py_ssize_t>(strings.size()); ++i) {
+ const int result = PyList_SetItem(
+ list, i, internal::DecimalFromString(decimal_constructor,
strings.at(i)));
+ ASSERT_EQ(0, result);
+ }
+
+ MemoryPool* pool = default_memory_pool();
+ std::shared_ptr<Array> arr;
+ ASSERT_OK(ConvertPySequence(list, pool, &arr));
+ const auto& type = static_cast<const DecimalType&>(*arr->type());
+
+ int32_t expected_precision = 9;
+ int32_t expected_scale = 3;
+ ASSERT_EQ(expected_precision, type.precision());
+ ASSERT_EQ(expected_scale, type.scale());
+}
+
+TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) {
+ PyAcquireGIL lock;
+ MemoryPool* pool = default_memory_pool();
+ std::shared_ptr<Array> arr;
+
+ PyObject* value1 = this->CreatePythonDecimal("0.01").detach();
+ ASSERT_NE(value1, nullptr);
+
+ PyObject* value2 = this->CreatePythonDecimal("0.001").detach();
+ ASSERT_NE(value2, nullptr);
+
+ OwnedRef list_ref(PyList_New(2));
+ PyObject* list = list_ref.obj();
+
+ // This steals a reference to each object, so we don't need to decref them
later
+ // just the list
+ ASSERT_EQ(PyList_SetItem(list, 0, value1), 0);
+ ASSERT_EQ(PyList_SetItem(list, 1, value2), 0);
+
+ ASSERT_OK(ConvertPySequence(list, pool, &arr));
+
+ const auto& type = static_cast<const Decimal128Type&>(*arr->type());
+ ASSERT_EQ(3, type.precision());
+ ASSERT_EQ(3, type.scale());
+}
+
+TEST_F(DecimalTest, SimpleInference) {
+ OwnedRef value(this->CreatePythonDecimal("0.01"));
+ ASSERT_NE(value.obj(), nullptr);
+ internal::DecimalMetadata metadata;
+ ASSERT_OK(metadata.Update(value.obj()));
+ ASSERT_EQ(2, metadata.precision());
+ ASSERT_EQ(2, metadata.scale());
+}
+
+TEST_F(DecimalTest, UpdateWithNaN) {
+ internal::DecimalMetadata metadata;
+ OwnedRef nan_value(this->CreatePythonDecimal("nan"));
+ ASSERT_OK(metadata.Update(nan_value.obj()));
+ ASSERT_EQ(std::numeric_limits<int32_t>::min(), metadata.precision());
+ ASSERT_EQ(std::numeric_limits<int32_t>::min(), metadata.scale());
+}
+
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal-test.cc
b/cpp/src/arrow/util/decimal-test.cc
index e4406747d..6db46d485 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -14,7 +14,6 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-//
#include <cstdint>
#include <string>
@@ -37,7 +36,7 @@ class DecimalTestFixture : public ::testing::Test {
TEST_F(DecimalTestFixture, TestToString) {
Decimal128 decimal(this->integer_value_);
- int scale = 5;
+ int32_t scale = 5;
std::string result = decimal.ToString(scale);
ASSERT_EQ(result, this->string_value_);
}
@@ -45,7 +44,7 @@ TEST_F(DecimalTestFixture, TestToString) {
TEST_F(DecimalTestFixture, TestFromString) {
Decimal128 expected(this->integer_value_);
Decimal128 result;
- int precision, scale;
+ int32_t precision, scale;
ASSERT_OK(Decimal128::FromString(this->string_value_, &result, &precision,
&scale));
ASSERT_EQ(result, expected);
ASSERT_EQ(precision, 8);
@@ -55,8 +54,8 @@ TEST_F(DecimalTestFixture, TestFromString) {
TEST_F(DecimalTestFixture, TestStringStartingWithPlus) {
std::string plus_value("+234.234");
Decimal128 out;
- int scale;
- int precision;
+ int32_t scale;
+ int32_t precision;
ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale));
ASSERT_EQ(234234, out);
ASSERT_EQ(6, precision);
@@ -67,8 +66,8 @@ TEST_F(DecimalTestFixture, TestStringStartingWithPlus128) {
std::string plus_value("+2342394230592.232349023094");
Decimal128 expected_value("2342394230592232349023094");
Decimal128 out;
- int scale;
- int precision;
+ int32_t scale;
+ int32_t precision;
ASSERT_OK(Decimal128::FromString(plus_value, &out, &precision, &scale));
ASSERT_EQ(expected_value, out);
ASSERT_EQ(25, precision);
@@ -90,9 +89,7 @@ TEST(DecimalTest, TestFromDecimalString128) {
Decimal128 result;
ASSERT_OK(Decimal128::FromString(string_value, &result));
Decimal128 expected(static_cast<int64_t>(-230492239423435324));
- expected *= 100;
- expected -= 12;
- ASSERT_EQ(result, expected);
+ ASSERT_EQ(result, expected * 100 - 12);
// Sanity check that our number is actually using more than 64 bits
ASSERT_NE(result.high_bits(), 0);
@@ -194,36 +191,36 @@ TEST(DecimalTest, TestInvalidInputWithLeadingZeros) {
TEST(DecimalZerosTest, LeadingZerosNoDecimalPoint) {
std::string string_value("0000000");
Decimal128 d;
- int precision;
- int scale;
+ int32_t precision;
+ int32_t scale;
ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
- ASSERT_EQ(precision, 7);
- ASSERT_EQ(scale, 0);
- ASSERT_EQ(d, 0);
+ ASSERT_EQ(0, precision);
+ ASSERT_EQ(0, scale);
+ ASSERT_EQ(0, d);
}
TEST(DecimalZerosTest, LeadingZerosDecimalPoint) {
std::string string_value("000.0000");
Decimal128 d;
- int precision;
- int scale;
+ int32_t precision;
+ int32_t scale;
ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
// We explicitly do not support this for now, otherwise this would be
ASSERT_EQ
- ASSERT_NE(precision, 7);
+ ASSERT_EQ(4, precision);
- ASSERT_EQ(scale, 4);
- ASSERT_EQ(d, 0);
+ ASSERT_EQ(4, scale);
+ ASSERT_EQ(0, d);
}
TEST(DecimalZerosTest, NoLeadingZerosDecimalPoint) {
std::string string_value(".00000");
Decimal128 d;
- int precision;
- int scale;
+ int32_t precision;
+ int32_t scale;
ASSERT_OK(Decimal128::FromString(string_value, &d, &precision, &scale));
- ASSERT_EQ(precision, 5);
- ASSERT_EQ(scale, 5);
- ASSERT_EQ(d, 0);
+ ASSERT_EQ(5, precision);
+ ASSERT_EQ(5, scale);
+ ASSERT_EQ(0, d);
}
template <typename T>
@@ -335,16 +332,16 @@ INSTANTIATE_TEST_CASE_P(Decimal128ParsingTest,
Decimal128ParsingTest,
std::make_tuple("0.00123", 123ULL,
5),
std::make_tuple("1.23E-8", 123ULL,
10),
std::make_tuple("-1.23E-8", -123LL,
10),
- std::make_tuple("1.23E+3", 123ULL,
-1),
- std::make_tuple("-1.23E+3", -123LL,
-1),
- std::make_tuple("1.23E+5", 123ULL,
-3),
- std::make_tuple("1.2345E+7",
12345ULL, -3),
+ std::make_tuple("1.23E+3", 1230ULL,
0),
+ std::make_tuple("-1.23E+3", -1230LL,
0),
+ std::make_tuple("1.23E+5",
123000ULL, 0),
+ std::make_tuple("1.2345E+7",
12345000ULL, 0),
std::make_tuple("1.23e-8", 123ULL,
10),
std::make_tuple("-1.23e-8", -123LL,
10),
- std::make_tuple("1.23e+3", 123ULL,
-1),
- std::make_tuple("-1.23e+3", -123LL,
-1),
- std::make_tuple("1.23e+5", 123ULL,
-3),
- std::make_tuple("1.2345e+7",
12345ULL, -3)));
+ std::make_tuple("1.23e+3", 1230ULL,
0),
+ std::make_tuple("-1.23e+3", -1230LL,
0),
+ std::make_tuple("1.23e+5",
123000ULL, 0),
+ std::make_tuple("1.2345e+7",
12345000ULL, 0)));
class Decimal128ParsingTestInvalid : public
::testing::TestWithParam<std::string> {};
@@ -375,4 +372,14 @@ TEST(Decimal128Test, TestSmallNumberFormat) {
ASSERT_EQ(expected, result);
}
+TEST(Decimal128Test, TestNoDecimalPointExponential) {
+ Decimal128 value;
+ int32_t precision;
+ int32_t scale;
+ ASSERT_OK(Decimal128::FromString("1E1", &value, &precision, &scale));
+ ASSERT_EQ(10, value.low_bits());
+ ASSERT_EQ(2, precision);
+ ASSERT_EQ(0, scale);
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index a3c8cda76..48380a9c9 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -23,12 +23,55 @@
#include <limits>
#include <sstream>
+#include <boost/regex.hpp>
+
#include "arrow/util/bit-util.h"
#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
namespace arrow {
+static const Decimal128 ScaleMultipliers[] = {
+ Decimal128(0LL),
+ Decimal128(10LL),
+ Decimal128(100LL),
+ Decimal128(1000LL),
+ Decimal128(10000LL),
+ Decimal128(100000LL),
+ Decimal128(1000000LL),
+ Decimal128(10000000LL),
+ Decimal128(100000000LL),
+ Decimal128(1000000000LL),
+ Decimal128(10000000000LL),
+ Decimal128(100000000000LL),
+ Decimal128(1000000000000LL),
+ Decimal128(10000000000000LL),
+ Decimal128(100000000000000LL),
+ Decimal128(1000000000000000LL),
+ Decimal128(10000000000000000LL),
+ Decimal128(100000000000000000LL),
+ Decimal128(1000000000000000000LL),
+ Decimal128(0LL, 10000000000000000000ULL),
+ Decimal128(5LL, 7766279631452241920ULL),
+ Decimal128(54LL, 3875820019684212736ULL),
+ Decimal128(542LL, 1864712049423024128ULL),
+ Decimal128(5421LL, 200376420520689664ULL),
+ Decimal128(54210LL, 2003764205206896640ULL),
+ Decimal128(542101LL, 1590897978359414784ULL),
+ Decimal128(5421010LL, 15908979783594147840ULL),
+ Decimal128(54210108LL, 11515845246265065472ULL),
+ Decimal128(542101086LL, 4477988020393345024ULL),
+ Decimal128(5421010862LL, 7886392056514347008ULL),
+ Decimal128(54210108624LL, 5076944270305263616ULL),
+ Decimal128(542101086242LL, 13875954555633532928ULL),
+ Decimal128(5421010862427LL, 9632337040368467968ULL),
+ Decimal128(54210108624275LL, 4089650035136921600ULL),
+ Decimal128(542101086242752LL, 4003012203950112768ULL),
+ Decimal128(5421010862427522LL, 3136633892082024448ULL),
+ Decimal128(54210108624275221LL, 12919594847110692864ULL),
+ Decimal128(542101086242752217LL, 68739955140067328ULL),
+ Decimal128(5421010862427522170LL, 687399551400673280ULL)};
+
static constexpr uint64_t kIntMask = 0xFFFFFFFF;
static constexpr auto kCarryBit = static_cast<uint64_t>(1) <<
static_cast<uint64_t>(32);
@@ -49,7 +92,7 @@ std::array<uint8_t, 16> Decimal128::ToBytes() const {
}
void Decimal128::ToBytes(uint8_t* out) const {
- DCHECK_NE(out, NULLPTR);
+ DCHECK_NE(out, nullptr);
reinterpret_cast<uint64_t*>(out)[0] = BitUtil::ToLittleEndian(low_bits_);
reinterpret_cast<int64_t*>(out)[1] = BitUtil::ToLittleEndian(high_bits_);
}
@@ -187,12 +230,10 @@ static constexpr int64_t kPowersOfTen[kInt64DecimalDigits
+ 1] = {1LL,
100000000000000000LL,
1000000000000000000LL};
-static inline bool isdigit(char value) { return std::isdigit(value) != 0; }
-
static void StringToInteger(const std::string& str, Decimal128* out) {
using std::size_t;
- DCHECK_NE(out, NULLPTR) << "Decimal128 output variable cannot be NULLPTR";
+ DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr";
DCHECK_EQ(*out, 0)
<< "When converting a string to Decimal128 the initial output must be 0";
@@ -212,160 +253,124 @@ static void StringToInteger(const std::string& str,
Decimal128* out) {
}
}
-Status Decimal128::FromString(const std::string& s, Decimal128* out, int*
precision,
- int* scale) {
- // Implements this regex:
"(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?((E|e)(\\+|-)?\\d+)?";
- if (s.empty()) {
- return Status::Invalid("Empty string cannot be converted to decimal");
- }
+static const boost::regex DECIMAL_REGEX(
+ // sign of the number
+ "(?<SIGN>[-+]?)"
- std::string::const_iterator charp = s.cbegin();
- std::string::const_iterator end = s.cend();
+ // digits around the decimal point
+
"(((?<LEFT_DIGITS>\\d+)\\.(?<FIRST_RIGHT_DIGITS>\\d*)|\\.(?<SECOND_RIGHT_DIGITS>\\d+)"
+ ")"
- char first_char = *charp;
- bool is_negative = false;
- if (first_char == '+' || first_char == '-') {
- is_negative = first_char == '-';
- ++charp;
- }
+ // optional exponent
+ "([eE](?<FIRST_EXP_VALUE>[-+]?\\d+))?"
- if (charp == end) {
- std::stringstream ss;
- ss << "Single character: '" << first_char << "' is not a valid decimal
value";
- return Status::Invalid(ss.str());
- }
+ // otherwise
+ "|"
- std::string::const_iterator numeric_string_start = charp;
+ // we're just an integer
+ "(?<INTEGER>\\d+)"
- DCHECK_LT(charp, end);
+ // or an integer with an exponent
+ "(?:[eE](?<SECOND_EXP_VALUE>[-+]?\\d+))?)");
- // skip leading zeros
- charp = std::find_if_not(charp, end, [](char value) { return value == '0';
});
+static inline bool is_zero_character(char c) { return c == '0'; }
- // all zeros and no decimal point
- if (charp == end) {
- if (out != NULLPTR) {
- *out = 0;
- }
+Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t*
precision,
+ int32_t* scale) {
+ if (s.empty()) {
+ return Status::Invalid("Empty string cannot be converted to decimal");
+ }
- // Not sure what other libraries assign precision to for this case (this
case of
- // a string consisting only of one or more zeros)
- if (precision != NULLPTR) {
- *precision = static_cast<int>(charp - numeric_string_start);
+ // case of all zeros
+ if (std::all_of(s.cbegin(), s.cend(), is_zero_character)) {
+ if (precision != nullptr) {
+ *precision = 0;
}
- if (scale != NULLPTR) {
+ if (scale != nullptr) {
*scale = 0;
}
+ *out = 0;
return Status::OK();
}
- std::string::const_iterator whole_part_start = charp;
+ boost::smatch results;
+ const bool matches = boost::regex_match(s, results, DECIMAL_REGEX);
- charp = std::find_if_not(charp, end, isdigit);
+ if (!matches) {
+ std::stringstream ss;
+ ss << "The string " << s << " is not a valid decimal number";
+ return Status::Invalid(ss.str());
+ }
- std::string::const_iterator whole_part_end = charp;
- std::string whole_part(whole_part_start, whole_part_end);
+ const std::string sign = results["SIGN"];
+ const std::string integer = results["INTEGER"];
- if (charp != end && *charp == '.') {
- ++charp;
+ const std::string left_digits = results["LEFT_DIGITS"];
+ const std::string first_right_digits = results["FIRST_RIGHT_DIGITS"];
- if (charp == end) {
- return Status::Invalid(
- "Decimal point must be followed by at least one base ten digit.
Reached the "
- "end of the string.");
- }
+ const std::string second_right_digits = results["SECOND_RIGHT_DIGITS"];
- if (std::isdigit(*charp) == 0) {
- std::stringstream ss;
- ss << "Decimal point must be followed by a base ten digit. Found '" <<
*charp
- << "'";
- return Status::Invalid(ss.str());
- }
- } else {
- if (charp != end) {
- std::stringstream ss;
- ss << "Expected base ten digit or decimal point but found '" << *charp
- << "' instead.";
- return Status::Invalid(ss.str());
- }
- }
+ const std::string first_exp_value = results["FIRST_EXP_VALUE"];
+ const std::string second_exp_value = results["SECOND_EXP_VALUE"];
- std::string::const_iterator fractional_part_start = charp;
+ std::string whole_part;
+ std::string fractional_part;
+ std::string exponent_value;
- // The rest must be digits or an exponent
- if (charp != end) {
- charp = std::find_if_not(charp, end, isdigit);
-
- // The while loop has ended before the end of the string which means we've
hit a
- // character that isn't a base ten digit or "E" for exponent
- if (charp != end && *charp != 'E' && *charp != 'e') {
- std::stringstream ss;
- ss << "Found non base ten digit character '" << *charp
- << "' before the end of the string";
- return Status::Invalid(ss.str());
- }
+ if (!integer.empty()) {
+ whole_part = integer;
+ } else if (!left_digits.empty()) {
+ DCHECK(second_right_digits.empty()) << s << " " << second_right_digits;
+ whole_part = left_digits;
+ fractional_part = first_right_digits;
+ } else {
+ DCHECK(first_right_digits.empty()) << s << " " << first_right_digits;
+ fractional_part = second_right_digits;
}
- std::string::const_iterator fractional_part_end = charp;
- std::string fractional_part(fractional_part_start, fractional_part_end);
+ // skip leading zeros before the decimal point
+ std::string::const_iterator without_leading_zeros =
+ std::find_if_not(whole_part.cbegin(), whole_part.cend(),
is_zero_character);
+ whole_part = std::string(without_leading_zeros, whole_part.cend());
- if (precision != NULLPTR) {
- *precision = static_cast<int>(whole_part.size() + fractional_part.size());
+ if (!first_exp_value.empty()) {
+ exponent_value = first_exp_value;
+ } else {
+ exponent_value = second_exp_value;
}
- if (charp != end) {
- // we must have an exponent, if this aborts then we have somehow not
caught this and
- // raised a proper error
- DCHECK(*charp == 'E' || *charp == 'e');
-
- ++charp;
-
- const char value = *charp;
- const bool starts_with_plus_or_minus = value == '+' || value == '-';
-
- // we use this to construct the adjusted exponent integer later
- std::string::const_iterator digit_start = charp;
-
- // skip plus or minus
- charp += starts_with_plus_or_minus;
-
- // confirm that the rest of the characters are digits
- charp = std::find_if_not(charp, end, isdigit);
-
- if (charp != end) {
- // we have something other than digits here
- std::stringstream ss;
- ss << "Found non decimal digit exponent value '" << *charp << "'";
- return Status::Invalid(ss.str());
- }
-
- if (scale != NULLPTR) {
- // compute the scale from the adjusted exponent
- std::string adjusted_exponent_string(digit_start, end);
- DCHECK(std::all_of(adjusted_exponent_string.cbegin() +
starts_with_plus_or_minus,
- adjusted_exponent_string.cend(), isdigit))
- << "Non decimal digit character found in " <<
adjusted_exponent_string;
- const auto adjusted_exponent =
- static_cast<int32_t>(std::stol(adjusted_exponent_string));
- const auto len = static_cast<int32_t>(whole_part.size() +
fractional_part.size());
+ if (precision != nullptr) {
+ *precision = static_cast<int32_t>(whole_part.size() +
fractional_part.size());
+ }
+ if (scale != nullptr) {
+ if (!exponent_value.empty()) {
+ auto adjusted_exponent = static_cast<int32_t>(std::stol(exponent_value));
+ auto len = static_cast<int32_t>(whole_part.size() +
fractional_part.size());
*scale = -adjusted_exponent + len - 1;
- }
- } else {
- if (scale != NULLPTR) {
- *scale = static_cast<int>(fractional_part.size());
+ } else {
+ *scale = static_cast<int32_t>(fractional_part.size());
}
}
- if (out != NULLPTR) {
- // zero out in case we've passed in a previously used value
+ if (out != nullptr) {
*out = 0;
StringToInteger(whole_part + fractional_part, out);
- if (is_negative) {
+ if (sign == "-") {
out->Negate();
}
+
+ if (scale != nullptr && *scale < 0) {
+ const int32_t abs_scale = std::abs(*scale);
+ *out *= ScaleMultipliers[abs_scale];
+
+ if (precision != nullptr) {
+ *precision += abs_scale;
+ }
+ *scale = 0;
+ }
}
return Status::OK();
@@ -813,47 +818,6 @@ Decimal128 operator%(const Decimal128& left, const
Decimal128& right) {
return remainder;
}
-static const Decimal128 ScaleMultipliers[] = {
- Decimal128(1),
- Decimal128(10),
- Decimal128(100),
- Decimal128(1000),
- Decimal128(10000),
- Decimal128(100000),
- Decimal128(1000000),
- Decimal128(10000000),
- Decimal128(100000000),
- Decimal128(1000000000),
- Decimal128(10000000000),
- Decimal128(100000000000),
- Decimal128(1000000000000),
- Decimal128(10000000000000),
- Decimal128(100000000000000),
- Decimal128(1000000000000000),
- Decimal128(10000000000000000),
- Decimal128(100000000000000000),
- Decimal128(1000000000000000000),
- Decimal128("10000000000000000000"),
- Decimal128("100000000000000000000"),
- Decimal128("1000000000000000000000"),
- Decimal128("10000000000000000000000"),
- Decimal128("100000000000000000000000"),
- Decimal128("1000000000000000000000000"),
- Decimal128("10000000000000000000000000"),
- Decimal128("100000000000000000000000000"),
- Decimal128("1000000000000000000000000000"),
- Decimal128("10000000000000000000000000000"),
- Decimal128("100000000000000000000000000000"),
- Decimal128("1000000000000000000000000000000"),
- Decimal128("10000000000000000000000000000000"),
- Decimal128("100000000000000000000000000000000"),
- Decimal128("1000000000000000000000000000000000"),
- Decimal128("10000000000000000000000000000000000"),
- Decimal128("100000000000000000000000000000000000"),
- Decimal128("1000000000000000000000000000000000000"),
- Decimal128("10000000000000000000000000000000000000"),
- Decimal128("100000000000000000000000000000000000000")};
-
static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t
delta_scale,
int32_t abs_delta_scale, Decimal128*
result) {
Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]);
@@ -872,7 +836,7 @@ static bool RescaleWouldCauseDataLoss(const Decimal128&
value, int32_t delta_sca
Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
Decimal128* out) const {
- DCHECK_NE(out, NULLPTR) << "out is nullptr";
+ DCHECK_NE(out, nullptr) << "out is nullptr";
DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale";
const int32_t delta_scale = new_scale - original_scale;
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 1594090a0..79a99ba6a 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -124,7 +124,7 @@ class ARROW_EXPORT Decimal128 {
/// \brief Convert a decimal string to an Decimal128 value, optionally
including
/// precision and scale if they're passed in and not null.
static Status FromString(const std::string& s, Decimal128* out,
- int* precision = NULLPTR, int* scale = NULLPTR);
+ int32_t* precision = NULLPTR, int32_t* scale =
NULLPTR);
/// \brief Convert Decimal128 from one scale to another
Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out)
const;
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 4ca4d2252..c823f06bd 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -30,7 +30,7 @@ namespace arrow {
//
// Add more as needed.
-// Log levels. LOG ignores them, so their values are abitrary.
+// Log levels. LOG ignores them, so their values are arbitrary.
#define ARROW_DEBUG (-1)
#define ARROW_INFO 0
@@ -53,6 +53,9 @@ namespace arrow {
#define DCHECK(condition) \
ARROW_IGNORE_EXPR(condition) \
while (false) ::arrow::internal::NullLog()
+#define DCHECK_OK(status) \
+ ARROW_IGNORE_EXPR(status) \
+ while (false) ::arrow::internal::NullLog()
#define DCHECK_EQ(val1, val2) \
ARROW_IGNORE_EXPR(val1) \
while (false) ::arrow::internal::NullLog()
@@ -76,6 +79,7 @@ namespace arrow {
#define ARROW_DFATAL ARROW_FATAL
#define DCHECK(condition) ARROW_CHECK(condition)
+#define DCHECK_OK(status) (ARROW_CHECK((status).ok()) << (status).message())
#define DCHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2))
#define DCHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2))
#define DCHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2))
diff --git a/python/pyarrow/tests/test_convert_builtin.py
b/python/pyarrow/tests/test_convert_builtin.py
index 8423ff00b..19b59a49b 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -639,3 +639,13 @@ def test_structarray_from_arrays_coerce():
pa.StructArray.from_arrays(arrays)
assert result.equals(expected)
+
+
+def test_decimal_array_with_none_and_nan():
+ values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')]
+ array = pa.array(values)
+ assert array.type == pa.decimal128(4, 3)
+ assert array.to_pylist() == values[:2] + [None, None]
+
+ array = pa.array(values, type=pa.decimal128(10, 4))
+ assert array.to_pylist() == [decimal.Decimal('1.2340'), None, None, None]
diff --git a/python/pyarrow/tests/test_convert_pandas.py
b/python/pyarrow/tests/test_convert_pandas.py
index 986aeffca..813fbdf40 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1161,6 +1161,20 @@ def test_decimal_fails_with_truncation(self):
with pytest.raises(pa.ArrowException):
pa.array(data2, type=type2)
+ def test_decimal_with_different_precisions(self):
+ data = [
+ decimal.Decimal('0.01'),
+ decimal.Decimal('0.001'),
+ ]
+ series = pd.Series(data)
+ array = pa.array(series)
+ assert array.to_pylist() == data
+ assert array.type == pa.decimal128(3, 3)
+
+ array = pa.array(data, type=pa.decimal128(12, 5))
+ expected = [decimal.Decimal('0.01000'), decimal.Decimal('0.00100')]
+ assert array.to_pylist() == expected
+
class TestListTypes(object):
"""
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [Python] Decimal conversion not working for NaN values
> ------------------------------------------------------
>
> Key: ARROW-2145
> URL: https://issues.apache.org/jira/browse/ARROW-2145
> Project: Apache Arrow
> Issue Type: Bug
> Components: C++, Python
> Affects Versions: 0.8.0
> Reporter: Antony Mayi
> Assignee: Phillip Cloud
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> {code:python}
> import pyarrow as pa
> import pandas as pd
> import decimal
> pa.Table.from_pandas(pd.DataFrame({'a': [decimal.Decimal('1.1'),
> decimal.Decimal('NaN')]}))
> {code}
> throws following exception:
> {code}
> Traceback (most recent call last):
> File "<stdin>", line 1, in <module>
> File "pyarrow/table.pxi", line 875, in pyarrow.lib.Table.from_pandas
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:44927)
> File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 350, in
> dataframe_to_arrays
> convert_types)]
> File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 349, in
> <listcomp>
> for c, t in zip(columns_to_convert,
> File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 345, in
> convert_column
> return pa.array(col, from_pandas=True, type=ty)
> File "pyarrow/array.pxi", line 170, in pyarrow.lib.array
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:29224)
> File "pyarrow/array.pxi", line 70, in pyarrow.lib._ndarray_to_array
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:28465)
> File "pyarrow/error.pxi", line 98, in pyarrow.lib.check_status
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:9068)
> pyarrow.lib.ArrowException: Unknown error: an integer is required (got type
> str)
> {code}
> Same problem with other special decimal values like {{infinity}}.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)