[
https://issues.apache.org/jira/browse/ARROW-1839?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16277969#comment-16277969
]
ASF GitHub Bot commented on ARROW-1839:
---------------------------------------
wesm closed pull request #1335: ARROW-1839/ARROW-1871: [C++/Python] Add Decimal
Parquet Read/Write Tests
URL: https://github.com/apache/arrow/pull/1335
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc
b/cpp/src/arrow/python/arrow_to_pandas.cc
index 096bbd55c..b1825cbe6 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -620,13 +620,6 @@ static Status ConvertTimes(PandasOptions options, const
ChunkedArray& data,
return Status::OK();
}
-static Status RawDecimalToString(const uint8_t* bytes, int scale, std::string*
result) {
- DCHECK_NE(result, nullptr);
- Decimal128 decimal(bytes);
- *result = decimal.ToString(scale);
- return Status::OK();
-}
-
static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
PyObject** out_values) {
PyAcquireGIL lock;
@@ -637,19 +630,14 @@ static Status ConvertDecimals(PandasOptions options,
const ChunkedArray& data,
PyObject* Decimal = Decimal_ref.obj();
for (int c = 0; c < data.num_chunks(); c++) {
- auto* arr(static_cast<arrow::Decimal128Array*>(data.chunk(c).get()));
- auto type(std::dynamic_pointer_cast<arrow::Decimal128Type>(arr->type()));
- const int scale = type->scale();
+ const auto& arr(static_cast<const
arrow::Decimal128Array&>(*data.chunk(c).get()));
- for (int64_t i = 0; i < arr->length(); ++i) {
- if (arr->IsNull(i)) {
+ for (int64_t i = 0; i < arr.length(); ++i) {
+ if (arr.IsNull(i)) {
Py_INCREF(Py_None);
*out_values++ = Py_None;
} else {
- const uint8_t* raw_value = arr->GetValue(i);
- std::string decimal_string;
- RETURN_NOT_OK(RawDecimalToString(raw_value, scale, &decimal_string));
- *out_values++ = internal::DecimalFromString(Decimal, decimal_string);
+ *out_values++ = internal::DecimalFromString(Decimal,
arr.FormatValue(i));
RETURN_IF_PYERROR();
}
}
diff --git a/cpp/src/arrow/python/builtin_convert.cc
b/cpp/src/arrow/python/builtin_convert.cc
index c716c47d2..08cbae7ab 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -37,14 +37,6 @@
namespace arrow {
namespace py {
-static inline bool IsPyInteger(PyObject* obj) {
-#if PYARROW_IS_PY2
- return PyLong_Check(obj) || PyInt_Check(obj);
-#else
- return PyLong_Check(obj);
-#endif
-}
-
Status InvalidConversion(PyObject* obj, const std::string& expected_types,
std::ostream* out) {
OwnedRef type(PyObject_Type(obj));
@@ -91,7 +83,7 @@ class ScalarVisitor {
++bool_count_;
} else if (PyFloat_Check(obj)) {
++float_count_;
- } else if (IsPyInteger(obj)) {
+ } else if (internal::IsPyInteger(obj)) {
++int_count_;
} else if (PyDate_CheckExact(obj)) {
++date_count_;
@@ -390,28 +382,26 @@ class TypedConverterVisitor : public
TypedConverter<BuilderType> {
}
return Status::OK();
}
-
- virtual Status AppendItem(const OwnedRef& item) = 0;
};
class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter>
{
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
return Status::Invalid("NullConverter: passed non-None value");
}
};
class BoolConverter : public TypedConverterVisitor<BooleanBuilder,
BoolConverter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
return typed_builder_->Append(item.obj() == Py_True);
}
};
class Int8Converter : public TypedConverterVisitor<Int8Builder, Int8Converter>
{
public:
- inline Status AppendItem(const OwnedRef& item) {
- int64_t val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int8_t>::max() ||
val < std::numeric_limits<int8_t>::min())) {
@@ -426,8 +416,8 @@ class Int8Converter : public
TypedConverterVisitor<Int8Builder, Int8Converter> {
class Int16Converter : public TypedConverterVisitor<Int16Builder,
Int16Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- int64_t val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int16_t>::max() ||
val < std::numeric_limits<int16_t>::min())) {
@@ -442,8 +432,8 @@ class Int16Converter : public
TypedConverterVisitor<Int16Builder, Int16Converter
class Int32Converter : public TypedConverterVisitor<Int32Builder,
Int32Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- int64_t val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int32_t>::max() ||
val < std::numeric_limits<int32_t>::min())) {
@@ -458,8 +448,8 @@ class Int32Converter : public
TypedConverterVisitor<Int32Builder, Int32Converter
class Int64Converter : public TypedConverterVisitor<Int64Builder,
Int64Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- int64_t val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -467,8 +457,8 @@ class Int64Converter : public
TypedConverterVisitor<Int64Builder, Int64Converter
class UInt8Converter : public TypedConverterVisitor<UInt8Builder,
UInt8Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- uint64_t val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint8_t>::max())) {
return Status::Invalid(
@@ -482,8 +472,8 @@ class UInt8Converter : public
TypedConverterVisitor<UInt8Builder, UInt8Converter
class UInt16Converter : public TypedConverterVisitor<UInt16Builder,
UInt16Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- uint64_t val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint16_t>::max())) {
return Status::Invalid(
@@ -497,8 +487,8 @@ class UInt16Converter : public
TypedConverterVisitor<UInt16Builder, UInt16Conver
class UInt32Converter : public TypedConverterVisitor<UInt32Builder,
UInt32Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- uint64_t val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint32_t>::max())) {
return Status::Invalid(
@@ -512,8 +502,8 @@ class UInt32Converter : public
TypedConverterVisitor<UInt32Builder, UInt32Conver
class UInt64Converter : public TypedConverterVisitor<UInt64Builder,
UInt64Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
- int64_t val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ Status AppendItem(const OwnedRef& item) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -521,13 +511,13 @@ class UInt64Converter : public
TypedConverterVisitor<UInt64Builder, UInt64Conver
class Date32Converter : public TypedConverterVisitor<Date32Builder,
Date32Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
int32_t t;
if (PyDate_Check(item.obj())) {
auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
t = static_cast<int32_t>(PyDate_to_s(pydate));
} else {
- int64_t casted_val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ const auto casted_val =
static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
RETURN_IF_PYERROR();
if (casted_val > std::numeric_limits<int32_t>::max()) {
return Status::Invalid("Integer as date32 larger than INT32_MAX");
@@ -540,7 +530,7 @@ class Date32Converter : public
TypedConverterVisitor<Date32Builder, Date32Conver
class Date64Converter : public TypedConverterVisitor<Date64Builder,
Date64Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
int64_t t;
if (PyDate_Check(item.obj())) {
auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
@@ -558,7 +548,7 @@ class TimestampConverter
public:
explicit TimestampConverter(TimeUnit::type unit) : unit_(unit) {}
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
int64_t t;
if (PyDateTime_Check(item.obj())) {
auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
@@ -590,7 +580,7 @@ class TimestampConverter
class DoubleConverter : public TypedConverterVisitor<DoubleBuilder,
DoubleConverter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
double val = PyFloat_AsDouble(item.obj());
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
@@ -599,7 +589,7 @@ class DoubleConverter : public
TypedConverterVisitor<DoubleBuilder, DoubleConver
class BytesConverter : public TypedConverterVisitor<BinaryBuilder,
BytesConverter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
const char* bytes;
Py_ssize_t length;
@@ -627,7 +617,7 @@ class BytesConverter : public
TypedConverterVisitor<BinaryBuilder, BytesConverte
class FixedWidthBytesConverter
: public TypedConverterVisitor<FixedSizeBinaryBuilder,
FixedWidthBytesConverter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
OwnedRef tmp;
Py_ssize_t expected_length =
@@ -654,7 +644,7 @@ class FixedWidthBytesConverter
class UTF8Converter : public TypedConverterVisitor<StringBuilder,
UTF8Converter> {
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
OwnedRef tmp;
const char* bytes;
@@ -689,10 +679,10 @@ class ListConverter : public
TypedConverterVisitor<ListBuilder, ListConverter> {
public:
Status Init(ArrayBuilder* builder) override;
- inline Status AppendItem(const OwnedRef& item) override {
+ Status AppendItem(const OwnedRef& item) {
RETURN_NOT_OK(typed_builder_->Append());
PyObject* item_obj = item.obj();
- int64_t list_size = static_cast<int64_t>(PySequence_Size(item_obj));
+ const auto list_size = static_cast<int64_t>(PySequence_Size(item_obj));
return value_converter_->AppendData(item_obj, list_size);
}
@@ -703,13 +693,11 @@ class ListConverter : public
TypedConverterVisitor<ListBuilder, ListConverter> {
class DecimalConverter
: public TypedConverterVisitor<arrow::Decimal128Builder, DecimalConverter>
{
public:
- inline Status AppendItem(const OwnedRef& item) {
+ Status AppendItem(const OwnedRef& item) {
/// TODO(phillipc): Check for nan?
- std::string string;
- RETURN_NOT_OK(internal::PythonDecimalToString(item.obj(), &string));
-
Decimal128 value;
- RETURN_NOT_OK(Decimal128::FromString(string, &value));
+ const auto& type = static_cast<const
DecimalType&>(*typed_builder_->type());
+ RETURN_NOT_OK(internal::DecimalFromPythonDecimal(item.obj(), type,
&value));
return typed_builder_->Append(value);
}
};
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 708d9916f..494f92900 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -15,8 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/python/helpers.h"
+#include <sstream>
+
#include "arrow/python/common.h"
+#include "arrow/python/helpers.h"
#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
@@ -91,20 +93,33 @@ Status PythonDecimalToString(PyObject* python_decimal,
std::string* out) {
return Status::OK();
}
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int* precision,
- int* scale) {
- // Call Python's str(decimal_object)
- OwnedRef str_obj(PyObject_Str(python_decimal));
+Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t*
precision,
+ int32_t* scale) {
+ DCHECK_NE(python_decimal, NULLPTR);
+ DCHECK_NE(precision, NULLPTR);
+ DCHECK_NE(scale, NULLPTR);
+
+ OwnedRef as_tuple(PyObject_CallMethod(python_decimal, "as_tuple", "()"));
RETURN_IF_PYERROR();
- PyObjectStringify str(str_obj.obj());
+ DCHECK(PyTuple_Check(as_tuple.obj()));
- const char* bytes = str.bytes;
- DCHECK_NE(bytes, nullptr);
+ OwnedRef digits(PyObject_GetAttrString(as_tuple.obj(), "digits"));
+ RETURN_IF_PYERROR();
+ DCHECK(PyTuple_Check(digits.obj()));
- auto size = str.size;
+ const auto num_digits = static_cast<int32_t>(PyTuple_Size(digits.obj()));
+ RETURN_IF_PYERROR();
- std::string c_string(bytes, size);
- return Decimal128::FromString(c_string, nullptr, precision, scale);
+ OwnedRef py_exponent(PyObject_GetAttrString(as_tuple.obj(), "exponent"));
+ RETURN_IF_PYERROR();
+ DCHECK(IsPyInteger(py_exponent.obj()));
+
+ const auto exponent = static_cast<int32_t>(PyLong_AsLong(py_exponent.obj()));
+ RETURN_IF_PYERROR();
+
+ *precision = num_digits;
+ *scale = -exponent;
+ return Status::OK();
}
PyObject* DecimalFromString(PyObject* decimal_constructor,
@@ -121,6 +136,46 @@ PyObject* DecimalFromString(PyObject* decimal_constructor,
string_size);
}
+Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType&
arrow_type,
+ Decimal128* out) {
+ DCHECK_NE(python_decimal, NULLPTR);
+ DCHECK_NE(out, NULLPTR);
+
+ std::string string;
+ RETURN_NOT_OK(PythonDecimalToString(python_decimal, &string));
+
+ int32_t inferred_precision;
+ int32_t inferred_scale;
+
+ RETURN_NOT_OK(
+ Decimal128::FromString(string, out, &inferred_precision,
&inferred_scale));
+
+ const int32_t precision = arrow_type.precision();
+ const int32_t scale = arrow_type.scale();
+
+ if (ARROW_PREDICT_FALSE(inferred_precision > precision)) {
+ std::stringstream buf;
+ buf << "Decimal type with precision " << inferred_precision
+ << " does not fit into precision inferred from first array element: "
+ << precision;
+ return Status::Invalid(buf.str());
+ }
+
+ if (scale != inferred_scale) {
+ DCHECK_NE(out, NULLPTR);
+ RETURN_NOT_OK(out->Rescale(inferred_scale, scale, out));
+ }
+ return Status::OK();
+}
+
+bool IsPyInteger(PyObject* obj) {
+#if PYARROW_IS_PY2
+ return PyLong_Check(obj) || PyInt_Check(obj);
+#else
+ return PyLong_Check(obj);
+#endif
+}
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index 719ed796e..c82bdabc4 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -29,6 +29,9 @@
#include "arrow/util/visibility.h"
namespace arrow {
+
+class Decimal128;
+
namespace py {
class OwnedRef;
@@ -44,11 +47,15 @@ Status ImportFromModule(const OwnedRef& module, const
std::string& module_name,
Status PythonDecimalToString(PyObject* python_decimal, std::string* out);
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int* precision
= NULLPTR,
- int* scale = NULLPTR);
+Status InferDecimalPrecisionAndScale(PyObject* python_decimal,
+ int32_t* precision = NULLPTR,
+ int32_t* scale = NULLPTR);
PyObject* DecimalFromString(PyObject* decimal_constructor,
const std::string& decimal_string);
+Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType&
arrow_type,
+ Decimal128* out);
+bool IsPyInteger(PyObject* obj);
} // namespace internal
} // namespace py
diff --git a/cpp/src/arrow/python/numpy-internal.h
b/cpp/src/arrow/python/numpy-internal.h
index db34d24d9..6c9c871a1 100644
--- a/cpp/src/arrow/python/numpy-internal.h
+++ b/cpp/src/arrow/python/numpy-internal.h
@@ -56,8 +56,8 @@ class Ndarray1DIndexer {
bool is_strided() const { return stride_ == 1; }
- T& operator[](size_type index) { return *(data_ + index * stride_); }
- T& operator[](size_type index) const { return *(data_ + index * stride_); }
+ T& operator[](size_type index) { return data_[index * stride_]; }
+ T& operator[](size_type index) const { return data_[index * stride_]; }
private:
PyArrayObject* arr_;
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc
b/cpp/src/arrow/python/numpy_to_arrow.cc
index 0b1124d30..2316a796a 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -682,24 +682,41 @@ Status NumPyConverter::ConvertDecimals() {
Ndarray1DIndexer<PyObject*> objects(arr_);
PyObject* object = objects[0];
- int precision;
- int scale;
+ if (type_ == NULLPTR) {
+ int32_t precision;
+ int32_t desired_scale;
- RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(object, &precision,
&scale));
+ int32_t tmp_precision;
+ int32_t tmp_scale;
- type_ = std::make_shared<Decimal128Type>(precision, scale);
+ RETURN_NOT_OK(
+ internal::InferDecimalPrecisionAndScale(objects[0], &precision,
&desired_scale));
+
+ for (int64_t i = 1; i < length_; ++i) {
+ RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(objects[i],
&tmp_precision,
+ &tmp_scale));
+ precision = std::max(precision, tmp_precision);
+
+ if (std::abs(desired_scale) < std::abs(tmp_scale)) {
+ desired_scale = tmp_scale;
+ }
+ }
+
+ type_ = ::arrow::decimal(precision, desired_scale);
+ }
Decimal128Builder builder(type_, pool_);
RETURN_NOT_OK(builder.Resize(length_));
+ const auto& decimal_type = static_cast<const DecimalType&>(*type_);
+ PyObject* Decimal_type_object = Decimal.obj();
+
for (int64_t i = 0; i < length_; ++i) {
object = objects[i];
- if (PyObject_IsInstance(object, Decimal.obj())) {
- std::string string;
- RETURN_NOT_OK(internal::PythonDecimalToString(object, &string));
+ if (PyObject_IsInstance(object, Decimal_type_object)) {
Decimal128 value;
- RETURN_NOT_OK(Decimal128::FromString(string, &value));
+ RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type,
&value));
RETURN_NOT_OK(builder.Append(value));
} else if (PandasObjectIsNull(object)) {
RETURN_NOT_OK(builder.AppendNull());
@@ -724,7 +741,7 @@ Status NumPyConverter::ConvertTimes() {
Time64Builder builder(::arrow::time64(TimeUnit::MICRO), pool_);
RETURN_NOT_OK(builder.Resize(length_));
- PyObject* obj;
+ PyObject* obj = NULLPTR;
for (int64_t i = 0; i < length_; ++i) {
obj = objects[i];
if (PyTime_Check(obj)) {
diff --git a/cpp/src/arrow/python/python-test.cc
b/cpp/src/arrow/python/python-test.cc
index 3b7d7d884..d9919ee49 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -35,34 +35,73 @@ namespace py {
TEST(PyBuffer, InvalidInputObject) { PyBuffer buffer(Py_None); }
-TEST(DecimalTest, TestPythonDecimalToString) {
- PyAcquireGIL lock;
+class DecimalTest : public ::testing::Test {
+ public:
+ DecimalTest() : lock_(), decimal_module_(), decimal_constructor_() {
+ auto s = internal::ImportModule("decimal", &decimal_module_);
+ DCHECK(s.ok()) << s.message();
+ DCHECK_NE(decimal_module_.obj(), NULLPTR);
- OwnedRef decimal;
- OwnedRef Decimal;
- ASSERT_OK(internal::ImportModule("decimal", &decimal));
- ASSERT_NE(decimal.obj(), nullptr);
+ s = internal::ImportFromModule(decimal_module_, "Decimal",
&decimal_constructor_);
+ DCHECK(s.ok()) << s.message();
- ASSERT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
- ASSERT_NE(Decimal.obj(), nullptr);
+ DCHECK_NE(decimal_constructor_.obj(), NULLPTR);
+ }
- std::string decimal_string("-39402950693754869342983");
- const char* format = "s#";
- auto c_string = decimal_string.c_str();
- ASSERT_NE(c_string, nullptr);
+ OwnedRef CreatePythonDecimal(const std::string& string_value) {
+ OwnedRef ref(internal::DecimalFromString(decimal_constructor_.obj(),
string_value));
+ return ref;
+ }
- auto c_string_size = decimal_string.size();
- ASSERT_GT(c_string_size, 0);
- OwnedRef pydecimal(PyObject_CallFunction(Decimal.obj(),
const_cast<char*>(format),
- c_string, c_string_size));
- ASSERT_NE(pydecimal.obj(), nullptr);
- ASSERT_EQ(PyErr_Occurred(), nullptr);
+ private:
+ PyAcquireGIL lock_;
+ OwnedRef decimal_module_;
+ OwnedRef decimal_constructor_;
+};
- PyObject* python_object = pydecimal.obj();
- ASSERT_NE(python_object, nullptr);
+TEST_F(DecimalTest, TestPythonDecimalToString) {
+ std::string decimal_string("-39402950693754869342983");
+
+ OwnedRef python_object = this->CreatePythonDecimal(decimal_string);
+ ASSERT_NE(python_object.obj(), nullptr);
std::string string_result;
- ASSERT_OK(internal::PythonDecimalToString(python_object, &string_result));
+ ASSERT_OK(internal::PythonDecimalToString(python_object.obj(),
&string_result));
+}
+
+TEST_F(DecimalTest, TestInferPrecisionAndScale) {
+ std::string decimal_string("-394029506937548693.42983");
+ OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
+
+ int32_t precision;
+ int32_t scale;
+
+ ASSERT_OK(
+ internal::InferDecimalPrecisionAndScale(python_decimal.obj(),
&precision, &scale));
+
+ const auto expected_precision =
+ static_cast<int32_t>(decimal_string.size() - 2); // 1 for -, 1 for .
+ const int32_t expected_scale = 5;
+
+ ASSERT_EQ(expected_precision, precision);
+ ASSERT_EQ(expected_scale, scale);
+}
+
+TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) {
+ std::string decimal_string("-3.94042983E+10");
+ OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
+
+ int32_t precision;
+ int32_t scale;
+
+ ASSERT_OK(
+ internal::InferDecimalPrecisionAndScale(python_decimal.obj(),
&precision, &scale));
+
+ const auto expected_precision = 9;
+ const int32_t expected_scale = -2;
+
+ ASSERT_EQ(expected_precision, precision);
+ ASSERT_EQ(expected_scale, scale);
}
TEST(PandasConversionTest, TestObjectBlockWriteFails) {
diff --git a/cpp/src/arrow/util/decimal-test.cc
b/cpp/src/arrow/util/decimal-test.cc
index 0d0c08cc4..e4406747d 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -366,4 +366,13 @@ TEST(Decimal128ParseTest, WithExponentAndNullptrScale) {
ASSERT_EQ(expected_value, value);
}
+TEST(Decimal128Test, TestSmallNumberFormat) {
+ Decimal128 value("0.2");
+ std::string expected("0.2");
+
+ const int32_t scale = 1;
+ std::string result = value.ToString(scale);
+ ASSERT_EQ(expected, result);
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 447cae5c5..e999854b1 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -813,4 +813,68 @@ Decimal128 operator%(const Decimal128& left, const
Decimal128& right) {
return remainder;
}
+static const Decimal128 ScaleMultipliers[] = {
+ Decimal128(1),
+ Decimal128(10),
+ Decimal128(100),
+ Decimal128(1000),
+ Decimal128(10000),
+ Decimal128(100000),
+ Decimal128(1000000),
+ Decimal128(10000000),
+ Decimal128(100000000),
+ Decimal128(1000000000),
+ Decimal128(10000000000),
+ Decimal128(100000000000),
+ Decimal128(1000000000000),
+ Decimal128(10000000000000),
+ Decimal128(100000000000000),
+ Decimal128(1000000000000000),
+ Decimal128(10000000000000000),
+ Decimal128(100000000000000000),
+ Decimal128(1000000000000000000),
+ Decimal128("10000000000000000000"),
+ Decimal128("100000000000000000000"),
+ Decimal128("1000000000000000000000"),
+ Decimal128("10000000000000000000000"),
+ Decimal128("100000000000000000000000"),
+ Decimal128("1000000000000000000000000"),
+ Decimal128("10000000000000000000000000"),
+ Decimal128("100000000000000000000000000"),
+ Decimal128("1000000000000000000000000000"),
+ Decimal128("10000000000000000000000000000"),
+ Decimal128("100000000000000000000000000000"),
+ Decimal128("1000000000000000000000000000000"),
+ Decimal128("10000000000000000000000000000000"),
+ Decimal128("100000000000000000000000000000000"),
+ Decimal128("1000000000000000000000000000000000"),
+ Decimal128("10000000000000000000000000000000000"),
+ Decimal128("100000000000000000000000000000000000"),
+ Decimal128("1000000000000000000000000000000000000"),
+ Decimal128("10000000000000000000000000000000000000"),
+ Decimal128("100000000000000000000000000000000000000")};
+
+Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
+ Decimal128* out) const {
+ DCHECK_NE(out, NULLPTR);
+ DCHECK_NE(original_scale, new_scale);
+ const int32_t delta_scale = original_scale - new_scale;
+ const int32_t abs_delta_scale = std::abs(delta_scale);
+ DCHECK_GE(abs_delta_scale, 1);
+ DCHECK_LE(abs_delta_scale, 38);
+
+ const Decimal128 scale_multiplier = ScaleMultipliers[abs_delta_scale];
+ const Decimal128 result = *this * scale_multiplier;
+
+ if (ARROW_PREDICT_FALSE(result < *this)) {
+ std::stringstream buf;
+ buf << "Rescaling decimal value from original scale " << original_scale
+ << " to new scale " << new_scale << " would cause overflow";
+ return Status::Invalid(buf.str());
+ }
+
+ *out = result;
+ return Status::OK();
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index a0423e9fc..1594090a0 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -126,6 +126,9 @@ class ARROW_EXPORT Decimal128 {
static Status FromString(const std::string& s, Decimal128* out,
int* precision = NULLPTR, int* scale = NULLPTR);
+ /// \brief Convert Decimal128 from one scale to another
+ Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out)
const;
+
private:
int64_t high_bits_;
uint64_t low_bits_;
diff --git a/python/pyarrow/tests/test_parquet.py
b/python/pyarrow/tests/test_parquet.py
index 9004fc01a..d17d89e24 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -16,13 +16,17 @@
# under the License.
from os.path import join as pjoin
+
import datetime
+import decimal
import io
-import os
import json
+import os
+
import pytest
from pyarrow.compat import guid, u, BytesIO, unichar, frombytes
+from pyarrow.tests import util
from pyarrow.filesystem import LocalFileSystem
import pyarrow as pa
from .pandas_examples import dataframe_with_arrays, dataframe_with_lists
@@ -1564,3 +1568,41 @@ def
test_backwards_compatible_index_multi_level_some_named():
t = _read_table(path)
result = t.to_pandas()
tm.assert_frame_equal(result, expected)
+
+
[email protected]('precision', range(1, 39))
+def test_decimal_roundtrip(tmpdir, precision):
+ num_values = 10
+
+ columns = {}
+
+ for scale in range(0, precision + 1):
+ with util.random_seed(0):
+ random_decimal_values = [
+ util.randdecimal(precision, scale) for _ in range(num_values)
+ ]
+ column_name = 'dec_precision_{:d}_scale_{:d}'.format(precision, scale)
+ columns[column_name] = random_decimal_values
+
+ expected = pd.DataFrame(columns)
+ filename = tmpdir.join('decimals.parquet')
+ string_filename = str(filename)
+ t = pa.Table.from_pandas(expected)
+ _write_table(t, string_filename)
+ result_table = _read_table(string_filename)
+ result = result_table.to_pandas()
+ tm.assert_frame_equal(result, expected)
+
+
[email protected](
+ raises=pa.ArrowException, reason='Parquet does not support negative scale'
+)
+def test_decimal_roundtrip_negative_scale(tmpdir):
+ expected = pd.DataFrame({'decimal_num': [decimal.Decimal('1.23E4')]})
+ filename = tmpdir.join('decimals.parquet')
+ string_filename = str(filename)
+ t = pa.Table.from_pandas(expected)
+ _write_table(t, string_filename)
+ result_table = _read_table(string_filename)
+ result = result_table.to_pandas()
+ tm.assert_frame_equal(result, expected)
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
new file mode 100644
index 000000000..a3ba9000c
--- /dev/null
+++ b/python/pyarrow/tests/util.py
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Utility functions for testing
+"""
+
+import decimal
+import random
+import contextlib
+
+
+def randsign():
+ """Randomly choose either 1 or -1.
+
+ Returns
+ -------
+ sign : int
+ """
+ return random.choice((-1, 1))
+
+
[email protected]
+def random_seed(seed):
+ """Set the random seed inside of a context manager.
+
+ Parameters
+ ----------
+ seed : int
+ The seed to set
+
+ Notes
+ -----
+ This function is useful when you want to set a random seed but not affect
+ the random state of other functions using the random module.
+ """
+ original_state = random.getstate()
+ random.seed(seed)
+ try:
+ yield
+ finally:
+ random.setstate(original_state)
+
+
+def randdecimal(precision, scale):
+ """Generate a random decimal value with specified precision and scale.
+
+ Parameters
+ ----------
+ precision : int
+ The maximum number of digits to generate. Must be an integer between 1
+ and 38 inclusive.
+ scale : int
+ The maximum number of digits following the decimal point. Must be an
+ integer greater than or equal to 0.
+
+ Returns
+ -------
+ decimal_value : decimal.Decimal
+ A random decimal.Decimal object with the specifed precision and scale.
+ """
+ assert 1 <= precision <= 38, 'precision must be between 1 and 38 inclusive'
+ if scale < 0:
+ raise ValueError(
+ 'randdecimal does not yet support generating decimals with '
+ 'negative scale'
+ )
+ max_whole_value = 10 ** (precision - scale) - 1
+ whole = random.randint(-max_whole_value, max_whole_value)
+
+ if not scale:
+ return decimal.Decimal(whole)
+
+ max_fractional_value = 10 ** scale - 1
+ fractional = random.randint(0, max_fractional_value)
+
+ return decimal.Decimal(
+ '{}.{}'.format(whole, str(fractional).rjust(scale, '0'))
+ )
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [C++/Python] Add Decimal Parquet Read/Write Tests
> -------------------------------------------------
>
> Key: ARROW-1839
> URL: https://issues.apache.org/jira/browse/ARROW-1839
> Project: Apache Arrow
> Issue Type: Test
> Components: C++, Python
> Affects Versions: 0.7.1
> Reporter: Phillip Cloud
> Assignee: Phillip Cloud
> Labels: pull-request-available
> Fix For: 0.8.0
>
>
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)