[
https://issues.apache.org/jira/browse/ARROW-2007?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16341159#comment-16341159
]
ASF GitHub Bot commented on ARROW-2007:
---------------------------------------
wesm closed pull request #1509: ARROW-2007: [Python] Implement float32
conversions, use NumPy dtype when possible for inner arrays
URL: https://github.com/apache/arrow/pull/1509
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
opened from a fork) once it is merged, the diff is reproduced below for
the sake of provenance:
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 62ebcf364..94eb16a5e 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -81,7 +81,7 @@ conda info -a
conda create -n arrow -q -y python=%PYTHON% ^
six pytest setuptools numpy pandas cython ^
- thrift-cpp
+ thrift-cpp=0.10.0
if "%JOB%" == "Toolchain" (
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index fd2c16446..2f164c416 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -47,7 +47,7 @@ if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
zlib \
cmake \
curl \
- thrift-cpp \
+ thrift-cpp=0.10.0 \
ninja
# HACK(wesm): We started experiencing OpenSSL failures when Miniconda was
diff --git a/cpp/src/arrow/python/builtin_convert.cc
b/cpp/src/arrow/python/builtin_convert.cc
index cd88d557d..bf13ed5af 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -586,6 +586,15 @@ class TimestampConverter
TimeUnit::type unit_;
};
+class Float32Converter : public TypedConverterVisitor<FloatBuilder,
Float32Converter> {
+ public:
+ Status AppendItem(const OwnedRef& item) {
+ float val = static_cast<float>(PyFloat_AsDouble(item.obj()));
+ RETURN_IF_PYERROR();
+ return typed_builder_->Append(val);
+ }
+};
+
class DoubleConverter : public TypedConverterVisitor<DoubleBuilder,
DoubleConverter> {
public:
Status AppendItem(const OwnedRef& item) {
@@ -740,6 +749,8 @@ std::shared_ptr<SeqConverter> GetConverter(const
std::shared_ptr<DataType>& type
case Type::TIMESTAMP:
return std::make_shared<TimestampConverter>(
static_cast<const TimestampType&>(*type).unit());
+ case Type::FLOAT:
+ return std::make_shared<Float32Converter>();
case Type::DOUBLE:
return std::make_shared<DoubleConverter>();
case Type::BINARY:
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc
b/cpp/src/arrow/python/numpy_to_arrow.cc
index c5c02e355..b5a75aeed 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1008,10 +1008,21 @@ Status NumPyConverter::ConvertObjectsInfer() {
return ConvertTimes();
} else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj()))
{
return ConvertDecimals();
- } else if (PyList_Check(obj) || PyArray_Check(obj)) {
+ } else if (PyList_Check(obj)) {
std::shared_ptr<DataType> inferred_type;
RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
return ConvertLists(inferred_type);
+ } else if (PyArray_Check(obj)) {
+ std::shared_ptr<DataType> inferred_type;
+ PyArray_Descr* dtype =
PyArray_DESCR(reinterpret_cast<PyArrayObject*>(obj));
+
+ if (dtype->type_num == NPY_OBJECT) {
+ RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
+ } else {
+ RETURN_NOT_OK(
+ NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype),
&inferred_type));
+ }
+ return ConvertLists(inferred_type);
} else {
const std::string supported_types =
"string, bool, float, int, date, time, decimal, list, array";
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index fa38c9257..aa64664f3 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -507,6 +507,23 @@ def test_array_from_numpy_datetimeD():
assert result.equals(expected)
+def test_array_from_py_float32():
+ data = [[1.2, 3.4], [9.0, 42.0]]
+
+ t = pa.float32()
+
+ arr1 = pa.array(data[0], type=t)
+ arr2 = pa.array(data, type=pa.list_(t))
+
+ expected1 = np.array(data[0], dtype=np.float32)
+ expected2 = pd.Series([np.array(data[0], dtype=np.float32),
+ np.array(data[1], dtype=np.float32)])
+
+ assert arr1.type == t
+ assert arr1.equals(pa.array(expected1))
+ assert arr2.equals(pa.array(expected2))
+
+
def test_array_from_numpy_ascii():
arr = np.array(['abcde', 'abc', ''], dtype='|S5')
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Python] Sequence converter for float32 not implemented
> -------------------------------------------------------
>
> Key: ARROW-2007
> URL: https://issues.apache.org/jira/browse/ARROW-2007
> Project: Apache Arrow
> Issue Type: Improvement
> Components: Python
> Reporter: Wes McKinney
> Assignee: Wes McKinney
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> See bug report in [https://github.com/apache/arrow/issues/1431], example
> {code:java}
> import pyarrow as pa
> l = [[1.2, 3.4], [9.0, 42.0]]
> pa.array(l, type=pa.list_(pa.float32())){code}
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)