[
https://issues.apache.org/jira/browse/ARROW-1999?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16342612#comment-16342612
]
ASF GitHub Bot commented on ARROW-1999:
---------------------------------------
xhochy closed pull request #1523: ARROW-1999: [Python] Type checking in
`from_numpy_dtype`
URL: https://github.com/apache/arrow/pull/1523
This is a PR merged from a forked repository (a foreign pull request).
Because GitHub hides the original diff of such a pull request once it
is merged, the diff is reproduced below for the sake of provenance:
diff --git a/cpp/src/arrow/python/numpy_convert.cc
b/cpp/src/arrow/python/numpy_convert.cc
index 9ed2d73d4..124745ede 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -84,6 +84,9 @@ NumPyBuffer::~NumPyBuffer() { Py_XDECREF(arr_); }
break;
Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out) {
+ if (!PyArray_DescrCheck(dtype)) {
+ return Status::TypeError("Did not pass numpy.dtype object");
+ }
PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
int type_num = cast_npy_type_compat(descr->type_num);
@@ -145,6 +148,9 @@ Status GetNumPyType(const DataType& type, int* type_num) {
}
Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) {
+ if (!PyArray_DescrCheck(dtype)) {
+ return Status::TypeError("Did not pass numpy.dtype object");
+ }
PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
int type_num = cast_npy_type_compat(descr->type_num);
diff --git a/python/pyarrow/tests/test_schema.py
b/python/pyarrow/tests/test_schema.py
index dbca139e2..90efe3f7e 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -154,8 +154,21 @@ def test_time_types():
pa.time64('s')
-def test_type_from_numpy_dtype_timestamps():
+def test_from_numpy_dtype():
cases = [
+ (np.dtype('bool'), pa.bool_()),
+ (np.dtype('int8'), pa.int8()),
+ (np.dtype('int16'), pa.int16()),
+ (np.dtype('int32'), pa.int32()),
+ (np.dtype('int64'), pa.int64()),
+ (np.dtype('uint8'), pa.uint8()),
+ (np.dtype('uint16'), pa.uint16()),
+ (np.dtype('uint32'), pa.uint32()),
+ (np.dtype('float16'), pa.float16()),
+ (np.dtype('float32'), pa.float32()),
+ (np.dtype('float64'), pa.float64()),
+ (np.dtype('U'), pa.string()),
+ (np.dtype('S'), pa.binary()),
(np.dtype('datetime64[s]'), pa.timestamp('s')),
(np.dtype('datetime64[ms]'), pa.timestamp('ms')),
(np.dtype('datetime64[us]'), pa.timestamp('us')),
@@ -166,6 +179,18 @@ def test_type_from_numpy_dtype_timestamps():
result = pa.from_numpy_dtype(dt)
assert result == pt
+ # Things convertible to numpy dtypes work
+ assert pa.from_numpy_dtype('U') == pa.string()
+ assert pa.from_numpy_dtype(np.unicode) == pa.string()
+ assert pa.from_numpy_dtype('int32') == pa.int32()
+ assert pa.from_numpy_dtype(bool) == pa.bool_()
+
+ with pytest.raises(NotImplementedError):
+ pa.from_numpy_dtype(np.dtype('O'))
+
+ with pytest.raises(TypeError):
+ pa.from_numpy_dtype('not_convertible_to_dtype')
+
def test_field():
t = pa.string()
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 1563b5785..a3cbeefb0 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1207,6 +1207,7 @@ def from_numpy_dtype(object dtype):
Convert NumPy dtype to pyarrow.DataType
"""
cdef shared_ptr[CDataType] c_type
+ dtype = np.dtype(dtype)
with nogil:
check_status(NumPyDtypeToArrow(dtype, &c_type))
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Python] from_numpy_dtype returns wrong types
> ---------------------------------------------
>
> Key: ARROW-1999
> URL: https://issues.apache.org/jira/browse/ARROW-1999
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Affects Versions: 0.8.0
> Environment: Windows 10 Build 15063.850
> Python: 3.6.3
> Numpy: 1.14.0
> Reporter: Victor Jimenez
> Assignee: Phillip Cloud
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> The following code shows multiple issues when using {{from_numpy_dtype}}:
> {code}
> import numpy as np
> import pyarrow as pa
> pa.from_numpy_dtype(np.unicode) # returns DataType(bool)
> pa.from_numpy_dtype(np.int) # returns DataType(bool)
> pa.from_numpy_dtype(np.int64) # Fails with the following message:
> #
> # ArrowNotImplementedError Traceback (most recent call last)
> # <ipython-input-14-ca0855a7dda8> in <module>()
> # ----> 1 pa.from_numpy_dtype(np.int64)
> # 2
> #
> # types.pxi in pyarrow.lib.from_numpy_dtype()
> #
> # error.pxi in pyarrow.lib.check_status()
> #
> # ArrowNotImplementedError: Unsupported numpy type 32760
> {code}
> Additionally, a potentially related issue is also seen when using
> {{to_pandas_dtype}}:
> {code}
> pa.DataType.to_pandas_dtype(pa.string()) # Returns numpy.object_
> # (shouldn't it be numpy.unicode?)
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)