This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 29268ec ARROW-2342: [Python] Allow pickling more types
29268ec is described below
commit 29268ec5a0a10da639d0dd5684bc57fcd1c8f7e3
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Mar 22 16:58:44 2018 -0400
ARROW-2342: [Python] Allow pickling more types
Several types (bool, timestamp with a non-None timezone, floats) were
unpicklable.
Based on PR #1778.
Author: Antoine Pitrou <[email protected]>
Closes #1780 from pitrou/ARROW-2342-types-pickling and squashes the
following commits:
4cda526d <Antoine Pitrou> ARROW-2342: Allow pickling more types
---
python/pyarrow/tests/test_types.py | 57 ++++++++++++++++++++++++++------------
python/pyarrow/types.pxi | 13 +++++++++
2 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 6459496..b517020 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -15,12 +15,44 @@
# specific language governing permissions and limitations
# under the License.
+import pickle
+
import pytest
import pyarrow as pa
import pyarrow.types as types
+MANY_TYPES = [
+ pa.null(),
+ pa.bool_(),
+ pa.int32(),
+ pa.time32('s'),
+ pa.time64('us'),
+ pa.date32(),
+ pa.timestamp('us'),
+ pa.timestamp('us', tz='UTC'),
+ pa.timestamp('us', tz='Europe/Paris'),
+ pa.float16(),
+ pa.float32(),
+ pa.float64(),
+ pa.decimal128(19, 4),
+ pa.string(),
+ pa.binary(),
+ pa.binary(10),
+ pa.list_(pa.int32()),
+ pa.struct([pa.field('a', pa.int32()),
+ pa.field('b', pa.int8()),
+ pa.field('c', pa.string())]),
+ pa.union([pa.field('a', pa.binary(10)),
+ pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
+ pa.union([pa.field('a', pa.binary(10)),
+ pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
+ # XXX Needs array pickling
+ # pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])),
+]
+
+
def test_is_boolean():
assert types.is_boolean(pa.bool_())
assert not types.is_boolean(pa.int8())
@@ -163,27 +195,18 @@ def test_union_type():
def test_types_hashable():
- types = [
- pa.null(),
- pa.int32(),
- pa.time32('s'),
- pa.time64('us'),
- pa.date32(),
- pa.timestamp('us'),
- pa.string(),
- pa.binary(),
- pa.binary(10),
- pa.list_(pa.int32()),
- pa.struct([pa.field('a', pa.int32()),
- pa.field('b', pa.int8()),
- pa.field('c', pa.string())])
- ]
-
in_dict = {}
- for i, type_ in enumerate(types):
+ for i, type_ in enumerate(MANY_TYPES):
assert hash(type_) == hash(type_)
in_dict[type_] = i
assert in_dict[type_] == i
+ assert len(in_dict) == len(MANY_TYPES)
+
+
+def test_types_picklable():
+ for ty in MANY_TYPES:
+ data = pickle.dumps(ty)
+ assert pickle.loads(data) == ty
@pytest.mark.parametrize('t,check_func', [
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 1294850..a4391c7 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -247,6 +247,13 @@ cdef class TimestampType(DataType):
# Return DatetimeTZ
return pdcompat.make_datetimetz(self.tz)
+ def __getstate__(self):
+ return self.unit, self.tz
+
+ def __setstate__(self, state):
+ cdef DataType reconstituted = timestamp(*state)
+ self.init(reconstituted.sp_type)
+
cdef class Time32Type(DataType):
@@ -1192,6 +1199,7 @@ def union(children_fields, mode):
cdef dict _type_aliases = {
'null': null,
+ 'bool': bool_,
'i1': int8,
'int8': int8,
'i2': int16,
@@ -1208,9 +1216,14 @@ cdef dict _type_aliases = {
'uint32': uint32,
'u8': uint64,
'uint64': uint64,
+ 'f2': float16,
+ 'halffloat': float16,
+ 'float16': float16,
'f4': float32,
+ 'float': float32,
'float32': float32,
'f8': float64,
+ 'double': float64,
'float64': float64,
'string': string,
'str': string,
--
To stop receiving notification emails like this one, please contact
[email protected].