Repository: arrow
Updated Branches:
  refs/heads/master ced9d766d -> 2406d4eed
ARROW-552: [Python] Implement getitem for DictionaryArray by returning a value from the dictionary

Author: Miki Tebeka <[email protected]>
Author: Wes McKinney <[email protected]>

Closes #414 from wesm/ARROW-552 and squashes the following commits:

8a039b5 [Wes McKinney] Implement DictionaryArray.getitem by indexing into the dictionary. Add indices and dictionary properties
e700b45 [Miki Tebeka] ARROW-552: [Python] Add scalar value support for Dictionary type (WIP)


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2406d4ee
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2406d4ee
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2406d4ee

Branch: refs/heads/master
Commit: 2406d4eed9af41b1ef60c53834aced036a933327
Parents: ced9d76
Author: Miki Tebeka <[email protected]>
Authored: Wed Mar 22 14:06:42 2017 -0400
Committer: Wes McKinney <[email protected]>
Committed: Wed Mar 22 14:06:42 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/array.pxd             |  4 +++-
 python/pyarrow/array.pyx             | 25 +++++++++++++++++++++++++
 python/pyarrow/scalar.pyx            |  2 +-
 python/pyarrow/tests/test_scalars.py | 13 +++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index 56bb53d..c3e7997 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -109,7 +109,9 @@ cdef class BinaryArray(Array):
 
 
 cdef class DictionaryArray(Array):
-    pass
+    cdef:
+        object _indices, _dictionary
+
 
 
 cdef wrap_array_output(PyObject* output)

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 6afeaa0..795076c 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -406,6 +406,31 @@ cdef class BinaryArray(Array):
 
 cdef class DictionaryArray(Array):
 
+    cdef getitem(self, int64_t i):
+        cdef Array dictionary = self.dictionary
+        cdef int64_t index = self.indices[i].as_py()
+        return scalar.box_scalar(dictionary.type, dictionary.sp_array, index)
+
+    property dictionary:
+
+        def __get__(self):
+            cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
+
+            if self._dictionary is None:
+                self._dictionary = box_array(darr.dictionary())
+
+            return self._dictionary
+
+    property indices:
+
+        def __get__(self):
+            cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
+
+            if self._indices is None:
+                self._indices = box_array(darr.indices())
+
+            return self._indices
+
     @staticmethod
     def from_arrays(indices, dictionary, mask=None,
                     MemoryPool memory_pool=None):

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 8c88f90..1b7e67b 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -241,7 +241,7 @@ cdef dict _scalar_classes = {
     Type_DOUBLE: DoubleValue,
     Type_LIST: ListValue,
     Type_BINARY: BinaryValue,
-    Type_STRING: StringValue,
+    Type_STRING: StringValue
 }
 
 cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index ef600a0..d56481c 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -16,6 +16,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import pandas as pd
+
 from pyarrow.compat import unittest, u, unicode_type
 
 import pyarrow as A
@@ -100,3 +102,14 @@ class TestScalars(unittest.TestCase):
 
         v = arr[3]
         assert len(v) == 0
+
+    def test_dictionary(self):
+        colors = ['red', 'green', 'blue']
+        values = pd.Series(colors * 4)
+
+        categorical = pd.Categorical(values, categories=colors)
+
+        v = A.DictionaryArray.from_arrays(categorical.codes,
+                                          categorical.categories)
+        for i, c in enumerate(values):
+            assert v[i].as_py() == c
