date64 Array from iterable [arrow-nanoarrow]

via GitHub Fri, 31 May 2024 12:44:04 -0700


paleolimbot commented on code in PR #502:
URL: https://github.com/apache/arrow-nanoarrow/pull/502#discussion_r1622862449



##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -483,13 +483,13 @@ cdef int c_format_from_arrow_type(ArrowType type_id, int 
element_size_bits, size
     elif type_id == NANOARROW_TYPE_UINT16:
         format_const = "H"
         element_size_bits_calc = 16
-    elif type_id in (NANOARROW_TYPE_INT32, NANOARROW_TYPE_INTERVAL_MONTHS):
+    elif type_id in (NANOARROW_TYPE_INT32, NANOARROW_TYPE_INTERVAL_MONTHS, 
NANOARROW_TYPE_DATE32):

Review Comment:
   We should add a comment here to clarify what this function is doing, but I 
believe that `NANOARROW_TYPE_DATE32` does not *quite* belong here (arguably 
`NANOARROW_TYPE_INTERVAL_MONTHS` should not be here either, which has come up 
in previous reviews, and may change). See below for an alternative (I think 
adding a property to the schema view might be a better fit).



##########
python/tests/test_c_array.py:
##########
@@ -514,3 +516,36 @@ def test_c_array_from_buffers_validation():
             validation_level=validation_level,
         )
         assert c_array.length == 2
+
+def test_c_array_timestamp_seconds():
+    # Timestamp is a 64-bit signed integer representing 
+    # an elapsed time since a fixed epoch
+    d1 = int(round(datetime(1970, 1, 1).timestamp()))
+    d2 = int(round(datetime(1985, 12, 31).timestamp()))
+    d3 = int(round(datetime(2005, 3, 4).timestamp()))
+
+    c_array = na.c_array([d1, d2, d3], na.timestamp('s'))
+    c_array_from_c_array = na.c_array(c_array)
+    assert c_array_from_c_array.length == c_array.length
+    assert c_array_from_c_array.buffers == c_array.buffers
+    assert list(c_array.view().buffer(1)) == [28800, 504864000, 1109923200]
+
+def test_c_array_timestamp_milliseconds():    
+    d1 = int(round(datetime(1970, 1, 1).timestamp() * 1e3))
+    d2 = int(round(datetime(1985, 12, 31).timestamp() * 1e3))
+    d3 = int(round(datetime(2005, 3, 4).timestamp() * 1e3))
+    c_array = na.c_array([d1, d2, d3], na.timestamp('ms'))
+    c_array_from_c_array = na.c_array(c_array)
+    assert c_array_from_c_array.length == c_array.length
+    assert c_array_from_c_array.buffers == c_array.buffers
+    assert list(c_array.view().buffer(1)) == [28800e3, 504864000e3, 
1109923200e3]
+
+def test_c_array_timestamp_microseconds():    
+    d1 = int(round(datetime(1970, 1, 1).timestamp() * 1e6))
+    d2 = int(round(datetime(1985, 12, 31).timestamp() * 1e6))
+    d3 = int(round(datetime(2005, 3, 4).timestamp() * 1e6))
+    c_array = na.c_array([d1, d2, d3], na.timestamp('us'))
+    c_array_from_c_array = na.c_array(c_array)
+    assert c_array_from_c_array.length == c_array.length
+    assert c_array_from_c_array.buffers == c_array.buffers
+    assert list(c_array.view().buffer(1)) == [28800e6, 504864000e6, 
1109923200e6]

Review Comment:
   Are nanoseconds supported by this code path?



##########
python/tests/test_c_array.py:
##########
@@ -514,3 +516,36 @@ def test_c_array_from_buffers_validation():
             validation_level=validation_level,
         )
         assert c_array.length == 2
+
+def test_c_array_timestamp_seconds():
+    # Timestamp is a 64-bit signed integer representing 
+    # an elapsed time since a fixed epoch
+    d1 = int(round(datetime(1970, 1, 1).timestamp()))
+    d2 = int(round(datetime(1985, 12, 31).timestamp()))
+    d3 = int(round(datetime(2005, 3, 4).timestamp()))
+
+    c_array = na.c_array([d1, d2, d3], na.timestamp('s'))
+    c_array_from_c_array = na.c_array(c_array)
+    assert c_array_from_c_array.length == c_array.length
+    assert c_array_from_c_array.buffers == c_array.buffers
+    assert list(c_array.view().buffer(1)) == [28800, 504864000, 1109923200]
+
+def test_c_array_timestamp_milliseconds():    
+    d1 = int(round(datetime(1970, 1, 1).timestamp() * 1e3))
+    d2 = int(round(datetime(1985, 12, 31).timestamp() * 1e3))
+    d3 = int(round(datetime(2005, 3, 4).timestamp() * 1e3))
+    c_array = na.c_array([d1, d2, d3], na.timestamp('ms'))
+    c_array_from_c_array = na.c_array(c_array)
+    assert c_array_from_c_array.length == c_array.length
+    assert c_array_from_c_array.buffers == c_array.buffers
+    assert list(c_array.view().buffer(1)) == [28800e3, 504864000e3, 
1109923200e3]

Review Comment:
   Are the numbers here `[d1, d2, d3]`? (and in the test below)



##########
python/src/nanoarrow/c_array.py:
##########
@@ -549,6 +549,9 @@ def _append_using_buffer_builder(self, obj: Iterable) -> 
None:
     CArrowType.UINT64: "_append_using_array",
     CArrowType.FLOAT: "_append_using_array",
     CArrowType.DOUBLE: "_append_using_array",
+    CArrowType.TIMESTAMP: "_append_using_array",
+    CArrowType.DATE32: "_append_using_array",
+    CArrowType.DATE64: "_append_using_array"

Review Comment:
   Does `CArrowType.DURATION` belong in this list?



##########
python/tests/test_array.py:
##########
@@ -354,3 +354,86 @@ def test_array_inspect(capsys):
     array.inspect()
     captured = capsys.readouterr()
     assert captured.out.startswith("<ArrowArray struct<col0: int32")
+
+
+def test_array_using_struct(capsys):

Review Comment:
   I am a little unclear what this is trying to test...would it be sufficient 
to test `na.c_array([d1, d2], na.timestamp('ms'))` and its corresponding 
`to_pysequence()`? (Same question for the tests below)



##########
python/src/nanoarrow/_lib.pyx:
##########
@@ -1069,6 +1069,12 @@ cdef class CSchemaView:
         NANOARROW_TYPE_SPARSE_UNION
     )
 
+    _buffer_protocol_supported_types = (
+        NANOARROW_TYPE_DATE32,
+        NANOARROW_TYPE_DATE64,
+        NANOARROW_TYPE_TIMESTAMP
+    )

Review Comment:
   How about adding a `CSchemaView.storage_buffer_format`? I think these are 
perhaps more accurately types whose storage can be represented by the Python 
buffer protocol (however, the types themselves do not have a dedicated buffer 
protocol format string).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(python) : Added support to create timestamp/date32/date64 Array from iterable [arrow-nanoarrow]

Reply via email to