paleolimbot commented on code in PR #417:
URL: https://github.com/apache/arrow-nanoarrow/pull/417#discussion_r1559963929


##########
python/src/nanoarrow/iterator.py:
##########
@@ -244,6 +244,126 @@ def _binary_iter(self, offset, length):
             for start, end in zip(starts, ends):
                 yield bytes(data[start:end])
 
+    def _date_iter(self, offset, length):
+        from datetime import date, timedelta
+
+        storage = self._primitive_iter(offset, length)
+        epoch = date(1970, 1, 1)
+
+        if self._schema_view.type_id == CArrowType.DATE32:
+            for item in storage:
+                if item is None:
+                    yield item
+                else:
+                    yield epoch + timedelta(item)
+        else:
+            for item in storage:
+                if item is None:
+                    yield item
+                else:
+                    yield epoch + timedelta(milliseconds=item)
+
+    def _time_iter(self, offset, length):
+        from datetime import time
+
+        for item in self._iter_datetime_components(offset, length):
+            if item is None:
+                yield None
+            else:
+                days, hours, mins, secs, us = item
+                yield time(hours, mins, secs, us)
+
+    def _timestamp_iter(self, offset, length):
+        from datetime import datetime
+
+        fromtimestamp = datetime.fromtimestamp
+        storage = self._primitive_iter(offset, length)
+
+        unit = self._schema_view.time_unit
+        if unit == "s":
+            scale = 1
+        elif unit == "ms":
+            scale = 1000
+        elif unit == "us":
+            scale = 1_000_000
+        elif unit == "ns":
+            storage = _scale_and_round_maybe_none(storage, 0.001)
+            scale = 1_000_000
+
+        tz = self._schema_view.timezone
+        if tz:
+            tz = _get_tzinfo(tz)
+            tz_fromtimestamp = tz
+        else:
+            tz = None
+            tz_fromtimestamp = _get_tzinfo("UTC")
+
+        for parent in storage:
+            if parent is None:
+                yield None
+            else:
+                s = parent // scale
+                us = parent % scale * (1_000_000 // scale)
+                yield fromtimestamp(s, tz_fromtimestamp).replace(
+                    microsecond=us, tzinfo=tz
+                )
+
+    def _duration_iter(self, offset, length):
+        from datetime import timedelta
+
+        storage = self._primitive_iter(offset, length)
+
+        unit = self._schema_view.time_unit
+        if unit == "s":
+            to_us = 1_000_000

Review Comment:
   I think there is a lot that could be optimized here... this pass is mostly
for completeness/correctness. This is probably a job for C or C++ and the Python
C API, where we can do some of these things efficiently.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to