[
https://issues.apache.org/jira/browse/ARROW-1791?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16257932#comment-16257932
]
ASF GitHub Bot commented on ARROW-1791:
---------------------------------------
wesm closed pull request #1328: ARROW-1791: Limit generated data range to
physical limits for temporal types
URL: https://github.com/apache/arrow/pull/1328
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/integration/integration_test.py b/integration/integration_test.py
index 205176ecc..46d010608 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -231,10 +231,19 @@ class DateType(IntegerType):
DAY = 0
MILLISECOND = 1
+ # 1/1/1 to 12/31/9999
+ _ranges = {
+ DAY: [-719162, 2932896],
+ MILLISECOND: [-62135596800000, 253402214400000]
+ }
+
def __init__(self, name, unit, nullable=True):
bit_width = 32 if unit == self.DAY else 64
+
+ min_value, max_value = self._ranges[unit]
super(DateType, self).__init__(
- name, True, bit_width, nullable=nullable
+ name, True, bit_width, nullable=nullable,
+ min_value=min_value, max_value=max_value
)
self.unit = unit
@@ -262,10 +271,19 @@ class TimeType(IntegerType):
'ns': 64
}
+ _ranges = {
+ 's': [0, 86400],
+ 'ms': [0, 86400000],
+ 'us': [0, 86400000000],
+ 'ns': [0, 86400000000000]
+ }
+
def __init__(self, name, unit='s', nullable=True):
- super(TimeType, self).__init__(
- name, True, self.BIT_WIDTHS[unit], nullable=nullable
- )
+ min_val, max_val = self._ranges[unit]
+ super(TimeType, self).__init__(name, True, self.BIT_WIDTHS[unit],
+ nullable=nullable,
+ min_value=min_val,
+ max_value=max_val)
self.unit = unit
def _get_type(self):
@@ -278,8 +296,21 @@ def _get_type(self):
class TimestampType(IntegerType):
+ # 1/1/1 to 12/31/9999
+ _ranges = {
+ 's': [-62135596800, 253402214400],
+ 'ms': [-62135596800000, 253402214400000],
+ 'us': [-62135596800000000, 253402214400000000],
+
+ # Physical range for int64, ~584 years and change
+ 'ns': [np.iinfo('int64').min, np.iinfo('int64').max]
+ }
+
def __init__(self, name, unit='s', tz=None, nullable=True):
- super(TimestampType, self).__init__(name, True, 64, nullable=nullable)
+ min_val, max_val = self._ranges[unit]
+ super(TimestampType, self).__init__(name, True, 64, nullable=nullable,
+ min_value=min_val,
+ max_value=max_val)
self.unit = unit
self.tz = tz
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Integration tests generate date[DAY] values outside of reasonable range
> -----------------------------------------------------------------------
>
> Key: ARROW-1791
> URL: https://issues.apache.org/jira/browse/ARROW-1791
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Reporter: Wes McKinney
> Assignee: Wes McKinney
> Labels: pull-request-available
> Fix For: 0.8.0
>
>
> The integration tests are generating random int32 values for date[DAY]
> fields, but for systems that use millisecond-based date objects (like
> JavaScript), converting to a millisecond date will cause an overflow in a
> lot of cases. We should generate values that are within a reasonable year
> range so that overflows when converting to milliseconds do not occur.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)