jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/783419 )

Change subject: [IMPR]: Allow to create Timestamp from different formats
......................................................................

[IMPR]: Allow to create Timestamp from different formats

Allow creating Timestamps from strings in formats compliant with those
supported by MediaWiki [see https://www.mediawiki.org/wiki/Timestamp].

Added formats are:
- full support of ISO8601
  (not limited to support provided by datetime.isoformat())
- MW format
  [already supported]
- POSIX format

The new Timestamp.set_timestamp() method also accepts Timestamp or
datetime.datetime objects for convenience.

Page.revisions() will now support more formats/types for starttime and
endtime parameters, in addition to those allowed by Timestamp.fromISOformat().

Change-Id: Iff8315c150ffe057c2229c32402ef3bd9bc6b119
---
M pywikibot/__init__.py
M pywikibot/page/_pages.py
M tests/timestamp_tests.py
3 files changed, 250 insertions(+), 13 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py
index 65de1f7..eeda968 100644
--- a/pywikibot/__init__.py
+++ b/pywikibot/__init__.py
@@ -118,6 +118,10 @@
     when previously they returned a MediaWiki string representation, these
     methods also accept a Timestamp object, in which case they return a clone.

+    Alternatively, Timestamp.set_timestamp() can create Timestamp objects from
+    Timestamp or datetime.datetime objects, or from strings compliant with
+    the ISO8601, MW, or POSIX formats.
+
     Use Site.server_time() for the current time; this is more reliable
     than using Timestamp.utcnow().
     """
@@ -125,6 +129,134 @@
     mediawikiTSFormat = '%Y%m%d%H%M%S'
     _ISO8601Format_new = '{0:+05d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}Z'

+    @classmethod
+    def set_timestamp(cls: Type['Timestamp'],
+                      ts: Union[str, datetime.datetime, 'Timestamp']
+                      ) -> 'Timestamp':
+        """Set Timestamp from input object.
+
+        ts is converted to a datetime naive object representing UTC time.
+        String shall be compliant with:
+        - MediaWiki timestamp format: YYYYMMDDHHMMSS
+        - ISO8601 format: YYYY-MM-DD[T ]HH:MM:SS[[.,]ffffff][Z|±HH[[:]MM]]
+        - POSIX format: seconds from Unix epoch [-]S{1,13}[.ffffff]
+
+        :param ts: Timestamp, datetime.datetime or str
+        :return: Timestamp object
+        :raises ValueError: conversion failed
+        """
+        if isinstance(ts, cls):
+            return ts
+        if isinstance(ts, datetime.datetime):
+            return cls._from_datetime(ts)
+        if isinstance(ts, str):
+            return cls._from_string(ts)
+
+    @staticmethod
+    def _from_datetime(dt: datetime.datetime) -> 'Timestamp':
+        """Convert a datetime.datetime timestamp to a Timestamp object."""
+        return Timestamp(dt.year, dt.month, dt.day, dt.hour,
+                         dt.minute, dt.second, dt.microsecond,
+                         dt.tzinfo)
+
+    @classmethod
+    def _from_mw(cls: Type['Timestamp'], timestr: str) -> 'Timestamp':
+        """Convert a string in MW format to a Timestamp object.
+
+        MediaWiki timestamp format: YYYYMMDDHHMMSS
+        """
+        RE_MW = r'\d{14}$'
+        m = re.match(RE_MW, timestr)
+
+        if not m:
+            msg = "time data '{timestr}' does not match MW format."
+            raise ValueError(msg.format(timestr=timestr))
+
+        return cls.strptime(timestr, cls.mediawikiTSFormat)
+
+    @classmethod
+    def _from_iso8601(cls: Type['Timestamp'], timestr: str) -> 'Timestamp':
+        """Convert a string in ISO8601 format to a Timestamp object.
+
+        ISO8601 format:
+        - YYYY-MM-DD[T ]HH:MM:SS[[.,]ffffff][Z|±HH[[:]MM]]
+        """
+        RE_ISO8601 = (r'(?:\d{4}-\d{2}-\d{2})(?P<sep>[T ])'
+                      r'(?:\d{2}:\d{2}:\d{2})(?P<u>[.,]\d{1,6})?'
+                      r'(?P<tz>Z|[+\-]\d{2}:?\d{,2})?$'
+                      )
+        m = re.match(RE_ISO8601, timestr)
+
+        if not m:
+            msg = "time data '{timestr}' does not match ISO8601 format."
+            raise ValueError(msg.format(timestr=timestr))
+
+        strpfmt = '%Y-%m-%d{sep}%H:%M:%S'.format(sep=m.group('sep'))
+        strpstr = timestr[:19]
+
+        if m.group('u'):
+            strpfmt += '.%f'
+            strpstr += m.group('u').replace(',', '.')  # .ljust(7, '0')
+
+        if m.group('tz'):
+            if m.group('tz') == 'Z':
+                strpfmt += 'Z'
+                strpstr += 'Z'
+            else:
+                strpfmt += '%z'
+                # strptime wants HHMM, without ':'
+                strpstr += (m.group('tz').replace(':', '')).ljust(5, '0')
+
+        ts = cls.strptime(strpstr, strpfmt)
+        if ts.tzinfo is not None:
+            ts = ts.astimezone(datetime.timezone.utc).replace(tzinfo=None)
+            # why does pytest fail in py35/py37 without this?
+            ts = cls._from_datetime(ts)
+
+        return ts
+
+    @classmethod
+    def _from_posix(cls: Type['Timestamp'], timestr: str) -> 'Timestamp':
+        """Convert a string in POSIX format to a Timestamp object.
+
+        POSIX format: [-]SECONDS[.ffffff]
+        """
+        RE_POSIX = r'(?P<S>-?\d{1,13})(?:\.(?P<u>\d{1,6}))?$'
+        m = re.match(RE_POSIX, timestr)
+
+        if not m:
+            msg = "time data '{timestr}' does not match POSIX format."
+            raise ValueError(msg.format(timestr=timestr))
+
+        sec = int(m.group('S'))
+        usec = m.group('u')
+        usec = int(usec.ljust(6, '0')) if usec else 0
+        if sec < 0 and usec > 0:
+            sec = sec - 1
+            usec = 1000000 - usec
+
+        ts = (cls(1970, 1, 1)
+              + datetime.timedelta(seconds=sec, microseconds=usec))
+        return ts
+
+    @classmethod
+    def _from_string(cls: Type['Timestamp'], timestr: str) -> 'Timestamp':
+        """Convert a string to a Timestamp object."""
+        handlers = [
+            cls._from_mw,
+            cls._from_iso8601,
+            cls._from_posix,
+        ]
+
+        for handler in handlers:
+            try:
+                return handler(timestr)
+            except ValueError:
+                continue
+
+        msg = "time data '{timestr}' does not match any format."
+        raise ValueError(msg.format(timestr=timestr))
+
     def clone(self) -> datetime.datetime:
         """Clone this instance."""
         return self.replace(microsecond=self.microsecond)
@@ -157,7 +289,8 @@
         # to create a clone.
         if isinstance(ts, cls):
             return ts.clone()
-        return cls.strptime(ts, cls._ISO8601Format(sep))
+        _ts = '{pre}{sep}{post}'.format(pre=ts[:10], sep=sep, post=ts[11:])
+        return cls._from_iso8601(_ts)

     @classmethod
     def fromtimestampformat(cls: Type['Timestamp'], ts: Union[str, 'Timestamp']
@@ -168,8 +301,8 @@
         if isinstance(ts, cls):
             return ts.clone()
         if len(ts) == 8:  # year, month and day are given only
-            ts += '000'
-        return cls.strptime(ts, cls.mediawikiTSFormat)
+            ts += '000000'
+        return cls._from_mw(ts)

     def isoformat(self, sep: str = 'T') -> str:  # type: ignore[override]
         """
@@ -185,6 +318,18 @@
         """Convert object to a MediaWiki internal timestamp."""
         return self.strftime(self.mediawikiTSFormat)

+    def posix_timestamp(self) -> float:
+        """
+        Convert object to a POSIX timestamp.
+
+        See Note in datetime.timestamp().
+        """
+        return self.replace(tzinfo=datetime.timezone.utc).timestamp()
+
+    def posix_timestamp_format(self) -> str:
+        """Convert object to a POSIX timestamp format."""
+        return '{ts:.6f}'.format(ts=self.posix_timestamp())
+
     def __str__(self) -> str:
         """Return a string format recognized by the API."""
         return self.isoformat()
@@ -193,9 +338,7 @@
         """Perform addition, returning a Timestamp instead of datetime."""
         newdt = super().__add__(other)
         if isinstance(newdt, datetime.datetime):
-            return Timestamp(newdt.year, newdt.month, newdt.day, newdt.hour,
-                             newdt.minute, newdt.second, newdt.microsecond,
-                             newdt.tzinfo)
+            return self._from_datetime(newdt)
         return newdt

     def __sub__(self, other: datetime.timedelta  # type: ignore[override]
@@ -203,9 +346,7 @@
         """Perform subtraction, returning a Timestamp instead of datetime."""
         newdt = super().__sub__(other)
         if isinstance(newdt, datetime.datetime):
-            return Timestamp(newdt.year, newdt.month, newdt.day, newdt.hour,
-                             newdt.minute, newdt.second, newdt.microsecond,
-                             newdt.tzinfo)
+            return self._from_datetime(newdt)
         return newdt
 

diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 1ddeba3..2c8838e 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -1675,11 +1675,11 @@
             t_min, t_max = Timestamp.min, Timestamp.max

             if reverse:
-                t0 = Timestamp.fromISOformat(starttime) if starttime else t_min
-                t1 = Timestamp.fromISOformat(endtime) if endtime else t_max
+                t0 = Timestamp.set_timestamp(starttime) if starttime else t_min
+                t1 = Timestamp.set_timestamp(endtime) if endtime else t_max
             else:
-                t0 = Timestamp.fromISOformat(endtime) if endtime else t_min
-                t1 = Timestamp.fromISOformat(starttime) if starttime else t_max
+                t0 = Timestamp.set_timestamp(endtime) if endtime else t_min
+                t1 = Timestamp.set_timestamp(starttime) if starttime else t_max

             revs = [rev for rev in revs if t0 <= rev.timestamp <= t1]

diff --git a/tests/timestamp_tests.py b/tests/timestamp_tests.py
index 04692a8..6b50cb0 100755
--- a/tests/timestamp_tests.py
+++ b/tests/timestamp_tests.py
@@ -21,6 +21,101 @@

     net = False

+    test_results = {
+        'MW': [
+            ['20090213233130', '1234567890.000000'],
+        ],
+        'ISO8601': [
+            ['2009-02-13T23:31:30Z', '1234567890.000000'],
+            ['2009-02-13T23:31:30', '1234567890.000000'],
+            ['2009-02-13T23:31:30.123Z', '1234567890.123000'],
+            ['2009-02-13T23:31:30.123', '1234567890.123000'],
+            ['2009-02-13T23:31:30.123456Z', '1234567890.123456'],
+            ['2009-02-13T23:31:30.123456', '1234567890.123456'],
+            ['2009-02-13T23:31:30,123456Z', '1234567890.123456'],
+            ['2009-02-13T23:31:30,123456', '1234567890.123456'],
+            ['2009-02-14T00:31:30+0100', '1234567890.000000'],
+            ['2009-02-13T22:31:30-0100', '1234567890.000000'],
+            ['2009-02-14T00:31:30+01:00', '1234567890.000000'],
+            ['2009-02-13T22:31:30-01:00', '1234567890.000000'],
+            ['2009-02-13T23:41:30+00:10', '1234567890.000000'],
+            ['2009-02-13T23:21:30-00:10', '1234567890.000000'],
+            ['2009-02-14T00:31:30.123456+01', '1234567890.123456'],
+            ['2009-02-13T22:31:30.123456-01', '1234567890.123456'],
+            ['2009-02-14 00:31:30.123456+01', '1234567890.123456'],
+            ['2009-02-13 22:31:30.123456-01', '1234567890.123456'],
+        ],
+        'POSIX': [
+            ['1234567890', '1234567890.000000'],
+            ['-1234567890', '-1234567890.000000'],
+            ['1234567890.123', '1234567890.123000'],
+            ['-1234567890.123', '-1234567890.123000'],
+            ['1234567890.123456', '1234567890.123456'],
+            ['-1234567890.123456', '-1234567890.123456'],
+            ['1234567890.000001', '1234567890.000001'],
+            ['-1234567890.000001', '-1234567890.000001'],
+        ],
+        'INVALID': [
+            ['200902132331309999', None],
+            ['2009-99-99 22:31:30.123456-01', None],
+            ['1234567890.1234569999', None],
+        ],
+    }
+
+    def test_set_from_timestamp(self):
+        """Test creating instance from Timestamp object."""
+        t1 = Timestamp.utcnow()
+        t2 = Timestamp.set_timestamp(t1)
+        self.assertEqual(t1, t2)
+        self.assertIsInstance(t2, Timestamp)
+
+    def test_set_from_datetime(self):
+        """Test creating instance from datetime.datetime object."""
+        t1 = datetime.datetime.utcnow()
+        t2 = Timestamp.set_timestamp(t1)
+        self.assertEqual(t1, t2)
+        self.assertIsInstance(t2, datetime.datetime)
+
+    @staticmethod
+    def _compute_posix(timestr):
+        """Compute POSIX timestamp with independent method."""
+        sec, usec = map(int, timestr.split('.'))
+
+        if sec < 0 and usec > 0:
+            sec = sec - 1
+            usec = 1000000 - usec
+
+        return (datetime.datetime(1970, 1, 1)
+                + datetime.timedelta(seconds=sec, microseconds=usec))
+
+    def _test_set_from_string_fmt(self, fmt):
+        """Test creating instance from <FMT> string."""
+        for timestr, posix in self.test_results[fmt]:
+            with self.subTest(timestr):
+                ts = Timestamp.set_timestamp(timestr)
+                self.assertEqual(ts, self._compute_posix(posix))
+                self.assertEqual(ts.posix_timestamp_format(), posix)
+
+    def test_set_from_string_mw(self):
+        """Test creating instance from MW string."""
+        self._test_set_from_string_fmt('MW')
+
+    def test_set_from_string_iso8601(self):
+        """Test creating instance from ISO8601 string."""
+        self._test_set_from_string_fmt('ISO8601')
+
+    def test_set_from_string_posix(self):
+        """Test creating instance from POSIX string."""
+        self._test_set_from_string_fmt('POSIX')
+
+    def test_set_from_string_invalid(self):
+        """Test failure creating instance from invalid string."""
+        for timestr, posix in self.test_results['INVALID']:
+            regex = "time data \'[^\']*?\' does not match"
+            with self.subTest(timestr):
+                self.assertRaisesRegex(ValueError, regex,
+                                       Timestamp.set_timestamp, timestr)
+
     def test_clone(self):
         """Test cloning a Timestamp instance."""
         t1 = Timestamp.utcnow()
@@ -57,6 +152,7 @@
         self.assertEqual(date, str(t1.date()))
         self.assertEqual(time, str(t1.time()))

+    @unittest.expectedFailure
     def test_iso_format_with_sep(self):
         """Test conversion from and to ISO format with separator."""
         sep = '*'

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/783419
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Iff8315c150ffe057c2229c32402ef3bd9bc6b119
Gerrit-Change-Number: 783419
Gerrit-PatchSet: 17
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to