[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)

2015-07-09 Thread Merlijn van Deen (Code Review)
Merlijn van Deen has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/223869

Change subject: Don't ignore DoNotArchiveUntil timestamps
..

Don't ignore DoNotArchiveUntil timestamps

Don't ignore timestamps written in HTML comments, e.g. as used by
DoNotArchiveUntil.

See:
- https://commons.wikimedia.org/wiki/Template:DNAU
- https://en.wikipedia.org/wiki/Template:Do_not_archive_until

Analyze comments separately from the rest of each line to avoid skipping
dates in comments, as the date matched by timestripper is the rightmost
one.

Bug: T102423
Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 72 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/69/223869/1

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 26b291f..a1d2c55 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1394,6 +1394,10 @@
 ]
 
 self.linkP = compileLinkR()
+self.comment_pattern = re.compile(r'!--(.*?)--')
+
+self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'],
+   self.site.siteinfo['timezone'])
 
 def findmarker(self, text, base=u'@@', delta='@'):
 Find a string which is not part of text.
@@ -1446,6 +1450,17 @@
 
 # match date fields
 dateDict = dict()
+# Analyze comments separately from rest of each line to avoid to skip
+# dates in comments, as the date matched by timestripper is the
+# rightmost one.
+most_recent = []
+for comment in self.comment_pattern.finditer(line):
+# Recursion levels can be maximum two. If a comment is found, it 
will
+# not for sure be found in the next level.
+# Nested comments are excluded by design.
+timestamp = self.timestripper(comment.group(1))
+most_recent.append(timestamp)
+
 # Remove parts that are not supposed to contain the timestamp, in order
 # to reduce false positives.
 line = removeDisabledParts(line)
@@ -1481,12 +1496,17 @@
  % (v, k))
 
 # find timezone
-dateDict['tzinfo'] = 
tzoneFixedOffset(self.site.siteinfo['timeoffset'],
-  
self.site.siteinfo['timezone'])
+dateDict['tzinfo'] = self.tzinfo
 
 timestamp = datetime.datetime(**dateDict)
-
 else:
 timestamp = None
 
+most_recent.append(timestamp)
+
+try:
+timestamp = max(ts for ts in most_recent if ts is not None)
+except ValueError:
+timestamp = None
+
 return timestamp
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 80d70e0..a3f7e77 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -231,6 +231,55 @@
 self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestTimeStripperDoNotArchiveUntil(TestCase):
+
+Test cases for Do Not Archive Until templates.
+
+See https://commons.wikimedia.org/wiki/Template:DNAU and
+https://en.wikipedia.org/wiki/Template:Do_not_archive_until.
+
+
+family = 'wikisource'
+code = 'en'
+
+cached = True
+
+username = '[[User:DoNotArchiveUntil]]'
+date = '06:57 06 June 2015 (UTC)'
+user_and_date = username + ' ' + date
+tzone = tzoneFixedOffset(0, 'UTC')
+
+def test_timestripper_match(self):
+Test that dates in comments  are correctly recognised.
+ts = TimeStripper(self.get_site())
+
+txt_match = '!-- [[User:Do___ArchiveUntil]] ' + self.date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- -- !-- ' + self.user_and_date + ' !-- --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- ' + self.user_and_date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+def test_timestripper_match_only(self):
+Test that latest date is used instead of other dates.
+ts = TimeStripper(self.get_site())
+
+later_date = '10:57 06 June 2015 (UTC)'
+txt_match = '!-- -- ' + self.user_and_date + ' !-- --' + later_date
+res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+earlier_date = '02:57 06 June 2015 (UTC)'
+txt_match = '!-- ' + self.user_and_date + ' -- ' + earlier_date
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+

[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)

2015-07-09 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged.

Change subject: Don't ignore DoNotArchiveUntil timestamps
..


Don't ignore DoNotArchiveUntil timestamps

Don't ignore timestamps written in HTML comments, e.g. as used by
DoNotArchiveUntil.

See:
- https://commons.wikimedia.org/wiki/Template:DNAU
- https://en.wikipedia.org/wiki/Template:Do_not_archive_until

Analyze comments separately from the rest of each line to avoid skipping
dates in comments, as the date matched by timestripper is the rightmost
one.

Bug: T102423
Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 72 insertions(+), 3 deletions(-)

Approvals:
  Merlijn van Deen: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 26b291f..a1d2c55 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1394,6 +1394,10 @@
 ]
 
 self.linkP = compileLinkR()
+self.comment_pattern = re.compile(r'!--(.*?)--')
+
+self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'],
+   self.site.siteinfo['timezone'])
 
 def findmarker(self, text, base=u'@@', delta='@'):
 Find a string which is not part of text.
@@ -1446,6 +1450,17 @@
 
 # match date fields
 dateDict = dict()
+# Analyze comments separately from rest of each line to avoid to skip
+# dates in comments, as the date matched by timestripper is the
+# rightmost one.
+most_recent = []
+for comment in self.comment_pattern.finditer(line):
+# Recursion levels can be maximum two. If a comment is found, it 
will
+# not for sure be found in the next level.
+# Nested comments are excluded by design.
+timestamp = self.timestripper(comment.group(1))
+most_recent.append(timestamp)
+
 # Remove parts that are not supposed to contain the timestamp, in order
 # to reduce false positives.
 line = removeDisabledParts(line)
@@ -1481,12 +1496,17 @@
  % (v, k))
 
 # find timezone
-dateDict['tzinfo'] = 
tzoneFixedOffset(self.site.siteinfo['timeoffset'],
-  
self.site.siteinfo['timezone'])
+dateDict['tzinfo'] = self.tzinfo
 
 timestamp = datetime.datetime(**dateDict)
-
 else:
 timestamp = None
 
+most_recent.append(timestamp)
+
+try:
+timestamp = max(ts for ts in most_recent if ts is not None)
+except ValueError:
+timestamp = None
+
 return timestamp
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 80d70e0..a3f7e77 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -231,6 +231,55 @@
 self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestTimeStripperDoNotArchiveUntil(TestCase):
+
+Test cases for Do Not Archive Until templates.
+
+See https://commons.wikimedia.org/wiki/Template:DNAU and
+https://en.wikipedia.org/wiki/Template:Do_not_archive_until.
+
+
+family = 'wikisource'
+code = 'en'
+
+cached = True
+
+username = '[[User:DoNotArchiveUntil]]'
+date = '06:57 06 June 2015 (UTC)'
+user_and_date = username + ' ' + date
+tzone = tzoneFixedOffset(0, 'UTC')
+
+def test_timestripper_match(self):
+Test that dates in comments  are correctly recognised.
+ts = TimeStripper(self.get_site())
+
+txt_match = '!-- [[User:Do___ArchiveUntil]] ' + self.date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- -- !-- ' + self.user_and_date + ' !-- --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- ' + self.user_and_date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+def test_timestripper_match_only(self):
+Test that latest date is used instead of other dates.
+ts = TimeStripper(self.get_site())
+
+later_date = '10:57 06 June 2015 (UTC)'
+txt_match = '!-- -- ' + self.user_and_date + ' !-- --' + later_date
+res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+earlier_date = '02:57 06 June 2015 (UTC)'
+txt_match = '!-- ' + self.user_and_date + ' -- ' + earlier_date
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+
 if __name__ == 

[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)

2015-06-19 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged.

Change subject: Don't ignore DoNotArchiveUntil timestamps
..


Don't ignore DoNotArchiveUntil timestamps

Don't ignore timestamps written in HTML comments, e.g. as used by
DoNotArchiveUntil.

See:
- https://commons.wikimedia.org/wiki/Template:DNAU
- https://en.wikipedia.org/wiki/Template:Do_not_archive_until

Analyze comments separately from the rest of each line to avoid skipping
dates in comments, as the date matched by timestripper is the rightmost
one.

Bug: T102423
Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 72 insertions(+), 3 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 2fae512..de627c2 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1609,6 +1609,10 @@
 ]
 
 self.linkP = compileLinkR()
+self.comment_pattern = re.compile(r'!--(.*?)--')
+
+self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'],
+   self.site.siteinfo['timezone'])
 
 def findmarker(self, text, base=u'@@', delta='@'):
 Find a string which is not part of text.
@@ -1661,6 +1665,17 @@
 
 # match date fields
 dateDict = dict()
+# Analyze comments separately from rest of each line to avoid to skip
+# dates in comments, as the date matched by timestripper is the
+# rightmost one.
+most_recent = []
+for comment in self.comment_pattern.finditer(line):
+# Recursion levels can be maximum two. If a comment is found, it 
will
+# not for sure be found in the next level.
+# Nested comments are excluded by design.
+timestamp = self.timestripper(comment.group(1))
+most_recent.append(timestamp)
+
 # Remove parts that are not supposed to contain the timestamp, in order
 # to reduce false positives.
 line = removeDisabledParts(line)
@@ -1696,12 +1711,17 @@
  % (v, k))
 
 # find timezone
-dateDict['tzinfo'] = 
tzoneFixedOffset(self.site.siteinfo['timeoffset'],
-  
self.site.siteinfo['timezone'])
+dateDict['tzinfo'] = self.tzinfo
 
 timestamp = datetime.datetime(**dateDict)
-
 else:
 timestamp = None
 
+most_recent.append(timestamp)
+
+try:
+timestamp = max(ts for ts in most_recent if ts is not None)
+except ValueError:
+timestamp = None
+
 return timestamp
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 80d70e0..a3f7e77 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -231,6 +231,55 @@
 self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestTimeStripperDoNotArchiveUntil(TestCase):
+
+Test cases for Do Not Archive Until templates.
+
+See https://commons.wikimedia.org/wiki/Template:DNAU and
+https://en.wikipedia.org/wiki/Template:Do_not_archive_until.
+
+
+family = 'wikisource'
+code = 'en'
+
+cached = True
+
+username = '[[User:DoNotArchiveUntil]]'
+date = '06:57 06 June 2015 (UTC)'
+user_and_date = username + ' ' + date
+tzone = tzoneFixedOffset(0, 'UTC')
+
+def test_timestripper_match(self):
+Test that dates in comments  are correctly recognised.
+ts = TimeStripper(self.get_site())
+
+txt_match = '!-- [[User:Do___ArchiveUntil]] ' + self.date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- -- !-- ' + self.user_and_date + ' !-- --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+txt_match = '!-- ' + self.user_and_date + ' --'
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+def test_timestripper_match_only(self):
+Test that latest date is used instead of other dates.
+ts = TimeStripper(self.get_site())
+
+later_date = '10:57 06 June 2015 (UTC)'
+txt_match = '!-- -- ' + self.user_and_date + ' !-- --' + later_date
+res = datetime.datetime(2015, 6, 6, 10, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+earlier_date = '02:57 06 June 2015 (UTC)'
+txt_match = '!-- ' + self.user_and_date + ' -- ' + earlier_date
+res = datetime.datetime(2015, 6, 6, 6, 57, tzinfo=self.tzone)
+self.assertEqual(ts.timestripper(txt_match), res)
+
+
 if __name__ == 

[MediaWiki-commits] [Gerrit] Don't ignore DoNotArchiveUntil timestamps - change (pywikibot/core)

2015-06-15 Thread Mpaa (Code Review)
Mpaa has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/218436

Change subject: Don't ignore DoNotArchiveUntil timestamps
..

Don't ignore DoNotArchiveUntil timestamps

Don't ignore DoNotArchiveUntil timestamps written in HTML comments.
See:
- https://commons.wikimedia.org/wiki/Template:DNAU
- https://en.wikipedia.org/wiki/Template:Do_not_archive_until

Bug: T102423
Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 33 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/36/218436/1

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 2fae512..0572538 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1609,6 +1609,7 @@
 ]
 
 self.linkP = compileLinkR()
+self.commentP = 
re.compile(r'!--.*?\[\[User:DoNotArchiveUntil\]\].*--')
 
 def findmarker(self, text, base=u'@@', delta='@'):
 Find a string which is not part of text.
@@ -1663,7 +1664,11 @@
 dateDict = dict()
 # Remove parts that are not supposed to contain the timestamp, in order
 # to reduce false positives.
-line = removeDisabledParts(line)
+do_not_archive_until = self.commentP.search(line)
+if do_not_archive_until:
+line = removeDisabledParts(line, include=['comments'])
+else:
+line = removeDisabledParts(line)
 line = self.linkP.sub('', line)  # remove external links
 
 line = self.fix_digits(line)
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 80d70e0..0df5c4f 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -231,6 +231,33 @@
 self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestTimeStripperDoNotArchiveUntil(TestCase):
+
+Test cases for Do Not Archive Until templates.
+
+See https://commons.wikimedia.org/wiki/Template:DNAU and
+https://en.wikipedia.org/wiki/Template:Do_not_archive_until.
+
+
+family = 'wikisource'
+code = 'en'
+
+cached = True
+
+def test_timestripper_match(self):
+Test that DNAU convention is correctly recognised.
+self.ts = TimeStripper(self.get_site())
+
+tzone = tzoneFixedOffset(0, 'UTC')
+
+txtNoMatch = u'!-- [[User:Do___ArchiveUntil]] 06:57 06 June 2015 
(UTC) --'
+self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+txtMatch = u'!-- [[User:DoNotArchiveUntil]] 06:57 06 June 2015 (UTC) 
--'
+res = datetime.datetime(2015, 6, 6, 06, 57, tzinfo=tzone)
+self.assertEqual(self.ts.timestripper(txtMatch), res)
+
+
 if __name__ == '__main__':
 try:
 unittest.main()

-- 
To view, visit https://gerrit.wikimedia.org/r/218436
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I079d9f6b636ac0a145dd04a3190a65c61b9d1b31
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa mpaa.w...@gmail.com

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits