jenkins-bot has submitted this change and it was merged.

Change subject: Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing
......................................................................


Bug 69551-add Vietnamese to timestripper_tests.py plus bug fixing

Added Vietnamese language to timestripper_tests.py.

Also fixed a bug in TimeStripper.last_match_and_replace() found
while debugging Vietnamese.

The month was not replaced correctly.
Added proper logic to handle months expressed either as digits or
as words.

Updated timestripper_tests.py to cover the logic in
last_match_and_replace(), using sites with the two formats.

Change-Id: I148663b7c694c499c194e993526ea8f928e4c174
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 130 insertions(+), 13 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index c6997a1..03c2d0a 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1201,9 +1201,11 @@
         # work around for cs wiki: if month are in digits, we assume
         # that format is dd. mm. (with dot and spaces optional)
         if any(_.isdigit() for _ in self.origNames2monthNum):
+            self.is_digit_month = True
             monthR = r'(?P<month>(%s)|\d{1,2}\.?)' % u'|'.join(escaped_months)
             dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9]))\.?\s*[01]?\d\.?'
         else:
+            self.is_digit_month = False
             monthR = r'(?P<month>(%s))' % u'|'.join(escaped_months)
             dayR = r'(?P<day>(3[01]|[12]\d|0?[1-9]))\.?'
 
@@ -1244,8 +1246,11 @@
             # replace all matches but the last two
             # (i.e. allow to search for dd. mm.)
             if pat == self.pmonthR:
-                if cnt > 2:
-                    txt = pat.sub(marker, txt, cnt - 2)
+                if self.is_digit_month:
+                    if cnt > 2:
+                        txt = pat.sub(marker, txt, cnt - 2)
+                else:
+                    txt = pat.sub(marker, txt)
             else:
                 txt = pat.sub(marker, txt)
             return (txt, m.groupdict())
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 1b3f171..9d12b04 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -16,13 +16,13 @@
 from pywikibot.textlib import TimeStripper, tzoneFixedOffset
 
 
-class TestTimeStripper(PywikibotTestCase):
-    """Test cases for Link objects"""
+class TestTimeStripperWithNoDigitsAsMonths(PywikibotTestCase):
+    """Test cases for TimeStripper methods"""
 
     def setUp(self):
         site = pywikibot.Site('fr', 'wikipedia')
         self.ts = TimeStripper(site)
-        super(TestTimeStripper, self).setUp()
+        super(TestTimeStripperWithNoDigitsAsMonths, self).setUp()
 
     def test_findmarker(self):
         """Test that string which is not part of text is found"""
@@ -32,15 +32,43 @@
                          '@@@@@@')
 
     def test_last_match_and_replace(self):
-        """Test that pattern matches the righmost item"""
+        """Test that pattern matches and removes items correctly."""
 
-        txtWithMatch = u'this string has one 1998, 1999 and 3000 in it'
+        txtWithOneMatch = u'this string has 3000, 1999 and 3000 in it'
+        txtWithTwoMatch = u'this string has 1998, 1999 and 3000 in it'
         txtWithNoMatch = u'this string has no match'
         pat = self.ts.pyearR
 
-        self.assertEqual(self.ts.last_match_and_replace(txtWithMatch, pat),
-                         (u'this string has one @@, @@ and 3000 in it',
+        self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+                         (u'this string has 3000, @@ and 3000 in it',
                           {'year': u'1999'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+                         (u'this string has @@, @@ and 3000 in it',
+                          {'year': u'1999'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
+                         (txtWithNoMatch,
+                          None)
+                         )
+
+        txtWithOneMatch = u'this string has XXX, YYY and fév in it'
+        txtWithTwoMatch = u'this string has XXX, mars and fév in it'
+        txtWithThreeMatch = u'this string has avr, mars and fév in it'
+        txtWithNoMatch = u'this string has no match'
+        pat = self.ts.pmonthR
+
+        self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+                         (u'this string has XXX, YYY and @@ in it',
+                          {'month': u'fév'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+                         (u'this string has XXX, @@ and @@ in it',
+                          {'month': u'fév'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat),
+                         (u'this string has @@, @@ and @@ in it',
+                          {'month': u'fév'})
                          )
         self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
                          (txtWithNoMatch,
@@ -62,8 +90,55 @@
         self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestTimeStripperWithDigitsAsMonths(PywikibotTestCase):
+    """Test cases for TimeStripper methods"""
+
+    def setUp(self):
+        site = pywikibot.Site('cs', 'wikipedia')
+        self.ts = TimeStripper(site)
+        super(TestTimeStripperWithDigitsAsMonths, self).setUp()
+
+    def test_last_match_and_replace(self):
+        """Test that pattern matches and removes items correctly."""
+
+        txtWithOneMatch = u'this string has XX. YY. 12. in it'
+        txtWithTwoMatch = u'this string has XX. 1. 12. in it'
+        txtWithThreeMatch = u'this string has 1. 1. 12. in it'
+        txtWithNoMatch = u'this string has no match'
+        pat = self.ts.pmonthR
+
+        self.assertEqual(self.ts.last_match_and_replace(txtWithOneMatch, pat),
+                         (u'this string has XX. YY. 12. in it',
+                          {'month': u'12.'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithTwoMatch, pat),
+                         (u'this string has XX. 1. 12. in it',
+                          {'month': u'12.'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithThreeMatch, pat),
+                         (u'this string has @@ 1. 12. in it',
+                          {'month': u'12.'})
+                         )
+        self.assertEqual(self.ts.last_match_and_replace(txtWithNoMatch, pat),
+                         (txtWithNoMatch,
+                          None)
+                         )
+
+    def test_timestripper(self):
+        txtMatch = u'3. 2. 2010, 19:48 (UTC) 7. 2. 2010 19:48 (UTC)'
+        txtNoMatch = u'3 March 2010 19:48 (UTC) 7 March 2010 19:48 (UTC)'
+
+        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+                                 self.ts.site.siteinfo['timezone'])
+
+        res = datetime.datetime(2010, 2, 7, 19, 48, tzinfo=tzone)
+
+        self.assertEqual(self.ts.timestripper(txtMatch), res)
+        self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+
 class TestEnglishTimeStripper(PywikibotTestCase):
-    """Test cases for Link objects"""
+    """Test cases for English language"""
 
     def setUp(self):
         site = pywikibot.Site('en', 'wikipedia')
@@ -86,7 +161,7 @@
 
 
 class TestCzechTimeStripper(PywikibotTestCase):
-    """Test cases for Link objects"""
+    """Test cases for Czech language"""
 
     def setUp(self):
         site = pywikibot.Site('cs', 'wikipedia')
@@ -109,7 +184,7 @@
 
 
 class TestPortugueseTimeStripper(PywikibotTestCase):
-    """Test cases for Link objects"""
+    """Test cases for Portuguese language"""
 
     def setUp(self):
         site = pywikibot.Site('pt', 'wikipedia')
@@ -132,7 +207,7 @@
 
 
 class TestNorwegianTimeStripper(PywikibotTestCase):
-    """Test cases for Link objects"""
+    """Test cases for Norwegian language"""
 
     def setUp(self):
         site = pywikibot.Site('no', 'wikipedia')
@@ -154,6 +229,43 @@
         self.assertEqual(self.ts.timestripper(txtNoMatch), None)
 
 
+class TestVietnameseTimeStripper(PywikibotTestCase):
+    """Test cases for Vietnamese language"""
+
+    def setUp(self):
+        site = pywikibot.Site('vi', 'wikipedia')
+        self.ts = TimeStripper(site)
+        super(TestVietnameseTimeStripper, self).setUp()
+
+    def test_timestripper_01(self):
+        """Test that correct date is matched"""
+
+        txtMatch = u'16:41, ngày 15 tháng 9 năm 2008 (UTC) 16:41, ngày 12 tháng 9 năm 2008 (UTC)'
+        txtNoMatch = u'16:41, ngày 15 March 9 năm 2008 (UTC) 16:41, ngày 12 March 9 năm 2008 (UTC)'
+
+        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+                                 self.ts.site.siteinfo['timezone'])
+
+        res = datetime.datetime(2008, 9, 12, 16, 41, tzinfo=tzone)
+
+        self.assertEqual(self.ts.timestripper(txtMatch), res)
+        self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+    def test_timestripper_02(self):
+        """Test that correct date is matched"""
+
+        txtMatch = u'21:18, ngày 13 tháng 8 năm 2014 (UTC) 21:18, ngày 14 tháng 8 năm 2014 (UTC)'
+        txtNoMatch = u'21:18, ngày 13 March 8 năm 2014 (UTC) 21:18, ngày 14 March 8 năm 2014 (UTC)'
+
+        tzone = tzoneFixedOffset(self.ts.site.siteinfo['timeoffset'],
+                                 self.ts.site.siteinfo['timezone'])
+
+        res = datetime.datetime(2014, 8, 14, 21, 18, tzinfo=tzone)
+
+        self.assertEqual(self.ts.timestripper(txtMatch), res)
+        self.assertEqual(self.ts.timestripper(txtNoMatch), None)
+
+
 if __name__ == '__main__':
     try:
         unittest.main()

-- 
To view, visit https://gerrit.wikimedia.org/r/154209
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I148663b7c694c499c194e993526ea8f928e4c174
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to