http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11324
Revision: 11324
Author: xqt
Date: 2013-04-03 16:54:38 +0000 (Wed, 03 Apr 2013)
Log Message:
-----------
Remove code duplication in Page.templatesWithParams() by calling the
textlib.extract_templates_and_params() method instead.
Add a deprecation warning for the thistxt parameter: the textlib method
should be used instead.
Bugfix for templatesWithParams parser bug #3609794 and bug #3609685.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2013-04-03 16:18:30 UTC (rev 11323)
+++ trunk/pywikipedia/wikipedia.py 2013-04-03 16:54:38 UTC (rev 11324)
@@ -2838,118 +2838,12 @@
thistxt = self.get(get_redirect=get_redirect)
except (IsRedirectPage, NoPage):
return []
+ else:
+ output(u"""\
+thistxt argument of templatesWithParams is deprecated. Please use textlib method
+extract_templates_and_params() instead.""")
+ return extract_templates_and_params(thistxt)
- # remove commented-out stuff etc.
- thistxt = removeDisabledParts(thistxt)
-
- # marker for inside templates or parameters
- marker = findmarker(thistxt, u'@@', u'@')
-
- # marker for links
- marker2 = findmarker(thistxt, u'##', u'#')
-
- # marker for math
- marker3 = findmarker(thistxt, u'%%', u'%')
-
- result = []
- inside = {}
- count = 0
- Rtemplate = re.compile(
- ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]*?))?}}')
- Rlink = re.compile(ur'\[\[[^\]]+\]\]')
- Rmath = re.compile(ur'<math>[^<]+</math>')
- Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker))
- Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
- Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3))
-
- # Replace math with markers
- maths = {}
- count = 0
- for m in Rmath.finditer(thistxt):
- count += 1
- text = m.group()
- thistxt = thistxt.replace(text, '%s%d%s' % (marker3, count, marker3))
- maths[count] = text
-
- while Rtemplate.search(thistxt) is not None:
- for m in Rtemplate.finditer(thistxt):
- # Make sure it is not detected again
- count += 1
- text = m.group()
- thistxt = thistxt.replace(text,
- '%s%d%s' % (marker, count, marker))
- # Make sure stored templates don't contain markers
- for m2 in Rmarker.finditer(text):
- text = text.replace(m2.group(), inside[int(m2.group(1))])
- for m2 in Rmarker3.finditer(text):
- text = text.replace(m2.group(), maths[int(m2.group(1))])
- inside[count] = text
-
- # Name
- name = m.group('name').strip()
- m2 = Rmarker.search(name) or Rmath.search(name)
- if m2 is not None:
- # Doesn't detect templates whose name changes,
- # or templates whose name contains math tags
- continue
- if self.site().isInterwikiLink(name):
- continue
-
- # {{#if: }}
- if name.startswith('#'):
- continue
- # {{DEFAULTSORT:...}}
- defaultKeys = self.site().versionnumber() > 13 and \
- self.site().getmagicwords('defaultsort')
- # It seems some wikis does not have this magic key
- if defaultKeys:
- found = False
- for key in defaultKeys:
- if name.startswith(key):
- found = True
- break
- if found: continue
-
- try:
- name = Page(self.site(), name).title()
- except InvalidTitle:
- if name:
- output(
- u"Page %s contains invalid template name {{%s}}."
- % (self.title(), name.strip()))
- continue
- # Parameters
- paramString = m.group('params')
- params = []
- if paramString:
- # Replace links to markers
- links = {}
- count2 = 0
- for m2 in Rlink.finditer(paramString):
- count2 += 1
- text = m2.group()
- paramString = paramString.replace(text,
- '%s%d%s' % (marker2, count2, marker2))
- links[count2] = text
- # Parse string
- markedParams = paramString.split('|')
- # Replace markers
- for param in markedParams:
- for m2 in Rmarker.finditer(param):
- param = param.replace(m2.group(),
- inside[int(m2.group(1))])
- for m2 in Rmarker2.finditer(param):
- param = param.replace(m2.group(),
- links[int(m2.group(1))])
- for m2 in Rmarker3.finditer(param):
- param = param.replace(m2.group(),
- maths[int(m2.group(1))])
- params.append(param)
-
- # Add it to the result
- result.append((name, params))
- return result
-
def getRedirectTarget(self):
"""Return a Page object for the target this Page redirects to.
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn