Revision: 7487
Author: xqt
Date: 2009-10-17 12:09:43 +0000 (Sat, 17 Oct 2009)
Log Message:
-----------
solve some indentation errors coming with r6540 (bug #2840435)
Modified Paths:
--------------
trunk/pywikipedia/standardize_notes.py
Modified: trunk/pywikipedia/standardize_notes.py
===================================================================
--- trunk/pywikipedia/standardize_notes.py 2009-10-17 11:10:07 UTC (rev 7486)
+++ trunk/pywikipedia/standardize_notes.py 2009-10-17 12:09:43 UTC (rev 7487)
@@ -46,7 +46,7 @@
# 2005-07-15: Build list of all sections which may contain citations:
doFindAllCitationSections(). (SEWilco)
#
-from __future__ import generators
+#from __future__ import generators
import subprocess, sys, re, random
import socket, urllib, robotparser
import wikipedia, pagegenerators, config
@@ -378,7 +378,7 @@
wikipedia.output( u"Reading existing Notes section" )
self.doReadReferencesSection( new_text, refsectionname )
while self.references and self.references[len(self.references)-1] ==
u'\n':
- del self.references[len(self.references)-1] # delete
trailing empty lines
+ del self.references[len(self.references)-1] # delete trailing
empty lines
# Convert any external links to footnote references
wikipedia.output( u"Converting external links" )
new_text = self.doConvertExternalLinks( new_text )
@@ -416,53 +416,53 @@
new_text = new_text + text_line # skip section, so
retain text.
else:
# TODO: recognize {{inline}} invisible footnotes when
something can be done with them
- #
- # Ignore lines within comments
- if not text_line.startswith( u'<!--' ):
- # Fix erroneous external links in double brackets
- Rextlink = re.compile(r'(?i)\[\[(?P<linkname>http://[^\]]+?)\]\]')
- # TODO: compiling the regex each time might be inefficient
- text_lineR = re.compile(Rextlink)
- MOextlink = text_lineR.search(text_line)
- while MOextlink: # find all links on line
- extlink_linkname = MOextlink.group('linkname')
- # Rewrite double brackets to single ones
- text_line=text_line[:MOextlink.start()] + '[%s]' %
extlink_linkname + text_line[MOextlink.end(0):]
- MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1)
- # Regular expression to look for external link [linkname linktext]
- linktext is optional.
- # Also accepts erroneous pipe symbol as separator.
- # Accepts wikilinks within <linktext>
- #Rextlink = re.compile(r'[^\[]\[(?P<linkname>[h]*[ft]+tp:[^
[\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\][^\]]')
- #Rextlink = re.compile(r'\[(?P<linkname>[h]*[ft]+tp:[^
[\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
- Rextlink = re.compile(r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^
[\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
- # TODO: compiling the regex each time might be inefficient
- text_lineR = re.compile(Rextlink)
- MOextlink = text_lineR.search(text_line)
- while MOextlink: # find all links on line
- extlink_linkname = MOextlink.group('linkname')
- extlink_linktext = MOextlink.group('linktext')
- self.refsequence += 1
- ( refname, reftext ) =
self.doConvertLinkTextToReference(self.refsequence, extlink_linkname,
extlink_linktext)
- self.references.append( reftext ) # append new entry to
References
- if extlink_linktext:
- # If there was text as part of link, reinsert text before
footnote.
- text_line=text_line[:MOextlink.start(0)] + '%s{{ref|%s}}' %
(extlink_linktext, refname) + text_line[MOextlink.end(0):]
- else:
- text_line=text_line[:MOextlink.start(0)] + '{{ref|%s}}' %
refname + text_line[MOextlink.end(0):]
- MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1)
- # Search for {{doi}}
- Rdoi = re.compile(r'(?i){{doi\|(?P<doilink>[^}|]*)}}')
- # TODO: compiling the regex each time might be inefficient
- doiR = re.compile(Rdoi)
- MOdoi = doiR.search(text_line)
- while MOdoi: # find all doi on line
- doi_link = MOdoi.group('doilink')
- if doi_link:
- self.refsequence += 1
- ( refname, reftext ) = self.doConvertDOIToReference(
self.refsequence, doi_link )
- self.references.append( reftext ) # append new entry to
References
- text_line=text_line[:MOdoi.start(0)] + '{{ref|%s}}' % refname
+ text_line[MOdoi.end(0):]
- MOdoi = doiR.search(text_line, MOdoi.start(0)+1)
+ #
+ # Ignore lines within comments
+ if not text_line.startswith( u'<!--' ):
+ # Fix erroneous external links in double brackets
+ Rextlink =
re.compile(r'(?i)\[\[(?P<linkname>http://[^\]]+?)\]\]')
+ # TODO: compiling the regex each time might be inefficient
+ text_lineR = re.compile(Rextlink)
+ MOextlink = text_lineR.search(text_line)
+ while MOextlink: # find all links on line
+ extlink_linkname = MOextlink.group('linkname')
+ # Rewrite double brackets to single ones
+ text_line=text_line[:MOextlink.start()] + '[%s]' %
extlink_linkname + text_line[MOextlink.end(0):]
+ MOextlink =
text_lineR.search(text_line,MOextlink.start(0)+1)
+ # Regular expression to look for external link [linkname
linktext] - linktext is optional.
+ # Also accepts erroneous pipe symbol as separator.
+ # Accepts wikilinks within <linktext>
+ #Rextlink =
re.compile(r'[^\[]\[(?P<linkname>[h]*[ft]+tp:[^ [\]\|]+?)(?P<linktext>[ \|]+((
*[^\]\|]*)|( *\[\[.+?\]\])*)+)*\][^\]]')
+ #Rextlink = re.compile(r'\[(?P<linkname>[h]*[ft]+tp:[^
[\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
+ Rextlink = re.compile(r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^
[\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
+ # TODO: compiling the regex each time might be inefficient
+ text_lineR = re.compile(Rextlink)
+ MOextlink = text_lineR.search(text_line)
+ while MOextlink: # find all links on line
+ extlink_linkname = MOextlink.group('linkname')
+ extlink_linktext = MOextlink.group('linktext')
+ self.refsequence += 1
+ ( refname, reftext ) =
self.doConvertLinkTextToReference(self.refsequence, extlink_linkname,
extlink_linktext)
+ self.references.append( reftext ) # append new
entry to References
+ if extlink_linktext:
+ # If there was text as part of link, reinsert text
before footnote.
+ text_line=text_line[:MOextlink.start(0)] +
'%s{{ref|%s}}' % (extlink_linktext, refname) + text_line[MOextlink.end(0):]
+ else:
+ text_line=text_line[:MOextlink.start(0)] +
'{{ref|%s}}' % refname + text_line[MOextlink.end(0):]
+ MOextlink =
text_lineR.search(text_line,MOextlink.start(0)+1)
+ # Search for {{doi}}
+ Rdoi = re.compile(r'(?i){{doi\|(?P<doilink>[^}|]*)}}')
+ # TODO: compiling the regex each time might be inefficient
+ doiR = re.compile(Rdoi)
+ MOdoi = doiR.search(text_line)
+ while MOdoi: # find all doi on line
+ doi_link = MOdoi.group('doilink')
+ if doi_link:
+ self.refsequence += 1
+ ( refname, reftext ) =
self.doConvertDOIToReference( self.refsequence, doi_link )
+ self.references.append( reftext ) # append
new entry to References
+ text_line=text_line[:MOdoi.start(0)] +
'{{ref|%s}}' % refname + text_line[MOdoi.end(0):]
+ MOdoi = doiR.search(text_line, MOdoi.start(0)+1)
new_text = new_text + text_line # append new line to new
text
if new_text == '':
new_text = original_text # If somehow no new text, return
original text
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn