Ricordisamoa has uploaded a new change for review.
https://gerrit.wikimedia.org/r/118795
Change subject: add option to detect edited section automagically
......................................................................
add option to detect edited section automagically
new functions:
textlib.split_into_sections
textlib.modified_sections
new hook:
Page._modified_section_hook
new configuration key:
modified_section
Change-Id: I7a157afaf985902d1c19d2d92f3c314e0d2f6ecd
---
M pywikibot/config2.py
M pywikibot/page.py
M pywikibot/textlib.py
3 files changed, 78 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/95/118795/1
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index bf6c63f..69347f2 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -555,6 +555,14 @@
cosmetic_changes_deny_script = ['category_redirect', 'cosmetic_changes',
'touch']
+############## MODIFIED SECTION SETTINGS ##############
+# The bot can automatically detect the edited section (if only a single
+# section was changed) and mention it in the edit summary.
+
+# This is an experimental feature; handle with care and consider re-checking
+# each bot edit if enabling this!
+modified_section = False
+
############## REPLICATION BOT ################
# You can add replicate_replace to your user_config.py, which has the following
# format:
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 6c0178d..6949513 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -847,6 +847,11 @@
link = self.title(asLink=True)
if config.cosmetic_changes:
comment = self._cosmetic_changes_hook(comment) or comment
+ if config.modified_section:
+ try:
+ comment = self._modified_section_hook(comment) or comment
+ except:
+ pass
try:
done = self.site.editpage(self, summary=comment, minor=minor,
watch=watchval, bot=botflag, **kwargs)
@@ -903,6 +908,15 @@
comment += i18n.twtranslate(self.site, 'cosmetic_changes-append')
return comment
+ def _modified_section_hook(self, comment):
+ oldText = self.getOldVersion(self.latestRevision())
+ newText = self.text
+ sectionsChanged = pywikibot.textlib.modified_sections(oldText, newText)
+ # if multiple sections changed, can't provide section edit summary
+ if sectionsChanged is not None and len(sectionsChanged) == 1:
+ comment = u'/* %s */ %s'%(sectionsChanged[0], comment)
+ return comment
+
def put(self, newtext, comment=u'', watchArticle=None, minorEdit=True,
botflag=None, force=False, async=False, callback=None, **kwargs):
"""Save the page with the contents of the first argument as the text.
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 725d676..355e6d0 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1109,6 +1109,62 @@
m = re.search("=+[ ']*%s[ ']*=+" % section, pagetext)
return bool(m)
+def split_into_sections(text):
+ """Parse some wikitext and return a tuple containing all sections found.
+
+ Each section is a tuple in the form: (
+ level (None for the leading section),
+ title (empty string for the leading section),
+ content (can be empty)
+ )
+
+ Similar to the 'Tools.SplitToSections' function found in
+ AutoWikiBrowser's WikiFunctions.
+
+ @param text: The wikitext to parse
+ @type text: unicode or string
+
+ """
+ regex = ur'(\=+)\s*(.+?)\s*(\=+)'
+ lines = text.split(u'\n')
+ sections = [[None, u'', u'']]
+ for line in lines:
+ match = re.match(regex, line)
+ if match:
+ level = min(len(match.group(1)), len(match.group(3)))
+ match = re.match(ur'(%s)\s*(.+?)\s*(%s)'%(ur'\=' * level, ur'\=' *
level), line)
+ sections.append([level, match.group(2), u''])
+ else:
+ sections[-1][2] += ('' if sections[-1][2] == '' else '\n') + line
+ return tuple(tuple(section) for section in sections)
+
+def modified_sections(oldText, newText):
+ """Return a tuple of section titles that have been changed.
+ Only changes to section content are taken into account,
+ while heading levels and spacing are ignored.
+
+ Similar to the 'Summary.ModifiedSection' function found in
+ AutoWikiBrowser's WikiFunctions.
+
+ @param oldText: The starting wikitext
+ @type oldText: unicode or string
+ @param newText: The final wikitext with edited sections
+ @type newText: unicode or string
+
+ """
+ sectionsBefore = split_into_sections(oldText)
+ sectionsAfter = split_into_sections(newText)
+ if sectionsBefore is None or sectionsAfter is None:
+ return
+ # if number of sections has changed, can't provide section edit summary
+ if len(sectionsAfter) != len(sectionsBefore):
+ return
+ sectionsChanged = []
+ for i, section in enumerate(sectionsBefore):
+ if sectionsAfter[i][1] != section[1]:
+ return
+ if sectionsAfter[i][2] != section[2]:
+ sectionsChanged.append(section[1])
+ return tuple(sectionsChanged)
+
#---------------------------------
# Time parsing functionality (Archivebot)
--
To view, visit https://gerrit.wikimedia.org/r/118795
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7a157afaf985902d1c19d2d92f3c314e0d2f6ecd
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits