Ricordisamoa has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/118795

Change subject: add option to detect edited section automagically
......................................................................

add option to detect edited section automagically

new functions:
    textlib.split_into_sections
    textlib.modified_sections
new hook:
    Page._modified_section_hook
new configuration key:
    modified_section

Change-Id: I7a157afaf985902d1c19d2d92f3c314e0d2f6ecd
---
M pywikibot/config2.py
M pywikibot/page.py
M pywikibot/textlib.py
3 files changed, 78 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/95/118795/1

diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index bf6c63f..69347f2 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -555,6 +555,14 @@
 cosmetic_changes_deny_script = ['category_redirect', 'cosmetic_changes',
                                 'touch']
 
+############## MODIFIED SECTION SETTINGS ##############
+# The bot can automatically detect the edited section (if only a single one
+# was changed) and mention it in the edit summary.
+
+# This is an experimental feature; handle with care and consider re-checking
+# each bot edit if enabling this!
+modified_section = False
+
 ############## REPLICATION BOT ################
 # You can add replicate_replace to your user_config.py, which has the following
 # format:
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 6c0178d..6949513 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -847,6 +847,11 @@
         link = self.title(asLink=True)
         if config.cosmetic_changes:
             comment = self._cosmetic_changes_hook(comment) or comment
+        if config.modified_section:
+            try:
+                comment = self._modified_section_hook(comment) or comment
+            except:
+                pass
         try:
             done = self.site.editpage(self, summary=comment, minor=minor,
                                       watch=watchval, bot=botflag, **kwargs)
@@ -903,6 +908,15 @@
             comment += i18n.twtranslate(self.site, 'cosmetic_changes-append')
             return comment
 
+    def _modified_section_hook(self, comment):
+        oldText = self.getOldVersion(self.latestRevision())
+        newText = self.text
+        sectionsChanged = pywikibot.textlib.modified_sections(oldText, newText)
+        # if multiple sections changed, can't provide section edit summary
+        if sectionsChanged is not None and len(sectionsChanged) == 1:
+            comment = u'/* %s */ %s'%(sectionsChanged[0], comment)
+            return comment
+
     def put(self, newtext, comment=u'', watchArticle=None, minorEdit=True,
             botflag=None, force=False, async=False, callback=None, **kwargs):
         """Save the page with the contents of the first argument as the text.
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 725d676..355e6d0 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1109,6 +1109,62 @@
     m = re.search("=+[ ']*%s[ ']*=+" % section, pagetext)
     return bool(m)
 
+def split_into_sections(text):
+    """Parse some wikitext and return a tuple containing all sections found.
+
+    Each section is a tuple in the form: (
+        level (None for the leading section),
+        title (empty string for the leading section),
+        content (can be empty)
+    )
+
+    similar to the 'Tools.SplitToSections' function found in AutoWikiBrowser's WikiFunctions
+
+    @param text: The wikitext to parse
+    @type text: unicode or string
+
+    """
+    regex = ur'(\=+)\s*(.+?)\s*(\=+)'
+    lines = text.split(u'\n')
+    sections = [[None, u'', u'']]
+    for line in lines:
+        match = re.match(regex, line)
+        if match:
+            level = min(len(match.group(1)), len(match.group(3)))
+            match = re.match(ur'(%s)\s*(.+?)\s*(%s)'%(ur'\=' * level, ur'\=' * level), line)
+            sections.append([level, match.group(2), u''])
+        else:
+            sections[-1][2] += ('' if sections[-1][2] == '' else '\n') + line
+    return tuple(tuple(section) for section in sections)
+
+def modified_sections(oldText, newText):
+    """Return a tuple of section titles that have been changed.
+    Only changes to content are taken into account,
+    while heading levels and spacing are ignored.
+
+    similar to the 'Summary.ModifiedSection' function found in AutoWikiBrowser's WikiFunctions
+
+    @param oldText: The starting wikitext
+    @type oldText: unicode or string
+    @param newText: The final wikitext with edited sections
+    @type newText: unicode or string
+
+    """
+    sectionsBefore = split_into_sections(oldText)
+    sectionsAfter = split_into_sections(newText)
+    if sectionsBefore is None or sectionsAfter is None:
+        return
+    # if number of sections has changed, can't provide section edit summary
+    if len(sectionsAfter) != len(sectionsBefore):
+        return
+    sectionsChanged = []
+    for i, section in enumerate(sectionsBefore):
+        if sectionsAfter[i][1] != section[1]:
+            return
+        if sectionsAfter[i][2] != section[2]:
+            sectionsChanged.append(section[1])
+    return tuple(sectionsChanged)
+
 
 #---------------------------------
 # Time parsing functionality (Archivebot)

-- 
To view, visit https://gerrit.wikimedia.org/r/118795
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7a157afaf985902d1c19d2d92f3c314e0d2f6ecd
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to