Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/328378 )

Change subject: [WIP] Disallow adding hidden categories
......................................................................

[WIP] Disallow adding hidden categories

Filter out any hidden categories before they are added.
This also removes the need for separate de-duplication and
filtering out of BaseCategory.

To do:
* Rebuild tests since we now require the objects to be
  pywikibot.Category instances

Bug: T153746
Change-Id: I59b1849736b640337971d07c83f84f6a87b1b9d0
---
M erfgoedbot/categorize_images.py
1 file changed, 33 insertions(+), 30 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/78/328378/1

diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index 486ea17..9aa5de1 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -186,16 +186,22 @@
     return (newcats, categorisation_method)
 
 
-def replace_default_cat_with_new_categories_in_image(page, 
commonsCategoryBase, new_categories, comment, verbose=False):
-    oldtext = page.get()
-    categories_to_add = deduplicate_categories(new_categories)
-    categories_to_add = 
remove_base_category_from_categories_to_add_if_present(new_categories, 
commonsCategoryBase)
+def replace_default_cat_with_new_categories_in_image(
+        page, base_category, new_categories, comment, verbose=False):
+    old_text = page.get()
+    old_categories = list(page.categories())
+
+    # ensure base category is never re-added
+    old_categories.append(base_category)
+    categories_to_add = filter_out_categories_to_add(new_categories,
+                                                     old_categories)
     try:
-        final_text = 
replace_default_cat_with_new_categories_in_image_text(oldtext, 
commonsCategoryBase, categories_to_add)
+        final_text = replace_default_cat_with_new_categories_in_image_text(
+            old_text, base_category, categories_to_add)
     except NoCategoryToAddException:
         return False
     if verbose:
-        pywikibot.showDiff(oldtext, final_text)
+        pywikibot.showDiff(old_text, final_text)
     try:
         page.put(final_text, comment)
         return True
@@ -205,34 +211,31 @@
         return False
 
 
-def deduplicate_categories(categories):
-    return list(set(categories))
-
-
-def remove_base_category_from_categories_to_add_if_present(categories, 
base_category):
-    return set(categories) - set([base_category])
-
-
-def replace_default_cat_with_new_categories_in_image_text(oldtext, 
commonsCategoryBase, newcats):
-    if not newcats:
-        # No categories to add. We do not want to remove the base one
+def replace_default_cat_with_new_categories_in_image_text(
+        old_text, base_category, new_categories):
+    """Add new categories to page text and remove any base_category."""
+    if not new_categories:
+        # No categories to add. We do not want to remove the base one,
         raise NoCategoryToAddException()
-    # In any case we remove the base category
-    page_text_without_old_category = textlib.replaceCategoryInPlace(oldtext, 
commonsCategoryBase, None)
-    commons_site = pywikibot.Site(u'commons', u'commons')
-    currentcats = textlib.getCategoryLinks(oldtext, site=commons_site)
-    final_categories = filter_out_categories_to_add(newcats, currentcats)
-    if final_categories:
-        final_text = 
textlib.replaceCategoryLinks(page_text_without_old_category, final_categories, 
addOnly=True)
-    else:
-        final_text = page_text_without_old_category
+
+    # Remove base category
+    page_text_without_base_category = textlib.replaceCategoryInPlace(
+        old_text, base_category, None)
+    final_text = textlib.replaceCategoryLinks(
+        page_text_without_base_category, new_categories, addOnly=True)
     return final_text
 
 
-def filter_out_categories_to_add(new_categories, current_categories):
-    """Make sure we do not add categories that were already there."""
-    final_categories = set(new_categories) - set(current_categories)
-    return list(final_categories)
+def filter_out_categories_to_add(new_categories, unwanted_categories):
+    """
+    Ensure hidden, duplicate or already present categories are not added.
+
+    Requires the input to be lists of pywikibot.Category.
+    """
+    candidate_categories = set(new_categories) - set(unwanted_categories)
+    final_categories = filter(lambda cat: not cat.isHiddenCategory(),
+                              list(candidate_categories))
+    return final_categories
 
 
 def getMonData(countrycode, lang, monumentId, conn, cursor):

-- 
To view, visit https://gerrit.wikimedia.org/r/328378
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I59b1849736b640337971d07c83f84f6a87b1b9d0
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to