jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/675527 )

Change subject: Revert "[cleanup] remove interwiki_graph.py"
......................................................................

Revert "[cleanup] remove interwiki_graph.py"

This reverts commit 078cc3d719699169a01889b7f0add0ed2cdaea8c.

Bug: T278675
Change-Id: Ibedf155ca1d0620de407d9538f7897048ea21fde
---
M .travis.yml
M docs/api_ref/pywikibot.rst
M pywikibot/CONTENT.rst
M pywikibot/README.rst
A pywikibot/interwiki_graph.py
M requirements.txt
M setup.py
M tests/__init__.py
A tests/interwiki_graph_tests.py
M tox.ini
10 files changed, 330 insertions(+), 0 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/.travis.yml b/.travis.yml
index 82ec627..37e5f18 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -132,6 +132,7 @@
         apt:
           packages:
             - djvulibre-bin
+            - graphviz
     - python: '3.6'
      env: LANGUAGE=en FAMILY=wpbeta PYWIKIBOT_SITE_ONLY=1 OAUTH_DOMAIN="en.wikipedia.beta.wmflabs.org"
     - python: '3.6'
diff --git a/docs/api_ref/pywikibot.rst b/docs/api_ref/pywikibot.rst
index 131221c..05b3f0b 100644
--- a/docs/api_ref/pywikibot.rst
+++ b/docs/api_ref/pywikibot.rst
@@ -95,6 +95,11 @@

 .. automodule:: pywikibot.i18n

+pywikibot.interwiki\_graph module
+---------------------------------
+
+.. automodule:: pywikibot.interwiki_graph
+
 pywikibot.logentries module
 ---------------------------

diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index eff397d..6ef9ee8 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -45,6 +45,8 @@
    | i18n.py                    | Helper functions for both the internal translation   |
    |                            | system and for TranslateWiki-based translations      |
    +----------------------------+------------------------------------------------------+
+    | interwiki_graph.py         | Possible create graph with interwiki.py script       |
+    +----------------------------+------------------------------------------------------+
    | logentries.py              | Objects representing Mediawiki log entries           |
    +----------------------------+------------------------------------------------------+
    | logging.py                 | Logging and output functions                         |
diff --git a/pywikibot/README.rst b/pywikibot/README.rst
index 82b2943..08b668f 100644
--- a/pywikibot/README.rst
+++ b/pywikibot/README.rst
@@ -15,6 +15,7 @@
 .. include:: CONTENT.rst

 **External software can be used with Pywikibot:**
+  * Pydot, Pyparsing and Graphviz for use with interwiki_graph.py
   * PyMySQL to access MySQL database for use with pagegenerators.py
   * google to access Google Web API for use with pagegenerators.py

diff --git a/pywikibot/interwiki_graph.py b/pywikibot/interwiki_graph.py
new file mode 100644
index 0000000..2acbf85
--- /dev/null
+++ b/pywikibot/interwiki_graph.py
@@ -0,0 +1,228 @@
+"""Module with the Graphviz drawing calls."""
+#
+# (C) Pywikibot team, 2006-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import itertools
+import threading
+
+from collections import Counter
+from typing import Optional
+
+import pywikibot
+
+from pywikibot import config2 as config
+
+try:
+    import pydot
+except ImportError as e:
+    pydot = e
+
+
+class GraphImpossible(Exception):
+
+    """Drawing a graph is not possible on your system."""
+
+
+class GraphSavingThread(threading.Thread):
+
+    """
+    Threaded graph renderer.
+
+    Rendering a graph can take extremely long. We use
+    multithreading because of that.
+
+    TODO: Find out if several threads running in parallel
+    can slow down the system too much. Consider adding a
+    mechanism to kill a thread if it takes too long.
+    """
+
+    def __init__(self, graph, origin):
+        """Initializer."""
+        super().__init__()
+        self.graph = graph
+        self.origin = origin
+
+    def run(self):
+        """Write graphs to the data directory."""
+        for fmt in config.interwiki_graph_formats:
+            filename = config.datafilepath(
+                'interwiki-graphs/' + getFilename(self.origin, fmt))
+            if self.graph.write(filename, prog='dot', format=fmt):
+                pywikibot.output('Graph saved as ' + filename)
+            else:
+                pywikibot.output('Graph could not be saved as ' + filename)
+
+
+class Subject:
+
+    """Data about a page with translations on multiple wikis."""
+
+    def __init__(self, origin=None):
+        """Initializer.
+
+        @param origin: the page on the 'origin' wiki
+        @type origin: pywikibot.page.Page
+        """
+        # Remember the "origin page"
+        self._origin = origin
+
+        # found_in is a dictionary where pages are keys and lists of
+        # pages are values. It stores where we found each page.
+        # As we haven't yet found a page that links to the origin page, we
+        # start with an empty list for it.
+        self.found_in = {}
+        if origin:
+            self.found_in = {origin: []}
+
+    @property
+    def origin(self):
+        """Page on the origin wiki."""
+        return self._origin
+
+    @origin.setter
+    def origin(self, value):
+        self._origin = value
+
+
+class GraphDrawer:
+
+    """Graphviz (dot) code creator."""
+
+    def __init__(self, subject):
+        """Initializer.
+
+        @param subject: page data to graph
+        @type subject: pywikibot.interwiki_graph.Subject
+
+        @raises GraphImpossible: pydot is not installed
+        """
+        if isinstance(pydot, ImportError):
+            raise GraphImpossible('pydot is not installed: {}.'.format(pydot))
+        self.graph = None
+        self.subject = subject
+
+    def getLabel(self, page):
+        """Get label for page."""
+        return '"{}:{}"'.format(page.site.code, page.title())
+
+    def _octagon_site_set(self):
+        """Build a list of sites with more than one valid page."""
+        page_list = self.subject.found_in.keys()
+
+        # Only track sites of normal pages
+        each_site = (page.site for page in page_list
+                     if page.exists() and not page.isRedirectPage())
+
+        return {x[0] for x in itertools.takewhile(
+            lambda x: x[1] > 1,
+            Counter(each_site).most_common())}
+
+    def addNode(self, page):
+        """Add a node for page."""
+        node = pydot.Node(self.getLabel(page), shape='rectangle')
+        node.set_URL('"http://%s%s";'
+                     % (page.site.hostname(),
+                        page.site.get_address(page.title(as_url=True))))
+        node.set_style('filled')
+        node.set_fillcolor('white')
+        node.set_fontsize('11')
+        if not page.exists():
+            node.set_fillcolor('red')
+        elif page.isRedirectPage():
+            node.set_fillcolor('blue')
+        elif page.isDisambig():
+            node.set_fillcolor('orange')
+        if page.namespace() != self.subject.origin.namespace():
+            node.set_color('green')
+            node.set_style('filled,bold')
+        if page.site in self.octagon_sites:
+            # mark conflict by octagonal node
+            node.set_shape('octagon')
+        self.graph.add_node(node)
+
+    def addDirectedEdge(self, page, refPage):
+        """Add a directed edge from refPage to page."""
+        # if page was given as a hint, referrers would be [None]
+        if refPage is not None:
+            sourceLabel = self.getLabel(refPage)
+            targetLabel = self.getLabel(page)
+            edge = pydot.Edge(sourceLabel, targetLabel)
+
+            oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel)
+            if oppositeEdge:
+                oppositeEdge = oppositeEdge[0]
+                oppositeEdge.set_dir('both')
+            # workaround for sf.net bug 401: prevent duplicate edges
+            # (it is unclear why duplicate edges occur)
+            # https://sourceforge.net/p/pywikipediabot/bugs/401/
+            elif self.graph.get_edge(sourceLabel, targetLabel):
+                pywikibot.error(
+                    'Tried to create duplicate edge from {} to {}'
+                    .format(refPage, page))
+                # duplicate edges would be bad because then get_edge() would
+                # give a list of edges, not a single edge when we handle the
+                # opposite edge.
+            else:
+                # add edge
+                if refPage.site == page.site:
+                    edge.set_color('blue')
+                elif not page.exists():
+                    # mark dead links
+                    edge.set_color('red')
+                elif refPage.isDisambig() != page.isDisambig():
+                    # mark links between disambiguation and non-disambiguation
+                    # pages
+                    edge.set_color('orange')
+                if refPage.namespace() != page.namespace():
+                    edge.set_color('green')
+                self.graph.add_edge(edge)
+
+    def saveGraphFile(self):
+        """Write graphs to the data directory."""
+        thread = GraphSavingThread(self.graph, self.subject.origin)
+        thread.start()
+
+    def createGraph(self):
+        """
+        Create graph of the interwiki links.
+
+        For more info see U{https://meta.wikimedia.org/wiki/Interwiki_graphs}
+        """
+        pywikibot.output('Preparing graph for {}'
+                         .format(self.subject.origin.title()))
+        # create empty graph
+        self.graph = pydot.Dot()
+        # self.graph.set('concentrate', 'true')
+
+        self.octagon_sites = self._octagon_site_set()
+
+        for page in self.subject.found_in.keys():
+            # a node for each found page
+            self.addNode(page)
+        # mark start node by pointing there from a black dot.
+        firstLabel = self.getLabel(self.subject.origin)
+        self.graph.add_node(pydot.Node('start', shape='point'))
+        self.graph.add_edge(pydot.Edge('start', firstLabel))
+        for page, referrers in self.subject.found_in.items():
+            for refPage in referrers:
+                self.addDirectedEdge(page, refPage)
+        self.saveGraphFile()
+
+
+def getFilename(page, extension: Optional[str] = None) -> str:
+    """
+    Create a filename that is unique for the page.
+
+    @param page: page used to create the new filename
+    @type page: pywikibot.page.Page
+    @param extension: file extension
+    @return: filename of <family>-<lang>-<page>.<ext>
+    """
+    filename = '-'.join((page.site.family.name,
+                         page.site.code,
+                         page.title(as_filename=True)))
+    if extension:
+        filename += '.{}'.format(extension)
+    return filename
diff --git a/requirements.txt b/requirements.txt
index de654f8..0a0890d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,6 +29,9 @@
 # about the user
 mwoauth>=0.2.4,!=0.3.1

+# core interwiki_graph.py:
+pydot >= 1.2
+
 # cosmetic_changes
 python-stdnum >= 1.16

diff --git a/setup.py b/setup.py
index a8378f4..8858293 100644
--- a/setup.py
+++ b/setup.py
@@ -59,6 +59,7 @@
     # Core library dependencies
     'eventstreams': ['sseclient!=0.0.23,!=0.0.24,>=0.0.18'],
     'isbn': ['python-stdnum>=1.16'],
+    'Graphviz': ['pydot>=1.2'],
     'Google': ['google>=1.7'],
     'mwparserfromhell': ['mwparserfromhell>=0.3.3'],
     'Tkinter': [  # vulnerability found in Pillow<6.2.2
diff --git a/tests/__init__.py b/tests/__init__.py
index db64b70..2602c96 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -91,6 +91,7 @@
     'flow_thanks',
     'http',
     'i18n',
+    'interwiki_graph',
     'interwiki_link',
     'link',
     'linter',
diff --git a/tests/interwiki_graph_tests.py b/tests/interwiki_graph_tests.py
new file mode 100644
index 0000000..765a360
--- /dev/null
+++ b/tests/interwiki_graph_tests.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+"""Test Interwiki Graph functionality."""
+#
+# (C) Pywikibot team, 2015-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+
+from contextlib import suppress
+
+from pywikibot import interwiki_graph
+
+from tests.aspects import require_modules, SiteAttributeTestCase
+from tests.utils import DryPage
+
+
+@require_modules('pydot')
+class TestWiktionaryGraph(SiteAttributeTestCase):
+
+    """Tests for interwiki links to local sites."""
+
+    sites = {
+        'enwikt': {
+            'family': 'wiktionary',
+            'code': 'en',
+        },
+        'frwikt': {
+            'family': 'wiktionary',
+            'code': 'fr',
+        },
+        'plwikt': {
+            'family': 'wiktionary',
+            'code': 'pl',
+        },
+    }
+    dry = True
+
+    @classmethod
+    def setUpClass(cls):
+        """Setup test class."""
+        super(TestWiktionaryGraph, cls).setUpClass()
+
+        cls.pages = {
+            'en': DryPage(cls.enwikt, 'origin'),
+            'en2': DryPage(cls.enwikt, 'origin2'),
+            'fr': DryPage(cls.frwikt, 'origin'),
+            'pl': DryPage(cls.plwikt, 'origin'),
+        }
+
+    def setUp(self):
+        """Setup interwiki_graph data."""
+        super().setUp()
+        data = interwiki_graph.Subject(self.pages['en'])
+        data.found_in[self.pages['en']] = [self.pages['fr'], self.pages['pl']]
+        data.found_in[self.pages['fr']] = [self.pages['en'], self.pages['pl']]
+        data.found_in[self.pages['pl']] = [self.pages['en'], self.pages['fr']]
+        self.data = data
+
+    def test_simple_graph(self):
+        """Test that GraphDrawer.createGraph does not raise exception."""
+        drawer = interwiki_graph.GraphDrawer(self.data)
+        drawer.createGraph()
+
+    def test_octagon(self):
+        """Test octagon nodes."""
+        self.data.found_in[self.pages['en2']] = [self.pages['fr']]
+        drawer = interwiki_graph.GraphDrawer(self.data)
+
+        self.assertEqual({self.pages['en'].site}, drawer._octagon_site_set())
+
+        drawer.createGraph()
+        nodes = drawer.graph.obj_dict['nodes']
+
+        for node, shape in [('"pl:origin"', 'rectangle'),
+                            ('"fr:origin"', 'rectangle'),
+                            ('"en:origin"', 'octagon')]:
+            with self.subTest(node=node):
+                self.assertEqual(
+                    nodes[node][0]['attributes']['shape'], shape)
+
+
+if __name__ == '__main__':  # pragma: no cover
+    with suppress(SystemExit):
+        unittest.main()
diff --git a/tox.ini b/tox.ini
index 14d7041..04b4e0c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -130,6 +130,9 @@
     pywikibot/families/* : D102
     pywikibot/family.py : N802, N803, N806, N815
     pywikibot/fixes.py : E241
+    pywikibot/interwiki_graph.py : N802, N803, N806
+    pywikibot/logentries.py: N802
+    pywikibot/logging.py : N803
     pywikibot/login.py: N802, N816
     pywikibot/page/__init__.py: N802
     pywikibot/page/_collections.py: N802

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/675527
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ibedf155ca1d0620de407d9538f7897048ea21fde
Gerrit-Change-Number: 675527
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to