jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/780854 )

Change subject: [IMPR] Deprecate XMLDumpOldPageGenerator in favour of a 
'content' parameter
......................................................................

[IMPR] Deprecate XMLDumpOldPageGenerator in favour of a 'content' parameter

- deprecate XMLDumpOldPageGenerator; content parameter of
  XMLDumpPageGenerator should be used instead
- assigning entry.text to Page.text takes 400 - 800 milliseconds.
  The new 'content' parameter speeds up yielding pages if the old
  text entry is not required.

Bug: T306134
Change-Id: Id88c18b0122a57c1eb207076e340ad2fcc80dc31
---
M pywikibot/pagegenerators.py
1 file changed, 23 insertions(+), 18 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 0926580..a033cfa 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -59,6 +59,7 @@
 )
 from pywikibot.proofreadpage import ProofreadPage
 from pywikibot.tools import (
+    deprecated,
     DequeGenerator,
     filter_unique,
     intersect_generators,
@@ -2775,9 +2776,11 @@
         yield page


-class XMLDumpOldPageGenerator(abc.Iterator):  # type: ignore[type-arg]
-    """
-    Xml generator that yields Page objects with old text loaded.
+class XMLDumpPageGenerator(abc.Iterator):  # type: ignore[type-arg]
+    """Xml generator that yields Page objects.
+
+    .. versionadded:: 7.2
+       the `content` parameter

     :param filename: filename of XML dump
     :param start: skip entries below that value
@@ -2785,11 +2788,9 @@
     :param site: current site for the generator
     :param text_predicate: a callable with entry.text as parameter and boolean
         as result to indicate the generator should return the page or not
+    :param content: If True, assign old page content to Page.text

-    :ivar text_predicate: holds text_predicate function
     :ivar skipping: True if start parameter is given, else False
-    :ivar start: holds start parameter
-    :ivar namespaces: holds namespaces filter
     :ivar parser: holds the xmlreader.XmlDump parse method
     """

@@ -2798,11 +2799,11 @@
                      None, NAMESPACE_OR_STR_TYPE,
                      Sequence[NAMESPACE_OR_STR_TYPE]] = None,
                  site: OPT_SITE_TYPE = None,
-                 text_predicate: Optional[Callable[[str], bool]] = None
-                 ) -> None:
+                 text_predicate: Optional[Callable[[str], bool]] = None,
+                 content=False) -> None:
         """Initializer."""
         self.text_predicate = text_predicate
-
+        self.content = content
         self.skipping = bool(start)

         self.start = None  # type: Optional[str]
@@ -2814,7 +2815,6 @@
             self.namespaces = self.site.namespaces
         else:
             self.namespaces = self.site.namespaces.resolve(namespaces)
-
         dump = xmlreader.XmlDump(filename)
         self.parser = dump.parse()

@@ -2830,19 +2830,24 @@
             if page.namespace() not in self.namespaces:
                 continue
             if not self.text_predicate or self.text_predicate(entry.text):
-                page.text = entry.text
+                if self.content:
+                    page.text = entry.text
                 return page


-class XMLDumpPageGenerator(XMLDumpOldPageGenerator):
+@deprecated('XMLDumpPageGenerator with content=True parameter', since='7.2.0')
+class XMLDumpOldPageGenerator(XMLDumpPageGenerator):

-    """Xml generator that yields Page objects without text loaded."""
+    """Xml generator that yields Page objects with old text loaded.

-    def __next__(self) -> 'pywikibot.page.Page':
-        """Get next Page from dump and remove the text."""
-        page = super().__next__()
-        del page.text
-        return page
+    .. deprecated:: 7.2
+       :class:`XMLDumpPageGenerator` with `content` parameter should be
+       used instead
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initializer."""
+        super().__init__(*args, **kwargs, content=True)


 def YearPageGenerator(start: int = 1, end: int = 2050,

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/780854
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id88c18b0122a57c1eb207076e340ad2fcc80dc31
Gerrit-Change-Number: 780854
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to