jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/837207 )
Change subject: [IMPR]: pagefromfile.py: compute regex only once
......................................................................
[IMPR]: pagefromfile.py: compute regex only once
Compile the page and title regexes once when the reader is constructed, instead of recompiling them for every page found.
Change-Id: If97d7ce2a52572066909474f75ac4cb6e576811a
---
M scripts/pagefromfile.py
1 file changed, 22 insertions(+), 15 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py
index 8705099..6b506f8 100755
--- a/scripts/pagefromfile.py
+++ b/scripts/pagefromfile.py
@@ -195,10 +195,26 @@
super().__init__(**kwargs)
self.filename = filename
self.site = site or pywikibot.Site()
+ self.page_regex, self.title_regex = self._make_regexes()
+
+ def _make_regexes(self):
+ """Make regex from options."""
+ if self.opt.textonly:
+ pattern = '^(.*)$'
+ else:
+ pattern = (re.escape(self.opt.begin) + '(.*?)'
+ + re.escape(self.opt.end))
+ page_regex = re.compile(pattern, re.DOTALL)
+ title_regex = re.compile(
+ re.escape(self.opt.titlestart) + '(.*?)'
+ + re.escape(self.opt.titleend))
+ return page_regex, title_regex
@property
def generator(self) -> Iterator[pywikibot.Page]:
- """Read file and yield a tuple of page title and content.
+ """Read file and yield a page with content from file.
+
+ content is stored as a page attribute defined by CTX_ATTR.
.. versionchanged:: 7.6
changed from iterator method to generator property
@@ -216,7 +232,7 @@
length = 0
while text:
try:
- length, title, contents = self.findpage(text)
+ length, title, contents = self.find_page(text)
except TypeError:
if not length:
pywikibot.info('\nStart or end marker not found.')
@@ -233,18 +249,9 @@
yield page
text = text[length:]
- def findpage(self, text) -> Tuple[int, str, str]:
+ def find_page(self, text) -> Tuple[int, str, str]:
"""Find page to work on."""
- if self.opt.textonly:
- pattern = '^(.*)$'
- else:
- pattern = (re.escape(self.opt.begin) + '(.*?)'
- + re.escape(self.opt.end))
- page_regex = re.compile(pattern, re.DOTALL)
- title_regex = re.compile(
- re.escape(self.opt.titlestart) + '(.*?)'
- + re.escape(self.opt.titleend))
- location = page_regex.search(text)
+ location = self.page_regex.search(text)
if self.opt.include:
contents = location[0]
else:
@@ -253,10 +260,10 @@
title = self.opt.title
if not title:
try:
- title = title_regex.search(contents)[1]
+ title = self.title_regex.search(contents)[1]
if self.opt.notitle:
# Remove title (to allow creation of redirects)
- contents = title_regex.sub('', contents, count=1)
+ contents = self.title_regex.sub('', contents, count=1)
except TypeError:
raise NoTitleError(location.end())
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/837207
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If97d7ce2a52572066909474f75ac4cb6e576811a
Gerrit-Change-Number: 837207
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]