Package: python3-whoosh
Version: 2.7.4+git6-g9134ad92-8
Severity: normal
Tags: patch
Dear Maintainer,
I have whoosh installed as a dependency for the mailman3 suite.
Whenever it runs I see:
/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:50: SyntaxWarning:
invalid escape sequence '\w'
url_pattern = rcompile("""
/usr/lib/python3/dist-packages/whoosh/analysis/filters.py:148: SyntaxWarning:
invalid escape sequence '\S'
"""Interleaves the results of two or more filters (or filter chains).
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:37: SyntaxWarning:
invalid escape sequence '\S'
"""Given a set of words (or any object with a ``__contains__`` method),
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:224: SyntaxWarning:
invalid escape sequence '\S'
"""Splits words into subwords and performs optional transformations on
/usr/lib/python3/dist-packages/whoosh/analysis/intraword.py:285: SyntaxWarning:
invalid escape sequence '\|'
def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
/usr/lib/python3/dist-packages/whoosh/codec/whoosh3.py:1116: SyntaxWarning:
"is" with 'int' literal. Did you mean "=="?
elif fixedsize is 0:
I get my email inbox spammed with this once an hour as part of
the indexing process for mailman3-web.
-- System Information:
Debian Release: trixie/sid
APT prefers stable-updates
APT policy: (500, 'stable-updates'), (500, 'stable-security'), (500,
'unstable'), (500, 'stable')
Architecture: amd64 (x86_64)
Kernel: Linux 6.10.4-cloud-amd64 (SMP w/1 CPU thread; PREEMPT)
Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled
Versions of packages python3-whoosh depends on:
ii python3 3.12.5-1
python3-whoosh recommends no packages.
Versions of packages python3-whoosh suggests:
pn python-whoosh-doc <none>
-- no debconf information
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/filters.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/filters.py
@@ -47,11 +47,11 @@ STOP_WORDS = frozenset(('a', 'an', 'and'
# Simple pattern for filtering URLs, may be useful
-url_pattern = rcompile("""
+url_pattern = rcompile(r"""
(
[A-Za-z+]+:// # URL protocol
- \\S+? # URL body
- (?=\\s|[.]\\s|$|[.]$) # Stop at space/end, or a dot followed by space/end
+ \S+? # URL body
+ (?=\s|[.]\s|$|[.]$) # Stop at space/end, or a dot followed by space/end
) | ( # or...
\w+([:.]?\w+)* # word characters, with opt. internal colons/dots
)
@@ -155,7 +155,7 @@ class TeeFilter(Filter):
>>> f1 = LowercaseFilter()
>>> # In the other branch, we'll reverse the tokens
>>> f2 = ReverseTextFilter()
- >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2)
+ >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2)
>>> [token.text for token in ana(target)]
["alfa", "AFLA", "bravo", "OVARB", "charlie", "EILRAHC"]
@@ -164,7 +164,7 @@ class TeeFilter(Filter):
>>> f1 = PassFilter()
>>> f2 = BiWordFilter()
- >>> ana = RegexTokenizer(r"\S+") | TeeFilter(f1, f2) | LowercaseFilter()
+ >>> ana = RegexTokenizer(r"\\S+") | TeeFilter(f1, f2) | LowercaseFilter()
>>> [token.text for token in ana(target)]
["alfa", "alfa-bravo", "bravo", "bravo-charlie", "charlie"]
"""
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/analysis/intraword.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/analysis/intraword.py
@@ -46,7 +46,7 @@ class CompoundWordFilter(Filter):
compound word in the token stream along with the word segments.
>>> cwf = CompoundWordFilter(wordset, keep_compound=True)
- >>> analyzer = RegexTokenizer(r"\S+") | cwf
+ >>> analyzer = RegexTokenizer(r"\\S+") | cwf
>>> [t.text for t in analyzer("I do not like greeneggs and ham")
["I", "do", "not", "like", "greeneggs", "green", "eggs", "and", "ham"]
>>> cwf.keep_compound = False
@@ -221,7 +221,7 @@ class ShingleFilter(Filter):
class IntraWordFilter(Filter):
- """Splits words into subwords and performs optional transformations on
+ r"""Splits words into subwords and performs optional transformations on
subword groups. This filter is funtionally based on yonik's
WordDelimiterFilter in Solr, but shares no code with it.
@@ -272,7 +272,7 @@ class IntraWordFilter(Filter):
>>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True)
>>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False)
>>> iwf = MultiFilter(index=iwf_i, query=iwf_q)
- >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+ >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter()
(See :class:`MultiFilter`.)
"""
@@ -282,7 +282,7 @@ class IntraWordFilter(Filter):
__inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool,
mergewords=bool, mergenums=bool)
- def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
+ def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
splitwords=True, splitnums=True,
mergewords=False, mergenums=False):
"""
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/codec/whoosh3.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/codec/whoosh3.py
@@ -1113,7 +1113,7 @@ class W3LeafMatcher(LeafMatcher):
vs = self._data[2]
if fixedsize is None or fixedsize < 0:
self._values = vs
- elif fixedsize is 0:
+ elif fixedsize == 0:
self._values = (None,) * self._blocklength
else:
assert isinstance(vs, bytes_type)
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/paicehusk.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/paicehusk.py
@@ -30,7 +30,7 @@ class PaiceHuskStemmer(object):
(?P<cont>[.>])
""", re.UNICODE | re.VERBOSE)
- stem_expr = re.compile("^\w+", re.UNICODE)
+ stem_expr = re.compile(r"^\w+", re.UNICODE)
def __init__(self, ruletable):
"""
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter.py
@@ -64,14 +64,14 @@ _c_v = re.compile("^" + _cons_seq + _vow
# Patterns used in the rules
-_ed_ing = re.compile("^(.*)(ed|ing)$")
-_at_bl_iz = re.compile("(at|bl|iz)$")
-_step1b = re.compile("([^aeiouylsz])\\1$")
-_step2 = re.compile("^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
-_step3 = re.compile("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
-_step4_1 = re.compile("^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
-_step4_2 = re.compile("^(.+?)(s|t)(ion)$")
-_step5 = re.compile("^(.+?)e$")
+_ed_ing = re.compile(r"^(.*)(ed|ing)$")
+_at_bl_iz = re.compile(r"(at|bl|iz)$")
+_step1b = re.compile(r"([^aeiouylsz])\1$")
+_step2 = re.compile(r"^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$")
+_step3 = re.compile(r"^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$")
+_step4_1 = re.compile(r"^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$")
+_step4_2 = re.compile(r"^(.+?)(s|t)(ion)$")
+_step5 = re.compile(r"^(.+?)e$")
# Stemming function
Index: python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
===================================================================
--- python-whoosh-2.7.4+git6-g9134ad92.orig/src/whoosh/lang/porter2.py
+++ python-whoosh-2.7.4+git6-g9134ad92/src/whoosh/lang/porter2.py
@@ -64,7 +64,7 @@ def remove_initial_apostrophe(word):
def capitalize_consonant_ys(word):
if word.startswith('y'):
word = 'Y' + word[1:]
- return ccy_exp.sub('\g<1>Y', word)
+ return ccy_exp.sub(r'\g<1>Y', word)
def step_0(word):