Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package python-regex for openSUSE:Factory checked in at 2025-09-05 21:42:33
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-regex (Old)
 and      /work/SRC/openSUSE:Factory/.python-regex.new.1977 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-regex" Fri Sep 5 21:42:33 2025 rev:29 rq:1302893 version:2025.9.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-regex/python-regex.changes 2025-01-12 11:08:36.628751612 +0100 +++ /work/SRC/openSUSE:Factory/.python-regex.new.1977/python-regex.changes 2025-09-05 21:42:58.491795109 +0200 @@ -1,0 +2,7 @@ +Fri Sep 5 09:18:35 UTC 2025 - Dirk Müller <dmuel...@suse.com> + +- update to 2025.9.11: + * Git PR 585: Fix AttributeError: 'AnyAll' object has no attribute + '_key' + +------------------------------------------------------------------- Old: ---- regex-2024.11.6.tar.gz New: ---- regex-2025.9.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-regex.spec ++++++ --- /var/tmp/diff_new_pack.BkSQly/_old 2025-09-05 21:42:59.383832642 +0200 +++ /var/tmp/diff_new_pack.BkSQly/_new 2025-09-05 21:42:59.383832642 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-regex # -# Copyright (c) 2025 SUSE LLC +# Copyright (c) 2025 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?sle15_python_module_pythons} Name: python-regex -Version: 2024.11.6 +Version: 2025.9.1 Release: 0 Summary: Alternative regular expression module for Python License: Apache-2.0 ++++++ regex-2024.11.6.tar.gz -> regex-2025.9.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/PKG-INFO new/regex-2025.9.1/PKG-INFO --- old/regex-2024.11.6/PKG-INFO 2024-11-06 20:49:53.200616800 +0100 +++ new/regex-2025.9.1/PKG-INFO 2025-09-01 23:19:23.484337000 +0200 @@ -1,28 +1,27 @@ -Metadata-Version: 2.1 +Metadata-Version: 2.4 Name: regex -Version: 2024.11.6 +Version: 2025.9.1 Summary: Alternative regular expression module, to replace re. 
-Home-page: https://github.com/mrabarnett/mrab-regex -Author: Matthew Barnett -Author-email: re...@mrabarnett.plus.com -License: Apache Software License +Author-email: Matthew Barnett <re...@mrabarnett.plus.com> +License-Expression: Apache-2.0 AND CNRI-Python +Project-URL: Homepage, https://github.com/mrabarnett/mrab-regex Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: Apache Software License Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 Classifier: Topic :: Scientific/Engineering :: Information Analysis Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General -Requires-Python: >=3.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE.txt +Dynamic: license-file Introduction ------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/pyproject.toml new/regex-2025.9.1/pyproject.toml --- old/regex-2024.11.6/pyproject.toml 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/pyproject.toml 2025-09-01 23:19:18.000000000 +0200 @@ -1,3 +1,44 @@ [build-system] -requires = ["setuptools"] +requires = ["setuptools > 77.0.3"] build-backend = "setuptools.build_meta" + +[project] +name = "regex" +version = "2025.9.1" +description = "Alternative regular expression module, to replace re." +readme = "README.rst" +authors = [ + {name = "Matthew Barnett", email = "re...@mrabarnett.plus.com"}, +] +license = "Apache-2.0 AND CNRI-Python" +license-files = ["LICENSE.txt"] + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Text Processing", + "Topic :: Text Processing :: General", +] + +requires-python = ">= 3.9" + +[project.urls] +Homepage = "https://github.com/mrabarnett/mrab-regex" + +[tool.setuptools] +package-dir = {regex = "regex_3"} +py-modules = [ + "regex.__init__", + "regex.regex", + "regex._regex_core", + "regex.test_regex", +] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/regex.egg-info/PKG-INFO new/regex-2025.9.1/regex.egg-info/PKG-INFO --- old/regex-2024.11.6/regex.egg-info/PKG-INFO 2024-11-06 20:49:53.000000000 +0100 +++ new/regex-2025.9.1/regex.egg-info/PKG-INFO 2025-09-01 23:19:23.000000000 +0200 @@ -1,28 +1,27 @@ -Metadata-Version: 2.1 +Metadata-Version: 2.4 Name: regex -Version: 2024.11.6 +Version: 2025.9.1 Summary: Alternative regular expression module, to replace re. 
-Home-page: https://github.com/mrabarnett/mrab-regex -Author: Matthew Barnett -Author-email: re...@mrabarnett.plus.com -License: Apache Software License +Author-email: Matthew Barnett <re...@mrabarnett.plus.com> +License-Expression: Apache-2.0 AND CNRI-Python +Project-URL: Homepage, https://github.com/mrabarnett/mrab-regex Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: Apache Software License Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 Classifier: Topic :: Scientific/Engineering :: Information Analysis Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General -Requires-Python: >=3.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE.txt +Dynamic: license-file Introduction ------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/regex_3/_regex.c new/regex-2025.9.1/regex_3/_regex.c --- old/regex-2024.11.6/regex_3/_regex.c 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/regex_3/_regex.c 2025-09-01 23:19:18.000000000 +0200 @@ -58,6 +58,9 @@ typedef RE_UINT32 RE_CODE; typedef unsigned char BYTE; +/* An unassigned codepoint. */ +#define UNASSIGNED_CODEPOINT 0x10FFFF + /* Properties in the General Category. */ #define RE_PROP_GC_CN ((RE_PROP_GC << 16) | RE_PROP_CN) #define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) @@ -157,6 +160,11 @@ /* Various flags stored in a node status member. */ #define RE_STATUS_SHIFT 11 +#define RE_ENCODING_SHIFT 16 +#define ASCII_ENCODING 1 +#define UNICODE_ENCODING 2 +#define ENCODING_KIND(NODE) (((NODE)->status >> RE_ENCODING_SHIFT) & 0x3) + #define RE_STATUS_FUZZY (RE_FUZZY_OP << RE_STATUS_SHIFT) #define RE_STATUS_REVERSE (RE_REVERSE_OP << RE_STATUS_SHIFT) #define RE_STATUS_REQUIRED (RE_REQUIRED_OP << RE_STATUS_SHIFT) @@ -809,12 +817,8 @@ /* Checks whether a character has a property. */ Py_LOCAL_INLINE(BOOL) ascii_has_property(RE_CODE property, Py_UCS4 ch) { if (ch > RE_ASCII_MAX) { - /* Outside the ASCII range. */ - RE_UINT32 value; - - value = property & 0xFFFF; - - return value == 0; + /* Treat it as an unassigned codepoint. */ + ch = UNASSIGNED_CODEPOINT; } return unicode_has_property(property, ch); @@ -824,19 +828,12 @@ Py_LOCAL_INLINE(BOOL) ascii_has_property_ign(RE_CODE property, Py_UCS4 ch) { RE_UINT32 prop; - prop = property >> 16; - - /* We are working with ASCII. */ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property == - RE_PROP_GC_LT) { - RE_UINT32 value; - - value = re_get_general_category(ch); + if (ch > RE_ASCII_MAX) { + /* Treat it as an unassigned codepoint. */ + ch = UNASSIGNED_CODEPOINT; + } - return value == RE_PROP_LU || value == RE_PROP_LL || value == - RE_PROP_LT; - } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (BOOL)re_get_cased(ch); + prop = property >> 16; /* The property is case-insensitive. */ return ascii_has_property(property, ch); @@ -2902,7 +2899,14 @@ /* Checks whether a character has a property. 
*/ Py_LOCAL_INLINE(BOOL) matches_PROPERTY(RE_EncodingTable* encoding, RE_LocaleInfo* locale_info, RE_Node* node, Py_UCS4 ch) { - return encoding->has_property(locale_info, node->values[0], ch); + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + return ascii_encoding.has_property(locale_info, node->values[0], ch); + case UNICODE_ENCODING: + return unicode_encoding.has_property(locale_info, node->values[0], ch); + default: + return encoding->has_property(locale_info, node->values[0], ch); + } } /* Checks whether a character has a property, ignoring case. */ @@ -2914,6 +2918,15 @@ property = node->values[0]; prop = property >> 16; + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + encoding = &ascii_encoding; + break; + case UNICODE_ENCODING: + encoding = &unicode_encoding; + break; + } + /* We need to do special handling of case-sensitive properties according to * the 'encoding'. */ @@ -2988,6 +3001,9 @@ Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_LocaleInfo* locale_info, RE_Node* member, Py_UCS4 ch) { switch (member->op) { + case RE_OP_ANY_ALL: + TRACE(("%s\n", re_op_text[member->op])) + return TRUE; case RE_OP_CHARACTER: /* values are: char_code */ TRACE(("%s %d %d\n", re_op_text[member->op], member->match, @@ -2997,7 +3013,15 @@ /* values are: property */ TRACE(("%s %d %d\n", re_op_text[member->op], member->match, member->values[0])) - return encoding->has_property(locale_info, member->values[0], ch); + + switch (ENCODING_KIND(member)) { + case ASCII_ENCODING: + return ascii_encoding.has_property(locale_info, member->values[0], ch); + case UNICODE_ENCODING: + return unicode_encoding.has_property(locale_info, member->values[0], ch); + default: + return encoding->has_property(locale_info, member->values[0], ch); + } case RE_OP_RANGE: /* values are: lower, upper */ TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, @@ -4003,7 +4027,19 @@ text = state->text; match = node->match == match; - encoding = state->encoding; + + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + encoding = &ascii_encoding; + break; + case UNICODE_ENCODING: + encoding = &unicode_encoding; + break; + default: + encoding = state->encoding; + break; + } + locale_info = state->locale_info; property = node->values[0]; @@ -4101,7 +4137,19 @@ text = state->text; match = node->match == match; - encoding = state->encoding; + + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + encoding = &ascii_encoding; + break; + case UNICODE_ENCODING: + encoding = &unicode_encoding; + break; + default: + encoding = state->encoding; + break; + } + locale_info = state->locale_info; property = node->values[0]; @@ -4199,7 +4247,19 @@ text = state->text; match = node->match == match; - encoding = state->encoding; + + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + encoding = &ascii_encoding; + break; + case UNICODE_ENCODING: + encoding = &unicode_encoding; + break; + default: + encoding = state->encoding; + break; + } + locale_info = state->locale_info; property = node->values[0]; @@ -4297,7 +4357,19 @@ text = state->text; match = node->match == match; - encoding = state->encoding; + + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + encoding = &ascii_encoding; + break; + case UNICODE_ENCODING: + encoding = &unicode_encoding; + break; + default: + encoding = state->encoding; + break; + } + locale_info = state->locale_info; property = node->values[0]; @@ -6879,8 +6951,17 @@ /* Checks whether a position is on a word boundary. 
*/ Py_LOCAL_INLINE(int) try_match_BOUNDARY(RE_State* state, RE_Node* node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_boundary(state, text_pos) == - node->match); + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + return bool_as_status(ascii_encoding.at_boundary(state, text_pos) == + node->match); + case UNICODE_ENCODING: + return bool_as_status(unicode_encoding.at_boundary(state, text_pos) == + node->match); + default: + return bool_as_status(state->encoding->at_boundary(state, text_pos) == + node->match); + } } /* Checks whether there's a character at a position. */ @@ -7721,7 +7802,17 @@ node, Py_ssize_t text_pos, BOOL* is_partial) { BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); - at_boundary = state->encoding->at_boundary; + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + at_boundary = ascii_encoding.at_boundary; + break; + case UNICODE_ENCODING: + at_boundary = unicode_encoding.at_boundary; + break; + default: + at_boundary = state->encoding->at_boundary; + break; + } *is_partial = FALSE; @@ -7741,7 +7832,17 @@ node, Py_ssize_t text_pos, BOOL* is_partial) { BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); - at_boundary = state->encoding->at_boundary; + switch (ENCODING_KIND(node)) { + case ASCII_ENCODING: + at_boundary = ascii_encoding.at_boundary; + break; + case UNICODE_ENCODING: + at_boundary = unicode_encoding.at_boundary; + break; + default: + at_boundary = state->encoding->at_boundary; + break; + } *is_partial = FALSE; @@ -23873,6 +23974,7 @@ if (!node) return RE_ERROR_MEMORY; + node->match = TRUE; args->code += 2; /* Append the node. */ @@ -25009,6 +25111,11 @@ /* Compile the character set. */ do { switch (args->code[0]) { + case RE_OP_ANY_ALL: + status = build_ANY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; case RE_OP_CHARACTER: case RE_OP_PROPERTY: status = build_CHARACTER_or_PROPERTY(args); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/regex_3/_regex_core.py new/regex-2025.9.1/regex_3/_regex_core.py --- old/regex-2024.11.6/regex_3/_regex_core.py 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/regex_3/_regex_core.py 2025-09-01 23:19:18.000000000 +0200 @@ -336,7 +336,7 @@ "Compiles the firstset for the pattern." reverse = bool(info.flags & REVERSE) fs = _check_firstset(info, reverse, fs) - if not fs: + if not fs or isinstance(fs, AnyAll): return [] # Compile the firstset. @@ -1103,6 +1103,11 @@ "Parses a subpattern with scoped flags." saved_flags = info.flags info.flags = (info.flags | flags_on) & ~flags_off + + # Ensure that there aren't multiple encoding flags set. + if info.flags & (ASCII | LOCALE | UNICODE): + info.flags = (info.flags & ~_ALL_ENCODINGS) | flags_on + source.ignore_space = bool(info.flags & VERBOSE) try: subpattern = _parse_pattern(source, info) @@ -1235,13 +1240,23 @@ if not in_set: if info.flags & WORD: value = WORD_POSITION_ESCAPES.get(ch) + elif info.flags & ASCII: + value = ASCII_POSITION_ESCAPES.get(ch) + elif info.flags & UNICODE: + value = UNICODE_POSITION_ESCAPES.get(ch) else: value = POSITION_ESCAPES.get(ch) if value: return value - value = CHARSET_ESCAPES.get(ch) + if info.flags & ASCII: + value = ASCII_CHARSET_ESCAPES.get(ch) + elif info.flags & UNICODE: + value = UNICODE_CHARSET_ESCAPES.get(ch) + else: + value = CHARSET_ESCAPES.get(ch) + if value: return value @@ -1380,11 +1395,26 @@ prop_name, name = parse_property_name(source) if source.match("}"): # It's correctly delimited. 
- prop = lookup_property(prop_name, name, positive != negate, source) + if info.flags & ASCII: + encoding = ASCII_ENCODING + elif info.flags & UNICODE: + encoding = UNICODE_ENCODING + else: + encoding = 0 + + prop = lookup_property(prop_name, name, positive != negate, source, + encoding=encoding) return make_property(info, prop, in_set) elif ch and ch in "CLMNPSZ": # An abbreviated property, eg \pL. - prop = lookup_property(None, ch, positive, source) + if info.flags & ASCII: + encoding = ASCII_ENCODING + elif info.flags & UNICODE: + encoding = UNICODE_ENCODING + else: + encoding = 0 + + prop = lookup_property(None, ch, positive, source, encoding=encoding) return make_property(info, prop, in_set) # Not a property, so treat as a literal "p" or "P". @@ -1634,7 +1664,7 @@ _BINARY_VALUES = set('YES Y NO N TRUE T FALSE F'.split()) -def lookup_property(property, value, positive, source=None, posix=False): +def lookup_property(property, value, positive, source=None, posix=False, encoding=0): "Looks up a property." # Normalise the names (which may still be lists). property = standardise_name(property) if property else None @@ -1663,7 +1693,7 @@ raise error("unknown property value", source.string, source.pos) - return Property((prop_id << 16) | val_id, positive) + return Property((prop_id << 16) | val_id, positive, encoding=encoding) # Only the value is provided. # It might be the name of a GC, script or block value. @@ -1671,16 +1701,16 @@ prop_id, value_dict = PROPERTIES.get(property) val_id = value_dict.get(value) if val_id is not None: - return Property((prop_id << 16) | val_id, positive) + return Property((prop_id << 16) | val_id, positive, encoding=encoding) # It might be the name of a binary property. prop = PROPERTIES.get(value) if prop: prop_id, value_dict = prop if set(value_dict) == _BINARY_VALUES: - return Property((prop_id << 16) | 1, positive) + return Property((prop_id << 16) | 1, positive, encoding=encoding) - return Property(prop_id << 16, not positive) + return Property(prop_id << 16, not positive, encoding=encoding) # It might be the name of a binary property starting with a prefix. if value.startswith("IS"): @@ -1688,7 +1718,7 @@ if prop: prop_id, value_dict = prop if "YES" in value_dict: - return Property((prop_id << 16) | 1, positive) + return Property((prop_id << 16) | 1, positive, encoding=encoding) # It might be the name of a script or block starting with a prefix. for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): @@ -1696,7 +1726,7 @@ prop_id, value_dict = PROPERTIES.get(property) val_id = value_dict.get(value[2 : ]) if val_id is not None: - return Property((prop_id << 16) | val_id, positive) + return Property((prop_id << 16) | val_id, positive, encoding=encoding) # Unknown property. if not source: @@ -1832,6 +1862,7 @@ FUZZY_OP = 0x4 REVERSE_OP = 0x8 REQUIRED_OP = 0x10 +ENCODING_OP_SHIFT = 5 POS_TEXT = {False: "NON-MATCH", True: "MATCH"} CASE_TEXT = {NOCASE: "", IGNORECASE: " SIMPLE_IGNORE_CASE", FULLCASE: "", @@ -1914,9 +1945,10 @@ # Base class for zero-width nodes. 
class ZeroWidthBase(RegexBase): - def __init__(self, positive=True): + def __init__(self, positive=True, encoding=0): RegexBase.__init__(self) self.positive = bool(positive) + self.encoding = encoding self._key = self.__class__, self.positive @@ -1931,11 +1963,12 @@ flags |= FUZZY_OP if reverse: flags |= REVERSE_OP + flags |= self.encoding << ENCODING_OP_SHIFT return [(self._opcode, flags)] def dump(self, indent, reverse): - print("{}{} {}".format(INDENT * indent, self._op_name, - POS_TEXT[self.positive])) + print("{}{} {}{}".format(INDENT * indent, self._op_name, + POS_TEXT[self.positive], ["", " ASCII"][self.encoding])) def max_width(self): return 0 @@ -1963,6 +1996,13 @@ _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} _op_name = "ANY_ALL" + def __init__(self): + self.positive = True + self.zerowidth = False + self.case_flags = 0 + + self._key = self.__class__, self.positive + class AnyU(Any): _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} _op_name = "ANY_U" @@ -3211,18 +3251,20 @@ True): OP.PROPERTY_IGN_REV} def __init__(self, value, positive=True, case_flags=NOCASE, - zerowidth=False): + zerowidth=False, encoding=0): RegexBase.__init__(self) self.value = value self.positive = bool(positive) self.case_flags = CASE_FLAGS_COMBINATIONS[case_flags] self.zerowidth = bool(zerowidth) + self.encoding = encoding self._key = (self.__class__, self.value, self.positive, self.case_flags, self.zerowidth) def rebuild(self, positive, case_flags, zerowidth): - return Property(self.value, positive, case_flags, zerowidth) + return Property(self.value, positive, case_flags, zerowidth, + self.encoding) def optimise(self, info, reverse, in_set=False): return self @@ -3241,13 +3283,15 @@ flags |= ZEROWIDTH_OP if fuzzy: flags |= FUZZY_OP + flags |= self.encoding << ENCODING_OP_SHIFT return [(self._opcode[self.case_flags, reverse], flags, self.value)] def dump(self, indent, reverse): prop = PROPERTY_NAMES[self.value >> 16] name, value = prop[0], prop[1][self.value & 0xFFFF] - print("{}PROPERTY {} {}:{}{}".format(INDENT * indent, - POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags])) + print("{}PROPERTY {} {}:{}{}{}".format(INDENT * indent, + POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags], + ["", " ASCII"][self.encoding])) def matches(self, ch): return _regex.has_property_value(self.value, ch) == self.positive @@ -3813,9 +3857,21 @@ if isinstance(m, SetUnion) and m.positive: # Union in union. items.extend(m.items) + elif isinstance(m, AnyAll): + return AnyAll() else: items.append(m) + # Are there complementary properties? + properties = (set(), set()) + + for m in items: + if isinstance(m, Property): + properties[m.positive].add((m.value, m.case_flags, m.zerowidth)) + + if properties[0] & properties[1]: + return AnyAll() + if len(items) == 1: i = items[0] return i.with_flags(positive=i.positive == self.positive, @@ -4455,6 +4511,9 @@ "v": "\v", } +ASCII_ENCODING = 1 +UNICODE_ENCODING = 2 + # Predefined character set escape sequences. 
CHARSET_ESCAPES = { "d": lookup_property(None, "Digit", True), @@ -4466,6 +4525,25 @@ "W": lookup_property(None, "Word", False), } +ASCII_CHARSET_ESCAPES = dict(CHARSET_ESCAPES) +ASCII_CHARSET_ESCAPES.update({ + "d": lookup_property(None, "Digit", True, encoding=ASCII_ENCODING), + "D": lookup_property(None, "Digit", False, encoding=ASCII_ENCODING), + "s": lookup_property(None, "Space", True, encoding=ASCII_ENCODING), + "S": lookup_property(None, "Space", False, encoding=ASCII_ENCODING), + "w": lookup_property(None, "Word", True, encoding=ASCII_ENCODING), + "W": lookup_property(None, "Word", False, encoding=ASCII_ENCODING), +}) +UNICODE_CHARSET_ESCAPES = dict(CHARSET_ESCAPES) +UNICODE_CHARSET_ESCAPES.update({ + "d": lookup_property(None, "Digit", True, encoding=UNICODE_ENCODING), + "D": lookup_property(None, "Digit", False, encoding=UNICODE_ENCODING), + "s": lookup_property(None, "Space", True, encoding=UNICODE_ENCODING), + "S": lookup_property(None, "Space", False, encoding=UNICODE_ENCODING), + "w": lookup_property(None, "Word", True, encoding=UNICODE_ENCODING), + "W": lookup_property(None, "Word", False, encoding=UNICODE_ENCODING), +}) + # Positional escape sequences. POSITION_ESCAPES = { "A": StartOfString(), @@ -4476,6 +4554,20 @@ "M": EndOfWord(), "Z": EndOfString(), } +ASCII_POSITION_ESCAPES = dict(POSITION_ESCAPES) +ASCII_POSITION_ESCAPES.update({ + "b": Boundary(encoding=ASCII_ENCODING), + "B": Boundary(False, encoding=ASCII_ENCODING), + "m": StartOfWord(encoding=ASCII_ENCODING), + "M": EndOfWord(encoding=ASCII_ENCODING), +}) +UNICODE_POSITION_ESCAPES = dict(POSITION_ESCAPES) +UNICODE_POSITION_ESCAPES.update({ + "b": Boundary(encoding=UNICODE_ENCODING), + "B": Boundary(False, encoding=UNICODE_ENCODING), + "m": StartOfWord(encoding=UNICODE_ENCODING), + "M": EndOfWord(encoding=UNICODE_ENCODING), +}) # Positional escape sequences when WORD flag set. WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/regex_3/regex.py new/regex-2025.9.1/regex_3/regex.py --- old/regex-2024.11.6/regex_3/regex.py 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/regex_3/regex.py 2025-09-01 23:19:18.000000000 +0200 @@ -241,7 +241,7 @@ "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__", "__doc__", "RegexFlag"] -__version__ = "2.5.148" +__version__ = "2.5.161" # -------------------------------------------------------------------- # Public interface. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/regex_3/test_regex.py new/regex-2025.9.1/regex_3/test_regex.py --- old/regex-2024.11.6/regex_3/test_regex.py 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/regex_3/test_regex.py 2025-09-01 23:19:18.000000000 +0200 @@ -4356,6 +4356,58 @@ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc foo ', partial=True)), True) self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc foo bar', partial=True)), True) + # Git issue 551: + self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]]', 'a')), True) + self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]-a]', 'a')), True) + self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]--a]', 'a')), False) + self.assertEqual(bool(regex.match(r'(?V1)[[a-z]--b]', 'a')), True) + self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]--b]', 'a')), True) + self.assertEqual(bool(regex.match(r'(?V1)[a-[\s\S]]', 'a')), True) + self.assertEqual(bool(regex.match(r'(?V1)[a--[\s\S]]', 'a')), False) + + self.assertEqual(regex.search(r'(?ifu)(H\N{LATIN SMALL LETTER O WITH DIAERESIS}gskolan?)[\\s\\S]*p', + 'Yrkesh\N{LATIN SMALL LETTER O WITH DIAERESIS}gskola . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen'), + None) + + # Git issue 572: Inline ASCII modifier doesn't seem to affect anything + self.assertEqual(bool(regex.match(r'\d', '\uFF19')), True) + self.assertEqual(bool(regex.match(r'(?a:\d)', '\uFF19')), False) + + # Git issue 575: Issues with ASCII/Unicode modifiers + self.assertEqual(regex.findall('\\d', '9\uFF19'), ['9', '\uff19']) + self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19'), ['9', '\uff19']) + self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19'), ['9']) + + self.assertEqual(regex.findall('\\d', '9\uFF19', flags=regex.U), ['9', '\uff19']) + self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19', flags=regex.U), ['9', '\uff19']) + self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19', flags=regex.U), ['9']) + + self.assertEqual(regex.findall('\\d', '9\uFF19', flags=regex.A), ['9']) + self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19', flags=regex.A), ['9', '\uff19']) + self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19', flags=regex.A), ['9']) + + self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=0)), 117) + self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 52) + self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=regex.U)), 117) + + self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=0)), 52) + self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 52) + self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.U)), 52) + + self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=0)), 117) + self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 117) + self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), 
flags=regex.U)), 117) + + # Git issue 580: Regression in v2025.7.31: \P{L} no longer matches in simple patterns + self.assertEqual(bool(regex.match(r"\A\P{L}?\p{L}", "hello,")), True) + self.assertEqual(bool(regex.fullmatch(r"\A\P{L}*(?P<w>\p{L}+)\P{L}*\Z", "hello,")), True) + + # Git issue 584: AttributeError: 'AnyAll' object has no attribute 'positive' + self.assertEqual(bool(regex.compile('(\\s|\\S)')), True) + + # Git PR 585: Fix AttributeError: 'AnyAll' object has no attribute '_key' + self.assertEqual(bool(regex.compile('(?:[\\S\\s]|[A-D][M-Z])')), True) + def test_fuzzy_ext(self): self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')), True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/setup.py new/regex-2025.9.1/setup.py --- old/regex-2024.11.6/setup.py 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/setup.py 2025-09-01 23:19:18.000000000 +0200 @@ -3,41 +3,7 @@ from setuptools import setup, Extension from os.path import join -with open('README.rst', encoding='utf-8') as file: - long_description = file.read() - setup( - name='regex', - version='2024.11.6', - description='Alternative regular expression module, to replace re.', - long_description=long_description, - long_description_content_type='text/x-rst', - author='Matthew Barnett', - author_email='re...@mrabarnett.plus.com', - url='https://github.com/mrabarnett/mrab-regex', - license='Apache Software License', - - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - 'Programming Language :: Python :: 3.13', - 'Topic :: Scientific/Engineering :: Information Analysis', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Text Processing', - 'Topic :: Text Processing :: General', - ], - python_requires='>=3.8', - - package_dir={'regex': 'regex_3'}, - py_modules=['regex.__init__', 'regex.regex', 'regex._regex_core', - 'regex.test_regex'], ext_modules=[Extension('regex._regex', [join('regex_3', '_regex.c'), join('regex_3', '_regex_unicode.c')])], ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/regex-2024.11.6/tools/build_regex_unicode.py new/regex-2025.9.1/tools/build_regex_unicode.py --- old/regex-2024.11.6/tools/build_regex_unicode.py 2024-11-06 20:49:51.000000000 +0100 +++ new/regex-2025.9.1/tools/build_regex_unicode.py 2025-09-01 23:19:18.000000000 +0200 @@ -793,9 +793,6 @@ raise ValueError('cannot determine C type for {}..{}'.format(lower, upper)) -def is_binary(property): - return sum(1 for val in val_list if val['id'] != 0) == 1 - def count_ranges(property): count = 0 default_id = property['values'][munge(property['default'])]['id']
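
Note: the behavioural changes above (the scoped ASCII/Unicode handling and the AnyAll fixes from Git issue 584 / PR 585) can be smoke-tested against the packaged module. The snippet below is a minimal sketch, not part of the package; it only restates expectations taken from the new upstream test cases in test_regex.py quoted above.

  import regex

  # Git issue 584 / PR 585: these patterns used to raise AttributeError
  # ('AnyAll' object has no attribute 'positive' / '_key') and must now compile.
  regex.compile('(\\s|\\S)')
  regex.compile('(?:[\\S\\s]|[A-D][M-Z])')

  # Git issues 572/575: the inline (?a:...) modifier now restricts \d to
  # ASCII digits; U+FF19 is FULLWIDTH DIGIT NINE.
  assert regex.match(r'\d', '\uFF19') is not None
  assert regex.match(r'(?a:\d)', '\uFF19') is None
  assert regex.findall(r'(?u:\d)', '9\uFF19', flags=regex.A) == ['9', '\uFF19']

  print('post-update smoke test passed')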