Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package re2 for openSUSE:Factory checked in 
at 2023-08-02 16:48:08
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/re2 (Old)
 and      /work/SRC/openSUSE:Factory/.re2.new.22712 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "re2"

Wed Aug  2 16:48:08 2023 rev:54 rq:1101567 version:MACRO

Changes:
--------
--- /work/SRC/openSUSE:Factory/re2/re2.changes  2023-07-30 20:57:17.363047784 
+0200
+++ /work/SRC/openSUSE:Factory/.re2.new.22712/re2.changes       2023-08-02 
16:48:23.348600398 +0200
@@ -1,0 +2,8 @@
+Mon Jul 31 16:41:06 UTC 2023 - Andreas Stieger <[email protected]>
+
+- update to 2023-08-01:
+  * Stop using std::map<std::string, Prefilter*>
+  * Avoid expanding counted repetitions of empty-width ops
+  * build infrastructure fixes
+
+-------------------------------------------------------------------

Old:
----
  re2-2023-07-01.tar.gz

New:
----
  re2-2023-08-01.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ re2.spec ++++++
--- /var/tmp/diff_new_pack.rbnKRV/_old  2023-08-02 16:48:23.924603902 +0200
+++ /var/tmp/diff_new_pack.rbnKRV/_new  2023-08-02 16:48:23.932603950 +0200
@@ -16,7 +16,7 @@
 #
 
 
-%global longver 2023-07-01
+%global longver 2023-08-01
 %global shortver %(echo %{longver}|sed 's|-||g')
 %define libname libre2-11
 Name:           re2
@@ -28,7 +28,7 @@
 URL:            https://github.com/google/re2
 Source0:        %{url}/archive/%{longver}/%{name}-%{longver}.tar.gz
 Source99:       baselibs.conf
-BuildRequires:  cmake >= 3.10.2
+BuildRequires:  cmake >= 3.13
 BuildRequires:  pkgconfig
 BuildRequires:  cmake(absl)
 BuildRequires:  pkgconfig(icu-uc)

++++++ re2-2023-07-01.tar.gz -> re2-2023-08-01.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/ci-bazel.yml 
new/re2-2023-08-01/.github/workflows/ci-bazel.yml
--- old/re2-2023-07-01/.github/workflows/ci-bazel.yml   2023-06-30 
16:48:20.000000000 +0200
+++ new/re2-2023-08-01/.github/workflows/ci-bazel.yml   2023-07-28 
21:03:25.000000000 +0200
@@ -13,5 +13,7 @@
       BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - uses: actions/checkout@v3
+      # TODO(junyer): Use `v2` whenever a new release is tagged.
+      - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51
       - run: .github/bazel.sh
         shell: bash
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/ci.yml 
new/re2-2023-08-01/.github/workflows/ci.yml
--- old/re2-2023-07-01/.github/workflows/ci.yml 2023-06-30 16:48:20.000000000 
+0200
+++ new/re2-2023-08-01/.github/workflows/ci.yml 2023-07-28 21:03:25.000000000 
+0200
@@ -40,6 +40,8 @@
       - uses: actions/checkout@v3
       - name: Install Clang ${{ matrix.ver }}
         run: |
+          # Avoid `Conflicts: python3-lldb-x.y` between packages.
+          sudo apt purge -y python3-lldb-14
           wget https://apt.llvm.org/llvm.sh
           chmod +x ./llvm.sh
           sudo ./llvm.sh ${{ matrix.ver }}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/python.yml 
new/re2-2023-08-01/.github/workflows/python.yml
--- old/re2-2023-07-01/.github/workflows/python.yml     2023-06-30 
16:48:20.000000000 +0200
+++ new/re2-2023-08-01/.github/workflows/python.yml     2023-07-28 
21:03:25.000000000 +0200
@@ -19,7 +19,7 @@
           - { name: X64,   python-name: x86_64,  runs-on: [ubuntu-latest]             }
           - { name: ARM64, python-name: aarch64, runs-on: [self-hosted, linux, arm64] }
         os: [manylinux2014, manylinux_2_28]
-        ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        ver: ['3.8', '3.9', '3.10', '3.11']
     env:
       BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
@@ -60,8 +60,8 @@
         arch:
           - { name: X64,   bazel-name: x86_64, python-name: x86_64 }
           - { name: ARM64, bazel-name: arm64,  python-name: arm64  }
-        os: [11, 12]
-        ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        os: [11, 12, 13]
+        ver: ['3.8', '3.9', '3.10', '3.11']
     env:
       BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       BAZEL_CPU: darwin_${{ matrix.arch.bazel-name }}
@@ -70,6 +70,8 @@
       SYSTEM_VERSION_COMPAT: 0
     steps:
       - uses: actions/checkout@v3
+      # TODO(junyer): Use `v2` whenever a new release is tagged.
+      - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.ver }}
@@ -105,35 +107,43 @@
       fail-fast: false
       matrix:
         arch:
-          - { name: X64,   bazel-name: x64,   python-name: amd64 }
-          # FIXME: Compiling succeeds, but linking fails with an error like
-          # "LINK : fatal error LNK1104: cannot open file 'python311.lib'".
-          # Maybe we will need GitHub-hosted runners for Windows on ARM64?!
-          # - { name: ARM64, bazel-name: arm64, python-name: arm64 }
-        ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+          - { name: X86, bazel-name: x64_x86, python-name: win32     }
+          - { name: X64, bazel-name: x64,     python-name: win_amd64 }
+        ver: ['3.8', '3.9', '3.10', '3.11']
     env:
       BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       BAZEL_CPU: ${{ matrix.arch.bazel-name }}_windows
     steps:
       - uses: actions/checkout@v3
+      # Avoid the Chocolatey install of Bazel getting in the way;
+      # `bazelbuild/setup-bazelisk` doesn't work for some reason.
+      - run: |
+          choco uninstall -y bazel
+          choco install -y bazelisk
+        shell: bash
+      # Lowercase the architecture name for `actions/setup-python`.
+      - run: |
+          ARCHITECTURE=${{ matrix.arch.name }}
+          echo "architecture=${ARCHITECTURE,,}" >> "${GITHUB_ENV}"
+        shell: bash
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.ver }}
+          architecture: ${{ env.architecture }}
       - name: Prepare Python ${{ matrix.ver }} environment
         run: |
           python -m pip install --upgrade pip
-          python -m pip install --upgrade wheel
+          python -m pip install --upgrade wheel delvewheel
           python -m pip install --upgrade absl-py
         shell: bash
       - name: Build wheel
         run: |
           python setup.py bdist_wheel \
-            --plat-name=win_${{ matrix.arch.python-name }}
-          cp dist/* .
+            --plat-name=${{ matrix.arch.python-name }}
+          python -m delvewheel repair --wheel-dir=. dist/*
         shell: bash
         working-directory: python
-      - if: matrix.arch.name == runner.arch
-        name: Test wheel
+      - name: Test wheel
         run: |
           python -m pip install google_re2-*.whl
           python re2_test.py
@@ -173,7 +183,7 @@
         run: |
           mkdir -p dist
           for WHL in */google_re2-*.whl; do
-            python -m wheel unpack ${WHL}
+            python -m wheel unpack "${WHL}"
             python -m wheel pack --dest-dir=dist --build-number=${{ inputs.build }} google_re2-*
             rm -rf google_re2-*
           done
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/CMakeLists.txt 
new/re2-2023-08-01/CMakeLists.txt
--- old/re2-2023-07-01/CMakeLists.txt   2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/CMakeLists.txt   2023-07-28 21:03:25.000000000 +0200
@@ -2,9 +2,8 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
-# Old enough to support Ubuntu Bionic,
-# but just the MAJOR.MINOR components.
-cmake_minimum_required(VERSION 3.10)
+# https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md
+cmake_minimum_required(VERSION 3.13)
 
 project(RE2 CXX)
 include(CMakePackageConfigHelpers)
@@ -96,11 +95,7 @@
   list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
 endif()
 
-# TODO(junyer): Use string(JOIN " " ...) whenever CMake 3.12 (or newer) becomes
-# the minimum required: that will make this hack slightly less filthy. For now,
-# CMake does the same thing as string(CONCAT ...), basically, if we don't quote
-# ${REQUIRES}, so quote it despite prevailing style.
-string(REPLACE ";" " " REQUIRES "${REQUIRES}")
+list(JOIN REQUIRES " " REQUIRES)
 
 set(RE2_SOURCES
     re2/bitmap256.cc
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/app/build.sh 
new/re2-2023-08-01/app/build.sh
--- old/re2-2023-07-01/app/build.sh     2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/app/build.sh     2023-07-28 21:03:25.000000000 +0200
@@ -5,7 +5,7 @@
 DSTDIR=$(mktemp --directory --tmpdir $(basename $0).XXXXXXXXXX)
 
 BAZEL=/tmp/bazel
-BAZELISK_RELEASE=v1.16.0
+BAZELISK_RELEASE=v1.17.0
 
 if [[ ${UID} -ne 0 ]]; then
   if [[ -d deploy ]]; then
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/python/setup.py 
new/re2-2023-08-01/python/setup.py
--- old/re2-2023-07-01/python/setup.py  2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/python/setup.py  2023-07-28 21:03:25.000000000 +0200
@@ -84,7 +84,7 @@
 
 setuptools.setup(
     name='google-re2',
-    version='1.0',
+    version='1.1',
     description='RE2 Python bindings',
     long_description=long_description,
     long_description_content_type='text/plain',
@@ -98,8 +98,8 @@
         'Intended Audience :: Developers',
         'License :: OSI Approved :: BSD License',
         'Programming Language :: C++',
-        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
     ],
     cmdclass={'build_ext': BuildExt},
-    python_requires='~=3.7',
+    python_requires='~=3.8',
 )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/fuzzing/re2_fuzzer.cc 
new/re2-2023-08-01/re2/fuzzing/re2_fuzzer.cc
--- old/re2-2023-07-01/re2/fuzzing/re2_fuzzer.cc        2023-06-30 
16:48:20.000000000 +0200
+++ new/re2-2023-08-01/re2/fuzzing/re2_fuzzer.cc        2023-07-28 
21:03:25.000000000 +0200
@@ -209,6 +209,7 @@
   dummy += re.NamedCapturingGroups().size();
   dummy += re.CapturingGroupNames().size();
   dummy += RE2::QuoteMeta(pattern).size();
+  dummy += re.Regexp()->ToString().size();
 
   RE2::Set set(options, anchor);
   int index = set.Add(pattern, /*error=*/NULL);  // -1 on error
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter.h 
new/re2-2023-08-01/re2/prefilter.h
--- old/re2-2023-07-01/re2/prefilter.h  2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/re2/prefilter.h  2023-07-28 21:03:25.000000000 +0200
@@ -59,6 +59,44 @@
   std::string DebugString() const;
 
  private:
+  template <typename H>
+  friend H AbslHashValue(H h, const Prefilter& a) {
+    h = H::combine(std::move(h), a.op_);
+    if (a.op_ == ATOM) {
+      h = H::combine(std::move(h), a.atom_);
+    } else if (a.op_ == AND || a.op_ == OR) {
+      h = H::combine(std::move(h), a.subs_->size());
+      for (size_t i = 0; i < a.subs_->size(); ++i) {
+        h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_);
+      }
+    }
+    return h;
+  }
+
+  friend bool operator==(const Prefilter& a, const Prefilter& b) {
+    if (&a == &b) {
+      return true;
+    }
+    if (a.op_ != b.op_) {
+      return false;
+    }
+    if (a.op_ == ATOM) {
+      if (a.atom_ != b.atom_) {
+        return false;
+      }
+    } else if (a.op_ == AND || a.op_ == OR) {
+      if (a.subs_->size() != b.subs_->size()) {
+        return false;
+      }
+      for (size_t i = 0; i < a.subs_->size(); ++i) {
+        if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   // A comparator used to store exact strings. We compare by length,
   // then lexicographically. This ordering makes it easier to reduce the
   // set of strings in SimplifyStringSet.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter_tree.cc 
new/re2-2023-08-01/re2/prefilter_tree.cc
--- old/re2-2023-07-01/re2/prefilter_tree.cc    2023-06-30 16:48:20.000000000 
+0200
+++ new/re2-2023-08-01/re2/prefilter_tree.cc    2023-07-28 21:03:25.000000000 
+0200
@@ -7,7 +7,6 @@
 #include <stddef.h>
 #include <algorithm>
 #include <cmath>
-#include <map>
 #include <memory>
 #include <string>
 #include <utility>
@@ -63,33 +62,18 @@
 
   compiled_ = true;
 
-  NodeMap nodes;
+  NodeSet nodes;
   AssignUniqueIds(&nodes, atom_vec);
   if (ExtraDebug)
     PrintDebugInfo(&nodes);
 }
 
-Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
-  std::string node_string = NodeString(node);
-  NodeMap::iterator iter = nodes->find(node_string);
-  if (iter == nodes->end())
-    return NULL;
-  return (*iter).second;
-}
-
-std::string PrefilterTree::NodeString(Prefilter* node) const {
-  // Adding the operation disambiguates AND/OR/atom nodes.
-  std::string s = absl::StrFormat("%d", node->op()) + ":";
-  if (node->op() == Prefilter::ATOM) {
-    s += node->atom();
-  } else {
-    for (size_t i = 0; i < node->subs()->size(); i++) {
-      if (i > 0)
-        s += ',';
-      s += absl::StrFormat("%d", (*node->subs())[i]->unique_id());
-    }
+Prefilter* PrefilterTree::CanonicalNode(NodeSet* nodes, Prefilter* node) {
+  NodeSet::const_iterator iter = nodes->find(node);
+  if (iter != nodes->end()) {
+    return *iter;
   }
-  return s;
+  return NULL;
 }
 
 bool PrefilterTree::KeepNode(Prefilter* node) const {
@@ -129,7 +113,7 @@
   }
 }
 
-void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
+void PrefilterTree::AssignUniqueIds(NodeSet* nodes,
                                     std::vector<std::string>* atom_vec) {
   atom_vec->clear();
 
@@ -169,9 +153,9 @@
     node->set_unique_id(-1);
     Prefilter* canonical = CanonicalNode(nodes, node);
     if (canonical == NULL) {
-      // Any further nodes that have the same node string
+      // Any further nodes that have the same atom/subs
       // will find this node as the canonical node.
-      nodes->emplace(NodeString(node), node);
+      nodes->emplace(node);
       if (node->op() == Prefilter::ATOM) {
         atom_vec->push_back(node->atom());
         atom_index_to_id_.push_back(unique_id);
@@ -300,7 +284,7 @@
     for (size_t j = 0; j < matched_atoms.size(); j++)
       matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]);
     PropagateMatch(matched_atom_ids, &regexps_map);
-    for (IntMap::iterator it = regexps_map.begin();
+    for (IntMap::const_iterator it = regexps_map.begin();
          it != regexps_map.end();
          ++it)
       regexps->push_back(it->index());
@@ -316,7 +300,7 @@
   IntMap work(static_cast<int>(entries_.size()));
   for (size_t i = 0; i < atom_ids.size(); i++)
     work.set(atom_ids[i], 1);
-  for (IntMap::iterator it = work.begin(); it != work.end(); ++it) {
+  for (IntMap::const_iterator it = work.begin(); it != work.end(); ++it) {
     const Entry& entry = entries_[it->index()];
     // Record regexps triggered.
     for (size_t i = 0; i < entry.regexps.size(); i++)
@@ -348,7 +332,7 @@
   LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]);
 }
 
-void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
+void PrefilterTree::PrintDebugInfo(NodeSet* nodes) {
   LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size();
   LOG(ERROR) << "#Unique Nodes: " << entries_.size();
 
@@ -360,11 +344,10 @@
     for (int parent : parents)
       LOG(ERROR) << parent;
   }
-  LOG(ERROR) << "Map:";
-  for (NodeMap::const_iterator iter = nodes->begin();
+  LOG(ERROR) << "Set:";
+  for (NodeSet::const_iterator iter = nodes->begin();
        iter != nodes->end(); ++iter)
-    LOG(ERROR) << "NodeId: " << (*iter).second->unique_id()
-               << " Str: " << (*iter).first;
+    LOG(ERROR) << "NodeId: " << (*iter)->unique_id();
 }
 
 std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter_tree.h 
new/re2-2023-08-01/re2/prefilter_tree.h
--- old/re2-2023-07-01/re2/prefilter_tree.h     2023-06-30 16:48:20.000000000 
+0200
+++ new/re2-2023-08-01/re2/prefilter_tree.h     2023-07-28 21:03:25.000000000 
+0200
@@ -16,12 +16,13 @@
 // atoms) that the user of this class should use to do the string
 // matching.
 
-#include <map>
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "re2/prefilter.h"
 #include "re2/sparse_array.h"
+#include "util/logging.h"
 
 namespace re2 {
 
@@ -57,10 +58,25 @@
   void PrintPrefilter(int regexpid);
 
  private:
-  typedef SparseArray<int> IntMap;
-  // TODO(junyer): Use absl::flat_hash_set<Prefilter*> instead?
-  // It should be trivial to get rid of the stringification...
-  typedef std::map<std::string, Prefilter*> NodeMap;
+  using IntMap = SparseArray<int>;
+
+  struct PrefilterHash {
+    size_t operator()(const Prefilter* a) const {
+      DCHECK(a != NULL);
+      return absl::Hash<Prefilter>()(*a);
+    }
+  };
+
+  struct PrefilterEqual {
+    bool operator()(const Prefilter* a, const Prefilter* b) const {
+      DCHECK(a != NULL);
+      DCHECK(b != NULL);
+      return *a == *b;
+    }
+  };
+
+  using NodeSet =
+      absl::flat_hash_set<Prefilter*, PrefilterHash, PrefilterEqual>;
 
   // Each unique node has a corresponding Entry that helps in
   // passing the matching trigger information along the tree.
@@ -90,25 +106,22 @@
   // This function assigns unique ids to various parts of the
   // prefilter, by looking at if these nodes are already in the
   // PrefilterTree.
-  void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec);
+  void AssignUniqueIds(NodeSet* nodes, std::vector<std::string>* atom_vec);
 
   // Given the matching atoms, find the regexps to be triggered.
   void PropagateMatch(const std::vector<int>& atom_ids,
                       IntMap* regexps) const;
 
-  // Returns the prefilter node that has the same NodeString as this
-  // node. For the canonical node, returns node.
-  Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node);
-
-  // A string that uniquely identifies the node. Assumes that the
-  // children of node has already been assigned unique ids.
-  std::string NodeString(Prefilter* node) const;
+  // Returns the prefilter node that has the same atom/subs as this
+  // node. For the canonical node, returns node. Assumes that the
+  // children of node have already been assigned unique ids.
+  Prefilter* CanonicalNode(NodeSet* nodes, Prefilter* node);
 
   // Recursively constructs a readable prefilter string.
   std::string DebugNodeString(Prefilter* node) const;
 
   // Used for debugging.
-  void PrintDebugInfo(NodeMap* nodes);
+  void PrintDebugInfo(NodeSet* nodes);
 
   // These are all the nodes formed by Compile. Essentially, there is
   // one node for each unique atom and each unique AND/OR node.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/regexp.cc 
new/re2-2023-08-01/re2/regexp.cc
--- old/re2-2023-07-01/re2/regexp.cc    2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/re2/regexp.cc    2023-07-28 21:03:25.000000000 +0200
@@ -17,6 +17,7 @@
 
 #include "absl/base/call_once.h"
 #include "absl/base/macros.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/synchronization/mutex.h"
 #include "util/logging.h"
 #include "util/utf.h"
@@ -76,7 +77,7 @@
 // Similar to EmptyStorage in re2.cc.
 struct RefStorage {
   absl::Mutex ref_mutex;
-  std::map<Regexp*, int> ref_map;
+  absl::flat_hash_map<Regexp*, int> ref_map;
 };
 alignas(RefStorage) static char ref_storage[sizeof(RefStorage)];
 
@@ -84,7 +85,7 @@
   return &reinterpret_cast<RefStorage*>(ref_storage)->ref_mutex;
 }
 
-static inline std::map<Regexp*, int>* ref_map() {
+static inline absl::flat_hash_map<Regexp*, int>* ref_map() {
   return &reinterpret_cast<RefStorage*>(ref_storage)->ref_map;
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/simplify.cc 
new/re2-2023-08-01/re2/simplify.cc
--- old/re2-2023-07-01/re2/simplify.cc  2023-06-30 16:48:20.000000000 +0200
+++ new/re2-2023-08-01/re2/simplify.cc  2023-07-28 21:03:25.000000000 +0200
@@ -6,6 +6,7 @@
 // to use simple extended regular expression features.
 // Also sort and simplify character classes.
 
+#include <algorithm>
 #include <string>
 
 #include "util/logging.h"
@@ -579,6 +580,16 @@
   return re;
 }
 
+// Returns true if re is an empty-width op.
+static bool IsEmptyOp(Regexp* re) {
+  return (re->op() == kRegexpBeginLine ||
+          re->op() == kRegexpEndLine ||
+          re->op() == kRegexpWordBoundary ||
+          re->op() == kRegexpNoWordBoundary ||
+          re->op() == kRegexpBeginText ||
+          re->op() == kRegexpEndText);
+}
+
 // Simplifies the expression re{min,max} in terms of *, +, and ?.
 // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
 // Caller must Decref return value when done with it.
@@ -587,6 +598,16 @@
 // but in the Regexp* representation, both (x) are marked as $1.
 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
                                        Regexp::ParseFlags f) {
+  // For an empty-width op OR a concatenation or alternation of empty-width
+  // ops, cap the repetition count at 1.
+  if (IsEmptyOp(re) ||
+      ((re->op() == kRegexpConcat ||
+        re->op() == kRegexpAlternate) &&
+       std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) {
+    min = std::min(min, 1);
+    max = std::min(max, 1);
+  }
+
   // x{n,} means at least n matches of x.
   if (max == -1) {
     // Special case: x{0,} is x*
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2023-07-01/re2/testing/simplify_test.cc 
new/re2-2023-08-01/re2/testing/simplify_test.cc
--- old/re2-2023-07-01/re2/testing/simplify_test.cc     2023-06-30 
16:48:20.000000000 +0200
+++ new/re2-2023-08-01/re2/testing/simplify_test.cc     2023-07-28 
21:03:25.000000000 +0200
@@ -140,6 +140,22 @@
   { "(){1,}", "()+" },
   { "(){0,2}", "(?:()()?)?" },
 
+  // For an empty-width op OR a concatenation or alternation of empty-width
+  // ops, test that the repetition count is capped at 1.
+  { "(?:^){0,}", "^*" },            // x{0,} -> x*
+  { "(?:$){28,}", "$+" },           // x{N,} -> x{1,} -> x+
+  { "(?-m:^){0,30}", "(?-m:^)?" },  // x{0,N} -> x{0,1} -> x?
+  { "(?-m:$){28,30}", "(?-m:$)" },  // x{N,M} -> x{1,1} -> x
+  { "\\b(?:\\b\\B){999}\\B", "\\b\\b\\B\\B" },
+  { "\\b(?:\\b|\\B){999}\\B", "\\b(?:\\b|\\B)\\B" },
+  // NonGreedy should also be handled.
+  { "(?:^){0,}?", "^*?" },
+  { "(?:$){28,}?", "$+?" },
+  { "(?-m:^){0,30}?", "(?-m:^)??" },
+  { "(?-m:$){28,30}?", "(?-m:$)" },
+  { "\\b(?:\\b\\B){999}?\\B", "\\b\\b\\B\\B" },
+  { "\\b(?:\\b|\\B){999}?\\B", "\\b(?:\\b|\\B)\\B" },
+
   // Test that coalescing occurs and that the resulting repeats are simplified.
   // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
   { "a*a*", "a*" },

Reply via email to