Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2023-08-02 16:48:08 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.22712 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Wed Aug 2 16:48:08 2023 rev:54 rq:1101567 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2023-07-30 20:57:17.363047784 +0200 +++ /work/SRC/openSUSE:Factory/.re2.new.22712/re2.changes 2023-08-02 16:48:23.348600398 +0200 @@ -1,0 +2,8 @@ +Mon Jul 31 16:41:06 UTC 2023 - Andreas Stieger <[email protected]> + +- update to 2023-08-01: + * Stop using std::map<std::string, Prefilter*> + * Avoid expanding counted repetitions of empty-width ops + * build infrastructure fixes + +------------------------------------------------------------------- Old: ---- re2-2023-07-01.tar.gz New: ---- re2-2023-08-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.rbnKRV/_old 2023-08-02 16:48:23.924603902 +0200 +++ /var/tmp/diff_new_pack.rbnKRV/_new 2023-08-02 16:48:23.932603950 +0200 @@ -16,7 +16,7 @@ # -%global longver 2023-07-01 +%global longver 2023-08-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-11 Name: re2 @@ -28,7 +28,7 @@ URL: https://github.com/google/re2 Source0: %{url}/archive/%{longver}/%{name}-%{longver}.tar.gz Source99: baselibs.conf -BuildRequires: cmake >= 3.10.2 +BuildRequires: cmake >= 3.13 BuildRequires: pkgconfig BuildRequires: cmake(absl) BuildRequires: pkgconfig(icu-uc) ++++++ re2-2023-07-01.tar.gz -> re2-2023-08-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/ci-bazel.yml new/re2-2023-08-01/.github/workflows/ci-bazel.yml --- old/re2-2023-07-01/.github/workflows/ci-bazel.yml 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/.github/workflows/ci-bazel.yml 2023-07-28 21:03:25.000000000 +0200 @@ -13,5 +13,7 @@ BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v3 + # TODO(junyer): Use `v2` whenever a new release is tagged. 
+ - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51 - run: .github/bazel.sh shell: bash diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/ci.yml new/re2-2023-08-01/.github/workflows/ci.yml --- old/re2-2023-07-01/.github/workflows/ci.yml 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/.github/workflows/ci.yml 2023-07-28 21:03:25.000000000 +0200 @@ -40,6 +40,8 @@ - uses: actions/checkout@v3 - name: Install Clang ${{ matrix.ver }} run: | + # Avoid `Conflicts: python3-lldb-x.y` between packages. + sudo apt purge -y python3-lldb-14 wget https://apt.llvm.org/llvm.sh chmod +x ./llvm.sh sudo ./llvm.sh ${{ matrix.ver }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/.github/workflows/python.yml new/re2-2023-08-01/.github/workflows/python.yml --- old/re2-2023-07-01/.github/workflows/python.yml 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/.github/workflows/python.yml 2023-07-28 21:03:25.000000000 +0200 @@ -19,7 +19,7 @@ - { name: X64, python-name: x86_64, runs-on: [ubuntu-latest] } - { name: ARM64, python-name: aarch64, runs-on: [self-hosted, linux, arm64] } os: [manylinux2014, manylinux_2_28] - ver: ['3.7', '3.8', '3.9', '3.10', '3.11'] + ver: ['3.8', '3.9', '3.10', '3.11'] env: BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: @@ -60,8 +60,8 @@ arch: - { name: X64, bazel-name: x86_64, python-name: x86_64 } - { name: ARM64, bazel-name: arm64, python-name: arm64 } - os: [11, 12] - ver: ['3.7', '3.8', '3.9', '3.10', '3.11'] + os: [11, 12, 13] + ver: ['3.8', '3.9', '3.10', '3.11'] env: BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} BAZEL_CPU: darwin_${{ matrix.arch.bazel-name }} @@ -70,6 +70,8 @@ SYSTEM_VERSION_COMPAT: 0 steps: - uses: actions/checkout@v3 + # TODO(junyer): Use `v2` whenever a new release is tagged. 
+ - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.ver }} @@ -105,35 +107,43 @@ fail-fast: false matrix: arch: - - { name: X64, bazel-name: x64, python-name: amd64 } - # FIXME: Compiling succeeds, but linking fails with an error like - # "LINK : fatal error LNK1104: cannot open file 'python311.lib'". - # Maybe we will need GitHub-hosted runners for Windows on ARM64?! - # - { name: ARM64, bazel-name: arm64, python-name: arm64 } - ver: ['3.7', '3.8', '3.9', '3.10', '3.11'] + - { name: X86, bazel-name: x64_x86, python-name: win32 } + - { name: X64, bazel-name: x64, python-name: win_amd64 } + ver: ['3.8', '3.9', '3.10', '3.11'] env: BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} BAZEL_CPU: ${{ matrix.arch.bazel-name }}_windows steps: - uses: actions/checkout@v3 + # Avoid the Chocolatey install of Bazel getting in the way; + # `bazelbuild/setup-bazelisk` doesn't work for some reason. + - run: | + choco uninstall -y bazel + choco install -y bazelisk + shell: bash + # Lowercase the architecture name for `actions/setup-python`. + - run: | + ARCHITECTURE=${{ matrix.arch.name }} + echo "architecture=${ARCHITECTURE,,}" >> "${GITHUB_ENV}" + shell: bash - uses: actions/setup-python@v4 with: python-version: ${{ matrix.ver }} + architecture: ${{ env.architecture }} - name: Prepare Python ${{ matrix.ver }} environment run: | python -m pip install --upgrade pip - python -m pip install --upgrade wheel + python -m pip install --upgrade wheel delvewheel python -m pip install --upgrade absl-py shell: bash - name: Build wheel run: | python setup.py bdist_wheel \ - --plat-name=win_${{ matrix.arch.python-name }} - cp dist/* . + --plat-name=${{ matrix.arch.python-name }} + python -m delvewheel repair --wheel-dir=. 
dist/* shell: bash working-directory: python - - if: matrix.arch.name == runner.arch - name: Test wheel + - name: Test wheel run: | python -m pip install google_re2-*.whl python re2_test.py @@ -173,7 +183,7 @@ run: | mkdir -p dist for WHL in */google_re2-*.whl; do - python -m wheel unpack ${WHL} + python -m wheel unpack "${WHL}" python -m wheel pack --dest-dir=dist --build-number=${{ inputs.build }} google_re2-* rm -rf google_re2-* done diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/CMakeLists.txt new/re2-2023-08-01/CMakeLists.txt --- old/re2-2023-07-01/CMakeLists.txt 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/CMakeLists.txt 2023-07-28 21:03:25.000000000 +0200 @@ -2,9 +2,8 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -# Old enough to support Ubuntu Bionic, -# but just the MAJOR.MINOR components. -cmake_minimum_required(VERSION 3.10) +# https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md +cmake_minimum_required(VERSION 3.13) project(RE2 CXX) include(CMakePackageConfigHelpers) @@ -96,11 +95,7 @@ list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre) endif() -# TODO(junyer): Use string(JOIN " " ...) whenever CMake 3.12 (or newer) becomes -# the minimum required: that will make this hack slightly less filthy. For now, -# CMake does the same thing as string(CONCAT ...), basically, if we don't quote -# ${REQUIRES}, so quote it despite prevailing style. 
-string(REPLACE ";" " " REQUIRES "${REQUIRES}") +list(JOIN REQUIRES " " REQUIRES) set(RE2_SOURCES re2/bitmap256.cc diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/app/build.sh new/re2-2023-08-01/app/build.sh --- old/re2-2023-07-01/app/build.sh 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/app/build.sh 2023-07-28 21:03:25.000000000 +0200 @@ -5,7 +5,7 @@ DSTDIR=$(mktemp --directory --tmpdir $(basename $0).XXXXXXXXXX) BAZEL=/tmp/bazel -BAZELISK_RELEASE=v1.16.0 +BAZELISK_RELEASE=v1.17.0 if [[ ${UID} -ne 0 ]]; then if [[ -d deploy ]]; then diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/python/setup.py new/re2-2023-08-01/python/setup.py --- old/re2-2023-07-01/python/setup.py 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/python/setup.py 2023-07-28 21:03:25.000000000 +0200 @@ -84,7 +84,7 @@ setuptools.setup( name='google-re2', - version='1.0', + version='1.1', description='RE2 Python bindings', long_description=long_description, long_description_content_type='text/plain', @@ -98,8 +98,8 @@ 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Programming Language :: C++', - 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], cmdclass={'build_ext': BuildExt}, - python_requires='~=3.7', + python_requires='~=3.8', ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/fuzzing/re2_fuzzer.cc new/re2-2023-08-01/re2/fuzzing/re2_fuzzer.cc --- old/re2-2023-07-01/re2/fuzzing/re2_fuzzer.cc 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/fuzzing/re2_fuzzer.cc 2023-07-28 21:03:25.000000000 +0200 @@ -209,6 +209,7 @@ dummy += re.NamedCapturingGroups().size(); dummy += re.CapturingGroupNames().size(); dummy += RE2::QuoteMeta(pattern).size(); + dummy += re.Regexp()->ToString().size(); RE2::Set set(options, anchor); 
int index = set.Add(pattern, /*error=*/NULL); // -1 on error diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter.h new/re2-2023-08-01/re2/prefilter.h --- old/re2-2023-07-01/re2/prefilter.h 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/prefilter.h 2023-07-28 21:03:25.000000000 +0200 @@ -59,6 +59,44 @@ std::string DebugString() const; private: + template <typename H> + friend H AbslHashValue(H h, const Prefilter& a) { + h = H::combine(std::move(h), a.op_); + if (a.op_ == ATOM) { + h = H::combine(std::move(h), a.atom_); + } else if (a.op_ == AND || a.op_ == OR) { + h = H::combine(std::move(h), a.subs_->size()); + for (size_t i = 0; i < a.subs_->size(); ++i) { + h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_); + } + } + return h; + } + + friend bool operator==(const Prefilter& a, const Prefilter& b) { + if (&a == &b) { + return true; + } + if (a.op_ != b.op_) { + return false; + } + if (a.op_ == ATOM) { + if (a.atom_ != b.atom_) { + return false; + } + } else if (a.op_ == AND || a.op_ == OR) { + if (a.subs_->size() != b.subs_->size()) { + return false; + } + for (size_t i = 0; i < a.subs_->size(); ++i) { + if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) { + return false; + } + } + } + return true; + } + // A comparator used to store exact strings. We compare by length, // then lexicographically. This ordering makes it easier to reduce the // set of strings in SimplifyStringSet. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter_tree.cc new/re2-2023-08-01/re2/prefilter_tree.cc --- old/re2-2023-07-01/re2/prefilter_tree.cc 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/prefilter_tree.cc 2023-07-28 21:03:25.000000000 +0200 @@ -7,7 +7,6 @@ #include <stddef.h> #include <algorithm> #include <cmath> -#include <map> #include <memory> #include <string> #include <utility> @@ -63,33 +62,18 @@ compiled_ = true; - NodeMap nodes; + NodeSet nodes; AssignUniqueIds(&nodes, atom_vec); if (ExtraDebug) PrintDebugInfo(&nodes); } -Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) { - std::string node_string = NodeString(node); - NodeMap::iterator iter = nodes->find(node_string); - if (iter == nodes->end()) - return NULL; - return (*iter).second; -} - -std::string PrefilterTree::NodeString(Prefilter* node) const { - // Adding the operation disambiguates AND/OR/atom nodes. - std::string s = absl::StrFormat("%d", node->op()) + ":"; - if (node->op() == Prefilter::ATOM) { - s += node->atom(); - } else { - for (size_t i = 0; i < node->subs()->size(); i++) { - if (i > 0) - s += ','; - s += absl::StrFormat("%d", (*node->subs())[i]->unique_id()); - } +Prefilter* PrefilterTree::CanonicalNode(NodeSet* nodes, Prefilter* node) { + NodeSet::const_iterator iter = nodes->find(node); + if (iter != nodes->end()) { + return *iter; } - return s; + return NULL; } bool PrefilterTree::KeepNode(Prefilter* node) const { @@ -129,7 +113,7 @@ } } -void PrefilterTree::AssignUniqueIds(NodeMap* nodes, +void PrefilterTree::AssignUniqueIds(NodeSet* nodes, std::vector<std::string>* atom_vec) { atom_vec->clear(); @@ -169,9 +153,9 @@ node->set_unique_id(-1); Prefilter* canonical = CanonicalNode(nodes, node); if (canonical == NULL) { - // Any further nodes that have the same node string + // Any further nodes that have the same atom/subs // will find this node as the canonical node. 
- nodes->emplace(NodeString(node), node); + nodes->emplace(node); if (node->op() == Prefilter::ATOM) { atom_vec->push_back(node->atom()); atom_index_to_id_.push_back(unique_id); @@ -300,7 +284,7 @@ for (size_t j = 0; j < matched_atoms.size(); j++) matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]); PropagateMatch(matched_atom_ids, &regexps_map); - for (IntMap::iterator it = regexps_map.begin(); + for (IntMap::const_iterator it = regexps_map.begin(); it != regexps_map.end(); ++it) regexps->push_back(it->index()); @@ -316,7 +300,7 @@ IntMap work(static_cast<int>(entries_.size())); for (size_t i = 0; i < atom_ids.size(); i++) work.set(atom_ids[i], 1); - for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { + for (IntMap::const_iterator it = work.begin(); it != work.end(); ++it) { const Entry& entry = entries_[it->index()]; // Record regexps triggered. for (size_t i = 0; i < entry.regexps.size(); i++) @@ -348,7 +332,7 @@ LOG(ERROR) << DebugNodeString(prefilter_vec_[regexpid]); } -void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { +void PrefilterTree::PrintDebugInfo(NodeSet* nodes) { LOG(ERROR) << "#Unique Atoms: " << atom_index_to_id_.size(); LOG(ERROR) << "#Unique Nodes: " << entries_.size(); @@ -360,11 +344,10 @@ for (int parent : parents) LOG(ERROR) << parent; } - LOG(ERROR) << "Map:"; - for (NodeMap::const_iterator iter = nodes->begin(); + LOG(ERROR) << "Set:"; + for (NodeSet::const_iterator iter = nodes->begin(); iter != nodes->end(); ++iter) - LOG(ERROR) << "NodeId: " << (*iter).second->unique_id() - << " Str: " << (*iter).first; + LOG(ERROR) << "NodeId: " << (*iter)->unique_id(); } std::string PrefilterTree::DebugNodeString(Prefilter* node) const { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/prefilter_tree.h new/re2-2023-08-01/re2/prefilter_tree.h --- old/re2-2023-07-01/re2/prefilter_tree.h 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/prefilter_tree.h
2023-07-28 21:03:25.000000000 +0200 @@ -16,12 +16,13 @@ // atoms) that the user of this class should use to do the string // matching. -#include <map> #include <string> #include <vector> +#include "absl/container/flat_hash_set.h" #include "re2/prefilter.h" #include "re2/sparse_array.h" +#include "util/logging.h" namespace re2 { @@ -57,10 +58,25 @@ void PrintPrefilter(int regexpid); private: - typedef SparseArray<int> IntMap; - // TODO(junyer): Use absl::flat_hash_set<Prefilter*> instead? - // It should be trivial to get rid of the stringification... - typedef std::map<std::string, Prefilter*> NodeMap; + using IntMap = SparseArray<int>; + + struct PrefilterHash { + size_t operator()(const Prefilter* a) const { + DCHECK(a != NULL); + return absl::Hash<Prefilter>()(*a); + } + }; + + struct PrefilterEqual { + bool operator()(const Prefilter* a, const Prefilter* b) const { + DCHECK(a != NULL); + DCHECK(b != NULL); + return *a == *b; + } + }; + + using NodeSet = + absl::flat_hash_set<Prefilter*, PrefilterHash, PrefilterEqual>; // Each unique node has a corresponding Entry that helps in // passing the matching trigger information along the tree. @@ -90,25 +106,22 @@ // This function assigns unique ids to various parts of the // prefilter, by looking at if these nodes are already in the // PrefilterTree. - void AssignUniqueIds(NodeMap* nodes, std::vector<std::string>* atom_vec); + void AssignUniqueIds(NodeSet* nodes, std::vector<std::string>* atom_vec); // Given the matching atoms, find the regexps to be triggered. void PropagateMatch(const std::vector<int>& atom_ids, IntMap* regexps) const; - // Returns the prefilter node that has the same NodeString as this - // node. For the canonical node, returns node. - Prefilter* CanonicalNode(NodeMap* nodes, Prefilter* node); - - // A string that uniquely identifies the node. Assumes that the - // children of node has already been assigned unique ids. 
- std::string NodeString(Prefilter* node) const; + // Returns the prefilter node that has the same atom/subs as this + // node. For the canonical node, returns node. Assumes that the + // children of node have already been assigned unique ids. + Prefilter* CanonicalNode(NodeSet* nodes, Prefilter* node); // Recursively constructs a readable prefilter string. std::string DebugNodeString(Prefilter* node) const; // Used for debugging. - void PrintDebugInfo(NodeMap* nodes); + void PrintDebugInfo(NodeSet* nodes); // These are all the nodes formed by Compile. Essentially, there is // one node for each unique atom and each unique AND/OR node. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/regexp.cc new/re2-2023-08-01/re2/regexp.cc --- old/re2-2023-07-01/re2/regexp.cc 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/regexp.cc 2023-07-28 21:03:25.000000000 +0200 @@ -17,6 +17,7 @@ #include "absl/base/call_once.h" #include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" #include "absl/synchronization/mutex.h" #include "util/logging.h" #include "util/utf.h" @@ -76,7 +77,7 @@ // Similar to EmptyStorage in re2.cc. 
struct RefStorage { absl::Mutex ref_mutex; - std::map<Regexp*, int> ref_map; + absl::flat_hash_map<Regexp*, int> ref_map; }; alignas(RefStorage) static char ref_storage[sizeof(RefStorage)]; @@ -84,7 +85,7 @@ return &reinterpret_cast<RefStorage*>(ref_storage)->ref_mutex; } -static inline std::map<Regexp*, int>* ref_map() { +static inline absl::flat_hash_map<Regexp*, int>* ref_map() { return &reinterpret_cast<RefStorage*>(ref_storage)->ref_map; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/simplify.cc new/re2-2023-08-01/re2/simplify.cc --- old/re2-2023-07-01/re2/simplify.cc 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/simplify.cc 2023-07-28 21:03:25.000000000 +0200 @@ -6,6 +6,7 @@ // to use simple extended regular expression features. // Also sort and simplify character classes. +#include <algorithm> #include <string> #include "util/logging.h" @@ -579,6 +580,16 @@ return re; } +// Returns true if re is an empty-width op. +static bool IsEmptyOp(Regexp* re) { + return (re->op() == kRegexpBeginLine || + re->op() == kRegexpEndLine || + re->op() == kRegexpWordBoundary || + re->op() == kRegexpNoWordBoundary || + re->op() == kRegexpBeginText || + re->op() == kRegexpEndText); +} + // Simplifies the expression re{min,max} in terms of *, +, and ?. // Returns a new regexp. Does not edit re. Does not consume reference to re. // Caller must Decref return value when done with it. @@ -587,6 +598,16 @@ // but in the Regexp* representation, both (x) are marked as $1. Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max, Regexp::ParseFlags f) { + // For an empty-width op OR a concatenation or alternation of empty-width + // ops, cap the repetition count at 1. 
+ if (IsEmptyOp(re) || + ((re->op() == kRegexpConcat || + re->op() == kRegexpAlternate) && + std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) { + min = std::min(min, 1); + max = std::min(max, 1); + } + // x{n,} means at least n matches of x. if (max == -1) { // Special case: x{0,} is x* diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2023-07-01/re2/testing/simplify_test.cc new/re2-2023-08-01/re2/testing/simplify_test.cc --- old/re2-2023-07-01/re2/testing/simplify_test.cc 2023-06-30 16:48:20.000000000 +0200 +++ new/re2-2023-08-01/re2/testing/simplify_test.cc 2023-07-28 21:03:25.000000000 +0200 @@ -140,6 +140,22 @@ { "(){1,}", "()+" }, { "(){0,2}", "(?:()()?)?" }, + // For an empty-width op OR a concatenation or alternation of empty-width + // ops, test that the repetition count is capped at 1. + { "(?:^){0,}", "^*" }, // x{0,} -> x* + { "(?:$){28,}", "$+" }, // x{N,} -> x{1,} -> x+ + { "(?-m:^){0,30}", "(?-m:^)?" }, // x{0,N} -> x{0,1} -> x? + { "(?-m:$){28,30}", "(?-m:$)" }, // x{N,M} -> x{1,1} -> x + { "\\b(?:\\b\\B){999}\\B", "\\b\\b\\B\\B" }, + { "\\b(?:\\b|\\B){999}\\B", "\\b(?:\\b|\\B)\\B" }, + // NonGreedy should also be handled. + { "(?:^){0,}?", "^*?" }, + { "(?:$){28,}?", "$+?" }, + { "(?-m:^){0,30}?", "(?-m:^)??" }, + { "(?-m:$){28,30}?", "(?-m:$)" }, + { "\\b(?:\\b\\B){999}?\\B", "\\b\\b\\B\\B" }, + { "\\b(?:\\b|\\B){999}?\\B", "\\b(?:\\b|\\B)\\B" }, + // Test that coalescing occurs and that the resulting repeats are simplified. // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal: { "a*a*", "a*" },
