Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2020-08-14 09:30:55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.3399 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Fri Aug 14 09:30:55 2020 rev:33 rq:825790 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2020-06-10 00:33:59.176684099 +0200 +++ /work/SRC/openSUSE:Factory/.re2.new.3399/re2.changes 2020-08-14 09:31:18.260336155 +0200 @@ -1,0 +2,6 @@ +Tue Aug 11 13:10:17 UTC 2020 - Martin Pluskal <[email protected]> + +- Update to version 2020-08-01: + * Various internal changes + +------------------------------------------------------------------- Old: ---- re2-2020-06-01.tar.gz New: ---- re2-2020-08-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.aO8BaF/_old 2020-08-14 09:31:20.856337486 +0200 +++ /var/tmp/diff_new_pack.aO8BaF/_new 2020-08-14 09:31:20.860337488 +0200 @@ -16,9 +16,9 @@ # -%global longver 2020-06-01 +%global longver 2020-08-01 %global shortver %(echo %{longver}|sed 's|-||g') -%define libname libre2-7 +%define libname libre2-8 Name: re2 Version: %{shortver} Release: 0 ++++++ baselibs.conf ++++++ --- /var/tmp/diff_new_pack.aO8BaF/_old 2020-08-14 09:31:20.888337502 +0200 +++ /var/tmp/diff_new_pack.aO8BaF/_new 2020-08-14 09:31:20.892337504 +0200 @@ -1 +1 @@ -libre2-7 +libre2-8 ++++++ re2-2020-06-01.tar.gz -> re2-2020-08-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/BUILD new/re2-2020-08-01/BUILD --- old/re2-2020-06-01/BUILD 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/BUILD 2020-07-14 19:26:22.000000000 +0200 @@ -9,18 +9,18 @@ exports_files(["LICENSE"]) config_setting( - name = "darwin", + name = "macos", values = {"cpu": "darwin"}, ) config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, + name = "wasm", + values = {"cpu": "wasm32"}, ) config_setting( - name = "windows_msvc", - values = {"cpu": "x64_windows_msvc"}, + name = "windows", + values = {"cpu": "x64_windows"}, ) load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") @@ -75,17 +75,17 @@ "re2/stringpiece.h", ], copts = select({ + ":wasm": [], ":windows": [], - ":windows_msvc": [], "//conditions:default": ["-pthread"], }), linkopts = select({ - # Darwin doesn't need `-pthread' when linking and it appears that + # macOS doesn't need `-pthread' when linking and it appears that # older versions of Clang will warn about the unused command line # argument, so just don't pass it. - ":darwin": [], + ":macos": [], + ":wasm": [], ":windows": [], - ":windows_msvc": [], "//conditions:default": ["-pthread"], }), visibility = ["//visibility:public"], diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/Makefile new/re2-2020-08-01/Makefile --- old/re2-2020-06-01/Makefile 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/Makefile 2020-07-14 19:26:22.000000000 +0200 @@ -44,7 +44,7 @@ # ABI version # http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html -SONAME=7 +SONAME=8 # To rebuild the Tables generated by Perl and Python scripts (requires Internet # access for Unicode data), uncomment the following line: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/compile.cc new/re2-2020-08-01/re2/compile.cc --- old/re2-2020-06-01/re2/compile.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/compile.cc 2020-07-14 19:26:22.000000000 +0200 @@ -30,91 +30,57 @@ // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. // // Because the out and out1 fields in Inst are no longer pointers, -// we can't use pointers directly here either. Instead, p refers -// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1). -// p == 0 represents the NULL list. This is okay because instruction #0 +// we can't use pointers directly here either. Instead, head refers +// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1). +// head == 0 represents the NULL list. This is okay because instruction #0 // is always the fail instruction, which never appears on a list. - struct PatchList { - uint32_t p; - // Returns patch list containing just p. - static PatchList Mk(uint32_t p); + static PatchList Mk(uint32_t p) { + return {p, p}; + } - // Patches all the entries on l to have value v. + // Patches all the entries on l to have value p. // Caller must not ever use patch list again. - static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v); - - // Deref returns the next pointer pointed at by p. - static PatchList Deref(Prog::Inst *inst0, PatchList l); + static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) { + while (l.head != 0) { + Prog::Inst* ip = &inst0[l.head>>1]; + if (l.head&1) { + l.head = ip->out1(); + ip->out1_ = p; + } else { + l.head = ip->out(); + ip->set_out(p); + } + } + } // Appends two patch lists and returns result. - static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2); -}; - -static PatchList nullPatchList = { 0 }; - -// Returns patch list containing just p. -PatchList PatchList::Mk(uint32_t p) { - PatchList l; - l.p = p; - return l; -} - -// Returns the next pointer pointed at by l. -PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) { - Prog::Inst* ip = &inst0[l.p>>1]; - if (l.p&1) - l.p = ip->out1(); - else - l.p = ip->out(); - return l; -} - -// Patches all the entries on l to have value v. -void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) { - while (l.p != 0) { - Prog::Inst* ip = &inst0[l.p>>1]; - if (l.p&1) { - l.p = ip->out1(); - ip->out1_ = val; - } else { - l.p = ip->out(); - ip->set_out(val); - } + static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) { + if (l1.head == 0) + return l2; + if (l2.head == 0) + return l1; + Prog::Inst* ip = &inst0[l1.tail>>1]; + if (l1.tail&1) + ip->out1_ = l2.head; + else + ip->set_out(l2.head); + return {l1.head, l2.tail}; } -} -// Appends two patch lists and returns result. -PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) { - if (l1.p == 0) - return l2; - if (l2.p == 0) - return l1; - - PatchList l = l1; - for (;;) { - PatchList next = PatchList::Deref(inst0, l); - if (next.p == 0) - break; - l = next; - } - - Prog::Inst* ip = &inst0[l.p>>1]; - if (l.p&1) - ip->out1_ = l2.p; - else - ip->set_out(l2.p); + uint32_t head; + uint32_t tail; // for constant-time append +}; - return l1; -} +static const PatchList kNullPatchList = {0, 0}; // Compiled program fragment. struct Frag { uint32_t begin; PatchList end; - Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector + Frag() : begin(0) { end.head = 0; } // needed so Frag can go in vector Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {} }; @@ -212,8 +178,8 @@ int AddSuffixRecursive(int root, int id); // Finds the trie node for the given suffix. Returns a Frag in order to - // distinguish between pointing at the root node directly (end.p == 0) - // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively). + // distinguish between pointing at the root node directly (end.head == 0) + // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively). Frag FindByteRange(int root, int id); // Compares two ByteRanges and returns true iff they are equal. @@ -298,7 +264,7 @@ // Returns an unmatchable fragment. Frag Compiler::NoMatch() { - return Frag(0, nullPatchList); + return Frag(0, kNullPatchList); } // Is a an unmatchable fragment? @@ -314,7 +280,7 @@ // Elide no-op. Prog::Inst* begin = &inst_[a.begin]; if (begin->opcode() == kInstNop && - a.end.p == (a.begin << 1) && + a.end.head == (a.begin << 1) && begin->out() == 0) { // in case refs to a somewhere PatchList::Patch(inst_.data(), a.end, b.begin); @@ -419,7 +385,7 @@ if (id < 0) return NoMatch(); inst_[id].InitMatch(match_id); - return Frag(id, nullPatchList); + return Frag(id, kNullPatchList); } // Returns a fragment matching a particular empty-width op (like ^ or $) @@ -467,7 +433,7 @@ void Compiler::BeginRange() { rune_cache_.clear(); rune_range_.begin = 0; - rune_range_.end = nullPatchList; + rune_range_.end = kNullPatchList; } int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, @@ -548,9 +514,9 @@ } int br; - if (f.end.p == 0) + if (f.end.head == 0) br = root; - else if (f.end.p&1) + else if (f.end.head&1) br = inst_[f.begin].out1(); else br = inst_[f.begin].out(); @@ -566,9 +532,9 @@ // Ensure that the parent points to the clone, not to the original. // Note that this could leave the head unreachable except via the cache. br = byterange; - if (f.end.p == 0) + if (f.end.head == 0) root = br; - else if (f.end.p&1) + else if (f.end.head&1) inst_[f.begin].out1_ = br; else inst_[f.begin].set_out(br); @@ -601,7 +567,7 @@ Frag Compiler::FindByteRange(int root, int id) { if (inst_[root].opcode() == kInstByteRange) { if (ByteRangeEqual(root, id)) - return Frag(root, nullPatchList); + return Frag(root, kNullPatchList); else return NoMatch(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/dfa.cc new/re2-2020-08-01/re2/dfa.cc --- old/re2-2020-06-01/re2/dfa.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/dfa.cc 2020-07-14 19:26:22.000000000 +0200 @@ -280,10 +280,10 @@ // The generic search loop, inlined to create specialized versions. // cache_mutex_.r <= L < mutex_ // Might unlock and relock cache_mutex_ via params->cache_lock. - inline bool InlinedSearchLoop(SearchParams* params, - bool can_prefix_accel, - bool want_earliest_match, - bool run_forward); + template <bool can_prefix_accel, + bool want_earliest_match, + bool run_forward> + inline bool InlinedSearchLoop(SearchParams* params); // The specialized versions of InlinedSearchLoop. The three letters // at the ends of the name denote the true/false values used as the @@ -305,13 +305,6 @@ // Might unlock and relock cache_mutex_ via params->cache_lock. bool FastSearchLoop(SearchParams* params); - // For debugging, a slow search loop that calls InlinedSearchLoop - // directly -- because the booleans passed are not constants, the - // loop is not specialized like the SearchFFF etc. versions, so it - // runs much more slowly. Useful only for debugging. - // cache_mutex_.r <= L < mutex_ - // Might unlock and relock cache_mutex_ via params->cache_lock. - bool SlowSearchLoop(SearchParams* params); // Looks up bytes in bytemap_ but handles case c == kByteEndText too. int ByteMap(int c) { @@ -1321,10 +1314,10 @@ // The bools are equal to the same-named variables in params, but // making them function arguments lets the inliner specialize // this function to each combination (see two paragraphs above). -inline bool DFA::InlinedSearchLoop(SearchParams* params, - bool can_prefix_accel, - bool want_earliest_match, - bool run_forward) { +template <bool can_prefix_accel, + bool want_earliest_match, + bool run_forward> +inline bool DFA::InlinedSearchLoop(SearchParams* params) { State* start = params->start; const uint8_t* bp = BytePtr(params->text.data()); // start of text const uint8_t* p = bp; // text scanning point @@ -1549,36 +1542,28 @@ // Inline specializations of the general loop. bool DFA::SearchFFF(SearchParams* params) { - return InlinedSearchLoop(params, 0, 0, 0); + return InlinedSearchLoop<false, false, false>(params); } bool DFA::SearchFFT(SearchParams* params) { - return InlinedSearchLoop(params, 0, 0, 1); + return InlinedSearchLoop<false, false, true>(params); } bool DFA::SearchFTF(SearchParams* params) { - return InlinedSearchLoop(params, 0, 1, 0); + return InlinedSearchLoop<false, true, false>(params); } bool DFA::SearchFTT(SearchParams* params) { - return InlinedSearchLoop(params, 0, 1, 1); + return InlinedSearchLoop<false, true, true>(params); } bool DFA::SearchTFF(SearchParams* params) { - return InlinedSearchLoop(params, 1, 0, 0); + return InlinedSearchLoop<true, false, false>(params); } bool DFA::SearchTFT(SearchParams* params) { - return InlinedSearchLoop(params, 1, 0, 1); + return InlinedSearchLoop<true, false, true>(params); } bool DFA::SearchTTF(SearchParams* params) { - return InlinedSearchLoop(params, 1, 1, 0); + return InlinedSearchLoop<true, true, false>(params); } bool DFA::SearchTTT(SearchParams* params) { - return InlinedSearchLoop(params, 1, 1, 1); -} - -// For debugging, calls the general code directly. -bool DFA::SlowSearchLoop(SearchParams* params) { - return InlinedSearchLoop(params, - params->can_prefix_accel, - params->want_earliest_match, - params->run_forward); + return InlinedSearchLoop<true, true, true>(params); } // For performance, calls the appropriate specialized version diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/filtered_re2.cc new/re2-2020-08-01/re2/filtered_re2.cc --- old/re2-2020-06-01/re2/filtered_re2.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/filtered_re2.cc 2020-07-14 19:26:22.000000000 +0200 @@ -6,6 +6,7 @@ #include <stddef.h> #include <string> +#include <utility> #include "util/util.h" #include "util/logging.h" @@ -27,7 +28,22 @@ FilteredRE2::~FilteredRE2() { for (size_t i = 0; i < re2_vec_.size(); i++) delete re2_vec_[i]; - delete prefilter_tree_; +} + +FilteredRE2::FilteredRE2(FilteredRE2&& other) + : re2_vec_(std::move(other.re2_vec_)), + compiled_(other.compiled_), + prefilter_tree_(std::move(other.prefilter_tree_)) { + other.re2_vec_.clear(); + other.re2_vec_.shrink_to_fit(); + other.compiled_ = false; + other.prefilter_tree_.reset(new PrefilterTree()); +} + +FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) { + this->~FilteredRE2(); + (void) new (this) FilteredRE2(std::move(other)); + return *this; } RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, @@ -38,7 +54,7 @@ if (!re->ok()) { if (options.log_errors()) { LOG(ERROR) << "Couldn't compile regular expression, skipping: " - << re << " due to error " << re->error(); + << pattern << " due to error " << re->error(); } delete re; } else { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/filtered_re2.h new/re2-2020-08-01/re2/filtered_re2.h --- old/re2-2020-06-01/re2/filtered_re2.h 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/filtered_re2.h 2020-07-14 19:26:22.000000000 +0200 @@ -21,6 +21,7 @@ // or AllMatches with a vector of indices of strings that were found // in the text to get the actual regexp matches. +#include <memory> #include <string> #include <vector> @@ -36,12 +37,19 @@ explicit FilteredRE2(int min_atom_len); ~FilteredRE2(); + // Not copyable. + FilteredRE2(const FilteredRE2&) = delete; + FilteredRE2& operator=(const FilteredRE2&) = delete; + // Movable. + FilteredRE2(FilteredRE2&& other); + FilteredRE2& operator=(FilteredRE2&& other); + // Uses RE2 constructor to create a RE2 object (re). Returns // re->error_code(). If error_code is other than NoError, then re is // deleted and not added to re2_vec_. RE2::ErrorCode Add(const StringPiece& pattern, const RE2::Options& options, - int *id); + int* id); // Prepares the regexps added by Add for filtering. Returns a set // of strings that the caller should check for in candidate texts. @@ -98,10 +106,7 @@ bool compiled_; // An AND-OR tree of string atoms used for filtering regexps. - PrefilterTree* prefilter_tree_; - - FilteredRE2(const FilteredRE2&) = delete; - FilteredRE2& operator=(const FilteredRE2&) = delete; + std::unique_ptr<PrefilterTree> prefilter_tree_; }; } // namespace re2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/parse.cc new/re2-2020-08-01/re2/parse.cc --- old/re2-2020-06-01/re2/parse.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/parse.cc 2020-07-14 19:26:22.000000000 +0200 @@ -685,7 +685,7 @@ if ((r1 = stacktop_) == NULL || (r2 = r1->down_) == NULL || r2->op() != kLeftParen) { - status_->set_code(kRegexpMissingParen); + status_->set_code(kRegexpUnexpectedParen); status_->set_error_arg(whole_regexp_); return false; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/prefilter.cc new/re2-2020-08-01/re2/prefilter.cc --- old/re2-2020-06-01/re2/prefilter.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/prefilter.cc 2020-07-14 19:26:22.000000000 +0200 @@ -648,14 +648,15 @@ return NULL; Regexp* simple = re->Simplify(); - Prefilter::Info *info = BuildInfo(simple); + if (simple == NULL) + return NULL; + Prefilter::Info* info = BuildInfo(simple); simple->Decref(); if (info == NULL) return NULL; Prefilter* m = info->TakeMatch(); - delete info; return m; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/re2.cc new/re2-2020-08-01/re2/re2.cc --- old/re2-2020-06-01/re2/re2.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/re2.cc 2020-07-14 19:26:22.000000000 +0200 @@ -83,6 +83,8 @@ return RE2::ErrorMissingBracket; case re2::kRegexpMissingParen: return RE2::ErrorMissingParen; + case re2::kRegexpUnexpectedParen: + return RE2::ErrorUnexpectedParen; case re2::kRegexpTrailingBackslash: return RE2::ErrorTrailingBackslash; case re2::kRegexpRepeatArgument: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/re2.h new/re2-2020-08-01/re2/re2.h --- old/re2-2020-06-01/re2/re2.h 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/re2.h 2020-07-14 19:26:22.000000000 +0200 @@ -40,6 +40,9 @@ // R"((?i)hello)" -- (?i) turns on case-insensitive matching // R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible // +// When using UTF-8 encoding, case-insensitive matching will perform +// simple case folding, not full case folding. +// // ----------------------------------------------------------------------- // MATCHING INTERFACE: // @@ -244,6 +247,7 @@ ErrorBadCharRange, // bad character class range ErrorMissingBracket, // missing closing ] ErrorMissingParen, // missing closing ) + ErrorUnexpectedParen, // unexpected closing ) ErrorTrailingBackslash, // trailing \ at end of regexp ErrorRepeatArgument, // repeat argument missing, e.g. "*" ErrorRepeatSize, // bad repetition argument diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/regexp.cc new/re2-2020-08-01/re2/regexp.cc --- old/re2-2020-06-01/re2/regexp.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/regexp.cc 2020-07-14 19:26:22.000000000 +0200 @@ -498,6 +498,7 @@ "invalid character class range", "missing ]", "missing )", + "unexpected )", "trailing \\", "no argument for repetition operator", "invalid repetition size", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/regexp.h new/re2-2020-08-01/re2/regexp.h --- old/re2-2020-06-01/re2/regexp.h 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/regexp.h 2020-07-14 19:26:22.000000000 +0200 @@ -177,6 +177,7 @@ kRegexpBadCharRange, // bad character class range kRegexpMissingBracket, // missing closing ] kRegexpMissingParen, // missing closing ) + kRegexpUnexpectedParen, // unexpected closing ) kRegexpTrailingBackslash, // at end of regexp kRegexpRepeatArgument, // repeat argument missing, e.g. "*" kRegexpRepeatSize, // bad repetition argument diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/set.cc new/re2-2020-08-01/re2/set.cc --- old/re2-2020-06-01/re2/set.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/set.cc 2020-07-14 19:26:22.000000000 +0200 @@ -7,6 +7,7 @@ #include <stddef.h> #include <algorithm> #include <memory> +#include <utility> #include "util/util.h" #include "util/logging.h" @@ -18,19 +19,37 @@ namespace re2 { -RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { - options_.Copy(options); +RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) + : options_(options), + anchor_(anchor), + compiled_(false), + size_(0) { options_.set_never_capture(true); // might unblock some optimisations - anchor_ = anchor; - prog_ = NULL; - compiled_ = false; - size_ = 0; } RE2::Set::~Set() { for (size_t i = 0; i < elem_.size(); i++) elem_[i].second->Decref(); - delete prog_; +} + +RE2::Set::Set(Set&& other) + : options_(other.options_), + anchor_(other.anchor_), + elem_(std::move(other.elem_)), + compiled_(other.compiled_), + size_(other.size_), + prog_(std::move(other.prog_)) { + other.elem_.clear(); + other.elem_.shrink_to_fit(); + other.compiled_ = false; + other.size_ = 0; + other.prog_.reset(); +} + +RE2::Set& RE2::Set::operator=(Set&& other) { + this->~Set(); + (void) new (this) Set(std::move(other)); + return *this; } int RE2::Set::Add(const StringPiece& pattern, std::string* error) { @@ -97,9 +116,9 @@ options_.ParseFlags()); re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); - prog_ = Prog::CompileSet(re, anchor_, options_.max_mem()); + prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem())); re->Decref(); - return prog_ != NULL; + return prog_ != nullptr; } bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/set.h new/re2-2020-08-01/re2/set.h --- old/re2-2020-06-01/re2/set.h 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/set.h 2020-07-14 19:26:22.000000000 +0200 @@ -5,6 +5,7 @@ #ifndef RE2_SET_H_ #define RE2_SET_H_ +#include <memory> #include <string> #include <utility> #include <vector> @@ -36,6 +37,13 @@ Set(const RE2::Options& options, RE2::Anchor anchor); ~Set(); + // Not copyable. + Set(const Set&) = delete; + Set& operator=(const Set&) = delete; + // Movable. + Set(Set&& other); + Set& operator=(Set&& other); + // Adds pattern to the set using the options passed to the constructor. // Returns the index that will identify the regexp in the output of Match(), // or -1 if the regexp cannot be parsed. @@ -67,12 +75,9 @@ RE2::Options options_; RE2::Anchor anchor_; std::vector<Elem> elem_; - re2::Prog* prog_; bool compiled_; int size_; - - Set(const Set&) = delete; - Set& operator=(const Set&) = delete; + std::unique_ptr<re2::Prog> prog_; }; } // namespace re2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/simplify.cc new/re2-2020-08-01/re2/simplify.cc --- old/re2-2020-06-01/re2/simplify.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/simplify.cc 2020-07-14 19:26:22.000000000 +0200 @@ -28,8 +28,6 @@ Regexp* sre = re->Simplify(); re->Decref(); if (sre == NULL) { - // Should not happen, since Simplify never fails. - LOG(ERROR) << "Simplify failed on " << src; if (status) { status->set_code(kRegexpInternalError); status->set_error_arg(src); @@ -180,10 +178,20 @@ CoalesceWalker cw; Regexp* cre = cw.Walk(this, NULL); if (cre == NULL) - return cre; + return NULL; + if (cw.stopped_early()) { + cre->Decref(); + return NULL; + } SimplifyWalker sw; Regexp* sre = sw.Walk(cre, NULL); cre->Decref(); + if (sre == NULL) + return NULL; + if (sw.stopped_early()) { + sre->Decref(); + return NULL; + } return sre; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/testing/filtered_re2_test.cc new/re2-2020-08-01/re2/testing/filtered_re2_test.cc --- old/re2-2020-06-01/re2/testing/filtered_re2_test.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/testing/filtered_re2_test.cc 2020-07-14 19:26:22.000000000 +0200 @@ -7,6 +7,7 @@ #include <memory> #include <string> #include <vector> +#include <utility> #include "util/test.h" #include "util/logging.h" @@ -291,4 +292,49 @@ "EmptyStringInStringSetBug", &v)); } +TEST(FilteredRE2Test, MoveSemantics) { + FilterTestVars v1; + int id; + v1.f.Add("foo\\d+", v1.opts, &id); + EXPECT_EQ(0, id); + v1.f.Compile(&v1.atoms); + EXPECT_EQ(1, v1.atoms.size()); + EXPECT_EQ("foo", v1.atoms[0]); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); + + // The moved-to object should do what the moved-from object did. + FilterTestVars v2; + v2.f = std::move(v1.f); + v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches); + EXPECT_EQ(1, v2.matches.size()); + EXPECT_EQ(0, v2.matches[0]); + v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches); + EXPECT_EQ(0, v2.matches.size()); + + // The moved-from object should have been reset and be reusable. + v1.f.Add("bar\\d+", v1.opts, &id); + EXPECT_EQ(0, id); + v1.f.Compile(&v1.atoms); + EXPECT_EQ(1, v1.atoms.size()); + EXPECT_EQ("bar", v1.atoms[0]); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + + // Verify that "overwriting" works and also doesn't leak memory. + // (The latter will need a leak detector such as LeakSanitizer.) + v1.f = std::move(v2.f); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); +} + } // namespace re2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/testing/re2_test.cc new/re2-2020-08-01/re2/testing/re2_test.cc --- old/re2-2020-06-01/re2/testing/re2_test.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/testing/re2_test.cc 2020-07-14 19:26:22.000000000 +0200 @@ -1277,38 +1277,43 @@ EXPECT_EQ(val, "1,0x2F,030,4,5"); } - // Check that RE2 returns correct regexp pieces on error. // In particular, make sure it returns whole runes // and that it always reports invalid UTF-8. // Also check that Perl error flag piece is big enough. static struct ErrorTest { const char *regexp; - const char *error; + RE2::ErrorCode error_code; + const char *error_arg; } error_tests[] = { - { "ab\\αcd", "\\α" }, - { "ef\\x☺01", "\\x☺0" }, - { "gh\\x1☺01", "\\x1☺" }, - { "ij\\x1", "\\x1" }, - { "kl\\x", "\\x" }, - { "uv\\x{0000☺}", "\\x{0000☺" }, - { "wx\\p{ABC", "\\p{ABC" }, - { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X - { "aa(?sm☺i", "(?sm☺" }, - { "bb[abc", "[abc" }, + { "ab\\αcd", RE2::ErrorBadEscape, "\\α" }, + { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" }, + { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" }, + { "ij\\x1", RE2::ErrorBadEscape, "\\x1" }, + { "kl\\x", RE2::ErrorBadEscape, "\\x" }, + { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" }, + { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" }, + // used to return (?s but the error is X + { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" }, + { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" }, + { "bb[abc", RE2::ErrorMissingBracket, "[abc" }, + { "abc(def", RE2::ErrorMissingParen, "abc(def" }, + { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" }, - { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8 - { "op\377qr", "" }, - { "st\\x{00000\377", "" }, - { "zz\\p{\377}", "" }, - { "zz\\x{00\377}", "" }, - { "zz(?P<name\377>abc)", "" }, + // no argument string returned for invalid UTF-8 + { "mn\\x1\377", RE2::ErrorBadUTF8, "" }, + { "op\377qr", RE2::ErrorBadUTF8, "" }, + { "st\\x{00000\377", RE2::ErrorBadUTF8, "" }, + { "zz\\p{\377}", RE2::ErrorBadUTF8, "" }, + { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" }, + { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" }, }; -TEST(RE2, ErrorArgs) { +TEST(RE2, ErrorCodeAndArg) { for (size_t i = 0; i < arraysize(error_tests); i++) { RE2 re(error_tests[i].regexp, RE2::Quiet); EXPECT_FALSE(re.ok()); - EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error(); + EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error(); + EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error(); } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2020-06-01/re2/testing/set_test.cc new/re2-2020-08-01/re2/testing/set_test.cc --- old/re2-2020-06-01/re2/testing/set_test.cc 2020-05-27 18:34:17.000000000 +0200 +++ new/re2-2020-08-01/re2/testing/set_test.cc 2020-07-14 19:26:22.000000000 +0200 @@ -5,6 +5,7 @@ #include <stddef.h> #include <string> #include <vector> +#include <utility> #include "util/test.h" #include "util/logging.h" @@ -201,4 +202,29 @@ ASSERT_EQ(v[0], 0); } +TEST(Set, MoveSemantics) { + RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED); + ASSERT_EQ(s1.Add("foo\\d+", NULL), 0); + ASSERT_EQ(s1.Compile(), true); + ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true); + ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false); + + // The moved-to object should do what the moved-from object did. + RE2::Set s2 = std::move(s1); + ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true); + ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false); + + // The moved-from object should have been reset and be reusable. + ASSERT_EQ(s1.Add("bar\\d+", NULL), 0); + ASSERT_EQ(s1.Compile(), true); + ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false); + ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true); + + // Verify that "overwriting" works and also doesn't leak memory. + // (The latter will need a leak detector such as LeakSanitizer.) + s1 = std::move(s2); + ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true); + ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false); +} + } // namespace re2
