Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2019-01-11 14:00:15 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.28833 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Fri Jan 11 14:00:15 2019 rev:20 rq:662780 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2018-10-23 20:34:09.308991279 +0200 +++ /work/SRC/openSUSE:Factory/.re2.new.28833/re2.changes 2019-01-11 14:00:18.252113195 +0100 @@ -1,0 +2,6 @@ +Fri Jan 4 11:42:56 UTC 2019 - [email protected] + +- update to 2019-01-01: + * developer visible changes, performance tweaks and bug fixes + +------------------------------------------------------------------- Old: ---- 2018-10-01.tar.gz New: ---- 2019-01-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.d27vZg/_old 2019-01-11 14:00:18.840112603 +0100 +++ /var/tmp/diff_new_pack.d27vZg/_new 2019-01-11 14:00:18.844112599 +0100 @@ -1,7 +1,7 @@ # # spec file for package re2 # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,7 +16,7 @@ # -%global longver 2018-10-01 +%global longver 2019-01-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-0 Name: re2 ++++++ 2018-10-01.tar.gz -> 2019-01-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/compile.cc new/re2-2019-01-01/re2/compile.cc --- old/re2-2018-10-01/re2/compile.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/compile.cc 2018-12-24 14:14:42.000000000 +0100 @@ -14,6 +14,7 @@ #include <utility> #include "util/logging.h" +#include "util/pod_array.h" #include "util/utf.h" #include "re2/prog.h" #include "re2/re2.h" @@ -236,11 +237,9 @@ Encoding encoding_; // Input encoding bool reversed_; // Should program run backward over text? - int max_inst_; // Maximum number of instructions. - - Prog::Inst* inst_; // Pointer to first instruction. - int inst_len_; // Number of instructions used. - int inst_cap_; // Number of instructions allocated. + PODArray<Prog::Inst> inst_; + int ninst_; // Number of instructions used. + int max_ninst_; // Maximum number of instructions. int64_t max_mem_; // Total memory budget. @@ -258,41 +257,38 @@ failed_ = false; encoding_ = kEncodingUTF8; reversed_ = false; - inst_ = NULL; - inst_len_ = 0; - inst_cap_ = 0; - max_inst_ = 1; // make AllocInst for fail instruction okay + ninst_ = 0; + max_ninst_ = 1; // make AllocInst for fail instruction okay max_mem_ = 0; int fail = AllocInst(1); inst_[fail].InitFail(); - max_inst_ = 0; // Caller must change + max_ninst_ = 0; // Caller must change } Compiler::~Compiler() { delete prog_; - delete[] inst_; } int Compiler::AllocInst(int n) { - if (failed_ || inst_len_ + n > max_inst_) { + if (failed_ || ninst_ + n > max_ninst_) { failed_ = true; return -1; } - if (inst_len_ + n > inst_cap_) { - if (inst_cap_ == 0) - inst_cap_ = 8; - while (inst_len_ + n > inst_cap_) - inst_cap_ *= 2; - Prog::Inst* ip = new Prog::Inst[inst_cap_]; - if (inst_ != NULL) - memmove(ip, inst_, inst_len_ * sizeof ip[0]); - memset(ip + inst_len_, 0, (inst_cap_ - inst_len_) * sizeof ip[0]); - delete[] inst_; - inst_ = ip; + if (ninst_ + n > inst_.size()) { + int cap = inst_.size(); + if (cap == 0) + cap = 8; + while (ninst_ + n > cap) + cap *= 2; + PODArray<Prog::Inst> inst(cap); + if (inst_.data() != NULL) + memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]); + memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]); + inst_ = std::move(inst); } - int id = inst_len_; - inst_len_ += n; + int id = ninst_; + ninst_ += n; return id; } @@ -320,17 +316,18 @@ if (begin->opcode() == kInstNop && a.end.p == (a.begin << 1) && begin->out() == 0) { - PatchList::Patch(inst_, a.end, b.begin); // in case refs to a somewhere + // in case refs to a somewhere + PatchList::Patch(inst_.data(), a.end, b.begin); return b; } // To run backward over string, reverse all concatenations. if (reversed_) { - PatchList::Patch(inst_, b.end, a.begin); + PatchList::Patch(inst_.data(), b.end, a.begin); return Frag(b.begin, a.end); } - PatchList::Patch(inst_, a.end, b.begin); + PatchList::Patch(inst_.data(), a.end, b.begin); return Frag(a.begin, b.end); } @@ -347,7 +344,7 @@ return NoMatch(); inst_[id].InitAlt(a.begin, b.begin); - return Frag(id, PatchList::Append(inst_, a.end, b.end)); + return Frag(id, PatchList::Append(inst_.data(), a.end, b.end)); } // When capturing submatches in like-Perl mode, a kOpAlt Inst @@ -363,7 +360,7 @@ if (id < 0) return NoMatch(); inst_[id].InitAlt(0, 0); - PatchList::Patch(inst_, a.end, id); + PatchList::Patch(inst_.data(), a.end, id); if (nongreedy) { inst_[id].out1_ = a.begin; return Frag(id, PatchList::Mk(id << 1)); @@ -395,7 +392,7 @@ inst_[id].InitAlt(a.begin, 0); pl = PatchList::Mk((id << 1) | 1); } - return Frag(id, PatchList::Append(inst_, pl, a.end)); + return Frag(id, PatchList::Append(inst_.data(), pl, a.end)); } // Returns a fragment for the byte range lo-hi. @@ -443,7 +440,7 @@ return NoMatch(); inst_[id].InitCapture(2*n, a.begin); inst_[id+1].InitCapture(2*n+1, 0); - PatchList::Patch(inst_, a.end, id+1); + PatchList::Patch(inst_.data(), a.end, id+1); return Frag(id, PatchList::Mk((id+1) << 1)); } @@ -477,9 +474,9 @@ int next) { Frag f = ByteRange(lo, hi, foldcase); if (next != 0) { - PatchList::Patch(inst_, f.end, next); + PatchList::Patch(inst_.data(), f.end, next); } else { - rune_range_.end = PatchList::Append(inst_, rune_range_.end, f.end); + rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end); } return f.begin; } @@ -581,10 +578,10 @@ if (!IsCachedRuneByteSuffix(id)) { // The head should be the instruction most recently allocated, so free it // instead of leaving it unreachable. - DCHECK_EQ(id, inst_len_-1); + DCHECK_EQ(id, ninst_-1); inst_[id].out_opcode_ = 0; inst_[id].out1_ = 0; - inst_len_--; + ninst_--; } out = AddSuffixRecursive(inst_[br].out(), out); @@ -1021,12 +1018,11 @@ if (re->nsub() > 0) { sub = re->sub()[0]->Incref(); if (IsAnchorStart(&sub, depth+1)) { - Regexp** subcopy = new Regexp*[re->nsub()]; + PODArray<Regexp*> subcopy(re->nsub()); subcopy[0] = sub; // already have reference for (int i = 1; i < re->nsub(); i++) subcopy[i] = re->sub()[i]->Incref(); - *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags()); - delete[] subcopy; + *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); re->Decref(); return true; } @@ -1069,12 +1065,11 @@ if (re->nsub() > 0) { sub = re->sub()[re->nsub() - 1]->Incref(); if (IsAnchorEnd(&sub, depth+1)) { - Regexp** subcopy = new Regexp*[re->nsub()]; + PODArray<Regexp*> subcopy(re->nsub()); subcopy[re->nsub() - 1] = sub; // already have reference for (int i = 0; i < re->nsub() - 1; i++) subcopy[i] = re->sub()[i]->Incref(); - *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags()); - delete[] subcopy; + *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags()); re->Decref(); return true; } @@ -1106,15 +1101,15 @@ encoding_ = kEncodingLatin1; max_mem_ = max_mem; if (max_mem <= 0) { - max_inst_ = 100000; // more than enough + max_ninst_ = 100000; // more than enough } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) { // No room for anything. - max_inst_ = 0; + max_ninst_ = 0; } else { int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); // Limit instruction count so that inst->id() fits nicely in an int. // SparseArray also assumes that the indices (inst->id()) are ints. - // The call to WalkExponential uses 2*max_inst_ below, + // The call to WalkExponential uses 2*max_ninst_ below, // and other places in the code use 2 or 3 * prog->size(). // Limiting to 2^24 should avoid overflow in those places. // (The point of allowing more than 32 bits of memory is to @@ -1127,7 +1122,7 @@ if (m > Prog::Inst::kMaxInst) m = Prog::Inst::kMaxInst; - max_inst_ = static_cast<int>(m); + max_ninst_ = static_cast<int>(m); } anchor_ = anchor; @@ -1155,7 +1150,7 @@ bool is_anchor_end = IsAnchorEnd(&sre, 0); // Generate fragment for entire regexp. - Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_); + Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_); sre->Decref(); if (c.failed_) return NULL; @@ -1192,13 +1187,12 @@ if (prog_->start() == 0 && prog_->start_unanchored() == 0) { // No possible matches; keep Fail instruction only. - inst_len_ = 1; + ninst_ = 1; } // Hand off the array to Prog. - prog_->inst_ = inst_; - prog_->size_ = inst_len_; - inst_ = NULL; + prog_->inst_ = std::move(inst_); + prog_->size_ = ninst_; prog_->Optimize(); prog_->Flatten(); @@ -1241,7 +1235,7 @@ if (sre == NULL) return NULL; - Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_); + Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_); sre->Decref(); if (c.failed_) return NULL; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/dfa.cc new/re2-2019-01-01/re2/dfa.cc --- old/re2-2018-10-01/re2/dfa.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/dfa.cc 2018-12-24 14:14:42.000000000 +0100 @@ -39,6 +39,7 @@ #include "util/logging.h" #include "util/mix.h" #include "util/mutex.h" +#include "util/pod_array.h" #include "util/sparse_set.h" #include "util/strutil.h" #include "re2/prog.h" @@ -347,8 +348,7 @@ // Scratch areas, protected by mutex_. Workq* q0_; // Two pre-allocated work queues. Workq* q1_; - int* astack_; // Pre-allocated stack for AddToQueue - int nastack_; + PODArray<int> stack_; // Pre-allocated stack for AddToQueue // State* cache. Many threads use and add to the cache simultaneously, // holding cache_mutex_ for reading and mutex_ (above) when adding. @@ -441,7 +441,6 @@ init_failed_(false), q0_(NULL), q1_(NULL), - astack_(NULL), mem_budget_(max_mem) { if (ExtraDebug) fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str()); @@ -449,16 +448,16 @@ if (kind_ == Prog::kLongestMatch) nmark = prog_->size(); // See DFA::AddToQueue() for why this is so. - nastack_ = prog_->inst_count(kInstCapture) + - prog_->inst_count(kInstEmptyWidth) + - prog_->inst_count(kInstNop) + - nmark + 1; // + 1 for start inst + int nstack = prog_->inst_count(kInstCapture) + + prog_->inst_count(kInstEmptyWidth) + + prog_->inst_count(kInstNop) + + nmark + 1; // + 1 for start inst - // Account for space needed for DFA, q0, q1, astack. + // Account for space needed for DFA, q0, q1, stack. mem_budget_ -= sizeof(DFA); mem_budget_ -= (prog_->size() + nmark) * (sizeof(int)+sizeof(int)) * 2; // q0, q1 - mem_budget_ -= nastack_ * sizeof(int); // astack + mem_budget_ -= nstack * sizeof(int); // stack if (mem_budget_ < 0) { init_failed_ = true; return; @@ -482,13 +481,12 @@ q0_ = new Workq(prog_->size(), nmark); q1_ = new Workq(prog_->size(), nmark); - astack_ = new int[nastack_]; + stack_ = PODArray<int>(nstack); } DFA::~DFA() { delete q0_; delete q1_; - delete[] astack_; ClearCache(); } @@ -826,7 +824,7 @@ // Adds ip to the work queue, following empty arrows according to flag. void DFA::AddToQueue(Workq* q, int id, uint32_t flag) { - // Use astack_ to hold our stack of instructions yet to process. + // Use stack_ to hold our stack of instructions yet to process. // It was preallocated as follows: // one entry per Capture; // one entry per EmptyWidth; and @@ -835,12 +833,12 @@ // perform. (Each instruction can be processed at most once.) // When using marks, we also added nmark == prog_->size(). // (Otherwise, nmark == 0.) - int* stk = astack_; + int* stk = stack_.data(); int nstk = 0; stk[nstk++] = id; while (nstk > 0) { - DCHECK_LE(nstk, nastack_); + DCHECK_LE(nstk, stack_.size()); id = stk[--nstk]; Loop: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/fuzzing/re2_fuzzer.cc new/re2-2019-01-01/re2/fuzzing/re2_fuzzer.cc --- old/re2-2018-10-01/re2/fuzzing/re2_fuzzer.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/fuzzing/re2_fuzzer.cc 2018-12-24 14:14:42.000000000 +0100 @@ -5,8 +5,11 @@ #include <stddef.h> #include <stdint.h> #include <map> +#include <memory> +#include <queue> #include <string> +#include "re2/prefilter.h" #include "re2/re2.h" using re2::StringPiece; @@ -21,36 +24,80 @@ return; // Don't waste time fuzzing high-size programs. - // (They can cause bug reports due to fuzzer timeouts.) + // They can cause bug reports due to fuzzer timeouts. int size = re.ProgramSize(); if (size > 9999) return; + int rsize = re.ReverseProgramSize(); + if (rsize > 9999) + return; // Don't waste time fuzzing high-fanout programs. - // (They can also cause bug reports due to fuzzer timeouts.) + // They can cause bug reports due to fuzzer timeouts. std::map<int, int> histogram; int fanout = re.ProgramFanout(&histogram); if (fanout > 9) return; + int rfanout = re.ReverseProgramFanout(&histogram); + if (rfanout > 9) + return; + + // Don't waste time fuzzing programs with large substrings. + // They can cause bug reports due to fuzzer timeouts when they + // are repetitions (e.g. hundreds of NUL bytes) and matching is + // unanchored. And they aren't interesting for fuzzing purposes. + std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re)); + if (prefilter == nullptr) + return; + std::queue<re2::Prefilter*> nodes; + nodes.push(prefilter.get()); + while (!nodes.empty()) { + re2::Prefilter* node = nodes.front(); + nodes.pop(); + if (node->op() == re2::Prefilter::ATOM) { + if (node->atom().size() > 9) + return; + } else if (node->op() == re2::Prefilter::AND || + node->op() == re2::Prefilter::OR) { + for (re2::Prefilter* sub : *node->subs()) + nodes.push(sub); + } + } + + if (re.NumberOfCapturingGroups() == 0) { + // Avoid early return due to too many arguments. + StringPiece sp = text; + RE2::FullMatch(sp, re); + RE2::PartialMatch(sp, re); + RE2::Consume(&sp, re); + sp = text; // Reset. + RE2::FindAndConsume(&sp, re); + } else { + // Okay, we have at least one capturing group... + // Try conversion for variously typed arguments. + StringPiece sp = text; + short s; + RE2::FullMatch(sp, re, &s); + long l; + RE2::PartialMatch(sp, re, &l); + float f; + RE2::Consume(&sp, re, &f); + sp = text; // Reset. + double d; + RE2::FindAndConsume(&sp, re, &d); + } + + string s = string(text); + RE2::Replace(&s, re, ""); + s = string(text); // Reset. + RE2::GlobalReplace(&s, re, ""); - StringPiece sp1, sp2, sp3, sp4; - string s1, s2, s3, s4; - int i1, i2, i3, i4; - double d1, d2, d3, d4; - - RE2::FullMatch(text, re, &sp1, &sp2, &sp3, &sp4); - RE2::PartialMatch(text, re, &s1, &s2, &s3, &s4); - - sp1 = sp2 = text; - RE2::Consume(&sp1, re, &i1, &i2, &i3, &i4); - RE2::FindAndConsume(&sp2, re, &d1, &d2, &d3, &d4); - - s3 = s4 = string(text); - RE2::Replace(&s3, re, ""); - RE2::GlobalReplace(&s4, re, ""); + string min, max; + re.PossibleMatchRange(&min, &max, /*maxlen=*/9); // Exercise some other API functionality. - dummy += re.NumberOfCapturingGroups(); + dummy += re.NamedCapturingGroups().size(); + dummy += re.CapturingGroupNames().size(); dummy += RE2::QuoteMeta(pattern).size(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/nfa.cc new/re2-2019-01-01/re2/nfa.cc --- old/re2-2018-10-01/re2/nfa.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/nfa.cc 2018-12-24 14:14:42.000000000 +0100 @@ -34,6 +34,7 @@ #include "re2/prog.h" #include "re2/regexp.h" #include "util/logging.h" +#include "util/pod_array.h" #include "util/sparse_array.h" #include "util/sparse_set.h" #include "util/strutil.h" @@ -75,13 +76,6 @@ struct AddState { int id; // Inst to process Thread* t; // if not null, set t0 = t before processing id - - AddState() - : id(0), t(NULL) {} - explicit AddState(int id) - : id(id), t(NULL) {} - AddState(int id, Thread* t) - : id(id), t(t) {} }; // Threadq is a list of threads. The list is sorted by the order @@ -95,40 +89,38 @@ // Follows all empty arrows from id0 and enqueues all the states reached. // Enqueues only the ByteRange instructions that match byte c. - // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. + // context is used (with p) for evaluating empty-width specials. // p is the current input position, and t0 is the current thread. - void AddToThreadq(Threadq* q, int id0, int c, int flag, + void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0); // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. + // context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. - // flag is the bitwise OR of Bol, Eol, etc., specifying whether - // ^, $ and \b match the current input position (after c). // Frees all the threads on runq. // If there is a shortcut to the end, returns that shortcut. - inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p); + int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, + const char* p); // Returns text version of capture information, for debugging. string FormatCapture(const char** capture); inline void CopyCapture(const char** dst, const char** src); - Prog* prog_; // underlying program - int start_; // start instruction in program - int ncapture_; // number of submatches to track - bool longest_; // whether searching for longest match - bool endmatch_; // whether match must end at text.end() - const char* btext_; // beginning of text being matched (for FormatSubmatch) - const char* etext_; // end of text being matched (for endmatch_) - Threadq q0_, q1_; // pre-allocated for Search. - const char** match_; // best match so far - bool matched_; // any match so far? - AddState* astack_; // pre-allocated for AddToThreadq - int nastack_; - - Thread* free_threads_; // free list + Prog* prog_; // underlying program + int start_; // start instruction in program + int ncapture_; // number of submatches to track + bool longest_; // whether searching for longest match + bool endmatch_; // whether match must end at text.end() + const char* btext_; // beginning of text being matched (for FormatSubmatch) + const char* etext_; // end of text being matched (for endmatch_) + Threadq q0_, q1_; // pre-allocated for Search. + PODArray<AddState> stack_; // pre-allocated for AddToThreadq + Thread* free_threads_; // free list + const char** match_; // best match so far + bool matched_; // any match so far? NFA(const NFA&) = delete; NFA& operator=(const NFA&) = delete; @@ -145,18 +137,17 @@ q0_.resize(prog_->size()); q1_.resize(prog_->size()); // See NFA::AddToThreadq() for why this is so. - nastack_ = 2*prog_->inst_count(kInstCapture) + - prog_->inst_count(kInstEmptyWidth) + - prog_->inst_count(kInstNop) + 1; // + 1 for start inst - astack_ = new AddState[nastack_]; + int nstack = 2*prog_->inst_count(kInstCapture) + + prog_->inst_count(kInstEmptyWidth) + + prog_->inst_count(kInstNop) + 1; // + 1 for start inst + stack_ = PODArray<AddState>(nstack); + free_threads_ = NULL; match_ = NULL; matched_ = false; - free_threads_ = NULL; } NFA::~NFA() { delete[] match_; - delete[] astack_; Thread* next; for (Thread* t = free_threads_; t; t = next) { next = t->next; @@ -204,26 +195,26 @@ // Follows all empty arrows from id0 and enqueues all the states reached. // Enqueues only the ByteRange instructions that match byte c. -// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. +// context is used (with p) for evaluating empty-width specials. // p is the current input position, and t0 is the current thread. -void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, +void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0) { if (id0 == 0) return; - // Use astack_ to hold our stack of instructions yet to process. + // Use stack_ to hold our stack of instructions yet to process. // It was preallocated as follows: // two entries per Capture; // one entry per EmptyWidth; and // one entry per Nop. // This reflects the maximum number of stack pushes that each can // perform. (Each instruction can be processed at most once.) - AddState* stk = astack_; + AddState* stk = stack_.data(); int nstk = 0; - stk[nstk++] = AddState(id0); + stk[nstk++] = {id0, NULL}; while (nstk > 0) { - DCHECK_LE(nstk, nastack_); + DCHECK_LE(nstk, stack_.size()); AddState a = stk[--nstk]; Loop: @@ -266,25 +257,25 @@ *tp = t; DCHECK(!ip->last()); - a = AddState(id+1); + a = {id+1, NULL}; goto Loop; case kInstNop: if (!ip->last()) - stk[nstk++] = AddState(id+1); + stk[nstk++] = {id+1, NULL}; // Continue on. - a = AddState(ip->out()); + a = {ip->out(), NULL}; goto Loop; case kInstCapture: if (!ip->last()) - stk[nstk++] = AddState(id+1); + stk[nstk++] = {id+1, NULL}; if ((j=ip->cap()) < ncapture_) { // Push a dummy whose only job is to restore t0 // once we finish exploring this possibility. - stk[nstk++] = AddState(0, t0); + stk[nstk++] = {0, t0}; // Record capture. t = AllocThread(); @@ -292,7 +283,7 @@ t->capture[j] = p; t0 = t; } - a = AddState(ip->out()); + a = {ip->out(), NULL}; goto Loop; case kInstByteRange: @@ -310,17 +301,17 @@ Next: if (ip->last()) break; - a = AddState(id+1); + a = {id+1, NULL}; goto Loop; case kInstEmptyWidth: if (!ip->last()) - stk[nstk++] = AddState(id+1); + stk[nstk++] = {id+1, NULL}; // Continue on if we have all the right flag bits. - if (ip->empty() & ~flag) + if (ip->empty() & ~Prog::EmptyFlags(context, p)) break; - a = AddState(ip->out()); + a = {ip->out(), NULL}; goto Loop; } } @@ -328,13 +319,13 @@ // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. +// context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. -// flag is the bitwise OR of Bol, Eol, etc., specifying whether -// ^, $ and \b match the current input position (after c). // Frees all the threads on runq. // If there is a shortcut to the end, returns that shortcut. -int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { +int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, + const char* p) { nextq->clear(); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { @@ -360,7 +351,7 @@ break; case kInstByteRange: - AddToThreadq(nextq, ip->out(), c, flag, p, t); + AddToThreadq(nextq, ip->out(), c, context, p, t); break; case kInstAltMatch: @@ -500,38 +491,9 @@ runq->clear(); nextq->clear(); memset(&match_[0], 0, ncapture_*sizeof match_[0]); - int wasword = 0; - - if (text.begin() > context.begin()) - wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF); // Loop over the text, stepping the machine. for (const char* p = text.begin();; p++) { - // Check for empty-width specials. - int flag = 0; - - // ^ and \A - if (p == context.begin()) - flag |= kEmptyBeginText | kEmptyBeginLine; - else if (p <= context.end() && p[-1] == '\n') - flag |= kEmptyBeginLine; - - // $ and \z - if (p == context.end()) - flag |= kEmptyEndText | kEmptyEndLine; - else if (p < context.end() && p[0] == '\n') - flag |= kEmptyEndLine; - - // \b and \B - int isword = 0; - if (p < context.end()) - isword = Prog::IsWordChar(p[0] & 0xFF); - - if (isword != wasword) - flag |= kEmptyWordBoundary; - else - flag |= kEmptyNonWordBoundary; - if (ExtraDebug) { int c = 0; if (p == context.begin()) @@ -541,7 +503,7 @@ else if (p < text.end()) c = p[0] & 0xFF; - fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword); + fprintf(stderr, "%c:", c); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->second; if (t == NULL) @@ -552,7 +514,7 @@ } // This is a no-op the first time around the loop because runq is empty. - int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p); + int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p); DCHECK_EQ(runq->size(), 0); using std::swap; swap(nextq, runq); @@ -604,17 +566,14 @@ p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); if (p == NULL) { p = text.end(); - isword = 0; - } else { - isword = Prog::IsWordChar(p[0] & 0xFF); } - flag = Prog::EmptyFlags(context, p); } Thread* t = AllocThread(); CopyCapture(t->capture, match_); t->capture[0] = p; - AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, t); + AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p, + t); Decref(t); } @@ -624,8 +583,6 @@ fprintf(stderr, "dead\n"); break; } - - wasword = isword; } for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/onepass.cc new/re2-2019-01-01/re2/onepass.cc --- old/re2-2018-10-01/re2/onepass.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/onepass.cc 2018-12-24 14:14:42.000000000 +0100 @@ -59,6 +59,7 @@ #include "util/util.h" #include "util/logging.h" +#include "util/pod_array.h" #include "util/sparse_set.h" #include "util/strutil.h" #include "util/utf.h" @@ -403,11 +404,11 @@ int stacksize = inst_count(kInstCapture) + inst_count(kInstEmptyWidth) + inst_count(kInstNop) + 1; // + 1 for start inst - InstCond* stack = new InstCond[stacksize]; + PODArray<InstCond> stack(stacksize); int size = this->size(); - int* nodebyid = new int[size]; // indexed by ip - memset(nodebyid, 0xFF, size*sizeof nodebyid[0]); + PODArray<int> nodebyid(size); // indexed by ip + memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]); // Originally, nodes was a uint8_t[maxnodes*statesize], but that was // unnecessarily optimistic: why allocate a large amount of memory @@ -613,14 +614,9 @@ dfa_mem_ -= nalloc*statesize; onepass_nodes_ = new uint8_t[nalloc*statesize]; memmove(onepass_nodes_, nodes.data(), nalloc*statesize); - - delete[] stack; - delete[] nodebyid; return true; fail: - delete[] stack; - delete[] nodebyid; return false; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/parse.cc new/re2-2019-01-01/re2/parse.cc --- old/re2-2018-10-01/re2/parse.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/parse.cc 2018-12-24 14:14:42.000000000 +0100 @@ -27,6 +27,7 @@ #include "util/util.h" #include "util/logging.h" +#include "util/pod_array.h" #include "util/strutil.h" #include "util/utf.h" #include "re2/regexp.h" @@ -1217,7 +1218,7 @@ return; // Construct op (alternation or concatenation), flattening op of op. - Regexp** subs = new Regexp*[n]; + PODArray<Regexp*> subs(n); next = NULL; int i = n; for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) { @@ -1232,8 +1233,7 @@ } } - Regexp* re = ConcatOrAlternate(op, subs, n, flags_, true); - delete[] subs; + Regexp* re = ConcatOrAlternate(op, subs.data(), n, flags_, true); re->simple_ = re->ComputeSimple(); re->down_ = next; stacktop_ = re; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/prefilter.cc new/re2-2019-01-01/re2/prefilter.cc --- old/re2-2018-10-01/re2/prefilter.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/prefilter.cc 2018-12-24 14:14:42.000000000 +0100 @@ -214,7 +214,7 @@ static Info* Quest(Info* a); static Info* EmptyString(); static Info* NoMatch(); - static Info* AnyChar(); + static Info* AnyCharOrAnyByte(); static Info* CClass(CharClass* cc, bool latin1); static Info* Literal(Rune r); static Info* LiteralLatin1(Rune r); @@ -417,8 +417,8 @@ return info; } -// Constructs Info for dot (any character). -Prefilter::Info* Prefilter::Info::AnyChar() { +// Constructs Info for dot (any character) or \C (any byte). +Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() { Prefilter::Info* info = new Prefilter::Info(); info->match_ = new Prefilter(ALL); return info; @@ -461,7 +461,7 @@ // If the class is too large, it's okay to overestimate. if (cc->size() > 10) - return AnyChar(); + return AnyCharOrAnyByte(); Prefilter::Info *a = new Prefilter::Info(); for (CCIter i = cc->begin(); i != cc->end(); ++i) @@ -622,8 +622,9 @@ break; case kRegexpAnyChar: + case kRegexpAnyByte: // Claim nothing, except that it's not empty. - info = AnyChar(); + info = AnyCharOrAnyByte(); break; case kRegexpCharClass: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/prefilter_tree.cc new/re2-2019-01-01/re2/prefilter_tree.cc --- old/re2-2018-10-01/re2/prefilter_tree.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/prefilter_tree.cc 2018-12-24 14:14:42.000000000 +0100 @@ -138,6 +138,7 @@ return false; case Prefilter::ALL: + case Prefilter::NONE: return false; case Prefilter::ATOM: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/prog.cc new/re2-2019-01-01/re2/prog.cc --- old/re2-2018-10-01/re2/prog.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/prog.cc 2018-12-24 14:14:42.000000000 +0100 @@ -112,7 +112,6 @@ first_byte_(-1), flags_(0), list_count_(0), - inst_(NULL), onepass_nodes_(NULL), dfa_mem_(0), dfa_first_(NULL), @@ -123,7 +122,6 @@ DeleteDFA(dfa_longest_); DeleteDFA(dfa_first_); delete[] onepass_nodes_; - delete[] inst_; } typedef SparseSet Workq; @@ -628,9 +626,8 @@ // Finally, replace the old instructions with the new instructions. size_ = static_cast<int>(flat.size()); - delete[] inst_; - inst_ = new Inst[size_]; - memmove(inst_, flat.data(), size_ * sizeof *inst_); + inst_ = PODArray<Inst>(size_); + memmove(inst_.data(), flat.data(), size_*sizeof(inst_[0])); } void Prog::MarkSuccessors(SparseArray<int>* rootmap, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/prog.h new/re2-2019-01-01/re2/prog.h --- old/re2-2018-10-01/re2/prog.h 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/prog.h 2018-12-24 14:14:42.000000000 +0100 @@ -18,6 +18,7 @@ #include "util/util.h" #include "util/logging.h" +#include "util/pod_array.h" #include "util/sparse_array.h" #include "util/sparse_set.h" #include "re2/re2.h" @@ -77,7 +78,7 @@ void InitFail(); // Getters - int id(Prog* p) { return static_cast<int>(this - p->inst_); } + int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); } InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); } int last() { return (out_opcode_>>3)&1; } int out() { return out_opcode_>>4; } @@ -395,7 +396,7 @@ int list_count_; // count of lists (see above) int inst_count_[kNumInst]; // count of instructions by opcode - Inst* inst_; // pointer to instruction array + PODArray<Inst> inst_; // pointer to instruction array uint8_t* onepass_nodes_; // data for OnePass nodes int64_t dfa_mem_; // Maximum memory for DFAs. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/re2.cc new/re2-2019-01-01/re2/re2.cc --- old/re2-2018-10-01/re2/re2.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/re2.cc 2018-12-24 14:14:42.000000000 +0100 @@ -177,10 +177,10 @@ entire_regexp_ = NULL; suffix_regexp_ = NULL; prog_ = NULL; + num_captures_ = -1; rprog_ = NULL; error_ = empty_string; error_code_ = NoError; - num_captures_ = -1; named_groups_ = NULL; group_names_ = NULL; @@ -218,6 +218,11 @@ return; } + // We used to compute this lazily, but it's used during the + // typical control flow for a match call, so we now compute + // it eagerly, which avoids the overhead of std::once_flag. + num_captures_ = suffix_regexp_->NumCaptures(); + // Could delay this until the first match call that // cares about submatch information, but the one-pass // machine's memory gets cut from the DFA memory budget, @@ -262,11 +267,18 @@ return prog_->size(); } -int RE2::ProgramFanout(std::map<int, int>* histogram) const { +int RE2::ReverseProgramSize() const { if (prog_ == NULL) return -1; - SparseArray<int> fanout(prog_->size()); - prog_->Fanout(&fanout); + Prog* prog = ReverseProg(); + if (prog == NULL) + return -1; + return prog->size(); +} + +static int Fanout(Prog* prog, std::map<int, int>* histogram) { + SparseArray<int> fanout(prog->size()); + prog->Fanout(&fanout); histogram->clear(); for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) { // TODO(junyer): Optimise this? @@ -279,14 +291,19 @@ return histogram->rbegin()->first; } -// Returns num_captures_, computing it if needed, or -1 if the -// regexp wasn't valid on construction. -int RE2::NumberOfCapturingGroups() const { - std::call_once(num_captures_once_, [](const RE2* re) { - if (re->suffix_regexp_ != NULL) - re->num_captures_ = re->suffix_regexp_->NumCaptures(); - }, this); - return num_captures_; +int RE2::ProgramFanout(std::map<int, int>* histogram) const { + if (prog_ == NULL) + return -1; + return Fanout(prog_, histogram); +} + +int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const { + if (prog_ == NULL) + return -1; + Prog* prog = ReverseProg(); + if (prog == NULL) + return -1; + return Fanout(prog, histogram); } // Returns named_groups_, computing it if needed. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/re2.h new/re2-2019-01-01/re2/re2.h --- old/re2-2018-10-01/re2/re2.h 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/re2.h 2018-12-24 14:14:42.000000000 +0100 @@ -53,9 +53,19 @@ // CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1))); // // ----------------------------------------------------------------------- -// MATCHING WITH SUB-STRING EXTRACTION: +// MATCHING WITH SUBSTRING EXTRACTION: // -// You can supply extra pointer arguments to extract matched subpieces. +// You can supply extra pointer arguments to extract matched substrings. +// On match failure, none of the pointees will have been modified. +// On match success, the substrings will be converted (as necessary) and +// their values will be assigned to their pointees until all conversions +// have succeeded or one conversion has failed. +// On conversion failure, the pointees will be in an indeterminate state +// because the caller has no way of knowing which conversion failed. +// However, conversion cannot fail for types like string and StringPiece +// that do not inspect the substring contents. Hence, in the common case +// where all of the pointees are of such types, failure is always due to +// match failure and thus none of the pointees will have been modified. // // Example: extracts "ruby" into "s" and 1234 into "i" // int i; @@ -278,11 +288,13 @@ // Returns the program size, a very approximate measure of a regexp's "cost". // Larger numbers are more expensive than smaller numbers. int ProgramSize() const; + int ReverseProgramSize() const; // EXPERIMENTAL! SUBJECT TO CHANGE! // Outputs the program fanout as a histogram bucketed by powers of 2. // Returns the number of the largest non-empty bucket. int ProgramFanout(std::map<int, int>* histogram) const; + int ReverseProgramFanout(std::map<int, int>* histogram) const; // Returns the underlying Regexp; not for general use. // Returns entire_regexp_ so that callers don't need @@ -467,7 +479,7 @@ // Return the number of capturing subpatterns, or -1 if the // regexp wasn't valid on construction. The overall match ($0) // does not count: if the regexp is "(a)(b)", returns 2. - int NumberOfCapturingGroups() const; + int NumberOfCapturingGroups() const { return num_captures_; } // Return a map from names to capturing indices. // The map records the index of the leftmost group @@ -489,6 +501,7 @@ // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar", // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL. + // Caveat: submatch[] may be clobbered even on match failure. // // Don't ask for more match information than you will use: // runs much faster with nsubmatch == 1 than nsubmatch > 1, and @@ -731,6 +744,7 @@ re2::Regexp* entire_regexp_; // parsed regular expression re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed re2::Prog* prog_; // compiled program for regexp + int num_captures_; // Number of capturing groups bool is_one_pass_; // can use prog_->SearchOnePass? mutable re2::Prog* rprog_; // reverse program for regexp @@ -738,7 +752,6 @@ // (or points to empty string) mutable ErrorCode error_code_; // Error code mutable string error_arg_; // Fragment of regexp showing error - mutable int num_captures_; // Number of capturing groups // Map from capture names to indices mutable const std::map<string, int>* named_groups_; @@ -748,7 +761,6 @@ // Onces for lazy computations. mutable std::once_flag rprog_once_; - mutable std::once_flag num_captures_once_; mutable std::once_flag named_groups_once_; mutable std::once_flag group_names_once_; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/set.cc new/re2-2019-01-01/re2/set.cc --- old/re2-2018-10-01/re2/set.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/set.cc 2018-12-24 14:14:42.000000000 +0100 @@ -10,6 +10,7 @@ #include "util/util.h" #include "util/logging.h" +#include "util/pod_array.h" #include "re2/stringpiece.h" #include "re2/prog.h" #include "re2/re2.h" @@ -55,13 +56,12 @@ re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); if (re->op() == kRegexpConcat) { int nsub = re->nsub(); - re2::Regexp** sub = new re2::Regexp*[nsub + 1]; + PODArray<re2::Regexp*> sub(nsub + 1); for (int i = 0; i < nsub; i++) sub[i] = re->sub()[i]->Incref(); sub[nsub] = m; re->Decref(); - re = re2::Regexp::Concat(sub, nsub + 1, pf); - delete[] sub; + re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); } else { re2::Regexp* sub[2]; sub[0] = re; @@ -87,16 +87,15 @@ return a.first < b.first; }); - re2::Regexp** sub = new re2::Regexp*[size_]; - for (size_t i = 0; i < elem_.size(); i++) + PODArray<re2::Regexp*> sub(size_); + for (int i = 0; i < size_; i++) sub[i] = elem_[i].second; elem_.clear(); elem_.shrink_to_fit(); Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( options_.ParseFlags()); - re2::Regexp* re = re2::Regexp::Alternate(sub, size_, pf); - delete[] sub; + re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); prog_ = Prog::CompileSet(re, anchor_, options_.max_mem()); re->Decref(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/simplify.cc new/re2-2019-01-01/re2/simplify.cc --- old/re2-2018-10-01/re2/simplify.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/simplify.cc 2018-12-24 14:14:42.000000000 +0100 @@ -10,6 +10,7 @@ #include "util/util.h" #include "util/logging.h" +#include "util/pod_array.h" #include "util/utf.h" #include "re2/regexp.h" #include "re2/walker-inl.h" @@ -589,13 +590,11 @@ return Regexp::Plus(re->Incref(), f); // General case: x{4,} is xxxx+ - Regexp** nre_subs = new Regexp*[min]; + PODArray<Regexp*> nre_subs(min); for (int i = 0; i < min-1; i++) nre_subs[i] = re->Incref(); nre_subs[min-1] = Regexp::Plus(re->Incref(), f); - Regexp* nre = Regexp::Concat(nre_subs, min, f); - delete[] nre_subs; - return nre; + return Regexp::Concat(nre_subs.data(), min, f); } // Special case: (x){0} matches only empty string. @@ -613,11 +612,10 @@ // Build leading prefix: xx. Capturing only on the last one. Regexp* nre = NULL; if (min > 0) { - Regexp** nre_subs = new Regexp*[min]; + PODArray<Regexp*> nre_subs(min); for (int i = 0; i < min; i++) nre_subs[i] = re->Incref(); - nre = Regexp::Concat(nre_subs, min, f); - delete[] nre_subs; + nre = Regexp::Concat(nre_subs.data(), min, f); } // Build and attach suffix: (x(x(x)?)?)? diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/re2/testing/re2_test.cc new/re2-2019-01-01/re2/testing/re2_test.cc --- old/re2-2018-10-01/re2/testing/re2_test.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/re2/testing/re2_test.cc 2018-12-24 14:14:42.000000000 +0100 @@ -461,6 +461,10 @@ ASSERT_GT(re_simple.ProgramSize(), 0); ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); + + ASSERT_GT(re_simple.ReverseProgramSize(), 0); + ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize()); + ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize()); } TEST(ProgramFanout, BigProgram) { @@ -486,6 +490,23 @@ // 13 is the largest non-empty bucket and has 1000 elements. ASSERT_EQ(13, re1000.ProgramFanout(&histogram)); ASSERT_EQ(1000, histogram[13]); + + // 2 is the largest non-empty bucket and has 3 elements. + // This differs from the others due to how reverse `.' works. + ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram)); + ASSERT_EQ(3, histogram[2]); + + // 5 is the largest non-empty bucket and has 10 elements. + ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram)); + ASSERT_EQ(10, histogram[5]); + + // 9 is the largest non-empty bucket and has 100 elements. + ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram)); + ASSERT_EQ(100, histogram[9]); + + // 12 is the largest non-empty bucket and has 1000 elements. + ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram)); + ASSERT_EQ(1000, histogram[12]); } // Issue 956519: handling empty character sets was diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/util/benchmark.cc new/re2-2019-01-01/util/benchmark.cc --- old/re2-2018-10-01/util/benchmark.cc 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/util/benchmark.cc 2018-12-24 14:14:42.000000000 +0100 @@ -7,6 +7,7 @@ #include <stdlib.h> #include <algorithm> #include <chrono> +#include <thread> #include "util/util.h" #include "util/flags.h" @@ -67,7 +68,7 @@ } int NumCPUs() { - return 1; + return static_cast<int>(std::thread::hardware_concurrency()); } static void runN(Benchmark *b, int n, int siz) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-10-01/util/pcre.h new/re2-2019-01-01/util/pcre.h --- old/re2-2018-10-01/util/pcre.h 2018-09-28 12:00:36.000000000 +0200 +++ new/re2-2019-01-01/util/pcre.h 2018-12-24 14:14:42.000000000 +0100 @@ -61,9 +61,9 @@ // CHECK(PCRE::FullMatch(utf8_string, re)); // // ----------------------------------------------------------------------- -// MATCHING WITH SUB-STRING EXTRACTION: +// MATCHING WITH SUBSTRING EXTRACTION: // -// You can supply extra pointer arguments to extract matched subpieces. +// You can supply extra pointer arguments to extract matched substrings. // // Example: extracts "ruby" into "s" and 1234 into "i" // int i;
