commit re2 for openSUSE:Factory

root Fri, 11 Jan 2019 05:00:47 -0800

Hello community,

here is the log from the commit of package re2 for openSUSE:Factory checked in 
at 2019-01-11 14:00:15
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/re2 (Old)
 and      /work/SRC/openSUSE:Factory/.re2.new.28833 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "re2"

Fri Jan 11 14:00:15 2019 rev:20 rq:662780 version:MACRO

Changes:
--------
--- /work/SRC/openSUSE:Factory/re2/re2.changes  2018-10-23 20:34:09.308991279 
+0200
+++ /work/SRC/openSUSE:Factory/.re2.new.28833/re2.changes       2019-01-11 
14:00:18.252113195 +0100
@@ -1,0 +2,6 @@
+Fri Jan  4 11:42:56 UTC 2019 - [email protected]
+
+- update to 2019-01-01:
+  * developer visible changes, performance tweaks and bug fixes
+
+-------------------------------------------------------------------

Old:
----
  2018-10-01.tar.gz

New:
----
  2019-01-01.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ re2.spec ++++++
--- /var/tmp/diff_new_pack.d27vZg/_old  2019-01-11 14:00:18.840112603 +0100
+++ /var/tmp/diff_new_pack.d27vZg/_new  2019-01-11 14:00:18.844112599 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package re2
 #
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -16,7 +16,7 @@
 #
 
 
-%global longver 2018-10-01
+%global longver 2019-01-01
 %global shortver %(echo %{longver}|sed 's|-||g')
 %define libname libre2-0
 Name:           re2

++++++ 2018-10-01.tar.gz -> 2019-01-01.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/compile.cc 
new/re2-2019-01-01/re2/compile.cc
--- old/re2-2018-10-01/re2/compile.cc   2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/compile.cc   2018-12-24 14:14:42.000000000 +0100
@@ -14,6 +14,7 @@
 #include <utility>
 
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/utf.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
@@ -236,11 +237,9 @@
   Encoding encoding_;  // Input encoding
   bool reversed_;      // Should program run backward over text?
 
-  int max_inst_;       // Maximum number of instructions.
-
-  Prog::Inst* inst_;   // Pointer to first instruction.
-  int inst_len_;       // Number of instructions used.
-  int inst_cap_;       // Number of instructions allocated.
+  PODArray<Prog::Inst> inst_;
+  int ninst_;          // Number of instructions used.
+  int max_ninst_;      // Maximum number of instructions.
 
   int64_t max_mem_;    // Total memory budget.
 
@@ -258,41 +257,38 @@
   failed_ = false;
   encoding_ = kEncodingUTF8;
   reversed_ = false;
-  inst_ = NULL;
-  inst_len_ = 0;
-  inst_cap_ = 0;
-  max_inst_ = 1;  // make AllocInst for fail instruction okay
+  ninst_ = 0;
+  max_ninst_ = 1;  // make AllocInst for fail instruction okay
   max_mem_ = 0;
   int fail = AllocInst(1);
   inst_[fail].InitFail();
-  max_inst_ = 0;  // Caller must change
+  max_ninst_ = 0;  // Caller must change
 }
 
 Compiler::~Compiler() {
   delete prog_;
-  delete[] inst_;
 }
 
 int Compiler::AllocInst(int n) {
-  if (failed_ || inst_len_ + n > max_inst_) {
+  if (failed_ || ninst_ + n > max_ninst_) {
     failed_ = true;
     return -1;
   }
 
-  if (inst_len_ + n > inst_cap_) {
-    if (inst_cap_ == 0)
-      inst_cap_ = 8;
-    while (inst_len_ + n > inst_cap_)
-      inst_cap_ *= 2;
-    Prog::Inst* ip = new Prog::Inst[inst_cap_];
-    if (inst_ != NULL)
-      memmove(ip, inst_, inst_len_ * sizeof ip[0]);
-    memset(ip + inst_len_, 0, (inst_cap_ - inst_len_) * sizeof ip[0]);
-    delete[] inst_;
-    inst_ = ip;
+  if (ninst_ + n > inst_.size()) {
+    int cap = inst_.size();
+    if (cap == 0)
+      cap = 8;
+    while (ninst_ + n > cap)
+      cap *= 2;
+    PODArray<Prog::Inst> inst(cap);
+    if (inst_.data() != NULL)
+      memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
+    memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
+    inst_ = std::move(inst);
   }
-  int id = inst_len_;
-  inst_len_ += n;
+  int id = ninst_;
+  ninst_ += n;
   return id;
 }
 
@@ -320,17 +316,18 @@
   if (begin->opcode() == kInstNop &&
       a.end.p == (a.begin << 1) &&
       begin->out() == 0) {
-    PatchList::Patch(inst_, a.end, b.begin);  // in case refs to a somewhere
+    // in case refs to a somewhere
+    PatchList::Patch(inst_.data(), a.end, b.begin);
     return b;
   }
 
   // To run backward over string, reverse all concatenations.
   if (reversed_) {
-    PatchList::Patch(inst_, b.end, a.begin);
+    PatchList::Patch(inst_.data(), b.end, a.begin);
     return Frag(b.begin, a.end);
   }
 
-  PatchList::Patch(inst_, a.end, b.begin);
+  PatchList::Patch(inst_.data(), a.end, b.begin);
   return Frag(a.begin, b.end);
 }
 
@@ -347,7 +344,7 @@
     return NoMatch();
 
   inst_[id].InitAlt(a.begin, b.begin);
-  return Frag(id, PatchList::Append(inst_, a.end, b.end));
+  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end));
 }
 
 // When capturing submatches in like-Perl mode, a kOpAlt Inst
@@ -363,7 +360,7 @@
   if (id < 0)
     return NoMatch();
   inst_[id].InitAlt(0, 0);
-  PatchList::Patch(inst_, a.end, id);
+  PatchList::Patch(inst_.data(), a.end, id);
   if (nongreedy) {
     inst_[id].out1_ = a.begin;
     return Frag(id, PatchList::Mk(id << 1));
@@ -395,7 +392,7 @@
     inst_[id].InitAlt(a.begin, 0);
     pl = PatchList::Mk((id << 1) | 1);
   }
-  return Frag(id, PatchList::Append(inst_, pl, a.end));
+  return Frag(id, PatchList::Append(inst_.data(), pl, a.end));
 }
 
 // Returns a fragment for the byte range lo-hi.
@@ -443,7 +440,7 @@
     return NoMatch();
   inst_[id].InitCapture(2*n, a.begin);
   inst_[id+1].InitCapture(2*n+1, 0);
-  PatchList::Patch(inst_, a.end, id+1);
+  PatchList::Patch(inst_.data(), a.end, id+1);
 
   return Frag(id, PatchList::Mk((id+1) << 1));
 }
@@ -477,9 +474,9 @@
                                      int next) {
   Frag f = ByteRange(lo, hi, foldcase);
   if (next != 0) {
-    PatchList::Patch(inst_, f.end, next);
+    PatchList::Patch(inst_.data(), f.end, next);
   } else {
-    rune_range_.end = PatchList::Append(inst_, rune_range_.end, f.end);
+    rune_range_.end = PatchList::Append(inst_.data(), rune_range_.end, f.end);
   }
   return f.begin;
 }
@@ -581,10 +578,10 @@
   if (!IsCachedRuneByteSuffix(id)) {
     // The head should be the instruction most recently allocated, so free it
     // instead of leaving it unreachable.
-    DCHECK_EQ(id, inst_len_-1);
+    DCHECK_EQ(id, ninst_-1);
     inst_[id].out_opcode_ = 0;
     inst_[id].out1_ = 0;
-    inst_len_--;
+    ninst_--;
   }
 
   out = AddSuffixRecursive(inst_[br].out(), out);
@@ -1021,12 +1018,11 @@
       if (re->nsub() > 0) {
         sub = re->sub()[0]->Incref();
         if (IsAnchorStart(&sub, depth+1)) {
-          Regexp** subcopy = new Regexp*[re->nsub()];
+          PODArray<Regexp*> subcopy(re->nsub());
           subcopy[0] = sub;  // already have reference
           for (int i = 1; i < re->nsub(); i++)
             subcopy[i] = re->sub()[i]->Incref();
-          *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags());
-          delete[] subcopy;
+          *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
           re->Decref();
           return true;
         }
@@ -1069,12 +1065,11 @@
       if (re->nsub() > 0) {
         sub = re->sub()[re->nsub() - 1]->Incref();
         if (IsAnchorEnd(&sub, depth+1)) {
-          Regexp** subcopy = new Regexp*[re->nsub()];
+          PODArray<Regexp*> subcopy(re->nsub());
           subcopy[re->nsub() - 1] = sub;  // already have reference
           for (int i = 0; i < re->nsub() - 1; i++)
             subcopy[i] = re->sub()[i]->Incref();
-          *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags());
-          delete[] subcopy;
+          *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
           re->Decref();
           return true;
         }
@@ -1106,15 +1101,15 @@
     encoding_ = kEncodingLatin1;
   max_mem_ = max_mem;
   if (max_mem <= 0) {
-    max_inst_ = 100000;  // more than enough
+    max_ninst_ = 100000;  // more than enough
   } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
     // No room for anything.
-    max_inst_ = 0;
+    max_ninst_ = 0;
   } else {
     int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
     // Limit instruction count so that inst->id() fits nicely in an int.
     // SparseArray also assumes that the indices (inst->id()) are ints.
-    // The call to WalkExponential uses 2*max_inst_ below,
+    // The call to WalkExponential uses 2*max_ninst_ below,
     // and other places in the code use 2 or 3 * prog->size().
     // Limiting to 2^24 should avoid overflow in those places.
     // (The point of allowing more than 32 bits of memory is to
@@ -1127,7 +1122,7 @@
     if (m > Prog::Inst::kMaxInst)
       m = Prog::Inst::kMaxInst;
 
-    max_inst_ = static_cast<int>(m);
+    max_ninst_ = static_cast<int>(m);
   }
 
   anchor_ = anchor;
@@ -1155,7 +1150,7 @@
   bool is_anchor_end = IsAnchorEnd(&sre, 0);
 
   // Generate fragment for entire regexp.
-  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_);
+  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
   sre->Decref();
   if (c.failed_)
     return NULL;
@@ -1192,13 +1187,12 @@
 
   if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
     // No possible matches; keep Fail instruction only.
-    inst_len_ = 1;
+    ninst_ = 1;
   }
 
   // Hand off the array to Prog.
-  prog_->inst_ = inst_;
-  prog_->size_ = inst_len_;
-  inst_ = NULL;
+  prog_->inst_ = std::move(inst_);
+  prog_->size_ = ninst_;
 
   prog_->Optimize();
   prog_->Flatten();
@@ -1241,7 +1235,7 @@
   if (sre == NULL)
     return NULL;
 
-  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_inst_);
+  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
   sre->Decref();
   if (c.failed_)
     return NULL;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/dfa.cc 
new/re2-2019-01-01/re2/dfa.cc
--- old/re2-2018-10-01/re2/dfa.cc       2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/dfa.cc       2018-12-24 14:14:42.000000000 +0100
@@ -39,6 +39,7 @@
 #include "util/logging.h"
 #include "util/mix.h"
 #include "util/mutex.h"
+#include "util/pod_array.h"
 #include "util/sparse_set.h"
 #include "util/strutil.h"
 #include "re2/prog.h"
@@ -347,8 +348,7 @@
   // Scratch areas, protected by mutex_.
   Workq* q0_;             // Two pre-allocated work queues.
   Workq* q1_;
-  int* astack_;         // Pre-allocated stack for AddToQueue
-  int nastack_;
+  PODArray<int> stack_;   // Pre-allocated stack for AddToQueue
 
   // State* cache.  Many threads use and add to the cache simultaneously,
   // holding cache_mutex_ for reading and mutex_ (above) when adding.
@@ -441,7 +441,6 @@
     init_failed_(false),
     q0_(NULL),
     q1_(NULL),
-    astack_(NULL),
     mem_budget_(max_mem) {
   if (ExtraDebug)
     fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, 
prog_->DumpUnanchored().c_str());
@@ -449,16 +448,16 @@
   if (kind_ == Prog::kLongestMatch)
     nmark = prog_->size();
   // See DFA::AddToQueue() for why this is so.
-  nastack_ = prog_->inst_count(kInstCapture) +
-             prog_->inst_count(kInstEmptyWidth) +
-             prog_->inst_count(kInstNop) +
-             nmark + 1;  // + 1 for start inst
+  int nstack = prog_->inst_count(kInstCapture) +
+               prog_->inst_count(kInstEmptyWidth) +
+               prog_->inst_count(kInstNop) +
+               nmark + 1;  // + 1 for start inst
 
-  // Account for space needed for DFA, q0, q1, astack.
+  // Account for space needed for DFA, q0, q1, stack.
   mem_budget_ -= sizeof(DFA);
   mem_budget_ -= (prog_->size() + nmark) *
                  (sizeof(int)+sizeof(int)) * 2;  // q0, q1
-  mem_budget_ -= nastack_ * sizeof(int);  // astack
+  mem_budget_ -= nstack * sizeof(int);  // stack
   if (mem_budget_ < 0) {
     init_failed_ = true;
     return;
@@ -482,13 +481,12 @@
 
   q0_ = new Workq(prog_->size(), nmark);
   q1_ = new Workq(prog_->size(), nmark);
-  astack_ = new int[nastack_];
+  stack_ = PODArray<int>(nstack);
 }
 
 DFA::~DFA() {
   delete q0_;
   delete q1_;
-  delete[] astack_;
   ClearCache();
 }
 
@@ -826,7 +824,7 @@
 // Adds ip to the work queue, following empty arrows according to flag.
 void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
 
-  // Use astack_ to hold our stack of instructions yet to process.
+  // Use stack_ to hold our stack of instructions yet to process.
   // It was preallocated as follows:
   //   one entry per Capture;
   //   one entry per EmptyWidth; and
@@ -835,12 +833,12 @@
   // perform. (Each instruction can be processed at most once.)
   // When using marks, we also added nmark == prog_->size().
   // (Otherwise, nmark == 0.)
-  int* stk = astack_;
+  int* stk = stack_.data();
   int nstk = 0;
 
   stk[nstk++] = id;
   while (nstk > 0) {
-    DCHECK_LE(nstk, nastack_);
+    DCHECK_LE(nstk, stack_.size());
     id = stk[--nstk];
 
   Loop:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/fuzzing/re2_fuzzer.cc 
new/re2-2019-01-01/re2/fuzzing/re2_fuzzer.cc
--- old/re2-2018-10-01/re2/fuzzing/re2_fuzzer.cc        2018-09-28 
12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/fuzzing/re2_fuzzer.cc        2018-12-24 
14:14:42.000000000 +0100
@@ -5,8 +5,11 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <map>
+#include <memory>
+#include <queue>
 #include <string>
 
+#include "re2/prefilter.h"
 #include "re2/re2.h"
 
 using re2::StringPiece;
@@ -21,36 +24,80 @@
     return;
 
   // Don't waste time fuzzing high-size programs.
-  // (They can cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   int size = re.ProgramSize();
   if (size > 9999)
     return;
+  int rsize = re.ReverseProgramSize();
+  if (rsize > 9999)
+    return;
 
   // Don't waste time fuzzing high-fanout programs.
-  // (They can also cause bug reports due to fuzzer timeouts.)
+  // They can cause bug reports due to fuzzer timeouts.
   std::map<int, int> histogram;
   int fanout = re.ProgramFanout(&histogram);
   if (fanout > 9)
     return;
+  int rfanout = re.ReverseProgramFanout(&histogram);
+  if (rfanout > 9)
+    return;
+
+  // Don't waste time fuzzing programs with large substrings.
+  // They can cause bug reports due to fuzzer timeouts when they
+  // are repetitions (e.g. hundreds of NUL bytes) and matching is
+  // unanchored. And they aren't interesting for fuzzing purposes.
+  std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re));
+  if (prefilter == nullptr)
+    return;
+  std::queue<re2::Prefilter*> nodes;
+  nodes.push(prefilter.get());
+  while (!nodes.empty()) {
+    re2::Prefilter* node = nodes.front();
+    nodes.pop();
+    if (node->op() == re2::Prefilter::ATOM) {
+      if (node->atom().size() > 9)
+        return;
+    } else if (node->op() == re2::Prefilter::AND ||
+               node->op() == re2::Prefilter::OR) {
+      for (re2::Prefilter* sub : *node->subs())
+        nodes.push(sub);
+    }
+  }
+
+  if (re.NumberOfCapturingGroups() == 0) {
+    // Avoid early return due to too many arguments.
+    StringPiece sp = text;
+    RE2::FullMatch(sp, re);
+    RE2::PartialMatch(sp, re);
+    RE2::Consume(&sp, re);
+    sp = text;  // Reset.
+    RE2::FindAndConsume(&sp, re);
+  } else {
+    // Okay, we have at least one capturing group...
+    // Try conversion for variously typed arguments.
+    StringPiece sp = text;
+    short s;
+    RE2::FullMatch(sp, re, &s);
+    long l;
+    RE2::PartialMatch(sp, re, &l);
+    float f;
+    RE2::Consume(&sp, re, &f);
+    sp = text;  // Reset.
+    double d;
+    RE2::FindAndConsume(&sp, re, &d);
+  }
+
+  string s = string(text);
+  RE2::Replace(&s, re, "");
+  s = string(text);  // Reset.
+  RE2::GlobalReplace(&s, re, "");
 
-  StringPiece sp1, sp2, sp3, sp4;
-  string s1, s2, s3, s4;
-  int i1, i2, i3, i4;
-  double d1, d2, d3, d4;
-
-  RE2::FullMatch(text, re, &sp1, &sp2, &sp3, &sp4);
-  RE2::PartialMatch(text, re, &s1, &s2, &s3, &s4);
-
-  sp1 = sp2 = text;
-  RE2::Consume(&sp1, re, &i1, &i2, &i3, &i4);
-  RE2::FindAndConsume(&sp2, re, &d1, &d2, &d3, &d4);
-
-  s3 = s4 = string(text);
-  RE2::Replace(&s3, re, "");
-  RE2::GlobalReplace(&s4, re, "");
+  string min, max;
+  re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
 
   // Exercise some other API functionality.
-  dummy += re.NumberOfCapturingGroups();
+  dummy += re.NamedCapturingGroups().size();
+  dummy += re.CapturingGroupNames().size();
   dummy += RE2::QuoteMeta(pattern).size();
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/nfa.cc 
new/re2-2019-01-01/re2/nfa.cc
--- old/re2-2018-10-01/re2/nfa.cc       2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/nfa.cc       2018-12-24 14:14:42.000000000 +0100
@@ -34,6 +34,7 @@
 #include "re2/prog.h"
 #include "re2/regexp.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/sparse_array.h"
 #include "util/sparse_set.h"
 #include "util/strutil.h"
@@ -75,13 +76,6 @@
   struct AddState {
     int id;     // Inst to process
     Thread* t;  // if not null, set t0 = t before processing id
-
-    AddState()
-        : id(0), t(NULL) {}
-    explicit AddState(int id)
-        : id(id), t(NULL) {}
-    AddState(int id, Thread* t)
-        : id(id), t(t) {}
   };
 
   // Threadq is a list of threads.  The list is sorted by the order
@@ -95,40 +89,38 @@
 
   // Follows all empty arrows from id0 and enqueues all the states reached.
   // Enqueues only the ByteRange instructions that match byte c.
-  // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
+  // context is used (with p) for evaluating empty-width specials.
   // p is the current input position, and t0 is the current thread.
-  void AddToThreadq(Threadq* q, int id0, int c, int flag,
+  void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
                     const char* p, Thread* t0);
 
   // Run runq on byte c, appending new states to nextq.
   // Updates matched_ and match_ as new, better matches are found.
+  // context is used (with p) for evaluating empty-width specials.
   // p is the position of byte c in the input string for AddToThreadq;
   // p-1 will be used when processing Match instructions.
-  // flag is the bitwise OR of Bol, Eol, etc., specifying whether
-  // ^, $ and \b match the current input position (after c).
   // Frees all the threads on runq.
   // If there is a shortcut to the end, returns that shortcut.
-  inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* 
p);
+  int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+           const char* p);
 
   // Returns text version of capture information, for debugging.
   string FormatCapture(const char** capture);
 
   inline void CopyCapture(const char** dst, const char** src);
 
-  Prog* prog_;          // underlying program
-  int start_;           // start instruction in program
-  int ncapture_;        // number of submatches to track
-  bool longest_;        // whether searching for longest match
-  bool endmatch_;       // whether match must end at text.end()
-  const char* btext_;   // beginning of text being matched (for FormatSubmatch)
-  const char* etext_;   // end of text being matched (for endmatch_)
-  Threadq q0_, q1_;     // pre-allocated for Search.
-  const char** match_;  // best match so far
-  bool matched_;        // any match so far?
-  AddState* astack_;    // pre-allocated for AddToThreadq
-  int nastack_;
-
-  Thread* free_threads_;  // free list
+  Prog* prog_;                // underlying program
+  int start_;                 // start instruction in program
+  int ncapture_;              // number of submatches to track
+  bool longest_;              // whether searching for longest match
+  bool endmatch_;             // whether match must end at text.end()
+  const char* btext_;         // beginning of text being matched (for 
FormatSubmatch)
+  const char* etext_;         // end of text being matched (for endmatch_)
+  Threadq q0_, q1_;           // pre-allocated for Search.
+  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
+  Thread* free_threads_;      // free list
+  const char** match_;        // best match so far
+  bool matched_;              // any match so far?
 
   NFA(const NFA&) = delete;
   NFA& operator=(const NFA&) = delete;
@@ -145,18 +137,17 @@
   q0_.resize(prog_->size());
   q1_.resize(prog_->size());
   // See NFA::AddToThreadq() for why this is so.
-  nastack_ = 2*prog_->inst_count(kInstCapture) +
-             prog_->inst_count(kInstEmptyWidth) +
-             prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
-  astack_ = new AddState[nastack_];
+  int nstack = 2*prog_->inst_count(kInstCapture) +
+               prog_->inst_count(kInstEmptyWidth) +
+               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
+  stack_ = PODArray<AddState>(nstack);
+  free_threads_ = NULL;
   match_ = NULL;
   matched_ = false;
-  free_threads_ = NULL;
 }
 
 NFA::~NFA() {
   delete[] match_;
-  delete[] astack_;
   Thread* next;
   for (Thread* t = free_threads_; t; t = next) {
     next = t->next;
@@ -204,26 +195,26 @@
 
 // Follows all empty arrows from id0 and enqueues all the states reached.
 // Enqueues only the ByteRange instructions that match byte c.
-// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
+// context is used (with p) for evaluating empty-width specials.
 // p is the current input position, and t0 is the current thread.
-void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
+void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
                        const char* p, Thread* t0) {
   if (id0 == 0)
     return;
 
-  // Use astack_ to hold our stack of instructions yet to process.
+  // Use stack_ to hold our stack of instructions yet to process.
   // It was preallocated as follows:
   //   two entries per Capture;
   //   one entry per EmptyWidth; and
   //   one entry per Nop.
   // This reflects the maximum number of stack pushes that each can
   // perform. (Each instruction can be processed at most once.)
-  AddState* stk = astack_;
+  AddState* stk = stack_.data();
   int nstk = 0;
 
-  stk[nstk++] = AddState(id0);
+  stk[nstk++] = {id0, NULL};
   while (nstk > 0) {
-    DCHECK_LE(nstk, nastack_);
+    DCHECK_LE(nstk, stack_.size());
     AddState a = stk[--nstk];
 
   Loop:
@@ -266,25 +257,25 @@
       *tp = t;
 
       DCHECK(!ip->last());
-      a = AddState(id+1);
+      a = {id+1, NULL};
       goto Loop;
 
     case kInstNop:
       if (!ip->last())
-        stk[nstk++] = AddState(id+1);
+        stk[nstk++] = {id+1, NULL};
 
       // Continue on.
-      a = AddState(ip->out());
+      a = {ip->out(), NULL};
       goto Loop;
 
     case kInstCapture:
       if (!ip->last())
-        stk[nstk++] = AddState(id+1);
+        stk[nstk++] = {id+1, NULL};
 
       if ((j=ip->cap()) < ncapture_) {
         // Push a dummy whose only job is to restore t0
         // once we finish exploring this possibility.
-        stk[nstk++] = AddState(0, t0);
+        stk[nstk++] = {0, t0};
 
         // Record capture.
         t = AllocThread();
@@ -292,7 +283,7 @@
         t->capture[j] = p;
         t0 = t;
       }
-      a = AddState(ip->out());
+      a = {ip->out(), NULL};
       goto Loop;
 
     case kInstByteRange:
@@ -310,17 +301,17 @@
     Next:
       if (ip->last())
         break;
-      a = AddState(id+1);
+      a = {id+1, NULL};
       goto Loop;
 
     case kInstEmptyWidth:
       if (!ip->last())
-        stk[nstk++] = AddState(id+1);
+        stk[nstk++] = {id+1, NULL};
 
       // Continue on if we have all the right flag bits.
-      if (ip->empty() & ~flag)
+      if (ip->empty() & ~Prog::EmptyFlags(context, p))
         break;
-      a = AddState(ip->out());
+      a = {ip->out(), NULL};
       goto Loop;
     }
   }
@@ -328,13 +319,13 @@
 
 // Run runq on byte c, appending new states to nextq.
 // Updates matched_ and match_ as new, better matches are found.
+// context is used (with p) for evaluating empty-width specials.
 // p is the position of byte c in the input string for AddToThreadq;
 // p-1 will be used when processing Match instructions.
-// flag is the bitwise OR of Bol, Eol, etc., specifying whether
-// ^, $ and \b match the current input position (after c).
 // Frees all the threads on runq.
 // If there is a shortcut to the end, returns that shortcut.
-int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+              const char* p) {
   nextq->clear();
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
@@ -360,7 +351,7 @@
         break;
 
       case kInstByteRange:
-        AddToThreadq(nextq, ip->out(), c, flag, p, t);
+        AddToThreadq(nextq, ip->out(), c, context, p, t);
         break;
 
       case kInstAltMatch:
@@ -500,38 +491,9 @@
   runq->clear();
   nextq->clear();
   memset(&match_[0], 0, ncapture_*sizeof match_[0]);
-  int wasword = 0;
-
-  if (text.begin() > context.begin())
-    wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF);
 
   // Loop over the text, stepping the machine.
   for (const char* p = text.begin();; p++) {
-    // Check for empty-width specials.
-    int flag = 0;
-
-    // ^ and \A
-    if (p == context.begin())
-      flag |= kEmptyBeginText | kEmptyBeginLine;
-    else if (p <= context.end() && p[-1] == '\n')
-      flag |= kEmptyBeginLine;
-
-    // $ and \z
-    if (p == context.end())
-      flag |= kEmptyEndText | kEmptyEndLine;
-    else if (p < context.end() && p[0] == '\n')
-      flag |= kEmptyEndLine;
-
-    // \b and \B
-    int isword = 0;
-    if (p < context.end())
-      isword = Prog::IsWordChar(p[0] & 0xFF);
-
-    if (isword != wasword)
-      flag |= kEmptyWordBoundary;
-    else
-      flag |= kEmptyNonWordBoundary;
-
     if (ExtraDebug) {
       int c = 0;
       if (p == context.begin())
@@ -541,7 +503,7 @@
       else if (p < text.end())
         c = p[0] & 0xFF;
 
-      fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword);
+      fprintf(stderr, "%c:", c);
       for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
         Thread* t = i->second;
         if (t == NULL)
@@ -552,7 +514,7 @@
     }
 
     // This is a no-op the first time around the loop because runq is empty.
-    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p);
+    int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
     DCHECK_EQ(runq->size(), 0);
     using std::swap;
     swap(nextq, runq);
@@ -604,17 +566,14 @@
         p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
         if (p == NULL) {
           p = text.end();
-          isword = 0;
-        } else {
-          isword = Prog::IsWordChar(p[0] & 0xFF);
         }
-        flag = Prog::EmptyFlags(context, p);
       }
 
       Thread* t = AllocThread();
       CopyCapture(t->capture, match_);
       t->capture[0] = p;
-      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, 
t);
+      AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
+                   t);
       Decref(t);
     }
 
@@ -624,8 +583,6 @@
         fprintf(stderr, "dead\n");
       break;
     }
-
-    wasword = isword;
   }
 
   for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/onepass.cc 
new/re2-2019-01-01/re2/onepass.cc
--- old/re2-2018-10-01/re2/onepass.cc   2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/onepass.cc   2018-12-24 14:14:42.000000000 +0100
@@ -59,6 +59,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/sparse_set.h"
 #include "util/strutil.h"
 #include "util/utf.h"
@@ -403,11 +404,11 @@
   int stacksize = inst_count(kInstCapture) +
                   inst_count(kInstEmptyWidth) +
                   inst_count(kInstNop) + 1;  // + 1 for start inst
-  InstCond* stack = new InstCond[stacksize];
+  PODArray<InstCond> stack(stacksize);
 
   int size = this->size();
-  int* nodebyid = new int[size];  // indexed by ip
-  memset(nodebyid, 0xFF, size*sizeof nodebyid[0]);
+  PODArray<int> nodebyid(size);  // indexed by ip
+  memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);
 
   // Originally, nodes was a uint8_t[maxnodes*statesize], but that was
   // unnecessarily optimistic: why allocate a large amount of memory
@@ -613,14 +614,9 @@
   dfa_mem_ -= nalloc*statesize;
   onepass_nodes_ = new uint8_t[nalloc*statesize];
   memmove(onepass_nodes_, nodes.data(), nalloc*statesize);
-
-  delete[] stack;
-  delete[] nodebyid;
   return true;
 
 fail:
-  delete[] stack;
-  delete[] nodebyid;
   return false;
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/parse.cc 
new/re2-2019-01-01/re2/parse.cc
--- old/re2-2018-10-01/re2/parse.cc     2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/parse.cc     2018-12-24 14:14:42.000000000 +0100
@@ -27,6 +27,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/strutil.h"
 #include "util/utf.h"
 #include "re2/regexp.h"
@@ -1217,7 +1218,7 @@
     return;
 
   // Construct op (alternation or concatenation), flattening op of op.
-  Regexp** subs = new Regexp*[n];
+  PODArray<Regexp*> subs(n);
   next = NULL;
   int i = n;
   for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
@@ -1232,8 +1233,7 @@
     }
   }
 
-  Regexp* re = ConcatOrAlternate(op, subs, n, flags_, true);
-  delete[] subs;
+  Regexp* re = ConcatOrAlternate(op, subs.data(), n, flags_, true);
   re->simple_ = re->ComputeSimple();
   re->down_ = next;
   stacktop_ = re;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/prefilter.cc 
new/re2-2019-01-01/re2/prefilter.cc
--- old/re2-2018-10-01/re2/prefilter.cc 2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/prefilter.cc 2018-12-24 14:14:42.000000000 +0100
@@ -214,7 +214,7 @@
   static Info* Quest(Info* a);
   static Info* EmptyString();
   static Info* NoMatch();
-  static Info* AnyChar();
+  static Info* AnyCharOrAnyByte();
   static Info* CClass(CharClass* cc, bool latin1);
   static Info* Literal(Rune r);
   static Info* LiteralLatin1(Rune r);
@@ -417,8 +417,8 @@
   return info;
 }
 
-// Constructs Info for dot (any character).
-Prefilter::Info* Prefilter::Info::AnyChar() {
+// Constructs Info for dot (any character) or \C (any byte).
+Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
   Prefilter::Info* info = new Prefilter::Info();
   info->match_ = new Prefilter(ALL);
   return info;
@@ -461,7 +461,7 @@
 
   // If the class is too large, it's okay to overestimate.
   if (cc->size() > 10)
-    return AnyChar();
+    return AnyCharOrAnyByte();
 
   Prefilter::Info *a = new Prefilter::Info();
   for (CCIter i = cc->begin(); i != cc->end(); ++i)
@@ -622,8 +622,9 @@
       break;
 
     case kRegexpAnyChar:
+    case kRegexpAnyByte:
       // Claim nothing, except that it's not empty.
-      info = AnyChar();
+      info = AnyCharOrAnyByte();
       break;
 
     case kRegexpCharClass:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/prefilter_tree.cc 
new/re2-2019-01-01/re2/prefilter_tree.cc
--- old/re2-2018-10-01/re2/prefilter_tree.cc    2018-09-28 12:00:36.000000000 
+0200
+++ new/re2-2019-01-01/re2/prefilter_tree.cc    2018-12-24 14:14:42.000000000 
+0100
@@ -138,6 +138,7 @@
       return false;
 
     case Prefilter::ALL:
+    case Prefilter::NONE:
       return false;
 
     case Prefilter::ATOM:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/prog.cc 
new/re2-2019-01-01/re2/prog.cc
--- old/re2-2018-10-01/re2/prog.cc      2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/prog.cc      2018-12-24 14:14:42.000000000 +0100
@@ -112,7 +112,6 @@
     first_byte_(-1),
     flags_(0),
     list_count_(0),
-    inst_(NULL),
     onepass_nodes_(NULL),
     dfa_mem_(0),
     dfa_first_(NULL),
@@ -123,7 +122,6 @@
   DeleteDFA(dfa_longest_);
   DeleteDFA(dfa_first_);
   delete[] onepass_nodes_;
-  delete[] inst_;
 }
 
 typedef SparseSet Workq;
@@ -628,9 +626,8 @@
 
   // Finally, replace the old instructions with the new instructions.
   size_ = static_cast<int>(flat.size());
-  delete[] inst_;
-  inst_ = new Inst[size_];
-  memmove(inst_, flat.data(), size_ * sizeof *inst_);
+  inst_ = PODArray<Inst>(size_);
+  memmove(inst_.data(), flat.data(), size_*sizeof(inst_[0]));
 }
 
 void Prog::MarkSuccessors(SparseArray<int>* rootmap,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/prog.h 
new/re2-2019-01-01/re2/prog.h
--- old/re2-2018-10-01/re2/prog.h       2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/prog.h       2018-12-24 14:14:42.000000000 +0100
@@ -18,6 +18,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/sparse_array.h"
 #include "util/sparse_set.h"
 #include "re2/re2.h"
@@ -77,7 +78,7 @@
     void InitFail();
 
     // Getters
-    int id(Prog* p) { return static_cast<int>(this - p->inst_); }
+    int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
     InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
     int last()      { return (out_opcode_>>3)&1; }
     int out()       { return out_opcode_>>4; }
@@ -395,7 +396,7 @@
   int list_count_;            // count of lists (see above)
   int inst_count_[kNumInst];  // count of instructions by opcode
 
-  Inst* inst_;              // pointer to instruction array
+  PODArray<Inst> inst_;     // pointer to instruction array
   uint8_t* onepass_nodes_;  // data for OnePass nodes
 
   int64_t dfa_mem_;         // Maximum memory for DFAs.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/re2.cc 
new/re2-2019-01-01/re2/re2.cc
--- old/re2-2018-10-01/re2/re2.cc       2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/re2.cc       2018-12-24 14:14:42.000000000 +0100
@@ -177,10 +177,10 @@
   entire_regexp_ = NULL;
   suffix_regexp_ = NULL;
   prog_ = NULL;
+  num_captures_ = -1;
   rprog_ = NULL;
   error_ = empty_string;
   error_code_ = NoError;
-  num_captures_ = -1;
   named_groups_ = NULL;
   group_names_ = NULL;
 
@@ -218,6 +218,11 @@
     return;
   }
 
+  // We used to compute this lazily, but it's used during the
+  // typical control flow for a match call, so we now compute
+  // it eagerly, which avoids the overhead of std::once_flag.
+  num_captures_ = suffix_regexp_->NumCaptures();
+
   // Could delay this until the first match call that
   // cares about submatch information, but the one-pass
   // machine's memory gets cut from the DFA memory budget,
@@ -262,11 +267,18 @@
   return prog_->size();
 }
 
-int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+int RE2::ReverseProgramSize() const {
   if (prog_ == NULL)
     return -1;
-  SparseArray<int> fanout(prog_->size());
-  prog_->Fanout(&fanout);
+  Prog* prog = ReverseProg();
+  if (prog == NULL)
+    return -1;
+  return prog->size();
+}
+
+static int Fanout(Prog* prog, std::map<int, int>* histogram) {
+  SparseArray<int> fanout(prog->size());
+  prog->Fanout(&fanout);
   histogram->clear();
   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
     // TODO(junyer): Optimise this?
@@ -279,14 +291,19 @@
   return histogram->rbegin()->first;
 }
 
-// Returns num_captures_, computing it if needed, or -1 if the
-// regexp wasn't valid on construction.
-int RE2::NumberOfCapturingGroups() const {
-  std::call_once(num_captures_once_, [](const RE2* re) {
-    if (re->suffix_regexp_ != NULL)
-      re->num_captures_ = re->suffix_regexp_->NumCaptures();
-  }, this);
-  return num_captures_;
+int RE2::ProgramFanout(std::map<int, int>* histogram) const {
+  if (prog_ == NULL)
+    return -1;
+  return Fanout(prog_, histogram);
+}
+
+int RE2::ReverseProgramFanout(std::map<int, int>* histogram) const {
+  if (prog_ == NULL)
+    return -1;
+  Prog* prog = ReverseProg();
+  if (prog == NULL)
+    return -1;
+  return Fanout(prog, histogram);
 }
 
 // Returns named_groups_, computing it if needed.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/re2.h new/re2-2019-01-01/re2/re2.h
--- old/re2-2018-10-01/re2/re2.h        2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/re2.h        2018-12-24 14:14:42.000000000 +0100
@@ -53,9 +53,19 @@
 //    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
 //
 // -----------------------------------------------------------------------
-// MATCHING WITH SUB-STRING EXTRACTION:
+// MATCHING WITH SUBSTRING EXTRACTION:
 //
-// You can supply extra pointer arguments to extract matched subpieces.
+// You can supply extra pointer arguments to extract matched substrings.
+// On match failure, none of the pointees will have been modified.
+// On match success, the substrings will be converted (as necessary) and
+// their values will be assigned to their pointees until all conversions
+// have succeeded or one conversion has failed.
+// On conversion failure, the pointees will be in an indeterminate state
+// because the caller has no way of knowing which conversion failed.
+// However, conversion cannot fail for types like string and StringPiece
+// that do not inspect the substring contents. Hence, in the common case
+// where all of the pointees are of such types, failure is always due to
+// match failure and thus none of the pointees will have been modified.
 //
 // Example: extracts "ruby" into "s" and 1234 into "i"
 //    int i;
@@ -278,11 +288,13 @@
   // Returns the program size, a very approximate measure of a regexp's "cost".
   // Larger numbers are more expensive than smaller numbers.
   int ProgramSize() const;
+  int ReverseProgramSize() const;
 
   // EXPERIMENTAL! SUBJECT TO CHANGE!
   // Outputs the program fanout as a histogram bucketed by powers of 2.
   // Returns the number of the largest non-empty bucket.
   int ProgramFanout(std::map<int, int>* histogram) const;
+  int ReverseProgramFanout(std::map<int, int>* histogram) const;
 
   // Returns the underlying Regexp; not for general use.
   // Returns entire_regexp_ so that callers don't need
@@ -467,7 +479,7 @@
   // Return the number of capturing subpatterns, or -1 if the
   // regexp wasn't valid on construction.  The overall match ($0)
   // does not count: if the regexp is "(a)(b)", returns 2.
-  int NumberOfCapturingGroups() const;
+  int NumberOfCapturingGroups() const { return num_captures_; }
 
   // Return a map from names to capturing indices.
   // The map records the index of the leftmost group
@@ -489,6 +501,7 @@
   // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
   // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
   // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
+  // Caveat: submatch[] may be clobbered even on match failure.
   //
   // Don't ask for more match information than you will use:
   // runs much faster with nsubmatch == 1 than nsubmatch > 1, and
@@ -731,6 +744,7 @@
   re2::Regexp*  entire_regexp_;    // parsed regular expression
   re2::Regexp*  suffix_regexp_;    // parsed regular expression, prefix removed
   re2::Prog*    prog_;             // compiled program for regexp
+  int           num_captures_;     // Number of capturing groups
   bool          is_one_pass_;      // can use prog_->SearchOnePass?
 
   mutable re2::Prog*     rprog_;         // reverse program for regexp
@@ -738,7 +752,6 @@
                                          // (or points to empty string)
   mutable ErrorCode      error_code_;    // Error code
   mutable string         error_arg_;     // Fragment of regexp showing error
-  mutable int            num_captures_;  // Number of capturing groups
 
   // Map from capture names to indices
   mutable const std::map<string, int>* named_groups_;
@@ -748,7 +761,6 @@
 
   // Onces for lazy computations.
   mutable std::once_flag rprog_once_;
-  mutable std::once_flag num_captures_once_;
   mutable std::once_flag named_groups_once_;
   mutable std::once_flag group_names_once_;
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/set.cc 
new/re2-2019-01-01/re2/set.cc
--- old/re2-2018-10-01/re2/set.cc       2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/set.cc       2018-12-24 14:14:42.000000000 +0100
@@ -10,6 +10,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "re2/stringpiece.h"
 #include "re2/prog.h"
 #include "re2/re2.h"
@@ -55,13 +56,12 @@
   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
   if (re->op() == kRegexpConcat) {
     int nsub = re->nsub();
-    re2::Regexp** sub = new re2::Regexp*[nsub + 1];
+    PODArray<re2::Regexp*> sub(nsub + 1);
     for (int i = 0; i < nsub; i++)
       sub[i] = re->sub()[i]->Incref();
     sub[nsub] = m;
     re->Decref();
-    re = re2::Regexp::Concat(sub, nsub + 1, pf);
-    delete[] sub;
+    re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
   } else {
     re2::Regexp* sub[2];
     sub[0] = re;
@@ -87,16 +87,15 @@
               return a.first < b.first;
             });
 
-  re2::Regexp** sub = new re2::Regexp*[size_];
-  for (size_t i = 0; i < elem_.size(); i++)
+  PODArray<re2::Regexp*> sub(size_);
+  for (int i = 0; i < size_; i++)
     sub[i] = elem_[i].second;
   elem_.clear();
   elem_.shrink_to_fit();
 
   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
     options_.ParseFlags());
-  re2::Regexp* re = re2::Regexp::Alternate(sub, size_, pf);
-  delete[] sub;
+  re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
 
   prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
   re->Decref();
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/simplify.cc 
new/re2-2019-01-01/re2/simplify.cc
--- old/re2-2018-10-01/re2/simplify.cc  2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/re2/simplify.cc  2018-12-24 14:14:42.000000000 +0100
@@ -10,6 +10,7 @@
 
 #include "util/util.h"
 #include "util/logging.h"
+#include "util/pod_array.h"
 #include "util/utf.h"
 #include "re2/regexp.h"
 #include "re2/walker-inl.h"
@@ -589,13 +590,11 @@
       return Regexp::Plus(re->Incref(), f);
 
     // General case: x{4,} is xxxx+
-    Regexp** nre_subs = new Regexp*[min];
+    PODArray<Regexp*> nre_subs(min);
     for (int i = 0; i < min-1; i++)
       nre_subs[i] = re->Incref();
     nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
-    Regexp* nre = Regexp::Concat(nre_subs, min, f);
-    delete[] nre_subs;
-    return nre;
+    return Regexp::Concat(nre_subs.data(), min, f);
   }
 
   // Special case: (x){0} matches only empty string.
@@ -613,11 +612,10 @@
   // Build leading prefix: xx.  Capturing only on the last one.
   Regexp* nre = NULL;
   if (min > 0) {
-    Regexp** nre_subs = new Regexp*[min];
+    PODArray<Regexp*> nre_subs(min);
     for (int i = 0; i < min; i++)
       nre_subs[i] = re->Incref();
-    nre = Regexp::Concat(nre_subs, min, f);
-    delete[] nre_subs;
+    nre = Regexp::Concat(nre_subs.data(), min, f);
   }
 
   // Build and attach suffix: (x(x(x)?)?)?
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/re2/testing/re2_test.cc 
new/re2-2019-01-01/re2/testing/re2_test.cc
--- old/re2-2018-10-01/re2/testing/re2_test.cc  2018-09-28 12:00:36.000000000 
+0200
+++ new/re2-2019-01-01/re2/testing/re2_test.cc  2018-12-24 14:14:42.000000000 
+0100
@@ -461,6 +461,10 @@
   ASSERT_GT(re_simple.ProgramSize(), 0);
   ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
   ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
+
+  ASSERT_GT(re_simple.ReverseProgramSize(), 0);
+  ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
+  ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
 }
 
 TEST(ProgramFanout, BigProgram) {
@@ -486,6 +490,23 @@
   // 13 is the largest non-empty bucket and has 1000 elements.
   ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
   ASSERT_EQ(1000, histogram[13]);
+
+  // 2 is the largest non-empty bucket and has 3 elements.
+  // This differs from the others due to how reverse `.' works.
+  ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(3, histogram[2]);
+
+  // 5 is the largest non-empty bucket and has 10 elements.
+  ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(10, histogram[5]);
+
+  // 9 is the largest non-empty bucket and has 100 elements.
+  ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(100, histogram[9]);
+
+  // 12 is the largest non-empty bucket and has 1000 elements.
+  ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
+  ASSERT_EQ(1000, histogram[12]);
 }
 
 // Issue 956519: handling empty character sets was
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/util/benchmark.cc 
new/re2-2019-01-01/util/benchmark.cc
--- old/re2-2018-10-01/util/benchmark.cc        2018-09-28 12:00:36.000000000 
+0200
+++ new/re2-2019-01-01/util/benchmark.cc        2018-12-24 14:14:42.000000000 
+0100
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <algorithm>
 #include <chrono>
+#include <thread>
 
 #include "util/util.h"
 #include "util/flags.h"
@@ -67,7 +68,7 @@
 }
 
 int NumCPUs() {
-       return 1;
+       return static_cast<int>(std::thread::hardware_concurrency());
 }
 
 static void runN(Benchmark *b, int n, int siz) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2018-10-01/util/pcre.h 
new/re2-2019-01-01/util/pcre.h
--- old/re2-2018-10-01/util/pcre.h      2018-09-28 12:00:36.000000000 +0200
+++ new/re2-2019-01-01/util/pcre.h      2018-12-24 14:14:42.000000000 +0100
@@ -61,9 +61,9 @@
 //    CHECK(PCRE::FullMatch(utf8_string, re));
 //
 // -----------------------------------------------------------------------
-// MATCHING WITH SUB-STRING EXTRACTION:
+// MATCHING WITH SUBSTRING EXTRACTION:
 //
-// You can supply extra pointer arguments to extract matched subpieces.
+// You can supply extra pointer arguments to extract matched substrings.
 //
 // Example: extracts "ruby" into "s" and 1234 into "i"
 //    int i;

commit re2 for openSUSE:Factory

Reply via email to