Hello community,

here is the log from the commit of package re2 for openSUSE:Factory checked in at 2018-09-11 17:14:16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/re2 (Old)
 and /work/SRC/openSUSE:Factory/.re2.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Tue Sep 11 17:14:16 2018 rev:18 rq:633471 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2018-07-23 17:57:46.625171872 +0200 +++ /work/SRC/openSUSE:Factory/.re2.new/re2.changes 2018-09-11 17:14:44.143646765 +0200 @@ -1,0 +2,12 @@ +Wed Sep 5 12:04:00 UTC 2018 - astie...@suse.com + +- update to 2018-09-01: + * developer visible changes only + +------------------------------------------------------------------- +Thu Aug 23 12:46:51 UTC 2018 - astie...@suse.com + +- update to 2018-08-01: + * Fix the "DFA out of memory" error for the reverse Prog + +------------------------------------------------------------------- Old: ---- 2018-07-01.tar.gz New: ---- 2018-09-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.sjgM2g/_old 2018-09-11 17:14:45.799644210 +0200 +++ /var/tmp/diff_new_pack.sjgM2g/_new 2018-09-11 17:14:45.799644210 +0200 @@ -16,7 +16,7 @@ # -%global longver 2018-07-01 +%global longver 2018-09-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-0 Name: re2 ++++++ 2018-07-01.tar.gz -> 2018-09-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/BUILD new/re2-2018-09-01/BUILD --- old/re2-2018-07-01/BUILD 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/BUILD 2018-08-30 19:36:00.000000000 +0200 @@ -58,6 +58,7 @@ "util/logging.h", "util/mix.h", "util/mutex.h", + "util/pod_array.h", "util/rune.cc", "util/sparse_array.h", "util/sparse_set.h", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/Makefile new/re2-2018-09-01/Makefile --- old/re2-2018-07-01/Makefile 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/Makefile 2018-08-30 19:36:00.000000000 +0200 @@ -83,6 +83,7 @@ util/mix.h\ util/mutex.h\ util/pcre.h\ + util/pod_array.h\ 
util/sparse_array.h\ util/sparse_set.h\ util/strutil.h\ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/re2/bitstate.cc new/re2-2018-09-01/re2/bitstate.cc --- old/re2-2018-07-01/re2/bitstate.cc 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/re2/bitstate.cc 2018-08-30 19:36:00.000000000 +0200 @@ -20,8 +20,10 @@ #include <stddef.h> #include <stdint.h> #include <string.h> +#include <utility> #include "util/logging.h" +#include "util/pod_array.h" #include "re2/prog.h" #include "re2/regexp.h" @@ -36,7 +38,6 @@ class BitState { public: explicit BitState(Prog* prog); - ~BitState(); // The usual Search prototype. // Can only call Search once per BitState. @@ -47,7 +48,7 @@ private: inline bool ShouldVisit(int id, const char* p); void Push(int id, const char* p, int arg); - bool GrowStack(); + void GrowStack(); bool TrySearch(int id, const char* p); // Search parameters @@ -57,20 +58,15 @@ bool anchored_; // whether search is anchored at text.begin() bool longest_; // whether search wants leftmost-longest match bool endmatch_; // whether match must end at text.end() - StringPiece *submatch_; // submatches to fill in + StringPiece* submatch_; // submatches to fill in int nsubmatch_; // # of submatches to fill in // Search state - const char** cap_; // capture registers - int ncap_; - static const int VisitedBits = 32; - uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked - size_t nvisited_; // # of words in bitmap - - Job *job_; // stack of text positions to explore - int njob_; - int maxjob_; + PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited + PODArray<const char*> cap_; // capture registers + PODArray<Job> job_; // stack of text positions to explore + int njob_; // stack size }; BitState::BitState(Prog* prog) @@ -80,26 +76,15 @@ endmatch_(false), submatch_(NULL), nsubmatch_(0), - cap_(NULL), - ncap_(0), - visited_(NULL), - nvisited_(0), - job_(NULL), - njob_(0), 
- maxjob_(0) { -} - -BitState::~BitState() { - delete[] visited_; - delete[] job_; - delete[] cap_; + njob_(0) { } // Should the search visit the pair ip, p? // If so, remember that it was visited so that the next time, // we don't repeat the visit. bool BitState::ShouldVisit(int id, const char* p) { - size_t n = id * (text_.size() + 1) + (p - text_.begin()); + int n = id * static_cast<int>(text_.size()+1) + + static_cast<int>(p-text_.begin()); if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1)))) return false; visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1)); @@ -107,24 +92,22 @@ } // Grow the stack. -bool BitState::GrowStack() { - maxjob_ *= 2; - Job* newjob = new Job[maxjob_]; - memmove(newjob, job_, njob_*sizeof job_[0]); - delete[] job_; - job_ = newjob; - if (njob_ >= maxjob_) { - LOG(DFATAL) << "Job stack overflow."; - return false; - } - return true; +void BitState::GrowStack() { + PODArray<Job> tmp(2*job_.size()); + memmove(tmp.data(), job_.data(), njob_*sizeof job_[0]); + job_ = std::move(tmp); } // Push the triple (id, p, arg) onto the stack, growing it if necessary. void BitState::Push(int id, const char* p, int arg) { - if (njob_ >= maxjob_) { - if (!GrowStack()) + if (njob_ >= job_.size()) { + GrowStack(); + if (njob_ >= job_.size()) { + LOG(DFATAL) << "GrowStack() failed: " + << "njob_ = " << njob_ << ", " + << "job_.size() = " << job_.size(); return; + } } int op = prog_->inst(id)->opcode(); if (op == kInstFail) @@ -234,7 +217,7 @@ if (!ip->last()) Push(id+1, p, 0); // try the next when we're done - if (0 <= ip->cap() && ip->cap() < ncap_) { + if (0 <= ip->cap() && ip->cap() < cap_.size()) { // Capture p to register, but save old value. Push(id, cap_[ip->cap()], 1); // come back when we're done cap_[ip->cap()] = p; @@ -327,18 +310,19 @@ submatch_[i] = StringPiece(); // Allocate scratch space. 
- nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits; - visited_ = new uint32_t[nvisited_]; - memset(visited_, 0, nvisited_*sizeof visited_[0]); - - ncap_ = 2*nsubmatch; - if (ncap_ < 2) - ncap_ = 2; - cap_ = new const char*[ncap_]; - memset(cap_, 0, ncap_*sizeof cap_[0]); + int nvisited = prog_->size() * static_cast<int>(text.size()+1); + nvisited = (nvisited + VisitedBits-1) / VisitedBits; + visited_ = PODArray<uint32_t>(nvisited); + memset(visited_.data(), 0, nvisited*sizeof visited_[0]); + + int ncap = 2*nsubmatch; + if (ncap < 2) + ncap = 2; + cap_ = PODArray<const char*>(ncap); + memset(cap_.data(), 0, ncap*sizeof cap_[0]); - maxjob_ = 256; - job_ = new Job[maxjob_]; + // When sizeof(Job) == 16, we start with a nice round 4KiB. :) + job_ = PODArray<Job>(256); // Anchored search must start at text.begin(). if (anchored_) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/re2/re2.cc new/re2-2018-09-01/re2/re2.cc --- old/re2-2018-07-01/re2/re2.cc 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/re2/re2.cc 2018-08-30 19:36:00.000000000 +0200 @@ -557,7 +557,7 @@ Anchor re_anchor, StringPiece* submatch, int nsubmatch) const { - if (!ok() || suffix_regexp_ == NULL) { + if (!ok()) { if (options_.log_errors()) LOG(ERROR) << "Invalid RE2: " << *error_; return false; @@ -668,9 +668,9 @@ Prog::kLongestMatch, &match, &dfa_failed, NULL)) { if (dfa_failed) { if (options_.log_errors()) - LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " - << "bytemap range " << prog_->bytemap_range() << ", " - << "list count " << prog_->list_count(); + LOG(ERROR) << "DFA out of memory: size " << prog->size() << ", " + << "bytemap range " << prog->bytemap_range() << ", " + << "list count " << prog->list_count(); // Fall back to NFA below. 
skipped_test = true; break; @@ -784,6 +784,11 @@ return false; } + if (NumberOfCapturingGroups() < n) { + // RE has fewer capturing groups than number of Arg pointers passed in. + return false; + } + // Count number of capture groups needed. int nvec; if (n == 0 && consumed == NULL) @@ -816,13 +821,6 @@ return true; } - int ncap = NumberOfCapturingGroups(); - if (ncap < n) { - // RE has fewer capturing groups than number of arg pointers passed in - delete[] heapvec; - return false; - } - // If we got here, we must have matched the whole pattern. for (int i = 0; i < n; i++) { const StringPiece& s = vec[i+1]; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/re2/re2.h new/re2-2018-09-01/re2/re2.h --- old/re2-2018-07-01/re2/re2.h 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/re2/re2.h 2018-08-30 19:36:00.000000000 +0200 @@ -65,7 +65,7 @@ // Example: fails because string cannot be stored in integer // CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); // -// Example: fails because there aren't enough sub-patterns: +// Example: fails because there aren't enough sub-patterns // CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); // // Example: does not try to extract any extra sub-patterns @@ -289,54 +289,18 @@ // to know about prefix_ and prefix_foldcase_. re2::Regexp* Regexp() const { return entire_regexp_; } - /***** The useful part: the matching interface *****/ + /***** The array-based matching interface ******/ - // Matches "text" against "re". If pointer arguments are - // supplied, copies matched sub-patterns into them. - // - // You can pass in a "const char*" or a "string" for "text". - // You can pass in a "const char*" or a "string" or a "RE2" for "re". 
- // - // The provided pointer arguments can be pointers to any scalar numeric - // type, or one of: - // string (matched piece is copied to string) - // StringPiece (StringPiece is mutated to point to matched piece) - // T (where "bool T::ParseFrom(const char*, size_t)" exists) - // (void*)NULL (the corresponding matched sub-pattern is not copied) - // - // Returns true iff all of the following conditions are satisfied: - // a. "text" matches "re" exactly - // b. The number of matched sub-patterns is >= number of supplied pointers - // c. The "i"th argument has a suitable type for holding the - // string captured as the "i"th sub-pattern. If you pass in - // NULL for the "i"th argument, or pass fewer arguments than - // number of sub-patterns, "i"th captured sub-pattern is - // ignored. - // - // CAVEAT: An optional sub-pattern that does not exist in the - // matched string is assigned the empty string. Therefore, the - // following will return false (because the empty string is not a - // valid number): - // int number; - // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); + // The functions here have names ending in 'N' and are used to implement + // the functions whose names are the prefix before the 'N'. It is sometimes + // useful to invoke them directly, but the syntax is awkward, so the 'N'-less + // versions should be preferred. static bool FullMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int argc); - - // Exactly like FullMatch(), except that "re" is allowed to match - // a substring of "text". static bool PartialMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int argc); - - // Like FullMatch() and PartialMatch(), except that "re" has to match - // a prefix of the text, and "input" is advanced past the matched - // text. Note: "input" is modified iff this routine returns true. 
static bool ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int argc); - - // Like Consume(), but does not anchor the match at the beginning of - // the text. That is, "re" need not start its match at the beginning - // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds - // the next word in "s" and stores it in "word". static bool FindAndConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int argc); @@ -360,21 +324,60 @@ // The first layer constructs the temporary Arg objects. The second layer // (above) constructs the array of pointers to the temporary Arg objects. + /***** The useful part: the matching interface *****/ + + // Matches "text" against "re". If pointer arguments are + // supplied, copies matched sub-patterns into them. + // + // You can pass in a "const char*" or a "string" for "text". + // You can pass in a "const char*" or a "string" or a "RE2" for "re". + // + // The provided pointer arguments can be pointers to any scalar numeric + // type, or one of: + // string (matched piece is copied to string) + // StringPiece (StringPiece is mutated to point to matched piece) + // T (where "bool T::ParseFrom(const char*, size_t)" exists) + // (void*)NULL (the corresponding matched sub-pattern is not copied) + // + // Returns true iff all of the following conditions are satisfied: + // a. "text" matches "re" exactly + // b. The number of matched sub-patterns is >= number of supplied pointers + // c. The "i"th argument has a suitable type for holding the + // string captured as the "i"th sub-pattern. If you pass in + // NULL for the "i"th argument, or pass fewer arguments than + // number of sub-patterns, "i"th captured sub-pattern is + // ignored. + // + // CAVEAT: An optional sub-pattern that does not exist in the + // matched string is assigned the empty string. 
Therefore, the + // following will return false (because the empty string is not a + // valid number): + // int number; + // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); template <typename... A> static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) { return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...); } + // Exactly like FullMatch(), except that "re" is allowed to match + // a substring of "text". template <typename... A> static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) { return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...); } + // Like FullMatch() and PartialMatch(), except that "re" has to match + // a prefix of the text, and "input" is advanced past the matched + // text. Note: "input" is modified iff this routine returns true. template <typename... A> static bool Consume(StringPiece* input, const RE2& re, A&&... a) { return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...); } + // Like Consume(), but does not anchor the match at the beginning of + // the text. That is, "re" need not start its match at the beginning + // of "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds + // the next word in "s" and stores it in "word". template <typename... A> static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) { return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...); @@ -563,7 +566,7 @@ // can have two DFAs (one first match, one longest match). 
// That makes 4 DFAs: // - // forward, first-match - used for UNANCHORED or ANCHOR_LEFT searches + // forward, first-match - used for UNANCHORED or ANCHOR_START searches // if opt.longest_match() == false // forward, longest-match - used for all ANCHOR_BOTH searches, // and the other two kinds if diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/re2/testing/search_test.cc new/re2-2018-09-01/re2/testing/search_test.cc --- old/re2-2018-07-01/re2/testing/search_test.cc 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/re2/testing/search_test.cc 2018-08-30 19:36:00.000000000 +0200 @@ -35,7 +35,6 @@ { "a", "aaaaaaa" }, { "a*", "aaaaaaa" }, { "a*", "" }, - { "a*", NULL }, { "ab|cd", "xabcdx" }, { "a", "cab" }, { "a*b", "cab" }, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/util/pcre.cc new/re2-2018-09-01/util/pcre.cc --- old/re2-2018-07-01/util/pcre.cc 2018-06-24 13:05:48.000000000 +0200 +++ new/re2-2018-09-01/util/pcre.cc 2018-08-30 19:36:00.000000000 +0200 @@ -613,6 +613,11 @@ int* vec, int vecsize) const { assert((1 + n) * 3 <= vecsize); // results + PCRE workspace + if (NumberOfCapturingGroups() < n) { + // RE has fewer capturing groups than number of Arg pointers passed in. + return false; + } + int matches = TryMatch(text, 0, anchor, true, vec, vecsize); assert(matches >= 0); // TryMatch never returns negatives if (matches == 0) @@ -624,10 +629,6 @@ // We are not interested in results return true; } - if (NumberOfCapturingGroups() < n) { - // PCRE has fewer capturing groups than number of arg pointers passed in - return false; - } // If we got here, we must have matched the whole pattern. 
// We do not need (can not do) any more checks on the value of 'matches' here diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2018-07-01/util/pod_array.h new/re2-2018-09-01/util/pod_array.h --- old/re2-2018-07-01/util/pod_array.h 1970-01-01 01:00:00.000000000 +0100 +++ new/re2-2018-09-01/util/pod_array.h 2018-08-30 19:36:00.000000000 +0200 @@ -0,0 +1,55 @@ +// Copyright 2018 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef UTIL_POD_ARRAY_H_ +#define UTIL_POD_ARRAY_H_ + +#include <memory> +#include <type_traits> + +namespace re2 { + +template <typename T> +class PODArray { + public: + static_assert(std::is_pod<T>::value, + "T must be POD"); + + PODArray() + : ptr_() {} + explicit PODArray(int len) + : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {} + + T* data() const { + return ptr_.get(); + } + + int size() const { + return ptr_.get_deleter().len_; + } + + T& operator[](int pos) const { + return ptr_[pos]; + } + + private: + struct Deleter { + Deleter() + : len_(0) {} + explicit Deleter(int len) + : len_(len) {} + + void operator()(T* ptr) const { + std::allocator<T>().deallocate(ptr, len_); + } + + int len_; + }; + + std::unique_ptr<T[], Deleter> ptr_; +}; + +} // namespace re2 + +#endif // UTIL_POD_ARRAY_H_