Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2020-01-12 23:15:47 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new.6675 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "re2" Sun Jan 12 23:15:47 2020 rev:27 rq:761763 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2019-12-11 12:04:41.964751098 +0100 +++ /work/SRC/openSUSE:Factory/.re2.new.6675/re2.changes 2020-01-12 23:15:50.990626747 +0100 @@ -1,0 +2,6 @@ +Wed Jan 8 08:37:25 UTC 2020 - Tomáš Chvátal <tchva...@suse.com> + +- Update to 2020-01-01: + * various developer visible changes + +------------------------------------------------------------------- Old: ---- re2-2019-12-01.tar.gz New: ---- re2-2020-01-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.blK2rG/_old 2020-01-12 23:15:51.606626975 +0100 +++ /var/tmp/diff_new_pack.blK2rG/_new 2020-01-12 23:15:51.610626976 +0100 @@ -1,7 +1,7 @@ # # spec file for package re2 # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,7 +16,7 @@ # -%global longver 2019-12-01 +%global longver 2020-01-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-0 Name: re2 ++++++ re2-2019-12-01.tar.gz -> re2-2020-01-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2019-12-01/re2/compile.cc new/re2-2020-01-01/re2/compile.cc --- old/re2-2019-12-01/re2/compile.cc 2019-11-25 12:22:49.000000000 +0100 +++ new/re2-2020-01-01/re2/compile.cc 2019-12-30 08:29:02.000000000 +0100 @@ -662,48 +662,43 @@ static_cast<uint8_t>(hi), foldcase, 0)); } -// Table describing how to make a UTF-8 matching machine -// for the rune range 80-10FFFF (Runeself-Runemax). -// This range happens frequently enough (for example /./ and /[^a-z]/) -// and the rune_cache_ map is slow enough that this is worth -// special handling. Makes compilation of a small expression -// with a dot in it about 10% faster. -// The * in the comments below mark whole sequences. -static struct ByteRangeProg { - int next; - int lo; - int hi; -} prog_80_10ffff[] = { - // Two-byte - { -1, 0x80, 0xBF, }, // 0: 80-BF - { 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF* - - // Three-byte - { 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF - { 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF* - { 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF - { 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF* - - // Four-byte - { 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF - { 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF* - { 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF - { 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF* - { 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF - { 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF* -}; - void Compiler::Add_80_10ffff() { - int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning - for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) { - const ByteRangeProg& p = prog_80_10ffff[i]; - int next = 0; - if (p.next >= 0) - next = inst[p.next]; - inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo), - static_cast<uint8_t>(p.hi), false, next); - if ((p.lo & 0xC0) != 0x80) - AddSuffix(inst[i]); + // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough + // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by + // permitting overlong encodings in E0 and F0 sequences and code points + // over 10FFFF in F4 sequences, the size of the bytecode and the number + // of equivalence classes are reduced significantly. + int id; + if (reversed_) { + // Prefix factoring matters, but we don't have to handle it here + // because the rune range trie logic takes care of that already. + id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + AddSuffix(id); + + id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + AddSuffix(id); + + id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + id = UncachedRuneByteSuffix(0x80, 0xBF, false, id); + AddSuffix(id); + } else { + // Suffix factoring matters - and we do have to handle it here. + int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0); + id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1); + AddSuffix(id); + + int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1); + id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2); + AddSuffix(id); + + int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2); + id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3); + AddSuffix(id); } } @@ -711,9 +706,8 @@ if (lo > hi) return; - // Pick off 80-10FFFF as a common special case - // that can bypass the slow rune_cache_. - if (lo == 0x80 && hi == 0x10ffff && !reversed_) { + // Pick off 80-10FFFF as a common special case. + if (lo == 0x80 && hi == 0x10ffff) { Add_80_10ffff(); return; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2019-12-01/re2/dfa.cc new/re2-2020-01-01/re2/dfa.cc --- old/re2-2019-12-01/re2/dfa.cc 2019-11-25 12:22:49.000000000 +0100 +++ new/re2-2020-01-01/re2/dfa.cc 2019-12-30 08:29:02.000000000 +0100 @@ -971,8 +971,21 @@ break; case kInstByteRange: // can follow if c is in range - if (ip->Matches(c)) - AddToQueue(newq, ip->out(), flag); + if (!ip->Matches(c)) + break; + AddToQueue(newq, ip->out(), flag); + if (ip->hint() != 0) { + // We have a hint, but we must cancel out the + // increment that will occur after the break. + i += ip->hint() - 1; + } else { + // We have no hint, so we must find the end + // of the current list and then skip to it. + Prog::Inst* ip0 = ip; + while (!ip->last()) + ++ip; + i += ip - ip0; + } break; case kInstMatch: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2019-12-01/re2/pod_array.h new/re2-2020-01-01/re2/pod_array.h --- old/re2-2019-12-01/re2/pod_array.h 2019-11-25 12:22:49.000000000 +0100 +++ new/re2-2020-01-01/re2/pod_array.h 2019-12-30 08:29:02.000000000 +0100 @@ -13,7 +13,7 @@ template <typename T> class PODArray { public: - static_assert(std::is_pod<T>::value, + static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value, "T must be POD"); PODArray() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2019-12-01/re2/testing/compile_test.cc new/re2-2020-01-01/re2/testing/compile_test.cc --- old/re2-2019-12-01/re2/testing/compile_test.cc 2019-11-25 12:22:49.000000000 +0100 +++ new/re2-2020-01-01/re2/testing/compile_test.cc 2019-12-30 08:29:02.000000000 +0100 @@ -147,10 +147,19 @@ Regexp* re = Regexp::Parse(pattern, flags, NULL); EXPECT_TRUE(re != NULL); - Prog* prog = re->CompileToProg(0); - EXPECT_TRUE(prog != NULL); - *bytemap = prog->DumpByteMap(); - delete prog; + { + Prog* prog = re->CompileToProg(0); + EXPECT_TRUE(prog != NULL); + *bytemap = prog->DumpByteMap(); + delete prog; + } + + { + Prog* prog = re->CompileToReverseProg(0); + EXPECT_TRUE(prog != NULL); + EXPECT_EQ(*bytemap, prog->DumpByteMap()); + delete prog; + } re->Decref(); } @@ -213,16 +222,11 @@ EXPECT_EQ("[00-09] -> 0\n" "[0a-0a] -> 1\n" "[0b-7f] -> 0\n" - "[80-8f] -> 2\n" - "[90-9f] -> 3\n" - "[a0-bf] -> 4\n" + "[80-bf] -> 2\n" "[c0-c1] -> 1\n" - "[c2-df] -> 5\n" - "[e0-e0] -> 6\n" - "[e1-ef] -> 7\n" - "[f0-f0] -> 8\n" - "[f1-f3] -> 9\n" - "[f4-f4] -> 10\n" + "[c2-df] -> 3\n" + "[e0-ef] -> 4\n" + "[f0-f4] -> 5\n" "[f5-ff] -> 1\n", bytemap); } @@ -299,20 +303,22 @@ "8. byte [f0-f0] 0 -> 7\n", reverse); - Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse); - EXPECT_EQ("3. byte [80-bf] 0 -> 4\n" - "4+ byte [c2-df] 0 -> 7\n" - "5+ byte [a0-bf] 1 -> 8\n" - "6. byte [80-bf] 0 -> 9\n" + Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse); + EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n" + "4+ byte [e0-ef] 0 -> 8\n" + "5. byte [f0-f4] 0 -> 9\n" + "6. byte [80-bf] 0 -> 7\n" "7. match! 0\n" - "8. byte [e0-e0] 0 -> 7\n" - "9+ byte [e1-ef] 0 -> 7\n" - "10+ byte [90-bf] 1 -> 13\n" - "11+ byte [80-bf] 1 -> 14\n" - "12. byte [80-8f] 0 -> 15\n" - "13. byte [f0-f0] 0 -> 7\n" - "14. byte [f1-f3] 0 -> 7\n" - "15. byte [f4-f4] 0 -> 7\n", + "8. byte [80-bf] 0 -> 6\n" + "9. byte [80-bf] 0 -> 8\n", + forward); + EXPECT_EQ("3. byte [80-bf] 0 -> 4\n" + "4+ byte [c2-df] 0 -> 6\n" + "5. byte [80-bf] 0 -> 7\n" + "6. match! 0\n" + "7+ byte [e0-ef] 0 -> 6\n" + "8. byte [80-bf] 0 -> 9\n" + "9. byte [f0-f4] 0 -> 6\n", reverse); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2019-12-01/re2/testing/re2_test.cc new/re2-2020-01-01/re2/testing/re2_test.cc --- old/re2-2019-12-01/re2/testing/re2_test.cc 2019-11-25 12:22:49.000000000 +0100 +++ new/re2-2020-01-01/re2/testing/re2_test.cc 2019-12-30 08:29:02.000000000 +0100 @@ -479,22 +479,21 @@ ASSERT_EQ(3, re1.ProgramFanout(&histogram)); ASSERT_EQ(1, histogram[3]); - // 7 is the largest non-empty bucket and has 10 elements. - ASSERT_EQ(7, re10.ProgramFanout(&histogram)); - ASSERT_EQ(10, histogram[7]); + // 6 is the largest non-empty bucket and has 10 elements. + ASSERT_EQ(6, re10.ProgramFanout(&histogram)); + ASSERT_EQ(10, histogram[6]); - // 10 is the largest non-empty bucket and has 100 elements. - ASSERT_EQ(10, re100.ProgramFanout(&histogram)); - ASSERT_EQ(100, histogram[10]); + // 9 is the largest non-empty bucket and has 100 elements. + ASSERT_EQ(9, re100.ProgramFanout(&histogram)); + ASSERT_EQ(100, histogram[9]); // 13 is the largest non-empty bucket and has 1000 elements. ASSERT_EQ(13, re1000.ProgramFanout(&histogram)); ASSERT_EQ(1000, histogram[13]); - // 2 is the largest non-empty bucket and has 3 elements. - // This differs from the others due to how reverse `.' works. + // 2 is the largest non-empty bucket and has 1 element. ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram)); - ASSERT_EQ(3, histogram[2]); + ASSERT_EQ(1, histogram[2]); // 5 is the largest non-empty bucket and has 10 elements. ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));