commit re2 for openSUSE:Factory

root Sun, 12 Jan 2020 14:16:46 -0800

Hello community,

here is the log from the commit of package re2 for openSUSE:Factory checked in 
at 2020-01-12 23:15:47
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/re2 (Old)
 and      /work/SRC/openSUSE:Factory/.re2.new.6675 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "re2"

Sun Jan 12 23:15:47 2020 rev:27 rq:761763 version:MACRO

Changes:
--------
--- /work/SRC/openSUSE:Factory/re2/re2.changes  2019-12-11 12:04:41.964751098 
+0100
+++ /work/SRC/openSUSE:Factory/.re2.new.6675/re2.changes        2020-01-12 
23:15:50.990626747 +0100
@@ -1,0 +2,6 @@
+Wed Jan  8 08:37:25 UTC 2020 - Tomáš Chvátal <tchva...@suse.com>
+
+- Update to 2020-01-01:
+  * various developer visible changes
+
+-------------------------------------------------------------------

Old:
----
  re2-2019-12-01.tar.gz

New:
----
  re2-2020-01-01.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ re2.spec ++++++
--- /var/tmp/diff_new_pack.blK2rG/_old  2020-01-12 23:15:51.606626975 +0100
+++ /var/tmp/diff_new_pack.blK2rG/_new  2020-01-12 23:15:51.610626976 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package re2
 #
-# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2020 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -16,7 +16,7 @@
 #
 
 
-%global longver 2019-12-01
+%global longver 2020-01-01
 %global shortver %(echo %{longver}|sed 's|-||g')
 %define libname libre2-0
 Name:           re2

++++++ re2-2019-12-01.tar.gz -> re2-2020-01-01.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2019-12-01/re2/compile.cc 
new/re2-2020-01-01/re2/compile.cc
--- old/re2-2019-12-01/re2/compile.cc   2019-11-25 12:22:49.000000000 +0100
+++ new/re2-2020-01-01/re2/compile.cc   2019-12-30 08:29:02.000000000 +0100
@@ -662,48 +662,43 @@
                                    static_cast<uint8_t>(hi), foldcase, 0));
 }
 
-// Table describing how to make a UTF-8 matching machine
-// for the rune range 80-10FFFF (Runeself-Runemax).
-// This range happens frequently enough (for example /./ and /[^a-z]/)
-// and the rune_cache_ map is slow enough that this is worth
-// special handling.  Makes compilation of a small expression
-// with a dot in it about 10% faster.
-// The * in the comments below mark whole sequences.
-static struct ByteRangeProg {
-  int next;
-  int lo;
-  int hi;
-} prog_80_10ffff[] = {
-  // Two-byte
-  { -1, 0x80, 0xBF, },  // 0:  80-BF
-  {  0, 0xC2, 0xDF, },  // 1:  C2-DF 80-BF*
-
-  // Three-byte
-  {  0, 0xA0, 0xBF, },  // 2:  A0-BF 80-BF
-  {  2, 0xE0, 0xE0, },  // 3:  E0 A0-BF 80-BF*
-  {  0, 0x80, 0xBF, },  // 4:  80-BF 80-BF
-  {  4, 0xE1, 0xEF, },  // 5:  E1-EF 80-BF 80-BF*
-
-  // Four-byte
-  {  4, 0x90, 0xBF, },  // 6:  90-BF 80-BF 80-BF
-  {  6, 0xF0, 0xF0, },  // 7:  F0 90-BF 80-BF 80-BF*
-  {  4, 0x80, 0xBF, },  // 8:  80-BF 80-BF 80-BF
-  {  8, 0xF1, 0xF3, },  // 9: F1-F3 80-BF 80-BF 80-BF*
-  {  4, 0x80, 0x8F, },  // 10: 80-8F 80-BF 80-BF
-  { 10, 0xF4, 0xF4, },  // 11: F4 80-8F 80-BF 80-BF*
-};
-
 void Compiler::Add_80_10ffff() {
-  int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be 
initialized; silences gcc warning
-  for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) {
-    const ByteRangeProg& p = prog_80_10ffff[i];
-    int next = 0;
-    if (p.next >= 0)
-      next = inst[p.next];
-    inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo),
-                                     static_cast<uint8_t>(p.hi), false, next);
-    if ((p.lo & 0xC0) != 0x80)
-      AddSuffix(inst[i]);
+  // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
+  // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
+  // permitting overlong encodings in E0 and F0 sequences and code points
+  // over 10FFFF in F4 sequences, the size of the bytecode and the number
+  // of equivalence classes are reduced significantly.
+  int id;
+  if (reversed_) {
+    // Prefix factoring matters, but we don't have to handle it here
+    // because the rune range trie logic takes care of that already.
+    id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+
+    id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+
+    id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
+    AddSuffix(id);
+  } else {
+    // Suffix factoring matters - and we do have to handle it here.
+    int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
+    id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
+    AddSuffix(id);
+
+    int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
+    id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
+    AddSuffix(id);
+
+    int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
+    id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
+    AddSuffix(id);
   }
 }
 
@@ -711,9 +706,8 @@
   if (lo > hi)
     return;
 
-  // Pick off 80-10FFFF as a common special case
-  // that can bypass the slow rune_cache_.
-  if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
+  // Pick off 80-10FFFF as a common special case.
+  if (lo == 0x80 && hi == 0x10ffff) {
     Add_80_10ffff();
     return;
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2019-12-01/re2/dfa.cc 
new/re2-2020-01-01/re2/dfa.cc
--- old/re2-2019-12-01/re2/dfa.cc       2019-11-25 12:22:49.000000000 +0100
+++ new/re2-2020-01-01/re2/dfa.cc       2019-12-30 08:29:02.000000000 +0100
@@ -971,8 +971,21 @@
         break;
 
       case kInstByteRange:   // can follow if c is in range
-        if (ip->Matches(c))
-          AddToQueue(newq, ip->out(), flag);
+        if (!ip->Matches(c))
+          break;
+        AddToQueue(newq, ip->out(), flag);
+        if (ip->hint() != 0) {
+          // We have a hint, but we must cancel out the
+          // increment that will occur after the break.
+          i += ip->hint() - 1;
+        } else {
+          // We have no hint, so we must find the end
+          // of the current list and then skip to it.
+          Prog::Inst* ip0 = ip;
+          while (!ip->last())
+            ++ip;
+          i += ip - ip0;
+        }
         break;
 
       case kInstMatch:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2019-12-01/re2/pod_array.h 
new/re2-2020-01-01/re2/pod_array.h
--- old/re2-2019-12-01/re2/pod_array.h  2019-11-25 12:22:49.000000000 +0100
+++ new/re2-2020-01-01/re2/pod_array.h  2019-12-30 08:29:02.000000000 +0100
@@ -13,7 +13,7 @@
 template <typename T>
 class PODArray {
  public:
-  static_assert(std::is_pod<T>::value,
+  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                 "T must be POD");
 
   PODArray()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2019-12-01/re2/testing/compile_test.cc 
new/re2-2020-01-01/re2/testing/compile_test.cc
--- old/re2-2019-12-01/re2/testing/compile_test.cc      2019-11-25 
12:22:49.000000000 +0100
+++ new/re2-2020-01-01/re2/testing/compile_test.cc      2019-12-30 
08:29:02.000000000 +0100
@@ -147,10 +147,19 @@
   Regexp* re = Regexp::Parse(pattern, flags, NULL);
   EXPECT_TRUE(re != NULL);
 
-  Prog* prog = re->CompileToProg(0);
-  EXPECT_TRUE(prog != NULL);
-  *bytemap = prog->DumpByteMap();
-  delete prog;
+  {
+    Prog* prog = re->CompileToProg(0);
+    EXPECT_TRUE(prog != NULL);
+    *bytemap = prog->DumpByteMap();
+    delete prog;
+  }
+
+  {
+    Prog* prog = re->CompileToReverseProg(0);
+    EXPECT_TRUE(prog != NULL);
+    EXPECT_EQ(*bytemap, prog->DumpByteMap());
+    delete prog;
+  }
 
   re->Decref();
 }
@@ -213,16 +222,11 @@
   EXPECT_EQ("[00-09] -> 0\n"
             "[0a-0a] -> 1\n"
             "[0b-7f] -> 0\n"
-            "[80-8f] -> 2\n"
-            "[90-9f] -> 3\n"
-            "[a0-bf] -> 4\n"
+            "[80-bf] -> 2\n"
             "[c0-c1] -> 1\n"
-            "[c2-df] -> 5\n"
-            "[e0-e0] -> 6\n"
-            "[e1-ef] -> 7\n"
-            "[f0-f0] -> 8\n"
-            "[f1-f3] -> 9\n"
-            "[f4-f4] -> 10\n"
+            "[c2-df] -> 3\n"
+            "[e0-ef] -> 4\n"
+            "[f0-f4] -> 5\n"
             "[f5-ff] -> 1\n",
             bytemap);
 }
@@ -299,20 +303,22 @@
             "8. byte [f0-f0] 0 -> 7\n",
             reverse);
 
-  Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, NULL, &reverse);
-  EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
-            "4+ byte [c2-df] 0 -> 7\n"
-            "5+ byte [a0-bf] 1 -> 8\n"
-            "6. byte [80-bf] 0 -> 9\n"
+  Dump("[\\x{80}-\\x{10FFFF}]", Regexp::LikePerl, &forward, &reverse);
+  EXPECT_EQ("3+ byte [c2-df] 0 -> 6\n"
+            "4+ byte [e0-ef] 0 -> 8\n"
+            "5. byte [f0-f4] 0 -> 9\n"
+            "6. byte [80-bf] 0 -> 7\n"
             "7. match! 0\n"
-            "8. byte [e0-e0] 0 -> 7\n"
-            "9+ byte [e1-ef] 0 -> 7\n"
-            "10+ byte [90-bf] 1 -> 13\n"
-            "11+ byte [80-bf] 1 -> 14\n"
-            "12. byte [80-8f] 0 -> 15\n"
-            "13. byte [f0-f0] 0 -> 7\n"
-            "14. byte [f1-f3] 0 -> 7\n"
-            "15. byte [f4-f4] 0 -> 7\n",
+            "8. byte [80-bf] 0 -> 6\n"
+            "9. byte [80-bf] 0 -> 8\n",
+            forward);
+  EXPECT_EQ("3. byte [80-bf] 0 -> 4\n"
+            "4+ byte [c2-df] 0 -> 6\n"
+            "5. byte [80-bf] 0 -> 7\n"
+            "6. match! 0\n"
+            "7+ byte [e0-ef] 0 -> 6\n"
+            "8. byte [80-bf] 0 -> 9\n"
+            "9. byte [f0-f4] 0 -> 6\n",
             reverse);
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/re2-2019-12-01/re2/testing/re2_test.cc 
new/re2-2020-01-01/re2/testing/re2_test.cc
--- old/re2-2019-12-01/re2/testing/re2_test.cc  2019-11-25 12:22:49.000000000 
+0100
+++ new/re2-2020-01-01/re2/testing/re2_test.cc  2019-12-30 08:29:02.000000000 
+0100
@@ -479,22 +479,21 @@
   ASSERT_EQ(3, re1.ProgramFanout(&histogram));
   ASSERT_EQ(1, histogram[3]);
 
-  // 7 is the largest non-empty bucket and has 10 elements.
-  ASSERT_EQ(7, re10.ProgramFanout(&histogram));
-  ASSERT_EQ(10, histogram[7]);
+  // 6 is the largest non-empty bucket and has 10 elements.
+  ASSERT_EQ(6, re10.ProgramFanout(&histogram));
+  ASSERT_EQ(10, histogram[6]);
 
-  // 10 is the largest non-empty bucket and has 100 elements.
-  ASSERT_EQ(10, re100.ProgramFanout(&histogram));
-  ASSERT_EQ(100, histogram[10]);
+  // 9 is the largest non-empty bucket and has 100 elements.
+  ASSERT_EQ(9, re100.ProgramFanout(&histogram));
+  ASSERT_EQ(100, histogram[9]);
 
   // 13 is the largest non-empty bucket and has 1000 elements.
   ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
   ASSERT_EQ(1000, histogram[13]);
 
-  // 2 is the largest non-empty bucket and has 3 elements.
-  // This differs from the others due to how reverse `.' works.
+  // 2 is the largest non-empty bucket and has 1 element.
   ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
-  ASSERT_EQ(3, histogram[2]);
+  ASSERT_EQ(1, histogram[2]);
 
   // 5 is the largest non-empty bucket and has 10 elements.
   ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));

commit re2 for openSUSE:Factory

Reply via email to