The test for \N where N was larger than the number of capturing groups
in the regular expression was incorrect, and fired for cases such as
matching __(ARM_)?NR_([a-z]*) against __NR_read, where the first group is
empty (because it failed to match) but the second group did match "read".

Use regex_t's re_nsub for the error check, and treat rm_so == -1 as a
signal to just copy nothing into the result.

(Found trying to build minijail in AOSP.)
---
 tests/sed.test   | 11 +++++++++++
 toys/posix/sed.c | 10 +++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)
From bcdd964ae238f2c3c3ff1e7ec10f9fcc39f0de3c Mon Sep 17 00:00:00 2001
From: Elliott Hughes <[email protected]>
Date: Tue, 12 Feb 2019 16:29:09 -0800
Subject: [PATCH] sed: fix substitution of empty capturing groups.

The test for \N where N was larger than the number of capturing groups
in the regular expression was incorrect, and fired for cases such as
matching __(ARM_)?NR_([a-z]*) against __NR_read, where the first group is
empty (because it failed to match) but the second group did match "read".

Use regex_t's re_nsub for the error check, and treat rm_so == -1 as a
signal to just copy nothing into the result.

(Found trying to build minijail in AOSP.)
---
 tests/sed.test   | 11 +++++++++++
 toys/posix/sed.c | 10 +++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tests/sed.test b/tests/sed.test
index f2ff8fd7..34dfa161 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -72,6 +72,8 @@ testing "aci" \
 	"sed -e '3a boom' -e '/hre/i bang' -e '3a whack' -e '3c bong'" \
 	"one\ntwo\nbang\nbong\nboom\nwhack\nfour\n" "" \
 	"one\ntwo\nthree\nfour\n"
+# TODO: next test is broken on new-ish debian/bash with never-ending
+# output of `sed: short write: Broken pipe`.
 testing "b loop" "sed ':woo;=;b woo' | head -n 5" '1\n1\n1\n1\n1\n' "" "X"
 testing "b skip" "sed -n '2b zap;d;:zap;p'" "two\n" "" "one\ntwo\nthree"
 testing "b end" "sed -n '2b;p'" "one\nthree" "" "one\ntwo\nthree"
@@ -167,4 +169,13 @@ testing "end b with }" "sed -n '/START/{:a;n;/END/q;p;ba}'" "b\nc\n" \
 
 testing '-z' 'sed -z "s/\n/-/g"' "a-b-c" "" "a\nb\nc"
 
+# toybox handling of empty capturing groups broke minijail. Check that we
+# correctly replace an empty capturing group with the empty string:
+testing '\n with empty capture' \
+    'sed -E "s/(ARM_)?(NR_)([a-z]*) (.*)/\1\2\3/"' "NR_read" "" "NR_read foo"
+# ...but also that we report an error for a backreference to a group that
+# isn't in the pattern:
+testing '\n too high' \
+    'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
+
 # -i with $ last line test
diff --git a/toys/posix/sed.c b/toys/posix/sed.c
index 228055f9..4e6aacfb 100644
--- a/toys/posix/sed.c
+++ b/toys/posix/sed.c
@@ -528,15 +528,19 @@ static void sed_line(char **pline, long plen)
                 rswap[mlen-1] = new[off];
 
               continue;
-            } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
+            } else if (cc > reg->re_nsub) error_exit("no s//\\%d/", cc);
           } else if (new[off] != '&') {
             rswap[mlen++] = new[off];
 
             continue;
           }
 
-          ll = match[cc].rm_eo-match[cc].rm_so;
-          memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
+          if (match[cc].rm_so == -1) {
+            ll = 0; // Empty match.
+          } else {
+            ll = match[cc].rm_eo-match[cc].rm_so;
+            memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
+	  }
           mlen += ll;
         }
 
-- 
2.20.1.791.gb4d0f1c61a-goog

_______________________________________________
Toybox mailing list
[email protected]
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to