git always quotes with leading zeros to ensure the octal
representation is 3 characters long.  We enforce that to match
low ASCII characters (e.g. [x01-\x06]) that don't need the
range provided by 3 characters.

git_unquote now does a single pass so it won't get fooled by
decoded backslashes into parsing a digit as an octal character.
git_unquote is also capped to "\377" so we don't overflow a
byte.
---
 lib/PublicInbox/Git.pm | 5 ++---
 t/git.t                | 2 ++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 96ac17a3..e557f6d6 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -50,14 +50,13 @@ my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
 sub git_unquote ($) {
        return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
        $_[0] = $1;
-       $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
-       $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+       $_[0] =~ s!\\([\\"abfnrtv]|[0-3][0-7]{2})!$GIT_ESC{$1}//chr(oct($1))!ge;
        $_[0];
 }
 
 sub git_quote ($) {
        if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
-                     '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+                     '\\'.($ESC_GIT{$1}||sprintf("%03o",ord($1)))/egs) {
                return qq{"$_[0]"};
        }
        $_[0];
diff --git a/t/git.t b/t/git.t
index 4a45bbaf..bc1dea50 100644
--- a/t/git.t
+++ b/t/git.t
@@ -168,5 +168,7 @@ is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted 
octal');
 is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq');
 is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash');
 is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF');
+is(git_quote($s = "hello\x06world"), '"hello\\006world"', 'quoted \\x06');
+is(git_unquote($s = '"hello\\006world"'), "hello\x06world", 'unquoted \\x06');
 
 done_testing();
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/

Reply via email to