git always quotes with leading zeros to ensure the octal
representation is 3 characters long. git_quote now enforces the
same padding so that our output matches git for low ASCII
characters (e.g. [\x01-\x06]), which do not need the full range
provided by 3 characters.
git_unquote now decodes every escape in a single pass, so a
backslash produced by decoding "\\" can no longer be combined with
the digits that follow it and misparsed as an octal escape.
git_unquote is also capped to "\377" so we don't overflow a
byte.
---
lib/PublicInbox/Git.pm | 5 ++---
t/git.t | 2 ++
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 96ac17a3..e557f6d6 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -50,14 +50,13 @@ my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
sub git_unquote ($) {
return $_[0] unless ($_[0] =~ /\A"(.*)"\z/);
$_[0] = $1;
- $_[0] =~ s/\\([\\"abfnrtv])/$GIT_ESC{$1}/g;
- $_[0] =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+ $_[0] =~ s!\\([\\"abfnrtv]|[0-3][0-7]{2})!$GIT_ESC{$1}//chr(oct($1))!ge;
$_[0];
}
sub git_quote ($) {
if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
- '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+ '\\'.($ESC_GIT{$1}||sprintf("%03o",ord($1)))/egs) {
return qq{"$_[0]"};
}
$_[0];
diff --git a/t/git.t b/t/git.t
index 4a45bbaf..bc1dea50 100644
--- a/t/git.t
+++ b/t/git.t
@@ -168,5 +168,7 @@
is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal');
is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq');
is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash');
is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF');
+is(git_quote($s = "hello\x06world"), '"hello\\006world"', 'quoted \\x06');
+is(git_unquote($s = '"hello\\006world"'), "hello\x06world", 'unquoted \\x06');
done_testing();
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/