commit: e565696396aec1826a27bb6dfacf3b56cb369d62
Author: Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Tue Jan 27 03:09:17 2026 +0000
Commit: Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Jan 27 09:48:28 2026 +0000
URL: https://gitweb.gentoo.org/proj/gentoo-functions.git/commit/?id=e5656963
Introduce the shquote utility
As concerns the "functions.sh" unit, it provides a function named
quote_args(), the purpose of which is to transform its arguments in such
a way that they may safely be reused as input.
It has two implementations, one of which takes advantage of ${param@Q}
expansion in bash, and the other of which is implemented in awk. Of
these, the latter implementation is rather slow.
This commit introduces a utility by the name of "shquote" and integrates
it into the quote_args() function. It is written in C and is
significantly faster than the awk implementation that it replaces. In
addition to improving the performance, it requotes each argument in a
context-sensitive fashion, achieving optimal aesthetics.
$ utf8=$(printf '\345\223\210\347\275\227')
$ invalid_utf8=$(printf '\303\050')
$ quote_args foo bar 'baz quux' $'hi\nthere' "$invalid_utf8" "$utf8"
foo bar 'baz quux' $'hi\nthere' $'\303(' 哈罗
As before, dollar-single quoting may be suppressed by setting the
unfortunately named POSIXLY_CORRECT variable. I would have preferred to
remove this feature but am stymied by the fact that dash-0.5.13 remains
masked in Gentoo.
The utility is derived from code written by Leah Neukirchen and Rich
Felker, and is subject to the MIT license.
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
functions.sh | 56 +------------------
meson.build | 14 ++++-
shquote.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test-functions | 36 +++++++++----
4 files changed, 209 insertions(+), 65 deletions(-)
diff --git a/functions.sh b/functions.sh
index 1bb5ead..ec2de56 100644
--- a/functions.sh
+++ b/functions.sh
@@ -482,61 +482,9 @@ quote_args()
# shellcheck disable=3028
if [ ! "${POSIXLY_CORRECT}" ] && [ "${BASH_VERSINFO-0}" -ge 5 ]; then
_quote_args_bash "$@"
- return
+ else
+ shquote "$@"
fi
- LC_ALL=C awk -v q=\' -f - -- "$@" <<-'EOF'
- function init_table() {
- # Iterate over ranges \001-\037 and \177-\377.
- for (i = 1; i <= 255; i += (i == 31 ? 96 : 1)) {
- char = sprintf("%c", i)
- seq_by[char] = sprintf("%03o", i)
- }
- seq_by["\007"] = "a"
- seq_by["\010"] = "b"
- seq_by["\011"] = "t"
- seq_by["\012"] = "n"
- seq_by["\013"] = "v"
- seq_by["\014"] = "f"
- seq_by["\015"] = "r"
- seq_by["\033"] = "e"
- seq_by["\047"] = "'"
- seq_by["\134"] = "\\"
- }
- BEGIN {
- issue = length(ENVIRON["POSIXLY_CORRECT"]) ? 7 : 8;
- argc = ARGC
- ARGC = 1
- for (arg_idx = 1; arg_idx < argc; arg_idx++) {
- arg = ARGV[arg_idx]
- if (arg == q) {
- word = "\\" q
- } else if (issue < 8 || arg !~ /[\001-\037\177-\377]/) {
- gsub(q, q "\\" q q, arg)
- word = q arg q
- } else {
- # Use $'' quoting per POSIX-1.2024.
- if (! ("\001" in seq_by)) {
- init_table()
- }
- word = "$'"
- for (i = 1; i <= length(arg); i++) {
- char = substr(arg, i, 1)
- if (char in seq_by) {
- word = word "\\" seq_by[char]
- } else {
- word = word char
- }
- }
- word = word q
- }
- line = line word
- if (arg_idx < argc - 1) {
- line = line " "
- }
- }
- print line
- }
- EOF
}
#
diff --git a/meson.build b/meson.build
index bcb4705..4be5cc1 100644
--- a/meson.build
+++ b/meson.build
@@ -1,7 +1,10 @@
project(
'gentoo-functions', 'c',
version: '1.7.3',
- license: 'GPL-2.0-only',
+ license: [
+ 'GPL-2.0-only',
+ 'MIT'
+ ],
default_options : [
'warning_level=2',
'c_std=gnu11',
@@ -26,6 +29,12 @@ executable(
install: true
)
+executable(
+ 'shquote',
+ 'shquote.c',
+ install: true
+)
+
install_man(
'consoletype.1',
)
@@ -36,6 +45,7 @@ if do_tests
'test-functions', files('test-functions'),
workdir : meson.current_source_dir(),
protocol : 'tap',
- verbose : true
+ verbose : true,
+ env : { 'BUILD_DIR' : meson.current_build_dir() }
)
endif
diff --git a/shquote.c b/shquote.c
new file mode 100644
index 0000000..e13d497
--- /dev/null
+++ b/shquote.c
@@ -0,0 +1,168 @@
+/*
+ * shquote - intelligently quotes arguments for use as shell input
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * This software is derived from Leah Neukirchen's lr utility.
+ */
+
+/*
+ * Copyright (C) 2025 Kerin Millar
+ * Copyright (C) 2015-2025 Leah Neukirchen <purl.org/net/chneukirchen>
+ * Parts of code derived from musl libc, which is
+ * Copyright (C) 2005-2014 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+static int esc_mode = 2;
+
+static void print_shquoted(const char *s);
+static int u8decode(const char *cs, uint32_t *cp);
+
+/*
+ * Print each command-line argument, requoted by print_shquoted(), separated
+ * by single spaces and terminated by a newline.
+ *
+ * If the POSIXLY_CORRECT environment variable is set and non-empty,
+ * dollar-single quoting is disallowed and single quoting is used instead.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if standard output could not be
+ * written in full.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *var = getenv("POSIXLY_CORRECT");
+	if (var != NULL && *var != '\0') {
+		/* Disallow dollar-single quoting. */
+		esc_mode = 1;
+	}
+
+	for (int i = 1; i < argc; i++) {
+		if (i > 1)
+			putchar(' ');
+		print_shquoted(argv[i]);
+	}
+	putchar('\n');
+
+	/*
+	 * The output is intended to be reused as shell input, so a failed or
+	 * truncated write must not be reported as success.
+	 */
+	if (fflush(stdout) == EOF || ferror(stdout))
+		return EXIT_FAILURE;
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Write the string s to standard output, quoted so that it may be read back
+ * by a shell as a single word.
+ *
+ * The least intrusive form able to represent s is chosen:
+ *   - verbatim, if s is non-empty and consists solely of valid UTF-8 free of
+ *     control characters, DEL, single quotes and shell metacharacters;
+ *   - single quoting, if s is empty or contains a shell metacharacter;
+ *   - the form selected by esc_mode (dollar-single quoting unless it has been
+ *     disallowed), if s contains a control character, DEL, a single quote or
+ *     a byte sequence that is not valid UTF-8.
+ */
+static void
+print_shquoted(const char *s)
+{
+	uint32_t ignored;
+	int l;
+
+	const char *t;
+	int esc = 0;
+
+	/*
+	 * An empty argument must still yield a word. Conveying it verbatim
+	 * would print nothing at all, causing the argument to vanish once the
+	 * output is read back.
+	 */
+	if (*s == '\0') {
+		printf("''");
+		return;
+	}
+
+	/* Scan s to determine the weakest form of quoting that suffices. */
+	for (t = s; *t; ) {
+		if ((unsigned char)*t < 32 || strchr("'\177", *t)) {
+			/* Control character, DEL or single quote. */
+			esc = esc_mode;
+			break;
+		} else if (strchr("`^#*[]=|\\?${}()\"<>&;~\040", *t)) {
+			/* Bias towards single quoting. */
+			esc = 1;
+			/* No stronger form is permitted, so stop scanning. */
+			if (esc == esc_mode)
+				break;
+			t += 1;
+		} else {
+			if ((l = u8decode(t, &ignored)) < 0) {
+				/* Invalid UTF-8 byte sequence encountered. */
+				esc = esc_mode;
+				break;
+			}
+			t += l;
+		}
+	}
+
+	switch (esc) {
+	case 0:
+		/* Convey verbatim. */
+		printf("%s", s);
+		break;
+	case 1:
+		/* Employ single quoting. */
+		putchar('\'');
+		for (; *s; s++) {
+			if (*s == '\'')
+				/* Close the quotes, emit \' and reopen them. */
+				printf("'\\''");
+			else
+				putchar(*s);
+		}
+		putchar('\'');
+		break;
+	case 2:
+		/* Employ dollar-single quoting. */
+		printf("$'");
+		for (; *s; s++) {
+			switch (*s) {
+			case '\a': printf("\\a"); break;
+			case '\b': printf("\\b"); break;
+			/* ESC, spelled in octal as '\e' is not standard C. */
+			case '\033': printf("\\e"); break;
+			case '\f': printf("\\f"); break;
+			case '\n': printf("\\n"); break;
+			case '\r': printf("\\r"); break;
+			case '\t': printf("\\t"); break;
+			case '\v': printf("\\v"); break;
+			case '\\': printf("\\\\"); break;
+			case '\'': printf("\\\'"); break;
+			default:
+				if ((unsigned char)*s < 32
+				    || (unsigned char)*s == 127
+				    || (l = u8decode(s, &ignored)) < 0) {
+					/* Emit the offending byte as a three-digit octal escape. */
+					printf("\\%03o", (unsigned char)*s);
+				} else {
+					/* Copy the whole UTF-8 sequence as is. */
+					printf("%.*s", l, s);
+					/* The loop increment accounts for the final byte. */
+					s += l-1;
+				}
+			}
+		}
+		putchar('\'');
+		break;
+	}
+}
+
+/* Decode one UTF-8 codepoint into cp, return number of bytes to next one.
+ * Return 0, with cp set to 0, if cs points at the terminating NUL byte.
+ * On invalid UTF-8 (stray continuation byte, overlong encoding, surrogate,
+ * value too high or truncated sequence), return -1, and do not change cp.
+ * Invalid codepoints are not checked.
+ *
+ * This code is meant to be inlined, if cp is unused it can be optimized away.
+ */
+static int
+u8decode(const char *cs, uint32_t *cp)
+{
+	const uint8_t *s = (const uint8_t *)cs;
+	/* Accumulate locally so that cp is only written upon success. */
+	uint32_t c;
+
+	if (*s == 0) { *cp = 0; return 0; }
+	if (*s < 0x80) { *cp = *s; return 1; }
+	if (*s < 0xc2) { return -1; } /*cont+overlong*/
+	if (*s < 0xe0) { c = *s & 0x1f; goto u2; }
+	if (*s < 0xf0) {
+		if (*s == 0xe0 && (s[1] & 0xe0) == 0x80) return -1; /*overlong*/
+		if (*s == 0xed && (s[1] & 0xe0) == 0xa0) return -1; /*surrogate*/
+		c = *s & 0x0f; goto u3;
+	}
+	if (*s < 0xf5) {
+		if (*s == 0xf0 && (s[1] & 0xf0) == 0x80) return -1; /*overlong*/
+		if (*s == 0xf4 && (s[1] > 0x8f)) return -1; /*too high*/
+		c = *s & 0x07; goto u4;
+	}
+	return -1;
+
+	/*
+	 * Each label consumes one continuation byte and then falls through to
+	 * the next, so that entering at uN consumes N-1 continuation bytes.
+	 */
+u4:
+	if ((*++s & 0xc0) != 0x80)
+		return -1;
+	c = (c << 6) | (*s & 0x3f);
+u3:
+	if ((*++s & 0xc0) != 0x80)
+		return -1;
+	c = (c << 6) | (*s & 0x3f);
+u2:
+	if ((*++s & 0xc0) != 0x80)
+		return -1;
+	c = (c << 6) | (*s & 0x3f);
+
+	*cp = c;
+	return (int)(s - (const uint8_t *)cs) + 1;
+}
diff --git a/test-functions b/test-functions
index feb46f5..4bd7643 100755
--- a/test-functions
+++ b/test-functions
@@ -996,9 +996,15 @@ test_quote_args() {
set -- eq 0
callback() {
- local POSIXLY_CORRECT cksum fmt i str
+ local expected_cksum cksum fmt i str
- test_description="quote_args output test (expecting cksum
380900690)"
+ if [ "${BASH}" ]; then
+ expected_cksum=380900690
+ else
+ expected_cksum=1492849101
+ fi
+
+ test_description="quote_args output test (expecting cksum
${expected_cksum})"
i=0
# The generator fails to produce the correct output in yash
# unless the effective character type is C/POSIX. However, once
@@ -1006,14 +1012,17 @@ test_quote_args() {
# if in its posix mode. As things stand, there is little point
# in fixing it because yash also disables the local builtin in
# its posix mode, causing test-functions to bail out sooner.
- while [ "$((i += 1))" -le 255 ]; do
- fmt=$(printf '\\%o' "$i")
- # shellcheck disable=2059
- str=$(printf "$fmt.")
- quote_args "${str%.}" || break
- done \
+ {
+ POSIXLY_CORRECT=
+ while [ "$((i += 1))" -le 255 ]; do
+ fmt=$(printf '\\%o' "$i")
+ # shellcheck disable=2059
+ str=$(printf "${fmt}.")
+ quote_args "${str%.}" || break
+ done
+ } \
| cksum \
- | { read -r cksum _ && test "${cksum}" = "380900690"; }
+ | { read -r cksum _ && test "${cksum}" = "${expected_cksum}"; }
}
iterate_tests 2 "$@"
@@ -1242,6 +1251,15 @@ elif ! GENFUN_MODULES="portage rc" . ./functions.sh; then
bailout "Couldn't source ./functions.sh"
else
assign_tmpdir
+
+ # Since the test suite is normally executed during the src_test phase,
+ # the shquote utility will not yet have been installed. Account for
+ # that by redefining the quote_args() function.
+ # shellcheck disable=3028
+ if [ "${EBUILD_PHASE}" = test ] && [ "${BASH_VERSINFO-0}" -lt 5 ]; then
+ quote_args() { "${BUILD_DIR:?}"/shquote "$@"; }
+ fi
+
test_chdir || rc=1
test_ebegin || rc=1
test_is_older_than || rc=1