Get rid of code duplication here.
---
Some alternatives I considered:
1) Keep a separate bool escape and set it when we see a backslash, still
keeping state set to VALUE or SINGLE_QUOTE or DOUBLE_QUOTE.
2) Create the enum so that ESCAPE has a bit value like 0x100 or similar, so
that we can use a bitwise operation such as state &= ~ESCAPE to go back to
the unescaped state.
Not sure if we want to differentiate between backslash escapes inside single
quotes, inside double quotes, or in unquoted text in the future... But we can
always split this code up again later, or handle specific exceptions inside
this same block.
Let me know if you'd like me to rework this to use one of the two suggested
alternate approaches above.
Cheers!
Filipe
src/shared/util.c | 68 ++-
1 file changed, 7 insertions(+), 61 deletions(-)
diff --git a/src/shared/util.c b/src/shared/util.c
index 8a6107969ae1..5b03fde1143c 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -5200,35 +5200,6 @@ int unquote_first_word(const char **p, char **ret,
UnquoteFlags flags) {
break;
-case VALUE_ESCAPE:
-if (c == 0) {
-if (flags UNQUOTE_RELAX)
-goto finish;
-return -EINVAL;
-}
-
-if (!GREEDY_REALLOC(s, allocated, sz+7))
-return -ENOMEM;
-
-if (flags UNQUOTE_CUNESCAPE) {
-uint32_t u;
-
-r = cunescape_one(*p, (size_t) -1, c, u);
-if (r 0)
-return -EINVAL;
-
-(*p) += r - 1;
-
-if (c != 0)
-s[sz++] = c; /* normal explicit char */
-else
-sz += utf8_encode_unichar(s + sz, u);
/* unicode chars we'll encode as utf8 */
-} else
-s[sz++] = c;
-
-state = VALUE;
-break;
-
case SINGLE_QUOTE:
if (c == 0) {
if (flags UNQUOTE_RELAX)
@@ -5247,35 +5218,6 @@ int unquote_first_word(const char **p, char **ret,
UnquoteFlags flags) {
break;
-case SINGLE_QUOTE_ESCAPE:
-if (c == 0) {
-if (flags UNQUOTE_RELAX)
-goto finish;
-return -EINVAL;
-}
-
-if (!GREEDY_REALLOC(s, allocated, sz+7))
-return -ENOMEM;
-
-if (flags UNQUOTE_CUNESCAPE) {
-uint32_t u;
-
-r = cunescape_one(*p, (size_t) -1, c, u);
-if (r 0)
-return -EINVAL;
-
-(*p) += r - 1;
-
-if (c != 0)
-s[sz++] = c;
-else
-sz += utf8_encode_unichar(s + sz, u);
-} else
-s[sz++] = c;
-
-state = SINGLE_QUOTE;
-break;
-
case DOUBLE_QUOTE:
if (c == 0)
return -EINVAL;
@@ -5292,7 +5234,9 @@ int unquote_first_word(const char **p, char **ret,
UnquoteFlags flags) {
break;
+case SINGLE_QUOTE_ESCAPE:
case DOUBLE_QUOTE_ESCAPE:
+case VALUE_ESCAPE:
if (c == 0) {
if (flags UNQUOTE_RELAX)
goto finish;
@@ -5312,13 +5256,15 @@ int unquote_first_word(const char **p, char **ret,
UnquoteFlags flags) {
(*p) += r - 1;
if (c != 0)
-s[sz++] = c;
+s[sz++] = c; /* normal explicit char */
else
-sz += utf8_encode_unichar(s + sz, u);
+sz += utf8_encode_unichar(s + sz, u);
/* unicode chars we'll encode as utf8 */
} else
s[sz++] = c;
-state = DOUBLE_QUOTE;
+state = (state == SINGLE_QUOTE_ESCAPE) ? SINGLE_QUOTE :
+