---
Changes relative to the patch as originally posted:
 - utf8_escape_non_printable() escapes every byte of a non-printable
   multi-byte character instead of only the first one.
 - The test checks each result for NULL before printing it and no
   longer declares an unused buffer.
The "index" lines below are carried over unchanged from the original
posting and do not describe the revised blobs.

 src/shared/utf8.c    | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/shared/utf8.h    |   1 +
 src/test/test-utf8.c |  36 +++++++++++++++++
 3 files changed, 145 insertions(+)

diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 9353559..5245604 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -210,6 +210,114 @@ char *utf8_escape_invalid(const char *str) {
         return p;
 }
 
+/* Appends the escaped form of the byte c at s and returns the advanced
+ * write position. Emits a two-character C escape for bytes that have
+ * one, a backslash plus three octal digits for any other byte below
+ * ' ' or from 127 up, and the byte itself otherwise; never more than
+ * four bytes. */
+static char *escape_non_printable_byte(char *s, unsigned char c) {
+        switch (c) {
+
+        case '\a':
+                *(s++) = '\\';
+                *(s++) = 'a';
+                break;
+        case '\b':
+                *(s++) = '\\';
+                *(s++) = 'b';
+                break;
+        case '\f':
+                *(s++) = '\\';
+                *(s++) = 'f';
+                break;
+        case '\n':
+                *(s++) = '\\';
+                *(s++) = 'n';
+                break;
+        case '\r':
+                *(s++) = '\\';
+                *(s++) = 'r';
+                break;
+        case '\t':
+                *(s++) = '\\';
+                *(s++) = 't';
+                break;
+        case '\v':
+                *(s++) = '\\';
+                *(s++) = 'v';
+                break;
+        case '\\':
+                *(s++) = '\\';
+                *(s++) = '\\';
+                break;
+        case '"':
+                *(s++) = '\\';
+                *(s++) = '"';
+                break;
+        case '\'':
+                *(s++) = '\\';
+                *(s++) = '\'';
+                break;
+
+        default:
+                /* For special chars we prefer octal over
+                 * hexadecimal encoding, simply because glib's
+                 * g_strescape() does the same */
+                if (c < ' ' || c >= 127) {
+                        *(s++) = '\\';
+                        *(s++) = octchar(c >> 6);
+                        *(s++) = octchar(c >> 3);
+                        *(s++) = octchar(c);
+                } else
+                        *(s++) = c;
+                break;
+        }
+
+        return s;
+}
+
+/* Returns a newly allocated copy of str in which characters that
+ * utf8_is_printable() rejects are replaced by escapes of their bytes
+ * and bytes that do not start a valid UTF-8 sequence are replaced by
+ * UTF8_REPLACEMENT_CHARACTER; everything else is copied verbatim.
+ * Returns NULL if the allocation fails. The caller frees the result. */
+char *utf8_escape_non_printable(const char *str) {
+        char *p, *s;
+
+        assert(str);
+
+        /* An escaped byte takes at most four output bytes. This also
+         * assumes UTF8_REPLACEMENT_CHARACTER, which stands in for a
+         * single input byte, is no longer than that. */
+        p = s = malloc(strlen(str) * 4 + 1);
+        if (!p)
+                return NULL;
+
+        while (*str) {
+                int len;
+
+                len = utf8_encoded_valid_unichar(str);
+                if (len <= 0) {
+                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+                        str += 1;
+                } else if (utf8_is_printable(str, len)) {
+                        s = mempcpy(s, str, len);
+                        str += len;
+                } else {
+                        /* Escape every byte of the character. Stepping
+                         * over just the first byte would leave the
+                         * continuation bytes of a multi-byte character
+                         * behind to be misread as invalid input. */
+                        for (; len > 0; len--)
+                                s = escape_non_printable_byte(s, (unsigned char) *(str++));
+                }
+        }
+
+        *s = '\0';
+
+        return p;
+}
+
 char *ascii_is_valid(const char *str) {
         const char *p;
 
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index c087995..1fe1a35 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -30,6 +30,7 @@
 const char *utf8_is_valid(const char *s) _pure_;
 char *ascii_is_valid(const char *s) _pure_;
 char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable(const char *str);
 
 bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
 _pure_ static inline bool utf8_is_printable(const char* str, size_t length) {
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index b7d988f..fb27fe5 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -66,12 +66,48 @@ static void test_utf8_escaping(void) {
         assert_se(utf8_is_valid(p3));
 }
 
+static void test_utf8_escaping_printable(void) {
+        _cleanup_free_ char *p1 = NULL, *p2 = NULL, *p3 = NULL, *p4 = NULL, *p5 = NULL, *p6 = NULL;
+
+        p1 = utf8_escape_non_printable("goo goo goo");
+        assert_se(p1);
+        puts(p1);
+        assert_se(utf8_is_valid(p1));
+
+        p2 = utf8_escape_non_printable("\341\204\341\204");
+        assert_se(p2);
+        puts(p2);
+        assert_se(utf8_is_valid(p2));
+
+        p3 = utf8_escape_non_printable("\341\204");
+        assert_se(p3);
+        puts(p3);
+        assert_se(utf8_is_valid(p3));
+
+        p4 = utf8_escape_non_printable("ąę");
+        assert_se(p4);
+        puts(p4);
+        assert_se(utf8_is_valid(p4));
+
+        p5 = utf8_escape_non_printable("가너도루");
+        assert_se(p5);
+        puts(p5);
+        assert_se(utf8_is_valid(p5));
+
+        /* Note that "\019" is the byte \001 followed by the digit '9' */
+        p6 = utf8_escape_non_printable("\001 \019\a");
+        assert_se(p6);
+        puts(p6);
+        assert_se(utf8_is_valid(p6));
+}
+
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
         test_utf8_is_printable();
         test_ascii_is_valid();
         test_utf8_encoded_valid_unichar();
         test_utf8_escaping();
+        test_utf8_escaping_printable();
 
         return 0;
 }
-- 
1.9.3

_______________________________________________
systemd-devel mailing list
systemd-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/systemd-devel