git: 7c2c2c2a2253 - main - ed: add unicode test cases to ATF test suite

Baptiste Daroussin Sun, 01 Mar 2026 03:29:32 -0800

The branch main has been updated by bapt:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7c2c2c2a2253370c88fe428cf1c0ecebd68fe864


commit 7c2c2c2a2253370c88fe428cf1c0ecebd68fe864
Author:     Baptiste Daroussin <[email protected]>
AuthorDate: 2026-02-17 16:38:29 +0000
Commit:     Baptiste Daroussin <[email protected]>
CommitDate: 2026-03-01 11:25:16 +0000

    ed: add unicode test cases to ATF test suite
    
    Including examples in Cyrillic suggested by kib@
    
    Differential Revision:  https://reviews.freebsd.org/D55364
---
 bin/ed/tests/ed_test.sh | 333 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 333 insertions(+)

diff --git a/bin/ed/tests/ed_test.sh b/bin/ed/tests/ed_test.sh
index c67df8ae9f65..d4b18fa92ca0 100755
--- a/bin/ed/tests/ed_test.sh
+++ b/bin/ed/tests/ed_test.sh
@@ -1687,6 +1687,322 @@ z
 CMDS
 }
 
+# ---------------------------------------------------------------------------
+# Unicode support
+# ---------------------------------------------------------------------------
+atf_test_case unicode_list_multibyte
+unicode_list_multibyte_head()
+{
+       atf_set "descr" "l command displays multibyte UTF-8 as-is";
+}
+unicode_list_multibyte_body()
+{
+       # Under LC_CTYPE=C.UTF-8, `l` must echo 'café' unescaped, then '$'.
+       export LC_CTYPE=C.UTF-8
+       printf 'café\n' > input.txt
+       atf_check -o inline:'café$\n' ed -s - <<'CMDS'
+H
+r input.txt
+l
+Q
+CMDS
+}
+
+atf_test_case unicode_list_cjk
+unicode_list_cjk_head()
+{
+       atf_set "descr" "l command displays CJK characters as-is";
+}
+unicode_list_cjk_body()
+{
+       # Under LC_CTYPE=C.UTF-8, `l` must echo the CJK line unescaped, then '$'.
+       export LC_CTYPE=C.UTF-8
+       printf '日本語テスト\n' > input.txt
+       atf_check -o inline:'日本語テスト$\n' ed -s - <<'CMDS'
+H
+r input.txt
+l
+Q
+CMDS
+}
+
+atf_test_case unicode_list_mixed
+unicode_list_mixed_head()
+{
+       atf_set "descr" "l command displays mixed ASCII/UTF-8 correctly";
+}
+unicode_list_mixed_body()
+{
+       # ASCII, accented Latin and CJK on one line must all pass `l` unescaped.
+       export LC_CTYPE=C.UTF-8
+       printf 'hello café 世界\n' > input.txt
+       atf_check -o inline:'hello café 世界$\n' ed -s - <<'CMDS'
+H
+r input.txt
+l
+Q
+CMDS
+}
+
+atf_test_case unicode_list_invalid
+unicode_list_invalid_head()
+{
+       atf_set "descr" "l command escapes invalid UTF-8 as octal";
+}
+unicode_list_invalid_body()
+{
+       # Input is the raw bytes 0200 0201 0376 0377; `l` must print them as \ooo.
+       export LC_CTYPE=C.UTF-8
+       printf '\200\201\376\377\n' > input.txt
+       atf_check -o inline:'\\200\\201\\376\\377$\n' ed -s - <<'CMDS'
+H
+r input.txt
+l
+Q
+CMDS
+}
+
+atf_test_case unicode_list_wrap_cjk
+unicode_list_wrap_cjk_head()
+{
+       atf_set "descr" "l command wraps correctly around double-width CJK";
+}
+unicode_list_wrap_cjk_body()
+{
+       export LC_CTYPE=C.UTF-8
+       # 69 generated A's + 日本 (width 2 each): 69 + 2 = 71 cols for 日 (fits),
+       # 71 + 2 = 73 for 本 (exceeds 72), so 本 wraps to next line.
+       as=$(printf '%069d' 0 | tr 0 A)
+       printf '%s日本\n' "$as" > input.txt
+       atf_check -o save:output.txt ed -s - <<'CMDS'
+H
+r input.txt
+l
+Q
+CMDS
+       printf '%s日\\\n本$\n' "$as" > expected.txt
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_print
+unicode_print_head()
+{
+       atf_set "descr" "p command passes through UTF-8 correctly";
+}
+unicode_print_body()
+{
+       # `p` output must equal the input line exactly (no escapes, no '$').
+       export LC_CTYPE=C.UTF-8
+       printf 'café 日本語\n' > input.txt
+       atf_check -o inline:'café 日本語\n' ed -s - <<'CMDS'
+H
+r input.txt
+p
+Q
+CMDS
+}
+
+atf_test_case unicode_number
+unicode_number_head()
+{
+       atf_set "descr" "n command displays line number with UTF-8";
+}
+unicode_number_body()
+{
+       # `n` output must be the line number, a tab, then the unmodified line.
+       export LC_CTYPE=C.UTF-8
+       printf 'café 日本語\n' > input.txt
+       atf_check -o inline:'1\tcafé 日本語\n' ed -s - <<'CMDS'
+H
+r input.txt
+n
+Q
+CMDS
+}
+
+atf_test_case unicode_regex
+unicode_regex_head()
+{
+       atf_set "descr" "Regex search matches UTF-8 characters";
+}
+unicode_regex_body()
+{
+       # Of the three input lines, only the one containing 'é' may be printed.
+       export LC_CTYPE=C.UTF-8
+       printf 'café\ntest\nüber\n' > input.txt
+       atf_check -o inline:'café\n' ed -s - <<'CMDS'
+H
+r input.txt
+g/é/p
+Q
+CMDS
+}
+
+atf_test_case unicode_regex_charclass
+unicode_regex_charclass_head()
+{
+       atf_set "descr" "Regex character classes work with UTF-8";
+}
+unicode_regex_charclass_body()
+{
+       # `[[:alpha:]]*é` must select 'café123' and leave 'test456' unprinted.
+       export LC_CTYPE=C.UTF-8
+       printf 'café123\ntest456\n' > input.txt
+       atf_check -o inline:'café123\n' ed -s - <<'CMDS'
+H
+r input.txt
+g/[[:alpha:]]*é/p
+Q
+CMDS
+}
+
+atf_test_case unicode_substitute
+unicode_substitute_head()
+{
+       atf_set "descr" "Substitute replaces UTF-8 characters";
+}
+unicode_substitute_body()
+{
+       # Swap 'é' for 'e' and compare the written file; ed runs under atf_check.
+       export LC_CTYPE=C.UTF-8
+       printf 'café\n' > input.txt
+       atf_check ed -s - <<'CMDS'
+H
+r input.txt
+s/é/e/
+w output.txt
+Q
+CMDS
+       printf 'cafe\n' > expected.txt
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_substitute_cjk
+unicode_substitute_cjk_head()
+{
+       atf_set "descr" "Substitute replaces CJK characters";
+}
+unicode_substitute_cjk_body()
+{
+       # Swap '世界' for 'world' and compare the file; ed runs under atf_check.
+       export LC_CTYPE=C.UTF-8
+       printf 'hello 世界\n' > input.txt
+       atf_check ed -s - <<'CMDS'
+H
+r input.txt
+s/世界/world/
+w output.txt
+Q
+CMDS
+       printf 'hello world\n' > expected.txt
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_global_substitute
+unicode_global_substitute_head()
+{
+       atf_set "descr" "Global substitute works with UTF-8";
+}
+unicode_global_substitute_body()
+{
+       # Lines matching 'à' get it replaced by 'a'; ed runs under atf_check.
+       export LC_CTYPE=C.UTF-8
+       printf 'à la carte\nà bientôt\nhello\n' > input.txt
+       atf_check ed -s - <<'CMDS'
+H
+r input.txt
+g/à/s/à/a/
+w output.txt
+Q
+CMDS
+       cat > expected.txt <<'EOF'
+a la carte
+a bientôt
+hello
+EOF
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_join
+unicode_join_head()
+{
+       atf_set "descr" "Join preserves UTF-8 content";
+}
+unicode_join_body()
+{
+       # Join both lines and compare the written file; ed runs under atf_check.
+       export LC_CTYPE=C.UTF-8
+       printf 'café\n世界\n' > input.txt
+       atf_check ed -s - <<'CMDS'
+H
+r input.txt
+1,2j
+w output.txt
+Q
+CMDS
+       printf 'café世界\n' > expected.txt
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_append
+unicode_append_head()
+{
+       atf_set "descr" "Append preserves UTF-8 text";
+}
+unicode_append_body()
+{
+       # Append two accented lines, write them out; ed runs under atf_check.
+       export LC_CTYPE=C.UTF-8
+       atf_check ed -s - <<'CMDS'
+H
+a
+première
+deuxième
+.
+w output.txt
+Q
+CMDS
+       cat > expected.txt <<'EOF'
+première
+deuxième
+EOF
+       atf_check cmp output.txt expected.txt
+}
+
+atf_test_case unicode_cyrillic
+unicode_cyrillic_head()
+{
+       atf_set "descr" "Cyrillic: append, substitute, print, regex search";
+}
+unicode_cyrillic_body()
+{
+       # ed's stdout is saved by atf_check, which also checks its exit status.
+       export LC_CTYPE=C.UTF-8
+       atf_check -o save:output.txt ed -s - <<'CMDS'
+H
+a
+Привет
+.
+s/ривет/ока/
+1p
+a
+Строка
+.
+1
+/а/p
+1,$p
+Q
+CMDS
+       cat > expected.txt <<'EOF'
+Пока
+Пока
+Строка
+Пока
+Строка
+EOF
+       atf_check cmp output.txt expected.txt
+}
+
 # ---------------------------------------------------------------------------
 # Registration
 # ---------------------------------------------------------------------------
@@ -1735,6 +2051,23 @@ atf_init_test_cases()
        atf_add_test_case newline_insert
        atf_add_test_case newline_search
 
+       # Unicode support
+       atf_add_test_case unicode_list_multibyte
+       atf_add_test_case unicode_list_cjk
+       atf_add_test_case unicode_list_mixed
+       atf_add_test_case unicode_list_invalid
+       atf_add_test_case unicode_list_wrap_cjk
+       atf_add_test_case unicode_print
+       atf_add_test_case unicode_number
+       atf_add_test_case unicode_regex
+       atf_add_test_case unicode_regex_charclass
+       atf_add_test_case unicode_substitute
+       atf_add_test_case unicode_substitute_cjk
+       atf_add_test_case unicode_global_substitute
+       atf_add_test_case unicode_join
+       atf_add_test_case unicode_append
+       atf_add_test_case unicode_cyrillic
+
        # Error tests
        atf_add_test_case err_append_suffix
        atf_add_test_case err_addr_out_of_range

git: 7c2c2c2a2253 - main - ed: add unicode test cases to ATF test suite

Reply via email to