gbranden pushed a commit to branch master
in repository groff.
commit c8332c5c1acdd16072672f2fa401cd77805e7264
Author: G. Branden Robinson <[email protected]>
AuthorDate: Sun Aug 25 18:56:54 2024 -0500
[troff]: Begin fixing Savannah #63074.
Support construction of arbitrary byte sequences in device control
commands.
* src/roff/troff/input.cpp (encode_special_character_for_device_output):
Enhance. When constructing the content of a device control escape
sequence (and, in the future, that of a `device` request), try harder
to convert special characters into something meaningful. If a special
character identifier looks like something other than an attempt at a
Unicode special character escape sequence already, try to convert it
into one. Otherwise, write any valid Unicode special character
identifier (in groff notation: `\[u123ABC]`) to the macro being
assembled (and thence the `special` node, and ultimately the `x X`
command this node type produces).
* src/roff/groff/tests/device-control-special-character-handling.sh:
Update test expectations. "\[u1F6C3]" is now correctly passed
through, and "\[`a]" correctly converted to "\[u00E0]". Shorten test
cases a little.
---
ChangeLog | 23 +++++++++++++
.../device-control-special-character-handling.sh | 40 ++++++++++------------
src/roff/troff/input.cpp | 39 +++++++++++++++++----
3 files changed, 75 insertions(+), 27 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index cc3db925e..6e0bc9a4d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2024-08-25 G. Branden Robinson <[email protected]>
+
+ [troff]: Begin fixing Savannah #63074: support construction of
+ arbitrary byte sequences in device control commands using groff
+ special character sequence notation.
+
+ * src/roff/troff/input.cpp
+ (encode_special_character_for_device_output): Enhance. When
+ constructing the content of a device control escape sequence
+ {and, in the future, that of a `device` request}, try harder to
+ convert special characters into something meaningful. If a
+ special character identifier looks like something other than an
+ attempt at a Unicode special character escape sequence already,
+ try to convert it into one. Otherwise, write any valid Unicode
+ special character identifier {in groff notation: `\[u123ABC]`}
+ to the macro being assembled {and thence the `special` node, and
+ ultimately the `x X` command this node type produces}.
+ * src/roff/groff/tests/\
+ device-control-special-character-handling.sh: Update test
+ expectations. "\[u1F6C3]" is now correctly passed through, and
+ "\[`a]" correctly converted to "\[u00E0]". Shorten test cases a
+ little.
+
2024-08-28 G. Branden Robinson <[email protected]>
* src/roff/troff/input.cpp
diff --git a/src/roff/groff/tests/device-control-special-character-handling.sh
b/src/roff/groff/tests/device-control-special-character-handling.sh
index 661125b76..e1f4651ff 100755
--- a/src/roff/groff/tests/device-control-special-character-handling.sh
+++ b/src/roff/groff/tests/device-control-special-character-handling.sh
@@ -27,12 +27,14 @@ wail () {
fail=YES
}
-input='.nf
-\X#bogus1: esc \%man-beast\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]#
-.device bogus1: req \%man-beast\[u1F63C]\\[u1F00]
-\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]
+input='.
+.nf
+\X#bogus1: esc \%to-do\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]#
+.device bogus1: req \%to-do\[u1F63C]\\[u1F00]
-\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]
.ec @
-@X#bogus2: esc @%man-beast@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]#
-.device bogus2: req @%man-beast@[u1F63C]@@[u1F00]
-@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]'
+@X#bogus2: esc @%to-do@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]#
+.device bogus2: req @%to-do@[u1F63C]@@[u1F00]
-@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]
+.'
output=$(printf '%s\n' "$input" | "$groff" -T ps -Z 2> /dev/null \
| grep '^x X')
@@ -41,38 +43,34 @@ error=$(printf '%s\n' "$input" | "$groff" -T ps -Z 2>&1 >
/dev/null)
echo "$output"
echo "$error"
+# Expected:
+# x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"`^\~\[u00E0]
+# x X bogus1: req @%to-do\[u1F63C]\[u1F00]
-\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]#
+# x X bogus2: esc to-do\[u1F63C]\[u1F00] -'"`^\~\[u00E0]
+# x X bogus2: req @%to-do@[u1F63C]@[u1F00]
-@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]#
+
echo "checking X escape sequence, default escape character" >&2
-# x X bogus1: esc man-beast\[u1F00] -'"`^\~
echo "$output" \
- | grep -qx 'x X bogus1: esc man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+ | grep -Fqx \
+ 'x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"'"'"`^\~\[u00E0]' \
|| wail
#echo "checking device request, default escape character" >&2
-## x X bogus1: req man-beast\[u1F00] -'"`^\~
#echo "$output" \
-# | grep -qx 'x X bogus1: req man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+# | grep -qx 'x X bogus1: req to-do\\\[u1F00\] -'"'"'"`^\\~' \
# || wail
echo "checking X escape sequence, alternate escape character" >&2
-# x X bogus2: esc man-beast\[u1F00] -'"`^\~
echo "$output" \
- | grep -qx 'x X bogus2: esc man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+ | grep -Fqx \
+ 'x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"'"'"`^\~\[u00E0]' \
|| wail
#echo "checking device request, alternate escape character" >&2
-## x X bogus2: req man-beast\[u1F00] -'"`^\~
#echo "$output" \
-# | grep -qx 'x X bogus2: req man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+# | grep -qx 'x X bogus2: req to-do\\\[u1F00\] -'"'"'"`^\\~' \
# || wail
-echo "checking for errors on unsupported special character escapes" >&2
-#for lineno in 2 3 5 6
-#do
-# echo "$error" \
-# | grep -q 'troff:.*:'$lineno':.* invalid.*device control command' \
-# || wail
-#done
-
test -z "$fail"
# vim:set autoindent expandtab shiftwidth=2 tabstop=2 textwidth=72:
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 10e6f92a4..71af2e5d0 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -5730,13 +5730,40 @@ static void
encode_special_character_for_device_output(macro *mac)
}
mac->append(']');
}
- else
- error("special character '%1' is unusable within a device"
- " control escape sequence", sc);
}
- else
- error("special character '%1' cannot be used within a device"
- " control escape sequence", sc);
+ else {
+ char errbuf[ERRBUFSZ];
+ char character[UNIBUFSZ + 1 /* '\0' */];
+ (void) memset(errbuf, '\0', ERRBUFSZ);
+ (void) memset(character, '\0', UNIBUFSZ);
+ // If looks like something other than an attempt at a Unicode
+ // special character escape sequence already, try to convert it
+ // into one. Output drivers don't (and shouldn't) know anything
+ // about a troff formatter's special character identifiers.
+ if ((strlen(sc) < 3)
+ || (!csxdigit(sc[1]) && (!csxdigit(sc[2])))) {
+ const char *un = glyph_name_to_unicode(sc);
+ if (un != 0 /* nullptr */)
+ strncpy(character, un, UNIBUFSZ);
+ else {
+ error("special character '%1' is unusable within a device"
+ " control escape sequence", sc);
+ return;
+ }
+ }
+ else {
+ const char *un = valid_unicode_code_sequence(sc, errbuf);
+ if (0 /* nullptr */ == un) {
+ error("special character '%1' is unusable within a device"
+ " control escape sequence: %2", sc, errbuf);
+ return;
+ }
+ strncpy(character, un, UNIBUFSZ);
+ }
+ mac->append_str("\\[u");
+ mac->append_str(character);
+ mac->append(']');
+ }
}
}
_______________________________________________
Groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit