gbranden pushed a commit to branch master
in repository groff.

commit c8332c5c1acdd16072672f2fa401cd77805e7264
Author: G. Branden Robinson <[email protected]>
AuthorDate: Sun Aug 25 18:56:54 2024 -0500

    [troff]: Begin fixing Savannah #63074.
    
    Support construction of arbitrary byte sequences in device control
    commands.
    
    * src/roff/troff/input.cpp (encode_special_character_for_device_output):
      Enhance.  When constructing the content of a device control escape
      sequence (and, in the future, that of a `device` request), try harder
      to convert special characters into something meaningful.  If a special
      character identifier looks like something other than an attempt at a
      Unicode special character escape sequence already, try to convert it
      into one.  Otherwise, write any valid Unicode special character
      identifier (in groff notation: `\[u123ABC]`) to the macro being
      assembled (and thence the `special` node, and ultimately the `x X`
      command this node type produces).
    * src/roff/groff/tests/device-control-special-character-handling.sh:
      Update test expectations.  "\[u1F6C3]" is now correctly passed
      through, and "\[`a]" correctly converted to "\[u00E0]".  Shorten test
      cases a little.
---
 ChangeLog                                          | 23 +++++++++++++
 .../device-control-special-character-handling.sh   | 40 ++++++++++------------
 src/roff/troff/input.cpp                           | 39 +++++++++++++++++----
 3 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cc3db925e..6e0bc9a4d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2024-08-25  G. Branden Robinson <[email protected]>
+
+       [troff]: Begin fixing Savannah #63074: support construction of
+       arbitrary byte sequences in device control commands using groff
+       special character sequence notation.
+
+       * src/roff/troff/input.cpp
+       (encode_special_character_for_device_output): Enhance.  When
+       constructing the content of a device control escape sequence
+       {and, in the future, that of a `device` request}, try harder to
+       convert special characters into something meaningful.  If a
+       special character identifier looks like something other than an
+       attempt at a Unicode special character escape sequence already,
+       try to convert it into one.  Otherwise, write any valid Unicode
+       special character identifier {in groff notation: `\[u123ABC]`}
+       to the macro being assembled {and thence the `special` node, and
+       ultimately the `x X` command this node type produces}.
+       * src/roff/groff/tests/\
+       device-control-special-character-handling.sh: Update test
+       expectations.  "\[u1F6C3]" is now correctly passed through, and
+       "\[`a]" correctly converted to "\[u00E0]".  Shorten test cases a
+       little.
+
 2024-08-28  G. Branden Robinson <[email protected]>
 
        * src/roff/troff/input.cpp
diff --git a/src/roff/groff/tests/device-control-special-character-handling.sh b/src/roff/groff/tests/device-control-special-character-handling.sh
index 661125b76..e1f4651ff 100755
--- a/src/roff/groff/tests/device-control-special-character-handling.sh
+++ b/src/roff/groff/tests/device-control-special-character-handling.sh
@@ -27,12 +27,14 @@ wail () {
   fail=YES
 }
 
-input='.nf
-\X#bogus1: esc \%man-beast\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]#
-.device bogus1: req \%man-beast\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]
+input='.
+.nf
+\X#bogus1: esc \%to-do\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]#
+.device bogus1: req \%to-do\[u1F63C]\\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]
 .ec @
-@X#bogus2: esc @%man-beast@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]#
-.device bogus2: req @%man-beast@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]'
+@X#bogus2: esc @%to-do@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]#
+.device bogus2: req @%to-do@[u1F63C]@@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]
+.'
 
 output=$(printf '%s\n' "$input" | "$groff" -T ps -Z 2> /dev/null \
   | grep '^x X')
@@ -41,38 +43,34 @@ error=$(printf '%s\n' "$input" | "$groff" -T ps -Z 2>&1 > /dev/null)
 echo "$output"
 echo "$error"
 
+# Expected:
+# x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"`^\~\[u00E0]
+# x X bogus1: req @%to-do\[u1F63C]\[u1F00] -\[aq]\[dq]\[ga]\[ha]\[rs]\[ti]\[`a]#
+# x X bogus2: esc to-do\[u1F63C]\[u1F00] -'"`^\~\[u00E0]
+# x X bogus2: req @%to-do@[u1F63C]@[u1F00] -@[aq]@[dq]@[ga]@[ha]@[rs]@[ti]@[`a]#
+
 echo "checking X escape sequence, default escape character" >&2
-# x X bogus1: esc man-beast\[u1F00] -'"`^\~
 echo "$output" \
-  | grep -qx 'x X bogus1: esc man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+  | grep -Fqx \
+  'x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"'"'"`^\~\[u00E0]' \
   || wail
 
 #echo "checking device request, default escape character" >&2
-## x X bogus1: req man-beast\[u1F00] -'"`^\~
 #echo "$output" \
-#  | grep -qx 'x X bogus1: req man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+#  | grep -qx 'x X bogus1: req to-do\\\[u1F00\] -'"'"'"`^\\~' \
 #  || wail
 
 echo "checking X escape sequence, alternate escape character" >&2
-# x X bogus2: esc man-beast\[u1F00] -'"`^\~
 echo "$output" \
-  | grep -qx 'x X bogus2: esc man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+  | grep -Fqx \
+  'x X bogus1: esc to-do\[u1F63C]\[u1F00] -'"'"'"`^\~\[u00E0]' \
   || wail
 
 #echo "checking device request, alternate escape character" >&2
-## x X bogus2: req man-beast\[u1F00] -'"`^\~
 #echo "$output" \
-#  | grep -qx 'x X bogus2: req man-beast\\\[u1F00\] -'"'"'"`^\\~' \
+#  | grep -qx 'x X bogus2: req to-do\\\[u1F00\] -'"'"'"`^\\~' \
 #  || wail
 
-echo "checking for errors on unsupported special character escapes" >&2
-#for lineno in 2 3 5 6
-#do
-#  echo "$error" \
-#    | grep -q 'troff:.*:'$lineno':.* invalid.*device control command' \
-#    || wail
-#done
-
 test -z "$fail"
 
 # vim:set autoindent expandtab shiftwidth=2 tabstop=2 textwidth=72:
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 10e6f92a4..71af2e5d0 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -5730,13 +5730,40 @@ static void encode_special_character_for_device_output(macro *mac)
        }
        mac->append(']');
       }
-      else
-       error("special character '%1' is unusable within a device"
-             " control escape sequence", sc);
     }
-    else
-      error("special character '%1' cannot be used within a device"
-           " control escape sequence", sc);
+    else {
+       char errbuf[ERRBUFSZ];
+       char character[UNIBUFSZ + 1 /* '\0' */];
+       (void) memset(errbuf, '\0', ERRBUFSZ);
+       (void) memset(character, '\0', UNIBUFSZ);
+       // If it looks like something other than an attempt at a Unicode
+       // special character escape sequence already, try to convert it
+       // into one.  Output drivers don't (and shouldn't) know anything
+       // about a troff formatter's special character identifiers.
+       if ((strlen(sc) < 3)
+           || (!csxdigit(sc[1]) && (!csxdigit(sc[2])))) {
+         const char *un = glyph_name_to_unicode(sc);
+         if (un != 0 /* nullptr */)
+           strncpy(character, un, UNIBUFSZ);
+         else {
+           error("special character '%1' is unusable within a device"
+                 " control escape sequence", sc);
+           return;
+         }
+       }
+       else {
+         const char *un = valid_unicode_code_sequence(sc, errbuf);
+         if (0 /* nullptr */ == un) {
+           error("special character '%1' is unusable within a device"
+                 " control escape sequence: %2", sc, errbuf);
+           return;
+         }
+         strncpy(character, un, UNIBUFSZ);
+       }
+       mac->append_str("\\[u");
+       mac->append_str(character);
+       mac->append(']');
+    }
   }
 }
 

_______________________________________________
Groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to