gbranden pushed a commit to branch master
in repository groff.

commit 9f82762791af0c9ed674cf59f607c65ec8f03efd
Author: G. Branden Robinson <[email protected]>
AuthorDate: Fri Apr 10 02:10:51 2026 -0500

    src/roff/troff/env.cpp: Refactor, add warning.
    
    * src/roff/troff/env.cpp (add_hyphenation_exception_words_request):
      Refactor and add warning diagnostic.  Elevate scope of `c` local
      variable and rename it to `hc` since it stores the hyphenation code of
      the character under examination in the hyphenation exception word
      argument.  Gather that hyphenation code earlier, and mark the word as
      invalid if the hyphenation code is zero--exception: the character '-'
      is also valid, since it marks the hyphenation break point.
      Consequently, we now throw a warning in category `char` on attempted
      hyphenation exception words like "non*sense" and "0123456789".  Drop
      later conditional loop break if the hyphenation code is zero since
      that now cannot be the case--we didn't enter the containing branch in
      the first place.  Replace a test of the index into the hyphenation
      exception word being greater than zero with an assert(3)ion of the
      same inside the branch, as it's now an invariant that a valid
      hyphenation exception word contains a nonzero count of characters
      bearing hyphenation codes.
    
    * doc/groff.texi.in (Warnings):
    * src/roff/troff/troff.1.man (Warnings): Document new warning
      circumstance.
---
 ChangeLog                  | 25 +++++++++++++++++++++++++
 doc/groff.texi.in          |  4 +++-
 src/roff/troff/env.cpp     | 30 +++++++++++++++++++++---------
 src/roff/troff/troff.1.man |  4 +++-
 4 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3acc2e3a3..9813c132f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2026-04-10  G. Branden Robinson <[email protected]>
+
+       * src/roff/troff/env.cpp
+       (add_hyphenation_exception_words_request): Refactor and add
+       warning diagnostic.  Elevate scope of `c` local variable and
+       rename it to `hc` since it stores the hyphenation code of the
+       character under examination in the hyphenation exception word
+       argument.  Gather that hyphenation code earlier, and mark the
+       word as invalid if the hyphenation code is zero--exception: the
+       character '-' is also valid, since it marks the hyphenation
+       break point.  Consequently, we now throw a warning in category
+       `char` on attempted hyphenation exception words like "non*sense"
+       and "0123456789".  Drop later conditional loop break if the
+       hyphenation code is zero since that now cannot be the case--we
+       didn't enter the containing branch in the first place.  Replace
+       a test of the index into the hyphenation exception word being
+       greater than zero with an assert(3)ion of the same inside the
+       branch, as it's now an invariant that a valid hyphenation
+       exception word contains a nonzero count of characters bearing
+       hyphenation codes.
+
+       * doc/groff.texi.in (Warnings):
+       * src/roff/troff/troff.1.man (Warnings): Document new warning
+       circumstance.
+
 2026-04-10  G. Branden Robinson <[email protected]>
 
        * src/roff/troff/env.cpp
diff --git a/doc/groff.texi.in b/doc/groff.texi.in
index b9f156567..b0c1ef715 100644
--- a/doc/groff.texi.in
+++ b/doc/groff.texi.in
@@ -21821,7 +21821,9 @@ circumstances.
 @itemx 1
 No user-defined character of the requested name or index exists
 and no mounted font defines a glyph for it,
-or input could not be encoded for device-independent output.
+input could not be encoded for device-independent output,
+or a character without a hyphenation code appeared
+in a hyphenation exception word.
 This category is enabled by default.
 
 @c @item number
diff --git a/src/roff/troff/env.cpp b/src/roff/troff/env.cpp
index a472a164d..3e0190872 100644
--- a/src/roff/troff/env.cpp
+++ b/src/roff/troff/env.cpp
@@ -3937,7 +3937,8 @@ static void add_hyphenation_exception_words_request() // .hw
   for (;;) {
     if (!has_arg())
       break;
-    int i = 0;
+    int i = 0; // index into hyphenation exception word excluding '-'s
+    unsigned char hc = 0U; // hyphenation code of current character
     int npos = 0;
     // Warn at most once per invalid word, not per request invocation.
     bool is_word_valid = true;
@@ -3947,8 +3948,14 @@ static void add_hyphenation_exception_words_request() // .hw
           && !tok.is_newline()
           && !tok.is_eof()) {
       charinfo *ci = tok.get_charinfo(false /* is_mandatory */);
-      if (0 /* nullptr */ == ci) {
+      if (ci != 0 /* nullptr */) {
+       hc = ci->get_hyphenation_code();
+       if ((0U == hc) && (ci->get_ascii_code() != '-'))
+         is_word_valid = false;
+      }
+      else
        is_word_valid = false;
+      if (!is_word_valid) {
        if (!was_warned) {
          warning(WARN_CHAR, "skipping hyphenation exception word"
                  " containing %1", tok.description());
@@ -3961,12 +3968,8 @@ static void add_hyphenation_exception_words_request() // .hw
          if ((i > 0) && ((npos == 0) || (pos[npos - 1] != i)))
            pos[npos++] = i;
        }
-       else {
-         unsigned char c = ci->get_hyphenation_code();
-         if (0U == c)
-           break;
-         buf[i++] = c;
-       }
+       else
+         buf[i++] = hc;
       }
       else {
        do
@@ -3976,7 +3979,16 @@ static void add_hyphenation_exception_words_request() // .hw
               && !tok.is_eof());
       }
     }
-    if (is_word_valid && (i > 0)) {
+    if (is_word_valid) {
+      // A valid hyphenation exception word contains a nonzero count of
+      // characters bearing hyphenation codes.
+      assert(i > 0);
+      // Clark uses `unsigned char` here for a small nonnegative
+      // quantity indicating the positions of hyphenation break points
+      // within a word of maximum size `WORD_MAX` (`UCHAR_MAX`).  That's
+      // kind of confusing because `unsigned char` is also GNU troff's
+      // internal "ordinary" character type.  Might be simpler just to
+      // use vector<int>.  --GBR
       pos[npos] = 0U;
       // C++03: new unsigned char[npos + 1]();
       unsigned char *tem = new unsigned char[npos + 1];
diff --git a/src/roff/troff/troff.1.man b/src/roff/troff/troff.1.man
index a17fbbf9e..8e8cd4088 100644
--- a/src/roff/troff/troff.1.man
+++ b/src/roff/troff/troff.1.man
@@ -658,7 +658,9 @@ T}
 char   1       T{
 No user-defined character of the requested name or index exists
 and no mounted font defines a glyph for it,
-or input could not be encoded for device-independent output.
+input could not be encoded for device-independent output,
+or a character without a hyphenation code appeared
+in a hyphenation exception word.
 .
 This category is enabled by default.
 T}

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to