[bug #67617] [troff] `pchar` request should resolve and report properties of character classes

G. Branden Robinson Fri, 14 Nov 2025 13:18:54 -0800

Update of bug #67617 (group groff):

                 Summary: [troff] `pchar` should resolve and report properties
of character classes => [troff] `pchar` request should resolve and report
properties of character classes


    _______________________________________________________

Follow-up Comment #1:

The following patch seems to work; sample output follows the diff.


diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 339695d2a..1195c8102 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -2918,10 +2918,15 @@ const char *token::description()
       if (strchr(sc, '\'') != 0 /* nullptr */)
        qc = '"';
       // TODO: This truncates the names of impractically long special
-      // character names.  Do something about that.  (The truncation is
-      // visually indicated by the absence of a closing quotation mark.)
-      (void) snprintf(buf, maxstr, "special character %c%s%c", qc, sc,
-                     qc);
+      // character or character class names.  Do something about that.
+      // (The truncation is visually indicated by the absence of a
+      // closing quotation mark.)
+      if (using_character_classes && tok.get_char()->is_class())
+       (void) snprintf(buf, maxstr, "character class %c%s%c", qc, sc,
+                       qc);
+      else
+       (void) snprintf(buf, maxstr, "special character %c%s%c", qc, sc,
+                       qc);
       return buf;
     }
   case TOKEN_SPREAD:
@@ -4987,8 +4992,8 @@ static void print_character_request()
     ci = tok.get_char(false /* required */,
                      true /* suppress creation */);
     if (!tok.is_character()) {
-      error("character report request expects characters as arguments;"
-           " got %1", tok.description());
+      error("character report request expects characters or character"
+           " classes as arguments; got %1", tok.description());
       break;
     }
     if (0 /* nullptr */ == ci) {
@@ -5000,9 +5005,11 @@ static void print_character_request()
     }
     else {
       // A charinfo doesn't know the name by which it is accessed.
-      if (tok.is_indexed_character())
+      if (tok.is_indexed_character()) {
        errprint("character indexed %1 in current font\n",
                 tok.character_index());
+       fflush(stderr);
+      }
       else
        errprint("%1\n", tok.description());
       fflush(stderr);
@@ -8925,6 +8932,16 @@ const char *break_flag_reg::get_string()
   return i_to_a(input_stack::get_break_flag());
 }
 
+class character_classes_in_use_reg : public reg {
+public:
+  const char *get_string();
+};
+
+const char *character_classes_in_use_reg::get_string()
+{
+  return i_to_a(using_character_classes);
+}
+
 class enclosing_want_att_compat_reg : public reg {
 public:
   const char *get_string();
@@ -9957,6 +9974,7 @@ void init_input_requests()
   register_dictionary.define(".$", new nargs_reg);
   register_dictionary.define(".br", new break_flag_reg);
   register_dictionary.define(".C", new readonly_boolean_register(&want_att_compat));
+  register_dictionary.define(".class", new character_classes_in_use_reg);
   register_dictionary.define(".cp", new enclosing_want_att_compat_reg);
   register_dictionary.define(".O", new variable_reg(&suppression_level));
   register_dictionary.define(".c", new lineno_reg);
@@ -10700,6 +10718,48 @@ bool charinfo::contains(charinfo *, bool)
 
 void charinfo::dump()
 {
+  if (is_class()) {
+    std::vector<std::pair<int, int> >::const_iterator ranges_iter;
+    ranges_iter = ranges.begin();
+    errprint("  contains ranges: ");
+    const size_t buflen = 8; // "U+" + four/five hex digits + '\0'
+    int range_begin = 0;
+    int range_end = 0;
+    char beg_hexbuf[buflen];
+    char end_hexbuf[buflen];
+    (void) memset(beg_hexbuf, '\0', buflen);
+    (void) memset(end_hexbuf, '\0', buflen);
+    bool has_ranges = false;
+    while (ranges_iter != ranges.end()) {
+      has_ranges = true;
+      range_begin = ranges_iter->first;
+      range_end = ranges_iter->second;
+      (void) snprintf(beg_hexbuf, buflen, "U+%.4X", range_begin);
+      (void) snprintf(end_hexbuf, buflen, "U+%.4X", range_end);
+      // TODO: comma-separate?  JSON list?
+      if (range_begin == range_end)
+       errprint("%1 ", beg_hexbuf, end_hexbuf);
+      else
+       errprint("%1-%2 ", beg_hexbuf, end_hexbuf);
+      ++ranges_iter;
+    }
+    if (!has_ranges)
+      errprint("(none)");
+    errprint("\n");
+    errprint("  contains nested classes: ");
+    std::vector<charinfo *>::const_iterator nested_iter;
+    nested_iter = nested_classes.begin();
+    bool has_nested_classes = false;
+    while (nested_iter != nested_classes.end()) {
+      has_nested_classes = true;
+      // TODO: Here's where JSON would really pay off.
+      (*nested_iter)->dump();
+    }
+    if (!has_nested_classes)
+      errprint("(none)");
+    errprint("\n");
+  }
+  else {
   if (translation != 0 /* nullptr */)
     errprint("  is translated\n");
   else
@@ -10786,6 +10846,7 @@ void charinfo::dump()
   if (strcmp(modestr, "") == 0)
     modestr =" normal";
   errprint("  mode:%1\n", modestr);
+  }
   fflush(stderr);
 }

$ printf '.pnr .class\n.pchar \\C@[CJKprepunct]@' | ./build/test-groff -mja
/home/branden/src/GIT/groff/build/../tmac/ja.tmac: warning: font TR may lack
coverage of Japanese script
.class  1
character class '[CJKprepunct]'
  contains ranges: U+002C U+003A U+003B U+003E U+007D U+3001 U+3002 U+FF0C
U+FF0E U+30FB U+FF1A U+FF1B U+FF1F U+FF01 U+FF09 U+3015 U+FF3D U+FF5D U+300D
U+300F U+3011 U+3041 U+3043 U+3045 U+3047 U+3049 U+3063 U+3083 U+3085 U+3087
U+30FC U+30A1 U+30A3 U+30A5 U+30A7 U+30A9 U+30C3 U+30E3 U+30E5 U+30E7 
  contains nested classes: (none)
$ printf '.pnr .class\n.pchar \\C@[CJKnormal]@' | ./build/test-groff -mja
/home/branden/src/GIT/groff/build/../tmac/ja.tmac: warning: font TR may lack
coverage of Japanese script
.class  1
character class '[CJKnormal]'
  contains ranges: U+3041-U+3096 U+3096 U+30A0-U+30FF U+30FF U+4E00-U+9FFF
U+9FFF 
  contains nested classes: (none)




    _______________________________________________________

Reply to this item at:

  <https://savannah.gnu.org/bugs/?67617>

_______________________________________________
Message sent via Savannah
https://savannah.gnu.org/

signature.asc
Description: PGP signature

[bug #67617] [troff] `pchar` request should resolve and report properties of character classes

Reply via email to