gbranden pushed a commit to branch master
in repository groff.
commit 8d25aa7190b7954757fa03b8e83fee852c00c799
Author: G. Branden Robinson <[email protected]>
AuthorDate: Thu Dec 25 17:32:04 2025 -0600
src/roff/troff/input.cpp: Trivially refactor (1).
Rename `get_copy()` to `read_char_in_copy_mode()`, because this function
advances the input stream pointer and because it changes the formatter's
lexing and parsing rules from the default to "copy mode", a fundamental
*roff concept since J. F. Ossanna transformed roff(1) into the macro
language nroff(1) with the introduction of macro definitions circa 1972.
(get_copy): Rename this...
(read_char_in_copy_mode): ...to this (decl and defn).
(read_char_in_copy_mode): Rename Boolean-valued argument
`handle_escape_E` to `handle_escaped_E`, to appease the ruthless
grammarian in my head.
(read_char_in_escape_sequence_parameter)
(has_arg)
(process_input_stack)
(decode_macro_call_arguments)
(decode_escape_sequence_arguments)
(read_request)
(do_define_string)
(define_character)
(do_define_macro)
(length_request)
(do_non_interpreted)
(device_request)
(output_request)
(tag)
(taga)
(do_terminal)
(do_write_request)
(abort_request)
(read_rest_of_line_as_argument): Update call sites.
---
ChangeLog | 34 ++++++++++
src/roff/troff/input.cpp | 163 ++++++++++++++++++++++++++---------------------
2 files changed, 126 insertions(+), 71 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 673d46de7..e162c235a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2025-12-25 G. Branden Robinson <[email protected]>
+
+ * src/roff/troff/input.cpp: Trivially refactor. Rename
+ `get_copy()` to `read_char_in_copy_mode()`, because this
+ function advances the input stream pointer and because it
+ changes the formatter's lexing and parsing rules from the
+ default to "copy mode", a fundamental *roff concept since J. F.
+ Ossanna transformed roff(1) into the macro language nroff(1)
+ with the introduction of macro definitions circa 1972.
+ (get_copy): Rename this...
+ (read_char_in_copy_mode): ...to this (decl and defn).
+ (read_char_in_copy_mode): Rename Boolean-valued argument
+ `handle_escape_E` to `handle_escaped_E`, to appease the ruthless
+ grammarian in my head.
+ (read_char_in_escape_sequence_parameter)
+ (has_arg)
+ (process_input_stack)
+ (decode_macro_call_arguments)
+ (decode_escape_sequence_arguments)
+ (read_request)
+ (do_define_string)
+ (define_character)
+ (do_define_macro)
+ (length_request)
+ (do_non_interpreted)
+ (device_request)
+ (output_request)
+ (tag)
+ (taga)
+ (do_terminal)
+ (do_write_request)
+ (abort_request)
+ (read_rest_of_line_as_argument): Update call sites.
+
2025-12-23 G. Branden Robinson <[email protected]>
* src/roff/troff/number.cpp (get_number_rigidly): Delete. It no
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 1f6a9734c..d75a6fafc 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -153,7 +153,9 @@ search_path *mac_path = &safer_macro_path;
// Initialize inclusion search path with only the current directory.
search_path include_search_path(0 /* nullptr */, 0 /* nullptr */, 0, 1);
-static int get_copy(node**, bool = false, bool = false);
+static int read_char_in_copy_mode(node ** /* nd; 0 to discard */,
+ bool /* is_defining */ = false,
+ bool /* handle_escaped_E */ = false);
static void copy_mode_error(const char *,
const errarg & = empty_errarg,
const errarg & = empty_errarg,
@@ -963,12 +965,13 @@ void shift()
skip_line();
}
+// TODO: return unsigned char (future: grochar)? We handle EOF here.
static char read_char_in_escape_sequence_parameter(bool allow_space
= false)
{
- int c = get_copy(0 /* nullptr */,
- false /* is defining */,
- true /* handle \E */);
+ int c = read_char_in_copy_mode(0 /* nullptr */,
+ false /* is_defining */,
+ true /* handle_escaped_E */);
switch (c) {
case EOF:
copy_mode_error("end of input in escape sequence");
@@ -1114,7 +1117,13 @@ static symbol read_increment_and_escape_parameter(int *incp)
return symbol(buf);
}
-static int get_copy(node **nd, bool is_defining, bool handle_escape_E)
+// In copy mode, we don't tokenize normally; characters on the input
+// stream are typically read into the contents of an existing node (like
+// a string or macro definition), or discarded. A handful of escape
+// sequences (\n, etc.) interpolate as they do outside of copy mode.
+static int read_char_in_copy_mode(node **nd,
+ bool is_defining,
+ bool handle_escaped_E)
{
for (;;) {
int c = input_stack::get(nd);
@@ -1142,7 +1151,7 @@ static int get_copy(node **nd, bool is_defining, bool handle_escape_E)
}
if (c == DOUBLE_QUOTE)
continue;
- if (c == ESCAPE_E && handle_escape_E)
+ if (c == ESCAPE_E && handle_escaped_E)
c = escape_char;
if (c == ESCAPE_NEWLINE) {
if (is_defining)
@@ -1200,7 +1209,7 @@ static int get_copy(node **nd, bool is_defining, bool handle_escape_E)
return ESCAPE_e;
case 'E':
(void) input_stack::get(0 /* nullptr */);
- if (handle_escape_E)
+ if (handle_escaped_E)
goto again;
return ESCAPE_E;
case 'n':
@@ -2102,7 +2111,7 @@ bool has_arg(bool want_peek)
for (;;) {
c = input_stack::peek();
if (' ' == c)
- (void) get_copy(0 /* nullptr */);
+ (void) read_char_in_copy_mode(0 /* nullptr */);
else
break;
}
@@ -3551,7 +3560,7 @@ void process_input_stack()
int cc;
do {
node *n;
- cc = get_copy(&n);
+ cc = read_char_in_copy_mode(&n);
if (cc != EOF) {
if (cc != '\0')
curdiv->transparent_output(transparent_translate(cc));
@@ -4600,10 +4609,10 @@ static void decode_macro_call_arguments(macro_iterator *mi)
{
if (!tok.is_newline() && !tok.is_eof()) {
node *n;
- int c = get_copy(&n);
+ int c = read_char_in_copy_mode(&n);
for (;;) {
while (c == ' ')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '\n' || c == EOF)
break;
macro arg;
@@ -4614,7 +4623,7 @@ static void decode_macro_call_arguments(macro_iterator *mi)
if (c == '"') {
arg.append(DOUBLE_QUOTE);
quote_input_level = input_stack::get_level();
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
while (c != EOF && c != '\n'
&& !(c == ' ' && quote_input_level == 0)) {
@@ -4622,10 +4631,10 @@ static void decode_macro_call_arguments(macro_iterator *mi)
&& (want_att_compat
|| input_stack::get_level() == quote_input_level)) {
arg.append(DOUBLE_QUOTE);
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '"') {
arg.append(c);
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
else
break;
@@ -4641,7 +4650,7 @@ static void decode_macro_call_arguments(macro_iterator *mi)
}
arg.append(c);
}
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
}
arg.append(POP_GROFFCOMP_MODE);
@@ -4653,10 +4662,10 @@ static void decode_macro_call_arguments(macro_iterator *mi)
static void decode_escape_sequence_arguments(macro_iterator *mi)
{
node *n;
- int c = get_copy(&n);
+ int c = read_char_in_copy_mode(&n);
for (;;) {
while (c == ' ')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '\n' || c == EOF) {
error("missing ']' in parameterized escape sequence");
break;
@@ -4668,17 +4677,17 @@ static void decode_escape_sequence_arguments(macro_iterator *mi)
bool was_warned = false; // about an input tab character
if (c == '"') {
quote_input_level = input_stack::get_level();
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
while (c != EOF && c != '\n'
&& !(c == ']' && quote_input_level == 0)
&& !(c == ' ' && quote_input_level == 0)) {
if (quote_input_level > 0 && c == '"'
&& input_stack::get_level() == quote_input_level) {
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '"') {
arg.append(c);
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
else
break;
@@ -4695,7 +4704,7 @@ static void decode_escape_sequence_arguments(macro_iterator *mi)
}
arg.append(c);
}
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
}
mi->add_arg(arg, (c == ' '));
@@ -4924,16 +4933,16 @@ void read_request()
int reading_from_terminal = isatty(fileno(stdin));
int had_prompt = 0;
if (has_arg(true /* peek */)) {
- int c = get_copy(0);
+ int c = read_char_in_copy_mode(0 /* nullptr */);
while (c == ' ')
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
while (c != EOF && c != '\n' && c != ' ') {
if (!is_invalid_input_char(c)) {
if (reading_from_terminal)
fputc(c, stderr);
had_prompt = 1;
}
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
}
if (c == ' ') {
tok.make_space();
@@ -4992,11 +5001,11 @@ void do_define_string(define_mode mode, comp_mode comp)
return;
}
else
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
while (c == ' ')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '"')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
macro mac;
request_or_macro *rm
= static_cast<request_or_macro *>(request_dictionary.lookup(nm));
@@ -5012,7 +5021,7 @@ void do_define_string(define_mode mode, comp_mode comp)
mac.append(n);
else
mac.append((unsigned char) c);
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
if (comp == COMP_DISABLE || comp == COMP_ENABLE)
mac.append(POP_GROFFCOMP_MODE);
@@ -5105,11 +5114,11 @@ void define_character(char_mode mode, const char *font_name)
return;
}
else
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
while (c == ' ' || c == '\t')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '"')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
macro *m = new macro;
// Construct a macro from input characters; if the input character
// code is 0, we've read a node--append that.
@@ -5118,7 +5127,7 @@ void define_character(char_mode mode, const char *font_name)
m->append(static_cast<unsigned char>(c));
else
m->append(n);
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
// Assign the macro to the character, discarding any previous macro.
m = ci->set_macro(m, mode);
@@ -5423,7 +5432,7 @@ void do_define_macro(define_mode mode, calling_mode calling, comp_mode comp)
&start_lineno);
node *n;
// doing this here makes the line numbers come out right
- int c = get_copy(&n, true /* is defining*/);
+ int c = read_char_in_copy_mode(&n, true /* is_defining */);
macro mac;
macro *mm = 0 /* nullptr */;
if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
@@ -5445,7 +5454,7 @@ void do_define_macro(define_mode mode, calling_mode calling, comp_mode comp)
while (c == ESCAPE_NEWLINE) {
if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND)
mac.append(c);
- c = get_copy(&n, true /* is defining */);
+ c = read_char_in_copy_mode(&n, true /* is_defining */);
}
if (reading_beginning_of_input_line && c == '.') {
const char *s = term.contents();
@@ -5453,11 +5462,11 @@ void do_define_macro(define_mode mode, calling_mode calling, comp_mode comp)
// see if it matches term
int i = 0;
if (s[0] != 0) {
- while ((d = get_copy(&n)) == ' ' || d == '\t')
+ while (((d = read_char_in_copy_mode(&n)) == ' ') || (d == '\t'))
;
if ((unsigned char) s[0] == d) {
for (i = 1; s[i] != 0; i++) {
- d = get_copy(&n);
+ d = read_char_in_copy_mode(&n);
if ((unsigned char) s[i] != d)
break;
}
@@ -5465,7 +5474,7 @@ void do_define_macro(define_mode mode, calling_mode calling, comp_mode comp)
}
if (s[i] == 0
&& ((i == 2 && want_att_compat)
- || (d = get_copy(&n)) == ' '
+ || ((d = read_char_in_copy_mode(&n)) == ' ')
|| d == '\n')) { // we found it
if (d == '\n')
tok.make_newline();
@@ -5521,7 +5530,7 @@ void do_define_macro(define_mode mode, calling_mode calling, comp_mode comp)
mac.append(c);
}
reading_beginning_of_input_line = (c == '\n');
- c = get_copy(&n, true /* is defining */);
+ c = read_char_in_copy_mode(&n, true /* is_defining */);
}
}
@@ -5869,15 +5878,15 @@ void length_request()
return;
}
else
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
while (c == ' ')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
if (c == '"')
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
int len = 0;
while (c != '\n' && c != EOF) {
++len;
- c = get_copy(&n);
+ c = read_char_in_copy_mode(&n);
}
reg *r = static_cast<reg *>(register_dictionary.lookup(ret));
if (r != 0 /* nullptr */)
@@ -6551,7 +6560,9 @@ static node *do_non_interpreted() // \?
node *n;
int c;
macro mac;
- while ((c = get_copy(&n)) != ESCAPE_QUESTION && c != EOF && c != '\n')
+ while (((c = read_char_in_copy_mode(&n)) != ESCAPE_QUESTION)
+ && (c != EOF)
+ && (c != '\n'))
if (c == 0)
mac.append(n);
else
@@ -6764,9 +6775,9 @@ static void device_request()
macro mac;
int c;
for (;;) {
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if ('"' == c) {
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
break;
}
if (c != ' ' && c != '\t')
@@ -6776,7 +6787,7 @@ static void device_request()
topdiv->begin_page();
for (;
(c != '\0') && (c != '\n') && (c != EOF);
- c = get_copy(0 /* nullptr */)) {
+ c = read_char_in_copy_mode(0 /* nullptr */)) {
// We may encounter some of the C0 and C1 character codes GNU troff
// uses for special purposes; see src/roff/troff/input.h. They
// produce nothing in grout. Warn only about the ones that are left
@@ -6792,7 +6803,7 @@ static void device_request()
else if (c != '\\')
mac.append(c);
else {
- int c1 = get_copy(0 /* nullptr */);
+ int c1 = read_char_in_copy_mode(0 /* nullptr */);
if (c1 != '[') {
mac.append(c);
mac.append(c1);
@@ -6816,9 +6827,9 @@ static void device_request()
// character escape sequence?
bool is_valid = false;
string sc = "";
- int c2 = get_copy(0 /* nullptr */);
+ int c2 = read_char_in_copy_mode(0 /* nullptr */);
for (; (c2 != '\0') && (c2 != '\n') && (c2 != EOF);
- c2 = get_copy(0 /* nullptr */)) {
+ c2 = read_char_in_copy_mode(0 /* nullptr */)) {
// XXX: `map_special_character_for_device_output()` will need
// the closing bracket in the iterator we construct, but a
// composite character mapping mustn't see it.
@@ -6877,15 +6888,17 @@ static void output_request()
}
int c;
for (;;) {
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if ('"' == c) {
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
break;
}
if (c != ' ' && c != '\t')
break;
}
- for (; c != '\n' && c != EOF; c = get_copy(0 /* nullptr */))
+ for (;
+ (c != '\n') && (c != EOF);
+ (c = read_char_in_copy_mode(0 /* nullptr */)))
topdiv->transparent_output(c);
topdiv->transparent_output('\n');
tok.next();
@@ -8064,16 +8077,18 @@ void tag()
string s;
int c;
for (;;) {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if (c == '"') {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
break;
}
if (c != ' ' && c != '\t')
break;
}
s = "x X ";
- for (; c != '\n' && c != EOF; c = get_copy(0))
+ for (;
+ (c != '\n') && (c != EOF);
+ (c = read_char_in_copy_mode(0 /* nullptr */)))
s += (char) c;
s += '\n';
curenv->add_node(new tag_node(s, 0));
@@ -8087,16 +8102,18 @@ void taga()
string s;
int c;
for (;;) {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if (c == '"') {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
break;
}
if (c != ' ' && c != '\t')
break;
}
s = "x X ";
- for (; c != '\n' && c != EOF; c = get_copy(0))
+ for (;
+ (c != '\n') && (c != EOF);
+ (c = read_char_in_copy_mode(0 /* nullptr */)))
s += (char) c;
s += '\n';
curenv->add_node(new tag_node(s, 1));
@@ -8111,15 +8128,17 @@ void do_terminal(int newline, int string_like)
if (has_arg(true /* peek */)) {
int c;
for (;;) {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if (string_like && c == '"') {
- c = get_copy(0);
+ c = read_char_in_copy_mode(0 /* nullptr */);
break;
}
if (c != ' ' && c != '\t')
break;
}
- for (; c != '\n' && c != EOF; c = get_copy(0))
+ for (;
+ (c != '\n') && (c != EOF);
+ (c = read_char_in_copy_mode(0 /* nullptr */)))
fputs(asciify(c), stderr);
}
if (newline)
@@ -8348,14 +8367,14 @@ void do_write_request(int newline)
return;
}
if (has_arg(true /* peek */)) {
- int c = get_copy(0 /* nullptr */);
+ int c = read_char_in_copy_mode(0 /* nullptr */);
while (' ' == c)
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if ('"' == c)
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
while (c != '\n' && c != EOF) {
fputs(asciify(c), fp);
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
}
}
if (newline)
@@ -9251,11 +9270,13 @@ void abort_request()
else if (tok.is_newline())
c = '\n';
else {
- while ((c = get_copy(0)) == ' ')
+ while ((c = read_char_in_copy_mode(0 /* nullptr */)) == ' ')
;
}
if (!(c == EOF || c == '\n')) {
- for (; c != '\n' && c != EOF; c = get_copy(0))
+ for (;
+ (c != '\n') && (c != EOF);
+ (c = read_char_in_copy_mode(0 /* nullptr */)))
fputs(asciify(c), stderr);
fputc('\n', stderr);
}
@@ -9277,11 +9298,11 @@ char *read_rest_of_line_as_argument()
int buf_size = 256;
char *s = new char[buf_size]; // C++03: new char[buf_size]();
(void) memset(s, 0, (buf_size * sizeof(char)));
- int c = get_copy(0 /* nullptr */);
+ int c = read_char_in_copy_mode(0 /* nullptr */);
while (' ' == c)
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
if ('"' == c)
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
int i = 0;
while ((c != '\n') && (c != EOF)) {
if (!is_invalid_input_char(c)) {
@@ -9295,7 +9316,7 @@ char *read_rest_of_line_as_argument()
}
s[i++] = c;
}
- c = get_copy(0 /* nullptr */);
+ c = read_char_in_copy_mode(0 /* nullptr */);
}
s[i] = '\0';
if (0 == i) {
_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit