Module Name:    src
Committed By:   rillig
Date:           Fri Nov 19 17:11:46 UTC 2021

Modified Files:
        src/usr.bin/indent: indent.c indent.h lexi.c pr_comment.c

Log Message:
indent: replace direct access to the input buffer

This is preparation for abstracting away all the low-level details of
handling the input.  The goal is to fix the current bugs regarding line
number counting and out-of-bounds memory access, and to make the
generally unreadable code easier to follow.

No functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.221 -r1.222 src/usr.bin/indent/indent.c
cvs rdiff -u -r1.88 -r1.89 src/usr.bin/indent/indent.h
cvs rdiff -u -r1.140 -r1.141 src/usr.bin/indent/lexi.c
cvs rdiff -u -r1.118 -r1.119 src/usr.bin/indent/pr_comment.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/indent/indent.c
diff -u src/usr.bin/indent/indent.c:1.221 src/usr.bin/indent/indent.c:1.222
--- src/usr.bin/indent/indent.c:1.221	Fri Nov 19 15:34:25 2021
+++ src/usr.bin/indent/indent.c	Fri Nov 19 17:11:46 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $	*/
+/*	$NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)indent.c	5.1
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -344,9 +344,9 @@ search_stmt_lbrace(void)
 	 * will be moved into "the else's line", so if there was a newline
 	 * resulting from the "{" before, it must be scanned now and ignored.
 	 */
-	while (isspace((unsigned char)*inbuf.inp.s)) {
+	while (isspace((unsigned char)inp_peek())) {
 	    inp_skip();
-	    if (*inbuf.inp.s == '\n')
+	    if (inp_peek() == '\n')
 		break;
 	}
 	debug_inbuf(__func__);
@@ -443,7 +443,7 @@ search_stmt_lookahead(lexer_symbol *lsym
      * into the buffer so that the later lexi() call will read them.
      */
     if (inbuf.save_com_e != NULL) {
-	while (ch_isblank(*inbuf.inp.s))
+	while (ch_isblank(inp_peek()))
 	    save_com_add_char(inp_next());
 	debug_inbuf(__func__);
     }
@@ -1246,10 +1246,10 @@ read_preprocessing_line(void)
     state = PLAIN;
     int com_start = 0, com_end = 0;
 
-    while (ch_isblank(*inbuf.inp.s))
+    while (ch_isblank(inp_peek()))
 	inp_skip();
 
-    while (*inbuf.inp.s != '\n' || (state == COMM && !had_eof)) {
+    while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
 	buf_reserve(&lab, 2);
 	*lab.e++ = inp_next();
 	switch (lab.e[-1]) {
@@ -1258,9 +1258,9 @@ read_preprocessing_line(void)
 		*lab.e++ = inp_next();
 	    break;
 	case '/':
-	    if (*inbuf.inp.s == '*' && state == PLAIN) {
+	    if (inp_peek() == '*' && state == PLAIN) {
 		state = COMM;
-		*lab.e++ = *inbuf.inp.s++;
+		*lab.e++ = inp_next();
 		com_start = (int)buf_len(&lab) - 2;
 	    }
 	    break;
@@ -1277,9 +1277,9 @@ read_preprocessing_line(void)
 		state = CHR;
 	    break;
 	case '*':
-	    if (*inbuf.inp.s == '/' && state == COMM) {
+	    if (inp_peek() == '/' && state == COMM) {
 		state = PLAIN;
-		*lab.e++ = *inbuf.inp.s++;
+		*lab.e++ = inp_next();
 		com_end = (int)buf_len(&lab);
 	    }
 	    break;

Index: src/usr.bin/indent/indent.h
diff -u src/usr.bin/indent/indent.h:1.88 src/usr.bin/indent/indent.h:1.89
--- src/usr.bin/indent/indent.h:1.88	Fri Nov 19 15:32:13 2021
+++ src/usr.bin/indent/indent.h	Fri Nov 19 17:11:46 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.h,v 1.88 2021/11/19 15:32:13 rillig Exp $	*/
+/*	$NetBSD: indent.h,v 1.89 2021/11/19 17:11:46 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -377,8 +377,10 @@ int compute_code_indent(void);
 int compute_label_indent(void);
 int ind_add(int, const char *, const char *);
 
+char inp_peek(void);
 void inp_skip(void);
 char inp_next(void);
+
 lexer_symbol lexi(void);
 void diag(int, const char *, ...)__printflike(2, 3);
 void dump_line(void);

Index: src/usr.bin/indent/lexi.c
diff -u src/usr.bin/indent/lexi.c:1.140 src/usr.bin/indent/lexi.c:1.141
--- src/usr.bin/indent/lexi.c:1.140	Fri Nov 19 15:28:32 2021
+++ src/usr.bin/indent/lexi.c	Fri Nov 19 17:11:46 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $	*/
+/*	$NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)lexi.c	8.1 (
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
 #endif
@@ -178,7 +178,7 @@ static const unsigned char lex_number_ro
     ['.'] = 15,
 };
 
-static char
+char
 inp_peek(void)
 {
     return *inbuf.inp.s;
@@ -352,7 +352,7 @@ static void
 lex_number(void)
 {
     for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
-	unsigned char ch = (unsigned char)*inbuf.inp.s;
+	unsigned char ch = (unsigned char)inp_peek();
 	if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0)
 	    break;
 
@@ -373,11 +373,11 @@ lex_number(void)
 static void
 lex_word(void)
 {
-    while (isalnum((unsigned char)*inbuf.inp.s) ||
-	    *inbuf.inp.s == '\\' ||
-	    *inbuf.inp.s == '_' || *inbuf.inp.s == '$') {
+    while (isalnum((unsigned char)inp_peek()) ||
+	    inp_peek() == '\\' ||
+	    inp_peek() == '_' || inp_peek() == '$') {
 
-	if (*inbuf.inp.s == '\\') {
+	if (inp_peek() == '\\') {
 	    if (inbuf.inp.s[1] == '\n') {
 		inbuf.inp.s += 2;
 		if (inbuf.inp.s >= inbuf.inp.e)
@@ -394,7 +394,7 @@ static void
 lex_char_or_string(void)
 {
     for (char delim = token.e[-1];;) {
-	if (*inbuf.inp.s == '\n') {
+	if (inp_peek() == '\n') {
 	    diag(1, "Unterminated literal");
 	    return;
 	}
@@ -404,7 +404,7 @@ lex_char_or_string(void)
 	    return;
 
 	if (token.e[-1] == '\\') {
-	    if (*inbuf.inp.s == '\n')
+	    if (inp_peek() == '\n')
 		++line_no;
 	    token_add_char(inp_next());
 	}
@@ -419,7 +419,7 @@ probably_typename(void)
 	return false;
     if (inbuf.inp.s[0] == '*' && inbuf.inp.s[1] != '=')
 	goto maybe;
-    if (isalpha((unsigned char)*inbuf.inp.s))
+    if (isalpha((unsigned char)inp_peek()))
 	goto maybe;
     return false;
 maybe:
@@ -468,11 +468,11 @@ cmp_keyword_by_name(const void *key, con
 static lexer_symbol
 lexi_alnum(void)
 {
-    if (isdigit((unsigned char)*inbuf.inp.s) ||
+    if (isdigit((unsigned char)inp_peek()) ||
 	    (inbuf.inp.s[0] == '.' && isdigit((unsigned char)inbuf.inp.s[1]))) {
 	lex_number();
-    } else if (isalnum((unsigned char)*inbuf.inp.s) ||
-	    *inbuf.inp.s == '_' || *inbuf.inp.s == '$') {
+    } else if (isalnum((unsigned char)inp_peek()) ||
+	    inp_peek() == '_' || inp_peek() == '$') {
 	lex_word();
     } else
 	return lsym_eof;	/* just as a placeholder */
@@ -480,7 +480,7 @@ lexi_alnum(void)
     *token.e = '\0';
 
     if (token.s[0] == 'L' && token.s[1] == '\0' &&
-	    (*inbuf.inp.s == '"' || *inbuf.inp.s == '\''))
+	    (inp_peek() == '"' || inp_peek() == '\''))
 	return lsym_string_prefix;
 
     while (ch_isblank(inp_peek()))
@@ -523,7 +523,7 @@ found_typename:
 	}
     }
 
-    if (*inbuf.inp.s == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
+    if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
 	!ps.in_parameter_declaration && !ps.block_init) {
 
 	for (const char *p = inbuf.inp.s; p < inbuf.inp.e;)
@@ -552,7 +552,7 @@ lexi(void)
     ps.curr_col_1 = ps.next_col_1;
     ps.next_col_1 = false;
 
-    while (ch_isblank(*inbuf.inp.s)) {
+    while (ch_isblank(inp_peek())) {
 	ps.curr_col_1 = false;
 	inp_skip();
     }
@@ -647,19 +647,19 @@ lexi(void)
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
 	unary_delim = true;
 
-	if (*inbuf.inp.s == token.e[-1]) {	/* ++, -- */
-	    *token.e++ = *inbuf.inp.s++;
+	if (inp_peek() == token.e[-1]) {	/* ++, -- */
+	    *token.e++ = inp_next();
 	    if (ps.prev_token == lsym_word ||
 		    ps.prev_token == lsym_rparen_or_rbracket) {
 		lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op;
 		unary_delim = false;
 	    }
 
-	} else if (*inbuf.inp.s == '=') {	/* += */
-	    *token.e++ = *inbuf.inp.s++;
+	} else if (inp_peek() == '=') {	/* += */
+	    *token.e++ = inp_next();
 
-	} else if (*inbuf.inp.s == '>') {	/* -> */
-	    *token.e++ = *inbuf.inp.s++;
+	} else if (inp_peek() == '>') {	/* -> */
+	    *token.e++ = inp_next();
 	    unary_delim = false;
 	    lsym = lsym_unary_op;
 	    ps.want_blank = false;
@@ -669,8 +669,8 @@ lexi(void)
     case '=':
 	if (ps.init_or_struct)
 	    ps.block_init = true;
-	if (*inbuf.inp.s == '=') {	/* == */
-	    *token.e++ = *inbuf.inp.s++;
+	if (inp_peek() == '=') {	/* == */
+	    *token.e++ = inp_next();
 	    *token.e = '\0';
 	}
 	lsym = lsym_binary_op;
@@ -680,10 +680,10 @@ lexi(void)
     case '>':
     case '<':
     case '!':			/* ops like <, <<, <=, !=, etc */
-	if (*inbuf.inp.s == '>' || *inbuf.inp.s == '<' || *inbuf.inp.s == '=')
+	if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=')
+	    *token.e++ = inp_next();
+	if (inp_peek() == '=')
 	    *token.e++ = inp_next();
-	if (*inbuf.inp.s == '=')
-	    *token.e++ = *inbuf.inp.s++;
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
 	unary_delim = true;
 	break;
@@ -691,14 +691,14 @@ lexi(void)
     case '*':
 	unary_delim = true;
 	if (!ps.next_unary) {
-	    if (*inbuf.inp.s == '=')
-		*token.e++ = *inbuf.inp.s++;
+	    if (inp_peek() == '=')
+		*token.e++ = inp_next();
 	    lsym = lsym_binary_op;
 	    break;
 	}
 
-	while (*inbuf.inp.s == '*' || isspace((unsigned char)*inbuf.inp.s)) {
-	    if (*inbuf.inp.s == '*')
+	while (inp_peek() == '*' || isspace((unsigned char)inp_peek())) {
+	    if (inp_peek() == '*')
 		token_add_char('*');
 	    inp_skip();
 	}
@@ -723,7 +723,7 @@ lexi(void)
 	break;
 
     default:
-	if (token.e[-1] == '/' && (*inbuf.inp.s == '*' || *inbuf.inp.s == '/')) {
+	if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) {
 	    *token.e++ = inp_next();
 	    lsym = lsym_comment;
 	    unary_delim = ps.next_unary;
@@ -731,7 +731,7 @@ lexi(void)
 	}
 
 	/* handle '||', '&&', etc., and also things as in 'int *****i' */
-	while (token.e[-1] == *inbuf.inp.s || *inbuf.inp.s == '=')
+	while (token.e[-1] == inp_peek() || inp_peek() == '=')
 	    token_add_char(inp_next());
 
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;

Index: src/usr.bin/indent/pr_comment.c
diff -u src/usr.bin/indent/pr_comment.c:1.118 src/usr.bin/indent/pr_comment.c:1.119
--- src/usr.bin/indent/pr_comment.c:1.118	Fri Nov 19 15:32:13 2021
+++ src/usr.bin/indent/pr_comment.c	Fri Nov 19 17:11:46 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: pr_comment.c,v 1.118 2021/11/19 15:32:13 rillig Exp $	*/
+/*	$NetBSD: pr_comment.c,v 1.119 2021/11/19 17:11:46 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@ static char sccsid[] = "@(#)pr_comment.c
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: pr_comment.c,v 1.118 2021/11/19 15:32:13 rillig Exp $");
+__RCSID("$NetBSD: pr_comment.c,v 1.119 2021/11/19 17:11:46 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/pr_comment.c 334927 2018-06-10 16:44:18Z pstef $");
 #endif
@@ -117,9 +117,9 @@ analyze_comment(bool *p_may_wrap, bool *
 	com_ind = 0;
 
     } else {
-	if (*inbuf.inp.s == '-' || *inbuf.inp.s == '*' ||
+	if (inp_peek() == '-' || inp_peek() == '*' ||
 		token.e[-1] == '/' ||
-		(*inbuf.inp.s == '\n' && !opt.format_block_comments)) {
+		(inp_peek() == '\n' && !opt.format_block_comments)) {
 	    may_wrap = false;
 	    break_delim = false;
 	}
@@ -166,7 +166,7 @@ analyze_comment(bool *p_may_wrap, bool *
 	ps.n_comment_delta = -ind_add(0, start, inbuf.inp.s - 2);
     } else {
 	ps.n_comment_delta = 0;
-	while (ch_isblank(*inbuf.inp.s))
+	while (ch_isblank(inp_peek()))
 	    inbuf.inp.s++;
     }
 
@@ -175,7 +175,7 @@ analyze_comment(bool *p_may_wrap, bool *
     com_add_char(token.e[-1]);	/* either '*' or '/' */
 
     /* TODO: Maybe preserve a single '\t' as well. */
-    if (*inbuf.inp.s != ' ' && may_wrap)
+    if (inp_peek() != ' ' && may_wrap)
 	com_add_char(' ');
 
     if (break_delim && fits_in_one_line(adj_max_line_length))
@@ -208,13 +208,13 @@ copy_comment_wrap(int adj_max_line_lengt
     ssize_t last_blank = -1;	/* index of the last blank in com.buf */
 
     for (;;) {
-	switch (*inbuf.inp.s) {
+	switch (inp_peek()) {
 	case '\f':
 	    dump_line_ff();
 	    last_blank = -1;
 	    com_add_delim();
 	    inbuf.inp.s++;
-	    while (ch_isblank(*inbuf.inp.s))
+	    while (ch_isblank(inp_peek()))
 		inbuf.inp.s++;
 	    break;
 
@@ -248,19 +248,19 @@ copy_comment_wrap(int adj_max_line_lengt
 	    do {		/* flush any blanks and/or tabs at start of
 				 * next line */
 		inp_skip();
-		if (*inbuf.inp.s == '*' && skip_asterisk) {
+		if (inp_peek() == '*' && skip_asterisk) {
 		    skip_asterisk = false;
 		    inp_skip();
-		    if (*inbuf.inp.s == '/')
+		    if (inp_peek() == '/')
 			goto end_of_comment;
 		}
-	    } while (ch_isblank(*inbuf.inp.s));
+	    } while (ch_isblank(inp_peek()));
 
 	    break;		/* end of case for newline */
 
 	case '*':
 	    inp_skip();
-	    if (*inbuf.inp.s == '/') {
+	    if (inp_peek() == '/') {
 	end_of_comment:
 		inp_skip();
 
@@ -292,7 +292,7 @@ copy_comment_wrap(int adj_max_line_lengt
 		    last_blank = com.e - com.buf;
 		com_add_char(ch);
 		now_len++;
-		if (memchr("*\n\r\b\t", *inbuf.inp.s, 6) != NULL)
+		if (memchr("*\n\r\b\t", inp_peek(), 6) != NULL)
 		    break;
 		if (now_len >= adj_max_line_length && last_blank != -1)
 		    break;
@@ -328,7 +328,7 @@ static void
 copy_comment_nowrap(void)
 {
     for (;;) {
-	if (*inbuf.inp.s == '\n') {
+	if (inp_peek() == '\n') {
 	    if (token.e[-1] == '/')
 		goto finish;
 

Reply via email to