Module Name: src Committed By: rillig Date: Fri Nov 19 17:11:46 UTC 2021
Modified Files: src/usr.bin/indent: indent.c indent.h lexi.c pr_comment.c Log Message: indent: replace direct access to the input buffer. This is a preparation for abstracting away all the low-level details of handling the input. The goal is to fix the current bugs regarding line number counting and out-of-bounds memory access, and to clean up generally unreadable code. No functional change. To generate a diff of this commit: cvs rdiff -u -r1.221 -r1.222 src/usr.bin/indent/indent.c cvs rdiff -u -r1.88 -r1.89 src/usr.bin/indent/indent.h cvs rdiff -u -r1.140 -r1.141 src/usr.bin/indent/lexi.c cvs rdiff -u -r1.118 -r1.119 src/usr.bin/indent/pr_comment.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/indent/indent.c diff -u src/usr.bin/indent/indent.c:1.221 src/usr.bin/indent/indent.c:1.222 --- src/usr.bin/indent/indent.c:1.221 Fri Nov 19 15:34:25 2021 +++ src/usr.bin/indent/indent.c Fri Nov 19 17:11:46 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $ */ +/* $NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -43,7 +43,7 @@ static char sccsid[] = "@(#)indent.c 5.1 #include <sys/cdefs.h> #if defined(__NetBSD__) -__RCSID("$NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $"); +__RCSID("$NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $"); #elif defined(__FreeBSD__) __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $"); #endif @@ -344,9 +344,9 @@ search_stmt_lbrace(void) * will be moved into "the else's line", so if there was a newline * resulting from the "{" before, it must be scanned now and ignored. */ - while (isspace((unsigned char)*inbuf.inp.s)) { + while (isspace((unsigned char)inp_peek())) { inp_skip(); - if (*inbuf.inp.s == '\n') + if (inp_peek() == '\n') break; } debug_inbuf(__func__); @@ -443,7 +443,7 @@ search_stmt_lookahead(lexer_symbol *lsym * into the buffer so that the later lexi() call will read them. 
*/ if (inbuf.save_com_e != NULL) { - while (ch_isblank(*inbuf.inp.s)) + while (ch_isblank(inp_peek())) save_com_add_char(inp_next()); debug_inbuf(__func__); } @@ -1246,10 +1246,10 @@ read_preprocessing_line(void) state = PLAIN; int com_start = 0, com_end = 0; - while (ch_isblank(*inbuf.inp.s)) + while (ch_isblank(inp_peek())) inp_skip(); - while (*inbuf.inp.s != '\n' || (state == COMM && !had_eof)) { + while (inp_peek() != '\n' || (state == COMM && !had_eof)) { buf_reserve(&lab, 2); *lab.e++ = inp_next(); switch (lab.e[-1]) { @@ -1258,9 +1258,9 @@ read_preprocessing_line(void) *lab.e++ = inp_next(); break; case '/': - if (*inbuf.inp.s == '*' && state == PLAIN) { + if (inp_peek() == '*' && state == PLAIN) { state = COMM; - *lab.e++ = *inbuf.inp.s++; + *lab.e++ = inp_next(); com_start = (int)buf_len(&lab) - 2; } break; @@ -1277,9 +1277,9 @@ read_preprocessing_line(void) state = CHR; break; case '*': - if (*inbuf.inp.s == '/' && state == COMM) { + if (inp_peek() == '/' && state == COMM) { state = PLAIN; - *lab.e++ = *inbuf.inp.s++; + *lab.e++ = inp_next(); com_end = (int)buf_len(&lab); } break; Index: src/usr.bin/indent/indent.h diff -u src/usr.bin/indent/indent.h:1.88 src/usr.bin/indent/indent.h:1.89 --- src/usr.bin/indent/indent.h:1.88 Fri Nov 19 15:32:13 2021 +++ src/usr.bin/indent/indent.h Fri Nov 19 17:11:46 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.h,v 1.88 2021/11/19 15:32:13 rillig Exp $ */ +/* $NetBSD: indent.h,v 1.89 2021/11/19 17:11:46 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD @@ -377,8 +377,10 @@ int compute_code_indent(void); int compute_label_indent(void); int ind_add(int, const char *, const char *); +char inp_peek(void); void inp_skip(void); char inp_next(void); + lexer_symbol lexi(void); void diag(int, const char *, ...)__printflike(2, 3); void dump_line(void); Index: src/usr.bin/indent/lexi.c diff -u src/usr.bin/indent/lexi.c:1.140 src/usr.bin/indent/lexi.c:1.141 --- src/usr.bin/indent/lexi.c:1.140 Fri Nov 19 15:28:32 2021 
+++ src/usr.bin/indent/lexi.c Fri Nov 19 17:11:46 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $ */ +/* $NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -43,7 +43,7 @@ static char sccsid[] = "@(#)lexi.c 8.1 ( #include <sys/cdefs.h> #if defined(__NetBSD__) -__RCSID("$NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $"); +__RCSID("$NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $"); #elif defined(__FreeBSD__) __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $"); #endif @@ -178,7 +178,7 @@ static const unsigned char lex_number_ro ['.'] = 15, }; -static char +char inp_peek(void) { return *inbuf.inp.s; @@ -352,7 +352,7 @@ static void lex_number(void) { for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { - unsigned char ch = (unsigned char)*inbuf.inp.s; + unsigned char ch = (unsigned char)inp_peek(); if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0) break; @@ -373,11 +373,11 @@ lex_number(void) static void lex_word(void) { - while (isalnum((unsigned char)*inbuf.inp.s) || - *inbuf.inp.s == '\\' || - *inbuf.inp.s == '_' || *inbuf.inp.s == '$') { + while (isalnum((unsigned char)inp_peek()) || + inp_peek() == '\\' || + inp_peek() == '_' || inp_peek() == '$') { - if (*inbuf.inp.s == '\\') { + if (inp_peek() == '\\') { if (inbuf.inp.s[1] == '\n') { inbuf.inp.s += 2; if (inbuf.inp.s >= inbuf.inp.e) @@ -394,7 +394,7 @@ static void lex_char_or_string(void) { for (char delim = token.e[-1];;) { - if (*inbuf.inp.s == '\n') { + if (inp_peek() == '\n') { diag(1, "Unterminated literal"); return; } @@ -404,7 +404,7 @@ lex_char_or_string(void) return; if (token.e[-1] == '\\') { - if (*inbuf.inp.s == '\n') + if (inp_peek() == '\n') ++line_no; token_add_char(inp_next()); } @@ -419,7 +419,7 @@ probably_typename(void) return false; if (inbuf.inp.s[0] == '*' && inbuf.inp.s[1] != '=') goto maybe; - if (isalpha((unsigned 
char)*inbuf.inp.s)) + if (isalpha((unsigned char)inp_peek())) goto maybe; return false; maybe: @@ -468,11 +468,11 @@ cmp_keyword_by_name(const void *key, con static lexer_symbol lexi_alnum(void) { - if (isdigit((unsigned char)*inbuf.inp.s) || + if (isdigit((unsigned char)inp_peek()) || (inbuf.inp.s[0] == '.' && isdigit((unsigned char)inbuf.inp.s[1]))) { lex_number(); - } else if (isalnum((unsigned char)*inbuf.inp.s) || - *inbuf.inp.s == '_' || *inbuf.inp.s == '$') { + } else if (isalnum((unsigned char)inp_peek()) || + inp_peek() == '_' || inp_peek() == '$') { lex_word(); } else return lsym_eof; /* just as a placeholder */ @@ -480,7 +480,7 @@ lexi_alnum(void) *token.e = '\0'; if (token.s[0] == 'L' && token.s[1] == '\0' && - (*inbuf.inp.s == '"' || *inbuf.inp.s == '\'')) + (inp_peek() == '"' || inp_peek() == '\'')) return lsym_string_prefix; while (ch_isblank(inp_peek())) @@ -523,7 +523,7 @@ found_typename: } } - if (*inbuf.inp.s == '(' && ps.tos <= 1 && ps.ind_level == 0 && + if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 && !ps.in_parameter_declaration && !ps.block_init) { for (const char *p = inbuf.inp.s; p < inbuf.inp.e;) @@ -552,7 +552,7 @@ lexi(void) ps.curr_col_1 = ps.next_col_1; ps.next_col_1 = false; - while (ch_isblank(*inbuf.inp.s)) { + while (ch_isblank(inp_peek())) { ps.curr_col_1 = false; inp_skip(); } @@ -647,19 +647,19 @@ lexi(void) lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; unary_delim = true; - if (*inbuf.inp.s == token.e[-1]) { /* ++, -- */ - *token.e++ = *inbuf.inp.s++; + if (inp_peek() == token.e[-1]) { /* ++, -- */ + *token.e++ = inp_next(); if (ps.prev_token == lsym_word || ps.prev_token == lsym_rparen_or_rbracket) { lsym = ps.next_unary ? 
lsym_unary_op : lsym_postfix_op; unary_delim = false; } - } else if (*inbuf.inp.s == '=') { /* += */ - *token.e++ = *inbuf.inp.s++; + } else if (inp_peek() == '=') { /* += */ + *token.e++ = inp_next(); - } else if (*inbuf.inp.s == '>') { /* -> */ - *token.e++ = *inbuf.inp.s++; + } else if (inp_peek() == '>') { /* -> */ + *token.e++ = inp_next(); unary_delim = false; lsym = lsym_unary_op; ps.want_blank = false; @@ -669,8 +669,8 @@ lexi(void) case '=': if (ps.init_or_struct) ps.block_init = true; - if (*inbuf.inp.s == '=') { /* == */ - *token.e++ = *inbuf.inp.s++; + if (inp_peek() == '=') { /* == */ + *token.e++ = inp_next(); *token.e = '\0'; } lsym = lsym_binary_op; @@ -680,10 +680,10 @@ lexi(void) case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ - if (*inbuf.inp.s == '>' || *inbuf.inp.s == '<' || *inbuf.inp.s == '=') + if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=') + *token.e++ = inp_next(); + if (inp_peek() == '=') *token.e++ = inp_next(); - if (*inbuf.inp.s == '=') - *token.e++ = *inbuf.inp.s++; lsym = ps.next_unary ? 
lsym_unary_op : lsym_binary_op; unary_delim = true; break; @@ -691,14 +691,14 @@ lexi(void) case '*': unary_delim = true; if (!ps.next_unary) { - if (*inbuf.inp.s == '=') - *token.e++ = *inbuf.inp.s++; + if (inp_peek() == '=') + *token.e++ = inp_next(); lsym = lsym_binary_op; break; } - while (*inbuf.inp.s == '*' || isspace((unsigned char)*inbuf.inp.s)) { - if (*inbuf.inp.s == '*') + while (inp_peek() == '*' || isspace((unsigned char)inp_peek())) { + if (inp_peek() == '*') token_add_char('*'); inp_skip(); } @@ -723,7 +723,7 @@ lexi(void) break; default: - if (token.e[-1] == '/' && (*inbuf.inp.s == '*' || *inbuf.inp.s == '/')) { + if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) { *token.e++ = inp_next(); lsym = lsym_comment; unary_delim = ps.next_unary; @@ -731,7 +731,7 @@ lexi(void) } /* handle '||', '&&', etc., and also things as in 'int *****i' */ - while (token.e[-1] == *inbuf.inp.s || *inbuf.inp.s == '=') + while (token.e[-1] == inp_peek() || inp_peek() == '=') token_add_char(inp_next()); lsym = ps.next_unary ? 
lsym_unary_op : lsym_binary_op; Index: src/usr.bin/indent/pr_comment.c diff -u src/usr.bin/indent/pr_comment.c:1.118 src/usr.bin/indent/pr_comment.c:1.119 --- src/usr.bin/indent/pr_comment.c:1.118 Fri Nov 19 15:32:13 2021 +++ src/usr.bin/indent/pr_comment.c Fri Nov 19 17:11:46 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: pr_comment.c,v 1.118 2021/11/19 15:32:13 rillig Exp $ */ +/* $NetBSD: pr_comment.c,v 1.119 2021/11/19 17:11:46 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -43,7 +43,7 @@ static char sccsid[] = "@(#)pr_comment.c #include <sys/cdefs.h> #if defined(__NetBSD__) -__RCSID("$NetBSD: pr_comment.c,v 1.118 2021/11/19 15:32:13 rillig Exp $"); +__RCSID("$NetBSD: pr_comment.c,v 1.119 2021/11/19 17:11:46 rillig Exp $"); #elif defined(__FreeBSD__) __FBSDID("$FreeBSD: head/usr.bin/indent/pr_comment.c 334927 2018-06-10 16:44:18Z pstef $"); #endif @@ -117,9 +117,9 @@ analyze_comment(bool *p_may_wrap, bool * com_ind = 0; } else { - if (*inbuf.inp.s == '-' || *inbuf.inp.s == '*' || + if (inp_peek() == '-' || inp_peek() == '*' || token.e[-1] == '/' || - (*inbuf.inp.s == '\n' && !opt.format_block_comments)) { + (inp_peek() == '\n' && !opt.format_block_comments)) { may_wrap = false; break_delim = false; } @@ -166,7 +166,7 @@ analyze_comment(bool *p_may_wrap, bool * ps.n_comment_delta = -ind_add(0, start, inbuf.inp.s - 2); } else { ps.n_comment_delta = 0; - while (ch_isblank(*inbuf.inp.s)) + while (ch_isblank(inp_peek())) inbuf.inp.s++; } @@ -175,7 +175,7 @@ analyze_comment(bool *p_may_wrap, bool * com_add_char(token.e[-1]); /* either '*' or '/' */ /* TODO: Maybe preserve a single '\t' as well. 
*/ - if (*inbuf.inp.s != ' ' && may_wrap) + if (inp_peek() != ' ' && may_wrap) com_add_char(' '); if (break_delim && fits_in_one_line(adj_max_line_length)) @@ -208,13 +208,13 @@ copy_comment_wrap(int adj_max_line_lengt ssize_t last_blank = -1; /* index of the last blank in com.buf */ for (;;) { - switch (*inbuf.inp.s) { + switch (inp_peek()) { case '\f': dump_line_ff(); last_blank = -1; com_add_delim(); inbuf.inp.s++; - while (ch_isblank(*inbuf.inp.s)) + while (ch_isblank(inp_peek())) inbuf.inp.s++; break; @@ -248,19 +248,19 @@ copy_comment_wrap(int adj_max_line_lengt do { /* flush any blanks and/or tabs at start of * next line */ inp_skip(); - if (*inbuf.inp.s == '*' && skip_asterisk) { + if (inp_peek() == '*' && skip_asterisk) { skip_asterisk = false; inp_skip(); - if (*inbuf.inp.s == '/') + if (inp_peek() == '/') goto end_of_comment; } - } while (ch_isblank(*inbuf.inp.s)); + } while (ch_isblank(inp_peek())); break; /* end of case for newline */ case '*': inp_skip(); - if (*inbuf.inp.s == '/') { + if (inp_peek() == '/') { end_of_comment: inp_skip(); @@ -292,7 +292,7 @@ copy_comment_wrap(int adj_max_line_lengt last_blank = com.e - com.buf; com_add_char(ch); now_len++; - if (memchr("*\n\r\b\t", *inbuf.inp.s, 6) != NULL) + if (memchr("*\n\r\b\t", inp_peek(), 6) != NULL) break; if (now_len >= adj_max_line_length && last_blank != -1) break; @@ -328,7 +328,7 @@ static void copy_comment_nowrap(void) { for (;;) { - if (*inbuf.inp.s == '\n') { + if (inp_peek() == '\n') { if (token.e[-1] == '/') goto finish;