Module Name:    src
Committed By:   rillig
Date:           Fri Jun 9 16:23:43 UTC 2023

Modified Files:
        src/usr.bin/indent: indent.c indent.h

Log Message:
indent: group lexer symbols by topic, sort processing functions

No functional change.

To generate a diff of this commit:
cvs rdiff -u -r1.346 -r1.347 src/usr.bin/indent/indent.c
cvs rdiff -u -r1.181 -r1.182 src/usr.bin/indent/indent.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/usr.bin/indent/indent.c diff -u src/usr.bin/indent/indent.c:1.346 src/usr.bin/indent/indent.c:1.347 --- src/usr.bin/indent/indent.c:1.346 Fri Jun 9 11:22:31 2023 +++ src/usr.bin/indent/indent.c Fri Jun 9 16:23:43 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.c,v 1.346 2023/06/09 11:22:31 rillig Exp $ */ +/* $NetBSD: indent.c,v 1.347 2023/06/09 16:23:43 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: indent.c,v 1.346 2023/06/09 11:22:31 rillig Exp $"); +__RCSID("$NetBSD: indent.c,v 1.347 2023/06/09 16:23:43 rillig Exp $"); #include <sys/param.h> #include <err.h> @@ -185,6 +185,23 @@ init_globals(void) backup_suffix = suffix; } +static void +load_profiles(int argc, char **argv) +{ + const char *profile_name = NULL; + + for (int i = 1; i < argc; ++i) { + const char *arg = argv[i]; + + if (strcmp(arg, "-npro") == 0) + return; + if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') + profile_name = arg + 2; + } + + load_profile_files(profile_name); +} + /* * Copy the input file to the backup file, then make the backup file the input * and the original input file the output. 
@@ -227,23 +244,6 @@ bakcopy(void) } static void -load_profiles(int argc, char **argv) -{ - const char *profile_name = NULL; - - for (int i = 1; i < argc; ++i) { - const char *arg = argv[i]; - - if (strcmp(arg, "-npro") == 0) - return; - if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') - profile_name = arg + 2; - } - - load_profile_files(profile_name); -} - -static void parse_command_line(int argc, char **argv) { for (int i = 1; i < argc; ++i) { @@ -308,6 +308,44 @@ set_initial_indentation(void) } static void +maybe_break_line(lexer_symbol lsym) +{ + if (!ps.force_nl) + return; + if (lsym == lsym_semicolon) + return; + if (lsym == lsym_lbrace && opt.brace_same_line + && ps.prev_lsym != lsym_lbrace) + return; + + output_line(); + ps.force_nl = false; +} + +static void +move_com_to_code(lexer_symbol lsym) +{ + if (ps.want_blank) + buf_add_char(&code, ' '); + buf_add_buf(&code, &com); + com.len = 0; + ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket; +} + +static void +update_ps_lbrace_kind(lexer_symbol lsym) +{ + if (lsym == lsym_tag) { + ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct : + token.s[0] == 'u' ? psym_lbrace_union : + psym_lbrace_enum; + } else if (lsym != lsym_type_outside_parentheses + && lsym != lsym_word + && lsym != lsym_lbrace) + ps.lbrace_kind = psym_lbrace_block; +} + +static void indent_declarator(int decl_ind, bool tabs_to_var) { int base = ps.ind_level * opt.indent_size; @@ -329,17 +367,14 @@ indent_declarator(int decl_ind, bool tab ps.decl_indent_done = true; } -static void -update_ps_lbrace_kind(lexer_symbol lsym) +static bool +is_function_pointer_declaration(void) { - if (lsym == lsym_tag) { - ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct : - token.s[0] == 'u' ? 
psym_lbrace_union : - psym_lbrace_enum; - } else if (lsym != lsym_type_outside_parentheses - && lsym != lsym_word - && lsym != lsym_lbrace) - ps.lbrace_kind = psym_lbrace_block; + return ps.in_decl + && !ps.block_init + && !ps.decl_indent_done + && !ps.is_function_definition + && ps.line_start_nparen == 0; } static int @@ -359,29 +394,90 @@ process_eof(void) return found_err ? EXIT_FAILURE : EXIT_SUCCESS; } +/* move the whole line to the 'label' buffer */ static void -maybe_break_line(lexer_symbol lsym) +read_preprocessing_line(void) { - if (!ps.force_nl) - return; - if (lsym == lsym_semicolon) - return; - if (lsym == lsym_lbrace && opt.brace_same_line - && ps.prev_lsym != lsym_lbrace) - return; + enum { + PLAIN, STR, CHR, COMM + } state = PLAIN; - output_line(); - ps.force_nl = false; + buf_add_char(&lab, '#'); + + while (inp_p[0] != '\n' || (state == COMM && !had_eof)) { + buf_add_char(&lab, inp_next()); + switch (lab.s[lab.len - 1]) { + case '\\': + if (state != COMM) + buf_add_char(&lab, inp_next()); + break; + case '/': + if (inp_p[0] == '*' && state == PLAIN) { + state = COMM; + buf_add_char(&lab, *inp_p++); + } + break; + case '"': + if (state == STR) + state = PLAIN; + else if (state == PLAIN) + state = STR; + break; + case '\'': + if (state == CHR) + state = PLAIN; + else if (state == PLAIN) + state = CHR; + break; + case '*': + if (inp_p[0] == '/' && state == COMM) { + state = PLAIN; + buf_add_char(&lab, *inp_p++); + } + break; + } + } + + while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1])) + lab.len--; } static void -move_com_to_code(lexer_symbol lsym) +process_preprocessing(void) { - if (ps.want_blank) - buf_add_char(&code, ' '); - buf_add_buf(&code, &com); - com.len = 0; - ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket; + if (lab.len > 0 || code.len > 0 || com.len > 0) + output_line(); + + read_preprocessing_line(); + + const char *dir = lab.s + 1, *line_end = lab.s + lab.len; + while (dir < line_end && ch_isblank(*dir)) + dir++; + 
size_t dir_len = 0; + while (dir + dir_len < line_end && ch_isalpha(dir[dir_len])) + dir_len++; + + if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) { + if ((size_t)ifdef_level < array_length(state_stack)) + state_stack[ifdef_level++] = ps; + else + diag(1, "#if stack overflow"); + out.line_kind = lk_if; + + } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) { + if (ifdef_level <= 0) + diag(1, dir[2] == 'i' + ? "Unmatched #elif" : "Unmatched #else"); + else + ps = state_stack[ifdef_level - 1]; + + } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) { + if (ifdef_level <= 0) + diag(1, "Unmatched #endif"); + else + ifdef_level--; + out.line_kind = lk_endif; + } } static void @@ -406,16 +502,6 @@ stay_in_line: } static bool -is_function_pointer_declaration(void) -{ - return ps.in_decl - && !ps.block_init - && !ps.decl_indent_done - && !ps.is_function_definition - && ps.line_start_nparen == 0; -} - -static bool want_blank_before_lparen(void) { if (!ps.want_blank) @@ -477,28 +563,6 @@ process_lparen(void) } static void -process_lbracket(void) -{ - if (++ps.nparen == array_length(ps.paren)) { - diag(0, "Reached internal limit of %zu unclosed parentheses", - array_length(ps.paren)); - ps.nparen--; - } - - if (code.len > 0 - && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op)) - buf_add_char(&code, ' '); - ps.want_blank = false; - buf_add_char(&code, token.s[0]); - - int indent = ind_add(0, code.s, code.len); - - ps.paren[ps.nparen - 1].indent = indent; - ps.paren[ps.nparen - 1].cast = cast_no; - debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent); -} - -static void process_rparen(void) { if (ps.nparen == 0) { @@ -537,6 +601,28 @@ unbalanced: } static void +process_lbracket(void) +{ + if (++ps.nparen == array_length(ps.paren)) { + diag(0, "Reached internal limit of %zu unclosed parentheses", + array_length(ps.paren)); + ps.nparen--; + } + + if (code.len > 0 + && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op)) + 
buf_add_char(&code, ' '); + ps.want_blank = false; + buf_add_char(&code, token.s[0]); + + int indent = ind_add(0, code.s, code.len); + + ps.paren[ps.nparen - 1].indent = indent; + ps.paren[ps.nparen - 1].cast = cast_no; + debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent); +} + +static void process_rbracket(void) { if (ps.nparen == 0) { @@ -554,129 +640,15 @@ unbalanced: } static void -process_unary_op(void) +process_lbrace(void) { - if (is_function_pointer_declaration()) { - int ind = ps.decl_ind - (int)token.len; - indent_declarator(ind, ps.tabs_to_var); - ps.want_blank = false; - } else if ((token.s[0] == '+' || token.s[0] == '-') - && code.len > 0 && code.s[code.len - 1] == token.s[0]) - ps.want_blank = true; + if (ps.prev_lsym == lsym_rparen && ps.prev_paren_was_cast) { + ps.block_init = true; + ps.init_or_struct = true; + } - if (ps.want_blank) - buf_add_char(&code, ' '); - buf_add_buf(&code, &token); - ps.want_blank = false; -} - -static void -process_postfix_op(void) -{ - buf_add_buf(&code, &token); - ps.want_blank = true; -} - -static void -process_question(void) -{ - ps.quest_level++; - if (code.len == 0) { - ps.in_stmt_cont = true; - ps.in_stmt_or_decl = true; - ps.in_decl = false; - } -} - -static void -process_colon_question(void) -{ - if (code.len == 0) { - ps.in_stmt_cont = true; - ps.in_stmt_or_decl = true; - ps.in_decl = false; - } -} - -static void -process_colon_label(void) -{ - buf_add_buf(&lab, &code); - buf_add_char(&lab, ':'); - code.len = 0; - - if (ps.seen_case) - out.line_kind = lk_case_or_default; - ps.in_stmt_or_decl = false; - ps.force_nl = ps.seen_case; - ps.seen_case = false; - ps.want_blank = false; -} - -static void -process_colon_other(void) -{ - buf_add_char(&code, ':'); - ps.want_blank = false; -} - -static void -process_semicolon(void) -{ - if (out.line_kind == lk_stmt_head) - out.line_kind = lk_other; - if (ps.decl_level == 0) - ps.init_or_struct = false; - ps.seen_case = false; /* only needs to be reset on 
error */ - ps.quest_level = 0; /* only needs to be reset on error */ - if (ps.prev_lsym == lsym_rparen) - ps.in_func_def_params = false; - ps.block_init = false; - ps.block_init_level = 0; - ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; - - if (ps.in_decl && code.len == 0 && !ps.block_init && - !ps.decl_indent_done && ps.line_start_nparen == 0) { - /* indent stray semicolons in declarations */ - indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); - } - - ps.in_decl = ps.decl_level > 0; /* if we were in a first level - * structure declaration before, we - * aren't anymore */ - - if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) { - /* There were unbalanced parentheses in the statement. It is a - * bit complicated, because the semicolon might be in a for - * statement. */ - diag(1, "Unbalanced parentheses"); - ps.nparen = 0; - if (ps.spaced_expr_psym != psym_0) { - parse(ps.spaced_expr_psym); - ps.spaced_expr_psym = psym_0; - } - } - buf_add_char(&code, ';'); - ps.want_blank = true; - ps.in_stmt_or_decl = ps.nparen > 0; - ps.decl_ind = 0; - - if (ps.spaced_expr_psym == psym_0) { - parse(psym_stmt); - ps.force_nl = true; - } -} - -static void -process_lbrace(void) -{ - if (ps.prev_lsym == lsym_rparen && ps.prev_paren_was_cast) { - ps.block_init = true; - ps.init_or_struct = true; - } - - if (out.line_kind == lk_stmt_head) - out.line_kind = lk_other; + if (out.line_kind == lk_stmt_head) + out.line_kind = lk_other; ps.in_stmt_or_decl = false; /* don't indent the {} */ @@ -779,29 +751,151 @@ process_rbrace(void) } static void -process_do(void) +process_period(void) { - ps.in_stmt_or_decl = false; - ps.in_decl = false; + if (code.len > 0 && code.s[code.len - 1] == ',') + buf_add_char(&code, ' '); + buf_add_char(&code, '.'); + ps.want_blank = false; +} - if (code.len > 0) - output_line(); +static void +process_unary_op(void) +{ + if (is_function_pointer_declaration()) { + int ind = ps.decl_ind - (int)token.len; + indent_declarator(ind, 
ps.tabs_to_var); + ps.want_blank = false; + } else if ((token.s[0] == '+' || token.s[0] == '-') + && code.len > 0 && code.s[code.len - 1] == token.s[0]) + ps.want_blank = true; - ps.force_nl = true; - parse(psym_do); + if (ps.want_blank) + buf_add_char(&code, ' '); + buf_add_buf(&code, &token); + ps.want_blank = false; } static void -process_else(void) +process_postfix_op(void) +{ + buf_add_buf(&code, &token); + ps.want_blank = true; +} + +static void +process_question(void) +{ + ps.quest_level++; + if (code.len == 0) { + ps.in_stmt_cont = true; + ps.in_stmt_or_decl = true; + ps.in_decl = false; + } +} + +static void +process_colon_question(void) +{ + if (code.len == 0) { + ps.in_stmt_cont = true; + ps.in_stmt_or_decl = true; + ps.in_decl = false; + } +} + +static void +process_comma(void) +{ + ps.want_blank = code.len > 0; /* only put blank after comma if comma + * does not start the line */ + + if (ps.in_decl && !ps.is_function_definition && !ps.block_init && + !ps.decl_indent_done && ps.line_start_nparen == 0) { + /* indent leading commas and not the actual identifiers */ + indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); + } + + buf_add_char(&code, ','); + + if (ps.nparen == 0) { + if (ps.block_init_level == 0) + ps.block_init = false; + int typical_varname_length = 8; + if (ps.break_after_comma && (opt.break_after_comma || + ind_add(compute_code_indent(), code.s, code.len) + >= opt.max_line_length - typical_varname_length)) + ps.force_nl = true; + } +} + +static void +process_colon_label(void) { + buf_add_buf(&lab, &code); + buf_add_char(&lab, ':'); + code.len = 0; + + if (ps.seen_case) + out.line_kind = lk_case_or_default; ps.in_stmt_or_decl = false; + ps.force_nl = ps.seen_case; + ps.seen_case = false; + ps.want_blank = false; +} - if (code.len > 0 - && !(opt.cuddle_else && code.s[code.len - 1] == '}')) - output_line(); +static void +process_colon_other(void) +{ + buf_add_char(&code, ':'); + ps.want_blank = false; +} - ps.force_nl = true; - 
parse(psym_else); +static void +process_semicolon(void) +{ + if (out.line_kind == lk_stmt_head) + out.line_kind = lk_other; + if (ps.decl_level == 0) + ps.init_or_struct = false; + ps.seen_case = false; /* only needs to be reset on error */ + ps.quest_level = 0; /* only needs to be reset on error */ + if (ps.prev_lsym == lsym_rparen) + ps.in_func_def_params = false; + ps.block_init = false; + ps.block_init_level = 0; + ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; + + if (ps.in_decl && code.len == 0 && !ps.block_init && + !ps.decl_indent_done && ps.line_start_nparen == 0) { + /* indent stray semicolons in declarations */ + indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); + } + + ps.in_decl = ps.decl_level > 0; /* if we were in a first level + * structure declaration before, we + * aren't anymore */ + + if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) { + /* There were unbalanced parentheses in the statement. It is a + * bit complicated, because the semicolon might be in a for + * statement. 
*/ + diag(1, "Unbalanced parentheses"); + ps.nparen = 0; + if (ps.spaced_expr_psym != psym_0) { + parse(ps.spaced_expr_psym); + ps.spaced_expr_psym = psym_0; + } + } + buf_add_char(&code, ';'); + ps.want_blank = true; + ps.in_stmt_or_decl = ps.nparen > 0; + ps.decl_ind = 0; + + if (ps.spaced_expr_psym == psym_0) { + parse(psym_stmt); + ps.force_nl = true; + } } static void @@ -862,130 +956,36 @@ process_ident(lexer_symbol lsym) } static void -process_period(void) -{ - if (code.len > 0 && code.s[code.len - 1] == ',') - buf_add_char(&code, ' '); - buf_add_char(&code, '.'); - ps.want_blank = false; -} - -static void -process_comma(void) +process_do(void) { - ps.want_blank = code.len > 0; /* only put blank after comma if comma - * does not start the line */ - - if (ps.in_decl && !ps.is_function_definition && !ps.block_init && - !ps.decl_indent_done && ps.line_start_nparen == 0) { - /* indent leading commas and not the actual identifiers */ - indent_declarator(ps.decl_ind - 1, ps.tabs_to_var); - } + ps.in_stmt_or_decl = false; + ps.in_decl = false; - buf_add_char(&code, ','); + if (code.len > 0) + output_line(); - if (ps.nparen == 0) { - if (ps.block_init_level == 0) - ps.block_init = false; - int typical_varname_length = 8; - if (ps.break_after_comma && (opt.break_after_comma || - ind_add(compute_code_indent(), code.s, code.len) - >= opt.max_line_length - typical_varname_length)) - ps.force_nl = true; - } + ps.force_nl = true; + parse(psym_do); } -/* move the whole line to the 'label' buffer */ static void -read_preprocessing_line(void) +process_else(void) { - enum { - PLAIN, STR, CHR, COMM - } state = PLAIN; - - buf_add_char(&lab, '#'); - - while (inp_p[0] != '\n' || (state == COMM && !had_eof)) { - buf_add_char(&lab, inp_next()); - switch (lab.s[lab.len - 1]) { - case '\\': - if (state != COMM) - buf_add_char(&lab, inp_next()); - break; - case '/': - if (inp_p[0] == '*' && state == PLAIN) { - state = COMM; - buf_add_char(&lab, *inp_p++); - } - break; - case '"': - if 
(state == STR) - state = PLAIN; - else if (state == PLAIN) - state = STR; - break; - case '\'': - if (state == CHR) - state = PLAIN; - else if (state == PLAIN) - state = CHR; - break; - case '*': - if (inp_p[0] == '/' && state == COMM) { - state = PLAIN; - buf_add_char(&lab, *inp_p++); - } - break; - } - } - - while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1])) - lab.len--; -} + ps.in_stmt_or_decl = false; -static void -process_preprocessing(void) -{ - if (lab.len > 0 || code.len > 0 || com.len > 0) + if (code.len > 0 + && !(opt.cuddle_else && code.s[code.len - 1] == '}')) output_line(); - read_preprocessing_line(); - - const char *dir = lab.s + 1, *line_end = lab.s + lab.len; - while (dir < line_end && ch_isblank(*dir)) - dir++; - size_t dir_len = 0; - while (dir + dir_len < line_end && ch_isalpha(dir[dir_len])) - dir_len++; - - if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) { - if ((size_t)ifdef_level < array_length(state_stack)) - state_stack[ifdef_level++] = ps; - else - diag(1, "#if stack overflow"); - out.line_kind = lk_if; - - } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) { - if (ifdef_level <= 0) - diag(1, dir[2] == 'i' - ? 
"Unmatched #elif" : "Unmatched #else"); - else - ps = state_stack[ifdef_level - 1]; - - } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) { - if (ifdef_level <= 0) - diag(1, "Unmatched #endif"); - else - ifdef_level--; - out.line_kind = lk_endif; - } + ps.force_nl = true; + parse(psym_else); } static void process_lsym(lexer_symbol lsym) { switch (lsym) { - /* INDENT OFF */ + /* INDENT OFF */ case lsym_preprocessing: process_preprocessing(); break; case lsym_newline: process_newline(); break; case lsym_comment: process_comment(); break; @@ -1015,7 +1015,7 @@ process_lsym(lexer_symbol lsym) case lsym_if: ps.spaced_expr_psym = psym_if_expr; goto copy_token; case lsym_switch: ps.spaced_expr_psym = psym_switch_expr; goto copy_token; case lsym_while: ps.spaced_expr_psym = psym_while_expr; goto copy_token; - /* INDENT ON */ + /* INDENT ON */ case lsym_tag: if (ps.nparen > 0) Index: src/usr.bin/indent/indent.h diff -u src/usr.bin/indent/indent.h:1.181 src/usr.bin/indent/indent.h:1.182 --- src/usr.bin/indent/indent.h:1.181 Fri Jun 9 10:24:55 2023 +++ src/usr.bin/indent/indent.h Fri Jun 9 16:23:43 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.h,v 1.181 2023/06/09 10:24:55 rillig Exp $ */ +/* $NetBSD: indent.h,v 1.182 2023/06/09 16:23:43 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD @@ -73,36 +73,40 @@ typedef enum lexer_symbol { lsym_preprocessing, /* the initial '#' of a preprocessing line */ lsym_newline, lsym_comment, /* the initial '/ *' or '//' of a comment */ + lsym_lparen, - lsym_lbracket, lsym_rparen, + lsym_lbracket, lsym_rbracket, lsym_lbrace, lsym_rbrace, + lsym_period, lsym_unary_op, /* e.g. '*', '&', '-' or leading '++' */ + lsym_sizeof, + lsym_offsetof, lsym_postfix_op, /* trailing '++' or '--' */ lsym_binary_op, /* e.g. '*', '&', '<<', '&&' or '/=' */ lsym_question, /* the '?' 
from a '?:' expression */ lsym_colon_question, /* the ':' from a '?:' expression */ + lsym_comma, + + lsym_typedef, + lsym_modifier, /* modifiers for types, functions, variables */ + lsym_tag, /* 'struct', 'union' or 'enum' */ + lsym_type_outside_parentheses, + lsym_type_in_parentheses, + lsym_word, /* identifier, constant or string */ + lsym_funcname, /* name of a function being defined */ lsym_colon_label, /* the ':' after a label */ lsym_colon_other, /* bit-fields, generic-association (C11), * enum-type-specifier (C23), * attribute-prefixed-token (C23), * pp-prefixed-parameter (C23 6.10) */ - lsym_comma, lsym_semicolon, - lsym_typedef, - lsym_modifier, /* modifiers for types, functions, variables */ - lsym_type_outside_parentheses, - lsym_type_in_parentheses, - lsym_tag, /* 'struct', 'union' or 'enum' */ + lsym_case, lsym_default, - lsym_sizeof, - lsym_offsetof, - lsym_word, /* identifier, constant or string */ - lsym_funcname, /* name of a function being defined */ lsym_do, lsym_else, lsym_for,