Module Name:    src
Committed By:   rillig
Date:           Sat May 11 16:12:28 UTC 2024

Modified Files:
        src/tests/usr.bin/xlint/lint1: c23.c
        src/usr.bin/xlint/lint1: cgram.y debug.c externs1.h lint1.h scan.l

Log Message:
lint: parse but otherwise ignore C23 attributes

The C23 attributes are only parsed before an expression in an expression
statement, as a proof of concept.  Other places will follow later.


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/tests/usr.bin/xlint/lint1/c23.c
cvs rdiff -u -r1.499 -r1.500 src/usr.bin/xlint/lint1/cgram.y
cvs rdiff -u -r1.78 -r1.79 src/usr.bin/xlint/lint1/debug.c
cvs rdiff -u -r1.225 -r1.226 src/usr.bin/xlint/lint1/externs1.h
cvs rdiff -u -r1.226 -r1.227 src/usr.bin/xlint/lint1/lint1.h
cvs rdiff -u -r1.140 -r1.141 src/usr.bin/xlint/lint1/scan.l

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/usr.bin/xlint/lint1/c23.c
diff -u src/tests/usr.bin/xlint/lint1/c23.c:1.12 src/tests/usr.bin/xlint/lint1/c23.c:1.13
--- src/tests/usr.bin/xlint/lint1/c23.c:1.12	Thu May  9 20:56:41 2024
+++ src/tests/usr.bin/xlint/lint1/c23.c	Sat May 11 16:12:28 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: c23.c,v 1.12 2024/05/09 20:56:41 rillig Exp $	*/
+/*	$NetBSD: c23.c,v 1.13 2024/05/11 16:12:28 rillig Exp $	*/
 # 3 "c23.c"
 
 // Tests for the option -Ac23, which allows features from C23 and all earlier
@@ -109,3 +109,57 @@ thread_local extern int extern_thread_lo
 static thread_local int static_thread_local_1;
 /* expect+1: warning: static variable 'static_thread_local_2' unused [226] */
 thread_local static int static_thread_local_2;
+
+
+int
+attributes(int i)
+{
+	// The attribute list inside an attribute specifier may be empty.
+	[[]]i++;
+
+	// There may be leading or trailing commas.
+	[[,]]i++;
+
+	// There may be arbitrary commas around or between the attributes.
+	[[,,,,,]]i++;
+
+	// An attribute may be a plain identifier without arguments.
+	[[identifier]]i++;
+
+	// The identifier may be prefixed with one additional identifier.
+	[[prefix::identifier]]i++;
+
+	// An attribute may have empty arguments.
+	[[identifier()]]i++;
+
+	// The argument of an attribute may be any balanced token sequence.
+	[[identifier([])]]i++;
+
+	// The commas in this "argument list" are ordinary punctuator tokens,
+	// they do not separate any arguments.
+	// The structure of the attribute argument is:
+	//	1. empty balanced token sequence between '[' and ']'
+	//	2. token ','
+	//	3. empty balanced token sequence between '{' and '}'
+	//	4. token ','
+	//	5. empty balanced token sequence between '(' and ')'
+	[[identifier([], {}, ())]]i++;
+
+	// Inside an argument, parentheses may be nested.
+	[[identifier(((((())))))]]i++;
+	// Inside an argument, brackets may be nested.
+	[[identifier([[[[[]]]]])]]i++;
+	// Inside an argument, braces may be nested.
+	[[identifier({{{{{}}}}})]]i++;
+
+	// An attribute argument may contain arbitrary punctuation.
+	[[identifier(++++ ? ? ? : : :: )]]i++;
+
+	// An attribute argument may contain constants and string literals.
+	[[identifier(0, 0.0, "hello" " " "world")]]i++;
+
+	// A sequence may consist of several attribute specifiers.
+	[[]][[]][[]]i++;
+
+	return i;
+}

Index: src/usr.bin/xlint/lint1/cgram.y
diff -u src/usr.bin/xlint/lint1/cgram.y:1.499 src/usr.bin/xlint/lint1/cgram.y:1.500
--- src/usr.bin/xlint/lint1/cgram.y:1.499	Thu May  9 20:56:41 2024
+++ src/usr.bin/xlint/lint1/cgram.y	Sat May 11 16:12:28 2024
@@ -1,5 +1,5 @@
 %{
-/* $NetBSD: cgram.y,v 1.499 2024/05/09 20:56:41 rillig Exp $ */
+/* $NetBSD: cgram.y,v 1.500 2024/05/11 16:12:28 rillig Exp $ */
 
 /*
  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
@@ -35,7 +35,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__RCSID)
-__RCSID("$NetBSD: cgram.y,v 1.499 2024/05/09 20:56:41 rillig Exp $");
+__RCSID("$NetBSD: cgram.y,v 1.500 2024/05/11 16:12:28 rillig Exp $");
 #endif
 
 #include <limits.h>
@@ -69,6 +69,7 @@ static int saved_lwarn = LWARN_NOTHING_S
 
 static void cgram_declare(sym_t *, bool, sbuf_t *);
 static void read_until_rparen(void);
+static balanced_token_sequence read_balanced_token_sequence(void);
 static sym_t *symbolrename(sym_t *, sbuf_t *);
 
 
@@ -110,6 +111,53 @@ is_either(const char *s, const char *a, 
 	return strcmp(s, a) == 0 || strcmp(s, b) == 0;
 }
 
+/*
+ * Append 'attr' to 'list'.  A full array is replaced by a larger one from
+ * block_zero_alloc, into which the existing entries are copied.
+ */
+static void
+attribute_list_add(attribute_list *list, attribute attr)
+{
+	if (list->len >= list->cap) {
+		attribute *old_attrs = list->attrs;
+		list->cap = 16 + 2 * list->cap;
+		list->attrs = block_zero_alloc(
+		    list->cap * sizeof(*list->attrs), "attribute_list.attrs");
+		/* memcpy must not get the NULL of an empty list. */
+		if (list->len > 0)
+			memcpy(list->attrs, old_attrs,
+			    list->len * sizeof(*list->attrs));
+	}
+	list->attrs[list->len++] = attr;
+}
+
+/* Append each attribute of 'src' to 'dst', keeping their order. */
+static void
+attribute_list_add_all(attribute_list *dst, attribute_list src)
+{
+	for (size_t i = 0, n = src.len; i < n; i++)
+		attribute_list_add(dst, src.attrs[i]);
+}
+
+/*
+ * Build an attribute from copies of the given names.  'prefix' and 'arg' may
+ * be NULL; in that case the corresponding member stays NULL.
+ */
+static attribute
+new_attribute(const sbuf_t *prefix, const sbuf_t *name,
+	      const balanced_token_sequence *arg)
+{
+	attribute attr = { .name = xstrdup(name->sb_name) };
+	if (prefix != NULL)
+		attr.prefix = xstrdup(prefix->sb_name);
+	if (arg != NULL) {
+		attr.arg = block_zero_alloc(sizeof(*attr.arg),
+		    "balanced_token_sequence");
+		*attr.arg = *arg;
+	}
+	return attr;
+}
+
 #if YYDEBUG && YYBYACC
 #define YYSTYPE_TOSTRING cgram_to_string
 #endif
@@ -141,6 +178,9 @@ is_either(const char *s, const char *a, 
 	bool	y_in_system_header;
 	designation y_designation;
 	named_constant y_named_constant;
+	attribute y_attribute;
+	attribute_list y_attribute_list;
+	balanced_token_sequence y_tokens;
 };
 
 /* for Bison:
@@ -231,6 +271,7 @@ is_either(const char *s, const char *a, 
 %token			T_COMMA
 %token			T_SEMI
 %token			T_ELLIPSIS
+%token			T_DCOLON
 %token			T_REAL
 %token			T_IMAG
 %token			T_GENERIC
@@ -388,6 +429,11 @@ is_either(const char *s, const char *a, 
 %type	<y_range>	range
 /* No type for init_lbrace. */
 /* No type for init_rbrace. */
+%type	<y_attribute_list>	attribute_specifier_sequence
+%type	<y_attribute_list>	attribute_specifier
+%type	<y_attribute_list>	attribute_list
+%type	<y_attribute>		attribute
+%type	<y_tokens>		attribute_argument_clause
 %type	<y_name>	asm_or_symbolrename_opt
 /* No type for statement. */
 /* No type for no_attr_statement. */
@@ -1854,27 +1900,71 @@ init_rbrace:			/* helper */
 	}
 ;
 
-/* TODO: Implement 'attribute_specifier_sequence' from C23 6.7.13.2. */
-
-/* TODO: Implement 'attribute_specifier' from C23 6.7.13.2. */
-
-/* TODO: Implement 'attribute_list' from C23 6.7.13.2. */
-
-/* TODO: Implement 'attribute' from C23 6.7.13.2. */
-
-/* TODO: Implement 'attribute_token' from C23 6.7.13.2. */
+/* C23 6.7.13.2; collects the attributes of all '[[...]]' into one list */
+attribute_specifier_sequence:
+	attribute_specifier {
+		$$ = (attribute_list) { NULL, 0, 0 };
+		attribute_list_add_all(&$$, $1);
+	}
+|	attribute_specifier_sequence attribute_specifier {
+		$$ = $1;
+		attribute_list_add_all(&$$, $2);
+	}
+;
 
-/* TODO: Implement 'standard_attribute' from C23 6.7.13.2. */
+/* C23 6.7.13.2; '[[' and ']]' are each matched as two separate tokens */
+attribute_specifier:
+	T_LBRACK T_LBRACK attribute_list T_RBRACK T_RBRACK {
+		$$ = $3;
+	}
+;
 
-/* TODO: Implement 'attribute_prefixed_token' from C23 6.7.13.2. */
+/* C23 6.7.13.2; commas may occur before, between and after attributes */
+attribute_list:
+	/* empty */ {
+		$$ = (attribute_list) { NULL, 0, 0 };
+	}
+|	attribute {
+		$$ = (attribute_list) { NULL, 0, 0 };
+		attribute_list_add(&$$, $1);
+	}
+|	attribute_list T_COMMA	/* default action: $$ = $1 */
+|	attribute_list T_COMMA attribute {
+		$$ = $1;
+		attribute_list_add(&$$, $3);
+	}
+;
 
-/* TODO: Implement 'attribute_prefix' from C23 6.7.13.2. */
+/* C23 6.7.13.2; an optional 'prefix::', a name, an optional '(argument)' */
+attribute:
+	identifier {
+		$$ = new_attribute(NULL, $1, NULL);
+	}
+|	identifier T_DCOLON identifier {
+		$$ = new_attribute($1, $3, NULL);
+	}
+|	identifier attribute_argument_clause {
+		$$ = new_attribute(NULL, $1, &$2);
+	}
+|	identifier T_DCOLON identifier attribute_argument_clause {
+		$$ = new_attribute($1, $3, &$4);
+	}
+;
 
-/* TODO: Implement 'attribute_argument_clause' from C23 6.7.13.2. */
+/* The rule 'attribute_token' is inlined into 'attribute'. */
+/* The rule 'standard_attribute' is inlined into 'attribute_token'. */
+/* The rule 'attribute_prefixed_token' is inlined into 'attribute_token'. */
+/* The rule 'attribute_prefix' is inlined into 'attribute_token'. */
 
-/* TODO: Implement 'balanced_token_sequence' from C23 6.7.13.2. */
+/* C23 6.7.13.2; the tokens after '(' are read directly from the lexer */
+attribute_argument_clause:
+	T_LPAREN {
+		$$ = read_balanced_token_sequence();
+	}
+;
 
-/* TODO: Implement 'balanced_token' from C23 6.7.13.2. */
+/* The rule 'balanced_token_sequence' is inlined into 'attribute_argument_clause'. */
+/* The rule 'balanced_token' is inlined into 'balanced_token_sequence'. */
 
 asm_or_symbolrename_opt:	/* GCC extensions */
 	/* empty */ {
@@ -2013,6 +2103,11 @@ expression_statement:
 		check_statement_reachable();
 		suppress_fallthrough = false;
 	}
+|	attribute_specifier_sequence expression T_SEMI {
+		debug_attribute_list(&$1);
+		expr($2, false, false, false, false);
+		suppress_fallthrough = false;
+	}
 ;
 
 /* C99 6.8.4, C23 6.8.5.1 */
@@ -2404,10 +2499,10 @@ yyerror(const char *msg)
 
 #if YYDEBUG && YYBYACC
 static const char *
-cgram_to_string(int token, YYSTYPE val)
+cgram_to_string(int tok, YYSTYPE val)
 {
 
-	switch (token) {
+	switch (tok) {
 	case T_INCDEC:
 		return val.y_inc ? "++" : "--";
 	case T_MULTIPLICATIVE:
@@ -2467,6 +2562,107 @@ read_until_rparen(void)
 	yyclearin;
 }
 
+/*
+ * Store the token that is currently in yychar into 'tok', taking its value
+ * from yylval or its text from yytext, depending on the kind of token.
+ */
+static void
+fill_token(token *tok)
+{
+	switch (yychar) {
+	case T_NAME:
+	case T_TYPENAME:
+		tok->kind = TK_IDENTIFIER;
+		tok->u.identifier = xstrdup(yylval.y_name->sb_name);
+		break;
+	case T_CON:
+		tok->kind = TK_CONSTANT;
+		tok->u.constant = *yylval.y_val;
+		break;
+	case T_NAMED_CONSTANT:
+		tok->kind = TK_IDENTIFIER;
+		tok->u.identifier = xstrdup(yytext);
+		break;
+	case T_STRING:
+		tok->kind = TK_STRING_LITERALS;
+		tok->u.string_literals.len = yylval.y_string->len;
+		tok->u.string_literals.cap = yylval.y_string->cap;
+		tok->u.string_literals.data = xstrdup(yylval.y_string->data);
+		break;
+	default:
+		/* Every other token is kept as text. */
+		tok->kind = TK_PUNCTUATOR;
+		tok->u.punctuator = xstrdup(yytext);
+	}
+}
+
+/*
+ * Ensure that 'seq' has room for one more token.  A full array is replaced
+ * by a larger one from block_zero_alloc, keeping the existing tokens.
+ */
+static void
+seq_reserve(balanced_token_sequence *seq)
+{
+	if (seq->len >= seq->cap) {
+		seq->cap = 16 + 2 * seq->cap;
+		const balanced_token *old_tokens = seq->tokens;
+		balanced_token *new_tokens = block_zero_alloc(
+		    seq->cap * sizeof(*seq->tokens), "balanced_tokens");
+		/* memcpy must not get the NULL of an empty sequence. */
+		if (seq->len > 0)
+			memcpy(new_tokens, old_tokens,
+			    seq->len * sizeof(*seq->tokens));
+		seq->tokens = new_tokens;
+	}
+}
+
+/*
+ * Read tokens from the lexer until the token that closes 'opening' and
+ * return those in between.  A nested '(', '[' or '{' starts a sub-sequence
+ * that is read recursively.  The closing token itself is left in yychar.
+ */
+static balanced_token_sequence
+read_balanced(int opening)
+{
+	debug_enter();
+	int closing = opening == T_LPAREN ? T_RPAREN
+	    : opening == T_LBRACK ? T_RBRACK : T_RBRACE;
+	balanced_token_sequence seq = { NULL, 0, 0 };
+	debug_step("opening %d, closing %d", opening, closing);
+
+	/* Stop at the closing token, or when yylex returns a value <= 0. */
+	while (yychar = yylex(), yychar > 0 && yychar != closing) {
+		debug_step("reading token %d", yychar);
+		seq_reserve(&seq);
+		if (yychar == T_LPAREN
+		    || yychar == T_LBRACK
+		    || yychar == T_LBRACE) {
+			seq.tokens[seq.len].kind = yychar == T_LPAREN ? '('
+			    : yychar == T_LBRACK ? '[' : '{';
+			seq.tokens[seq.len++].u.tokens = read_balanced(yychar);
+		} else
+			/* 'kind' is left as allocated, presumably '\0'. */
+			fill_token(&seq.tokens[seq.len++].u.token);
+	}
+	debug_leave();
+	return seq;
+}
+
+/*
+ * Read the rest of an attribute argument clause, after its '(' has been
+ * consumed by the grammar, up to and including the matching ')'.
+ */
+static balanced_token_sequence
+read_balanced_token_sequence(void)
+{
+	/* NOTE(review): presumably means that no lookahead is pending. */
+	lint_assert(yychar < 0);
+	balanced_token_sequence seq = read_balanced(T_LPAREN);
+	/* Discard the token that read_balanced left in yychar. */
+	yyclearin;
+	return seq;
+}
+
 static sym_t *
 symbolrename(sym_t *s, sbuf_t *sb)
 {

Index: src/usr.bin/xlint/lint1/debug.c
diff -u src/usr.bin/xlint/lint1/debug.c:1.78 src/usr.bin/xlint/lint1/debug.c:1.79
--- src/usr.bin/xlint/lint1/debug.c:1.78	Thu May  9 11:08:07 2024
+++ src/usr.bin/xlint/lint1/debug.c	Sat May 11 16:12:28 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: debug.c,v 1.78 2024/05/09 11:08:07 rillig Exp $ */
+/* $NetBSD: debug.c,v 1.79 2024/05/11 16:12:28 rillig Exp $ */
 
 /*-
  * Copyright (c) 2021 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__RCSID)
-__RCSID("$NetBSD: debug.c,v 1.78 2024/05/09 11:08:07 rillig Exp $");
+__RCSID("$NetBSD: debug.c,v 1.79 2024/05/11 16:12:28 rillig Exp $");
 #endif
 
 #include <stdlib.h>
@@ -526,4 +526,87 @@ debug_dcs_all(void)
 		debug_decl_level(dl);
 	}
 }
+
+/* Print a single token, in a form that depends on its kind. */
+static void
+debug_token(const token *tok)
+{
+	switch (tok->kind) {
+	case TK_PUNCTUATOR:
+		debug_printf("%s", tok->u.punctuator);
+		return;
+	case TK_STRING_LITERALS:
+		debug_printf("%s", tok->u.string_literals.data);
+		return;
+	case TK_IDENTIFIER:
+		debug_printf("%s", tok->u.identifier);
+		return;
+	case TK_CONSTANT: {
+		const val_t *con = &tok->u.constant;
+		tspec_t t = con->v_tspec;
+
+		if (is_floating(t)) {
+			debug_printf("%Lg", con->u.floating);
+		} else if (is_uinteger(t)) {
+			debug_printf("%llu",
+			    (unsigned long long)con->u.integer);
+		} else if (is_integer(t)) {
+			debug_printf("%lld", (long long)con->u.integer);
+		} else {
+			lint_assert(t == BOOL);
+			const char *text = con->u.integer != 0
+			    ? "true" : "false";
+			debug_printf("%s", text);
+		}
+		return;
+	}
+	}
+}
+
+/*
+ * Print the tokens of a sequence, separated by spaces; a nested sequence is
+ * enclosed in its pair of brackets.
+ */
+static void
+debug_balanced_token_sequence(const balanced_token_sequence *seq)
+{
+	const char *separator = "";
+
+	for (size_t idx = 0; idx < seq->len; idx++, separator = " ") {
+		const balanced_token *bt = &seq->tokens[idx];
+
+		if (bt->kind == '\0') {
+			debug_printf("%s", separator);
+			debug_token(&bt->u.token);
+			continue;
+		}
+
+		char closing = bt->kind == '(' ? ')'
+		    : bt->kind == '[' ? ']' : '}';
+		debug_printf("%s%c", separator, bt->kind);
+		debug_balanced_token_sequence(&bt->u.tokens);
+		debug_printf("%c", closing);
+	}
+}
+
+/* Print each attribute of the list, wrapped in "attribute [[" and "]]". */
+void
+debug_attribute_list(const attribute_list *list)
+{
+	for (size_t idx = 0; idx < list->len; idx++) {
+		const attribute *attr = &list->attrs[idx];
+
+		debug_printf("attribute [[");
+		if (attr->prefix != NULL)
+			debug_printf("%s::", attr->prefix);
+		debug_printf("%s", attr->name);
+		if (attr->arg != NULL) {
+			debug_printf("(");
+			debug_balanced_token_sequence(attr->arg);
+			debug_printf(")");
+		}
+		debug_step("]]");
+	}
+}
+
 #endif

Index: src/usr.bin/xlint/lint1/externs1.h
diff -u src/usr.bin/xlint/lint1/externs1.h:1.225 src/usr.bin/xlint/lint1/externs1.h:1.226
--- src/usr.bin/xlint/lint1/externs1.h:1.225	Thu May  9 11:08:07 2024
+++ src/usr.bin/xlint/lint1/externs1.h	Sat May 11 16:12:28 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: externs1.h,v 1.225 2024/05/09 11:08:07 rillig Exp $	*/
+/*	$NetBSD: externs1.h,v 1.226 2024/05/11 16:12:28 rillig Exp $	*/
 
 /*
  * Copyright (c) 1994, 1995 Jochen Pohl
@@ -152,6 +152,7 @@ void debug_pop_indented(bool);
 void debug_enter_func(const char *);
 void debug_step(const char *fmt, ...) __printflike(1, 2);
 void debug_leave_func(const char *);
+void debug_attribute_list(const attribute_list *);
 #define	debug_enter()		debug_enter_func(__func__)
 #define	debug_leave()		debug_leave_func(__func__)
 #else
@@ -171,6 +172,7 @@ void debug_leave_func(const char *);
 #define	debug_enter()		debug_noop()
 #define	debug_step(...)		debug_noop()
 #define	debug_leave()		debug_noop()
+#define	debug_attribute_list(list) debug_noop()
 #endif
 
 /*

Index: src/usr.bin/xlint/lint1/lint1.h
diff -u src/usr.bin/xlint/lint1/lint1.h:1.226 src/usr.bin/xlint/lint1/lint1.h:1.227
--- src/usr.bin/xlint/lint1/lint1.h:1.226	Thu May  9 11:08:07 2024
+++ src/usr.bin/xlint/lint1/lint1.h	Sat May 11 16:12:28 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: lint1.h,v 1.226 2024/05/09 11:08:07 rillig Exp $ */
+/* $NetBSD: lint1.h,v 1.227 2024/05/11 16:12:28 rillig Exp $ */
 
 /*
  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
@@ -513,6 +513,52 @@ typedef struct {
 	bool unescaped_newline;	/* stops iterating */
 } quoted_iterator;
 
+typedef enum {			/* selects the active member of token.u */
+	TK_IDENTIFIER,		/* a name, type name or named constant */
+	TK_CONSTANT,		/* a constant, stored with its value */
+	TK_STRING_LITERALS,	/* a string literal token */
+	TK_PUNCTUATOR,		/* any other token, stored as text */
+} token_kind;
+
+typedef struct token {
+	token_kind kind;	/* selects the member of 'u' that is in use */
+	union {
+		const char *identifier;		/* TK_IDENTIFIER */
+		val_t constant;			/* TK_CONSTANT */
+		buffer string_literals;		/* TK_STRING_LITERALS */
+		const char *punctuator;		/* TK_PUNCTUATOR */
+	} u;
+} token;
+
+typedef struct balanced_token_sequence balanced_token_sequence;
+typedef struct balanced_token balanced_token;
+
+struct balanced_token_sequence {
+	balanced_token *tokens;	/* NULL until the first token is added */
+	size_t len;		/* number of tokens in use */
+	size_t cap;		/* number of tokens that 'tokens' has room for */
+};
+
+struct balanced_token {
+	char kind;	// '\0' for a plain token, else the opening '(', '[', '{'
+	union {
+		token token;			/* if kind is '\0' */
+		balanced_token_sequence tokens;	/* else the nested tokens */
+	} u;
+};
+
+typedef struct {
+	const char *prefix;		/* NULL if there is no 'prefix::' */
+	const char *name;
+	balanced_token_sequence *arg;	/* NULL if there is no '(...)' */
+} attribute;
+
+typedef struct {
+	attribute *attrs;	/* NULL until the first attribute is added */
+	size_t len;		/* number of attributes in use */
+	size_t cap;		/* number of attributes that 'attrs' has room for */
+} attribute_list;
+
 #include "externs1.h"
 
 #define lint_assert(cond)						\

Index: src/usr.bin/xlint/lint1/scan.l
diff -u src/usr.bin/xlint/lint1/scan.l:1.140 src/usr.bin/xlint/lint1/scan.l:1.141
--- src/usr.bin/xlint/lint1/scan.l:1.140	Thu Sep 14 22:20:08 2023
+++ src/usr.bin/xlint/lint1/scan.l	Sat May 11 16:12:28 2024
@@ -1,5 +1,5 @@
 %{
-/* $NetBSD: scan.l,v 1.140 2023/09/14 22:20:08 rillig Exp $ */
+/* $NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $ */
 
 /*
  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
@@ -35,7 +35,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__RCSID)
-__RCSID("$NetBSD: scan.l,v 1.140 2023/09/14 22:20:08 rillig Exp $");
+__RCSID("$NetBSD: scan.l,v 1.141 2024/05/11 16:12:28 rillig Exp $");
 #endif
 
 #include "lint1.h"
@@ -111,6 +111,7 @@ FSUF	([fFlL]?[i]?)
 "("				return T_LPAREN;
 ")"				return T_RPAREN;
 "..."				return T_ELLIPSIS;
+"::"				return T_DCOLON;
 "'"				return lex_character_constant();
 "L'"				return lex_wide_character_constant();
 ^#.*$				lex_directive(yytext);

Reply via email to