#!/usr/bin/env python3
# A tool to automatically generate documentation for the config options of the
# clang static analyzer by reading `AnalyzerOptions.def`.

import argparse
from collections import namedtuple
from enum import Enum, auto
import re
import sys
import textwrap


# The following code implements a trivial parser for the narrow subset of C++
# which is used in AnalyzerOptions.def. This supports the following features:
# - ignores preprocessor directives, even if they are continued with \ at EOL
# - ignores comments: both /* ... */ and // ...
# - parses string literals (even if they contain \" escapes)
# - concatenates adjacent string literals
# - parses numbers even if they contain ' as a thousands separator
# - recognizes MACRO(arg1, arg2, ..., argN) calls


class TT(Enum):
    "Token type enum."
    number = auto()
    ident = auto()
    string = auto()
    punct = auto()


# Ordered table of (compiled regex, token kind) pairs. `tokenize` tries the
# entries from top to bottom at the current position and takes the first one
# that matches, so the order matters (e.g. `number` must precede `ident`
# because `\w` also matches digits). Entries whose kind is None are recognized
# and then discarded.
TOKENS = [
    # A number must start with a digit; `'` is only accepted afterwards (as a
    # digit separator), so a stray apostrophe is not mistaken for a number.
    (re.compile(r"-?[0-9][0-9']*"), TT.number),
    (re.compile(r"\w+"), TT.ident),
    (re.compile(r'"([^\\"]|\\.)*"'), TT.string),
    (re.compile(r"[(),]"), TT.punct),
    (re.compile(r"/\*((?!\*/).)*\*/", re.S), None),  # C-style comment
    # `.` does not match a newline, so this stops at the end of the line (or at
    # the end of the input); the newline itself is eaten by the whitespace rule.
    (re.compile(r"//.*"), None),  # C++ style oneline comment
    # Consumes up to (not including) the first newline that is not directly
    # preceded by a backslash, or up to the end of the input.
    (re.compile(r"#(?:\\\n|.)*"), None),  # preprocessor directive
    (re.compile(r"\s+"), None),  # whitespace
]

# A lexed token: `kind` is a TT member, `code` is the exact matched source text.
Token = namedtuple("Token", "kind code")


def report_unexpected(s, pos):
    """Print a diagnostic to stderr about the character `s[pos]`.

    The location is reported as a 1-based line number and a 1-based column
    within `s`. Nothing is returned and no exception is raised.
    """
    lineno = s.count("\n", 0, pos) + 1
    # rfind() yields -1 when there is no newline before `pos`, which makes the
    # column on the first line come out 1-based just like on later lines.
    col = pos - s.rfind("\n", 0, pos)
    print(
        "unexpected character %r in AnalyzerOptions.def at line %d column %d"
        % (s[pos], lineno, col),
        file=sys.stderr,
    )


def tokenize(s):
    """Split the string `s` into a list of `Token`s.

    At each position the patterns in `TOKENS` are tried in order and the first
    match is consumed; matches whose kind is None (comments, preprocessor
    directives, whitespace) produce no token. A character that no pattern
    matches is reported via `report_unexpected` and skipped, so tokenization
    always continues to the end of the input.
    """
    result = []
    pos = 0
    while pos < len(s):
        for regex, kind in TOKENS:
            if m := regex.match(s, pos):
                if kind is not None:
                    result.append(Token(kind, m.group(0)))
                pos = m.end()
                break
        else:
            # No pattern matched here: complain and resynchronize one
            # character later.
            report_unexpected(s, pos)
            pos += 1
    return result
Did you intentionally write your own tokenizer? I figured Python has dozens of libraries that could do this for us. Were you trying to avoid third-party dependencies? https://github.com/llvm/llvm-project/pull/135169 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits