================ @@ -6,17 +6,27 @@ import collections import re import os -from urllib.request import urlopen +CURRENT_DIR = os.path.dirname(__file__) -CLASS_INDEX_PAGE_URL = "https://clang.llvm.org/doxygen/classes.html" -try: - CLASS_INDEX_PAGE = urlopen(CLASS_INDEX_PAGE_URL).read().decode("utf-8") -except Exception as e: - CLASS_INDEX_PAGE = None - print("Unable to get %s: %s" % (CLASS_INDEX_PAGE_URL, e)) -CURRENT_DIR = os.path.dirname(__file__) +def _build_local_class_set(): + """Return the set of class names declared in clang/include/clang/AST/""" + classes = set() + ast_dir = os.path.join(CURRENT_DIR, "../../include/clang/AST") + for fname in os.listdir(ast_dir): + if not fname.endswith(".h"): + continue + try: + content = open(os.path.join(ast_dir, fname)).read() + except OSError: + continue + for m in re.finditer(r"\b(?:class|struct)\s+([A-Z][a-zA-Z0-9_]+)\b", content): ---------------- Endilll wrote:
> I'm not sure I understand what is "clang-format infra"

I was referring to the fact that clang-format does some limited semantic analysis of the tokens to figure out whether an identifier is, e.g., a class name, based on just the raw text and without any build-system input: https://github.com/llvm/llvm-project/blob/359b47535c3f9def01f48d7d08c76a42b41d0645/clang/lib/Format/UnwrappedLineParser.cpp#L4250-L4254

If you were able to reuse it, you could feed the header to clang-format and then replace the regex with much simpler matchers over clang-format tokens.

https://github.com/llvm/llvm-project/pull/203784
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
