================
@@ -6,17 +6,27 @@
 import collections
 import re
 import os
-from urllib.request import urlopen
 
+CURRENT_DIR = os.path.dirname(__file__)
 
-CLASS_INDEX_PAGE_URL = "https://clang.llvm.org/doxygen/classes.html";
-try:
-    CLASS_INDEX_PAGE = urlopen(CLASS_INDEX_PAGE_URL).read().decode("utf-8")
-except Exception as e:
-    CLASS_INDEX_PAGE = None
-    print("Unable to get %s: %s" % (CLASS_INDEX_PAGE_URL, e))
 
-CURRENT_DIR = os.path.dirname(__file__)
+def _build_local_class_set():
+    """Return the set of class names declared in clang/include/clang/AST/"""
+    classes = set()
+    ast_dir = os.path.join(CURRENT_DIR, "../../include/clang/AST")
+    for fname in os.listdir(ast_dir):
+        if not fname.endswith(".h"):
+            continue
+        try:
+            content = open(os.path.join(ast_dir, fname)).read()
+        except OSError:
+            continue
+        for m in re.finditer(r"\b(?:class|struct)\s+([A-Z][a-zA-Z0-9_]+)\b", 
content):
----------------
Endilll wrote:

> I'm not sure I understand what is "clang-format infra"

I was referring to the fact that clang-format performs some limited semantic 
analysis of the tokens to figure out whether an identifier is, e.g., a class 
name, based on the raw text alone and without any build-system input: 
https://github.com/llvm/llvm-project/blob/359b47535c3f9def01f48d7d08c76a42b41d0645/clang/lib/Format/UnwrappedLineParser.cpp#L4250-L4254

If you were able to reuse it, you could feed the header to 
clang-format and then replace the regex with much simpler matchers over 
clang-format tokens.

https://github.com/llvm/llvm-project/pull/203784
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to