mik-laj commented on a change in pull request #9290:
URL: https://github.com/apache/airflow/pull/9290#discussion_r440189048
##########
File path: docs/build
##########
@@ -127,6 +128,71 @@ def display_errors_summary() -> None:
print("=" * 50)
def find_existing_guide_operator_names():
    """
    Collect the operator names that already have a how-to guide.

    Every ``.rst`` file below ``howto/operator/`` (relative to the current
    working directory) is scanned for reST labels of the form
    ``.. _howto/operator:<OperatorName>:``.

    :return: set of operator names captured from those labels
    """
    # Raw string with escaped dots: only a literal ".. " label prefix may match,
    # not any two arbitrary characters in front of " _howto/operator:".
    label_pattern = re.compile(r"\.\. _howto/operator:(.+?):")
    operator_names = set()

    for path in glob("howto/operator/**/*.rst", recursive=True):
        # Explicit encoding so the result does not depend on the platform locale.
        with open(path, encoding="utf-8") as f:
            operator_names.update(label_pattern.findall(f.read()))

    return operator_names
+
+
def extract_ast_class_def_by_name(ast_tree, class_name):
    """
    Look up a class definition node by name inside an AST.

    The tree is walked depth-first in source order. Bodies of class
    definitions are not entered, so classes nested inside another class are
    never reported. If several definitions share the name, the one visited
    last is returned.

    :param ast_tree: root AST node to search
    :param class_name: name of the class to look for
    :return: the matching ``ast.ClassDef`` node, or ``None`` when there is none
    """
    match = None
    pending = [ast_tree]

    while pending:
        node = pending.pop()
        if isinstance(node, ast.ClassDef):
            if node.name == class_name:
                match = node
            # Class bodies are deliberately left unexplored.
            continue
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(list(ast.iter_child_nodes(node))))

    return match
+
+
def check_guide_links_in_operator_descriptions():
    """
    Check that operators which have a how-to guide link to it in their docstring.

    For every operator name returned by ``find_existing_guide_operator_names()``
    the Python modules below ``ROOT_PACKAGE_DIR`` are searched for a class of
    that name. When the class has no docstring, or the docstring lacks a
    ``:ref:`` link to ``howto/operator:<OperatorName>``, a ``DocBuildError``
    is appended to ``build_errors``.
    """

    def generate_build_error(path, line_no, operator_name):
        """Build the error reported for an operator whose docstring lacks the guide link."""
        return DocBuildError(
            file_path=path,
            line_no=line_no,
            message=(
                f"Link to the guide is missing in operator's description: {operator_name}.\n"
                "Please add link to the guide to the description in the following form:\n"
                ".. seealso::\n"
                "For more information on how to use this operator, take a look at the guide:\n"
                f":ref:`howto/operator:{operator_name}`\n"
            ),
        )

    # Extract operators for which there are existing .rst guides
    operator_names = find_existing_guide_operator_names()

    # Extract all potential python modules that can contain operators
    python_module_paths = glob(f"{ROOT_PACKAGE_DIR}/**/*.py", recursive=True)

    for py_module_path in python_module_paths:
        with open(py_module_path, encoding="utf-8") as f:
            py_content = f.read()

        # Cheap textual pre-filter: most modules mention none of the operators,
        # so they never need to be parsed.
        candidates = [name for name in operator_names if f"class {name}" in py_content]
        if not candidates:
            continue

        # Parse once per module instead of once per candidate operator.
        ast_tree = ast.parse(py_content)

        for existing_operator in candidates:
            # The textual match may be a comment, a string or a longer class name;
            # only a real class definition counts.
            class_def = extract_ast_class_def_by_name(ast_tree, existing_operator)
            if class_def is None:
                continue

            # ast.get_docstring() returns None for an undocumented class; treat
            # that the same as a docstring without the link instead of crashing.
            docstring = ast.get_docstring(class_def) or ""
            if f":ref:`howto/operator:{existing_operator}`" not in docstring:
                build_errors.append(
                    generate_build_error(py_module_path, class_def.lineno, existing_operator)
                )
Review comment:
```suggestion
for py_module_path in python_module_paths:
    with open(py_module_path) as f:
        py_content = f.read()
    for existing_operator in operator_names:
        if f"class {existing_operator}" not in py_content:
            continue
        # This is a potential file with necessary class definition.
        # To make sure it's a real Python class definition, we build AST tree
        ast_tree = ast.parse(py_content)
        class_def = extract_ast_class_def_by_name(ast_tree, existing_operator)
        if class_def is None:
            continue
        # ast.get_docstring() returns None for a class without a docstring;
        # fall back to "" so a missing docstring is reported, not a TypeError.
        docstring = ast.get_docstring(class_def) or ""
        # Real class definition is found and docstring does not contain
        # reference to the existing guide
        if f":ref:`howto/operator:{existing_operator}`" not in docstring:
            build_errors.append(
                generate_build_error(py_module_path, class_def.lineno, existing_operator)
            )
```
There is a little too much indentation in the code, which makes it
difficult to understand. I really like the use of AST.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]