mik-laj commented on a change in pull request #8807:
URL: https://github.com/apache/airflow/pull/8807#discussion_r422772250
##########
File path: backport_packages/setup_backport_packages.py
##########
@@ -422,13 +371,376 @@ def usage():
print(text)
print()
print("You can see all packages configured by specifying
list-backport-packages as first argument")
+ print("You can generate release notes by specifying:"
+ " update-package-release-notes YYYY.MM.DD [PACKAGES]")
+
+
def is_imported_google_base_hook(name: str) -> bool:
    """Return True if *name* is a re-exported (duplicated) GoogleBaseHook.

    Many Google provider hook modules import GoogleBaseHook, so it shows up
    under several fully-qualified names; only the canonical definition in
    ``base_google`` should be kept, every other occurrence is a duplicate.

    :param name: fully-qualified dotted class name.
    :return: True when the name ends with ``GoogleBaseHook`` but is not the
        canonical location.
    """
    # Idiom fix: return the boolean expression directly instead of
    # `if cond: return True ... return False`.
    return name.endswith("GoogleBaseHook") and \
        name != "airflow.providers.google.common.hooks.base_google.GoogleBaseHook"
+
+
def find_all_subclasses(full_package: str, class_type,
                        expected_in_package_name: Optional[str] = None,
                        exclude_class_type=None) -> Set[str]:
    """Return the set of fully-qualified names of subclasses of *class_type*
    found in this module's globals under *full_package*.

    BUGFIX (docs): the previous comment claimed a "list of tuples
    (objclass, name)" was returned — the function actually returns a set of
    dotted class names.

    :param full_package: dotted package prefix the names must start with.
    :param class_type: base class the candidates must derive from.
    :param expected_in_package_name: optional substring (e.g. ".hooks.")
        that must appear in the dotted name.
    :param exclude_class_type: optional class that must NOT be in the MRO
        (used to keep sensors out of the operator list).
    :return: set of fully-qualified class names.
    """
    import inspect
    subclasses: Set[str] = set()
    # NOTE: scans globals() of this module, which is expected to have been
    # populated by importing all provider classes beforehand.
    for global_name, global_object in globals().items():
        if not global_name.startswith(full_package) or not inspect.isclass(global_object):
            continue
        mro = inspect.getmro(global_object)
        if global_object is class_type or class_type not in mro:
            continue
        if "example_dags" in global_name:
            continue
        if expected_in_package_name is not None and expected_in_package_name not in global_name:
            continue
        if exclude_class_type is not None and exclude_class_type in mro:
            continue
        if global_name in EXCLUDED_DUPLICATED_OBJECTS:
            continue
        if is_imported_google_base_hook(global_name):
            continue
        # skip names that are mere re-imports from another package
        if not global_object.__module__.startswith(full_package):
            continue
        subclasses.add(global_name)
    return subclasses
+
+
def get_new_and_moved_objects(objects: Set[str], test_moved_object_dict: Dict[str, str]):
    """Split *objects* into brand-new names and names known to have moved.

    Names present as keys in *test_moved_object_dict* are considered moved
    and mapped to their previous (1.10.*) location; everything else is new.
    Consumed entries are removed from *test_moved_object_dict* (mutation is
    intentional — leftovers indicate stale mapping entries).

    :return: (sorted list of new names, dict of moved name -> old location).
    """
    fresh_names: List[str] = []
    relocated: Dict[str, str] = {}
    for dotted_name in objects:
        if dotted_name in test_moved_object_dict:
            relocated[dotted_name] = test_moved_object_dict.pop(dotted_name)
        else:
            fresh_names.append(dotted_name)
    return sorted(fresh_names), relocated
+
+
def strip_package(base_package: str, obj_name: str):
    """Drop the leading *base_package* prefix (plus the separating dot) from
    *obj_name*; names outside the package are returned unchanged."""
    prefix_length = len(base_package) + 1  # +1 swallows the trailing dot
    return obj_name[prefix_length:] if obj_name.startswith(base_package) else obj_name
+
+
def convert_obj_name_to_url(prefix: str, obj_name):
    """Map a dotted object name to the URL of the module file defining it
    (drops the class segment, turns dots into slashes, appends ``.py``)."""
    module_segments = obj_name.split(".")[:-1]
    return f'{prefix}{"/".join(module_segments)}.py'
+
+
def get_object_code_link(base_package: str, obj_name: str, tag: str):
    """Render a markdown link whose text is the package-stripped object name
    and whose target is the object's source file on GitHub at *tag*."""
    github_blob_prefix = f'https://github.com/apache/airflow/blob/{tag}/'
    link_text = strip_package(base_package, obj_name)
    link_target = convert_obj_name_to_url(github_blob_prefix, obj_name)
    return f'[{link_text}]({link_target})'
+
+
def convert_new_objects_to_table(obj_list: List[str], full_package_name: str, object_type: str):
    """Render a one-column markdown (pipe) table of GitHub links for objects
    that are new in Airflow 2.0 for the given package."""
    from tabulate import tabulate
    header = f"New Airflow 2.0 {object_type}: `{full_package_name}` package"
    rows = []
    for dotted_name in obj_list:
        rows.append((get_object_code_link(full_package_name, dotted_name, "master"),))
    return tabulate(rows, headers=[header], tablefmt="pipe")
+
+
def convert_moved_objects_to_table(obj_dict: Dict[str, str], full_package_name: str, object_type: str):
    """Render a two-column markdown (pipe) table mapping each moved object
    (linked at ``master``) to its previous 1.10.* location (linked at
    ``v1-10-stable``).

    :param obj_dict: new dotted name -> old dotted name (usually under
        ``airflow.contrib``).
    :param full_package_name: package the new names belong to.
    :param object_type: label used in the table header (operators/sensors/...).
    """
    from tabulate import tabulate
    headers = [f"Airflow 2.0 {object_type}: `{full_package_name}` package",
               # fix: plain string - it has no placeholders, the f-prefix was noise
               "Airflow 1.10.* previous location (usually `airflow.contrib`)"]
    table = [
        (get_object_code_link(full_package_name, obj, "master"),
         get_object_code_link("airflow.contrib", obj_dict[obj], "v1-10-stable"))
        for obj in sorted(obj_dict.keys())
    ]
    return tabulate(table, headers=headers, tablefmt="pipe")
+
+
def get_package_class_summary(full_package_name: str):
    """Build the class summary for one provider package.

    Collects new/moved operators, sensors, hooks, protocols and secrets
    backends and renders the corresponding markdown tables. Returns a dict
    with ``NEW_*``/``MOVED_*`` entries (name collections) and
    ``NEW_*_TABLE``/``MOVED_*_TABLE`` entries (markdown) — the keys the
    README templates expect.
    """
    from airflow.secrets import BaseSecretsBackend
    from airflow.sensors.base_sensor_operator import BaseSensorOperator
    from airflow.hooks.base_hook import BaseHook
    from airflow.models.baseoperator import BaseOperator
    from typing_extensions import Protocol
    operators = find_all_subclasses(full_package=full_package_name,
                                    class_type=BaseOperator,
                                    expected_in_package_name=".operators.",
                                    exclude_class_type=BaseSensorOperator)
    sensors = find_all_subclasses(full_package=full_package_name,
                                  class_type=BaseSensorOperator,
                                  expected_in_package_name='.sensors.')
    hooks = find_all_subclasses(full_package=full_package_name,
                                class_type=BaseHook,
                                expected_in_package_name='.hooks.')
    protocols = find_all_subclasses(full_package=full_package_name,
                                    class_type=Protocol)
    secrets = find_all_subclasses(full_package=full_package_name,
                                  class_type=BaseSecretsBackend)
    new_operators, moved_operators = get_new_and_moved_objects(operators, MOVED_OPERATORS_DICT)
    new_sensors, moved_sensors = get_new_and_moved_objects(sensors, MOVED_SENSORS_DICT)
    new_hooks, moved_hooks = get_new_and_moved_objects(hooks, MOVED_HOOKS_DICT)
    new_protocols, moved_protocols = get_new_and_moved_objects(protocols, MOVED_PROTOCOLS_DICT)
    new_secrets, moved_secrets = get_new_and_moved_objects(secrets, MOVED_SECRETS_DICT)
    class_summary = {
        "NEW_OPERATORS": new_operators,
        "MOVED_OPERATORS": moved_operators,
        "NEW_SENSORS": new_sensors,
        "MOVED_SENSORS": moved_sensors,
        "NEW_HOOKS": new_hooks,
        "MOVED_HOOKS": moved_hooks,
        "NEW_PROTOCOLS": new_protocols,
        "MOVED_PROTOCOLS": moved_protocols,
        "NEW_SECRETS": new_secrets,
        "MOVED_SECRETS": moved_secrets,
    }
    for from_name, to_name, object_type in [
        ("NEW_OPERATORS", "NEW_OPERATORS_TABLE", "operators"),
        ("NEW_SENSORS", "NEW_SENSORS_TABLE", "sensors"),
        ("NEW_HOOKS", "NEW_HOOKS_TABLE", "hooks"),
        ("NEW_PROTOCOLS", "NEW_PROTOCOLS_TABLE", "protocols"),
        ("NEW_SECRETS", "NEW_SECRETS_TABLE", "secrets"),
    ]:
        class_summary[to_name] = convert_new_objects_to_table(class_summary[from_name],
                                                              full_package_name,
                                                              object_type)
    for from_name, to_name, object_type in [
        ("MOVED_OPERATORS", "MOVED_OPERATORS_TABLE", "operators"),
        ("MOVED_SENSORS", "MOVED_SENSORS_TABLE", "sensors"),
        ("MOVED_HOOKS", "MOVED_HOOKS_TABLE", "hooks"),
        ("MOVED_PROTOCOLS", "MOVED_PROTOCOLS_TABLE", "protocols"),
        # BUGFIX: was "protocols" (copy-paste error) - the moved-secrets
        # table header would have read "protocols" instead of "secrets".
        ("MOVED_SECRETS", "MOVED_SECRETS_TABLE", "secrets"),
    ]:
        class_summary[to_name] = convert_moved_objects_to_table(class_summary[from_name],
                                                                full_package_name,
                                                                object_type)
    return class_summary
+
+
def prepare_readme_from_template(template_name: str, context: Dict[str, Any]):
    """Render ``<template_name>_TEMPLATE.md.jinja2`` (located in MY_DIR_PATH)
    with *context* and return the resulting markdown text."""
    from jinja2 import Template
    template_path = os.path.join(MY_DIR_PATH, f"{template_name}_TEMPLATE.md.jinja2")
    with open(template_path, "rt") as template_file:
        template_source = template_file.read()
    return Template(template_source, autoescape=True).render(**context)
+
+
def convert_git_changes_to_table(changes: str, commit_url_prefix: str):
    """Convert ``git log --pretty=format:'%H %h %cd %s'`` output into a
    markdown (pipe) table of linked commit hash, date and subject.

    :param changes: raw multi-line git log output.
    :param commit_url_prefix: URL prefix the full commit hash is appended to.
    """
    from tabulate import tabulate
    headers = ["Commit", "Date", "Subject"]
    table_data = []
    for line in changes.split("\n"):
        # Robustness fix: a trailing newline (or any blank line) in the git
        # output yields an empty string here, and the 4-way unpack below
        # would raise ValueError.
        if not line.strip():
            continue
        full_hash, short_hash, date, message = line.split(" ", maxsplit=3)
        table_data.append((f"[{short_hash}]({commit_url_prefix}{full_hash})", date, message))

    return tabulate(table_data, headers=headers, tablefmt="pipe")
+
+
def convert_pip_requirements_to_table(requirements: List[str]):
    """Convert pip requirement strings (e.g. ``pandas>=1.0``) into a markdown
    (pipe) table with package-name and version-specifier columns.

    Entries that do not match the pattern are kept verbatim with an empty
    version column.
    """
    from tabulate import tabulate
    headers = ["PIP package", "Version required"]
    table_data = []
    for dependency in requirements:
        # BUGFIX: '!' added to both character classes so "pkg!=1.0" splits
        # into ("pkg", "!=1.0") instead of ("pkg!", "=1.0").
        found = re.match(r"(^[^!<=>~]*)([^!<=>~]?.*)$", dependency)
        if found:
            package = found.group(1)
            version_required = found.group(2)
            table_data.append((package, version_required))
        else:
            table_data.append((dependency, ""))
    return tabulate(table_data, headers=headers, tablefmt="pipe")
+
+
def convert_cross_package_dependencies_to_table(cross_package_dependencies: List[str], url_prefix: str):
    """Render a markdown (pipe) table linking each dependent backport-provider
    package (pip name) to its source tree, with the provider id as extra."""
    from tabulate import tabulate
    headers = ["Dependent package", "Extra"]
    rows = []
    for provider_id in cross_package_dependencies:
        pip_name = f"apache-airflow-backport-providers-{provider_id.replace('.','-')}"
        tree_url = url_prefix + provider_id.replace('.', '/')
        rows.append((f"[{pip_name}]({tree_url})", provider_id))
    return tabulate(rows, headers=headers, tablefmt="pipe")
+
+
# Apache license header (HTML comment form) prepended to every generated
# README.md file.
LICENCE = """<!--
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.  The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing,
 software distributed under the License is distributed on an
 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 KIND, either express or implied.  See the License for the
 specific language governing permissions and limitations
 under the License.
 -->
"""

# File-name prefix of the per-release changelog files stored next to each
# provider package (full name: PROVIDERS_CHANGES_<release_version>.md).
PROVIDERS_CHANGES_PREFIX = "PROVIDERS_CHANGES_"

# Metadata for one past release parsed from a changelog file:
# release version string, last commit hash included, the full changelog
# markdown content, and the changelog file name it came from.
ReleaseInfo = collections.namedtuple("ReleaseInfo", "release_version last_commit_hash content file_name")
+
+
def get_all_releases(package_path: str) -> List[ReleaseInfo]:
    """Scan *package_path* for ``PROVIDERS_CHANGES_*.md`` changelog files and
    return a ReleaseInfo per release, newest first.

    :raises Exception: when a changelog file contains no recognizable
        commit-link (the last commit hash cannot be determined).
    """
    releases: List[ReleaseInfo] = []
    for file_name in sorted(listdir(package_path), reverse=True):
        if not file_name.startswith(PROVIDERS_CHANGES_PREFIX) or not file_name.endswith(".md"):
            continue
        changes_file_path = os.path.join(package_path, file_name)
        with open(changes_file_path, "rt") as changes_file:
            content = changes_file.read()
        # the first "/<hash>)" markdown-link fragment is the latest commit
        found = re.search(r'/([a-z0-9]*)\)', content, flags=re.MULTILINE)
        if not found:
            raise Exception(f"Commit not found in {changes_file_path}. Something is wrong there.")
        releases.append(ReleaseInfo(
            release_version=file_name[len(PROVIDERS_CHANGES_PREFIX):-3],
            last_commit_hash=found.group(1),
            content=content,
            file_name=file_name))
    return releases
+
+
def get_previous_release(last_release: str,
                         past_releases: List[ReleaseInfo],
                         current_release_version: str) -> Optional[str]:
    """Pick the commit hash used as the base of the ``git log`` range.

    When the requested version equals the latest past release (a re-run),
    the release *before* it is used; otherwise the latest past release is.
    Returns None when there is no applicable earlier release.
    """
    if last_release == current_release_version:
        # Re-running for the current release - diff against the one before it.
        return past_releases[1].last_commit_hash if len(past_releases) > 1 else None
    return past_releases[0].last_commit_hash if past_releases else None
+
+
def check_if_release_version_ok(past_releases, release_version):
    """Validate that *release_version* is not older than the latest past
    release; exit with code 2 otherwise.

    :param past_releases: ReleaseInfo list, newest first (may be empty).
    :param release_version: requested version (``YYYY.MM.DD`` per usage()).
    :return: the latest past release version, or None if there is none.
    """
    last_release = past_releases[0].release_version if past_releases else None
    if last_release and _is_version_greater(last_release, release_version):
        print(f"The release {release_version} must be not less than "
              f"{last_release} - last release for the package")
        sys.exit(2)
    return last_release


def _is_version_greater(left: str, right: str) -> bool:
    """True when *left* > *right*, comparing dotted components numerically.

    BUGFIX: the previous plain string comparison misordered non-zero-padded
    date versions (e.g. "2020.5.26" > "2020.10.01" lexicographically).
    Falls back to string comparison when any component is non-numeric.
    """
    try:
        return tuple(int(part) for part in left.split(".")) > \
            tuple(int(part) for part in right.split("."))
    except ValueError:
        return left > right
+
+
def get_cross_provider_dependent_packages(package_id: str) -> List[str]:
    """Return the provider ids that *package_id* depends on, read from
    ``dependencies.json`` in PROVIDERS_PATH (empty list when absent)."""
    dependencies_json_path = os.path.join(PROVIDERS_PATH, "dependencies.json")
    with open(dependencies_json_path, "rt") as dependencies_file:
        all_dependencies = json.load(dependencies_file)
    return all_dependencies.get(package_id) or []
+
+
def make_sure_remote_apache_exists():
    """Ensure a git remote named ``apache`` pointing at the canonical Airflow
    repository exists, then fetch it so ``apache/master`` is resolvable."""
    try:
        subprocess.check_call(
            ["git", "remote", "add", "apache", "https://github.com/apache/airflow.git"])
    except subprocess.CalledProcessError as e:
        # git exits with 128 when the remote already exists - that is fine
        if e.returncode != 128:
            raise
        print("The remote `apache` already exists. If you have trouble running git log delete the remote")
    subprocess.check_call(["git", "fetch", "apache"])
+
+
def get_git_command(previous_release):
    """Build the ``git log`` command listing commits for the current
    directory, optionally limited to those after *previous_release*.

    Requires the ``apache`` remote to be configured (see
    make_sure_remote_apache_exists).
    """
    command = ["git", "log", "apache/master", "--pretty=format:%H %h %cd %s", "--date=short"]
    if previous_release:
        command.append(f"{previous_release}...HEAD")
    command.extend(['--', '.'])
    return command
+
+
def store_current_changes(package_path: str, current_release_version: str, current_changes: str):
    """Write the changelog text for *current_release_version* to
    ``PROVIDERS_CHANGES_<version>.md`` inside *package_path*."""
    changes_file_name = PROVIDERS_CHANGES_PREFIX + current_release_version + ".md"
    with open(os.path.join(package_path, changes_file_name), "wt") as changes_file:
        changes_file.write(current_changes)
        changes_file.write("\n")
+
+
def update_release_notes_for_package(package_id: str, current_release_version: str):
    """Generate the changelog file and README.md for one backport provider.

    Gathers the class summary, past releases and cross-provider dependencies,
    renders the git changes since the previous release into a
    PROVIDERS_CHANGES_<version>.md file, and regenerates README.md from the
    jinja2 templates plus all past changelogs. Skips generation entirely when
    no commits touched the package since the last release.

    :param package_id: provider id, e.g. "google" or "cncf.kubernetes".
    :param current_release_version: release version (YYYY.MM.DD per usage()).
    """
    full_package_name = f"airflow.providers.{package_id}"
    package_path = os.path.join(PROVIDERS_PATH, *package_id.split("."))
    class_summary = get_package_class_summary(full_package_name)
    past_releases = get_all_releases(package_path=package_path)
    # exits with code 2 if the requested version is older than the last release
    last_release = check_if_release_version_ok(past_releases, current_release_version)
    cross_providers_dependencies = get_cross_provider_dependent_packages(package_id=package_id)
    previous_release = get_previous_release(last_release=last_release, past_releases=past_releases,
                                            current_release_version=current_release_version)
    git_cmd = get_git_command(previous_release)
    changes = subprocess.check_output(git_cmd, cwd=package_path, universal_newlines=True)
    if changes == "":
        # no commits touched this package directory since the previous release
        print(f"The code has not changed since last release {last_release}. Skipping generating README.")
        return
    changes_table = convert_git_changes_to_table(
        changes,
        commit_url_prefix="https://github.com/apache/airflow/commit/")
    pip_requirements_table = convert_pip_requirements_to_table(PROVIDERS_REQUIREMENTS[package_id])
    cross_providers_dependencies_table = \
        convert_cross_package_dependencies_to_table(
            cross_providers_dependencies,
            url_prefix="https://github.com/apache/airflow/tree/master/airflow/providers/")
    # template context shared by the changes and README templates
    context: Dict[str, Any] = {
        "PACKAGE_ID": package_id,
        "PACKAGE_PIP_NAME": f"apache-airflow-backport-providers-{package_id.replace('.', '-')}",
        "FULL_PACKAGE_NAME": full_package_name,
        "RELEASE": current_release_version,
        "CURRENT_CHANGES_TABLE": changes_table,
        "CROSS_PROVIDERS_DEPENDENCIES": cross_providers_dependencies,
        "CROSS_PROVIDERS_DEPENDENCIES_TABLE": cross_providers_dependencies_table,
        "PIP_REQUIREMENTS": PROVIDERS_REQUIREMENTS[package_id],
        "PIP_REQUIREMENTS_TABLE": pip_requirements_table
    }
    current_changes = prepare_readme_from_template(template_name="PROVIDERS_CHANGES", context=context)
    store_current_changes(package_path=package_path, current_release_version=current_release_version,
                          current_changes=current_changes)
    context.update(class_summary)
    # re-read releases so the changelog stored just above is included in README
    all_releases = get_all_releases(package_path)
    context["RELEASES"] = all_releases
    readme = LICENCE
    readme += prepare_readme_from_template(template_name="PROVIDERS_README", context=context)
    readme += prepare_readme_from_template(template_name="PROVIDERS_CLASSES", context=context)
    for a_release in all_releases:
        readme += a_release.content
    readme_file_path = os.path.join(package_path, "README.md")
    with open(readme_file_path, "wt") as readme_file:
        readme_file.write(readme)
    print(f"Generated {readme_file_path} file for the {package_id} provider")
+
+
+def import_all_providers_classes():
+ for loader, module_name, is_pkg in
pkgutil.walk_packages([SOURCE_DIR_PATH]):
Review comment:
Why do we need it?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]