This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 2dbb963324 Refactor: Improve detection of duplicates and list sorting (#33675)
2dbb963324 is described below
commit 2dbb9633240777d658031d32217255849150684b
Author: Miroslav Šedivý <[email protected]>
AuthorDate: Thu Aug 24 05:51:47 2023 +0000
Refactor: Improve detection of duplicates and list sorting (#33675)
---
airflow/example_dags/example_params_trigger_ui.py | 2 +-
airflow/example_dags/example_params_ui_tutorial.py | 2 +-
.../cncf/kubernetes/pod_launcher_deprecated.py | 6 ++--
.../providers/cncf/kubernetes/utils/pod_manager.py | 6 ++--
.../databricks/operators/databricks_repos.py | 7 ++--
airflow/providers/docker/operators/docker.py | 2 +-
.../providers/singularity/operators/singularity.py | 2 +-
airflow/utils/log/json_formatter.py | 2 +-
.../src/airflow_breeze/utils/selective_checks.py | 17 ++-------
.../in_container/run_provider_yaml_files_check.py | 40 ++++++++++------------
10 files changed, 33 insertions(+), 53 deletions(-)
diff --git a/airflow/example_dags/example_params_trigger_ui.py b/airflow/example_dags/example_params_trigger_ui.py
index b7554a4463..564861f52b 100644
--- a/airflow/example_dags/example_params_trigger_ui.py
+++ b/airflow/example_dags/example_params_trigger_ui.py
@@ -33,7 +33,7 @@ from airflow.utils.trigger_rule import TriggerRule
with DAG(
dag_id=Path(__file__).stem,
- description=__doc__[0 : __doc__.find(".")],
+ description=__doc__.partition(".")[0],
doc_md=__doc__,
schedule=None,
start_date=datetime.datetime(2022, 3, 4),
diff --git a/airflow/example_dags/example_params_ui_tutorial.py b/airflow/example_dags/example_params_ui_tutorial.py
index cb25b1b408..9af4e0ba4a 100644
--- a/airflow/example_dags/example_params_ui_tutorial.py
+++ b/airflow/example_dags/example_params_ui_tutorial.py
@@ -35,7 +35,7 @@ from airflow.models.taskinstance import TaskInstance
with DAG(
dag_id=Path(__file__).stem,
- description=__doc__[0 : __doc__.find(".")],
+ description=__doc__.partition(".")[0],
doc_md=__doc__,
schedule=None,
start_date=datetime.datetime(2022, 3, 4),
diff --git a/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py b/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
index 9fc964cdb6..045c42e9ff 100644
--- a/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
+++ b/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
@@ -185,16 +185,14 @@ class PodLauncher(LoggingMixin):
:param line: k8s log line
:return: timestamp and log message
"""
- split_at = line.find(" ")
- if split_at == -1:
+ timestamp, sep, message = line.strip().partition(" ")
+ if not sep:
self.log.error(
"Error parsing timestamp (no timestamp in message: %r). "
"Will continue execution but won't update timestamp",
line,
)
return None, line
- timestamp = line[:split_at]
- message = line[split_at + 1 :].rstrip()
return timestamp, message
def _task_status(self, event):
diff --git a/airflow/providers/cncf/kubernetes/utils/pod_manager.py b/airflow/providers/cncf/kubernetes/utils/pod_manager.py
index 77600b4fcf..08a5d904ca 100644
--- a/airflow/providers/cncf/kubernetes/utils/pod_manager.py
+++ b/airflow/providers/cncf/kubernetes/utils/pod_manager.py
@@ -551,16 +551,14 @@ class PodManager(LoggingMixin):
:param line: k8s log line
:return: timestamp and log message
"""
- split_at = line.find(" ")
- if split_at == -1:
+ timestamp, sep, message = line.strip().partition(" ")
+ if not sep:
self.log.error(
"Error parsing timestamp (no timestamp in message %r). "
"Will continue execution but won't update timestamp",
line,
)
return None, line
- timestamp = line[:split_at]
- message = line[split_at + 1 :].rstrip()
try:
last_log_time = cast(DateTime, pendulum.parse(timestamp))
except ParserError:
diff --git a/airflow/providers/databricks/operators/databricks_repos.py b/airflow/providers/databricks/operators/databricks_repos.py
index 64735cadbf..ad9be926a0 100644
--- a/airflow/providers/databricks/operators/databricks_repos.py
+++ b/airflow/providers/databricks/operators/databricks_repos.py
@@ -105,11 +105,8 @@ class DatabricksReposCreateOperator(BaseOperator):
def __detect_repo_provider__(url):
provider = None
try:
- netloc = urlsplit(url).netloc
- idx = netloc.rfind("@")
- if idx != -1:
- netloc = netloc[(idx + 1) :]
- netloc = netloc.lower()
+ netloc = urlsplit(url).netloc.lower()
+ _, _, netloc = netloc.rpartition("@")
provider = DatabricksReposCreateOperator.__git_providers__.get(netloc)
if provider is None and DatabricksReposCreateOperator.__aws_code_commit_regexp__.match(netloc):
provider = "awsCodeCommit"
diff --git a/airflow/providers/docker/operators/docker.py b/airflow/providers/docker/operators/docker.py
index 96f78baa9b..d034e1ff32 100644
--- a/airflow/providers/docker/operators/docker.py
+++ b/airflow/providers/docker/operators/docker.py
@@ -486,7 +486,7 @@ class DockerOperator(BaseOperator):
:return: the command (or commands)
"""
- if isinstance(command, str) and command.strip().find("[") == 0:
+ if isinstance(command, str) and command.strip().startswith("["):
command = ast.literal_eval(command)
return command
diff --git a/airflow/providers/singularity/operators/singularity.py b/airflow/providers/singularity/operators/singularity.py
index c8c90b9e76..dbc2796daa 100644
--- a/airflow/providers/singularity/operators/singularity.py
+++ b/airflow/providers/singularity/operators/singularity.py
@@ -167,7 +167,7 @@ class SingularityOperator(BaseOperator):
self.log.info("Output from command %s", result["message"])
def _get_command(self) -> Any | None:
- if self.command is not None and self.command.strip().find("[") == 0:  # type: ignore
+ if self.command is not None and self.command.strip().startswith("["):  # type: ignore
commands = ast.literal_eval(self.command)
else:
commands = self.command
diff --git a/airflow/utils/log/json_formatter.py b/airflow/utils/log/json_formatter.py
index d191e69a20..eb19fe8969 100644
--- a/airflow/utils/log/json_formatter.py
+++ b/airflow/utils/log/json_formatter.py
@@ -37,7 +37,7 @@ class JSONFormatter(logging.Formatter):
self.extras = extras
def usesTime(self):
- return self.json_fields.count("asctime") > 0
+ return "asctime" in self.json_fields
def format(self, record):
super().format(record)
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index bedd9267fa..c6b9aa00b4 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -675,20 +675,9 @@ class SelectiveChecks:
# this should be hard-coded as we want to have very specific sequence of tests
sorting_order = ["Core", "Providers[-amazon,google]", "Other", "Providers[amazon]", "WWW"]
-
- def sort_key(t: str) -> str:
- # Put the test types in the order we want them to run
- if t in sorting_order:
- return str(sorting_order.index(t))
- else:
- return str(len(sorting_order)) + t
-
- return " ".join(
- sorted(
- current_test_types,
- key=sort_key,
- )
- )
+ sort_key = {item: i for i, item in enumerate(sorting_order)}
+ # Put the test types in the order we want them to run
+ return " ".join(sorted(current_test_types, key=lambda x: (sort_key.get(x, len(sorting_order)), x)))
@cached_property
def basic_checks_only(self) -> bool:
diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py
index 721117cafc..a300bb8be6 100755
--- a/scripts/in_container/run_provider_yaml_files_check.py
+++ b/scripts/in_container/run_provider_yaml_files_check.py
@@ -319,14 +319,13 @@ def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files
yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
):
resource_data = provider_data.get(resource_type, [])
- current_integrations = [r.get("integration-name", "") for r in resource_data]
- if len(current_integrations) != len(set(current_integrations)):
- for integration in current_integrations:
- if current_integrations.count(integration) > 1:
- errors.append(
- f"Duplicated content of '{resource_type}/integration-name/{integration}' "
- f"in file: {yaml_file_path}"
- )
+ count_integrations = Counter(r.get("integration-name", "") for r in resource_data)
+ for integration, count in count_integrations.items():
+ if count > 1:
+ errors.append(
+ f"Duplicated content of '{resource_type}/integration-name/{integration}' "
+ f"in file: {yaml_file_path}"
+ )
def check_completeness_of_list_of_transfers(yaml_files: dict[str, dict]):
@@ -422,19 +421,18 @@ def check_duplicates_in_list_of_transfers(yaml_files: dict[str, dict]):
for yaml_file_path, provider_data in yaml_files.items():
resource_data = provider_data.get(resource_type, [])
- source_target_integrations = [
+ count_integrations = Counter(
(r.get("source-integration-name", ""), r.get("target-integration-name", ""))
for r in resource_data
- ]
- if len(source_target_integrations) != len(set(source_target_integrations)):
- for integration_couple in source_target_integrations:
- if source_target_integrations.count(integration_couple) > 1:
- errors.append(
- f"Duplicated content of \n"
- f" '{resource_type}/source-integration-name/{integration_couple[0]}' "
- f" '{resource_type}/target-integration-name/{integration_couple[1]}' "
- f"in file: {yaml_file_path}"
- )
+ )
+ for (source, target), count in count_integrations.items():
+ if count > 1:
+ errors.append(
+ f"Duplicated content of \n"
+ f" '{resource_type}/source-integration-name/{source}' "
+ f" '{resource_type}/target-integration-name/{target}' "
+ f"in file: {yaml_file_path}"
+ )
def check_invalid_integration(yaml_files: dict[str, dict]):
@@ -533,8 +531,8 @@ def check_doc_files(yaml_files: dict[str, dict]):
def check_unique_provider_name(yaml_files: dict[str, dict]):
- provider_names = [d["name"] for d in yaml_files.values()]
- duplicates = {x for x in provider_names if provider_names.count(x) > 1}
+ name_counter = Counter(d["name"] for d in yaml_files.values())
+ duplicates = {k for k, v in name_counter.items() if v > 1}
if duplicates:
errors.append(f"Provider name must be unique. Duplicates: {duplicates}")