This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2dbb963324 Refactor: Improve detection of duplicates and list sorting (#33675)
2dbb963324 is described below

commit 2dbb9633240777d658031d32217255849150684b
Author: Miroslav Šedivý <[email protected]>
AuthorDate: Thu Aug 24 05:51:47 2023 +0000

    Refactor: Improve detection of duplicates and list sorting (#33675)
---
 airflow/example_dags/example_params_trigger_ui.py  |  2 +-
 airflow/example_dags/example_params_ui_tutorial.py |  2 +-
 .../cncf/kubernetes/pod_launcher_deprecated.py     |  6 ++--
 .../providers/cncf/kubernetes/utils/pod_manager.py |  6 ++--
 .../databricks/operators/databricks_repos.py       |  7 ++--
 airflow/providers/docker/operators/docker.py       |  2 +-
 .../providers/singularity/operators/singularity.py |  2 +-
 airflow/utils/log/json_formatter.py                |  2 +-
 .../src/airflow_breeze/utils/selective_checks.py   | 17 ++-------
 .../in_container/run_provider_yaml_files_check.py  | 40 ++++++++++------------
 10 files changed, 33 insertions(+), 53 deletions(-)

diff --git a/airflow/example_dags/example_params_trigger_ui.py b/airflow/example_dags/example_params_trigger_ui.py
index b7554a4463..564861f52b 100644
--- a/airflow/example_dags/example_params_trigger_ui.py
+++ b/airflow/example_dags/example_params_trigger_ui.py
@@ -33,7 +33,7 @@ from airflow.utils.trigger_rule import TriggerRule
 
 with DAG(
     dag_id=Path(__file__).stem,
-    description=__doc__[0 : __doc__.find(".")],
+    description=__doc__.partition(".")[0],
     doc_md=__doc__,
     schedule=None,
     start_date=datetime.datetime(2022, 3, 4),
diff --git a/airflow/example_dags/example_params_ui_tutorial.py b/airflow/example_dags/example_params_ui_tutorial.py
index cb25b1b408..9af4e0ba4a 100644
--- a/airflow/example_dags/example_params_ui_tutorial.py
+++ b/airflow/example_dags/example_params_ui_tutorial.py
@@ -35,7 +35,7 @@ from airflow.models.taskinstance import TaskInstance
 
 with DAG(
     dag_id=Path(__file__).stem,
-    description=__doc__[0 : __doc__.find(".")],
+    description=__doc__.partition(".")[0],
     doc_md=__doc__,
     schedule=None,
     start_date=datetime.datetime(2022, 3, 4),
diff --git a/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py b/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
index 9fc964cdb6..045c42e9ff 100644
--- a/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
+++ b/airflow/providers/cncf/kubernetes/pod_launcher_deprecated.py
@@ -185,16 +185,14 @@ class PodLauncher(LoggingMixin):
         :param line: k8s log line
         :return: timestamp and log message
         """
-        split_at = line.find(" ")
-        if split_at == -1:
+        timestamp, sep, message = line.strip().partition(" ")
+        if not sep:
             self.log.error(
                 "Error parsing timestamp (no timestamp in message: %r). "
                 "Will continue execution but won't update timestamp",
                 line,
             )
             return None, line
-        timestamp = line[:split_at]
-        message = line[split_at + 1 :].rstrip()
         return timestamp, message
 
     def _task_status(self, event):
diff --git a/airflow/providers/cncf/kubernetes/utils/pod_manager.py b/airflow/providers/cncf/kubernetes/utils/pod_manager.py
index 77600b4fcf..08a5d904ca 100644
--- a/airflow/providers/cncf/kubernetes/utils/pod_manager.py
+++ b/airflow/providers/cncf/kubernetes/utils/pod_manager.py
@@ -551,16 +551,14 @@ class PodManager(LoggingMixin):
         :param line: k8s log line
         :return: timestamp and log message
         """
-        split_at = line.find(" ")
-        if split_at == -1:
+        timestamp, sep, message = line.strip().partition(" ")
+        if not sep:
             self.log.error(
                 "Error parsing timestamp (no timestamp in message %r). "
                 "Will continue execution but won't update timestamp",
                 line,
             )
             return None, line
-        timestamp = line[:split_at]
-        message = line[split_at + 1 :].rstrip()
         try:
             last_log_time = cast(DateTime, pendulum.parse(timestamp))
         except ParserError:
diff --git a/airflow/providers/databricks/operators/databricks_repos.py b/airflow/providers/databricks/operators/databricks_repos.py
index 64735cadbf..ad9be926a0 100644
--- a/airflow/providers/databricks/operators/databricks_repos.py
+++ b/airflow/providers/databricks/operators/databricks_repos.py
@@ -105,11 +105,8 @@ class DatabricksReposCreateOperator(BaseOperator):
     def __detect_repo_provider__(url):
         provider = None
         try:
-            netloc = urlsplit(url).netloc
-            idx = netloc.rfind("@")
-            if idx != -1:
-                netloc = netloc[(idx + 1) :]
-            netloc = netloc.lower()
+            netloc = urlsplit(url).netloc.lower()
+            _, _, netloc = netloc.rpartition("@")
             provider = DatabricksReposCreateOperator.__git_providers__.get(netloc)
             if provider is None and DatabricksReposCreateOperator.__aws_code_commit_regexp__.match(netloc):
                 provider = "awsCodeCommit"
diff --git a/airflow/providers/docker/operators/docker.py b/airflow/providers/docker/operators/docker.py
index 96f78baa9b..d034e1ff32 100644
--- a/airflow/providers/docker/operators/docker.py
+++ b/airflow/providers/docker/operators/docker.py
@@ -486,7 +486,7 @@ class DockerOperator(BaseOperator):
 
         :return: the command (or commands)
         """
-        if isinstance(command, str) and command.strip().find("[") == 0:
+        if isinstance(command, str) and command.strip().startswith("["):
             command = ast.literal_eval(command)
         return command
 
diff --git a/airflow/providers/singularity/operators/singularity.py b/airflow/providers/singularity/operators/singularity.py
index c8c90b9e76..dbc2796daa 100644
--- a/airflow/providers/singularity/operators/singularity.py
+++ b/airflow/providers/singularity/operators/singularity.py
@@ -167,7 +167,7 @@ class SingularityOperator(BaseOperator):
         self.log.info("Output from command %s", result["message"])
 
     def _get_command(self) -> Any | None:
-        if self.command is not None and self.command.strip().find("[") == 0:  # type: ignore
+        if self.command is not None and self.command.strip().startswith("["):  # type: ignore
             commands = ast.literal_eval(self.command)
         else:
             commands = self.command
diff --git a/airflow/utils/log/json_formatter.py b/airflow/utils/log/json_formatter.py
index d191e69a20..eb19fe8969 100644
--- a/airflow/utils/log/json_formatter.py
+++ b/airflow/utils/log/json_formatter.py
@@ -37,7 +37,7 @@ class JSONFormatter(logging.Formatter):
         self.extras = extras
 
     def usesTime(self):
-        return self.json_fields.count("asctime") > 0
+        return "asctime" in self.json_fields
 
     def format(self, record):
         super().format(record)
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index bedd9267fa..c6b9aa00b4 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -675,20 +675,9 @@ class SelectiveChecks:
 
         # this should be hard-coded as we want to have very specific sequence of tests
         sorting_order = ["Core", "Providers[-amazon,google]", "Other", "Providers[amazon]", "WWW"]
-
-        def sort_key(t: str) -> str:
-            # Put the test types in the order we want them to run
-            if t in sorting_order:
-                return str(sorting_order.index(t))
-            else:
-                return str(len(sorting_order)) + t
-
-        return " ".join(
-            sorted(
-                current_test_types,
-                key=sort_key,
-            )
-        )
+        sort_key = {item: i for i, item in enumerate(sorting_order)}
+        # Put the test types in the order we want them to run
+        return " ".join(sorted(current_test_types, key=lambda x: (sort_key.get(x, len(sorting_order)), x)))
 
     @cached_property
     def basic_checks_only(self) -> bool:
diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py
index 721117cafc..a300bb8be6 100755
--- a/scripts/in_container/run_provider_yaml_files_check.py
+++ b/scripts/in_container/run_provider_yaml_files_check.py
@@ -319,14 +319,13 @@ def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files
         yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
     ):
         resource_data = provider_data.get(resource_type, [])
-        current_integrations = [r.get("integration-name", "") for r in resource_data]
-        if len(current_integrations) != len(set(current_integrations)):
-            for integration in current_integrations:
-                if current_integrations.count(integration) > 1:
-                    errors.append(
-                        f"Duplicated content of '{resource_type}/integration-name/{integration}' "
-                        f"in file: {yaml_file_path}"
-                    )
+        count_integrations = Counter(r.get("integration-name", "") for r in resource_data)
+        for integration, count in count_integrations.items():
+            if count > 1:
+                errors.append(
+                    f"Duplicated content of '{resource_type}/integration-name/{integration}' "
+                    f"in file: {yaml_file_path}"
+                )
 
 
 def check_completeness_of_list_of_transfers(yaml_files: dict[str, dict]):
@@ -422,19 +421,18 @@ def check_duplicates_in_list_of_transfers(yaml_files: dict[str, dict]):
     for yaml_file_path, provider_data in yaml_files.items():
         resource_data = provider_data.get(resource_type, [])
 
-        source_target_integrations = [
+        count_integrations = Counter(
             (r.get("source-integration-name", ""), r.get("target-integration-name", ""))
             for r in resource_data
-        ]
-        if len(source_target_integrations) != len(set(source_target_integrations)):
-            for integration_couple in source_target_integrations:
-                if source_target_integrations.count(integration_couple) > 1:
-                    errors.append(
-                        f"Duplicated content of \n"
-                        f" '{resource_type}/source-integration-name/{integration_couple[0]}' "
-                        f" '{resource_type}/target-integration-name/{integration_couple[1]}' "
-                        f"in file: {yaml_file_path}"
-                    )
+        )
+        for (source, target), count in count_integrations.items():
+            if count > 1:
+                errors.append(
+                    f"Duplicated content of \n"
+                    f" '{resource_type}/source-integration-name/{source}' "
+                    f" '{resource_type}/target-integration-name/{target}' "
+                    f"in file: {yaml_file_path}"
+                )
 
 
 def check_invalid_integration(yaml_files: dict[str, dict]):
@@ -533,8 +531,8 @@ def check_doc_files(yaml_files: dict[str, dict]):
 
 
 def check_unique_provider_name(yaml_files: dict[str, dict]):
-    provider_names = [d["name"] for d in yaml_files.values()]
-    duplicates = {x for x in provider_names if provider_names.count(x) > 1}
+    name_counter = Counter(d["name"] for d in yaml_files.values())
+    duplicates = {k for k, v in name_counter.items() if v > 1}
     if duplicates:
         errors.append(f"Provider name must be unique. Duplicates: {duplicates}")
 

Reply via email to