(airflow) branch main updated: Remove offset-based pagination from `list_jobs` function in `DatabricksHook` (#34926)

potiuk Fri, 03 Nov 2023 06:43:02 -0700

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git



The following commit(s) were added to refs/heads/main by this push:
     new 10bac853d2 Remove offset-based pagination from `list_jobs` function in 
`DatabricksHook` (#34926)
10bac853d2 is described below

commit 10bac853d2fb183e673faef6efaeb95a47c80a40
Author: Oleksii Davydenko <[email protected]>
AuthorDate: Fri Nov 3 14:42:49 2023 +0100

    Remove offset-based pagination from `list_jobs` function in 
`DatabricksHook` (#34926)
---
 airflow/providers/databricks/CHANGELOG.rst       |  9 ++++++++-
 airflow/providers/databricks/hooks/databricks.py | 22 ++--------------------
 airflow/providers/databricks/provider.yaml       |  1 +
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/airflow/providers/databricks/CHANGELOG.rst 
b/airflow/providers/databricks/CHANGELOG.rst
index dbcc4160a7..80d75ee37f 100644
--- a/airflow/providers/databricks/CHANGELOG.rst
+++ b/airflow/providers/databricks/CHANGELOG.rst
@@ -23,10 +23,17 @@
 
 ``apache-airflow-providers-databricks``
 
-
 Changelog
 ---------
 
+5.0.0
+.....
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+The ``offset`` parameter has been removed from ``list_jobs`` in favor of 
faster pagination with ``page_token``, similar to the `Databricks API 
<https://docs.databricks.com/api/workspace/jobs/list>`_.
+
 4.7.0
 .....
 
diff --git a/airflow/providers/databricks/hooks/databricks.py 
b/airflow/providers/databricks/hooks/databricks.py
index e1da837f43..6cb5b37e46 100644
--- a/airflow/providers/databricks/hooks/databricks.py
+++ b/airflow/providers/databricks/hooks/databricks.py
@@ -28,12 +28,11 @@ or the ``api/2.1/jobs/runs/submit``
 from __future__ import annotations
 
 import json
-import warnings
 from typing import Any
 
 from requests import exceptions as requests_exceptions
 
-from airflow.exceptions import AirflowException, 
AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowException
 from airflow.providers.databricks.hooks.databricks_base import 
BaseDatabricksHook
 
 GET_CLUSTER_ENDPOINT = ("GET", "api/2.0/clusters/get")
@@ -237,7 +236,6 @@ class DatabricksHook(BaseDatabricksHook):
     def list_jobs(
         self,
         limit: int = 25,
-        offset: int | None = None,
         expand_tasks: bool = False,
         job_name: str | None = None,
         page_token: str | None = None,
@@ -246,7 +244,6 @@ class DatabricksHook(BaseDatabricksHook):
         List the jobs in the Databricks Job Service.
 
         :param limit: The limit/batch size used to retrieve jobs.
-        :param offset: The offset of the first job to return, relative to the 
most recently created job.
         :param expand_tasks: Whether to include task and cluster details in 
the response.
         :param job_name: Optional name of a job to search.
         :param page_token: The optional page token pointing at the first 
job to return.
@@ -254,29 +251,15 @@ class DatabricksHook(BaseDatabricksHook):
         """
         has_more = True
         all_jobs = []
-        use_token_pagination = (page_token is not None) or (offset is None)
-        if offset is not None:
-            warnings.warn(
-                """You are using the deprecated offset parameter in list_jobs.
-                It will be hard-limited at the maximum value of 1000 by 
Databricks API after Oct 9, 2023.
-                Please paginate using page_token instead.""",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
         if page_token is None:
             page_token = ""
-        if offset is None:
-            offset = 0
 
         while has_more:
             payload: dict[str, Any] = {
                 "limit": limit,
                 "expand_tasks": expand_tasks,
             }
-            if use_token_pagination:
-                payload["page_token"] = page_token
-            else:  # offset pagination
-                payload["offset"] = offset
+            payload["page_token"] = page_token
             if job_name:
                 payload["name"] = job_name
             response = self._do_api_call(LIST_JOBS_ENDPOINT, payload)
@@ -288,7 +271,6 @@ class DatabricksHook(BaseDatabricksHook):
             has_more = response.get("has_more", False)
             if has_more:
                 page_token = response.get("next_page_token", "")
-                offset += len(jobs)
 
         return all_jobs
 
diff --git a/airflow/providers/databricks/provider.yaml 
b/airflow/providers/databricks/provider.yaml
index 17dda0b7e5..cefb777bec 100644
--- a/airflow/providers/databricks/provider.yaml
+++ b/airflow/providers/databricks/provider.yaml
@@ -23,6 +23,7 @@ description: |
 
 suspended: false
 versions:
+  - 5.0.0
   - 4.7.0
   - 4.6.0
   - 4.5.0

(airflow) branch main updated: Remove offset-based pagination from `list_jobs` function in `DatabricksHook` (#34926)

Reply via email to