This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 10bac853d2 Remove offset-based pagination from `list_jobs` function in
`DatabricksHook` (#34926)
10bac853d2 is described below
commit 10bac853d2fb183e673faef6efaeb95a47c80a40
Author: Oleksii Davydenko <[email protected]>
AuthorDate: Fri Nov 3 14:42:49 2023 +0100
Remove offset-based pagination from `list_jobs` function in
`DatabricksHook` (#34926)
---
airflow/providers/databricks/CHANGELOG.rst | 9 ++++++++-
airflow/providers/databricks/hooks/databricks.py | 22 ++--------------------
airflow/providers/databricks/provider.yaml | 1 +
3 files changed, 11 insertions(+), 21 deletions(-)
diff --git a/airflow/providers/databricks/CHANGELOG.rst
b/airflow/providers/databricks/CHANGELOG.rst
index dbcc4160a7..80d75ee37f 100644
--- a/airflow/providers/databricks/CHANGELOG.rst
+++ b/airflow/providers/databricks/CHANGELOG.rst
@@ -23,10 +23,17 @@
``apache-airflow-providers-databricks``
-
Changelog
---------
+5.0.0
+.....
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+The ``offset`` parameter has been removed from ``list_jobs`` in favor of
faster pagination with ``page_token``, similar to the `Databricks API
<https://docs.databricks.com/api/workspace/jobs/list>`_.
+
4.7.0
.....
diff --git a/airflow/providers/databricks/hooks/databricks.py
b/airflow/providers/databricks/hooks/databricks.py
index e1da837f43..6cb5b37e46 100644
--- a/airflow/providers/databricks/hooks/databricks.py
+++ b/airflow/providers/databricks/hooks/databricks.py
@@ -28,12 +28,11 @@ or the ``api/2.1/jobs/runs/submit``
from __future__ import annotations
import json
-import warnings
from typing import Any
from requests import exceptions as requests_exceptions
-from airflow.exceptions import AirflowException,
AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowException
from airflow.providers.databricks.hooks.databricks_base import
BaseDatabricksHook
GET_CLUSTER_ENDPOINT = ("GET", "api/2.0/clusters/get")
@@ -237,7 +236,6 @@ class DatabricksHook(BaseDatabricksHook):
def list_jobs(
self,
limit: int = 25,
- offset: int | None = None,
expand_tasks: bool = False,
job_name: str | None = None,
page_token: str | None = None,
@@ -246,7 +244,6 @@ class DatabricksHook(BaseDatabricksHook):
List the jobs in the Databricks Job Service.
:param limit: The limit/batch size used to retrieve jobs.
- :param offset: The offset of the first job to return, relative to the
most recently created job.
:param expand_tasks: Whether to include task and cluster details in
the response.
:param job_name: Optional name of a job to search.
:param page_token: The optional page token pointing at the first
job to return.
@@ -254,29 +251,15 @@ class DatabricksHook(BaseDatabricksHook):
"""
has_more = True
all_jobs = []
- use_token_pagination = (page_token is not None) or (offset is None)
- if offset is not None:
- warnings.warn(
- """You are using the deprecated offset parameter in list_jobs.
- It will be hard-limited at the maximum value of 1000 by
Databricks API after Oct 9, 2023.
- Please paginate using page_token instead.""",
- AirflowProviderDeprecationWarning,
- stacklevel=2,
- )
if page_token is None:
page_token = ""
- if offset is None:
- offset = 0
while has_more:
payload: dict[str, Any] = {
"limit": limit,
"expand_tasks": expand_tasks,
}
- if use_token_pagination:
- payload["page_token"] = page_token
- else: # offset pagination
- payload["offset"] = offset
+ payload["page_token"] = page_token
if job_name:
payload["name"] = job_name
response = self._do_api_call(LIST_JOBS_ENDPOINT, payload)
@@ -288,7 +271,6 @@ class DatabricksHook(BaseDatabricksHook):
has_more = response.get("has_more", False)
if has_more:
page_token = response.get("next_page_token", "")
- offset += len(jobs)
return all_jobs
diff --git a/airflow/providers/databricks/provider.yaml
b/airflow/providers/databricks/provider.yaml
index 17dda0b7e5..cefb777bec 100644
--- a/airflow/providers/databricks/provider.yaml
+++ b/airflow/providers/databricks/provider.yaml
@@ -23,6 +23,7 @@ description: |
suspended: false
versions:
+ - 5.0.0
- 4.7.0
- 4.6.0
- 4.5.0