This is an automated email from the ASF dual-hosted git repository.
eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new caec4c7c5c Simplify hive client connection (#37043)
caec4c7c5c is described below
commit caec4c7c5c1bc249df7820da12ba929bed9f4dfa
Author: rom sharon <[email protected]>
AuthorDate: Sun Feb 4 11:36:25 2024 +0200
Simplify hive client connection (#37043)
* change hive proxy user
* remove extra from connection
* change documentation
* add breaking change and change version
* fix spell check
* fix docs
* Update airflow/providers/apache/hive/CHANGELOG.rst
* fix typo
* proxy user sent in operator will override proxy user in connection
* fix change log
---------
Co-authored-by: Elad Kalif <[email protected]>
---
airflow/providers/apache/hive/CHANGELOG.rst | 10 ++++++
airflow/providers/apache/hive/hooks/hive.py | 38 +++++++++++++++-------
airflow/providers/apache/hive/operators/hive.py | 6 ----
airflow/providers/apache/hive/provider.yaml | 1 +
.../connections/hive_cli.rst | 21 +++++-------
tests/providers/apache/hive/hooks/test_hive.py | 15 ++-------
6 files changed, 49 insertions(+), 42 deletions(-)
diff --git a/airflow/providers/apache/hive/CHANGELOG.rst
b/airflow/providers/apache/hive/CHANGELOG.rst
index 3ab9928de0..66836e6887 100644
--- a/airflow/providers/apache/hive/CHANGELOG.rst
+++ b/airflow/providers/apache/hive/CHANGELOG.rst
@@ -27,6 +27,16 @@
Changelog
---------
+7.0.0
+.....
+
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+* Remove the ability to specify a proxy user as ``owner``, ``login`` or
``as_param`` in the connection. Now, setting the user in the ``Proxy User``
connection parameter or passing ``proxy_user`` to HiveHook will do the job.
+
+
6.4.2
.....
diff --git a/airflow/providers/apache/hive/hooks/hive.py
b/airflow/providers/apache/hive/hooks/hive.py
index 5bdaf8084c..d0dfa10c62 100644
--- a/airflow/providers/apache/hive/hooks/hive.py
+++ b/airflow/providers/apache/hive/hooks/hive.py
@@ -91,7 +91,6 @@ class HiveCliHook(BaseHook):
def __init__(
self,
hive_cli_conn_id: str = default_conn_name,
- run_as: str | None = None,
mapred_queue: str | None = None,
mapred_queue_priority: str | None = None,
mapred_job_name: str | None = None,
@@ -105,7 +104,6 @@ class HiveCliHook(BaseHook):
self.use_beeline: bool = conn.extra_dejson.get("use_beeline", False)
self.auth = auth
self.conn = conn
- self.run_as = run_as
self.sub_process: Any = None
if mapred_queue_priority:
mapred_queue_priority = mapred_queue_priority.upper()
@@ -119,20 +117,38 @@ class HiveCliHook(BaseHook):
self.mapred_job_name = mapred_job_name
self.proxy_user = proxy_user
+ @classmethod
+ def get_connection_form_widgets(cls) -> dict[str, Any]:
+ """Returns connection widgets to add to connection form."""
+ from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
+ from flask_babel import lazy_gettext
+ from wtforms import BooleanField, StringField
+
+ return {
+ "use_beeline": BooleanField(lazy_gettext("Use Beeline"),
default=False),
+ "proxy_user": StringField(lazy_gettext("Proxy User"),
widget=BS3TextFieldWidget(), default=""),
+ "principal": StringField(
+ lazy_gettext("Principal"), widget=BS3TextFieldWidget(),
default="hive/[email protected]"
+ ),
+ }
+
+ @classmethod
+ def get_ui_field_behaviour(cls) -> dict[str, Any]:
+ """Returns custom field behaviour."""
+ return {
+ "hidden_fields": ["extra"],
+ "relabeling": {},
+ }
+
def _get_proxy_user(self) -> str:
"""Set the proper proxy_user value in case the user overwrite the
default."""
conn = self.conn
-
- proxy_user_value: str = conn.extra_dejson.get("proxy_user", "")
- if proxy_user_value == "login" and conn.login:
- return f"hive.server2.proxy.user={conn.login}"
- if proxy_user_value == "owner" and self.run_as:
- return f"hive.server2.proxy.user={self.run_as}"
- if proxy_user_value == "as_param" and self.proxy_user:
+ if self.proxy_user is not None:
return f"hive.server2.proxy.user={self.proxy_user}"
- if proxy_user_value != "": # There is a custom proxy user
+ proxy_user_value: str = conn.extra_dejson.get("proxy_user", "")
+ if proxy_user_value != "":
return f"hive.server2.proxy.user={proxy_user_value}"
- return proxy_user_value # The default proxy user (undefined)
+ return ""
def _prepare_cli_cmd(self) -> list[Any]:
"""Create the command list from available information."""
diff --git a/airflow/providers/apache/hive/operators/hive.py
b/airflow/providers/apache/hive/operators/hive.py
index 8c7a7cf1e1..398cadce0d 100644
--- a/airflow/providers/apache/hive/operators/hive.py
+++ b/airflow/providers/apache/hive/operators/hive.py
@@ -54,7 +54,6 @@ class HiveOperator(BaseOperator):
object documentation for more details.
:param script_begin_tag: If defined, the operator will get rid of the
part of the script before the first occurrence of `script_begin_tag`
- :param run_as_owner: Run HQL code as a DAG's owner.
:param mapred_queue: queue used by the Hadoop CapacityScheduler.
(templated)
:param mapred_queue_priority: priority within CapacityScheduler queue.
Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW
@@ -91,7 +90,6 @@ class HiveOperator(BaseOperator):
hiveconfs: dict[Any, Any] | None = None,
hiveconf_jinja_translate: bool = False,
script_begin_tag: str | None = None,
- run_as_owner: bool = False,
mapred_queue: str | None = None,
mapred_queue_priority: str | None = None,
mapred_job_name: str | None = None,
@@ -107,9 +105,6 @@ class HiveOperator(BaseOperator):
self.hiveconfs = hiveconfs or {}
self.hiveconf_jinja_translate = hiveconf_jinja_translate
self.script_begin_tag = script_begin_tag
- self.run_as = None
- if run_as_owner:
- self.run_as = self.dag.owner
self.mapred_queue = mapred_queue
self.mapred_queue_priority = mapred_queue_priority
self.mapred_job_name = mapred_job_name
@@ -128,7 +123,6 @@ class HiveOperator(BaseOperator):
"""Get Hive cli hook."""
return HiveCliHook(
hive_cli_conn_id=self.hive_cli_conn_id,
- run_as=self.run_as,
mapred_queue=self.mapred_queue,
mapred_queue_priority=self.mapred_queue_priority,
mapred_job_name=self.mapred_job_name,
diff --git a/airflow/providers/apache/hive/provider.yaml
b/airflow/providers/apache/hive/provider.yaml
index 12dcf4f3ed..ea24c2b370 100644
--- a/airflow/providers/apache/hive/provider.yaml
+++ b/airflow/providers/apache/hive/provider.yaml
@@ -24,6 +24,7 @@ description: |
state: ready
source-date-epoch: 1705911912
versions:
+ - 7.0.0
- 6.4.2
- 6.4.1
- 6.4.0
diff --git a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst
b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst
index 2e23ec63d5..cc52f1db92 100644
--- a/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst
+++ b/docs/apache-airflow-providers-apache-hive/connections/hive_cli.rst
@@ -64,19 +64,14 @@ Schema (optional)
Specify your JDBC Hive database that you want to connect to with Beeline
or specify a schema for an HQL statement to run with the Hive CLI.
-Extra (optional)
- Specify the extra parameters (as json dictionary) that can be used in Hive
CLI connection.
- The following parameters are all optional:
-
- * ``use_beeline``
- Specify as ``True`` if using the Beeline CLI. Default is ``False``.
- * ``proxy_user``
- Specify a proxy user as an ``owner`` or ``login`` or ``as_param`` keep
blank if using a
- custom proxy user.
- When using ``owner`` you will want to pass the operator
``run_as_owner=True`` if you don't you will run the hql as user="owner"
- When using ``as_param`` you will want to pass the operator
``proxy_user=<some_user>`` if you don't you will run the hql as user="as_param"
- * ``principal``
- Specify the JDBC Hive principal to be used with Hive Beeline.
+Use Beeline (optional)
+ Specify as ``True`` if using the Beeline CLI. Default is ``False``.
+
+Proxy User (optional)
+ Specify a proxy user to run HQL code as this user.
+
+Principal (optional)
+ Specify the JDBC Hive principal to be used with Hive Beeline.
When specifying the connection in environment variable you should specify
diff --git a/tests/providers/apache/hive/hooks/test_hive.py
b/tests/providers/apache/hive/hooks/test_hive.py
index 461b101641..a137364767 100644
--- a/tests/providers/apache/hive/hooks/test_hive.py
+++ b/tests/providers/apache/hive/hooks/test_hive.py
@@ -880,20 +880,12 @@ class TestHiveCli:
self.nondefault_schema = "nondefault"
@pytest.mark.parametrize(
- "extra_dejson, correct_proxy_user, run_as, proxy_user",
+ "extra_dejson, correct_proxy_user, proxy_user",
[
- ({"proxy_user": "a_user_proxy"},
"hive.server2.proxy.user=a_user_proxy", None, None),
- ({"proxy_user": "owner"},
"hive.server2.proxy.user=dummy_dag_owner", "dummy_dag_owner", None),
- ({"proxy_user": "login"}, "hive.server2.proxy.user=admin", None,
None),
- (
- {"proxy_user": "as_param"},
- "hive.server2.proxy.user=param_proxy_user",
- None,
- "param_proxy_user",
- ),
+ ({"proxy_user": "a_user_proxy"},
"hive.server2.proxy.user=a_user_proxy", None),
],
)
- def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user,
run_as, proxy_user):
+ def test_get_proxy_user_value(self, extra_dejson, correct_proxy_user,
proxy_user):
hook = MockHiveCliHook()
returner = mock.MagicMock()
returner.extra_dejson = extra_dejson
@@ -901,7 +893,6 @@ class TestHiveCli:
hook.use_beeline = True
hook.conn = returner
hook.proxy_user = proxy_user
- hook.run_as = run_as
# Run
result = hook._prepare_cli_cmd()