Repository: incubator-airflow Updated Branches: refs/heads/master 0f7ddbbed -> 4c41f6e96
[AIRFLOW-1016] Allow HTTP HEAD request method on HTTPSensor This PR provides the HEAD HTTP method on top of GET. This is useful for getting responses without a body, and provides a lighter-weight response. Closes #2175 from msempere/AIRFLOW-1016/allow-http-head-request-method-on-httpsensor Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/4c41f6e9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/4c41f6e9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/4c41f6e9 Branch: refs/heads/master Commit: 4c41f6e96e0fafd9eb409fa20fb5e62f70fa7f17 Parents: 0f7ddbb Author: MSempere <[email protected]> Authored: Wed Apr 5 08:09:32 2017 -0700 Committer: Arthur Wiedmer <[email protected]> Committed: Wed Apr 5 08:10:20 2017 -0700 ---------------------------------------------------------------------- airflow/hooks/http_hook.py | 7 +++- airflow/operators/sensors.py | 7 +++- tests/operators/sensors.py | 73 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/hooks/http_hook.py ---------------------------------------------------------------------- diff --git a/airflow/hooks/http_hook.py b/airflow/hooks/http_hook.py index 7cf9a24..041328f 100644 --- a/airflow/hooks/http_hook.py +++ b/airflow/hooks/http_hook.py @@ -66,6 +66,11 @@ class HttpHook(BaseHook): url, params=data, headers=headers) + elif self.method == 'HEAD': + # HEAD doesn't use params + req = requests.Request(self.method, + url, + headers=headers) else: # Others use data req = requests.Request(self.method, @@ -100,7 +105,7 @@ class HttpHook(BaseHook): # to get reason and code for failure by checking first 3 chars # for the code, or do a split on ':' logging.error("HTTP error: " + response.reason) - if self.method 
!= 'GET': + if self.method not in ('GET', 'HEAD'): # The sensor uses GET, so this prevents filling up the log # with the body every time the GET 'misses'. # That's ok to do, because GETs should be repeatable and http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/operators/sensors.py ---------------------------------------------------------------------- diff --git a/airflow/operators/sensors.py b/airflow/operators/sensors.py index ae50bc5..b561b49 100644 --- a/airflow/operators/sensors.py +++ b/airflow/operators/sensors.py @@ -629,6 +629,8 @@ class HttpSensor(BaseSensorOperator): :param http_conn_id: The connection to run the sensor against :type http_conn_id: string + :param method: The HTTP request method to use + :type method: string :param endpoint: The relative part of the full url :type endpoint: string :param params: The parameters to be added to the GET url @@ -650,6 +652,7 @@ class HttpSensor(BaseSensorOperator): def __init__(self, endpoint, http_conn_id='http_default', + method='GET', params=None, headers=None, response_check=None, @@ -662,7 +665,9 @@ class HttpSensor(BaseSensorOperator): self.extra_options = extra_options or {} self.response_check = response_check - self.hook = HttpHook(method='GET', http_conn_id=http_conn_id) + self.hook = HttpHook( + method=method, + http_conn_id=http_conn_id) def poke(self, context): logging.info('Poking: ' + self.endpoint) http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/tests/operators/sensors.py ---------------------------------------------------------------------- diff --git a/tests/operators/sensors.py b/tests/operators/sensors.py index e77216b..ea1e6cc 100644 --- a/tests/operators/sensors.py +++ b/tests/operators/sensors.py @@ -19,6 +19,7 @@ import sys import time import unittest +from mock import patch from datetime import datetime, timedelta from airflow import DAG, configuration @@ -94,6 +95,14 @@ class SensorTimeoutTest(unittest.TestCase): class 
HttpSensorTests(unittest.TestCase): + def setUp(self): + configuration.load_test_config() + args = { + 'owner': 'airflow', + 'start_date': DEFAULT_DATE + } + dag = DAG(TEST_DAG_ID, default_args=args) + self.dag = dag def test_poke_exception(self): """ @@ -112,6 +121,70 @@ class HttpSensorTests(unittest.TestCase): with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'): task.execute(None) + @patch("airflow.hooks.http_hook.requests.Session.send") + def test_head_method(self, mock_session_send): + def resp_check(resp): + return True + + task = HttpSensor( + dag=self.dag, + task_id='http_sensor_head_method', + http_conn_id='http_default', + endpoint='', + params={}, + method='HEAD', + response_check=resp_check, + timeout=5, + poke_interval=1) + + import requests + task.execute(None) + + args, kwargs = mock_session_send.call_args + received_request = args[0] + + prep_request = requests.Request( + 'HEAD', + 'https://www.google.com', + {}).prepare() + + self.assertEqual(prep_request.url, received_request.url) + self.assertTrue(prep_request.method, received_request.method) + + @patch("airflow.hooks.http_hook.requests.Session.send") + @patch("airflow.hooks.http_hook.logging.error") + def test_logging_head_error_request( + self, + mock_error_logging, + mock_session_send + ): + + def resp_check(resp): + return True + + import requests + response = requests.Response() + response.status_code = 404 + response.reason = 'Not Found' + mock_session_send.return_value = response + + task = HttpSensor( + dag=self.dag, + task_id='http_sensor_head_method', + http_conn_id='http_default', + endpoint='', + params={}, + method='HEAD', + response_check=resp_check, + timeout=5, + poke_interval=1) + + with self.assertRaises(AirflowSensorTimeout): + task.execute(None) + + self.assertTrue(mock_error_logging.called) + mock_error_logging.assert_called_with('HTTP error: Not Found') + class HdfsSensorTests(unittest.TestCase):
