Repository: incubator-airflow
Updated Branches:
  refs/heads/master 0f7ddbbed -> 4c41f6e96


[AIRFLOW-1016] Allow HTTP HEAD request method on HTTPSensor

This PR adds support for the HTTP HEAD request method alongside GET.
HEAD is useful for getting responses without a body, which makes for
a lighter-weight response.

Closes #2175 from msempere/AIRFLOW-1016/allow-http-head-request-method-on-httpsensor


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/4c41f6e9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/4c41f6e9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/4c41f6e9

Branch: refs/heads/master
Commit: 4c41f6e96e0fafd9eb409fa20fb5e62f70fa7f17
Parents: 0f7ddbb
Author: MSempere <[email protected]>
Authored: Wed Apr 5 08:09:32 2017 -0700
Committer: Arthur Wiedmer <[email protected]>
Committed: Wed Apr 5 08:10:20 2017 -0700

----------------------------------------------------------------------
 airflow/hooks/http_hook.py   |  7 +++-
 airflow/operators/sensors.py |  7 +++-
 tests/operators/sensors.py   | 73 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/hooks/http_hook.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/http_hook.py b/airflow/hooks/http_hook.py
index 7cf9a24..041328f 100644
--- a/airflow/hooks/http_hook.py
+++ b/airflow/hooks/http_hook.py
@@ -66,6 +66,11 @@ class HttpHook(BaseHook):
                                    url,
                                    params=data,
                                    headers=headers)
+        elif self.method == 'HEAD':
+            # HEAD doesn't use params
+            req = requests.Request(self.method,
+                                   url,
+                                   headers=headers)
         else:
             # Others use data
             req = requests.Request(self.method,
@@ -100,7 +105,7 @@ class HttpHook(BaseHook):
             # to get reason and code for failure by checking first 3 chars
             # for the code, or do a split on ':'
             logging.error("HTTP error: " + response.reason)
-            if self.method != 'GET':
+            if self.method not in ('GET', 'HEAD'):
                 # The sensor uses GET, so this prevents filling up the log
                 # with the body every time the GET 'misses'.
                 # That's ok to do, because GETs should be repeatable and

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/airflow/operators/sensors.py
----------------------------------------------------------------------
diff --git a/airflow/operators/sensors.py b/airflow/operators/sensors.py
index ae50bc5..b561b49 100644
--- a/airflow/operators/sensors.py
+++ b/airflow/operators/sensors.py
@@ -629,6 +629,8 @@ class HttpSensor(BaseSensorOperator):
 
     :param http_conn_id: The connection to run the sensor against
     :type http_conn_id: string
+    :param method: The HTTP request method to use
+    :type method: string
     :param endpoint: The relative part of the full url
     :type endpoint: string
     :param params: The parameters to be added to the GET url
@@ -650,6 +652,7 @@ class HttpSensor(BaseSensorOperator):
     def __init__(self,
                  endpoint,
                  http_conn_id='http_default',
+                 method='GET',
                  params=None,
                  headers=None,
                  response_check=None,
@@ -662,7 +665,9 @@ class HttpSensor(BaseSensorOperator):
         self.extra_options = extra_options or {}
         self.response_check = response_check
 
-        self.hook = HttpHook(method='GET', http_conn_id=http_conn_id)
+        self.hook = HttpHook(
+            method=method,
+            http_conn_id=http_conn_id)
 
     def poke(self, context):
         logging.info('Poking: ' + self.endpoint)

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/4c41f6e9/tests/operators/sensors.py
----------------------------------------------------------------------
diff --git a/tests/operators/sensors.py b/tests/operators/sensors.py
index e77216b..ea1e6cc 100644
--- a/tests/operators/sensors.py
+++ b/tests/operators/sensors.py
@@ -19,6 +19,7 @@ import sys
 import time
 import unittest
 
+from mock import patch
 from datetime import datetime, timedelta
 
 from airflow import DAG, configuration
@@ -94,6 +95,14 @@ class SensorTimeoutTest(unittest.TestCase):
 
 
 class HttpSensorTests(unittest.TestCase):
+    def setUp(self):
+        configuration.load_test_config()
+        args = {
+            'owner': 'airflow',
+            'start_date': DEFAULT_DATE
+        }
+        dag = DAG(TEST_DAG_ID, default_args=args)
+        self.dag = dag
 
     def test_poke_exception(self):
         """
@@ -112,6 +121,70 @@ class HttpSensorTests(unittest.TestCase):
         with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'):
             task.execute(None)
 
+    @patch("airflow.hooks.http_hook.requests.Session.send")
+    def test_head_method(self, mock_session_send):
+        def resp_check(resp):
+            return True
+
+        task = HttpSensor(
+            dag=self.dag,
+            task_id='http_sensor_head_method',
+            http_conn_id='http_default',
+            endpoint='',
+            params={},
+            method='HEAD',
+            response_check=resp_check,
+            timeout=5,
+            poke_interval=1)
+
+        import requests
+        task.execute(None)
+
+        args, kwargs = mock_session_send.call_args
+        received_request = args[0]
+
+        prep_request = requests.Request(
+            'HEAD',
+            'https://www.google.com',
+            {}).prepare()
+
+        self.assertEqual(prep_request.url, received_request.url)
+        self.assertTrue(prep_request.method, received_request.method)
+
+    @patch("airflow.hooks.http_hook.requests.Session.send")
+    @patch("airflow.hooks.http_hook.logging.error")
+    def test_logging_head_error_request(
+        self,
+        mock_error_logging,
+        mock_session_send
+    ):
+
+        def resp_check(resp):
+            return True
+
+        import requests
+        response = requests.Response()
+        response.status_code = 404
+        response.reason = 'Not Found'
+        mock_session_send.return_value = response
+
+        task = HttpSensor(
+            dag=self.dag,
+            task_id='http_sensor_head_method',
+            http_conn_id='http_default',
+            endpoint='',
+            params={},
+            method='HEAD',
+            response_check=resp_check,
+            timeout=5,
+            poke_interval=1)
+
+        with self.assertRaises(AirflowSensorTimeout):
+            task.execute(None)
+
+        self.assertTrue(mock_error_logging.called)
+        mock_error_logging.assert_called_with('HTTP error: Not Found')
+
 
 class HdfsSensorTests(unittest.TestCase):
 

Reply via email to