This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 522083cab4 Updated app to support configuring the caching hash method for FIPS v2 (#30675)
522083cab4 is described below

commit 522083cab4705789c79466933238e3874253c8a6
Author: Victor Chiapaikeo <[email protected]>
AuthorDate: Mon Apr 17 12:46:32 2023 -0400

    Updated app to support configuring the caching hash method for FIPS v2 (#30675)
---
 airflow/config_templates/config.yml          |  7 ++++
 airflow/config_templates/default_airflow.cfg |  4 +++
 airflow/models/serialized_dag.py             |  4 +--
 airflow/utils/hashlib_wrapper.py             | 37 ++++++++++++++++++++
 airflow/www/app.py                           |  6 ++--
 airflow/www/extensions/init_cache.py         | 52 ++++++++++++++++++++++++++++
 newsfragments/28846.misc.rst                 |  1 +
 tests/www/test_app.py                        | 25 +++++++++++++
 8 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index 43bad45e41..bf88cdaa79 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -1692,6 +1692,13 @@ webserver:
       type: string
       example: ~
       default: "5 per 40 second"
+    caching_hash_method:
+      description: |
+        The caching algorithm used by the webserver. Must be a valid hashlib function name.
+      version_added:
+      type: string
+      example: "sha256"
+      default: "md5"
 
 email:
   description: |
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index e5d92429c0..684a78992d 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -856,6 +856,10 @@ auth_rate_limited = True
 # Rate limit for authentication endpoints.
 auth_rate_limit = 5 per 40 second
 
+# The caching algorithm used by the webserver. Must be a valid hashlib function name.
+# Example: caching_hash_method = sha256
+caching_hash_method = md5
+
 [email]
 
 # Configuration email backend and whether to
diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py
index a693279367..d3f49c64d3 100644
--- a/airflow/models/serialized_dag.py
+++ b/airflow/models/serialized_dag.py
@@ -18,7 +18,6 @@
 """Serialized DAG table in database."""
 from __future__ import annotations
 
-import hashlib
 import logging
 import zlib
 from datetime import datetime, timedelta
@@ -35,6 +34,7 @@ from airflow.models.dagrun import DagRun
 from airflow.serialization.serialized_objects import DagDependency, SerializedDAG
 from airflow.settings import COMPRESS_SERIALIZED_DAGS, MIN_SERIALIZED_DAG_UPDATE_INTERVAL, json
 from airflow.utils import timezone
+from airflow.utils.hashlib_wrapper import md5
 from airflow.utils.session import NEW_SESSION, provide_session
 from airflow.utils.sqlalchemy import UtcDateTime
 
@@ -102,7 +102,7 @@ class SerializedDagModel(Base):
         dag_data = SerializedDAG.to_dict(dag)
         dag_data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8")
 
-        self.dag_hash = hashlib.md5(dag_data_json).hexdigest()
+        self.dag_hash = md5(dag_data_json, usedforsecurity=False).hexdigest()
 
         if COMPRESS_SERIALIZED_DAGS:
             self._data = None
diff --git a/airflow/utils/hashlib_wrapper.py b/airflow/utils/hashlib_wrapper.py
new file mode 100644
index 0000000000..2415f3d00e
--- /dev/null
+++ b/airflow/utils/hashlib_wrapper.py
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import hashlib
+
+from airflow import PY39
+
+
+def md5(data: bytes, *, usedforsecurity: bool | None = None):
+    """
+    Safely allows calling the hashlib.md5 function with the "usedforsecurity" param.
+    :param data: The data to hash.
+    :param usedforsecurity: The value to pass to the md5 function's "usedforsecurity" param.
+        Defaults to None.
+    :return: The hashed value.
+    :rtype: _Hash
+    """
+    if PY39 and usedforsecurity is not None:
+        return hashlib.md5(data, usedforsecurity=usedforsecurity)  # type: ignore
+    else:
+        return hashlib.md5(data)
diff --git a/airflow/www/app.py b/airflow/www/app.py
index 5283718089..246381f003 100644
--- a/airflow/www/app.py
+++ b/airflow/www/app.py
@@ -19,11 +19,9 @@ from __future__ import annotations
 
 import warnings
 from datetime import timedelta
-from tempfile import gettempdir
 
 from flask import Flask
 from flask_appbuilder import SQLA
-from flask_caching import Cache
 from flask_wtf.csrf import CSRFProtect
 from markupsafe import Markup
 from sqlalchemy.engine.url import make_url
@@ -38,6 +36,7 @@ from airflow.settings import _ENABLE_AIP_44
 from airflow.utils.json import AirflowJsonProvider
 from airflow.www.extensions.init_appbuilder import init_appbuilder
 from airflow.www.extensions.init_appbuilder_links import init_appbuilder_links
+from airflow.www.extensions.init_cache import init_cache
 from airflow.www.extensions.init_dagbag import init_dagbag
 from airflow.www.extensions.init_jinja_globals import init_jinja_globals
 from airflow.www.extensions.init_manifest_files import configure_manifest_files
@@ -143,8 +142,7 @@ def create_app(config=None, testing=False):
 
     init_robots(flask_app)
 
-    cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()}
-    Cache(app=flask_app, config=cache_config)
+    init_cache(flask_app)
 
     init_flash_views(flask_app)
 
diff --git a/airflow/www/extensions/init_cache.py b/airflow/www/extensions/init_cache.py
new file mode 100644
index 0000000000..84d952dd71
--- /dev/null
+++ b/airflow/www/extensions/init_cache.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import hashlib
+from tempfile import gettempdir
+
+from flask_caching import Cache
+
+from airflow.configuration import conf
+from airflow.exceptions import AirflowConfigException
+
+HASH_METHOD_MAPPING = {
+    "md5": hashlib.md5,
+    "sha1": hashlib.sha1,
+    "sha224": hashlib.sha224,
+    "sha256": hashlib.sha256,
+    "sha384": hashlib.sha384,
+    "sha512": hashlib.sha512,
+}
+
+
+def init_cache(app):
+    webserver_caching_hash_method = conf.get(
+        section="webserver", key="CACHING_HASH_METHOD", fallback="md5"
+    ).casefold()
+    cache_config = {"CACHE_TYPE": "flask_caching.backends.filesystem", "CACHE_DIR": gettempdir()}
+
+    mapped_hash_method = HASH_METHOD_MAPPING.get(webserver_caching_hash_method)
+
+    if mapped_hash_method is None:
+        raise AirflowConfigException(
+            f"Unsupported webserver caching hash method: `{webserver_caching_hash_method}`."
+        )
+
+    cache_config["CACHE_OPTIONS"] = {"hash_method": mapped_hash_method}
+
+    Cache(app=app, config=cache_config)
diff --git a/newsfragments/28846.misc.rst b/newsfragments/28846.misc.rst
new file mode 100644
index 0000000000..14d072c877
--- /dev/null
+++ b/newsfragments/28846.misc.rst
@@ -0,0 +1 @@
+Various updates for FIPS-compliance when running Airflow in Python 3.9+. This includes a new webserver option, ``caching_hash_method``, for changing the default flask caching method.
diff --git a/tests/www/test_app.py b/tests/www/test_app.py
index 29b8653397..61bfae3e12 100644
--- a/tests/www/test_app.py
+++ b/tests/www/test_app.py
@@ -17,6 +17,7 @@
 # under the License.
 from __future__ import annotations
 
+import hashlib
 import runpy
 import sys
 from datetime import timedelta
@@ -27,6 +28,7 @@ from werkzeug.routing import Rule
 from werkzeug.test import create_environ
 from werkzeug.wrappers import Response
 
+from airflow.exceptions import AirflowConfigException
 from airflow.www import app as application
 from tests.test_utils.config import conf_vars
 from tests.test_utils.decorators import dont_initialize_flask_app_submodules
@@ -228,6 +230,29 @@ class TestApp:
             app = application.cached_app(testing=True)
         assert app.config["SESSION_COOKIE_SAMESITE"] == "Lax"
 
+    @pytest.mark.parametrize(
+        "hash_method, result, exception",
+        [
+            ("sha512", hashlib.sha512, None),
+            ("sha384", hashlib.sha384, None),
+            ("sha256", hashlib.sha256, None),
+            ("sha224", hashlib.sha224, None),
+            ("sha1", hashlib.sha1, None),
+            ("md5", hashlib.md5, None),
+            (None, hashlib.md5, None),
+            ("invalid", None, AirflowConfigException),
+        ],
+    )
+    @dont_initialize_flask_app_submodules
+    def test_should_respect_caching_hash_method(self, hash_method, result, exception):
+        with conf_vars({("webserver", "caching_hash_method"): hash_method}):
+            if exception:
+                with pytest.raises(expected_exception=exception):
+                    app = application.cached_app(testing=True)
+            else:
+                app = application.cached_app(testing=True)
+                assert next(iter(app.extensions["cache"])).cache._hash_method == result
+
 
 class TestFlaskCli:
     @dont_initialize_flask_app_submodules(skip_all_except=["init_appbuilder"])

Reply via email to