ashb commented on a change in pull request #12925:
URL: https://github.com/apache/airflow/pull/12925#discussion_r648695501



##########
File path: airflow/providers/grafana/log/loki_task_handler.py
##########
@@ -0,0 +1,242 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Loki logging handler for tasks"""
+import time
+from typing import Dict, Optional, Tuple, Union
+
+import logging_loki
+import requests
+from cached_property import cached_property
+
+from airflow.hooks.base_hook import BaseHook
+from airflow.models import TaskInstance
+from airflow.utils.log.file_task_handler import FileTaskHandler
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+DEFAULT_LOGGER_NAME = "airflow"
+
+
+class LokiTaskHandler(FileTaskHandler, LoggingMixin):
+    """
+    LokiTaskHandler that directly makes Loki logging API calls while reading 
and writing logs.
+    This is a Python standard ``logging`` handler using that can be used to 
route Python standard
+    logging messages directly to the Loki Logging API. It can also be used to 
save logs for
+    executing tasks. To do this, you should set as a handler with the name 
"tasks". In this case,
+    it will also be used to read the log for display in Web UI.
+    :param base_log_folder: Base log folder to place logs (incase Loki is 
down).
+    :type base_log_folder: str
+    :param filename_template: template filename string (incase Loki is down)
+    :type filename_template: str
+    :param loki_conn_id: Connection ID that will be used for authorization to 
the Loki Platform.
+    :type loki_conn_id: str
+    :param name: the name of the custom log in Loki Logging. Defaults to 
'airflow'.
+    :type name: str
+    :param labels: (Optional) Mapping of labels for the entry.
+    :type labels: dict
+    """
+
+    # pylint: disable=too-many-arguments
+    def __init__(
+        self,
+        base_log_folder: str,
+        filename_template: str,
+        loki_conn_id: str,
+        name: str = DEFAULT_LOGGER_NAME,
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        super().__init__(base_log_folder, filename_template)
+        self.loki_conn_id = loki_conn_id
+        self.name: str = name
+        self.timestamp_pattern = "%Y-%m-%dT%H:%M:%S"
+        self.labels = labels
+        self._session: Optional[requests.Session] = None
+
+    @cached_property
+    def get_conn(self):
+        """Loki connection for client"""
+        return BaseHook.get_connection(self.loki_conn_id)
+
+    @property
+    def session(self) -> requests.Session:
+        """Create HTTP session"""
+        if self._session is None:
+            self._session = requests.Session()
+            self._session.auth = (self.get_conn.login, self.get_conn.password) 
or None
+        return self._session
+
+    def is_loki_alive(self):
+        """Checks whether Loki is ready for pushing/pulling logs"""
+        try:
+            status = self.session.get(
+                f"{self.get_conn.host}/ready",
+            )

Review comment:
       ```suggestion
               status = self.http_session.get(f"{self.get_conn.host}/ready")
   ```

##########
File path: airflow/providers/grafana/log/loki_task_handler.py
##########
@@ -0,0 +1,242 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Loki logging handler for tasks"""
+import time
+from typing import Dict, Optional, Tuple, Union
+
+import logging_loki
+import requests
+from cached_property import cached_property
+
+from airflow.hooks.base_hook import BaseHook
+from airflow.models import TaskInstance
+from airflow.utils.log.file_task_handler import FileTaskHandler
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+DEFAULT_LOGGER_NAME = "airflow"
+
+
+class LokiTaskHandler(FileTaskHandler, LoggingMixin):
+    """
+    LokiTaskHandler that directly makes Loki logging API calls while reading 
and writing logs.
+    This is a Python standard ``logging`` handler using that can be used to 
route Python standard
+    logging messages directly to the Loki Logging API. It can also be used to 
save logs for
+    executing tasks. To do this, you should set as a handler with the name 
"tasks". In this case,
+    it will also be used to read the log for display in Web UI.
+    :param base_log_folder: Base log folder to place logs (incase Loki is 
down).
+    :type base_log_folder: str
+    :param filename_template: template filename string (incase Loki is down)
+    :type filename_template: str
+    :param loki_conn_id: Connection ID that will be used for authorization to 
the Loki Platform.
+    :type loki_conn_id: str
+    :param name: the name of the custom log in Loki Logging. Defaults to 
'airflow'.
+    :type name: str
+    :param labels: (Optional) Mapping of labels for the entry.
+    :type labels: dict
+    """
+
+    # pylint: disable=too-many-arguments
+    def __init__(
+        self,
+        base_log_folder: str,
+        filename_template: str,
+        loki_conn_id: str,
+        name: str = DEFAULT_LOGGER_NAME,
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        super().__init__(base_log_folder, filename_template)
+        self.loki_conn_id = loki_conn_id
+        self.name: str = name
+        self.timestamp_pattern = "%Y-%m-%dT%H:%M:%S"
+        self.labels = labels
+        self._session: Optional[requests.Session] = None
+
+    @cached_property
+    def get_conn(self):
+        """Loki connection for client"""
+        return BaseHook.get_connection(self.loki_conn_id)
+
+    @property
+    def session(self) -> requests.Session:
+        """Create HTTP session"""
+        if self._session is None:
+            self._session = requests.Session()
+            self._session.auth = (self.get_conn.login, self.get_conn.password) 
or None
+        return self._session

Review comment:
       "Session" in Airflow commonly refers to the SQLAlchemy DB session, so let's 
rename this property to `http_session`:
   
   ```suggestion
       @cached_property
       def http_session(self) -> requests.Session:
           """Create HTTP session"""
           session = requests.Session()
           session.auth = (self.get_conn.login, self.get_conn.password) or None
           return session
   ```

##########
File path: airflow/providers/grafana/log/loki_task_handler.py
##########
@@ -0,0 +1,242 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Loki logging handler for tasks"""
+import time
+from typing import Dict, Optional, Tuple, Union
+
+import logging_loki
+import requests
+from cached_property import cached_property
+
+from airflow.hooks.base_hook import BaseHook
+from airflow.models import TaskInstance
+from airflow.utils.log.file_task_handler import FileTaskHandler
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+DEFAULT_LOGGER_NAME = "airflow"
+
+
+class LokiTaskHandler(FileTaskHandler, LoggingMixin):
+    """
+    LokiTaskHandler that directly makes Loki logging API calls while reading 
and writing logs.
+    This is a Python standard ``logging`` handler using that can be used to 
route Python standard
+    logging messages directly to the Loki Logging API. It can also be used to 
save logs for
+    executing tasks. To do this, you should set as a handler with the name 
"tasks". In this case,
+    it will also be used to read the log for display in Web UI.
+    :param base_log_folder: Base log folder to place logs (incase Loki is 
down).
+    :type base_log_folder: str
+    :param filename_template: template filename string (incase Loki is down)
+    :type filename_template: str
+    :param loki_conn_id: Connection ID that will be used for authorization to 
the Loki Platform.
+    :type loki_conn_id: str
+    :param name: the name of the custom log in Loki Logging. Defaults to 
'airflow'.
+    :type name: str
+    :param labels: (Optional) Mapping of labels for the entry.
+    :type labels: dict
+    """
+
+    # pylint: disable=too-many-arguments
+    def __init__(
+        self,
+        base_log_folder: str,
+        filename_template: str,
+        loki_conn_id: str,
+        name: str = DEFAULT_LOGGER_NAME,
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        super().__init__(base_log_folder, filename_template)
+        self.loki_conn_id = loki_conn_id
+        self.name: str = name
+        self.timestamp_pattern = "%Y-%m-%dT%H:%M:%S"
+        self.labels = labels
+        self._session: Optional[requests.Session] = None
+
+    @cached_property
+    def get_conn(self):
+        """Loki connection for client"""
+        return BaseHook.get_connection(self.loki_conn_id)
+
+    @property
+    def session(self) -> requests.Session:
+        """Create HTTP session"""
+        if self._session is None:
+            self._session = requests.Session()

Review comment:
       You should set a timeout on the requests made through this session; otherwise the 
`is_loki_alive()` check may hang for a very long time.

##########
File path: airflow/providers/grafana/log/loki_task_handler.py
##########
@@ -0,0 +1,242 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Loki logging handler for tasks"""
+import time
+from typing import Dict, Optional, Tuple, Union
+
+import logging_loki
+import requests
+from cached_property import cached_property
+
+from airflow.hooks.base_hook import BaseHook
+from airflow.models import TaskInstance
+from airflow.utils.log.file_task_handler import FileTaskHandler
+from airflow.utils.log.logging_mixin import LoggingMixin
+
+DEFAULT_LOGGER_NAME = "airflow"
+
+
+class LokiTaskHandler(FileTaskHandler, LoggingMixin):
+    """
+    LokiTaskHandler that directly makes Loki logging API calls while reading 
and writing logs.
+    This is a Python standard ``logging`` handler using that can be used to 
route Python standard
+    logging messages directly to the Loki Logging API. It can also be used to 
save logs for
+    executing tasks. To do this, you should set as a handler with the name 
"tasks". In this case,
+    it will also be used to read the log for display in Web UI.
+    :param base_log_folder: Base log folder to place logs (incase Loki is 
down).
+    :type base_log_folder: str
+    :param filename_template: template filename string (incase Loki is down)
+    :type filename_template: str
+    :param loki_conn_id: Connection ID that will be used for authorization to 
the Loki Platform.
+    :type loki_conn_id: str
+    :param name: the name of the custom log in Loki Logging. Defaults to 
'airflow'.
+    :type name: str
+    :param labels: (Optional) Mapping of labels for the entry.
+    :type labels: dict
+    """
+
+    # pylint: disable=too-many-arguments
+    def __init__(
+        self,
+        base_log_folder: str,
+        filename_template: str,
+        loki_conn_id: str,
+        name: str = DEFAULT_LOGGER_NAME,
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        super().__init__(base_log_folder, filename_template)
+        self.loki_conn_id = loki_conn_id
+        self.name: str = name
+        self.timestamp_pattern = "%Y-%m-%dT%H:%M:%S"
+        self.labels = labels
+        self._session: Optional[requests.Session] = None
+
+    @cached_property
+    def get_conn(self):
+        """Loki connection for client"""
+        return BaseHook.get_connection(self.loki_conn_id)
+
+    @property
+    def session(self) -> requests.Session:
+        """Create HTTP session"""
+        if self._session is None:
+            self._session = requests.Session()
+            self._session.auth = (self.get_conn.login, self.get_conn.password) 
or None
+        return self._session
+
+    def is_loki_alive(self):
+        """Checks whether Loki is ready for pushing/pulling logs"""
+        try:
+            status = self.session.get(
+                f"{self.get_conn.host}/ready",
+            )
+            return status.status_code
+        except ConnectionError as error_msg:
+            self.log.exception(error_msg)
+            return None
+
+    @staticmethod
+    def _task_label(task_instance: TaskInstance) -> Dict[str, str]:
+        """
+        Returns task instance labels for Loki which will use while reading
+        and writing logs from loki.
+        :param task_instance: task instance object
+        :type: task_instance: TaskInstance
+        """
+        # Not adding execution date since it violates Loki label standards
+        # 
https://grafana.com/blog/2020/08/27/the-concise-guide-to-labels-in-loki/
+
+        return {
+            "airflow_dag_id": task_instance.dag_id,
+            "airflow_task_id": task_instance.task_id,
+            "airflow_try_number": str(task_instance.try_number),
+        }
+
+    def get_label(self, task_instance: TaskInstance) -> Dict[str, str]:
+        """
+        Update task_labels with optional labels and return Loki labels.
+        :param task_instance: task instance object
+        :type: task_instance: TaskInstance
+        """
+        tags = {}
+        task_labels = self._task_label(task_instance)
+        if self.labels:
+            tags.update(self.labels)
+            tags.update(task_labels)
+            return tags
+        return task_labels

Review comment:
       ```suggestion
           if self.labels:
               tags = copy.copy(self.labels)
           else:
               tags = {}
           tags.update(self._task_label(task_instance))
           return tags
   ```
   
   You'll need to `import copy` too.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to