amoghrajesh commented on code in PR #68213:
URL: https://github.com/apache/airflow/pull/68213#discussion_r3381614428
##########
shared/observability/src/airflow_shared/observability/metrics/metrics_template.yaml:
##########
@@ -59,6 +59,27 @@ metrics:
legacy_name: "operator_successes_{operator_name}"
name_variables: ["operator_name"]
+ - name: "resumable_job.fresh_submit"
Review Comment:
Handled in commit [comments from namrata](https://github.com/apache/airflow/pull/68213/commits/badd9e25d18a7b3819a7ba781555220319069f31).
##########
task-sdk/src/airflow/sdk/bases/resumablejobmixin.py:
##########
@@ -101,41 +107,70 @@ def execute_resumable(self, context: Context) -> Any:
Closing this window would require atomic "submit + persist", which is
not possible across
an external system boundary.
"""
- task_store = context.get("task_store")
-
- if task_store is None:
- self.log.warning("task_store not available in context, crash
recovery is disabled for this run")
- else:
- external_id = task_store.get(self.external_id_key)
- if external_id:
- status = self.get_job_status(external_id, context)
- if self.is_job_active(status):
- self.log.info(
- "Reconnecting to existing job",
- external_id_key=self.external_id_key,
- external_id=external_id,
- status=status,
- )
- return self.poll_until_complete(external_id, context)
- if self.is_job_succeeded(status):
- self.log.info(
- "Job already completed successfully, skipping
resubmission",
- external_id_key=self.external_id_key,
- external_id=external_id,
- )
- return self.get_job_result(external_id, context)
+ operator_tag = {"operator": type(self).__name__}
+ reconnect_to: Any = None
+ already_succeeded_id: Any = None
Review Comment:
Handled in commit [comments from namrata](https://github.com/apache/airflow/pull/68213/commits/badd9e25d18a7b3819a7ba781555220319069f31).
##########
task-sdk/tests/task_sdk/bases/test_resumablemixin.py:
##########
@@ -200,6 +205,100 @@ class CustomKeyOp(ConcreteResumableOperator):
assert task_state.get("my_custom_key") == "job-001"
+class TestMetrics:
+ _PATCH = "airflow.sdk._shared.observability.metrics.stats.incr"
+ _TAG = {"operator": "ConcreteResumableOperator"}
+
+ def test_fresh_submit_fires_only_fresh_submit_counter(self):
+ op = ConcreteResumableOperator(task_id="test_task")
+ mock_incr = MagicMock()
+ with patch(self._PATCH, mock_incr):
+ op.execute_resumable(make_context(FakeTaskState()))
+ called_names = [call.args[0] for call in mock_incr.call_args_list]
+ assert called_names == ["resumable_job.fresh_submit"]
+ mock_incr.assert_called_once_with("resumable_job.fresh_submit",
tags=self._TAG)
+
+ def test_reconnect_fires_attempt_and_success(self):
+ op = ConcreteResumableOperator(task_id="test_task")
+ op._status_map["job-001"] = "RUNNING"
+ mock_incr = MagicMock()
+ with patch(self._PATCH, mock_incr):
+ op.execute_resumable(make_context(FakeTaskState({"test_job_id":
"job-001"})))
+ called_names = [call.args[0] for call in mock_incr.call_args_list]
+ assert "resumable_job.reconnect_attempt" in called_names
+ assert "resumable_job.reconnect_success" in called_names
+ assert "resumable_job.fresh_submit" not in called_names
+
+ @pytest.mark.parametrize("status", ["SUCCEEDED", "FAILED"])
+ def test_reconnect_attempt_without_success_when_job_not_active(self,
status):
Review Comment:
Handled in commit [comments from namrata](https://github.com/apache/airflow/pull/68213/commits/badd9e25d18a7b3819a7ba781555220319069f31).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]