ferruzzi commented on code in PR #56936:
URL: https://github.com/apache/airflow/pull/56936#discussion_r2457119334


##########
providers/amazon/src/airflow/providers/amazon/aws/operators/ssm.py:
##########
@@ -129,3 +132,91 @@ def execute(self, context: Context):
                 )
 
         return command_id
+
+
+class SsmGetCommandInvocationOperator(AwsBaseOperator[SsmHook]):
+    """
+    Retrieves the output and execution details of an SSM command invocation.
+
+    This operator allows you to fetch the standard output, standard error, 
execution status,
+    and other details from SSM commands. It can be used to retrieve output 
from commands
+    executed by SsmRunCommandOperator in previous tasks, or from commands 
executed outside
+    of Airflow entirely.
+
+    The operator returns structured data including stdout, stderr, execution 
times, and
+    status information for each instance that executed the command.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the 
guide:
+        :ref:`howto/operator:SsmGetCommandInvocationOperator`
+
+    :param command_id: The ID of the SSM command to retrieve output for.
+    :param instance_id: The ID of the specific instance to retrieve output for.
+        If not provided, retrieves output from all instances that executed the 
command.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. 
If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default 
boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore 
client. See:
+        
https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = SsmHook
+    template_fields: Sequence[str] = aws_template_fields(
+        "command_id",
+        "instance_id",
+    )
+
+    def __init__(
+        self,
+        *,
+        command_id: str,
+        instance_id: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.command_id = command_id
+        self.instance_id = instance_id
+
+    def execute(self, context: Context) -> dict[str, Any]:
+        """Execute the operator to retrieve command invocation output."""
+        if self.instance_id:
+            self.log.info(
+                "Retrieving output for command %s on instance %s", 
self.command_id, self.instance_id
+            )
+            invocations = [{"InstanceId": self.instance_id}]
+        else:
+            self.log.info("Retrieving output for command %s from all 
instances", self.command_id)
+            response = self.hook.list_command_invocations(self.command_id)
+            invocations = response.get("CommandInvocations", [])
+
+        # TBD: Do we return formatted or RAW API output? 
+        # My rationale for going formatted: Raw AWS API response contains a 
lot of metadata that's typically not needed for downstream tasks.

Review Comment:
   I can get behind this.  Feel free to remove the TBD comment and resolve this 
conversation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to