potiuk commented on a change in pull request #7613: [AIRFLOW-6978] Add
PubSubPullOperator
URL: https://github.com/apache/airflow/pull/7613#discussion_r387671400
##########
File path: airflow/providers/google/cloud/operators/pubsub.py
##########
@@ -674,3 +675,124 @@ def execute(self, context):
self.log.info("Publishing to topic %s", self.topic)
hook.publish(project_id=self.project_id, topic=self.topic,
messages=self.messages)
self.log.info("Published to topic %s", self.topic)
+
+
+class PubSubPullOperator(BaseOperator):
+ """Pulls messages from a PubSub subscription and passes them through XCom.
+ If the queue is empty, returns an empty list - never waits for messages.
+ If you do need to wait, please use
+ :class:`airflow.providers.google.cloud.sensors.PubSubPullSensor`
+ instead.
+
+ .. seealso::
+ For more information on how to use this operator, take a look at the guide:
+ :ref:`howto/operator:PubSubPullSensor`
+
+ This operator will pull up to ``max_messages`` messages from the
+ specified PubSub subscription. Any messages the subscription returns
+ will be returned from the operator and passed through XCom for
+ downstream tasks.
+
+ If ``ack_messages`` is set to True, messages will be immediately
+ acknowledged before being returned, otherwise, downstream tasks will be
+ responsible for acknowledging them.
+
+ ``project_id`` and ``subscription`` are templated so you can use
+ variables in them.
+
+ :param project_id: the GCP project ID for the subscription (templated)
+ :type project_id: str
+ :param subscription: the Pub/Sub subscription name. Do not include the
+ full subscription path.
+ :type subscription: str
+ :param max_messages: The maximum number of messages to retrieve per
+ PubSub pull request
+ :type max_messages: int
+ :param ack_messages: If True, each message will be acknowledged
+ immediately rather than by any downstream tasks
+ :type ack_messages: bool
+ :param gcp_conn_id: The connection ID to use connecting to
+ Google Cloud Platform.
+ :type gcp_conn_id: str
+ :param delegate_to: The account to impersonate, if any.
+ For this to work, the service account making the request
+ must have domain-wide delegation enabled.
+ :type delegate_to: str
+ :param messages_callback: (Optional) Callback to process received messages.
+ Its return value will be saved to XCom.
+ If you are pulling large messages, you probably want to provide a custom callback.
+ If not provided, the default implementation will convert `ReceivedMessage` objects
+ into JSON-serializable dicts using the `google.protobuf.json_format.MessageToDict` function.
+ :type messages_callback: Optional[Callable[[List[ReceivedMessage], Dict[str, Any]], Any]]
+ """
+ template_fields = ['project_id', 'subscription']
+
+ @apply_defaults
+ def __init__(
+ self,
+ project_id: str,
+ subscription: str,
+ max_messages: int = 5,
+ ack_messages: bool = False,
+ messages_callback: Optional[Callable[[List[ReceivedMessage],
Dict[str, Any]], Any]] = None,
+ gcp_conn_id: str = 'google_cloud_default',
+ delegate_to: Optional[str] = None,
+ *args,
+ **kwargs
+ ) -> None:
+ super().__init__(*args, **kwargs)
+ self.gcp_conn_id = gcp_conn_id
+ self.delegate_to = delegate_to
+ self.project_id = project_id
+ self.subscription = subscription
+ self.max_messages = max_messages
+ self.ack_messages = ack_messages
+ self.messages_callback = messages_callback
+
+ def execute(self, context):
+ hook = PubSubHook(
+ gcp_conn_id=self.gcp_conn_id,
+ delegate_to=self.delegate_to,
+ )
+
+ pulled_messages = hook.pull(
+ project_id=self.project_id,
+ subscription=self.subscription,
+ max_messages=self.max_messages,
+ return_immediately=True,
+ )
+
+ handle_messages = self.messages_callback or
self._default_message_callback
+
+ ret = handle_messages(pulled_messages, context)
+
+ if pulled_messages and self.ack_messages:
+ hook.acknowledge(
+ project_id=self.project_id,
+ subscription=self.subscription,
+ messages=pulled_messages,
+ )
+
+ return ret
+
+ def _default_message_callback(
Review comment:
Nice!
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services