josix commented on code in PR #41554:
URL: https://github.com/apache/airflow/pull/41554#discussion_r1724699557


##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -393,3 +417,76 @@ def delete_vector_store_file(self, vector_store_id: str, 
file_id: str) -> Vector
         """
         response = 
self.conn.beta.vector_stores.files.delete(vector_store_id=vector_store_id, 
file_id=file_id)
         return response
+
+    def create_batch(
+        self,
+        file_id: str,
+        endpoint: Literal["/v1/chat/completions", "/v1/embeddings", 
"/v1/completions"],
+        metadata: dict[str, str] | None = None,
+        completion_window: Literal["24h"] = "24h",
+    ) -> Batch:
+        """
+        Create a batch for a given model and files.
+
+        :param file_id: The ID of the file to be used for this batch.
+        :param endpoint: The endpoint to use for this batch. Allowed values 
include:
+            '/v1/chat/completions', '/v1/embeddings', '/v1/completions'.
+        :param metadata: A set of key-value pairs that can be attached to an 
object.
+        :param completion_window: The time window for the batch to complete. 
Default is 24 hours.
+        """
+        batch = self.conn.batches.create(
+            input_file_id=file_id, endpoint=endpoint, metadata=metadata, 
completion_window=completion_window
+        )
+        return batch
+
+    def get_batch(self, batch_id: str) -> Batch:
+        """
+        Get the status of a batch.
+
+        :param batch_id: The ID of the batch to get the status of.
+        """
+        batch = self.conn.batches.retrieve(batch_id=batch_id)
+        return batch
+
+    def wait_for_batch(self, batch_id: str, wait_seconds: float = 3, timeout: 
float = 3600) -> None:
+        """
+        Poll a batch to check if it finishes.
+
+        :param batch_id: Id of the Batch to wait for.
+        :param wait_seconds: Optional. Number of seconds between checks.
+        :param timeout: Optional. How many seconds wait for batch to be ready.
+            Used only if not ran in deferred operator.
+        """
+        start = time.monotonic()
+        while True:
+            if start + timeout < time.monotonic():
+                self.cancel_batch(batch_id=batch_id)
+                raise OpenAIBatchTimeout(f"Timeout: OpenAI Batch {batch_id} is 
not ready after {timeout}s")
+            time.sleep(wait_seconds)
+            batch = self.get_batch(batch_id=batch_id)
+
+            if BatchStatus.is_in_progress(batch.status):
+                continue
+            if batch.status == BatchStatus.COMPLETED:
+                return
+            if batch.status == BatchStatus.FAILED:
+                raise OpenAIBatchJobException(f"Batch failed - \n{batch_id}")
+            elif batch.status in (BatchStatus.CANCELLED, 
BatchStatus.CANCELLING):
+                raise OpenAIBatchJobException(f"Batch failed - batch was 
cancelled:\n{batch_id}")
+            elif batch.status == BatchStatus.EXPIRED:
+                raise OpenAIBatchJobException(
+                    f"Batch failed - batch couldn't be completed within the 
hour time window :\n{batch_id}"
+                )
+
+            raise OpenAIBatchJobException(
+                f"Batch failed - encountered unexpected status 
`{batch.status}` for batch_id `{batch_id}`"
+            )

Review Comment:
   Nice catch!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to