josix commented on code in PR #41554:
URL: https://github.com/apache/airflow/pull/41554#discussion_r1723089576
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -393,3 +413,76 @@ def delete_vector_store_file(self, vector_store_id: str,
file_id: str) -> Vector
"""
response =
self.conn.beta.vector_stores.files.delete(vector_store_id=vector_store_id,
file_id=file_id)
return response
+
+ def create_batch(
+ self,
+ file_id: str,
+ endpoint: Literal["/v1/chat/completions", "/v1/embeddings",
"/v1/completions"],
+ metadata: dict[str, str] | None = None,
+ completion_window: Literal["24h"] = "24h",
+ ) -> Batch:
+ """
+ Create a batch for a given model and files.
+
+ :param file_id: The ID of the file to be used for this batch.
+ :param endpoint: The endpoint to use for this batch. Allowed values
include:
+ '/v1/chat/completions', '/v1/embeddings', '/v1/completions'.
+ :param metadata: A set of key-value pairs that can be attached to an
object.
+ :param completion_window: The time window for the batch to complete.
Default is 24 hours.
+ """
+ batch = self.conn.batches.create(
+ input_file_id=file_id, endpoint=endpoint, metadata=metadata,
completion_window=completion_window
+ )
+ return batch
+
+ def get_batch(self, batch_id: str) -> Batch:
+ """
+ Get the status of a batch.
+
+ :param batch_id: The ID of the batch to get the status of.
+ """
+ batch = self.conn.batches.retrieve(batch_id=batch_id)
+ return batch
+
+ def wait_for_batch(self, batch_id: str, wait_seconds: float = 3, timeout:
float = 3600) -> None:
+ """
+ Poll a batch to check if it finishes.
+
+ :param batch_id: Id of the Batch to wait for.
+ :param wait_seconds: Optional. Number of seconds between checks.
+ :param timeout: Optional. How many seconds wait for batch to be ready.
+ Used only if not ran in deferred operator.
+ """
+ start = time.monotonic()
+ while True:
+ if start + timeout < time.monotonic():
+ self.cancel_batch(batch_id=batch_id)
+ raise AirflowException(f"Timeout: OpenAI Batch {batch_id} is
not ready after {timeout}s")
+ time.sleep(wait_seconds)
+ batch = self.get_batch(batch_id=batch_id)
+
+ if batch.status in {BatchStatus.IN_PROGRESS,
BatchStatus.VALIDATING, BatchStatus.FINALIZING}:
Review Comment:
Yeah, it's only a minor difference whether this runs as a sequential lookup
(list/tuple) or a hash lookup (set). Either is okay with me.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]