Lee-W commented on code in PR #41554:
URL: https://github.com/apache/airflow/pull/41554#discussion_r1724641016


##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -393,3 +417,76 @@ def delete_vector_store_file(self, vector_store_id: str, file_id: str) -> Vector
         """
         response = self.conn.beta.vector_stores.files.delete(vector_store_id=vector_store_id, file_id=file_id)
         return response
+
+    def create_batch(
+        self,
+        file_id: str,
+        endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
+        metadata: dict[str, str] | None = None,
+        completion_window: Literal["24h"] = "24h",
+    ) -> Batch:
+        """
+        Create a batch for a given model and files.
+
+        :param file_id: The ID of the file to be used for this batch.
+        :param endpoint: The endpoint to use for this batch. Allowed values include:
+            '/v1/chat/completions', '/v1/embeddings', '/v1/completions'.
+        :param metadata: A set of key-value pairs that can be attached to an object.
+        :param completion_window: The time window for the batch to complete. Default is 24 hours.
+        """
+        batch = self.conn.batches.create(
+            input_file_id=file_id, endpoint=endpoint, metadata=metadata, completion_window=completion_window
+        )
+        return batch
+
+    def get_batch(self, batch_id: str) -> Batch:
+        """
+        Get the status of a batch.
+
+        :param batch_id: The ID of the batch to get the status of.
+        """
+        batch = self.conn.batches.retrieve(batch_id=batch_id)
+        return batch
+
+    def wait_for_batch(self, batch_id: str, wait_seconds: float = 3, timeout: float = 3600) -> None:
+        """
+        Poll a batch to check if it finishes.
+
+        :param batch_id: Id of the Batch to wait for.
+        :param wait_seconds: Optional. Number of seconds between checks.
+        :param timeout: Optional. How many seconds to wait for the batch to be ready.
+            Used only if not run in a deferred operator.
+        """
+        start = time.monotonic()
+        while True:
+            if start + timeout < time.monotonic():
+                self.cancel_batch(batch_id=batch_id)
+                raise OpenAIBatchTimeout(f"Timeout: OpenAI Batch {batch_id} is not ready after {timeout}s")
+            time.sleep(wait_seconds)
+            batch = self.get_batch(batch_id=batch_id)
+
+            if BatchStatus.is_in_progress(batch.status):
+                continue
+            if batch.status == BatchStatus.COMPLETED:
+                return
+            if batch.status == BatchStatus.FAILED:
+                raise OpenAIBatchJobException(f"Batch failed - \n{batch_id}")
+            elif batch.status in (BatchStatus.CANCELLED, BatchStatus.CANCELLING):
+                raise OpenAIBatchJobException(f"Batch failed - batch was cancelled:\n{batch_id}")
+            elif batch.status == BatchStatus.EXPIRED:
+                raise OpenAIBatchJobException(
+                    f"Batch failed - batch couldn't be completed within the 
hour time window :\n{batch_id}"
+                )
+
+            raise OpenAIBatchJobException(
+                f"Batch failed - encountered unexpected status 
`{batch.status}` for batch_id `{batch_id}`"
+            )

Review Comment:
   ```suggestion
               if BatchStatus.is_in_progress(batch.status):
                   time.sleep(wait_seconds)
                   continue
               elif batch.status == BatchStatus.COMPLETED:
                   return
               elif batch.status == BatchStatus.FAILED:
                   raise OpenAIBatchJobException(f"Batch failed - \n{batch_id}")
                elif batch.status in (BatchStatus.CANCELLED, BatchStatus.CANCELLING):
                   raise OpenAIBatchJobException(f"Batch failed - batch was 
cancelled:\n{batch_id}")
               elif batch.status == BatchStatus.EXPIRED:
                   raise OpenAIBatchJobException(
                       f"Batch failed - batch couldn't be completed within the 
hour time window :\n{batch_id}"
                   )
               
               raise OpenAIBatchJobException(
                   f"Batch failed - encountered unexpected status 
`{batch.status}` for batch_id `{batch_id}`"
               )
   ```
   
   I think we can reduce one wait with this change, and it would be better if we could unify the if / elif usage.
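
For illustration, here is a minimal, self-contained sketch of the polling shape this suggestion describes: the sleep happens only while the batch is still in progress, so a batch that is already in a terminal state is handled immediately rather than after one extra wait. The `get_status` callable and the literal status strings below are placeholders for this sketch, not the provider's actual API.

```python
import time


def poll_until_done(get_status, wait_seconds: float = 3, timeout: float = 3600) -> None:
    """Poll ``get_status`` until a terminal state, sleeping only while work is pending."""
    start = time.monotonic()
    while True:
        if start + timeout < time.monotonic():
            raise TimeoutError(f"not ready after {timeout}s")

        status = get_status()
        if status in ("validating", "in_progress", "finalizing"):
            # Only wait when the batch is still running; terminal statuses
            # fall straight through to the branches below without sleeping.
            time.sleep(wait_seconds)
            continue
        elif status == "completed":
            return
        elif status in ("failed", "cancelled", "cancelling", "expired"):
            raise RuntimeError(f"batch ended in status {status!r}")

        raise RuntimeError(f"unexpected status {status!r}")
```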



##########
airflow/providers/openai/exceptions.py:
##########
@@ -0,0 +1,31 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from airflow.exceptions import AirflowException
+
+# Note: Any AirflowException raised is expected to cause the TaskInstance
+#       to be marked in an ERROR state

Review Comment:
   I'm okay, but just not sure whether we actually need this 🤔 
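
   The hunk above is cut off at the license header, but the hook changes in this PR raise `OpenAIBatchJobException` and `OpenAIBatchTimeout`, so the module presumably defines those two classes. A rough sketch of what that likely looks like, assuming both subclass `AirflowException` (which the file already imports); this is an inference, not necessarily the PR's exact code:

```python
from __future__ import annotations

from airflow.exceptions import AirflowException


class OpenAIBatchJobException(AirflowException):
    """Raised when a batch ends in a failed, cancelled, expired, or otherwise unexpected state."""


class OpenAIBatchTimeout(AirflowException):
    """Raised when a batch does not reach a terminal state within the given timeout."""
```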



##########
tests/system/providers/openai/example_trigger_batch_operator.py:
##########
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from airflow.decorators import dag, task
+
+OPENAI_CONN_ID = "openai_default"
+
+POKEMONS = [
+    "pikachu",
+    "charmander",
+    "bulbasaur",

Review Comment:
   😆 
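
   The example file is truncated above, so for orientation, here is a minimal sketch of how a TaskFlow task could drive the new batch methods on the hook. The connection id mirrors the `OPENAI_CONN_ID` constant shown above; the input file id and task shape are illustrative, not necessarily what the system test actually does:

```python
from airflow.decorators import task
from airflow.providers.openai.hooks.openai import OpenAIHook

OPENAI_CONN_ID = "openai_default"  # mirrors the constant defined in the example file


@task
def run_openai_batch(input_file_id: str) -> str:
    """Create a batch against the chat completions endpoint and block until it finishes."""
    hook = OpenAIHook(conn_id=OPENAI_CONN_ID)
    batch = hook.create_batch(file_id=input_file_id, endpoint="/v1/chat/completions")
    # wait_for_batch raises OpenAIBatchTimeout / OpenAIBatchJobException on failure.
    hook.wait_for_batch(batch_id=batch.id, wait_seconds=10, timeout=60 * 60)
    return batch.id
```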



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
