Lee-W commented on code in PR #39248:
URL: https://github.com/apache/airflow/pull/39248#discussion_r1580846341
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -257,11 +287,137 @@ def create_embeddings(
model: str = "text-embedding-ada-002",
**kwargs: Any,
) -> list[float]:
- """Generate embeddings for the given text using the given model.
+ """
+ Generate embeddings for the given text using the given model.
:param text: The text to generate embeddings for.
:param model: The model to use for generating embeddings.
"""
response = self.conn.embeddings.create(model=model, input=text,
**kwargs)
embeddings: list[float] = response.data[0].embedding
return embeddings
+
+ def upload_file(self, file: str, purpose: Literal["fine-tune",
"assistants"]) -> FileObject:
+ """
+ Upload a file that can be used across various endpoints. The size of
all the files uploaded by one organization can be up to 100 GB.
+
+ :param file: The File object (not file name) to be uploaded.
+ :param purpose: The intended purpose of the uploaded file. Use
"fine-tune" for
+ Fine-tuning and "assistants" for Assistants and Messages.
+ """
+ file_object = self.conn.files.create(file=open(file, "rb"),
purpose=purpose)
+ return file_object
+
+ def get_file(self, file_id: str) -> FileObject:
+ """
+ Return information about a specific file.
+
+ :param file_id: The ID of the file to use for this request.
+ """
+ file = self.conn.files.retrieve(file_id=file_id)
+ return file
+
+ def get_files(self) -> list[FileObject]:
+ """Return a list of files that belong to the user's organization."""
+ files = self.conn.files.list()
+ return files.data
+
+ def get_file_by_name(self, file_name: str) -> FileObject | None:
+ """
+ Get an OpenAI Assistant object for a given name.
+
+ :param file_name: The name of the file object to retrieve
+ """
+ files = self.get_files()
+ return next((file for file in files if file.filename == file_name),
None)
+
+ def delete_file(self, file_id) -> FileDeleted:
+ """
+ Delete a file.
+
+ :param file_id: The ID of the file to be deleted.
+ """
+ response = self.conn.files.delete(file_id=file_id)
+ return response
+
+ def create_vector_store(self, **kwargs) -> VectorStore:
Review Comment:
```suggestion
def create_vector_store(self, **kwargs: Any) -> VectorStore:
```
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -257,11 +287,137 @@ def create_embeddings(
model: str = "text-embedding-ada-002",
**kwargs: Any,
) -> list[float]:
- """Generate embeddings for the given text using the given model.
+ """
+ Generate embeddings for the given text using the given model.
:param text: The text to generate embeddings for.
:param model: The model to use for generating embeddings.
"""
response = self.conn.embeddings.create(model=model, input=text,
**kwargs)
embeddings: list[float] = response.data[0].embedding
return embeddings
+
+ def upload_file(self, file: str, purpose: Literal["fine-tune",
"assistants"]) -> FileObject:
+ """
+ Upload a file that can be used across various endpoints. The size of
all the files uploaded by one organization can be up to 100 GB.
+
+ :param file: The File object (not file name) to be uploaded.
+ :param purpose: The intended purpose of the uploaded file. Use
"fine-tune" for
+ Fine-tuning and "assistants" for Assistants and Messages.
+ """
+ file_object = self.conn.files.create(file=open(file, "rb"),
purpose=purpose)
Review Comment:
Should we use a context manager for `open`? e.g.,
```python
with open(...):
file_object...
```
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -133,26 +143,26 @@ def get_assistants(self, **kwargs: Any) ->
list[Assistant]:
return assistants.data
def get_assistant_by_name(self, assistant_name: str) -> Assistant | None:
- """Get an OpenAI Assistant object for a given name.
+ """
+ Get an OpenAI Assistant object for a given name.
:param assistant_name: The name of the assistant to retrieve
"""
- response = self.get_assistants()
- for assistant in response:
- if assistant.name == assistant_name:
- return assistant
- return None
+ assistants = self.get_assistants()
+ return next((assistant for assistant in assistants if assistant.name
== assistant_name), None)
Review Comment:
I'm not sure. I feel like it's a bit harder to read 🤔 Is there any reason we
need to do it this way? (Maybe performance?) Also, I just noticed that this only
returns the first match found. Is it possible to have multiple entries with the
same name?
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -257,11 +287,137 @@ def create_embeddings(
model: str = "text-embedding-ada-002",
**kwargs: Any,
) -> list[float]:
- """Generate embeddings for the given text using the given model.
+ """
+ Generate embeddings for the given text using the given model.
:param text: The text to generate embeddings for.
:param model: The model to use for generating embeddings.
"""
response = self.conn.embeddings.create(model=model, input=text,
**kwargs)
embeddings: list[float] = response.data[0].embedding
return embeddings
+
+ def upload_file(self, file: str, purpose: Literal["fine-tune",
"assistants"]) -> FileObject:
+ """
+ Upload a file that can be used across various endpoints. The size of
all the files uploaded by one organization can be up to 100 GB.
+
+ :param file: The File object (not file name) to be uploaded.
+ :param purpose: The intended purpose of the uploaded file. Use
"fine-tune" for
+ Fine-tuning and "assistants" for Assistants and Messages.
+ """
+ file_object = self.conn.files.create(file=open(file, "rb"),
purpose=purpose)
+ return file_object
+
+ def get_file(self, file_id: str) -> FileObject:
+ """
+ Return information about a specific file.
+
+ :param file_id: The ID of the file to use for this request.
+ """
+ file = self.conn.files.retrieve(file_id=file_id)
+ return file
+
+ def get_files(self) -> list[FileObject]:
+ """Return a list of files that belong to the user's organization."""
+ files = self.conn.files.list()
+ return files.data
+
+ def get_file_by_name(self, file_name: str) -> FileObject | None:
+ """
+ Get an OpenAI Assistant object for a given name.
+
+ :param file_name: The name of the file object to retrieve
+ """
+ files = self.get_files()
+ return next((file for file in files if file.filename == file_name),
None)
+
+ def delete_file(self, file_id) -> FileDeleted:
+ """
+ Delete a file.
+
+ :param file_id: The ID of the file to be deleted.
+ """
+ response = self.conn.files.delete(file_id=file_id)
+ return response
+
+ def create_vector_store(self, **kwargs) -> VectorStore:
+ """Create a vector store."""
+ vector_store = self.conn.beta.vector_stores.create(**kwargs)
+ return vector_store
+
+ def get_vectors_stores(self, **kwargs) -> list[VectorStore]:
Review Comment:
```suggestion
def get_vectors_stores(self, **kwargs: Any) -> list[VectorStore]:
```
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -257,11 +287,137 @@ def create_embeddings(
model: str = "text-embedding-ada-002",
**kwargs: Any,
) -> list[float]:
- """Generate embeddings for the given text using the given model.
+ """
+ Generate embeddings for the given text using the given model.
:param text: The text to generate embeddings for.
:param model: The model to use for generating embeddings.
"""
response = self.conn.embeddings.create(model=model, input=text,
**kwargs)
embeddings: list[float] = response.data[0].embedding
return embeddings
+
+ def upload_file(self, file: str, purpose: Literal["fine-tune",
"assistants"]) -> FileObject:
+ """
+ Upload a file that can be used across various endpoints. The size of
all the files uploaded by one organization can be up to 100 GB.
+
+ :param file: The File object (not file name) to be uploaded.
+ :param purpose: The intended purpose of the uploaded file. Use
"fine-tune" for
+ Fine-tuning and "assistants" for Assistants and Messages.
+ """
+ file_object = self.conn.files.create(file=open(file, "rb"),
purpose=purpose)
+ return file_object
+
+ def get_file(self, file_id: str) -> FileObject:
+ """
+ Return information about a specific file.
+
+ :param file_id: The ID of the file to use for this request.
+ """
+ file = self.conn.files.retrieve(file_id=file_id)
+ return file
+
+ def get_files(self) -> list[FileObject]:
+ """Return a list of files that belong to the user's organization."""
+ files = self.conn.files.list()
+ return files.data
+
+ def get_file_by_name(self, file_name: str) -> FileObject | None:
+ """
+ Get an OpenAI Assistant object for a given name.
+
+ :param file_name: The name of the file object to retrieve
+ """
+ files = self.get_files()
+ return next((file for file in files if file.filename == file_name),
None)
+
+ def delete_file(self, file_id) -> FileDeleted:
Review Comment:
```suggestion
def delete_file(self, file_id: str) -> FileDeleted:
```
##########
airflow/providers/openai/hooks/openai.py:
##########
@@ -257,11 +287,137 @@ def create_embeddings(
model: str = "text-embedding-ada-002",
**kwargs: Any,
) -> list[float]:
- """Generate embeddings for the given text using the given model.
+ """
+ Generate embeddings for the given text using the given model.
:param text: The text to generate embeddings for.
:param model: The model to use for generating embeddings.
"""
response = self.conn.embeddings.create(model=model, input=text,
**kwargs)
embeddings: list[float] = response.data[0].embedding
return embeddings
+
+ def upload_file(self, file: str, purpose: Literal["fine-tune",
"assistants"]) -> FileObject:
+ """
+ Upload a file that can be used across various endpoints. The size of
all the files uploaded by one organization can be up to 100 GB.
+
+ :param file: The File object (not file name) to be uploaded.
+ :param purpose: The intended purpose of the uploaded file. Use
"fine-tune" for
+ Fine-tuning and "assistants" for Assistants and Messages.
+ """
+ file_object = self.conn.files.create(file=open(file, "rb"),
purpose=purpose)
+ return file_object
+
+ def get_file(self, file_id: str) -> FileObject:
+ """
+ Return information about a specific file.
+
+ :param file_id: The ID of the file to use for this request.
+ """
+ file = self.conn.files.retrieve(file_id=file_id)
+ return file
+
+ def get_files(self) -> list[FileObject]:
+ """Return a list of files that belong to the user's organization."""
+ files = self.conn.files.list()
+ return files.data
+
+ def get_file_by_name(self, file_name: str) -> FileObject | None:
+ """
+ Get an OpenAI Assistant object for a given name.
+
+ :param file_name: The name of the file object to retrieve
+ """
+ files = self.get_files()
+ return next((file for file in files if file.filename == file_name),
None)
+
+ def delete_file(self, file_id) -> FileDeleted:
+ """
+ Delete a file.
+
+ :param file_id: The ID of the file to be deleted.
+ """
+ response = self.conn.files.delete(file_id=file_id)
+ return response
+
+ def create_vector_store(self, **kwargs) -> VectorStore:
+ """Create a vector store."""
+ vector_store = self.conn.beta.vector_stores.create(**kwargs)
+ return vector_store
+
+ def get_vectors_stores(self, **kwargs) -> list[VectorStore]:
+ """Return a list of vector stores."""
+ vector_stores = self.conn.beta.vector_stores.list(**kwargs)
+ return vector_stores.data
+
+ def get_vector_store(self, vector_store_id: str) -> VectorStore:
+ """
+ Retrieve a vector store.
+
+ :param vector_store_id: The ID of the vector store to retrieve.
+ """
+ vector_store =
self.conn.beta.vector_stores.retrieve(vector_store_id=vector_store_id)
+ return vector_store
+
+ def get_vector_store_by_name(self, vector_store_name: str) -> VectorStore
| None:
+ """
+ Get an OpenAI Vector Store object for a given name.
+
+ :param vector_store_name: The name of the vector store to retrieve.
+ """
+ vector_stores = self.get_vectors_stores()
+ return next(
+ (vector_store for vector_store in vector_stores if
vector_store.name == vector_store_name), None
+ )
+
+ def modify_vector_store(self, vector_store_id: str, **kwargs) ->
VectorStore:
Review Comment:
```suggestion
def modify_vector_store(self, vector_store_id: str, **kwargs: Any) ->
VectorStore:
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]