ephraimbuddy commented on code in PR #35934:
URL: https://github.com/apache/airflow/pull/35934#discussion_r1417391758


##########
airflow/providers/weaviate/hooks/weaviate.py:
##########
@@ -200,3 +209,147 @@ def query_without_vector(
             .do()
         )
         return results
+
+    def create_object(
+        self, data_object: dict | str, class_name: str, **kwargs
+    ) -> str | dict[str, Any] | None:
+        """Create a new object.
+
+        :param data_object: Object to be added. If type is str it should be 
either a URL or a file.
+        :param class_name: Class name associated with the object given.
+        :param kwargs: Additional parameters to be passed to 
weaviate_client.data_object.create()
+        """
+        client = self.conn
+        # generate deterministic uuid if not provided
+        uuid = kwargs.pop("uuid", generate_uuid5(data_object))
+        try:
+            return client.data_object.create(data_object, class_name, 
uuid=uuid, **kwargs)
+        except ObjectAlreadyExistsException:
+            self.log.warning("Object with the UUID %s already exists", uuid)
+            return None
+
+    def get_or_create_object(
+        self,
+        data_object: dict | str | None = None,
+        class_name: str | None = None,
+        vector: Sequence | None = None,
+        consistency_level: ConsistencyLevel | None = None,
+        tenant: str | None = None,
+        **kwargs,
+    ) -> str | dict[str, Any] | None:
+        """Get or Create a new object.
+
+        Returns the object if already exists
+
+        :param data_object: Object to be added. If type is str it should be 
either a URL or a file. This is required
+            to create a new object.
+        :param class_name: Class name associated with the object given. This 
is required to create a new object.
+        :param vector: Vector associated with the object given. This argument 
is only used when creating object.
+        :param consistency_level: Consistency level to be used. Applies to 
both create and get operations.
+        :tenant: Tenant to be used. Applies to both create and get operations.
+        :param kwargs: Additional parameters to be passed to 
weaviate_client.data_object.create() and
+            weaviate_client.data_object.get()
+        """
+        obj = self.get_object(
+            class_name=class_name, consistency_level=consistency_level, 
tenant=tenant, **kwargs
+        )
+        if not obj:
+            if not (data_object and class_name):
+                raise ValueError("data_object and class_name are required to 
create a new object")
+            uuid = kwargs.pop("uuid", generate_uuid5(data_object))
+            return self.create_object(
+                data_object,
+                class_name,
+                vector=vector,
+                uuid=uuid,
+                consistency_level=consistency_level,
+                tenant=tenant,
+            )
+        return obj
+
+    def get_object(self, **kwargs) -> dict[str, Any] | None:
+        """Get objects or an object from weaviate.
+
+        :param kwargs: parameters to be passed to 
weaviate_client.data_object.get() or
+            weaviate_client.data_object.get_by_id()
+        """
+        client = self.conn
+        return client.data_object.get(**kwargs)
+
+    def get_all_objects(

Review Comment:
   Yeah, we need it. It was requested by @mpgreg. The get_object method supports 
pagination, which users can use to access millions of objects in pages. I think 
this is useful for small objects that the user wants streamed without writing 
the logic themselves.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to