This is an automated email from the ASF dual-hosted git repository.

jgresock pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 3afeac6341 NIFI-12636 - Upgrade dependencies for Pinecone, ChromaDB and OpenAI processors
3afeac6341 is described below

commit 3afeac634140a284f661f640c4c63f1d8a687de8
Author: Pierre Villard <[email protected]>
AuthorDate: Tue Jan 23 21:02:40 2024 +0400

    NIFI-12636 - Upgrade dependencies for Pinecone, ChromaDB and OpenAI processors
    
    Signed-off-by: Joe Gresock <[email protected]>
    This closes #8293.
---
 .../src/main/python/PromptChatGPT.py               |  2 +-
 .../src/main/python/vectorstores/EmbeddingUtils.py |  2 +-
 .../src/main/python/vectorstores/PutPinecone.py    | 22 ++++++++++------------
 .../src/main/python/vectorstores/QueryChroma.py    |  2 +-
 .../src/main/python/vectorstores/QueryPinecone.py  | 22 ++++++++++------------
 .../src/main/python/vectorstores/QueryUtils.py     |  2 +-
 .../src/main/python/vectorstores/requirements.txt  |  8 ++++----
 7 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
index 5b8e3738fe..a372a20690 100644
--- a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
+++ b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
@@ -33,7 +33,7 @@ class PromptChatGPT(FlowFileTransform):
         version = '2.0.0-SNAPSHOT'
         description = "Submits a prompt to ChatGPT, writing the results either 
to a FlowFile attribute or to the contents of the FlowFile"
         tags = ["text", "chatgpt", "gpt", "machine learning", "ML", 
"artificial intelligence", "ai", "document", "langchain"]
-        dependencies = ['langchain==0.0.331', 'openai==0.28.1', 'jsonpath-ng']
+        dependencies = ['langchain==0.1.2', 'openai==1.9.0', 'jsonpath-ng']
 
 
     MODEL = PropertyDescriptor(
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
index 9b0218c9c0..1b9ed90ba6 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from nifiapi.properties import PropertyDescriptor, StandardValidators, 
PropertyDependency, ExpressionLanguageScope
+from nifiapi.properties import PropertyDescriptor, StandardValidators, 
PropertyDependency
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
 
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
index 67abfa6b09..495f41fea3 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
@@ -13,13 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from langchain.vectorstores import Pinecone
+import langchain.vectorstores
 from nifiapi.flowfiletransform import FlowFileTransform, 
FlowFileTransformResult
 from nifiapi.properties import PropertyDescriptor, StandardValidators, 
ExpressionLanguageScope, PropertyDependency
-import pinecone
+from pinecone import Pinecone
 import json
 from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, 
create_embedding_service
-from nifiapi.documentation import use_case, multi_processor_use_case, 
ProcessorConfiguration
+from nifiapi.documentation import use_case
 
 @use_case(description="Create vectors/embeddings that represent text content 
and send the vectors to Pinecone",
           notes="This use case assumes that the data has already been 
formatted in JSONL format with the text to store in Pinecone provided in the 
'text' field.",
@@ -149,6 +149,7 @@ class PutPinecone(FlowFileTransform):
                   DOC_ID_FIELD_NAME]
 
     embeddings = None
+    pc = None
 
     def __init__(self, **kwargs):
         pass
@@ -157,15 +158,12 @@ class PutPinecone(FlowFileTransform):
         return self.properties
 
     def onScheduled(self, context):
-        api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
-        pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
-
         # initialize pinecone
-        pinecone.init(
-            api_key=api_key,
-            environment=pinecone_env,
+        self.pc = Pinecone(
+            api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
+            environment=context.getProperty(self.PINECONE_ENV).getValue()
         )
-
+        # initialize embedding service
         self.embeddings = create_embedding_service(context)
 
     def transform(self, context, flowfile):
@@ -174,7 +172,7 @@ class PutPinecone(FlowFileTransform):
         namespace = 
context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
         id_field_name = 
context.getProperty(self.DOC_ID_FIELD_NAME).evaluateAttributeExpressions(flowfile).getValue()
 
-        index = pinecone.Index(index_name)
+        index = self.pc.Index(index_name)
 
         # Read the FlowFile content as "json-lines".
         json_lines = flowfile.getContentsAsBytes().decode()
@@ -210,6 +208,6 @@ class PutPinecone(FlowFileTransform):
             i += 1
 
         text_key = 
context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
-        vectorstore = Pinecone(index, self.embeddings.embed_query, text_key)
+        vectorstore = langchain.vectorstores.Pinecone(index, 
self.embeddings.embed_query, text_key)
         vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids, 
namespace=namespace)
         return FlowFileTransformResult(relationship="success")
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
index ee0b62ebe4..947b281058 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
@@ -16,7 +16,7 @@
 import json
 
 from nifiapi.flowfiletransform import FlowFileTransform, 
FlowFileTransformResult
-from nifiapi.properties import PropertyDescriptor, StandardValidators, 
ExpressionLanguageScope, PropertyDependency
+from nifiapi.properties import PropertyDescriptor, StandardValidators, 
ExpressionLanguageScope
 import ChromaUtils
 import EmbeddingUtils
 import QueryUtils
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
index b5ed592378..0202388196 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
@@ -13,11 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from langchain.vectorstores import Pinecone
+import langchain.vectorstores
 from nifiapi.flowfiletransform import FlowFileTransform, 
FlowFileTransformResult
 from nifiapi.properties import PropertyDescriptor, StandardValidators, 
ExpressionLanguageScope, PropertyDependency
 import QueryUtils
-import pinecone
+from pinecone import Pinecone
 import json
 from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, 
create_embedding_service
 
@@ -143,6 +143,7 @@ class QueryPinecone(FlowFileTransform):
 
     embeddings = None
     query_utils = None
+    pc = None
 
     def __init__(self, **kwargs):
         pass
@@ -151,18 +152,15 @@ class QueryPinecone(FlowFileTransform):
         return self.properties
 
     def onScheduled(self, context):
-        api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
-        pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
-
         # initialize pinecone
-        pinecone.init(
-            api_key=api_key,
-            environment=pinecone_env,
+        self.pc = Pinecone(
+            api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
+            environment=context.getProperty(self.PINECONE_ENV).getValue()
         )
-        self.embeddings =  create_embedding_service(context)
+        # initialize embedding service
+        self.embeddings = create_embedding_service(context)
         self.query_utils = QueryUtils.QueryUtils(context)
 
-
     def transform(self, context, flowfile):
         # First, check if our index already exists. If it doesn't, we create it
         index_name = 
context.getProperty(self.INDEX_NAME).evaluateAttributeExpressions(flowfile).getValue()
@@ -170,11 +168,11 @@ class QueryPinecone(FlowFileTransform):
         namespace = 
context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
         num_results = 
context.getProperty(self.NUMBER_OF_RESULTS).evaluateAttributeExpressions(flowfile).asInteger()
 
-        index = pinecone.Index(index_name)
+        index = self.pc.Index(index_name)
 
         text_key = 
context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
         filter = 
context.getProperty(self.FILTER).evaluateAttributeExpressions(flowfile).getValue()
-        vectorstore = Pinecone(index, self.embeddings.embed_query, text_key, 
namespace=namespace)
+        vectorstore = langchain.vectorstores.Pinecone(index, 
self.embeddings.embed_query, text_key, namespace=namespace)
         results = vectorstore.similarity_search_with_score(query, num_results, 
filter=None if filter is None else json.loads(filter))
 
         documents = []
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
index f27a7f8a8d..0ca33fdd94 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 from typing import Tuple
 
-from nifiapi.properties import PropertyDescriptor, StandardValidators, 
ExpressionLanguageScope, PropertyDependency
+from nifiapi.properties import PropertyDescriptor, StandardValidators, 
PropertyDependency
 import json
 
 ROW_ORIENTED = "Row-Oriented"
diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
index 4e0669a38e..f3fea58948 100644
--- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
+++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
@@ -14,16 +14,16 @@
 # limitations under the License.
 
 # Shared requirements
-openai==0.28.1
+openai==1.9.0
 
 # Chroma requirements
-chromadb==0.4.14
+chromadb==0.4.22
 onnxruntime
 tokenizers
 tqdm
 requests
 
 # Pinecone requirements
-pinecone-client
+pinecone-client==3.0.1
 tiktoken
-langchain==0.0.331
+langchain==0.1.2

Reply via email to