This is an automated email from the ASF dual-hosted git repository.
jgresock pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new 3afeac6341 NIFI-12636 - Upgrade dependencies for Pinecone, ChromaDB
and OpenAI processors
3afeac6341 is described below
commit 3afeac634140a284f661f640c4c63f1d8a687de8
Author: Pierre Villard <[email protected]>
AuthorDate: Tue Jan 23 21:02:40 2024 +0400
NIFI-12636 - Upgrade dependencies for Pinecone, ChromaDB and OpenAI
processors
Signed-off-by: Joe Gresock <[email protected]>
This closes #8293.
---
.../src/main/python/PromptChatGPT.py | 2 +-
.../src/main/python/vectorstores/EmbeddingUtils.py | 2 +-
.../src/main/python/vectorstores/PutPinecone.py | 22 ++++++++++------------
.../src/main/python/vectorstores/QueryChroma.py | 2 +-
.../src/main/python/vectorstores/QueryPinecone.py | 22 ++++++++++------------
.../src/main/python/vectorstores/QueryUtils.py | 2 +-
.../src/main/python/vectorstores/requirements.txt | 8 ++++----
7 files changed, 28 insertions(+), 32 deletions(-)
diff --git
a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
index 5b8e3738fe..a372a20690 100644
--- a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
+++ b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py
@@ -33,7 +33,7 @@ class PromptChatGPT(FlowFileTransform):
version = '2.0.0-SNAPSHOT'
description = "Submits a prompt to ChatGPT, writing the results either
to a FlowFile attribute or to the contents of the FlowFile"
tags = ["text", "chatgpt", "gpt", "machine learning", "ML",
"artificial intelligence", "ai", "document", "langchain"]
- dependencies = ['langchain==0.0.331', 'openai==0.28.1', 'jsonpath-ng']
+ dependencies = ['langchain==0.1.2', 'openai==1.9.0', 'jsonpath-ng']
MODEL = PropertyDescriptor(
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
index 9b0218c9c0..1b9ed90ba6 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from nifiapi.properties import PropertyDescriptor, StandardValidators,
PropertyDependency, ExpressionLanguageScope
+from nifiapi.properties import PropertyDescriptor, StandardValidators,
PropertyDependency
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
index 67abfa6b09..495f41fea3 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py
@@ -13,13 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from langchain.vectorstores import Pinecone
+import langchain.vectorstores
from nifiapi.flowfiletransform import FlowFileTransform,
FlowFileTransformResult
from nifiapi.properties import PropertyDescriptor, StandardValidators,
ExpressionLanguageScope, PropertyDependency
-import pinecone
+from pinecone import Pinecone
import json
from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL,
create_embedding_service
-from nifiapi.documentation import use_case, multi_processor_use_case,
ProcessorConfiguration
+from nifiapi.documentation import use_case
@use_case(description="Create vectors/embeddings that represent text content
and send the vectors to Pinecone",
notes="This use case assumes that the data has already been
formatted in JSONL format with the text to store in Pinecone provided in the
'text' field.",
@@ -149,6 +149,7 @@ class PutPinecone(FlowFileTransform):
DOC_ID_FIELD_NAME]
embeddings = None
+ pc = None
def __init__(self, **kwargs):
pass
@@ -157,15 +158,12 @@ class PutPinecone(FlowFileTransform):
return self.properties
def onScheduled(self, context):
- api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
- pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
-
# initialize pinecone
- pinecone.init(
- api_key=api_key,
- environment=pinecone_env,
+ self.pc = Pinecone(
+ api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
+ environment=context.getProperty(self.PINECONE_ENV).getValue()
)
-
+ # initialize embedding service
self.embeddings = create_embedding_service(context)
def transform(self, context, flowfile):
@@ -174,7 +172,7 @@ class PutPinecone(FlowFileTransform):
namespace =
context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
id_field_name =
context.getProperty(self.DOC_ID_FIELD_NAME).evaluateAttributeExpressions(flowfile).getValue()
- index = pinecone.Index(index_name)
+ index = self.pc.Index(index_name)
# Read the FlowFile content as "json-lines".
json_lines = flowfile.getContentsAsBytes().decode()
@@ -210,6 +208,6 @@ class PutPinecone(FlowFileTransform):
i += 1
text_key =
context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
- vectorstore = Pinecone(index, self.embeddings.embed_query, text_key)
+ vectorstore = langchain.vectorstores.Pinecone(index,
self.embeddings.embed_query, text_key)
vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids,
namespace=namespace)
return FlowFileTransformResult(relationship="success")
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
index ee0b62ebe4..947b281058 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py
@@ -16,7 +16,7 @@
import json
from nifiapi.flowfiletransform import FlowFileTransform,
FlowFileTransformResult
-from nifiapi.properties import PropertyDescriptor, StandardValidators,
ExpressionLanguageScope, PropertyDependency
+from nifiapi.properties import PropertyDescriptor, StandardValidators,
ExpressionLanguageScope
import ChromaUtils
import EmbeddingUtils
import QueryUtils
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
index b5ed592378..0202388196 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py
@@ -13,11 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from langchain.vectorstores import Pinecone
+import langchain.vectorstores
from nifiapi.flowfiletransform import FlowFileTransform,
FlowFileTransformResult
from nifiapi.properties import PropertyDescriptor, StandardValidators,
ExpressionLanguageScope, PropertyDependency
import QueryUtils
-import pinecone
+from pinecone import Pinecone
import json
from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL,
create_embedding_service
@@ -143,6 +143,7 @@ class QueryPinecone(FlowFileTransform):
embeddings = None
query_utils = None
+ pc = None
def __init__(self, **kwargs):
pass
@@ -151,18 +152,15 @@ class QueryPinecone(FlowFileTransform):
return self.properties
def onScheduled(self, context):
- api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
- pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
-
# initialize pinecone
- pinecone.init(
- api_key=api_key,
- environment=pinecone_env,
+ self.pc = Pinecone(
+ api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
+ environment=context.getProperty(self.PINECONE_ENV).getValue()
)
- self.embeddings = create_embedding_service(context)
+ # initialize embedding service
+ self.embeddings = create_embedding_service(context)
self.query_utils = QueryUtils.QueryUtils(context)
-
def transform(self, context, flowfile):
# First, check if our index already exists. If it doesn't, we create it
index_name =
context.getProperty(self.INDEX_NAME).evaluateAttributeExpressions(flowfile).getValue()
@@ -170,11 +168,11 @@ class QueryPinecone(FlowFileTransform):
namespace =
context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
num_results =
context.getProperty(self.NUMBER_OF_RESULTS).evaluateAttributeExpressions(flowfile).asInteger()
- index = pinecone.Index(index_name)
+ index = self.pc.Index(index_name)
text_key =
context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
filter =
context.getProperty(self.FILTER).evaluateAttributeExpressions(flowfile).getValue()
- vectorstore = Pinecone(index, self.embeddings.embed_query, text_key,
namespace=namespace)
+ vectorstore = langchain.vectorstores.Pinecone(index,
self.embeddings.embed_query, text_key, namespace=namespace)
results = vectorstore.similarity_search_with_score(query, num_results,
filter=None if filter is None else json.loads(filter))
documents = []
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
index f27a7f8a8d..0ca33fdd94 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py
@@ -14,7 +14,7 @@
# limitations under the License.
from typing import Tuple
-from nifiapi.properties import PropertyDescriptor, StandardValidators,
ExpressionLanguageScope, PropertyDependency
+from nifiapi.properties import PropertyDescriptor, StandardValidators,
PropertyDependency
import json
ROW_ORIENTED = "Row-Oriented"
diff --git
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
index 4e0669a38e..f3fea58948 100644
---
a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
+++
b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt
@@ -14,16 +14,16 @@
# limitations under the License.
# Shared requirements
-openai==0.28.1
+openai==1.9.0
# Chroma requirements
-chromadb==0.4.14
+chromadb==0.4.22
onnxruntime
tokenizers
tqdm
requests
# Pinecone requirements
-pinecone-client
+pinecone-client==3.0.1
tiktoken
-langchain==0.0.331
+langchain==0.1.2