pvillard31 commented on code in PR #8590: URL: https://github.com/apache/nifi/pull/8590#discussion_r1588993880
########## nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryQdrant.py: ########## @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from langchain.vectorstores.qdrant import Qdrant +from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult +from nifiapi.properties import ( + PropertyDescriptor, + StandardValidators, + ExpressionLanguageScope, +) +import QueryUtils +import json +from EmbeddingUtils import ( + create_embedding_service, +) + +from nifiapi.documentation import use_case + +from qdrant_client import QdrantClient + +import QdrantUtils + + +@use_case( + description="Semantically search for documents stored in Qdrant - https://qdrant.tech/", + keywords=["qdrant", "embedding", "vector", "text", "vectorstore", "search"], + configuration=""" + Configure 'Collection Name' to the name of the Qdrant collection to use. + Configure 'Qdrant URL' to the fully qualified URL of the Qdrant instance. + Configure 'Qdrant API Key' to the API Key to use in order to authenticate with Qdrant. + Configure 'Prefer gRPC' to True if you want to use gRPC for interfacing with Qdrant. + Configure 'Use HTTPS' to True if you want to use TLS(HTTPS) while interfacing with Qdrant. + Configure 'Embedding Model' to indicate whether OpenAI embeddings should be used or a HuggingFace embedding model should be used: 'Hugging Face Model' or 'OpenAI Model' + Configure 'HuggingFace API Key' or 'OpenAI API Key', depending on the chosen Embedding Model. + Configure 'HuggingFace Model' or 'OpenAI Model' to the name of the model to use. + Configure 'Query' to the text of the query to send to Qdrant. + Configure 'Number of Results' to the number of results to return from Qdrant. + Configure 'Metadata Filter' to apply an optional metadata filter with the query. For example: { "author": "john.doe" } + Configure 'Output Strategy' to indicate how the output should be formatted: 'Row-Oriented', 'Text', or 'Column-Oriented'. + Configure 'Results Field' to the name of the field to insert the results, if the input FlowFile is JSON Formatted,. + Configure 'Include Metadatas' to True if metadata should be included in the output. + Configure 'Include Distances' to True if distances should be included in the output. + """, +) +class QueryQdrant(FlowFileTransform): + class Java: + implements = ["org.apache.nifi.python.processor.FlowFileTransform"] + + class ProcessorDetails: + version = "2.0.0-SNAPSHOT" Review Comment: ```suggestion version = "@project.version@" ``` ########## nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutQdrant.py: ########## @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from langchain.vectorstores.qdrant import Qdrant +from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult +from nifiapi.properties import ( + PropertyDescriptor, + StandardValidators, + ExpressionLanguageScope, +) +import json +from EmbeddingUtils import ( + create_embedding_service, +) +from nifiapi.documentation import use_case + +from qdrant_client.models import Distance + +import QdrantUtils + + +@use_case( + description="Create embeddings that semantically represent text content and upload to Qdrant - https://qdrant.tech/", + notes="This processor assumes that the data has already been formatted in JSONL format with the text to store in Qdrant provided in the 'text' field.", + keywords=["qdrant", "embedding", "vector", "text", "vectorstore", "insert"], + configuration=""" + Configure 'Collection Name' to the name of the Qdrant collection to use. + Configure 'Qdrant URL' to the fully qualified URL of the Qdrant instance. + Configure 'Qdrant API Key' to the API Key to use in order to authenticate with Qdrant. + Configure 'Prefer gRPC' to True if you want to use gRPC for interfacing with Qdrant. + Configure 'Use HTTPS' to True if you want to use TLS(HTTPS) while interfacing with Qdrant. + Configure 'Embedding Model' to indicate whether OpenAI embeddings should be used or a HuggingFace embedding model should be used: 'Hugging Face Model' or 'OpenAI Model' + Configure 'HuggingFace API Key' or 'OpenAI API Key', depending on the chosen Embedding Model. + Configure 'HuggingFace Model' or 'OpenAI Model' to the name of the model to use. + Configure 'Force Recreate Collection' to True if you want to recreate the collection if it already exists. + Configure 'Similarity Metric' to the similarity metric to use when querying Qdrant. + + If the documents to send to Qdrant contain a unique identifier(UUID), set the 'Document ID Field Name' property to the name of the field that contains the document ID. + This property can be left blank, in which case a UUID will be generated based on the FlowFile's filename. + """, +) +class PutQdrant(FlowFileTransform): + class Java: + implements = ["org.apache.nifi.python.processor.FlowFileTransform"] + + class ProcessorDetails: + version = "2.0.0-SNAPSHOT" Review Comment: ```suggestion version = "@project.version@" ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
