lordgamez commented on code in PR #1903: URL: https://github.com/apache/nifi-minifi-cpp/pull/1903#discussion_r2037474713
########## extensions/llamacpp/processors/RunLlamaCppInference.h: ########## @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "core/Processor.h" +#include "core/logging/LoggerFactory.h" +#include "core/PropertyDefinitionBuilder.h" +#include "LlamaContext.h" + +namespace org::apache::nifi::minifi::extensions::llamacpp::processors { + +class RunLlamaCppInference : public core::ProcessorImpl { + struct LLMExample { + std::string input_role; + std::string input; + std::string output_role; + std::string output; + }; + + public: + explicit RunLlamaCppInference(std::string_view name, const utils::Identifier& uuid = {}) + : core::ProcessorImpl(name, uuid) { + } + ~RunLlamaCppInference() override = default; + + EXTENSIONAPI static constexpr const char* Description = "LlamaCpp processor to use llama.cpp library for running language model inference. " + "The final prompt used for the inference created using the System Prompt and Prompt proprerty values and the content of the flowfile referred to as input data or flow file content."; + + EXTENSIONAPI static constexpr auto ModelPath = core::PropertyDefinitionBuilder<>::createProperty("Model Path") + .withDescription("The filesystem path of the model file in gguf format.") + .isRequired(true) + .build(); + EXTENSIONAPI static constexpr auto Temperature = core::PropertyDefinitionBuilder<>::createProperty("Temperature") + .withDescription("The temperature to use for sampling.") + .withDefaultValue("0.8") + .build(); + EXTENSIONAPI static constexpr auto TopK = core::PropertyDefinitionBuilder<>::createProperty("Top K") + .withDescription("Limit the next token selection to the K most probable tokens. Set <= 0 value to use vocab size.") + .withDefaultValue("40") + .build(); + EXTENSIONAPI static constexpr auto TopP = core::PropertyDefinitionBuilder<>::createProperty("Top P") + .withDescription("Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. 1.0 = disabled.") + .withDefaultValue("0.9") + .build(); + EXTENSIONAPI static constexpr auto MinP = core::PropertyDefinitionBuilder<>::createProperty("Min P") + .withDescription("Sets a minimum base probability threshold for token selection. 0.0 = disabled.") + .build(); + EXTENSIONAPI static constexpr auto MinKeep = core::PropertyDefinitionBuilder<>::createProperty("Min Keep") + .withDescription("If greater than 0, force samplers to return N possible tokens at minimum.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::UNSIGNED_INT_TYPE) + .withDefaultValue("0") + .build(); + EXTENSIONAPI static constexpr auto TextContextSize = core::PropertyDefinitionBuilder<>::createProperty("Text Context Size") + .withDescription("Size of the text context, use 0 to use size set in model.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::UNSIGNED_INT_TYPE) + .withDefaultValue("4096") + .build(); + EXTENSIONAPI static constexpr auto LogicalMaximumBatchSize = core::PropertyDefinitionBuilder<>::createProperty("Logical Maximum Batch Size") + .withDescription("Logical maximum batch size that can be submitted to the llama.cpp decode function.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::UNSIGNED_INT_TYPE) + .withDefaultValue("2048") + .build(); + EXTENSIONAPI static constexpr auto PhysicalMaximumBatchSize = core::PropertyDefinitionBuilder<>::createProperty("Physical Maximum Batch Size") + .withDescription("Physical maximum batch size.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::UNSIGNED_INT_TYPE) + .withDefaultValue("512") + .build(); + EXTENSIONAPI static constexpr auto MaxNumberOfSequences = core::PropertyDefinitionBuilder<>::createProperty("Max Number Of Sequences") + .withDescription("Maximum number of sequences (i.e. distinct states for recurrent models).") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::UNSIGNED_INT_TYPE) + .withDefaultValue("1") + .build(); + EXTENSIONAPI static constexpr auto ThreadsForGeneration = core::PropertyDefinitionBuilder<>::createProperty("Threads For Generation") + .withDescription("Number of threads to use for generation.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::INTEGER_TYPE) + .withDefaultValue("4") + .build(); + EXTENSIONAPI static constexpr auto ThreadsForBatchProcessing = core::PropertyDefinitionBuilder<>::createProperty("Threads For Batch Processing") + .withDescription("Number of threads to use for batch processing.") + .isRequired(true) + .withPropertyType(core::StandardPropertyTypes::INTEGER_TYPE) + .withDefaultValue("4") + .build(); + EXTENSIONAPI static constexpr auto Prompt = core::PropertyDefinitionBuilder<>::createProperty("Prompt") + .withDescription("The user prompt for the inference.") + .supportsExpressionLanguage(true) + .isRequired(true) + .build(); + EXTENSIONAPI static constexpr auto SystemPrompt = core::PropertyDefinitionBuilder<>::createProperty("System Prompt") + .withDescription("The system prompt for the inference.") + .withDefaultValue("You are a helpful assistant. You are given a question with some possible input data otherwise called flow file content. " + "You are expected to generate a response based on the question and the input data.") + .isRequired(true) + .build(); + + EXTENSIONAPI static constexpr auto Properties = std::to_array<core::PropertyReference>({ + ModelPath, + Temperature, + TopK, + TopP, + MinP, + MinKeep, + TextContextSize, + LogicalMaximumBatchSize, + PhysicalMaximumBatchSize, + MaxNumberOfSequences, + ThreadsForGeneration, + ThreadsForBatchProcessing, + Prompt, + SystemPrompt + }); + + + EXTENSIONAPI static constexpr auto Success = core::RelationshipDefinition{"success", "Generated results from the model"}; + EXTENSIONAPI static constexpr auto Failure = core::RelationshipDefinition{"failure", "Generation failed"}; + EXTENSIONAPI static constexpr auto Relationships = std::array{Success, Failure}; + + EXTENSIONAPI static constexpr bool SupportsDynamicProperties = false; + EXTENSIONAPI static constexpr bool SupportsDynamicRelationships = true; Review Comment: Good point, it was only used in a previous iteration, updated in https://github.com/apache/nifi-minifi-cpp/pull/1903/commits/86cf24d3a23204cd9d313bdf254d04bdbb9dc1f8 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
