Re: [PR] MINIFICPP-2556 Create llama.cpp processor for language model inference [nifi-minifi-cpp]

via GitHub Thu, 10 Apr 2025 09:18:38 -0700


lordgamez commented on code in PR #1903:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1903#discussion_r2037470022



##########
cmake/LlamaCpp.cmake:
##########
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include(FetchContent)
+
+set(BUILD_SHARED_LIBS "OFF" CACHE STRING "" FORCE)
+set(LLAMA_BUILD_TESTS "OFF" CACHE STRING "" FORCE)
+set(LLAMA_BUILD_EXAMPLES "OFF" CACHE STRING "" FORCE)
+set(LLAMA_BUILD_SERVER "OFF" CACHE STRING "" FORCE)
+set(GGML_OPENMP "OFF" CACHE STRING "" FORCE)
+
+set(PATCH_FILE_1 "${CMAKE_SOURCE_DIR}/thirdparty/llamacpp/metal.patch")
+set(PATCH_FILE_2 
"${CMAKE_SOURCE_DIR}/thirdparty/llamacpp/lu8_macro_fix.patch")  # 
https://github.com/ggml-org/llama.cpp/issues/12740

Review Comment:
   I removed dependencies from mac build in 
https://github.com/apache/nifi-minifi-cpp/pull/1903/commits/1ca2c3a46abf4486a51f6483d0b4c6fee604c197
 will see if it builds and runs correctly



##########
PROCESSORS.md:
##########
@@ -1727,7 +1728,42 @@ In the list below, the names of required properties 
appear in bold. Any other pr
 | lastModifiedTime | success      | The timestamp of when the file's content 
changed in the filesystem as 'yyyy-MM-dd'T'HH:mm:ss'.                           
                                                                                
                                                                                
                                                                    |
 | creationTime     | success      | The timestamp of when the file was created 
in the filesystem as 'yyyy-MM-dd'T'HH:mm:ss'.                                   
                                                                                
                                                                                
                                                                  |
 | lastAccessTime   | success      | The timestamp of when the file was 
accessed in the filesystem as 'yyyy-MM-dd'T'HH:mm:ss'.                          
                                                                                
                                                                                
                                                                          |
-| size             | success      | The size of the file in bytes.             
                                                                                
                                                                                
                                                                                
                                                                  |
+| size             | success      | The size of the file in bytes.

Review Comment:
   Fixed in 
https://github.com/apache/nifi-minifi-cpp/pull/1903/commits/86cf24d3a23204cd9d313bdf254d04bdbb9dc1f8



##########
docker/test/integration/cluster/containers/MinifiContainer.py:
##########
@@ -47,6 +47,7 @@ def __init__(self):
             self.enable_openssl_fips_mode = True
         else:
             self.enable_openssl_fips_mode = False
+        self.download_llama_model = True

Review Comment:
   Good catch, fixed in 
https://github.com/apache/nifi-minifi-cpp/pull/1903/commits/86cf24d3a23204cd9d313bdf254d04bdbb9dc1f8



##########
extensions/llamacpp/tests/RunLlamaCppInferenceTests.cpp:
##########
@@ -0,0 +1,257 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "unit/TestBase.h"
+#include "unit/Catch.h"
+#include "RunLlamaCppInference.h"
+#include "unit/SingleProcessorTestController.h"
+#include "core/FlowFile.h"
+
+namespace org::apache::nifi::minifi::extensions::llamacpp::test {
+
+class MockLlamaContext : public processors::LlamaContext {
+ public:
+  std::string applyTemplate(const std::vector<processors::LlamaChatMessage>& 
messages) override {
+    messages_ = messages;
+    return "Test input";
+  }
+
+  nonstd::expected<uint64_t, std::string> generate(const std::string& input, 
std::function<void(std::string_view/*token*/)> token_handler) override {
+    if (fail_generation_) {
+      return nonstd::make_unexpected("Generation failed");
+    }
+    input_ = input;
+    token_handler("Test ");
+    token_handler("generated");
+    token_handler(" content");
+    return 3;
+  }
+
+  [[nodiscard]] const std::vector<processors::LlamaChatMessage>& getMessages() 
const {
+    return messages_;
+  }
+
+  [[nodiscard]] const std::string& getInput() const {
+    return input_;
+  }
+
+  void setFailure() {
+    fail_generation_ = true;
+  }
+
+ private:
+  bool fail_generation_{false};
+  std::vector<processors::LlamaChatMessage> messages_;
+  std::string input_;
+};
+
+TEST_CASE("Prompt is generated correctly with default parameters") {
+  auto mock_llama_context = std::make_unique<MockLlamaContext>();
+  auto mock_llama_context_ptr = mock_llama_context.get();
+  std::filesystem::path test_model_path;
+  processors::LlamaSamplerParams test_sampler_params;
+  processors::LlamaContextParams test_context_params;
+  processors::LlamaContext::testSetProvider(
+    [&](const std::filesystem::path& model_path, const 
processors::LlamaSamplerParams& sampler_params, const 
processors::LlamaContextParams& context_params) {
+      test_model_path = model_path;
+      test_sampler_params = sampler_params;
+      test_context_params = context_params;
+      return std::move(mock_llama_context);
+    });
+  minifi::test::SingleProcessorTestController 
controller(std::make_unique<processors::RunLlamaCppInference>("RunLlamaCppInference"));
+  
LogTestController::getInstance().setTrace<processors::RunLlamaCppInference>();
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ModelPath,
 "Dummy model");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Prompt,
 "Question: What is the answer to life, the universe and everything?");
+
+  auto results = controller.trigger(minifi::test::InputFlowFileData{.content = 
"42", .attributes = {}});
+  CHECK(test_model_path == "Dummy model");
+  CHECK(test_sampler_params.temperature == 0.8F);
+  CHECK(test_sampler_params.top_k == 40);
+  CHECK(test_sampler_params.top_p == 0.9F);
+  CHECK(test_sampler_params.min_p == std::nullopt);
+  CHECK(test_sampler_params.min_keep == 0);
+  CHECK(test_context_params.n_ctx == 4096);
+  CHECK(test_context_params.n_batch == 2048);
+  CHECK(test_context_params.n_ubatch == 512);
+  CHECK(test_context_params.n_seq_max == 1);
+  CHECK(test_context_params.n_threads == 4);
+  CHECK(test_context_params.n_threads_batch == 4);
+
+  REQUIRE(results.at(processors::RunLlamaCppInference::Success).size() == 1);
+  auto& output_flow_file = 
results.at(processors::RunLlamaCppInference::Success)[0];
+  CHECK(controller.plan->getContent(output_flow_file) == "Test generated 
content");
+  CHECK(mock_llama_context_ptr->getInput() == "Test input");
+  REQUIRE(mock_llama_context_ptr->getMessages().size() == 2);
+  CHECK(mock_llama_context_ptr->getMessages()[0].role == "system");
+  CHECK(mock_llama_context_ptr->getMessages()[0].content == "You are a helpful 
assistant. You are given a question with some possible input data otherwise 
called flow file content. "
+                                                            "You are expected 
to generate a response based on the question and the input data.");
+  CHECK(mock_llama_context_ptr->getMessages()[1].role == "user");
+  CHECK(mock_llama_context_ptr->getMessages()[1].content == "Input data (or 
flow file content):\n42\n\nQuestion: What is the answer to life, the universe 
and everything?");
+}
+
+TEST_CASE("Prompt is generated correctly with custom parameters") {
+  auto mock_llama_context = std::make_unique<MockLlamaContext>();
+  auto mock_llama_context_ptr = mock_llama_context.get();
+  std::filesystem::path test_model_path;
+  processors::LlamaSamplerParams test_sampler_params;
+  processors::LlamaContextParams test_context_params;
+  processors::LlamaContext::testSetProvider(
+    [&](const std::filesystem::path& model_path, const 
processors::LlamaSamplerParams& sampler_params, const 
processors::LlamaContextParams& context_params) {
+      test_model_path = model_path;
+      test_sampler_params = sampler_params;
+      test_context_params = context_params;
+      return std::move(mock_llama_context);
+    });
+  minifi::test::SingleProcessorTestController 
controller(std::make_unique<processors::RunLlamaCppInference>("RunLlamaCppInference"));
+  
LogTestController::getInstance().setTrace<processors::RunLlamaCppInference>();
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ModelPath,
 "/path/to/model");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Prompt,
 "Question: What is the answer to life, the universe and everything?");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Temperature,
 "0.4");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::TopK, 
"20");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::TopP, 
"");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::MinP, 
"0.1");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::MinKeep,
 "1");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::TextContextSize,
 "4096");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::LogicalMaximumBatchSize,
 "1024");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::PhysicalMaximumBatchSize,
 "796");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::MaxNumberOfSequences,
 "2");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ThreadsForGeneration,
 "12");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ThreadsForBatchProcessing,
 "8");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::SystemPrompt,
 "Whatever");
+
+  auto results = controller.trigger(minifi::test::InputFlowFileData{.content = 
"42", .attributes = {}});
+  CHECK(test_model_path == "/path/to/model");
+  CHECK(test_sampler_params.temperature == 0.4F);
+  CHECK(test_sampler_params.top_k == 20);
+  CHECK(test_sampler_params.top_p == std::nullopt);
+  CHECK(test_sampler_params.min_p == 0.1F);
+  CHECK(test_sampler_params.min_keep == 1);
+  CHECK(test_context_params.n_ctx == 4096);
+  CHECK(test_context_params.n_batch == 1024);
+  CHECK(test_context_params.n_ubatch == 796);
+  CHECK(test_context_params.n_seq_max == 2);
+  CHECK(test_context_params.n_threads == 12);
+  CHECK(test_context_params.n_threads_batch == 8);
+
+  REQUIRE(results.at(processors::RunLlamaCppInference::Success).size() == 1);
+  auto& output_flow_file = 
results.at(processors::RunLlamaCppInference::Success)[0];
+  CHECK(controller.plan->getContent(output_flow_file) == "Test generated 
content");
+  CHECK(mock_llama_context_ptr->getInput() == "Test input");
+  REQUIRE(mock_llama_context_ptr->getMessages().size() == 2);
+  CHECK(mock_llama_context_ptr->getMessages()[0].role == "system");
+  CHECK(mock_llama_context_ptr->getMessages()[0].content == "Whatever");
+  CHECK(mock_llama_context_ptr->getMessages()[1].role == "user");
+  CHECK(mock_llama_context_ptr->getMessages()[1].content == "Input data (or 
flow file content):\n42\n\nQuestion: What is the answer to life, the universe 
and everything?");
+}
+
+TEST_CASE("Empty flow file does not include input data in prompt") {
+  auto mock_llama_context = std::make_unique<MockLlamaContext>();
+  auto mock_llama_context_ptr = mock_llama_context.get();
+  processors::LlamaContext::testSetProvider(
+    [&](const std::filesystem::path&, const processors::LlamaSamplerParams&, 
const processors::LlamaContextParams&) {
+      return std::move(mock_llama_context);
+    });
+  minifi::test::SingleProcessorTestController 
controller(std::make_unique<processors::RunLlamaCppInference>("RunLlamaCppInference"));
+  
LogTestController::getInstance().setTrace<processors::RunLlamaCppInference>();
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ModelPath,
 "Dummy model");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Prompt,
 "Question: What is the answer to life, the universe and everything?");
+
+  auto results = controller.trigger(minifi::test::InputFlowFileData{.content = 
"", .attributes = {}});
+
+  REQUIRE(results.at(processors::RunLlamaCppInference::Success).size() == 1);
+  auto& output_flow_file = 
results.at(processors::RunLlamaCppInference::Success)[0];
+  CHECK(controller.plan->getContent(output_flow_file) == "Test generated 
content");
+  CHECK(mock_llama_context_ptr->getInput() == "Test input");
+  REQUIRE(mock_llama_context_ptr->getMessages().size() == 2);
+  CHECK(mock_llama_context_ptr->getMessages()[0].role == "system");
+  CHECK(mock_llama_context_ptr->getMessages()[0].content == "You are a helpful 
assistant. You are given a question with some possible input data otherwise 
called flow file content. "
+                                                            "You are expected 
to generate a response based on the question and the input data.");
+  CHECK(mock_llama_context_ptr->getMessages()[1].role == "user");
+  CHECK(mock_llama_context_ptr->getMessages()[1].content == "Question: What is 
the answer to life, the universe and everything?");
+}
+
+TEST_CASE("Invalid values for optional double type properties throw 
exception") {
+  processors::LlamaContext::testSetProvider(
+    [&](const std::filesystem::path&, const processors::LlamaSamplerParams&, 
const processors::LlamaContextParams&) {
+      return std::make_unique<MockLlamaContext>();
+    });
+  minifi::test::SingleProcessorTestController 
controller(std::make_unique<processors::RunLlamaCppInference>("RunLlamaCppInference"));
+  
LogTestController::getInstance().setTrace<processors::RunLlamaCppInference>();
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::ModelPath,
 "Dummy model");
+  
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Prompt,
 "Question: What is the answer to life, the universe and everything?");
+
+  std::string property_name;
+  SECTION("Invalid value for Temperature property") {
+    
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::Temperature,
 "invalid_value");
+    property_name = processors::RunLlamaCppInference::Temperature.name;
+  }
+  SECTION("Invalid value for Top P property") {
+    
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::TopP, 
"invalid_value");
+    property_name = processors::RunLlamaCppInference::TopP.name;
+  }
+  SECTION("Invalid value for Min P property") {
+    
controller.getProcessor()->setProperty(processors::RunLlamaCppInference::MinP, 
"invalid_value");
+    property_name = processors::RunLlamaCppInference::MinP.name;
+  }
+
+  
REQUIRE_THROWS_WITH(controller.trigger(minifi::test::InputFlowFileData{.content 
= "42", .attributes = {}}),
+                      fmt::format("Process Schedule Operation: Property '{}' 
has invalid value 'invalid_value'", property_name));
+}
+
+TEST_CASE("Top K property empty and invalid values are handled properly") {
+  std::optional<int32_t> test_top_k;

Review Comment:
   Updated in 
https://github.com/apache/nifi-minifi-cpp/pull/1903/commits/86cf24d3a23204cd9d313bdf254d04bdbb9dc1f8



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] MINIFICPP-2556 Create llama.cpp processor for language model inference [nifi-minifi-cpp]

Reply via email to