This is an automated email from the ASF dual-hosted git repository. lordgamez pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit f4611e542674f65d5a8d7d115bd528179261e67f Author: Adam Debreceni <[email protected]> AuthorDate: Tue Aug 23 17:13:56 2022 +0200 MINIFICPP-1917 - Add json schema generation Co-authored-by: Martin Zink <[email protected]> Signed-off-by: Gabor Gyimesi <[email protected]> This closes #1413 --- CMakeLists.txt | 2 + cmake/{Nlohmann.cmake => Findnlohmann_json.cmake} | 17 +- cmake/GoogleCloudCpp.cmake | 2 - .../{Nlohmann.cmake => JsonSchemaValidator.cmake} | 16 +- libminifi/CMakeLists.txt | 2 +- libminifi/include/agent/JsonSchema.h | 26 ++ libminifi/include/core/PropertyValue.h | 4 + libminifi/src/agent/JsonSchema.cpp | 467 +++++++++++++++++++++ .../test/schema-tests/CMakeLists.txt | 20 +- libminifi/test/schema-tests/SchemaTests.cpp | 258 ++++++++++++ minifi_main/MiNiFiMain.cpp | 27 +- .../nlohmann_lib_as_interface.patch | 37 +- 12 files changed, 847 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb79627e9..b049ec65e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -610,6 +610,8 @@ registerTest("${TEST_DIR}/keyvalue-tests") registerTest("${TEST_DIR}/flow-tests") +registerTest("${TEST_DIR}/schema-tests") + if (NOT DISABLE_ROCKSDB AND NOT DISABLE_LIBARCHIVE) registerTest("${TEST_DIR}/persistence-tests") endif() diff --git a/cmake/Nlohmann.cmake b/cmake/Findnlohmann_json.cmake similarity index 51% copy from cmake/Nlohmann.cmake copy to cmake/Findnlohmann_json.cmake index 9ad44c56c..e81fd9659 100644 --- a/cmake/Nlohmann.cmake +++ b/cmake/Findnlohmann_json.cmake @@ -17,8 +17,17 @@ # under the License. # -set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE) -if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp") - file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp" - EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86) +if (NOT nlohmann_json_FOUND) + set(nlohmann_json_FOUND "YES" CACHE STRING "" FORCE) + set(nlohmann_json_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE) + if(NOT EXISTS "${nlohmann_json_INCLUDE_DIR}/nlohmann/json.hpp") + file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${nlohmann_json_INCLUDE_DIR}/nlohmann/json.hpp" + EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86) + endif() +endif() + +if(NOT TARGET nlohmann_json::nlohmann_json) + add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED) + set_target_properties(nlohmann_json::nlohmann_json PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${nlohmann_json_INCLUDE_DIR}") endif() diff --git a/cmake/GoogleCloudCpp.cmake b/cmake/GoogleCloudCpp.cmake index 4aa57040b..71d08c4c2 100644 --- a/cmake/GoogleCloudCpp.cmake +++ b/cmake/GoogleCloudCpp.cmake @@ -17,10 +17,8 @@ # under the License. # include(FetchContent) -include(Nlohmann) include(Abseil) -set(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER ${NLOHMANN_JSON_INCLUDE_DIR}) set(CRC32C_USE_GLOG OFF CACHE INTERNAL crc32c-glog-off) set(CRC32C_BUILD_TESTS OFF CACHE INTERNAL crc32c-gtest-off) set(CRC32C_BUILD_BENCHMARKS OFF CACHE INTERNAL crc32-benchmarks-off) diff --git a/cmake/Nlohmann.cmake b/cmake/JsonSchemaValidator.cmake similarity index 66% copy from cmake/Nlohmann.cmake copy to cmake/JsonSchemaValidator.cmake index 9ad44c56c..6cad30dac 100644 --- a/cmake/Nlohmann.cmake +++ b/cmake/JsonSchemaValidator.cmake @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,10 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# -set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE) -if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp") - file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp" - EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86) +include(FetchContent) + +FetchContent_Declare(json-schema-validator + URL https://github.com/pboettch/json-schema-validator/archive/2.2.0.tar.gz + URL_HASH SHA256=03897867bd757ecac1db7545babf0c6c128859655b496582a9cea4809c2260aa) + +FetchContent_MakeAvailable(json-schema-validator) + +if (NOT WIN32) + target_compile_options(nlohmann_json_schema_validator PRIVATE -Wno-error) endif() diff --git a/libminifi/CMakeLists.txt b/libminifi/CMakeLists.txt index 27d1c4f50..049e5ef41 100644 --- a/libminifi/CMakeLists.txt +++ b/libminifi/CMakeLists.txt @@ -57,7 +57,7 @@ if (NOT OPENSSL_OFF) set(TLS_SOURCES "src/utils/tls/*.cpp" "src/io/tls/*.cpp") endif() -file(GLOB SOURCES "src/agent/agent_docs.cpp" "src/agent/build_description.cpp" "src/properties/*.cpp" "src/utils/file/*.cpp" "src/sitetosite/*.cpp" "src/core/logging/*.cpp" "src/core/logging/internal/*.cpp" "src/core/logging/alert/*.cpp" "src/core/state/*.cpp" "src/core/state/nodes/*.cpp" "src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp" ${SOCKET_SOURCES} ${TLS_SOURCES} "src/core/controller/*.cpp" "src/controllers/*.cpp" "src/controllers/keyvalue/*.cpp" "src [...] +file(GLOB SOURCES "src/agent/*.cpp" "src/properties/*.cpp" "src/utils/file/*.cpp" "src/sitetosite/*.cpp" "src/core/logging/*.cpp" "src/core/logging/internal/*.cpp" "src/core/logging/alert/*.cpp" "src/core/state/*.cpp" "src/core/state/nodes/*.cpp" "src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp" ${SOCKET_SOURCES} ${TLS_SOURCES} "src/core/controller/*.cpp" "src/controllers/*.cpp" "src/controllers/keyvalue/*.cpp" "src/core/*.cpp" "src/core/repository/*.cpp" " [...] # manually add this as it might not yet be present when this executes list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/agent_version.cpp") diff --git a/libminifi/include/agent/JsonSchema.h b/libminifi/include/agent/JsonSchema.h new file mode 100644 index 000000000..bc843ee6f --- /dev/null +++ b/libminifi/include/agent/JsonSchema.h @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <string> + +namespace org::apache::nifi::minifi::docs { + +std::string generateJsonSchema(); + +} // namespace org::apache::nifi::minifi::docs diff --git a/libminifi/include/core/PropertyValue.h b/libminifi/include/core/PropertyValue.h index fc1af9f05..e40d77b53 100644 --- a/libminifi/include/core/PropertyValue.h +++ b/libminifi/include/core/PropertyValue.h @@ -106,6 +106,10 @@ class PropertyValue : public state::response::ValueNode { return convertImpl<bool>("bool"); } + operator double() const { + return convertImpl<double>("double"); + } + const char* c_str() const { if (!isValueUsable()) { throw utils::internal::InvalidValueException("Cannot convert invalid value"); diff --git a/libminifi/src/agent/JsonSchema.cpp b/libminifi/src/agent/JsonSchema.cpp new file mode 100644 index 000000000..857525e4e --- /dev/null +++ b/libminifi/src/agent/JsonSchema.cpp @@ -0,0 +1,467 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "agent/JsonSchema.h" + +#include <string> +#include <unordered_map> +#include <vector> + +#include "agent/agent_version.h" +#include "agent/build_description.h" +#include "rapidjson/document.h" +#include "rapidjson/prettywriter.h" +#include "RemoteProcessorGroupPort.h" +#include "utils/gsl.h" + +#include "range/v3/view/filter.hpp" +#include "range/v3/view/transform.hpp" +#include "range/v3/view/join.hpp" +#include "range/v3/range/conversion.hpp" + +namespace org::apache::nifi::minifi::docs { + +static std::string escape(std::string str) { + utils::StringUtils::replaceAll(str, "\\", "\\\\"); + utils::StringUtils::replaceAll(str, "\"", "\\\""); + utils::StringUtils::replaceAll(str, "\n", "\\n"); + return str; +} + +static std::string prettifyJson(const std::string& str) { + rapidjson::Document doc; + rapidjson::ParseResult res = doc.Parse(str.c_str(), str.length()); + gsl_Assert(res); + + rapidjson::StringBuffer buffer; + + rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer); + doc.Accept(writer); + + return std::string{buffer.GetString(), buffer.GetSize()}; +} + +void writePropertySchema(const core::Property& prop, std::ostream& out) { + out << "\"" << escape(prop.getName()) << "\" : {"; + out << R"("description": ")" << escape(prop.getDescription()) << "\""; + if (const auto& values = prop.getAllowedValues(); !values.empty()) { + out << R"(, "enum": [)" + << (values + | ranges::views::transform([] (auto& val) {return '"' + escape(val.to_string()) + '"';}) + | ranges::views::join(',') + | ranges::to<std::string>()) + << "]"; + } + if (const auto& def_value = prop.getDefaultValue(); !def_value.empty()) { + const auto& type = def_value.getTypeInfo(); + // order is important as both DataSizeValue and TimePeriodValue's type_id is uint64_t + if (std::dynamic_pointer_cast<core::DataSizeValue>(def_value.getValue()) + || std::dynamic_pointer_cast<core::TimePeriodValue>(def_value.getValue())) { // NOLINT(bugprone-branch-clone) + // special value types + out << R"(, "type": "string", "default": ")" << escape(def_value.to_string()) << "\""; + } else if (type == state::response::Value::INT_TYPE + || type == state::response::Value::INT64_TYPE + || type == state::response::Value::UINT32_TYPE + || type == state::response::Value::UINT64_TYPE) { + out << R"(, "type": "integer", "default": )" << static_cast<int64_t>(def_value); + } else if (type == state::response::Value::DOUBLE_TYPE) { + out << R"(, "type": "number", "default": )" << static_cast<double>(def_value); + } else if (type == state::response::Value::BOOL_TYPE) { + out << R"(, "type": "boolean", "default": )" << (static_cast<bool>(def_value) ? "true" : "false"); + } else { + out << R"(, "type": "string", "default": ")" << escape(def_value.to_string()) << "\""; + } + } else { + // no default value, no type information, fallback to string + out << R"(, "type": "string")"; + } + out << "}"; // property.getName() +} + +template<typename PropertyContainer> +void writeProperties(const PropertyContainer& props, bool supports_dynamic, std::ostream& out) { + out << R"("Properties": {)" + << R"("type": "object",)" + << R"("additionalProperties": )" << (supports_dynamic? "true" : "false") << "," + << R"("required": [)" + << (props + | ranges::views::filter([] (auto& prop) {return prop.getRequired() && prop.getDefaultValue().empty();}) + | ranges::views::transform([] (auto& prop) {return '"' + escape(prop.getName()) + '"';}) + | ranges::views::join(',') + | ranges::to<std::string>()) + << "]"; + + out << R"(, "properties": {)"; + for (size_t prop_idx = 0; prop_idx < props.size(); ++prop_idx) { + const auto& property = props[prop_idx]; + if (prop_idx != 0) out << ","; + writePropertySchema(property, out); + } + out << "}"; // "properties" + out << "}"; // "Properties" +} + +static std::string buildSchema(const std::unordered_map<std::string, std::string>& relationships, const std::string& processors, const std::string& controller_services) { + std::stringstream all_rels; + for (const auto& [name, rels] : relationships) { + all_rels << "\"relationships-" << escape(name) << "\": " << rels << ", "; + } + + std::stringstream remote_port_props; + writeProperties(minifi::RemoteProcessorGroupPort::properties(), minifi::RemoteProcessorGroupPort::SupportsDynamicProperties, remote_port_props); + + std::string process_group_properties = R"( + "Processors": { + "type": "array", + "items": {"$ref": "#/definitions/processor"} + }, + "Connections": { + "type": "array", + "items": {"$ref": "#/definitions/connection"} + }, + "Controller Services": { + "type": "array", + "items": {"$ref": "#/definitions/controller_service"} + }, + "Remote Process Groups": { + "type": "array", + "items": {"$ref": "#/definitions/remote_process_group"} + }, + "Process Groups": { + "type": "array", + "items": {"$ref": "#/definitions/simple_process_group"} + }, + "Funnels": { + "type": "array", + "items": {"$ref": "#/definitions/funnel"} + }, + "Input Ports": { + "type": "array", + "items": {"$ref": "#/definitions/port"} + }, + "Output Ports": { + "type": "array", + "items": {"$ref": "#/definitions/port"} + } + )"; + + std::stringstream cron_pattern; + { + const char* all = "\\\\*"; + const char* any = "\\\\?"; + const char* increment = "(-?[0-9]+)"; + const char* secs = "([0-5]?[0-9])"; + const char* mins = "([0-5]?[0-9])"; + const char* hours = "(1?[0-9]|2[0-3])"; + const char* days = "([1-2]?[0-9]|3[0-1])"; + const char* months = "([0-9]|1[0-2]|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)"; + const char* weekdays = "([0-7]|sun|mon|tue|wed|thu|fri|sat)"; + const char* years = "([0-9]+)"; + + auto makeCommon = [&] (const char* pattern) { + std::stringstream common; + common << all << "|" << any + << "|" << pattern << "(," << pattern << ")*" + << "|" << pattern << "-" << pattern + << "|" << "(" << all << "|" << pattern << ")" << "/" << increment; + return std::move(common).str(); + }; + + cron_pattern << "^" + << "(" << makeCommon(secs) << ")" + << " (" << makeCommon(mins) << ")" + << " (" << makeCommon(hours) << ")" + << " (" << makeCommon(days) << "|LW|L|L-" << days << "|" << days << "W" << ")" + << " (" << makeCommon(months) << ")" + << " (" << makeCommon(weekdays) << "|" << weekdays << "?L|" << weekdays << "#" << "[1-5]" << ")" + << "( (" << makeCommon(years) << "))?" + << "$"; + } + + // the schema specification does not allow case-insensitive regex + std::stringstream cron_pattern_case_insensitive; + for (char ch : cron_pattern.str()) { + if (std::isalpha(static_cast<unsigned char>(ch))) { + cron_pattern_case_insensitive << "[" + << static_cast<char>(std::tolower(static_cast<unsigned char>(ch))) + << static_cast<char>(std::toupper(static_cast<unsigned char>(ch))) + << "]"; + } else { + cron_pattern_case_insensitive << ch; + } + } + + return prettifyJson(R"( +{ + "$schema": "http://json-schema.org/draft-07/schema", + "definitions": {)" + std::move(all_rels).str() + R"( + "datasize": { + "type": "string", + "pattern": "^\\s*[0-9]+\\s*(B|K|M|G|T|P|KB|MB|GB|TB|PB)\\s*$" + }, + "uuid": { + "type": "string", + "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", + "default": "00000000-0000-0000-0000-000000000000" + }, + "cron_pattern": { + "type": "string", + "pattern": ")" + std::move(cron_pattern_case_insensitive).str() + R"(" + }, + "remote_port": { + "type": "object", + "required": ["name", "id", "Properties"], + "properties": { + "name": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"}, + "max concurrent tasks": {"type": "integer"}, + )" + std::move(remote_port_props).str() + R"( + } + }, + "port": { + "type": "object", + "required": ["name", "id"], + "properties": { + "name": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"} + } + }, + "time": { + "type": "string", + "pattern": "^\\s*[0-9]+\\s*(ns|nano|nanos|nanoseconds|nanosecond|us|micro|micros|microseconds|microsecond|msec|ms|millisecond|milliseconds|msecs|millis|milli|sec|s|second|seconds|secs|min|m|mins|minute|minutes|h|hr|hour|hrs|hours|d|day|days)\\s*$" + }, + "controller_service": {"allOf": [{ + "type": "object", + "required": ["name", "id", "class"], + "properties": { + "name": {"type": "string"}, + "class": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"} + } + }, )" + controller_services + R"(]}, + "processor": {"allOf": [{ + "type": "object", + "required": ["name", "id", "class", "scheduling strategy"], + "additionalProperties": false, + "properties": { + "name": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"}, + "class": {"type": "string"}, + "max concurrent tasks": {"type": "integer", "default": 1}, + "penalization period": {"$ref": "#/definitions/time"}, + "yield period": {"$ref": "#/definitions/time"}, + "run duration nanos": {"$ref": "#/definitions/time"}, + "Properties": {}, + "scheduling strategy": {"enum": ["EVENT_DRIVEN", "TIMER_DRIVEN", "CRON_DRIVEN"]}, + "scheduling period": {}, + "auto-terminated relationships list": { + "type": "array", + "items": { + "type": "string" + }, + "uniqueItems": true + } + }}, { + "if": {"properties": {"scheduling strategy": {"const": "EVENT_DRIVEN"}}}, + "then": {"properties": {"scheduling period": false}} + }, { + "if": {"properties": {"scheduling strategy": {"const": "TIMER_DRIVEN"}}}, + "then": {"required": ["scheduling period"], "properties": {"scheduling period": {"$ref": "#/definitions/time"}}} + }, { + "if": {"properties": {"scheduling strategy": {"const": "CRON_DRIVEN"}}}, + "then": {"required": ["scheduling period"], "properties": {"scheduling period": {"$ref": "#/definitions/cron_pattern"}}} + })" + (!processors.empty() ? ", " : "") + processors + R"(] + }, + "remote_process_group": {"allOf": [{ + "type": "object", + "required": ["name", "id", "Input Ports"], + "properties": { + "name": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"}, + "url": {"type": "string"}, + "yield period": {"$ref": "#/definitions/time"}, + "timeout": {"$ref": "#/definitions/time"}, + "local network interface": {"type": "string"}, + "transport protocol": {"enum": ["HTTP", "RAW"]}, + "Input Ports": { + "type": "array", + "items": {"$ref": "#/definitions/remote_port"} + }, + "Output Ports": { + "type": "array", + "items": {"$ref": "#/definitions/remote_port"} + } + } + }, { + "if": {"properties": {"transport protocol": {"const": "HTTP"}}}, + "then": {"properties": { + "proxy host": {"type": "string"}, + "proxy user": {"type": "string"}, + "proxy password": {"type": "string"}, + "proxy port": {"type": "integer"} + }} + }]}, + "connection": { + "type": "object", + "additionalProperties": false, + "required": ["name", "id", "source id", "source relationship names", "destination id"], + "properties": { + "name": {"type": "string"}, + "id": {"$ref": "#/definitions/uuid"}, + "source name": {"type": "string"}, + "source id": {"$ref": "#/definitions/uuid"}, + "source relationship names": { + "type": "array", + "items": {"type": "string"} + }, + "destination name": {"type": "string"}, + "destination id": {"$ref": "#/definitions/uuid"}, + "max work queue size": {"type": "integer", "default": 10000}, + "max work queue data size": {"$ref": "#/definitions/datasize", "default": "10 MB"}, + "flowfile expiration": {"$ref": "#/definitions/time", "default": "0 ms"} + } + }, + "funnel": { + "type": "object", + "required": ["id"], + "properties": { + "id": {"$ref": "#/definitions/uuid"}, + "name": {"type": "string"} + } + }, + "simple_process_group": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": {"type": "string"}, + "version": {"type": "integer"}, + "onschedule retry interval": {"$ref": "#/definitions/time"}, + )" + process_group_properties + R"( + } + }, + "root_process_group": { + "type": "object", + "required": ["Flow Controller"], + "additionalProperties": false, + "properties": { + "$schema": {"type": "string"}, + "Flow Controller": { + "type": "object", + "required": ["name"], + "properties": { + "name": {"type": "string"}, + "version": {"type": "integer"}, + "onschedule retry interval": {"$ref": "#/definitions/time"} + } + }, + )" + process_group_properties + R"( + } + } + }, + "$ref": "#/definitions/root_process_group" +} +)"); +} + +std::string generateJsonSchema() { + std::unordered_map<std::string, std::string> relationships; + std::vector<std::string> proc_schemas; + auto putProcSchema = [&] (const ClassDescription& proc) { + std::stringstream schema; + schema + << "{" + << R"("if": {"properties": {"class": {"const": ")" << escape(proc.short_name_) << "\"}}}," + << R"("then": {)" + << R"("required": ["Properties"],)" + << R"("properties": {)"; + + if (proc.isSingleThreaded_) { + schema << R"("max concurrent tasks": {"const": 1},)"; + } + + schema << R"("auto-terminated relationships list": {"items": {"$ref": "#/definitions/relationships-)" << escape(proc.short_name_) << "\"}},"; + { + std::stringstream rel_schema; + rel_schema << R"({"anyOf": [)"; + if (proc.dynamic_relationships_) { + rel_schema << R"({"type": "string"})"; + } + for (size_t rel_idx = 0; rel_idx < proc.class_relationships_.size(); ++rel_idx) { + if (rel_idx != 0 || proc.dynamic_relationships_) rel_schema << ", "; + rel_schema << R"({"const": ")" << escape(proc.class_relationships_[rel_idx].getName()) << "\"}"; + } + rel_schema << "]}"; + relationships[proc.short_name_] = std::move(rel_schema).str(); + } + + writeProperties(proc.class_properties_, proc.dynamic_properties_, schema); + + schema << "}"; // "properties" + schema << "}"; // "then" + schema << "}"; // if-block + + proc_schemas.push_back(std::move(schema).str()); + }; + + std::vector<std::string> controller_services; + auto putControllerService = [&] (const ClassDescription& service) { + std::stringstream schema; + schema + << "{" + << R"("if": {"properties": {"class": {"const": ")" << escape(service.short_name_) << "\"}}}," + << R"("then": {)" + << R"("required": ["Properties"],)" + << R"("properties": {)"; + + writeProperties(service.class_properties_, service.dynamic_properties_, schema); + + schema << "}"; // "properties" + schema << "}"; // "then" + schema << "}"; // if-block + + controller_services.push_back(std::move(schema).str()); + }; + + const auto& descriptions = AgentDocs::getClassDescriptions(); + for (const std::string& group : AgentBuild::getExtensions()) { + auto it = descriptions.find(group); + if (it == descriptions.end()) { + continue; + } + for (const auto& proc : it->second.processors_) { + putProcSchema(proc); + } + for (const auto& service : it->second.controller_services_) { + putControllerService(service); + } + } + + for (const auto& bundle : ExternalBuildDescription::getExternalGroups()) { + auto description = ExternalBuildDescription::getClassDescriptions(bundle.artifact); + for (const auto& proc : description.processors_) { + putProcSchema(proc); + } + for (const auto& service : description.controller_services_) { + putControllerService(service); + } + } + + return buildSchema(relationships, utils::StringUtils::join(", ", proc_schemas), utils::StringUtils::join(", ", controller_services)); +} + +} // namespace org::apache::nifi::minifi::docs diff --git a/cmake/Nlohmann.cmake b/libminifi/test/schema-tests/CMakeLists.txt similarity index 52% rename from cmake/Nlohmann.cmake rename to libminifi/test/schema-tests/CMakeLists.txt index 9ad44c56c..3b8dbe0c2 100644 --- a/cmake/Nlohmann.cmake +++ b/libminifi/test/schema-tests/CMakeLists.txt @@ -17,8 +17,18 @@ # under the License. # -set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE) -if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp") - file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp" - EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86) -endif() +include(JsonSchemaValidator) + +file(GLOB SCHEMA_TESTS "*.cpp") +SET(SCHEMA_TEST_COUNT 0) +FOREACH(testfile ${SCHEMA_TESTS}) + get_filename_component(testfilename "${testfile}" NAME_WE) + add_executable("${testfilename}" "${testfile}") + createTests("${testfilename}") + target_link_libraries(${testfilename} ${CATCH_MAIN_LIB}) + target_link_libraries(${testfilename} minifi-standard-processors) + target_link_libraries(${testfilename} nlohmann_json_schema_validator) + MATH(EXPR SCHEMA_TEST_COUNT "${SCHEMA_TEST_COUNT}+1") + add_test(NAME "${testfilename}" COMMAND "${testfilename}" WORKING_DIRECTORY ${TEST_DIR}) +ENDFOREACH() +message("-- Finished building ${SCHEMA_TEST_COUNT} Json Schema related test file(s)...") diff --git a/libminifi/test/schema-tests/SchemaTests.cpp b/libminifi/test/schema-tests/SchemaTests.cpp new file mode 100644 index 000000000..d4907ec1c --- /dev/null +++ b/libminifi/test/schema-tests/SchemaTests.cpp @@ -0,0 +1,258 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <fstream> + +#include "../TestBase.h" +#include "../Catch.h" +#include "../agent/JsonSchema.h" +#include "nlohmann/json-schema.hpp" +#include "utils/RegexUtils.h" +#include "utils/StringUtils.h" + +struct JsonError { + std::string path; + std::string error; +}; + +class ErrorHandler : public nlohmann::json_schema::error_handler { + public: + explicit ErrorHandler(std::function<void(const JsonError&)> handler) + : handler_(std::move(handler)) {} + void error(const nlohmann::json::json_pointer& ptr, const nlohmann::json& /*instance*/, const std::string& message) override { + handler_(JsonError{ptr.to_string(), message}); + } + + private: + std::function<void(const JsonError&)> handler_; +}; + +void extractExpectedErrors(nlohmann::json& node, const std::string& path, std::unordered_map<std::string, std::string>& errors) { + if (node.is_object() && node.contains("$err")) { + errors[path] = node["$err"].get<std::string>(); + node = node["$value"]; + } + if (node.is_object() || node.is_array()) { + for (auto& [key, val] : node.items()) { + extractExpectedErrors(val, utils::StringUtils::join_pack(path, "/", key), errors); + } + } +} + +TEST_CASE("The generated JSON schema matches a valid json flow") { + const nlohmann::json config_schema = nlohmann::json::parse(minifi::docs::generateJsonSchema()); + + nlohmann::json_schema::json_validator validator; + validator.set_root_schema(config_schema); + + auto config_json = R"( + { + "Flow Controller": {"name": "Test"}, + "Processors": [ + { + "id": "00000000-0000-0000-0000-000000000000", + "class": "GenerateFlowFile", + "name": "Proc1", + "scheduling strategy": "TIMER_DRIVEN", + "scheduling period": "1 min", + "Properties": {} + } + ], + "Connections": [ + { + "id": "00000000-0000-0000-0000-000000000000", + "name": "Conn1", + "source id": "00000000-0000-0000-0000-000000000000", + "source relationship names": ["success"], + "destination id": "00000000-0000-0000-0000-000000000000" + } + ], + "Input Ports": [ + {"id": "00000000-0000-0000-0000-000000000000", "name": "In1"} + ], + "Output Ports": [ + {"id": "00000000-0000-0000-0000-000000000000", "name": "Out1"} + ], + "Funnels": [ + {"id": "00000000-0000-0000-0000-000000000000", "name": "Fun1"} + ], + "Process Groups": [ + { + "name": "Group1", + "Processors": [], + "Connections": [], + "Process Groups": [] + } + ], + "Remote Process Groups": [ + { + "id": "00000000-0000-0000-0000-000000000000", + "name": "RPG1", + "Input Ports": [{ + "id": "00000000-0000-0000-0000-000000000000", + "name": "RIn1", + "Properties": { + "Host Name": "localhost" + } + }] + } + ], + "Controller Services": [ + { + "id": "00000000-0000-0000-0000-000000000000", + "class": "SSLContextService", + "name": "Service1", + "Properties": { + "Client Certificate": "", + "Private Key": "", + "Passphrase": "", + "CA Certificate": "" + } + } + ] + } + )"_json; + + validator.validate(config_json); +} + +TEST_CASE("The JSON schema detects invalid values in the json flow") { + const nlohmann::json config_schema = nlohmann::json::parse(minifi::docs::generateJsonSchema()); + + nlohmann::json_schema::json_validator validator; + validator.set_root_schema(config_schema); + + // the objects of type {"$err": <error>, "$value": <value>} are special + // in the sense that they are preprocessed, replaced by <value> and we expect + // a validation error at the position of this object that matches the + // regex <error> + auto config_json = R"( + { + "Flow Controller": {"name": "Test"}, + "Processors": [ + {"$err": "property 'scheduling period' not found", "$value": { + "id": "00000000-0000-0000-0000-000000000000", + "class": "GenerateFlowFile", + "name": "Proc1", + "scheduling strategy": "TIMER_DRIVEN", + "Properties": {} + }}, + { + "id": "00000000-0000-0000-0000-000000000000", + "class": "GenerateFlowFile", + "name": "Proc1", + "scheduling strategy": "TIMER_DRIVEN", + "scheduling period": "1 min", + "Properties": { + "Batch Size": {"$value": "not a number", "$err": "unexpected instance type"} + } + }, + { + "id": "00000000-0000-0000-0000-000000000000", + "class": "GenerateFlowFile", + "name": "Proc1", + "scheduling strategy": "TIMER_DRIVEN", + "scheduling period": "1 min", + "Properties": {"$err": "", "$value": { + "No such property": 5 + }} + } + ], + "Connections": [ + {"$err": "property 'name' not found", "$value": { + "id": {"$value": "00000000-0000-0000-0000-00000000000", "$err": ""}, + "source id": "00000000-0000-0000-0000-000000000000", + "source relationship names": ["success"], + "destination id": "00000000-0000-0000-0000-000000000000" + }} + ], + "Input Ports": [ + {"id": "00000000-0000-0000-0000-000000000000", "name": {"$value": 5, "$err": "unexpected instance type"}} + ], + "Output Ports": [ + {"id": "00000000-0000-0000-0000-000000000000", "name": "Out1"} + ], + "Funnels": [ + {"id": {"$value": 5, "$err": "unexpected instance type"}} + ], + "Process Groups": [ + {"$err": "", "$value": { + "name": "Group1", + "no such property": [] + }} + ], + "Remote Process Groups": [ + {"$err": "property 'Input Ports' not found", "$value": { + "id": "00000000-0000-0000-0000-000000000000", + "name": "RPG1" + }} + ], + "Controller Services": [ + { + "id": {"$value": "00000000-0000-0000-0000-00000000000", "$err": ""}, + "class": "SSLContextService", + "name": "Service1", + "Properties": { + "Client Certificate": 6, + "Private Key": "", + "Passphrase": "", + "CA Certificate": "" + } + }, + { + "id": "00000000-0000-0000-0000-000000000001", + "class": "SSLContextService", + "name": "Service1", + "Properties": { + "Client Certificate": {"$value": 6, "$err": "unexpected instance type"}, + "Private Key": "", + "Passphrase": "", + "CA Certificate": "" + } + }, + {"$value": "kenyer", "$err": "unexpected instance type"} + ] + } + )"_json; + + std::unordered_map<std::string, std::string> errors; + extractExpectedErrors(config_json, "", errors); + + ErrorHandler err_handler{[&] (auto err) { + auto it = errors.find(err.path); + if (it == errors.end()) { + throw std::logic_error("Unexpected error in json flow at " + err.path + ": " + err.error); + } + if (!it->second.empty()) { + minifi::utils::Regex re(it->second); + if (!minifi::utils::regexSearch(err.error, re)) { + throw std::logic_error("Error in json flow at " + err.path + " does not match expected pattern, expected: '" + it->second + "', actual: " + err.error); + } + } + errors.erase(it); + }}; + validator.validate(config_json, err_handler); + + // all expected errors should have been processed + if (!errors.empty()) { + for (const auto& [path, err] : errors) { + std::cerr << "Expected error at " << path << ": " << err << std::endl; + } + throw std::logic_error("There were some expected errors that did not occur"); + } +} diff --git a/minifi_main/MiNiFiMain.cpp b/minifi_main/MiNiFiMain.cpp index 8dee11d0f..b41935130 100644 --- a/minifi_main/MiNiFiMain.cpp +++ b/minifi_main/MiNiFiMain.cpp @@ -63,6 +63,7 @@ #include "FlowController.h" #include "AgentDocs.h" #include "MainHelper.h" +#include "agent/JsonSchema.h" namespace minifi = org::apache::nifi::minifi; namespace core = minifi::core; @@ -119,6 +120,15 @@ void dumpDocs(const std::shared_ptr<minifi::Configure> &configuration, const std docsCreator.generate(dir, out); } +void writeJsonSchema(const std::shared_ptr<minifi::Configure> &configuration, std::ostream& out) { + auto pythoncreator = core::ClassLoader::getDefaultClassLoader().instantiate("PythonCreator", "PythonCreator"); + if (nullptr != pythoncreator) { + pythoncreator->configure(configuration); + } + + out << minifi::docs::generateJsonSchema(); +} + int main(int argc, char **argv) { #ifdef WIN32 RunAsServiceIfNeeded(); @@ -252,7 +262,7 @@ int main(int argc, char **argv) { exit(1); } - std::cerr << "Dumping docs to " << argv[2] << std::endl; + std::cout << "Dumping docs to " << argv[2] << std::endl; if (argc == 4) { std::string filepath; std::string filename; @@ -269,6 +279,21 @@ int main(int argc, char **argv) { exit(0); } + if (argc >= 2 && std::string("schema") == argv[1]) { + if (argc != 3) { + std::cerr << "Malformed schema command, expected '<minifiexe> schema <output-file>'" << std::endl; + std::exit(1); + } + + std::cout << "Writing json schema to " << argv[2] << std::endl; + + { + std::ofstream schema_file{argv[2]}; + writeJsonSchema(configure, schema_file); + } + std::exit(0); + } + if (configure->get(minifi::Configure::nifi_graceful_shutdown_seconds, graceful_shutdown_seconds)) { try { stop_wait_time = std::stoi(graceful_shutdown_seconds); diff --git a/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch b/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch index 56d951a85..3c48b7d21 100644 --- a/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch +++ b/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch @@ -1,13 +1,26 @@ -diff --git a/cmake/IncludeNlohmannJson.cmake b/cmake/IncludeNlohmannJson.cmake -index db8056ae0..613f18b97 100644 ---- a/cmake/IncludeNlohmannJson.cmake -+++ b/cmake/IncludeNlohmannJson.cmake -@@ -23,7 +23,7 @@ function (find_nlohmann_json) - # library that is all we need. - find_path(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER "nlohmann/json.hpp" - REQUIRED) +diff -rupN orig/cmake/IncludeNlohmannJson.cmake patched/cmake/IncludeNlohmannJson.cmake +--- google-cloud-cpp-1.37.0/cmake/IncludeNlohmannJson.cmake 2022-03-01 19:09:39.000000000 +0100 ++++ google-cloud-cpp-1.37.0-new/cmake/IncludeNlohmannJson.cmake 2022-12-09 10:32:45.000000000 +0100 +@@ -14,21 +14,4 @@ + # limitations under the License. + # ~~~ + +-function (find_nlohmann_json) +- find_package(nlohmann_json CONFIG QUIET) +- if (nlohmann_json_FOUND) +- return() +- endif () +- # As a fall back, try finding the header. Since this is a header-only +- # library that is all we need. +- find_path(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER "nlohmann/json.hpp" +- REQUIRED) - add_library(nlohmann_json::nlohmann_json UNKNOWN IMPORTED) -+ add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED) - set_property( - TARGET nlohmann_json::nlohmann_json - APPEND +- set_property( +- TARGET nlohmann_json::nlohmann_json +- APPEND +- PROPERTY INTERFACE_INCLUDE_DIRECTORIES +- ${GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER}) +-endfunction () +- +-find_nlohmann_json() ++find_package(nlohmann_json REQUIRED)
