This is an automated email from the ASF dual-hosted git repository.

lordgamez pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit f4611e542674f65d5a8d7d115bd528179261e67f
Author: Adam Debreceni <[email protected]>
AuthorDate: Tue Aug 23 17:13:56 2022 +0200

    MINIFICPP-1917 - Add json schema generation
    
    Co-authored-by: Martin Zink <[email protected]>
    Signed-off-by: Gabor Gyimesi <[email protected]>
    
    This closes #1413
---
 CMakeLists.txt                                     |   2 +
 cmake/{Nlohmann.cmake => Findnlohmann_json.cmake}  |  17 +-
 cmake/GoogleCloudCpp.cmake                         |   2 -
 .../{Nlohmann.cmake => JsonSchemaValidator.cmake}  |  16 +-
 libminifi/CMakeLists.txt                           |   2 +-
 libminifi/include/agent/JsonSchema.h               |  26 ++
 libminifi/include/core/PropertyValue.h             |   4 +
 libminifi/src/agent/JsonSchema.cpp                 | 467 +++++++++++++++++++++
 .../test/schema-tests/CMakeLists.txt               |  20 +-
 libminifi/test/schema-tests/SchemaTests.cpp        | 258 ++++++++++++
 minifi_main/MiNiFiMain.cpp                         |  27 +-
 .../nlohmann_lib_as_interface.patch                |  37 +-
 12 files changed, 847 insertions(+), 31 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cb79627e9..b049ec65e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -610,6 +610,8 @@ registerTest("${TEST_DIR}/keyvalue-tests")
 
 registerTest("${TEST_DIR}/flow-tests")
 
+registerTest("${TEST_DIR}/schema-tests")  # registers the schema-tests directory whose CMakeLists.txt this commit adds
+
 if (NOT DISABLE_ROCKSDB AND NOT DISABLE_LIBARCHIVE)
     registerTest("${TEST_DIR}/persistence-tests")
 endif()
diff --git a/cmake/Nlohmann.cmake b/cmake/Findnlohmann_json.cmake
similarity index 51%
copy from cmake/Nlohmann.cmake
copy to cmake/Findnlohmann_json.cmake
index 9ad44c56c..e81fd9659 100644
--- a/cmake/Nlohmann.cmake
+++ b/cmake/Findnlohmann_json.cmake
@@ -17,8 +17,17 @@
 # under the License.
 #
 
-set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE)
-if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp")
-    file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp"
-            EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86)
+if (NOT nlohmann_json_FOUND)  # skip the setup when an earlier run already cached nlohmann_json_FOUND
+    set(nlohmann_json_FOUND "YES" CACHE STRING "" FORCE)
+    set(nlohmann_json_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE)
+    if(NOT EXISTS "${nlohmann_json_INCLUDE_DIR}/nlohmann/json.hpp")  # download the single header only once
+        file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${nlohmann_json_INCLUDE_DIR}/nlohmann/json.hpp"
+                EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86)
+    endif()
+endif()
+
+if(NOT TARGET nlohmann_json::nlohmann_json)
+    add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)  # header-only: the target carries just the include directory
+    set_target_properties(nlohmann_json::nlohmann_json PROPERTIES
+        INTERFACE_INCLUDE_DIRECTORIES "${nlohmann_json_INCLUDE_DIR}")
 endif()
diff --git a/cmake/GoogleCloudCpp.cmake b/cmake/GoogleCloudCpp.cmake
index 4aa57040b..71d08c4c2 100644
--- a/cmake/GoogleCloudCpp.cmake
+++ b/cmake/GoogleCloudCpp.cmake
@@ -17,10 +17,8 @@
 # under the License.
 #
 include(FetchContent)
-include(Nlohmann)
 include(Abseil)
 
-set(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER ${NLOHMANN_JSON_INCLUDE_DIR})
 set(CRC32C_USE_GLOG OFF CACHE INTERNAL crc32c-glog-off)
 set(CRC32C_BUILD_TESTS OFF CACHE INTERNAL crc32c-gtest-off)
 set(CRC32C_BUILD_BENCHMARKS OFF CACHE INTERNAL crc32-benchmarks-off)
diff --git a/cmake/Nlohmann.cmake b/cmake/JsonSchemaValidator.cmake
similarity index 66%
copy from cmake/Nlohmann.cmake
copy to cmake/JsonSchemaValidator.cmake
index 9ad44c56c..6cad30dac 100644
--- a/cmake/Nlohmann.cmake
+++ b/cmake/JsonSchemaValidator.cmake
@@ -1,4 +1,3 @@
-#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,10 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-#
 
-set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE)
-if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp")
-    file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp"
-            EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86)
+include(FetchContent)
+
+FetchContent_Declare(json-schema-validator
+    URL https://github.com/pboettch/json-schema-validator/archive/2.2.0.tar.gz
+    URL_HASH SHA256=03897867bd757ecac1db7545babf0c6c128859655b496582a9cea4809c2260aa)
+
+FetchContent_MakeAvailable(json-schema-validator)  # makes the nlohmann_json_schema_validator target used below available
+
+if (NOT WIN32)
+  target_compile_options(nlohmann_json_schema_validator PRIVATE -Wno-error)  # warnings in the fetched library must not fail the build
 endif()
diff --git a/libminifi/CMakeLists.txt b/libminifi/CMakeLists.txt
index 27d1c4f50..049e5ef41 100644
--- a/libminifi/CMakeLists.txt
+++ b/libminifi/CMakeLists.txt
@@ -57,7 +57,7 @@ if (NOT OPENSSL_OFF)
     set(TLS_SOURCES "src/utils/tls/*.cpp" "src/io/tls/*.cpp")
 endif()
 
-file(GLOB SOURCES "src/agent/agent_docs.cpp" "src/agent/build_description.cpp" 
"src/properties/*.cpp" "src/utils/file/*.cpp" "src/sitetosite/*.cpp"  
"src/core/logging/*.cpp" "src/core/logging/internal/*.cpp" 
"src/core/logging/alert/*.cpp" "src/core/state/*.cpp" 
"src/core/state/nodes/*.cpp" "src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" 
"src/c2/*.cpp" "src/io/*.cpp" ${SOCKET_SOURCES} ${TLS_SOURCES} 
"src/core/controller/*.cpp" "src/controllers/*.cpp" 
"src/controllers/keyvalue/*.cpp" "src [...]
+file(GLOB SOURCES "src/agent/*.cpp" "src/properties/*.cpp" 
"src/utils/file/*.cpp" "src/sitetosite/*.cpp"  "src/core/logging/*.cpp" 
"src/core/logging/internal/*.cpp" "src/core/logging/alert/*.cpp" 
"src/core/state/*.cpp" "src/core/state/nodes/*.cpp" "src/c2/protocols/*.cpp" 
"src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp" ${SOCKET_SOURCES} 
${TLS_SOURCES} "src/core/controller/*.cpp" "src/controllers/*.cpp" 
"src/controllers/keyvalue/*.cpp" "src/core/*.cpp"  "src/core/repository/*.cpp" 
" [...]
 # manually add this as it might not yet be present when this executes
 list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/agent_version.cpp")
 
diff --git a/libminifi/include/agent/JsonSchema.h b/libminifi/include/agent/JsonSchema.h
new file mode 100644
index 000000000..bc843ee6f
--- /dev/null
+++ b/libminifi/include/agent/JsonSchema.h
@@ -0,0 +1,26 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+namespace org::apache::nifi::minifi::docs {
+
+std::string generateJsonSchema();  // returns the generated JSON schema document as text
+
+}  // namespace org::apache::nifi::minifi::docs
diff --git a/libminifi/include/core/PropertyValue.h b/libminifi/include/core/PropertyValue.h
index fc1af9f05..e40d77b53 100644
--- a/libminifi/include/core/PropertyValue.h
+++ b/libminifi/include/core/PropertyValue.h
@@ -106,6 +106,10 @@ class PropertyValue : public state::response::ValueNode {
     return convertImpl<bool>("bool");
   }
 
+  operator double() const {  // conversion to double, delegating to convertImpl like the bool conversion above
+    return convertImpl<double>("double");  // the string argument names the target type for convertImpl
+  }
+
   const char* c_str() const {
     if (!isValueUsable()) {
       throw utils::internal::InvalidValueException("Cannot convert invalid 
value");
diff --git a/libminifi/src/agent/JsonSchema.cpp b/libminifi/src/agent/JsonSchema.cpp
new file mode 100644
index 000000000..857525e4e
--- /dev/null
+++ b/libminifi/src/agent/JsonSchema.cpp
@@ -0,0 +1,467 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "agent/JsonSchema.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "agent/agent_version.h"
+#include "agent/build_description.h"
+#include "rapidjson/document.h"
+#include "rapidjson/prettywriter.h"
+#include "RemoteProcessorGroupPort.h"
+#include "utils/gsl.h"
+
+#include "range/v3/view/filter.hpp"
+#include "range/v3/view/transform.hpp"
+#include "range/v3/view/join.hpp"
+#include "range/v3/range/conversion.hpp"
+
+namespace org::apache::nifi::minifi::docs {
+
+static std::string escape(std::string str) {  // escapes str so it can be placed between double quotes in the generated JSON
+  utils::StringUtils::replaceAll(str, "\\", "\\\\");  // backslashes first, so the ones introduced below are not escaped again
+  utils::StringUtils::replaceAll(str, "\"", "\\\"");  // presumably replaceAll edits str in place (its result is unused) - verify
+  utils::StringUtils::replaceAll(str, "\n", "\\n");  // NOTE(review): other control characters (tab, CR, ...) are passed through unchanged
+  return str;
+}
+
+static std::string prettifyJson(const std::string& str) {  // parses str as JSON and re-serializes it with rapidjson's PrettyWriter
+  rapidjson::Document doc;
+  rapidjson::ParseResult res = doc.Parse(str.c_str(), str.length());
+  gsl_Assert(res);  // the input is expected to be valid JSON; a parse failure trips this assertion
+
+  rapidjson::StringBuffer buffer;
+
+  rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
+  doc.Accept(writer);  // serializes the parsed document into buffer
+
+  return std::string{buffer.GetString(), buffer.GetSize()};
+}
+
+void writePropertySchema(const core::Property& prop, std::ostream& out) {  // writes `"<name>" : {description[, enum], type[, default]}` for one property
+  out << "\"" << escape(prop.getName()) << "\" : {";
+  out << R"("description": ")" << escape(prop.getDescription()) << "\"";
+  if (const auto& values = prop.getAllowedValues(); !values.empty()) {  // allowed values become a JSON "enum" of strings
+    out << R"(, "enum": [)"
+        << (values
+            | ranges::views::transform([] (auto& val) {return '"' + escape(val.to_string()) + '"';})
+            | ranges::views::join(',')
+            | ranges::to<std::string>())
+        << "]";
+  }
+  if (const auto& def_value = prop.getDefaultValue(); !def_value.empty()) {  // the schema type is derived from the default value's type
+    const auto& type = def_value.getTypeInfo();
+    // order is important as both DataSizeValue and TimePeriodValue's type_id is uint64_t
+    if (std::dynamic_pointer_cast<core::DataSizeValue>(def_value.getValue())
+        || std::dynamic_pointer_cast<core::TimePeriodValue>(def_value.getValue())) {  // NOLINT(bugprone-branch-clone)
+      // special value types
+      out << R"(, "type": "string", "default": ")" << escape(def_value.to_string()) << "\"";
+    } else if (type == state::response::Value::INT_TYPE
+        || type == state::response::Value::INT64_TYPE
+        || type == state::response::Value::UINT32_TYPE
+        || type == state::response::Value::UINT64_TYPE) {
+      out << R"(, "type": "integer", "default": )" << static_cast<int64_t>(def_value);
+    } else if (type == state::response::Value::DOUBLE_TYPE) {
+      out << R"(, "type": "number", "default": )" << static_cast<double>(def_value);
+    } else if (type == state::response::Value::BOOL_TYPE) {
+      out << R"(, "type": "boolean", "default": )" << (static_cast<bool>(def_value) ? "true" : "false");
+    } else {  // every other value type is emitted as a string
+      out << R"(, "type": "string", "default": ")" << escape(def_value.to_string()) << "\"";
+    }
+  } else {
+    // no default value, no type information, fallback to string
+    out << R"(, "type": "string")";
+  }
+  out << "}";  // property.getName()
+}
+
+template<typename PropertyContainer>
+void writeProperties(const PropertyContainer& props, bool supports_dynamic, std::ostream& out) {  // writes the `"Properties": {...}` object schema for props
+  out << R"("Properties": {)"
+        << R"("type": "object",)"
+        << R"("additionalProperties": )" << (supports_dynamic? "true" : "false") << ","  // extra keys are allowed only when supports_dynamic is true
+        << R"("required": [)"
+        << (props
+            | ranges::views::filter([] (auto& prop) {return prop.getRequired() && prop.getDefaultValue().empty();})
+            | ranges::views::transform([] (auto& prop) {return '"' + escape(prop.getName()) + '"';})
+            | ranges::views::join(',')
+            | ranges::to<std::string>())
+        << "]";  // only required properties without a default value end up in "required"
+
+  out << R"(, "properties": {)";
+  for (size_t prop_idx = 0; prop_idx < props.size(); ++prop_idx) {
+    const auto& property = props[prop_idx];
+    if (prop_idx != 0) out << ",";  // comma-separate the entries, no trailing comma
+    writePropertySchema(property, out);
+  }
+  out << "}";  // "properties"
+  out << "}";  // "Properties"
+}
+
+static std::string buildSchema(const std::unordered_map<std::string, std::string>& relationships, const std::string& processors, const std::string& controller_services) {  // assembles the schema text from pre-rendered JSON fragments and returns it pretty-printed
+  std::stringstream all_rels;
+  for (const auto& [name, rels] : relationships) {  // one "relationships-<name>" definition per entry, each followed by ", "
+    all_rels << "\"relationships-" << escape(name) << "\": " << rels << ", ";
+  }
+
+  std::stringstream remote_port_props;
+  writeProperties(minifi::RemoteProcessorGroupPort::properties(), minifi::RemoteProcessorGroupPort::SupportsDynamicProperties, remote_port_props);
+
+  std::string process_group_properties = R"(
+    "Processors": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/processor"}
+    },
+    "Connections": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/connection"}
+    },
+    "Controller Services": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/controller_service"}
+    },
+    "Remote Process Groups": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/remote_process_group"}
+    },
+    "Process Groups": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/simple_process_group"}
+    },
+    "Funnels": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/funnel"}
+    },
+    "Input Ports": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/port"}
+    },
+    "Output Ports": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/port"}
+    }
+  )";
+
+  std::stringstream cron_pattern;
+  {
+    const char* all = "\\\\*";  // two backslashes in the text, so the JSON string decodes to the regex \*
+    const char* any = "\\\\?";  // likewise decodes to the regex \?
+    const char* increment = "(-?[0-9]+)";
+    const char* secs = "([0-5]?[0-9])";
+    const char* mins = "([0-5]?[0-9])";
+    const char* hours = "(1?[0-9]|2[0-3])";
+    const char* days = "([1-2]?[0-9]|3[0-1])";
+    const char* months = "([0-9]|1[0-2]|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)";
+    const char* weekdays = "([0-7]|sun|mon|tue|wed|thu|fri|sat)";
+    const char* years = "([0-9]+)";
+
+    auto makeCommon = [&] (const char* pattern) {  // alternatives shared by every field: *, ?, list, range, step
+      std::stringstream common;
+      common << all << "|" << any
+        << "|" << pattern << "(," << pattern << ")*"
+        << "|" << pattern << "-" << pattern
+        << "|" << "(" << all << "|" << pattern << ")" << "/" << increment;
+      return std::move(common).str();
+    };
+
+    cron_pattern << "^"
+      << "(" << makeCommon(secs) << ")"
+      << " (" << makeCommon(mins) << ")"
+      << " (" << makeCommon(hours) << ")"
+      << " (" << makeCommon(days) << "|LW|L|L-" << days << "|" << days << "W" << ")"
+      << " (" << makeCommon(months) << ")"
+      << " (" << makeCommon(weekdays) << "|" << weekdays << "?L|" << weekdays << "#" << "[1-5]" << ")"
+      << "( (" << makeCommon(years) << "))?"
+      << "$";
+  }
+
+  // the schema specification does not allow case-insensitive regex
+  std::stringstream cron_pattern_case_insensitive;
+  for (char ch : cron_pattern.str()) {  // every letter x of the pattern is replaced by the character class [xX]
+    if (std::isalpha(static_cast<unsigned char>(ch))) {
+      cron_pattern_case_insensitive << "["
+          << static_cast<char>(std::tolower(static_cast<unsigned char>(ch)))
+          << static_cast<char>(std::toupper(static_cast<unsigned char>(ch)))
+          << "]";
+    } else {
+      cron_pattern_case_insensitive << ch;
+    }
+  }
+
+  return prettifyJson(R"(
+{
+  "$schema": "http://json-schema.org/draft-07/schema",
+  "definitions": {)" + std::move(all_rels).str() + R"(
+    "datasize": {
+      "type": "string",
+      "pattern": "^\\s*[0-9]+\\s*(B|K|M|G|T|P|KB|MB|GB|TB|PB)\\s*$"
+    },
+    "uuid": {
+      "type": "string",
+      "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",
+      "default": "00000000-0000-0000-0000-000000000000"
+    },
+    "cron_pattern": {
+      "type": "string",
+      "pattern": ")" + std::move(cron_pattern_case_insensitive).str() + R"("
+    },
+    "remote_port": {
+      "type": "object",
+      "required": ["name", "id", "Properties"],
+      "properties": {
+        "name": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"},
+        "max concurrent tasks": {"type": "integer"},
+        )" + std::move(remote_port_props).str() +  R"(
+      }
+    },
+    "port": {
+      "type": "object",
+      "required": ["name", "id"],
+      "properties": {
+        "name": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"}
+      }
+    },
+    "time": {
+      "type": "string",
+      "pattern": "^\\s*[0-9]+\\s*(ns|nano|nanos|nanoseconds|nanosecond|us|micro|micros|microseconds|microsecond|msec|ms|millisecond|milliseconds|msecs|millis|milli|sec|s|second|seconds|secs|min|m|mins|minute|minutes|h|hr|hour|hrs|hours|d|day|days)\\s*$"
+    },
+    "controller_service": {"allOf": [{
+      "type": "object",
+      "required": ["name", "id", "class"],
+      "properties": {
+        "name": {"type": "string"},
+        "class": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"}
+      }
+    }, )" + controller_services + R"(]},
+    "processor": {"allOf": [{
+      "type": "object",
+      "required": ["name", "id", "class", "scheduling strategy"],
+      "additionalProperties": false,
+      "properties": {
+        "name": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"},
+        "class": {"type": "string"},
+        "max concurrent tasks": {"type": "integer", "default": 1},
+        "penalization period": {"$ref": "#/definitions/time"},
+        "yield period": {"$ref": "#/definitions/time"},
+        "run duration nanos": {"$ref": "#/definitions/time"},
+        "Properties": {},
+        "scheduling strategy": {"enum": ["EVENT_DRIVEN", "TIMER_DRIVEN", "CRON_DRIVEN"]},
+        "scheduling period": {},
+        "auto-terminated relationships list": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "uniqueItems": true
+        }
+      }}, {
+        "if": {"properties": {"scheduling strategy": {"const": "EVENT_DRIVEN"}}},
+        "then": {"properties": {"scheduling period": false}}
+      }, {
+        "if": {"properties": {"scheduling strategy": {"const": "TIMER_DRIVEN"}}},
+        "then": {"required": ["scheduling period"], "properties": {"scheduling period": {"$ref": "#/definitions/time"}}}
+      }, {
+        "if": {"properties": {"scheduling strategy": {"const": "CRON_DRIVEN"}}},
+        "then": {"required": ["scheduling period"], "properties": {"scheduling period": {"$ref": "#/definitions/cron_pattern"}}}
+      })" + (!processors.empty() ? ", " : "") + processors + R"(]
+    },
+    "remote_process_group": {"allOf": [{
+      "type": "object",
+      "required": ["name", "id", "Input Ports"],
+      "properties": {
+        "name": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"},
+        "url": {"type": "string"},
+        "yield period": {"$ref": "#/definitions/time"},
+        "timeout": {"$ref": "#/definitions/time"},
+        "local network interface": {"type": "string"},
+        "transport protocol": {"enum": ["HTTP", "RAW"]},
+        "Input Ports": {
+          "type": "array",
+          "items": {"$ref": "#/definitions/remote_port"}
+        },
+        "Output Ports": {
+          "type": "array",
+          "items": {"$ref": "#/definitions/remote_port"}
+        }
+      }
+    }, {
+      "if": {"properties": {"transport protocol": {"const": "HTTP"}}},
+      "then": {"properties": {
+        "proxy host": {"type": "string"},
+        "proxy user": {"type": "string"},
+        "proxy password": {"type": "string"},
+        "proxy port": {"type": "integer"}
+      }}
+    }]},
+    "connection": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["name", "id", "source id", "source relationship names", "destination id"],
+      "properties": {
+        "name": {"type": "string"},
+        "id": {"$ref": "#/definitions/uuid"},
+        "source name": {"type": "string"},
+        "source id": {"$ref": "#/definitions/uuid"},
+        "source relationship names": {
+          "type": "array",
+          "items": {"type": "string"}
+        },
+        "destination name": {"type": "string"},
+        "destination id": {"$ref": "#/definitions/uuid"},
+        "max work queue size": {"type": "integer", "default": 10000},
+        "max work queue data size": {"$ref": "#/definitions/datasize", "default": "10 MB"},
+        "flowfile expiration": {"$ref": "#/definitions/time", "default": "0 ms"}
+      }
+    },
+    "funnel": {
+      "type": "object",
+      "required": ["id"],
+      "properties": {
+        "id": {"$ref": "#/definitions/uuid"},
+        "name": {"type": "string"}
+      }
+    },
+    "simple_process_group": {
+      "type": "object",
+      "required": ["name"],
+      "additionalProperties": false,
+      "properties": {
+        "name": {"type": "string"},
+        "version": {"type": "integer"},
+        "onschedule retry interval": {"$ref": "#/definitions/time"},
+        )" + process_group_properties + R"(
+      }
+    },
+    "root_process_group": {
+      "type": "object",
+      "required": ["Flow Controller"],
+      "additionalProperties": false,
+      "properties": {
+        "$schema": {"type": "string"},
+        "Flow Controller": {
+          "type": "object",
+          "required": ["name"],
+          "properties": {
+            "name": {"type": "string"},
+            "version": {"type": "integer"},
+            "onschedule retry interval": {"$ref": "#/definitions/time"}
+          }
+        },
+        )" + process_group_properties + R"(
+      }
+    }
+  },
+  "$ref": "#/definitions/root_process_group"
+}
+)");
+}
+
+std::string generateJsonSchema() {  // renders an if/then schema fragment per processor and controller service, then hands them to buildSchema
+  std::unordered_map<std::string, std::string> relationships;
+  std::vector<std::string> proc_schemas;
+  auto putProcSchema = [&] (const ClassDescription& proc) {  // appends one processor's fragment and records its relationship schema
+    std::stringstream schema;
+    schema
+        << "{"
+        << R"("if": {"properties": {"class": {"const": ")" << escape(proc.short_name_) << "\"}}},"
+        << R"("then": {)"
+        << R"("required": ["Properties"],)"
+        << R"("properties": {)";
+
+    if (proc.isSingleThreaded_) {
+      schema << R"("max concurrent tasks": {"const": 1},)";
+    }
+
+    schema << R"("auto-terminated relationships list": {"items": {"$ref": "#/definitions/relationships-)" << escape(proc.short_name_) << "\"}},";
+    {
+      std::stringstream rel_schema;
+      rel_schema << R"({"anyOf": [)";
+      if (proc.dynamic_relationships_) {  // dynamic relationships: any string is accepted in addition to the declared names
+        rel_schema << R"({"type": "string"})";
+      }
+      for (size_t rel_idx = 0; rel_idx < proc.class_relationships_.size(); ++rel_idx) {
+        if (rel_idx != 0 || proc.dynamic_relationships_) rel_schema << ", ";  // separator needed once anything precedes this entry
+        rel_schema << R"({"const": ")" << escape(proc.class_relationships_[rel_idx].getName()) << "\"}";
+      }
+      rel_schema << "]}";
+      relationships[proc.short_name_] = std::move(rel_schema).str();
+    }
+
+    writeProperties(proc.class_properties_, proc.dynamic_properties_, schema);
+
+    schema << "}";  // "properties"
+    schema << "}";  // "then"
+    schema << "}";  // if-block
+
+    proc_schemas.push_back(std::move(schema).str());
+  };
+
+  std::vector<std::string> controller_services;
+  auto putControllerService = [&] (const ClassDescription& service) {  // same shape as above, without the relationship parts
+    std::stringstream schema;
+    schema
+        << "{"
+        << R"("if": {"properties": {"class": {"const": ")" << escape(service.short_name_) << "\"}}},"
+        << R"("then": {)"
+        << R"("required": ["Properties"],)"
+        << R"("properties": {)";
+
+    writeProperties(service.class_properties_, service.dynamic_properties_, schema);
+
+    schema << "}";  // "properties"
+    schema << "}";  // "then"
+    schema << "}";  // if-block
+
+    controller_services.push_back(std::move(schema).str());
+  };
+
+  const auto& descriptions = AgentDocs::getClassDescriptions();
+  for (const std::string& group : AgentBuild::getExtensions()) {
+    auto it = descriptions.find(group);
+    if (it == descriptions.end()) {  // extension without registered class descriptions: nothing to emit
+      continue;
+    }
+    for (const auto& proc : it->second.processors_) {
+      putProcSchema(proc);
+    }
+    for (const auto& service : it->second.controller_services_) {
+      putControllerService(service);
+    }
+  }
+
+  for (const auto& bundle : ExternalBuildDescription::getExternalGroups()) {
+    auto description = ExternalBuildDescription::getClassDescriptions(bundle.artifact);
+    for (const auto& proc : description.processors_) {
+      putProcSchema(proc);
+    }
+    for (const auto& service : description.controller_services_) {
+      putControllerService(service);
+    }
+  }
+
+  return buildSchema(relationships, utils::StringUtils::join(", ", proc_schemas), utils::StringUtils::join(", ", controller_services));
+}
+
+}  // namespace org::apache::nifi::minifi::docs
diff --git a/cmake/Nlohmann.cmake b/libminifi/test/schema-tests/CMakeLists.txt
similarity index 52%
rename from cmake/Nlohmann.cmake
rename to libminifi/test/schema-tests/CMakeLists.txt
index 9ad44c56c..3b8dbe0c2 100644
--- a/cmake/Nlohmann.cmake
+++ b/libminifi/test/schema-tests/CMakeLists.txt
@@ -17,8 +17,18 @@
 # under the License.
 #
 
-set(NLOHMANN_JSON_INCLUDE_DIR "${CMAKE_BINARY_DIR}/_deps/nlohmann/" CACHE STRING "" FORCE)
-if(NOT EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp")
-    file(DOWNLOAD "https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp" "${NLOHMANN_JSON_INCLUDE_DIR}/nlohmann/json.hpp"
-            EXPECTED_HASH SHA256=e832d339d9e0c042e7dff807754769d778cf5d6ae9730ce21eed56de99cb5e86)
-endif()
+include(JsonSchemaValidator)
+
+file(GLOB SCHEMA_TESTS  "*.cpp")  # every .cpp file in this directory becomes its own test executable
+SET(SCHEMA_TEST_COUNT 0)
+FOREACH(testfile ${SCHEMA_TESTS})
+  get_filename_component(testfilename "${testfile}" NAME_WE)  # target name = file name without directory and extension
+  add_executable("${testfilename}" "${testfile}")
+  createTests("${testfilename}")
+  target_link_libraries(${testfilename} ${CATCH_MAIN_LIB})
+  target_link_libraries(${testfilename} minifi-standard-processors)
+  target_link_libraries(${testfilename} nlohmann_json_schema_validator)
+  MATH(EXPR SCHEMA_TEST_COUNT "${SCHEMA_TEST_COUNT}+1")
+  add_test(NAME "${testfilename}" COMMAND "${testfilename}" WORKING_DIRECTORY ${TEST_DIR})
+ENDFOREACH()
+message("-- Finished building ${SCHEMA_TEST_COUNT} Json Schema related test file(s)...")
diff --git a/libminifi/test/schema-tests/SchemaTests.cpp b/libminifi/test/schema-tests/SchemaTests.cpp
new file mode 100644
index 000000000..d4907ec1c
--- /dev/null
+++ b/libminifi/test/schema-tests/SchemaTests.cpp
@@ -0,0 +1,258 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+
+#include "../TestBase.h"
+#include "../Catch.h"
+#include "../agent/JsonSchema.h"
+#include "nlohmann/json-schema.hpp"
+#include "utils/RegexUtils.h"
+#include "utils/StringUtils.h"
+
+struct JsonError {  // one validation failure, in the form ErrorHandler hands to its callback
+  std::string path;  // location of the offending node: the validator's JSON pointer rendered with to_string()
+  std::string error;  // message text supplied by the validator
+};
+
+class ErrorHandler : public nlohmann::json_schema::error_handler {
+ public:
+  explicit ErrorHandler(std::function<void(const JsonError&)> handler)
+    : handler_(std::move(handler)) {}
+  void error(const nlohmann::json::json_pointer& ptr, const nlohmann::json& 
/*instance*/, const std::string& message) override {
+    handler_(JsonError{ptr.to_string(), message});
+  }
+
+ private:
+  std::function<void(const JsonError&)> handler_;
+};
+
+void extractExpectedErrors(nlohmann::json& node, const std::string& path, std::unordered_map<std::string, std::string>& errors) {
+  const bool is_marker = node.is_object() && node.contains("$err");  // marker objects look like {"$err": <pattern>, "$value": <real value>}
+  if (is_marker) {  // remember the expected pattern under this node's path, then replace the marker by its value in place
+    errors[path] = node["$err"].get<std::string>();
+    node = node["$value"];
+  }
+  if (!node.is_object() && !node.is_array()) { return; }  // scalars have no children to visit
+  for (auto& [child_key, child] : node.items()) {  // for arrays the key is presumably the element index - confirm against nlohmann's items()
+    extractExpectedErrors(child, utils::StringUtils::join_pack(path, "/", child_key), errors);
+  }
+}
+
+TEST_CASE("The generated JSON schema matches a valid json flow") {  // positive case: the flow below is expected to validate
+  const nlohmann::json config_schema = 
nlohmann::json::parse(minifi::docs::generateJsonSchema());
+
+  nlohmann::json_schema::json_validator validator;
+  validator.set_root_schema(config_schema);  // validate against the schema text produced by generateJsonSchema()
+
+  auto config_json = R"(
+    {
+      "Flow Controller": {"name": "Test"},
+      "Processors": [
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "class": "GenerateFlowFile",
+          "name": "Proc1",
+          "scheduling strategy": "TIMER_DRIVEN",
+          "scheduling period": "1 min",
+          "Properties": {}
+        }
+      ],
+      "Connections": [
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "name": "Conn1",
+          "source id": "00000000-0000-0000-0000-000000000000",
+          "source relationship names": ["success"],
+          "destination id": "00000000-0000-0000-0000-000000000000"
+        }
+      ],
+      "Input Ports": [
+        {"id": "00000000-0000-0000-0000-000000000000", "name": "In1"}
+      ],
+      "Output Ports": [
+        {"id": "00000000-0000-0000-0000-000000000000", "name": "Out1"}
+      ],
+      "Funnels": [
+        {"id": "00000000-0000-0000-0000-000000000000", "name": "Fun1"}
+      ],
+      "Process Groups": [
+        {
+          "name": "Group1",
+          "Processors": [],
+          "Connections": [],
+          "Process Groups": []
+        }
+      ],
+      "Remote Process Groups": [
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "name": "RPG1",
+          "Input Ports": [{
+            "id": "00000000-0000-0000-0000-000000000000",
+            "name": "RIn1",
+            "Properties": {
+              "Host Name": "localhost"
+            }
+          }]
+        }
+      ],
+      "Controller Services": [
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "class": "SSLContextService",
+          "name": "Service1",
+          "Properties": {
+            "Client Certificate": "",
+            "Private Key": "",
+            "Passphrase": "",
+            "CA Certificate": ""
+          }
+        }
+      ]
+    }
+  )"_json;
+
+  validator.validate(config_json);  // no error handler is passed; presumably a violation makes validate() throw and thus fails the test - confirm
+}
+
+TEST_CASE("The JSON schema detects invalid values in the json flow") {  // negative case: each marked node must be reported, and nothing else
+  const nlohmann::json config_schema = nlohmann::json::parse(minifi::docs::generateJsonSchema());
+
+  nlohmann::json_schema::json_validator validator;
+  validator.set_root_schema(config_schema);
+
+  // the objects of type {"$err": <error>, "$value": <value>} are special
+  // in the sense that they are preprocessed, replaced by <value> and we expect
+  // a validation error at the position of this object that matches the
+  // regex <error>
+  auto config_json = R"(
+    {
+      "Flow Controller": {"name": "Test"},
+      "Processors": [
+        {"$err": "property 'scheduling period' not found", "$value": {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "class": "GenerateFlowFile",
+          "name": "Proc1",
+          "scheduling strategy": "TIMER_DRIVEN",
+          "Properties": {}
+        }},
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "class": "GenerateFlowFile",
+          "name": "Proc1",
+          "scheduling strategy": "TIMER_DRIVEN",
+          "scheduling period": "1 min",
+          "Properties": {
+            "Batch Size": {"$value": "not a number", "$err": "unexpected instance type"}
+          }
+        },
+        {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "class": "GenerateFlowFile",
+          "name": "Proc1",
+          "scheduling strategy": "TIMER_DRIVEN",
+          "scheduling period": "1 min",
+          "Properties": {"$err": "", "$value": {
+            "No such property": 5
+          }}
+        }
+      ],
+      "Connections": [
+        {"$err": "property 'name' not found", "$value": {
+          "id": {"$value": "00000000-0000-0000-0000-00000000000", "$err": ""},
+          "source id": "00000000-0000-0000-0000-000000000000",
+          "source relationship names": ["success"],
+          "destination id": "00000000-0000-0000-0000-000000000000"
+        }}
+      ],
+      "Input Ports": [
+        {"id": "00000000-0000-0000-0000-000000000000", "name": {"$value": 5, "$err": "unexpected instance type"}}
+      ],
+      "Output Ports": [
+        {"id": "00000000-0000-0000-0000-000000000000", "name": "Out1"}
+      ],
+      "Funnels": [
+        {"id": {"$value": 5, "$err": "unexpected instance type"}}
+      ],
+      "Process Groups": [
+        {"$err": "", "$value": {
+          "name": "Group1",
+          "no such property": []
+        }}
+      ],
+      "Remote Process Groups": [
+        {"$err": "property 'Input Ports' not found", "$value": {
+          "id": "00000000-0000-0000-0000-000000000000",
+          "name": "RPG1"
+        }}
+      ],
+      "Controller Services": [
+        {
+          "id": {"$value": "00000000-0000-0000-0000-00000000000", "$err": ""},
+          "class": "SSLContextService",
+          "name": "Service1",
+          "Properties": {
+            "Client Certificate": 6,
+            "Private Key": "",
+            "Passphrase": "",
+            "CA Certificate": ""
+          }
+        },
+        {
+          "id": "00000000-0000-0000-0000-000000000001",
+          "class": "SSLContextService",
+          "name": "Service1",
+          "Properties": {
+            "Client Certificate": {"$value": 6, "$err": "unexpected instance type"},
+            "Private Key": "",
+            "Passphrase": "",
+            "CA Certificate": ""
+          }
+        },
+        {"$value": "kenyer", "$err": "unexpected instance type"}
+      ]
+    }
+  )"_json;
+
+  std::unordered_map<std::string, std::string> errors;  // node path -> expected error regex; an empty pattern accepts any message
+  extractExpectedErrors(config_json, "", errors);
+
+  ErrorHandler err_handler{[&] (const JsonError& err) {
+    auto it = errors.find(err.path);
+    if (it == errors.end()) {
+      throw std::logic_error("Unexpected error in json flow at " + err.path + ": " + err.error);
+    }
+    if (!it->second.empty()) {
+      minifi::utils::Regex re(it->second);
+      if (!minifi::utils::regexSearch(err.error, re)) {
+        throw std::logic_error("Error in json flow at " + err.path + " does not match expected pattern, expected: '" + it->second + "', actual: " + err.error);
+      }
+    }
+    errors.erase(it);  // an expectation is consumed once; a further error at the same path is treated as unexpected
+  }};
+  validator.validate(config_json, err_handler);
+
+  // all expected errors should have been processed
+  if (!errors.empty()) {
+    for (const auto& [path, err] : errors) {
+      std::cerr << "Expected error at " << path << ": " << err << std::endl;
+    }
+    throw std::logic_error("There were some expected errors that did not occur");
+  }
+}
diff --git a/minifi_main/MiNiFiMain.cpp b/minifi_main/MiNiFiMain.cpp
index 8dee11d0f..b41935130 100644
--- a/minifi_main/MiNiFiMain.cpp
+++ b/minifi_main/MiNiFiMain.cpp
@@ -63,6 +63,7 @@
 #include "FlowController.h"
 #include "AgentDocs.h"
 #include "MainHelper.h"
+#include "agent/JsonSchema.h"
 
 namespace minifi = org::apache::nifi::minifi;
 namespace core = minifi::core;
@@ -119,6 +120,15 @@ void dumpDocs(const std::shared_ptr<minifi::Configure> 
&configuration, const std
   docsCreator.generate(dir, out);
 }
 
+void writeJsonSchema(const std::shared_ptr<minifi::Configure> &configuration, std::ostream& out) {
+  // The creator stays alive until this function returns, i.e. also while the schema is generated (presumably so what it sets up remains available - confirm).
+  const auto python_creator = core::ClassLoader::getDefaultClassLoader().instantiate("PythonCreator", "PythonCreator");
+  if (python_creator) {
+    python_creator->configure(configuration);
+  }
+  out << minifi::docs::generateJsonSchema();
+}
+
 int main(int argc, char **argv) {
 #ifdef WIN32
   RunAsServiceIfNeeded();
@@ -252,7 +262,7 @@ int main(int argc, char **argv) {
         exit(1);
       }
 
-      std::cerr << "Dumping docs to " << argv[2] << std::endl;
+      std::cout << "Dumping docs to " << argv[2] << std::endl;
       if (argc == 4) {
         std::string filepath;
         std::string filename;
@@ -269,6 +279,21 @@ int main(int argc, char **argv) {
       exit(0);
     }
 
+    if (argc >= 2 && std::string("schema") == argv[1]) {
+      if (argc != 3) {
+        std::cerr << "Malformed schema command, expected '<minifiexe> schema 
<output-file>'" << std::endl;
+        std::exit(1);
+      }
+
+      std::cout << "Writing json schema to " << argv[2] << std::endl;
+
+      {
+        std::ofstream schema_file{argv[2]};
+        writeJsonSchema(configure, schema_file);
+      }
+      std::exit(0);
+    }
+
     if (configure->get(minifi::Configure::nifi_graceful_shutdown_seconds, 
graceful_shutdown_seconds)) {
       try {
         stop_wait_time = std::stoi(graceful_shutdown_seconds);
diff --git a/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch 
b/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch
index 56d951a85..3c48b7d21 100644
--- a/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch
+++ b/thirdparty/google-cloud-cpp/nlohmann_lib_as_interface.patch
@@ -1,13 +1,26 @@
-diff --git a/cmake/IncludeNlohmannJson.cmake b/cmake/IncludeNlohmannJson.cmake
-index db8056ae0..613f18b97 100644
---- a/cmake/IncludeNlohmannJson.cmake
-+++ b/cmake/IncludeNlohmannJson.cmake
-@@ -23,7 +23,7 @@ function (find_nlohmann_json)
-     # library that is all we need.
-     find_path(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER "nlohmann/json.hpp"
-               REQUIRED)
+diff -rupN orig/cmake/IncludeNlohmannJson.cmake 
patched/cmake/IncludeNlohmannJson.cmake
+--- google-cloud-cpp-1.37.0/cmake/IncludeNlohmannJson.cmake    2022-03-01 
19:09:39.000000000 +0100
++++ google-cloud-cpp-1.37.0-new/cmake/IncludeNlohmannJson.cmake        
2022-12-09 10:32:45.000000000 +0100
+@@ -14,21 +14,4 @@
+ # limitations under the License.
+ # ~~~
+ 
+-function (find_nlohmann_json)
+-    find_package(nlohmann_json CONFIG QUIET)
+-    if (nlohmann_json_FOUND)
+-        return()
+-    endif ()
+-    # As a fall back, try finding the header. Since this is a header-only
+-    # library that is all we need.
+-    find_path(GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER "nlohmann/json.hpp"
+-              REQUIRED)
 -    add_library(nlohmann_json::nlohmann_json UNKNOWN IMPORTED)
-+    add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)
-     set_property(
-         TARGET nlohmann_json::nlohmann_json
-         APPEND
+-    set_property(
+-        TARGET nlohmann_json::nlohmann_json
+-        APPEND
+-        PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+-                 ${GOOGLE_CLOUD_CPP_NLOHMANN_JSON_HEADER})
+-endfunction ()
+-
+-find_nlohmann_json()
++find_package(nlohmann_json REQUIRED)


Reply via email to