adamdebreceni commented on code in PR #1692:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1692#discussion_r1423923909


##########
extensions/standard-processors/utils/JoltUtils.cpp:
##########
@@ -0,0 +1,1134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "JoltUtils.h"
+#include "rapidjson/error/en.h"
+#include "Exception.h"
+
+namespace org::apache::nifi::minifi::utils::jolt {
+
+
+static bool isSpecialChar(char ch) {
+  static constexpr std::array SPECIAL_CHARS{'.', '[', ']', '$', '&', '@', '#', 
'*'};
+  return std::find(SPECIAL_CHARS.begin(), SPECIAL_CHARS.end(), ch) != 
SPECIAL_CHARS.end();
+}
+
+bool Spec::Template::check(std::string_view str) {
+  enum class State {
+    Plain,
+    Escaped
+  } state = State::Plain;
+  for (char ch : str) {
+    switch (state) {
+      case State::Plain: {
+        if (ch == '&') {
+          return true;
+        } else if (ch == '\\') {
+          state = State::Escaped;
+        }
+        break;
+      }
+      case State::Escaped: {
+        state = State::Plain;
+        break;
+      }
+    }
+  }
+  return false;
+}
+
+nonstd::expected<std::pair<Spec::Template, Spec::It>, std::string> 
Spec::Template::parse(It begin, It end) {
+  enum class State {
+    Plain,
+    Escaped,
+    Template,  // &
+    SimpleIndex,  // &1
+    CanonicalTemplate,  // &(
+    ParentIndex,  // &(1
+    NextIndex,  // &(1,
+    MatchIndex  // &(1,0
+  };
+
+  std::vector<std::string> fragments;
+  std::vector<std::pair<size_t, size_t>> references;
+  fragments.push_back({});
+  State state = State::Plain;
+  std::string target;
+  // go beyond the last char on purpose
+  auto ch_it = begin;
+  while (ch_it <= end) {
+    std::optional<char> ch;
+    if (ch_it < end) {
+      ch = *ch_it;
+    }
+    bool force_terminate = false;
+    switch (state) {
+      case State::Plain: {
+        if (ch == '\\') {
+          state = State::Escaped;
+        } else if (ch == '&') {
+          references.push_back({});
+          fragments.push_back({});
+          state = State::Template;
+        } else if (ch == ')' || ch == ']' || ch == '.' || ch == '[') {
+          force_terminate = true;
+        } else if (ch) {
+          fragments.back() += ch.value();
+        }
+        break;
+      }
+      case State::Escaped: {
+        if (!ch) {
+          return nonstd::make_unexpected("Unterminated escape sequence");
+        }
+        if (ch != '\\' && !isSpecialChar(ch.value())) {
+          return nonstd::make_unexpected(fmt::format("Unknown escape sequence 
in template '\\{}'", ch.value()));
+        }
+        fragments.back() += ch.value();
+        state = State::Plain;
+        break;
+      }
+      case State::Template: {
+        if (ch == '(') {
+          state = State::CanonicalTemplate;
+        } else if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) 
{
+          target.clear();
+          target += ch.value();
+          state = State::SimpleIndex;
+        } else {
+          state = State::Plain;
+          // reprocess this char in a different state
+          --ch_it;
+        }
+        break;
+      }
+      case State::SimpleIndex: {
+        if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else {
+          references.back().first = std::stoi(target);
+          state = State::Plain;
+          // reprocess this char in a different state
+          --ch_it;
+        }
+        break;
+      }
+      case State::CanonicalTemplate: {
+        if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target.clear();
+          target += ch.value();
+          state = State::ParentIndex;
+        } else {
+          return nonstd::make_unexpected(fmt::format("Expected an index at 
{}", std::distance(begin, ch_it)));
+        }
+        break;
+      }
+      case State::ParentIndex: {
+        if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else if (ch == ',') {
+          references.back().first = std::stoi(target);
+          state = State::NextIndex;
+        } else if (ch == ')') {
+          references.back().first = std::stoi(target);
+          state = State::Plain;
+        } else {
+          return nonstd::make_unexpected(fmt::format("Invalid character at {}, 
expected digit, comma or close parenthesis", std::distance(begin, ch_it)));
+        }
+        break;
+      }
+      case State::NextIndex: {
+        if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target.clear();
+          target += ch.value();
+          state = State::MatchIndex;
+        } else {
+          return nonstd::make_unexpected(fmt::format("Expected an index at 
{}", std::distance(begin, ch_it)));
+        }
+        break;
+      }
+      case State::MatchIndex: {
+        if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else if (ch == ')') {
+          references.back().second = std::stoi(target);
+          state = State::Plain;
+        } else {
+          return nonstd::make_unexpected(fmt::format("Invalid character at {}, 
expected digit or close parenthesis", std::distance(begin, ch_it)));
+        }
+        break;
+      }
+    }
+    if (force_terminate) {
+      break;
+    }
+    if (ch_it != end) {
+      ++ch_it;
+    } else {
+      break;
+    }
+  }
+
+  gsl_Assert(state == State::Plain);
+  return std::pair<Template, It>{Template{std::move(fragments), 
std::move(references)}, ch_it};
+}
+
+bool Spec::Regex::check(std::string_view str) {
+  enum class State {
+    Plain,
+    Escaped
+  } state = State::Plain;
+  for (char ch : str) {
+    switch (state) {
+      case State::Plain: {
+        if (ch == '*') {
+          return true;
+        } else if (ch == '\\') {
+          state = State::Escaped;
+        }
+        break;
+      }
+      case State::Escaped: {
+        state = State::Plain;
+        break;
+      }
+    }
+  }
+  return false;
+}
+
+nonstd::expected<Spec::Regex, std::string> Spec::Regex::parse(std::string_view 
str) {
+  enum class State {
+    Plain,
+    Escaped
+  };
+  std::vector<std::string> fragments;
+  fragments.push_back({});
+  State state = State::Plain;
+  for (size_t idx = 0; idx <= str.size(); ++idx) {
+    std::optional<char> ch;
+    if (idx < str.size()) {
+      ch = str[idx];
+    }
+    switch (state) {
+      case State::Plain: {
+        if (ch == '\\') {
+          state = State::Escaped;
+        } else if (ch == '*') {
+          fragments.push_back({});
+        } else if (ch) {
+          fragments.back() += ch.value();
+        }
+        break;
+      }
+      case State::Escaped: {
+        if (!ch) {
+          return nonstd::make_unexpected("Unterminated escape sequence");
+        }
+        if (ch != '\\' && !isSpecialChar(ch.value())) {
+          return nonstd::make_unexpected(fmt::format("Unknown escape sequence 
in pattern '\\{}'", ch.value()));
+        }
+        fragments.back() += ch.value();
+        state = State::Plain;
+        break;
+      }
+    }
+  }
+  gsl_Assert(state == State::Plain);
+  return Regex{std::move(fragments)};
+}
+
+std::string Spec::Template::eval(const Context& ctx) const {
+  std::string res;
+  for (size_t idx = 0; idx + 1 < fragments.size(); ++idx) {
+    res += fragments.at(idx);
+    auto& ref = references.at(idx);
+    auto* target = ctx.find(ref.first);
+    if (!target) {
+      throw Exception(GENERAL_EXCEPTION, fmt::format("Invalid reference to {} 
at {}", ref.first, ctx.path()));
+    }
+    if (target->matches.size() <= ref.second) {
+      throw Exception(GENERAL_EXCEPTION, fmt::format("Could not find match {} 
in '{}' at {}", ref.second, target->matches.at(0), ctx.path()));
+    }
+    res += target->matches.at(ref.second);
+  }
+  res += fragments.back();
+  return res;
+}
+
+std::optional<std::vector<std::string_view>> 
Spec::Regex::match(std::string_view str) const {
+  std::vector<std::string_view> matches;
+  matches.push_back(str);
+  if (fragments.size() == 1) {
+    if (str == fragments.front()) {
+      return matches;
+    } else {
+      return std::nullopt;
+    }
+  }
+
+  // first fragment is at the beginning of the string
+  if (str.substr(0, fragments.front().size()) != fragments.front()) {
+    return std::nullopt;
+  }
+  auto it = str.begin() + fragments.front().size();
+  for (size_t idx = 1; idx + 1 < fragments.size(); ++idx) {
+    auto& frag = fragments[idx];
+    auto next_it = std::search(it, str.end(), frag.begin(), frag.end());
+    if (next_it == str.end() && !frag.empty()) {
+      return std::nullopt;
+    }
+    matches.push_back({it, next_it});
+    it = next_it + frag.size();
+  }
+  // last fragment is at the end of the string
+  if (gsl::narrow<size_t>(std::distance(it, str.end())) < 
fragments.back().size()) {
+    // not enough characters left
+    return std::nullopt;
+  }
+  auto next_it = std::next(str.rbegin(), fragments.back().size()).base();
+  if (std::string_view(next_it, str.end()) != fragments.back()) {
+    return std::nullopt;
+  }
+  matches.push_back({it, next_it});
+  return matches;
+}
+
+namespace {
+
+nonstd::expected<std::pair<Spec::Destination, Spec::It>, std::string> 
parseDestination(const Spec::Context& ctx, Spec::It begin, Spec::It end);
+Spec::Destinations parseDestinations(const Spec::Context& ctx, const 
rapidjson::Value& val);
+
+Spec::Pattern::Value parseValue(const Spec::Context& ctx, const 
rapidjson::Value& val);
+
+std::pair<size_t, size_t> parseKeyAccess(std::string_view str) {
+  enum class State {
+    Begin,
+    BeginRef,
+    PrimaryIndex,
+    BeginFirstIndex,
+    FirstIndex,
+    BeginSecondIndex,
+    SecondIndex,
+    End
+  } state = State::Begin;
+  std::string target;
+  std::pair<size_t, size_t> result{0, 0};
+  for (size_t idx = 0; idx <= str.size(); ++idx) {
+    std::optional<char> ch;
+    if (idx < str.size()) {
+      ch = str[idx];
+    }
+    switch (state) {
+      case State::Begin: {
+        if (ch != '$') {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected '$' in key 
access in '{}' at {}", str, idx));
+        }
+        state = State::BeginRef;
+        break;
+      }
+      case State::BeginRef: {
+        if (ch == '(') {
+          state = State::BeginFirstIndex;
+        } else if (ch && std::isdigit(static_cast<unsigned char>(ch.value()))) 
{
+          target.clear();
+          target += ch.value();
+          state = State::PrimaryIndex;
+        } else if (ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected index in 
key access in '{}' at {}", str, idx));
+        }
+        break;
+      }
+      case State::PrimaryIndex: {
+        if (!ch) {
+          result.first = std::stoull(target);
+        } else if (std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected digit in 
key access in '{}' at {}", str, idx));
+        }
+        break;
+      }
+      case State::BeginFirstIndex: {
+        if (!ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unterminated first 
index in key access in '{}'", str));
+        } else if (std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target.clear();
+          target += ch.value();
+          state = State::FirstIndex;
+        } else {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected digit in 
key access in '{}' at {}", str, idx));
+        }
+        break;
+      }
+      case State::FirstIndex: {
+        if (!ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unterminated first 
index in key access in '{}'", str));
+        } else if (std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else if (ch == ',') {
+          result.first = std::stoull(target);
+          state = State::BeginSecondIndex;
+        }
+        break;
+      }
+      case State::BeginSecondIndex: {
+        if (!ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unterminated second 
index in key access in '{}'", str));
+        } else if (std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target.clear();
+          target += ch.value();
+          state = State::SecondIndex;
+        } else {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected digit in 
key access in '{}' at {}", str, idx));
+        }
+        break;
+      }
+      case State::SecondIndex: {
+        if (!ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unterminated second 
index in key access in '{}'", str));
+        } else if (std::isdigit(static_cast<unsigned char>(ch.value()))) {
+          target += ch.value();
+        } else if (ch == ')') {
+          result.second = std::stoull(target);
+          state = State::End;
+        }
+        break;
+      }
+      case State::End: {
+        if (ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Expected end of 
string in '{}' at {}", str, idx));
+        }
+        break;
+      }
+    }
+  }
+  return result;
+}
+
+std::string parseLiteral(std::string_view str) {
+  enum class State {
+    Plain,
+    Escaped
+  } state = State::Plain;
+  std::string result;
+  for (size_t idx = 0; idx <= str.size(); ++idx) {
+    std::optional<char> ch;
+    if (idx < str.size()) {
+      ch = str[idx];
+    }
+    switch (state) {
+      case State::Plain: {
+        if (ch == '\\') {
+          state = State::Escaped;
+        } else if (ch) {
+          result += ch.value();
+        }
+        break;
+      }
+      case State::Escaped: {
+        if (!ch) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unterminated escape 
sequence in '{}'", str));
+        }
+        if (ch != '\\' && !isSpecialChar(ch.value())) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Unknown escape 
sequence in literal '\\{}'", ch.value()));
+        }
+        result += ch.value();
+        state = State::Plain;
+        break;
+      }
+    }
+  }
+
+  gsl_Expects(state == State::Plain);
+  return result;
+}
+
+nonstd::expected<std::pair<Spec::Path, Spec::It>, std::string> parsePath(const 
Spec::Context& ctx, Spec::It begin, Spec::It end) {
+  auto dst = parseDestination(ctx, begin, end);
+  if (!dst) {
+    return nonstd::make_unexpected(std::move(dst.error()));
+  }
+  Spec::Path result;
+  for (auto&& [member, type] : std::move(dst->first)) {
+    if (!holds_alternative<Spec::Template>(member)) {
+      return nonstd::make_unexpected(fmt::format("Value reference at {} cannot 
contain nested value reference path", ctx.path()));
+    }
+    result.emplace_back(std::move(std::get<Spec::Template>(member)), type);
+  }
+  return std::pair<Spec::Path, Spec::It>{result, dst->second};
+}
+
+nonstd::expected<std::pair<Spec::ValueRef, Spec::It>, std::string> 
parseValueReference(const Spec::Context& ctx, Spec::It begin, Spec::It end, 
bool greedy_path) {
+  using ResultT = std::pair<Spec::ValueRef, Spec::It>;
+  auto it = begin;
+  if (it == end) {
+    return nonstd::make_unexpected("Cannot parse value reference from empty 
string");
+  }
+  if (*it != '@') {
+    return nonstd::make_unexpected("Value reference must start with '@'");
+  }
+  ++it;
+  if (it == end) {
+    return ResultT{{0, {}}, it};
+  }
+  if (*it != '(') {
+    if (std::isdigit(static_cast<unsigned char>(*it))) {
+      // format is @123...
+      auto idx_begin = it;
+      while (it != end && std::isdigit(static_cast<unsigned char>(*it))) {
+        ++it;
+      }
+      return ResultT{{std::stoull(std::string{idx_begin, it}), {}}, it};
+    }
+    // format is @field.inner
+    if (greedy_path) {
+      if (auto path = parsePath(ctx, it, end)) {
+        return ResultT{{0, std::move(path->first)}, path->second};
+      } else {
+        return ResultT {{0, {}}, it};
+      }
+    } else {
+      if (auto templ = Spec::Template::parse(it, end)) {
+        return ResultT{{0, Spec::Path{{std::move(templ->first), 
Spec::MemberType::FIELD}}}, templ->second};
+      } else {
+        return ResultT {{0, {}}, it};
+      }
+    }
+  }
+  ++it;
+  size_t idx = 0;
+  if (it != end && std::isdigit(static_cast<unsigned char>(*it))) {
+    auto idx_begin = it;
+    while (it != end && std::isdigit(static_cast<unsigned char>(*it))) {
+      ++it;
+    }
+    auto idx_end = it;
+    idx = std::stoull(std::string{idx_begin, idx_end});
+    if (it == end) {
+      return nonstd::make_unexpected("Expected ')' in value reference");
+    }
+    if (*it != ',') {
+      if (*it != ')') {
+        return nonstd::make_unexpected("Expected ')' in value reference");
+      }
+      ++it;
+      return ResultT{{idx, {}}, it};
+    }
+    // *it == ','
+    ++it;
+  }
+  if (it == end) {
+    return nonstd::make_unexpected("Expected member accessor in value 
reference");
+  }
+  auto path = parsePath(ctx, it, end);
+  if (!path) {
+    return nonstd::make_unexpected(fmt::format("Invalid path in value 
reference: {}", path.error()));
+  }
+  it = path->second;
+  if (it == end || *it != ')') {
+    return nonstd::make_unexpected("Expected ')' in value reference");
+  }
+  ++it;
+  return ResultT{{idx, std::move(path->first)}, it};
+}
+
+template<typename T>
+bool isAllDigits(T begin, T end) {
+  return std::all_of(begin, end, [] (auto ch) {return 
std::isdigit(static_cast<unsigned char>(ch));});
+}
+
+void parseMember(const Spec::Context& ctx, const 
std::unique_ptr<Spec::Pattern>& result, std::string_view name, const 
rapidjson::Value& member) {
+  if (name.starts_with("@")) {
+    if (auto ref = parseValueReference(ctx, name.begin(), name.end(), true)) {
+      if (ref->second != name.end()) {
+        throw Exception(GENERAL_EXCEPTION, "Failed to fully parse value 
reference");
+      }
+      Spec::Context sub_ctx = ctx.extend(ctx.matches, ctx.node);
+      result->values.push_back({Spec::ValueRef{ref->first}, 
parseValue(sub_ctx, member)});
+    } else {
+      throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to parse value 
reference at '{}/{}': {}", ctx.path(), name, ref.error()));
+    }
+  } else if (name.starts_with("$")) {
+    Spec::Context sub_ctx = ctx.extend({name}, nullptr);
+    result->keys.insert({parseKeyAccess(name), parseDestinations(sub_ctx, 
member)});
+  } else if (name.starts_with("#")) {
+    result->defaults.insert({std::string{name.substr(1)}, 
parseDestinations(ctx, member)});
+  } else {
+    const bool is_template = Spec::Template::check(name);
+    const bool is_regex = Spec::Regex::check(name);
+    if (is_template && is_regex) {
+      throw Exception(GENERAL_EXCEPTION, "Pattern cannot contain both & and 
*");
+    }
+    if (is_template) {
+      if (auto templ = Spec::Template::parse(name.begin(), name.end())) {
+        if (templ->second != name.end()) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to parse 
template at {}, unexpected char at {}", ctx.path(), std::distance(name.begin(), 
templ->second)));
+        }
+        // dry eval so we can check if the references refer to valid substrings
+        (void)templ->first.eval(ctx);
+        Spec::Context sub_ctx = ctx.extend({name}, nullptr);
+        result->templates.insert({templ->first, parseValue(sub_ctx, member)});
+      } else {
+        throw Exception(GENERAL_EXCEPTION, fmt::format("Error while parsing 
key template at {}: {}", ctx.path(), templ.error()));
+      }
+    } else if (is_regex) {
+      if (auto reg = Spec::Regex::parse(name)) {
+        Spec::Context sub_ctx = ctx.extend({name}, nullptr);
+        sub_ctx.matches.resize(reg.value().size());
+        result->regexes.insert({reg.value(), parseValue(sub_ctx, member)});
+      } else {
+        throw Exception(GENERAL_EXCEPTION, fmt::format("Error while parsing 
key regex at {}: {}", ctx.path(), reg.error()));
+      }
+    } else {
+      Spec::Context sub_ctx = ctx.extend({name}, nullptr);
+      std::optional<size_t> numeric_value;
+      auto literal_name = parseLiteral(name);
+      result->literal_indices.insert({literal_name, result->literals.size()});
+      if (isAllDigits(literal_name.begin(), literal_name.end())) {
+        numeric_value = std::stoull(literal_name);
+      }
+      result->literals.push_back({literal_name, numeric_value, 
parseValue(sub_ctx, member)});
+    }
+  }
+}
+
+std::unique_ptr<Spec::Pattern> parseMap(const Spec::Context& ctx, const 
rapidjson::Value& val) {
+  if (!val.IsObject()) {
+    throw Exception(GENERAL_EXCEPTION, fmt::format("Expected a map at '{}'", 
ctx.path()));
+  }
+  auto map = std::make_unique<Spec::Pattern>();
+
+  enum class State {
+    Plain,
+    Escaped
+  } state = State::Plain;
+
+  for (auto& [name_val, member] : val.GetObject()) {
+    std::string_view name{name_val.GetString(), name_val.GetStringLength()};
+    std::string subkey;
+    for (size_t idx = 0; idx <= name.size(); ++idx) {
+      std::optional<char> ch;
+      if (idx < name.size()) {
+        ch = name[idx];
+      }
+      switch (state) {
+        case State::Plain: {
+          if (ch == '\\') {
+            state = State::Escaped;
+          } else if (!ch || ch == '|') {
+            parseMember(ctx, map, subkey, member);
+            subkey.clear();
+          } else {
+            subkey += ch.value();
+          }
+          break;
+        }
+        case State::Escaped: {
+          if (!ch) {
+            throw Exception(GENERAL_EXCEPTION, "Unterminated escape sequence");
+          }
+          if (ch == '|') {
+            subkey += "|";
+          } else {
+            subkey += "\\";
+            subkey += ch.value();
+          }
+          state = State::Plain;
+          break;
+        }
+      }
+    }
+  }
+  return map;
+}
+
+nonstd::expected<std::pair<Spec::MatchingIndex, Spec::It>, std::string> 
parseMatchingIndex(Spec::It begin, Spec::It end) {
+  auto it = begin;
+  if (it == end) {
+    return nonstd::make_unexpected("Empty matching index");
+  }
+  if (*it != '#') {
+    return nonstd::make_unexpected("Matching must start with a '#'");
+  }
+  ++it;
+  auto idx_begin = it;
+  while (it != end && std::isdigit(static_cast<unsigned char>(*it))) {
+    ++it;
+  }
+  return std::pair<Spec::MatchingIndex, 
Spec::It>{std::stoull(std::string{idx_begin, it}), it};
+}
+
+// dot-delimited list of templates and value references
+nonstd::expected<std::pair<Spec::Destination, Spec::It>, std::string> 
parseDestination(const Spec::Context& ctx, Spec::It begin, Spec::It end) {
+  Spec::Destination result;
+  Spec::MemberType type = Spec::MemberType::FIELD;
+  auto ch_it = begin;
+  auto isEnd = [&] () {
+    return ch_it == end || *ch_it == ')';
+  };
+  while (!isEnd()) {
+    if (auto match_idx = parseMatchingIndex(ch_it, end)) {
+      if (type != Spec::MemberType::INDEX) {
+        return nonstd::make_unexpected("Matching index can only be used in 
index context, e.g. apple[#2]");
+      }
+      if (!ctx.find(match_idx->first)) {
+        return nonstd::make_unexpected(fmt::format("Invalid matching index at 
{} to ancestor {}", ctx.path(), match_idx->first));
+      }
+      result.push_back({match_idx->first, type});
+      ch_it = match_idx->second;
+    } else if (auto val_ref = parseValueReference(ctx, ch_it, end, false)) {
+      result.push_back({std::move(val_ref->first), type});
+      ch_it = val_ref->second;
+    } else if (auto templ = Spec::Template::parse(ch_it, end)) {
+      // dry eval to verify that references are valid
+      (void)templ->first.eval(ctx);
+      result.push_back({std::move(templ->first), type});
+      ch_it = templ->second;
+    } else {
+      return nonstd::make_unexpected(fmt::format("Could not parse neither 
value reference or template in {} at {}", ctx.path(), std::distance(begin, 
ch_it)));
+    }
+    if (type == Spec::MemberType::INDEX) {
+      if (ch_it == end || *ch_it != ']') {
+        return nonstd::make_unexpected(fmt::format("Expected closing index ']' 
in {} at {}", ctx.path(), std::distance(begin, ch_it)));
+      }
+      ++ch_it;
+    }
+    if (!isEnd()) {
+      if (*ch_it == '.') {
+        type = Spec::MemberType::FIELD;
+      } else if (*ch_it == '[') {
+        type = Spec::MemberType::INDEX;
+      } else {
+        return nonstd::make_unexpected(fmt::format("Unexpected destination 
delimiter '{}' in {} at {}", *ch_it, ctx.path(), std::distance(begin, ch_it)));
+      }
+      ++ch_it;
+      if (ch_it == end) {
+        if (type == Spec::MemberType::FIELD) {
+          return nonstd::make_unexpected(fmt::format("Unterminated member in 
{} at {}", ctx.path(), std::distance(begin, ch_it)));
+        } else {
+          return nonstd::make_unexpected(fmt::format("Unterminated indexed 
member in {} at {}", ctx.path(), std::distance(begin, ch_it)));
+        }
+      }
+    }
+  }
+
+  return std::pair<Spec::Destination, Spec::It>{result, ch_it};
+}
+
+Spec::Destinations parseDestinations(const Spec::Context& ctx, const 
rapidjson::Value& val) {
+  Spec::Destinations res;
+  if (val.IsNull()) {
+    return res;
+  }
+  if (val.IsArray()) {
+    for (rapidjson::SizeType i = 0; i < val.GetArray().Size(); ++i) {
+      auto& item = val.GetArray()[i];
+      if (!item.IsString()) {
+        throw Exception(GENERAL_EXCEPTION, fmt::format("Expected a string or 
array of strings at '{}/{}'", ctx.path(), i));
+      }
+      if (auto dst = parseDestination(ctx, item.GetString(), item.GetString() 
+ item.GetStringLength())) {
+        if (dst->second != item.GetString() + item.GetStringLength()) {
+          throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to fully 
parse destination at '{}/{}'", ctx.path(), i));
+        }
+        res.push_back(std::move(dst->first));
+      } else {
+        throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to parse 
destination at '{}/{}': {}", ctx.path(), i, dst.error()));
+      }
+    }
+  } else {
+    if (!val.IsString()) {
+      throw Exception(GENERAL_EXCEPTION, fmt::format("Expected a string or 
array of strings at '{}'", ctx.path()));
+    }
+    if (auto dst = parseDestination(ctx, val.GetString(), val.GetString() + 
val.GetStringLength())) {
+      if (dst->second != val.GetString() + val.GetStringLength()) {
+        throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to fully parse 
destination at '{}'", ctx.path()));
+      }
+      res.push_back(std::move(dst->first));
+    } else {
+      throw Exception(GENERAL_EXCEPTION, fmt::format("Failed to parse 
destination at '{}': {}", ctx.path(), dst.error()));
+    }
+  }
+  return res;
+}
+
+std::optional<std::string> jsonValueToString(const rapidjson::Value& val) {
+  if (val.IsString()) {
+    return std::string{val.GetString(), val.GetStringLength()};
+  }
+  if (val.IsUint64()) {
+    return std::to_string(val.GetUint64());
+  }
+  if (val.IsInt64()) {
+    return std::to_string(val.GetInt64());
+  }
+  if (val.IsDouble()) {
+    return std::to_string(static_cast<int64_t>(val.GetDouble()));

Review Comment:
   It is weird, but the reference implementation does this conversion as well: a double value is truncated to an integer before it is converted to a string.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@nifi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to