This is an automated email from the ASF dual-hosted git repository.
yongwww pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new a8bd559e42 [FFI] Lightweight json parser/writer (#18186)
a8bd559e42 is described below
commit a8bd559e42fa2558f0837eaab9cf3ec801b99a4b
Author: Tianqi Chen <[email protected]>
AuthorDate: Mon Aug 4 17:11:20 2025 -0400
[FFI] Lightweight json parser/writer (#18186)
This PR adds a lightweight json parser/writer to extra component.
---
ffi/CMakeLists.txt | 2 +
ffi/include/tvm/ffi/extra/json.h | 84 ++++
ffi/src/ffi/extra/json_parser.cc | 692 ++++++++++++++++++++++++++++++++
ffi/src/ffi/extra/json_writer.cc | 266 ++++++++++++
ffi/tests/cpp/extra/test_json_parser.cc | 363 +++++++++++++++++
ffi/tests/cpp/extra/test_json_writer.cc | 241 +++++++++++
6 files changed, 1648 insertions(+)
diff --git a/ffi/CMakeLists.txt b/ffi/CMakeLists.txt
index 76b2901c7a..b67611a273 100644
--- a/ffi/CMakeLists.txt
+++ b/ffi/CMakeLists.txt
@@ -66,6 +66,8 @@ if (TVM_FFI_USE_EXTRA_CXX_API)
list(APPEND tvm_ffi_objs_sources
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/structural_equal.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/structural_hash.cc"
+ "${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/json_parser.cc"
+ "${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/json_writer.cc"
)
endif()
diff --git a/ffi/include/tvm/ffi/extra/json.h b/ffi/include/tvm/ffi/extra/json.h
new file mode 100644
index 0000000000..847e60c0f6
--- /dev/null
+++ b/ffi/include/tvm/ffi/extra/json.h
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * \file tvm/ffi/extra/json.h
+ * \brief Minimal lightweight JSON parsing and serialization utilities
+ */
+#ifndef TVM_FFI_EXTRA_JSON_H_
+#define TVM_FFI_EXTRA_JSON_H_
+
+#include <tvm/ffi/any.h>
+#include <tvm/ffi/container/array.h>
+#include <tvm/ffi/container/map.h>
+#include <tvm/ffi/extra/base.h>
+
+namespace tvm {
+namespace ffi {
+namespace json {
+
+/*!
+ * \brief alias Any as json Value.
+ *
+ * To keep things lightweight, we simply reuse the ffi::Any system.
+ */
+using Value = Any;
+
+/*!
+ * \brief alias Map<Any, Any> as json Object.
+ * \note We use Map<Any, Any> instead of Map<String, Any> to avoid
+ * the overhead of key checking when doing as conversion,
+ * the check will be performed at runtime when we read each key
+ */
+using Object = ffi::Map<Any, Any>;
+
+/*! \brief alias Array<Any> as json Array. */
+using Array = ffi::Array<Any>;
+
+/*!
+ * \brief Parse a JSON string into an Any value.
+ *
+ * Besides the standard JSON syntax, this function also supports:
+ * - Infinity, -Infinity and NaN literals (javascript syntax)
+ * - int64 integer value: integers that fit in int64 are returned as
+ *   int64 instead of double
+ *
+ * Error reporting depends on error_msg:
+ * - If error_msg is nullptr, a ValueError is thrown when parsing fails.
+ * - If error_msg is not nullptr, no exception is thrown for a syntax
+ *   error. On failure the message is written to it and a null value is
+ *   returned; on success it is set to the empty string.
+ *
+ * \note The JSON document "null" also yields a null value, so callers that
+ *       pass error_msg should test whether the message is empty to tell
+ *       success from failure.
+ *
+ * \param json_str The JSON string to parse.
+ * \param error_msg The output error message, can be nullptr.
+ *
+ * \return The parsed Any value.
+ */
+TVM_FFI_EXTRA_CXX_API json::Value Parse(const String& json_str, String*
error_msg = nullptr);
+
+/*!
+ * \brief Serialize an Any value into a JSON string.
+ *
+ * Supported values are None (written as null), bool, int, float, string,
+ * Array and Map with string keys. NaN and infinite floats are written as
+ * the javascript literals NaN, Infinity and -Infinity. A ValueError is
+ * thrown for any other value type and for a Map key that is not a string.
+ *
+ * \param value The Any value to serialize.
+ * \param indent The number of spaces to indent the output.
+ * If not specified (or 0), the output will be compact.
+ * \return The output JSON string.
+ */
+TVM_FFI_EXTRA_CXX_API String Stringify(const json::Value& value,
+ Optional<int> indent = std::nullopt);
+
+} // namespace json
+} // namespace ffi
+} // namespace tvm
+#endif // TVM_FFI_EXTRA_JSON_H_
diff --git a/ffi/src/ffi/extra/json_parser.cc b/ffi/src/ffi/extra/json_parser.cc
new file mode 100644
index 0000000000..dd3fae351d
--- /dev/null
+++ b/ffi/src/ffi/extra/json_parser.cc
@@ -0,0 +1,692 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * \file src/ffi/extra/json_parser.cc
+ *
+ * \brief A minimalistic JSON parser based on ffi values.
+ */
+#include <tvm/ffi/any.h>
+#include <tvm/ffi/container/array.h>
+#include <tvm/ffi/container/map.h>
+#include <tvm/ffi/error.h>
+#include <tvm/ffi/extra/json.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ffi/string.h>
+
+#include <cinttypes>
+#include <limits>
+
+namespace tvm {
+namespace ffi {
+namespace json {
+
+/*!
+ * \brief Helper class to parse a JSON string.
+ *
+ * Keep leaf level string/number parse also in context.
+ */
+class JSONParserContext {
+ public:
+ JSONParserContext(const char* begin, const char* end) : begin_(begin),
cur_(begin), end_(end) {
+ last_line_begin_ = cur_;
+ }
+
+ /*!
+ * \brief Peek the current character.
+ * \return The current character, or -1 if the end of the string is reached.
+ */
+ int Peek() const {
+ return (cur_ != end_ ? static_cast<int>(*reinterpret_cast<const
uint8_t*>(cur_)) : -1);
+ }
+
+ /*!
+ * \brief Skip the next char that we know is not a space
+ *
+ * \note Caller must explicitly call SkipSpaces first or use
+ * Peek already that confirms char is not any space char.
+ */
+ void SkipNextAssumeNoSpace() { ++cur_; }
+
+ /*!
+ * \brief Get the current position.
+ * \return The current position.
+ */
+ const char* GetCurrentPos() const { return cur_; }
+
+ /*!
+ * \brief Set the current position for better error message
+ * \param pos The new position.
+ * \note implementation can do it as no-op if needed
+ */
+ void SetCurrentPosForBetterErrorMsg(const char* pos) { cur_ = pos; }
+
+ /*!
+ * \brief Skip the JSON whitespace characters (' ', '\t', '\n', '\r').
+ *
+ * Every '\n' that is skipped increments the line counter and records
+ * where the next line begins; both are used to report line and column
+ * in error messages.
+ *
+ * \note Skipping stops at the end of the string, but reaching the end is
+ * not reported here. Callers use Peek() afterwards to detect it.
+ */
+ void SkipSpaces() {
+ while (cur_ != end_) {
+ if (!(*cur_ == ' ' || *cur_ == '\t' || *cur_ == '\n' || *cur_ == '\r')) {
+ break;
+ }
+ if (*cur_ == '\n') {
+ ++line_counter_;
+ last_line_begin_ = cur_ + 1;
+ }
+ ++cur_;
+ }
+ }
+
+ /*!
+ * \brief Check if the next characters match the given pattern.
+ * \param pattern The literal characters to match.
+ * \param len The number of characters in pattern.
+ * \return True if the next len characters equal pattern, false otherwise.
+ * \note The current position is advanced past every character that
+ * matched. After a failed match it is left at the first mismatching
+ * character (or at the end of the string), so callers that need a
+ * specific error position must reset it themselves.
+ */
+ bool MatchLiteral(const char* pattern, int len) {
+ const char* pend = pattern + len;
+ const char* ptr = pattern;
+ for (; ptr != pend && cur_ != end_; ++ptr, ++cur_) {
+ if (*ptr != *cur_) {
+ return false;
+ }
+ }
+ // we get to the end of the pattern and match is successful
+ return ptr == pend;
+ }
+
+ /*!
+ * \brief Parse the next string starting with a double quote.
+ *
+ * This is the fast path: it scans for the closing quote and, when the
+ * string contains no escape sequence and no control character, builds the
+ * result directly from the input bytes. Otherwise parsing is handed over
+ * to NextStringWithFullHandling.
+ *
+ * \param out The output string.
+ * \return Whether the next string parsing is successful.
+ * \note The current character must be the opening double quote.
+ */
+ bool NextString(json::Value* out) {
+   // NOTE: we keep string parsing logic here to allow some special
+   // optimizations for simple strings that do not contain escapes
+   const char* start_pos = cur_;
+   TVM_FFI_ICHECK(*cur_ == '\"');
+   // skip first double quote
+   ++cur_;
+   // the loop focuses on simple string without escape characters
+   for (; cur_ != end_; ++cur_) {
+     if (*cur_ == '\"') {
+       *out = String(start_pos + 1, cur_ - start_pos - 1);
+       ++cur_;
+       return true;
+     }
+     // Compare as unsigned: where plain char is signed, UTF-8 bytes >= 0x80
+     // are negative and would otherwise be taken for control characters,
+     // pushing every non-ASCII string off the fast path.
+     if (static_cast<unsigned char>(*cur_) < ' ' || *cur_ == '\\') {
+       // fallback to full string handling
+       return this->NextStringWithFullHandling(out, start_pos);
+     }
+   }
+   // ran out of input before the closing quote: report at the opening quote
+   this->SetCurrentPosForBetterErrorMsg(start_pos);
+   this->SetErrorUnterminatedString();
+   return false;
+ }
+
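+ /*!
+ * \brief Parse the next number.
+ *
+ * Besides JSON numbers this also accepts the literals Infinity, -Infinity
+ * and NaN. A token without '.', 'e' or 'E' is first parsed as int64; if
+ * that fails (for example the value is out of range) the token is parsed
+ * as a double instead.
+ *
+ * \param out The output number.
+ * \return Whether the next number parsing is successful.
+ */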
+ bool NextNumber(json::Value* out) {
+ const char* start_pos = cur_;
+ if (cur_ == end_) {
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ // JSON number grammar:
+ //
+ // number = [ minus ] int [ frac ] [ exp ]
+ // decimal-point = %x2E ; .
+ // digit1-9 = %x31-39 ; 1-9
+ // e = %x65 / %x45 ; e E
+ // exp = e [ minus / plus ] 1*DIGIT
+ // frac = decimal-point 1*DIGIT
+ std::string temp_buffer;
+ bool maybe_int = true;
+ // parse [minus], cross check for Infinity/NaN/-Infinity
+ if (*cur_ == '-') {
+ temp_buffer.push_back('-');
+ ++cur_;
+ if (cur_ != end_ && *cur_ == 'I') {
+ if (this->MatchLiteral("Infinity", 8)) {
+ *out = -std::numeric_limits<double>::infinity();
+ return true;
+ } else {
+ this->SetCurrentPosForBetterErrorMsg(start_pos);
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ }
+ } else if (*cur_ == 'I') {
+ if (this->MatchLiteral("Infinity", 8)) {
+ *out = std::numeric_limits<double>::infinity();
+ return true;
+ } else {
+ this->SetCurrentPosForBetterErrorMsg(start_pos);
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ } else if (*cur_ == 'N') {
+ if (this->MatchLiteral("NaN", 3)) {
+ *out = std::numeric_limits<double>::quiet_NaN();
+ return true;
+ } else {
+ this->SetCurrentPosForBetterErrorMsg(start_pos);
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ }
+ // read in all parts that are possibly part of a number
+ while (cur_ != end_) {
+ char next_char = *cur_;
+ if ((next_char >= '0' && next_char <= '9') || next_char == 'e' ||
next_char == 'E' ||
+ next_char == '+' || next_char == '-' || next_char == '.') {
+ temp_buffer.push_back(next_char);
+ if (next_char == '.' || next_char == 'e' || next_char == 'E') {
+ maybe_int = false;
+ }
+ ++cur_;
+ } else {
+ break;
+ }
+ }
+ if (temp_buffer.empty()) {
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ // parse from temp_buffer_
+ if (maybe_int) {
+ // now try to parse the number as int64
+ char* end_ptr;
+ errno = 0;
+ intmax_t int_val = strtoimax(temp_buffer.data(), &end_ptr, 10);
+ if (errno == 0 && int_val >= std::numeric_limits<int64_t>::min() &&
+ int_val <= std::numeric_limits<int64_t>::max() &&
+ end_ptr == temp_buffer.data() + temp_buffer.size()) {
+ *out = static_cast<int64_t>(int_val);
+ return true;
+ }
+ }
+ {
+ // now try to parse number as double
+ char* end_ptr;
+ errno = 0;
+ double double_val = strtod(temp_buffer.data(), &end_ptr);
+ if (errno == 0 && end_ptr == temp_buffer.data() + temp_buffer.size()) {
+ *out = double_val;
+ return true;
+ } else {
+ this->SetCurrentPosForBetterErrorMsg(start_pos);
+ this->SetErrorExpectingValue();
+ return false;
+ }
+ }
+ }
+
+ /*!
+ * \brief Format an error message annotated with the current position.
+ *
+ * The result has the form "<prefix>: line L column C (char P)" where L is
+ * the 1-based line number, C is the 1-based column within that line and P
+ * is the 0-based offset from the beginning of the string.
+ *
+ * \param err_prefix The message prefix; "Syntax error" is used when empty.
+ * \return The formatted error message.
+ */
+ String GetSyntaxErrorContext(std::string err_prefix) const {
+ int64_t column = static_cast<int64_t>(cur_ - last_line_begin_) + 1;
+ int64_t char_pos = static_cast<int64_t>(cur_ - begin_);
+ if (err_prefix.empty()) {
+ err_prefix = "Syntax error";
+ }
+ err_prefix += ": line " + std::to_string(line_counter_) + " column " +
std::to_string(column) +
+ " (char " + std::to_string(char_pos) + ")";
+ return String(err_prefix);
+ }
+
+ std::string FinalizeErrorMsg() {
+ if (error_msg_.empty()) {
+ SetErrorDefault();
+ }
+ return std::string(error_msg_);
+ }
+
+ void SetErrorDefault() { error_msg_ = GetSyntaxErrorContext("Syntax error
near"); }
+
+ void SetErrorExpectingValue() { error_msg_ =
GetSyntaxErrorContext("Expecting value"); }
+
+ void SetErrorInvalidControlCharacter() {
+ error_msg_ = GetSyntaxErrorContext("Invalid control character at");
+ }
+
+ void SetErrorUnterminatedString() {
+ error_msg_ = GetSyntaxErrorContext("Unterminated string starting at");
+ }
+
+ void SetErrorInvalidUnicodeEscape() {
+ error_msg_ = GetSyntaxErrorContext("Invalid \\uXXXX escape");
+ }
+
+ void SetErrorInvalidSurrogatePair() {
+ error_msg_ = GetSyntaxErrorContext("Invalid surrogate pair of \\uXXXX
escapes");
+ }
+
+ void SetErrorInvalidEscape() { error_msg_ = GetSyntaxErrorContext("Invalid
\\escape"); }
+
+ void SetErrorExtraData() { error_msg_ = GetSyntaxErrorContext("Extra data");
}
+
+ void SetErrorExpectingPropertyName() {
+ error_msg_ = GetSyntaxErrorContext("Expecting property name enclosed in
double quotes");
+ }
+
+ void SetErrorExpectingColon() { error_msg_ =
GetSyntaxErrorContext("Expecting \':\' delimiter"); }
+
+ void SetErrorExpectingComma() { error_msg_ =
GetSyntaxErrorContext("Expecting \',\' delimiter"); }
+
+ private:
+ /*!
+ * \brief Full string parsing with escape and unicode handling.
+ *
+ * Slow path used by NextString once it meets a backslash or a control
+ * character. Decodes the escapes of the JSON spec (RFC 8259), including
+ * \uXXXX escapes and UTF-16 surrogate pairs, and stores the decoded text
+ * as UTF-8.
+ *
+ * \param out The output string.
+ * \param start_pos The position of the opening double quote.
+ * \return Whether the string is parsed successfully; the error message is
+ * set when it is not.
+ */
+ bool NextStringWithFullHandling(Any* out, const char* start_pos) {
+   // copy over the prefix that was already parsed
+   std::string out_str(start_pos + 1, cur_ - start_pos - 1);
+   while (cur_ != end_) {
+     // Compare as unsigned: where plain char is signed, UTF-8 bytes >= 0x80
+     // are negative and must not be reported as control characters.
+     if (static_cast<unsigned char>(*cur_) < ' ') {
+       this->SetErrorInvalidControlCharacter();
+       return false;
+     }
+     if (*cur_ == '\"') {
+       *out = String(std::move(out_str));
+       ++cur_;
+       return true;
+     }
+     if (*cur_ != '\\') {
+       // ordinary character, copied through unchanged
+       out_str.push_back(*cur_);
+       ++cur_;
+       continue;
+     }
+     // skip the backslash
+     ++cur_;
+     if (cur_ == end_) {
+       // nothing follows the backslash: report the bad escape here instead
+       // of reading one character past the end of the input
+       this->SetErrorInvalidEscape();
+       return false;
+     }
+     switch (*cur_) {
+       // handle escape characters per JSON spec(RFC 8259)
+       case '\"':
+         ++cur_;
+         out_str.push_back('\"');
+         break;
+       case '\\':
+         ++cur_;
+         out_str.push_back('\\');
+         break;
+       case '/':
+         ++cur_;
+         out_str.push_back('/');
+         break;
+       case 'b':
+         ++cur_;
+         out_str.push_back('\b');
+         break;
+       case 'f':
+         ++cur_;
+         out_str.push_back('\f');
+         break;
+       case 'n':
+         ++cur_;
+         out_str.push_back('\n');
+         break;
+       case 'r':
+         ++cur_;
+         out_str.push_back('\r');
+         break;
+       case 't':
+         ++cur_;
+         out_str.push_back('\t');
+         break;
+       case 'u': {
+         const char* escape_pos = cur_;
+         // handle unicode code point
+         ++cur_;
+         int32_t first_i16, code_point = 0;
+         if (!Parse4Hex(&first_i16)) {
+           this->SetCurrentPosForBetterErrorMsg(escape_pos);
+           this->SetErrorInvalidUnicodeEscape();
+           return false;
+         }
+         // Check if the first i16 is a UTF-16 surrogate pair
+         //
+         // Surrogate pair encoding rule:
+         // U' = yyyyyyyyyyxxxxxxxxxx // U - 0x10000
+         // W1 = 110110yyyyyyyyyy // 0xD800 + yyyyyyyyyy
+         // W2 = 110111xxxxxxxxxx // 0xDC00 + xxxxxxxxxx
+         //
+         // Range of W1 and W2:
+         // 0xD800-0xDBFF for W1
+         // 0xDC00-0xDFFF for W2
+         // both W1 and W2 fit into 0xD800-0xDFFF
+         // Detect if the first i16 fit into range of W1/W2
+         if (first_i16 >= 0xD800 && first_i16 <= 0xDFFF) {
+           // we are in the surrogate pair range
+           if (first_i16 >= 0xDC00) {
+             this->SetCurrentPosForBetterErrorMsg(escape_pos);
+             this->SetErrorInvalidSurrogatePair();
+             // we need to return false instead because this range is for W2
+             return false;
+           }
+           if (!this->MatchLiteral("\\u", 2)) {
+             this->SetCurrentPosForBetterErrorMsg(escape_pos);
+             this->SetErrorInvalidSurrogatePair();
+             return false;
+           }
+           escape_pos = cur_;
+           // get the value of the W2 (second i16)
+           int32_t second_i16;
+           if (!Parse4Hex(&second_i16)) {
+             this->SetCurrentPosForBetterErrorMsg(escape_pos);
+             this->SetErrorInvalidUnicodeEscape();
+             return false;
+           }
+           if (!(second_i16 >= 0xDC00 && second_i16 <= 0xDFFF)) {
+             this->SetCurrentPosForBetterErrorMsg(escape_pos);
+             this->SetErrorInvalidSurrogatePair();
+             return false;
+           }
+           // recover the code point
+           code_point = ((first_i16 - 0xD800) << 10) + (second_i16 - 0xDC00) + 0x10000;
+         } else {
+           // not a surrogate case, just assign as code point
+           code_point = first_i16;
+         }
+         // now need to push back the string based on UTF-8 encoding
+         // UTF-8 encoding rule: four cases
+         // ------------------------------------------------------------
+         // Pattern                             | code point range
+         // ------------------------------------------------------------
+         // 0xxxxxxx                            | 0x0 - 0x7F
+         // 110xxxxx 10xxxxxx                   | 0x80 - 0x7FF
+         // 1110xxxx 10xxxxxx 10xxxxxx          | 0x800 - 0xFFFF
+         // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | 0x10000 - end
+         // ------------------------------------------------------------
+         if (code_point < 0x80) {
+           out_str.push_back(static_cast<char>(code_point));
+         } else if (code_point < 0x800) {
+           // first byte: 110xxxxx (5 effective bits)
+           // second byte: 10xxxxxx (6 effective bits)
+           // shift by 6 bits to get the first byte
+           out_str.push_back(static_cast<char>(0xC0 | (code_point >> 6)));
+           // mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | (code_point & 0x3F)));
+         } else if (code_point < 0x10000) {
+           // first byte: 1110xxxx (4 effective bits)
+           // second byte: 10xxxxxx (6 effective bits)
+           // third byte: 10xxxxxx (6 effective bits)
+           // shift by 12 bits to get the first byte
+           out_str.push_back(static_cast<char>(0xE0 | (code_point >> 12)));
+           // shift by 6 bits to get the second byte, mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)));
+           // mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | (code_point & 0x3F)));
+         } else {
+           // first byte: 11110xxx (3 effective bits)
+           // second byte: 10xxxxxx (6 effective bits)
+           // third byte: 10xxxxxx (6 effective bits)
+           // fourth byte: 10xxxxxx (6 effective bits)
+           // shift by 18 bits to get the first byte
+           out_str.push_back(static_cast<char>(0xF0 | (code_point >> 18)));
+           // shift by 12 bits to get the second byte, mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | ((code_point >> 12) & 0x3F)));
+           // shift by 6 bits to get the third byte, mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | ((code_point >> 6) & 0x3F)));
+           // mask by 6 effective bits
+           out_str.push_back(static_cast<char>(0x80 | (code_point & 0x3F)));
+         }
+         break;
+       }
+       default: {
+         this->SetErrorInvalidEscape();
+         return false;
+       }
+     }
+   }
+   // ran out of input before the closing quote: report at the opening quote
+   this->SetCurrentPosForBetterErrorMsg(start_pos);
+   this->SetErrorUnterminatedString();
+   return false;
+ }
+ /*!
+ * \brief Parse the four hex digits of a unicode code point per json spec.
+ * \param out_i16 The output i16 number
+ * \return True if four hex digits are parsed successfully, false otherwise.
+ * \note On success the current position is advanced past the four digits.
+ * On failure it is left at the first character that is not a hex
+ * digit, or at the end of the string when the input is too short.
+ */
+ bool Parse4Hex(int32_t* out_i16) {
+   int32_t result = 0;
+   for (int i = 0; i < 4; ++i, ++cur_) {
+     // a truncated escape must fail here rather than read past the input
+     if (cur_ == end_) {
+       return false;
+     }
+     int hex_val = *reinterpret_cast<const uint8_t*>(cur_);
+     if (hex_val >= '0' && hex_val <= '9') {
+       hex_val -= '0';
+     } else if (hex_val >= 'a' && hex_val <= 'f') {
+       hex_val -= 'a' - 0xa;
+     } else if (hex_val >= 'A' && hex_val <= 'F') {
+       hex_val -= 'A' - 0xa;
+     } else {
+       return false;
+     }
+     result = result * 16 + hex_val;
+   }
+   *out_i16 = result;
+   return true;
+ }
+
+ /*! \brief The beginning of the string */
+ const char* begin_;
+ /*! \brief The current pointer */
+ const char* cur_;
+ /*! \brief End of the string */
+ const char* end_;
+ /*! \brief The beginning of the last line */
+ const char* last_line_begin_;
+ /*! \brief The error message */
+ std::string error_msg_;
+ /*! \brief The line counter */
+ int64_t line_counter_{1};
+};
+
+class JSONParser {
+ public:
+ /*!
+ * \brief Parse a complete JSON document.
+ *
+ * The document must consist of exactly one value, optionally surrounded
+ * by spaces. On success error_msg (when given) is set to the empty
+ * string. On failure the message is either stored into error_msg or,
+ * when error_msg is nullptr, thrown as a ValueError.
+ *
+ * \param json_str The JSON string to parse.
+ * \param error_msg The output error message, can be nullptr.
+ * \return The parsed value, or a null value when parsing failed and the
+ * error was reported through error_msg.
+ */
+ static json::Value Parse(const String& json_str, String* error_msg) {
+   JSONParser parser(json_str);
+   json::Value parsed;
+   const bool ok = parser.ParseValue(&parsed) && parser.ParseTail();
+   if (ok) {
+     if (error_msg != nullptr) {
+       *error_msg = String("");
+     }
+     return parsed;
+   }
+   if (error_msg == nullptr) {
+     TVM_FFI_THROW(ValueError) << parser.ctx_.FinalizeErrorMsg();
+   } else {
+     *error_msg = parser.ctx_.FinalizeErrorMsg();
+     TVM_FFI_ICHECK(!error_msg->empty());
+   }
+   // only reached when the error was reported through error_msg; the
+   // non-empty message is what tells the caller that parsing failed
+   return nullptr;
+ }
+
+ private:
+ // The context only stores raw pointers into json_str, so the string must
+ // outlive the parser. Take it by const reference: a by-value parameter is
+ // destroyed when the constructor returns, which would leave the pointers
+ // dangling whenever the copy owns its own character storage.
+ // NOTE(review): presumably the case for short strings stored inline
+ // (see kTVMFFISmallStr) - confirm against the String implementation.
+ explicit JSONParser(const String& json_str)
+     : ctx_(json_str.data(), json_str.data() + json_str.size()) {}
+
+ /*!
+ * \brief Check that only spaces follow the parsed value.
+ * \return True if the end of the input is reached, false (with the
+ * "Extra data" error set) otherwise.
+ */
+ bool ParseTail() {
+   ctx_.SkipSpaces();
+   if (ctx_.Peek() == -1) {
+     return true;
+   }
+   // something other than spaces is left after the value
+   ctx_.SetErrorExtraData();
+   return false;
+ }
+
+ /*!
+ * \brief Parse the next JSON value of any kind.
+ *
+ * Skips leading spaces and dispatches on the first character: objects,
+ * arrays, strings and the keywords true/false/null are recognized here,
+ * everything else is handed to the number parser.
+ *
+ * \param out The output value.
+ * \return Whether the value is parsed successfully; the error message is
+ * set in the context when it is not.
+ */
+ bool ParseValue(json::Value* out) {
+   ctx_.SkipSpaces();
+   // remembered so that a failed keyword match is reported at the position
+   // where the value starts
+   const char* value_begin = ctx_.GetCurrentPos();
+   // consumes the first keyword character, then matches the remainder
+   auto match_keyword_tail = [this, value_begin](const char* tail, int tail_len) {
+     ctx_.SkipNextAssumeNoSpace();
+     if (ctx_.MatchLiteral(tail, tail_len)) {
+       return true;
+     }
+     ctx_.SetCurrentPosForBetterErrorMsg(value_begin);
+     ctx_.SetErrorExpectingValue();
+     return false;
+   };
+   switch (ctx_.Peek()) {
+     case -1:
+       // the end of the string is reached
+       ctx_.SetErrorExpectingValue();
+       return false;
+     case '{':
+       return ParseObject(out);
+     case '[':
+       return ParseArray(out);
+     case '\"':
+       return ctx_.NextString(out);
+     case 't':
+       if (!match_keyword_tail("rue", 3)) return false;
+       *out = true;
+       return true;
+     case 'f':
+       if (!match_keyword_tail("alse", 4)) return false;
+       *out = false;
+       return true;
+     case 'n':
+       if (!match_keyword_tail("ull", 3)) return false;
+       *out = nullptr;
+       return true;
+     default:
+       return ctx_.NextNumber(out);
+   }
+   return false;
+ }
+
+ /*!
+ * \brief Parse a JSON object; the current character must be '{'.
+ *
+ * Key/value pairs are collected on object_temp_stack_ and the object is
+ * built from them in one step once the closing '}' is seen, after which
+ * the stack is restored to its size on entry.
+ *
+ * \param out The output object.
+ * \return Whether the object is parsed successfully; the error message is
+ * set in the context for the syntax errors detected here.
+ */
+ bool ParseObject(json::Value* out) {
+   size_t stack_top = object_temp_stack_.size();
+   ctx_.SkipNextAssumeNoSpace();
+   ctx_.SkipSpaces();
+   int next_char = ctx_.Peek();
+   if (next_char == -1) {
+     ctx_.SetErrorExpectingPropertyName();
+     return false;
+   }
+   // empty object
+   if (next_char == '}') {
+     ctx_.SkipNextAssumeNoSpace();
+     *out = json::Object();
+     return true;
+   }
+   // non-empty object
+   while ((next_char = ctx_.Peek()) != -1) {
+     if (next_char != '\"') {
+       ctx_.SetErrorExpectingPropertyName();
+       return false;
+     }
+     json::Value key;
+     if (!ctx_.NextString(&key)) return false;
+     ctx_.SkipSpaces();
+     if (ctx_.Peek() != ':') {
+       ctx_.SetErrorExpectingColon();
+       return false;
+     }
+     ctx_.SkipNextAssumeNoSpace();
+     json::Value value;
+     if (!ParseValue(&value)) return false;
+     // key and value are not used again, so move them like ParseArray does
+     object_temp_stack_.emplace_back(std::move(key), std::move(value));
+     ctx_.SkipSpaces();
+     if (ctx_.Peek() == '}') {
+       ctx_.SkipNextAssumeNoSpace();
+       *out = json::Object(object_temp_stack_.begin() + stack_top, object_temp_stack_.end());
+       // recover the stack to original state
+       object_temp_stack_.resize(stack_top);
+       return true;
+     } else if (ctx_.Peek() == ',') {
+       ctx_.SkipNextAssumeNoSpace();
+       // must skip space so next iteration do not have to do so
+       ctx_.SkipSpaces();
+     } else {
+       ctx_.SetErrorExpectingComma();
+       return false;
+     }
+   }
+   return false;
+ }
+
+ /*!
+ * \brief Parse a JSON array; the current character must be '['.
+ *
+ * Elements are collected on array_temp_stack_ and the array is built from
+ * them in one step once the closing ']' is seen, after which the stack is
+ * restored to its size on entry.
+ *
+ * \param out The output array.
+ * \return Whether the array is parsed successfully.
+ */
+ bool ParseArray(json::Value* out) {
+   const size_t stack_base = array_temp_stack_.size();
+   // consume '[' and the spaces that follow it
+   ctx_.SkipNextAssumeNoSpace();
+   ctx_.SkipSpaces();
+   switch (ctx_.Peek()) {
+     case -1:
+       ctx_.SetErrorExpectingValue();
+       return false;
+     case ']':
+       // empty array
+       ctx_.SkipNextAssumeNoSpace();
+       *out = json::Array();
+       return true;
+     default:
+       break;
+   }
+   // non-empty array; spaces are already skipped at the top of each round
+   while (ctx_.Peek() != -1) {
+     json::Value element;
+     if (!ParseValue(&element)) return false;
+     array_temp_stack_.emplace_back(std::move(element));
+     ctx_.SkipSpaces();
+     const int delimiter = ctx_.Peek();
+     if (delimiter == ']') {
+       ctx_.SkipNextAssumeNoSpace();
+       *out = json::Array(array_temp_stack_.begin() + stack_base, array_temp_stack_.end());
+       // recover the stack
+       array_temp_stack_.resize(stack_base);
+       return true;
+     }
+     if (delimiter != ',') {
+       ctx_.SetErrorExpectingComma();
+       return false;
+     }
+     ctx_.SkipNextAssumeNoSpace();
+     // skip spaces here so the next round starts at the next element
+     ctx_.SkipSpaces();
+   }
+   return false;
+ }
+
+ JSONParserContext ctx_;
+ // Temp stack for intermediate values
+ // we first create a persistent stack to store the parsed values
+ // then create the final array/object object with the precise size
+ std::vector<Any> array_temp_stack_;
+ std::vector<std::pair<Any, Any>> object_temp_stack_;
+};
+
+json::Value Parse(const String& json_str, String* error_msg) {
+ return JSONParser::Parse(json_str, error_msg);
+}
+
+TVM_FFI_STATIC_INIT_BLOCK({
+ namespace refl = tvm::ffi::reflection;
+ refl::GlobalDef().def("ffi.json.Parse",
+ [](const String& json_str) { return
json::Parse(json_str); });
+});
+
+} // namespace json
+} // namespace ffi
+} // namespace tvm
diff --git a/ffi/src/ffi/extra/json_writer.cc b/ffi/src/ffi/extra/json_writer.cc
new file mode 100644
index 0000000000..94ba5e4a5a
--- /dev/null
+++ b/ffi/src/ffi/extra/json_writer.cc
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * \file src/ffi/extra/json_writer.cc
+ *
+ * \brief A minimalistic JSON writer based on ffi values.
+ */
+#include <tvm/ffi/any.h>
+#include <tvm/ffi/container/array.h>
+#include <tvm/ffi/container/map.h>
+#include <tvm/ffi/error.h>
+#include <tvm/ffi/extra/json.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ffi/string.h>
+
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#ifdef _MSC_VER
+#define TVM_FFI_SNPRINTF _snprintf_s
+#pragma warning(push)
+#pragma warning(disable : 4244)
+#pragma warning(disable : 4127)
+#pragma warning(disable : 4702)
+#else
+#define TVM_FFI_SNPRINTF snprintf
+#endif
+
+namespace tvm {
+namespace ffi {
+namespace json {
+
+class JSONWriter {
+ public:
+ static String Stringify(const json::Value& value, Optional<int> indent) {
+ JSONWriter writer(indent.value_or(0));
+ writer.WriteValue(value);
+ return String(std::move(writer.result_));
+ }
+
+ private:
+ explicit JSONWriter(int indent) : indent_(indent), out_iter_(result_) {}
+
+ void WriteValue(const json::Value& value) {
+ switch (value.type_index()) {
+ case TypeIndex::kTVMFFINone: {
+ WriteLiteral("null", 4);
+ break;
+ }
+ case TypeIndex::kTVMFFIBool: {
+ bool bool_value =
details::AnyUnsafe::CopyFromAnyViewAfterCheck<bool>(value);
+ if (bool_value) {
+ WriteLiteral("true", 4);
+ } else {
+ WriteLiteral("false", 5);
+ }
+ break;
+ }
+ case TypeIndex::kTVMFFIInt: {
+
WriteInt(details::AnyUnsafe::CopyFromAnyViewAfterCheck<int64_t>(value));
+ break;
+ }
+ case TypeIndex::kTVMFFIFloat: {
+
WriteFloat(details::AnyUnsafe::CopyFromAnyViewAfterCheck<double>(value));
+ break;
+ }
+ case TypeIndex::kTVMFFISmallStr:
+ case TypeIndex::kTVMFFIStr: {
+
WriteString(details::AnyUnsafe::CopyFromAnyViewAfterCheck<String>(value));
+ break;
+ }
+ case TypeIndex::kTVMFFIArray: {
+
WriteArray(details::AnyUnsafe::CopyFromAnyViewAfterCheck<json::Array>(value));
+ break;
+ }
+ case TypeIndex::kTVMFFIMap: {
+
WriteObject(details::AnyUnsafe::CopyFromAnyViewAfterCheck<json::Object>(value));
+ break;
+ }
+ default: {
+ TVM_FFI_THROW(ValueError) << "Unsupported type: `" <<
value.GetTypeKey() << "`";
+ TVM_FFI_UNREACHABLE();
+ }
+ }
+ }
+
+ void WriteLiteral(const char* literal, int size) {
+ for (int i = 0; i < size; ++i) {
+ *out_iter_++ = literal[i];
+ }
+ }
+
+ void WriteInt(int64_t value) {
+ // the biggest possible string representation of -INT64_MIN
+ char buffer[sizeof("-9223372036854775808") + 1];
+ int size = TVM_FFI_SNPRINTF(buffer, sizeof(buffer), "%" PRId64, value);
+ WriteLiteral(buffer, size);
+ }
+
+ void WriteFloat(double value) {
+ // largest possible string representation of a double is around 24 chars
plus
+ // one null terminator keep 32 to be safe
+ char buffer[32];
+ if (std::isnan(value)) {
+ WriteLiteral("NaN", 3);
+ } else if (std::isinf(value)) {
+ if (value < 0) {
+ WriteLiteral("-Infinity", 9);
+ } else {
+ WriteLiteral("Infinity", 8);
+ }
+ } else {
+ double int_part;
+ // if the value can be represented as integer
+ if (std::fabs(value) < (1ULL << 53) && std::modf(value, &int_part) == 0)
{
+ // always print an extra .0 for integer so integer numbers are printed
as floats
+ // this helps us to distinguish between integer and float, which is
not necessary
+ // but helps to ensure roundtrip property of the parser/printer in
terms of int/float types
+ int size = TVM_FFI_SNPRINTF(buffer, sizeof(buffer), "%.1f", int_part);
+ WriteLiteral(buffer, size);
+ } else {
+ // Save 17 decimal digits to avoid loss during loading JSON
+ // this is the maximum precision that can be represented in a double
+ int size = TVM_FFI_SNPRINTF(buffer, sizeof(buffer), "%.17g", value);
+ WriteLiteral(buffer, size);
+ }
+ }
+ }
+
+ void WriteString(const String& value) {
+ *out_iter_++ = '"';
+ const char* data = value.data();
+ const size_t size = value.size();
+ for (size_t i = 0; i < size; ++i) {
+ switch (data[i]) {
+// handle escape characters per JSON spec(RFC 8259)
+#define HANDLE_ESCAPE_CHAR(pattern, val) \
+ case pattern: \
+ WriteLiteral(val, std::char_traits<char>::length(val)); \
+ break
+ HANDLE_ESCAPE_CHAR('\"', "\\\"");
+ HANDLE_ESCAPE_CHAR('\\', "\\\\");
+ HANDLE_ESCAPE_CHAR('/', "\\/");
+ HANDLE_ESCAPE_CHAR('\b', "\\b");
+ HANDLE_ESCAPE_CHAR('\f', "\\f");
+ HANDLE_ESCAPE_CHAR('\n', "\\n");
+ HANDLE_ESCAPE_CHAR('\r', "\\r");
+ HANDLE_ESCAPE_CHAR('\t', "\\t");
+#undef HANDLE_ESCAPE_CHAR
+ default: {
+ uint8_t u8_val = static_cast<uint8_t>(data[i]);
+ // this is a control character, print as \uXXXX
+ if (u8_val < 0x20 || u8_val == 0x7f) {
+ char buffer[8];
+ int size = TVM_FFI_SNPRINTF(buffer, sizeof(buffer), "\\u%04x",
+ static_cast<int32_t>(data[i]) & 0xff);
+ WriteLiteral(buffer, size);
+ } else {
+ *out_iter_++ = data[i];
+ }
+ break;
+ }
+ }
+ }
+ *out_iter_++ = '"';
+ }
+
+ // Write an array as '[' item ',' item ... ']'. With a non-zero indent every
+ // item and the closing bracket are placed on their own line.
+ void WriteArray(const json::Array& value) {
+   const bool pretty = (indent_ != 0);
+   *out_iter_++ = '[';
+   if (pretty) {
+     total_indent_ += indent_;
+   }
+   const size_t num_items = value.size();
+   for (size_t idx = 0; idx < num_items; ++idx) {
+     if (idx > 0) {
+       *out_iter_++ = ',';
+     }
+     if (pretty) {
+       WriteIndent();
+     }
+     WriteValue(value[idx]);
+   }
+   if (pretty) {
+     // back to the enclosing level before the closing bracket
+     total_indent_ -= indent_;
+     WriteIndent();
+   }
+   *out_iter_++ = ']';
+ }
+
+ // Write an object as '{' key ':' value ',' ... '}'. With a non-zero indent
+ // every entry and the closing brace are placed on their own line and a space
+ // follows each ':'. Throws ValueError when a key is not a string.
+ void WriteObject(const json::Object& value) {
+   const bool pretty = (indent_ != 0);
+   *out_iter_++ = '{';
+   if (pretty) {
+     total_indent_ += indent_;
+   }
+   bool is_first = true;
+   for (const auto& [entry_key, entry_value] : value) {
+     if (!is_first) {
+       *out_iter_++ = ',';
+     }
+     is_first = false;
+     if (pretty) {
+       WriteIndent();
+     }
+     auto opt_key = entry_key.as<String>();
+     if (!opt_key.has_value()) {
+       TVM_FFI_THROW(ValueError) << "Expect key to be string, got `" << entry_key.GetTypeKey() << "`";
+     }
+     WriteString(*opt_key);
+     *out_iter_++ = ':';
+     if (pretty) {
+       *out_iter_++ = ' ';
+     }
+     WriteValue(entry_value);
+   }
+   if (pretty) {
+     // back to the enclosing level before the closing brace
+     total_indent_ -= indent_;
+     WriteIndent();
+   }
+   *out_iter_++ = '}';
+ }
+
+ // Write a newline and indent the current level
+ void WriteIndent() {
+ *out_iter_++ = '\n';
+ for (int i = 0; i < total_indent_; ++i) {
+ *out_iter_++ = ' ';
+ }
+ }
+
+ int indent_ = 0;
+ int total_indent_ = 0;
+ std::string result_;
+ std::back_insert_iterator<std::string> out_iter_;
+};
+
+String Stringify(const json::Value& value, Optional<int> indent) {
+ return JSONWriter::Stringify(value, indent);
+}
+
+TVM_FFI_STATIC_INIT_BLOCK({
+ namespace refl = tvm::ffi::reflection;
+ refl::GlobalDef().def("ffi.json.Stringify", Stringify);
+});
+
+} // namespace json
+} // namespace ffi
+} // namespace tvm
+
+#undef TVM_FFI_SNPRINTF
diff --git a/ffi/tests/cpp/extra/test_json_parser.cc
b/ffi/tests/cpp/extra/test_json_parser.cc
new file mode 100644
index 0000000000..c0332e6f8f
--- /dev/null
+++ b/ffi/tests/cpp/extra/test_json_parser.cc
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <gtest/gtest.h>
+#include <tvm/ffi/container/array.h>
+#include <tvm/ffi/container/map.h>
+#include <tvm/ffi/extra/json.h>
+#include <tvm/ffi/extra/structural_equal.h>
+
+#include <cmath>
+
+namespace {
+
+using namespace tvm::ffi;
+
+// The literals `true`, `false` and `null` parse to the matching bool / null value.
+TEST(JSONParser, BoolNull) {
+  const bool parsed_true = json::Parse("true").cast<bool>();
+  EXPECT_TRUE(parsed_true);
+  const bool parsed_false = json::Parse("false").cast<bool>();
+  EXPECT_FALSE(parsed_false);
+  EXPECT_EQ(json::Parse("null"), nullptr);
+}
+
+TEST(JSONParser, WrongBoolNull) {
+ String error_msg;
+ EXPECT_EQ(json::Parse("nul", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+ EXPECT_EQ(json::Parse("fals", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+ EXPECT_EQ(json::Parse("\n\nfx", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 3 column 1 (char 2)");
+ EXPECT_EQ(json::Parse("fx", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+ EXPECT_EQ(json::Parse("n1", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+ EXPECT_EQ(json::Parse("t1", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+ EXPECT_EQ(json::Parse("f1", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+}
+
+// Integer and floating-point parsing, including the non-finite spellings the
+// parser accepts (NaN, Infinity, -Infinity) and the int64 limits.
+TEST(JSONParser, Number) {
+  const auto as_int = [](const char* text) { return json::Parse(text).cast<int64_t>(); };
+  const auto as_double = [](const char* text) { return json::Parse(text).cast<double>(); };
+
+  // plain integers and decimals
+  EXPECT_EQ(as_int("123"), 123);
+  EXPECT_EQ(as_int("-124"), -124);
+  EXPECT_EQ(as_double("123.456"), 123.456);
+  // scientific notation
+  EXPECT_EQ(as_double("1.456e12"), 1.456e12);
+  // non-finite values
+  EXPECT_TRUE(std::isnan(as_double("NaN")));
+  EXPECT_TRUE(std::isinf(as_double("Infinity")));
+  EXPECT_TRUE(std::isinf(-as_double("-Infinity")));
+
+  // zero variants
+  EXPECT_EQ(as_int("0"), 0);
+  EXPECT_EQ(as_double("-0"), -0.0);
+  EXPECT_EQ(as_double("0.0"), 0.0);
+
+  // int64 limits
+  EXPECT_EQ(as_int("9223372036854775807"), std::numeric_limits<int64_t>::max());
+  EXPECT_EQ(as_int("-9223372036854775808"), std::numeric_limits<int64_t>::min());
+
+  // very small decimals
+  EXPECT_EQ(as_double("1e-10"), 1e-10);
+  EXPECT_EQ(as_double("-1e-10"), -1e-10);
+
+  // exponent marker case and sign variants
+  EXPECT_EQ(as_double("1E+10"), 1E+10);
+  EXPECT_EQ(as_double("1e+10"), 1e+10);
+  EXPECT_EQ(as_double("1E-10"), 1E-10);
+  EXPECT_EQ(as_double("123.456E+10"), 123.456E+10);
+}
+
+// Malformed numbers (second decimal point, dangling exponent) are rejected
+// with the error anchored at the start of the token.
+TEST(JSONParser, WrongNumber) {
+  const char* const kExpected = "Expecting value: line 1 column 1 (char 0)";
+  const char* const bad_inputs[] = {"123.456.789", "123e", "123e+", "123E-"};
+  String error_msg;
+  for (const char* text : bad_inputs) {
+    EXPECT_EQ(json::Parse(text, &error_msg), nullptr);
+    EXPECT_EQ(error_msg, kExpected);
+  }
+}
+
+// String parsing: surrounding whitespace, escape sequences, \uXXXX code
+// points and surrogate pairs.
+TEST(JSONParser, String) {
+  const auto parse_str = [](const char* text) { return json::Parse(text).cast<String>(); };
+
+  EXPECT_EQ(parse_str("\"hello\""), "hello");
+  EXPECT_EQ(parse_str("\n\t \"hello\"\n\r"), "hello");
+  EXPECT_EQ(parse_str("\"hello\\nworld\""), "hello\nworld");
+  EXPECT_EQ(parse_str("\"\""), "");
+  // escape characters
+  EXPECT_EQ(parse_str("\"\\ta\\n\\/\\f\\\"\\\\\""), "\ta\n/\f\"\\");
+  // single unicode code point
+  EXPECT_EQ(parse_str("\"\\u0041\""), "A");
+  // unicode surrogate pair
+  EXPECT_EQ(parse_str("\"\\uD83D\\uDE04hello\""), u8"\U0001F604hello");
+}
+
+TEST(JSONParser, WrongString) {  // Malformed strings: each case checks the null result and the reported message/position. NOTE(review): several expected-message literals below are split across lines by mail wrapping.
+ String error_msg;
+ EXPECT_EQ(json::Parse("\"hello", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Unterminated string starting at: line 1 column 1 (char
0)");
+
+ EXPECT_EQ(json::Parse("\"hello\x01\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid control character at: line 1 column 7 (char
6)");
+
+ EXPECT_EQ(json::Parse("\"hello\\uxx\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid \\uXXXX escape: line 1 column 8 (char 7)");
+
+ EXPECT_EQ(json::Parse("\"hello\\uDC00\\uDE04\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid surrogate pair of \\uXXXX escapes: line 1
column 8 (char 7)");
+
+ EXPECT_EQ(json::Parse("\"hello\\uD800\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid surrogate pair of \\uXXXX escapes: line 1
column 8 (char 7)");
+
+ EXPECT_EQ(json::Parse("\"hello\\uD800\\uxx\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid \\uXXXX escape: line 1 column 15 (char 14)");
+
+ EXPECT_EQ(json::Parse("\"hello\\a\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Invalid \\escape: line 1 column 8 (char 7)");
+}
+
+// Empty and mixed-type arrays compare structurally equal to hand-built json::Array values.
+TEST(JSONParser, Array) {
+  const json::Array expected_empty{};
+  EXPECT_TRUE(StructuralEqual()(json::Parse("[]"), expected_empty));
+
+  const json::Array expected_mixed{1, 2, "a"};
+  EXPECT_TRUE(StructuralEqual()(json::Parse("[1, 2,\n\t\"a\"]"), expected_mixed));
+}
+
+// Malformed arrays: stray bracket, trailing comma, missing close, missing
+// delimiter, and trailing garbage after a complete array.
+TEST(JSONParser, WrongArray) {
+  struct Case {
+    const char* input;
+    const char* expected_error;
+  };
+  const Case cases[] = {
+      {"]", "Expecting value: line 1 column 1 (char 0)"},
+      {"[1,]", "Expecting value: line 1 column 4 (char 3)"},
+      {"[", "Expecting value: line 1 column 2 (char 1)"},
+      {"[1a", "Expecting ',' delimiter: line 1 column 3 (char 2)"},
+      {"[1,2,3", "Expecting ',' delimiter: line 1 column 7 (char 6)"},
+      {"[1] a", "Extra data: line 1 column 6 (char 5)"},
+  };
+  // One message buffer is shared by all cases.
+  String error_msg;
+  for (const Case& c : cases) {
+    EXPECT_EQ(json::Parse(c.input, &error_msg), nullptr);
+    EXPECT_EQ(error_msg, c.expected_error);
+  }
+}
+
+TEST(JSONParser, Object) {  // Empty and two-entry objects compare structurally equal to hand-built json::Object values. NOTE(review): the second input literal is split across lines by mail wrapping.
+ EXPECT_TRUE(StructuralEqual()(json::Parse("{}"), json::Object{}));
+
+ EXPECT_TRUE(StructuralEqual()(json::Parse("{\"a\": 1, \n\"b\": \t\"c\"}
"),
+ json::Object{{"a", 1}, {"b", "c"}}));
+}
+
+// Iterating a parsed object yields its keys in the order they appear in the text.
+TEST(JSONParser, ObjectOrderPreserving) {
+  const json::Object parsed = json::Parse("{\"c\": 1, \"a\": 2, \"b\": 3} ").cast<json::Object>();
+  json::Array seen_keys;
+  for (const auto& [name, unused_value] : parsed) {
+    seen_keys.push_back(name);
+  }
+  const json::Array expected_keys{"c", "a", "b"};
+  EXPECT_TRUE(StructuralEqual()(seen_keys, expected_keys));
+}
+
+TEST(JSONParser, WrongObject) {  // Truncated objects: missing value, missing property name, missing ':' delimiter. NOTE(review): one expected-message literal is split across lines by mail wrapping.
+ String error_msg;
+ EXPECT_EQ(json::Parse("{\"a\":", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 6 (char 5)");
+
+ EXPECT_EQ(json::Parse("{", &error_msg), nullptr);
+ EXPECT_EQ(error_msg,
+ "Expecting property name enclosed in double quotes: line 1 column
2 (char 1)");
+
+ // Key present but input ends before the ':' delimiter
+ EXPECT_EQ(json::Parse("{\"incomplete\"", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting ':' delimiter: line 1 column 14 (char 13)");
+}
+
+TEST(JSONParser, NestedObject) {  // Objects and arrays nested inside each other, compared structurally against hand-built values. NOTE(review): the first input literal is split across lines by mail wrapping.
+ EXPECT_TRUE(
+ StructuralEqual()(json::Parse("{\"a\": \t{\"b\": 1}, \n\"c\": [1, 2,
3]}"),
+ json::Object{{"a", json::Object{{"b", 1}}}, {"c",
json::Array{1, 2, 3}}}));
+
+ EXPECT_TRUE(StructuralEqual()(
+ json::Parse("{\"a\": \t{\"b\": 1}, \n\"c\": [1, null, Infinity]}"),
+ json::Object{{"a", json::Object{{"b", 1}}},
+ {"c", json::Array{1, nullptr,
std::numeric_limits<double>::infinity()}}}));
+
+ EXPECT_TRUE(StructuralEqual()(
+ json::Parse("[{}, {\"a\": [1.1, 1000000]}]"),
+ json::Array{json::Object{}, json::Object{{"a", json::Array{1.1,
1000000}}}}));
+}
+
+// Errors inside nested containers report the line/column of the failure,
+// counting the newlines that precede it.
+TEST(JSONParser, WrongNestedObject) {
+  struct Case {
+    const char* input;
+    const char* expected_error;
+  };
+  const Case cases[] = {
+      {"{\"a\":\n\n[1]", "Expecting ',' delimiter: line 3 column 4 (char 10)"},
+      {"{\"a\":\n\n[abc]}", "Expecting value: line 3 column 2 (char 8)"},
+  };
+  // One message buffer is shared by all cases.
+  String error_msg;
+  for (const Case& c : cases) {
+    EXPECT_EQ(json::Parse(c.input, &error_msg), nullptr);
+    EXPECT_EQ(error_msg, c.expected_error);
+  }
+}
+
+// edge cases
+TEST(JSONParser, WhitespaceHandling) {  // NOTE(review): some literals below are split across lines by mail wrapping, and runs of spaces may have been collapsed.
+ // Space, tab, newline and carriage return around scalar values are skipped
+ EXPECT_EQ(json::Parse(" \t\n\r true \t\n\r ").cast<bool>(), true);
+ EXPECT_EQ(json::Parse("\n\n\n123\n\n\n").cast<int64_t>(), 123);
+ EXPECT_EQ(json::Parse(" \"hello world\" ").cast<String>(), "hello
world");
+
+ // Whitespace around the brackets, commas and colons of arrays and objects
+ EXPECT_TRUE(StructuralEqual()(json::Parse(" [ 1 , 2 , 3 ] "),
json::Array{1, 2, 3}));
+
+ EXPECT_TRUE(StructuralEqual()(json::Parse(" { \"a\" : 1 , \"b\" : 2
} "),
+ json::Object{{"a", 1}, {"b", 2}}));
+}
+
+TEST(JSONParser, WrongEmptyAndMinimalInputs) {  // Inputs with no value at all are rejected with "Expecting value".
+ String error_msg;
+ // Empty string: the error is reported at offset 0
+ EXPECT_EQ(json::Parse("", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 1 column 1 (char 0)");
+
+ // Whitespace only. NOTE(review): the expectation says char 9 but the literal shows fewer characters; spaces look collapsed by the mail archive
+ EXPECT_EQ(json::Parse(" \t\n ", &error_msg), nullptr);
+ EXPECT_EQ(error_msg, "Expecting value: line 2 column 5 (char 9)");
+}
+
+// \uXXXX escapes: embedded NUL, a Latin-1 code point, CJK code points and
+// consecutive surrogate pairs. Expected values use \U escapes so the test
+// source stays independent of file encoding.
+TEST(JSONParser, UnicodeEdgeCases) {
+  const auto parse_str = [](const char* text) { return json::Parse(text).cast<String>(); };
+
+  EXPECT_EQ(parse_str("\"\\u0000\""), std::string("\0", 1));
+  EXPECT_EQ(parse_str("\"\\u00FF\""), u8"\U000000FF");
+  EXPECT_EQ(parse_str("\"\\u4E2D\\u6587\""), u8"\U00004E2D\U00006587");
+
+  // two surrogate pairs back to back
+  EXPECT_EQ(parse_str("\"\\uD83D\\uDE00\\uD83D\\uDE01\""), u8"\U0001F600\U0001F601");
+}
+
+// Generated inputs: a 1000-element integer array and a 500-entry object.
+TEST(JSONParser, LargeInputs) {
+  // "[0,1,...,999]"
+  std::string array_text = "[";
+  for (int idx = 0; idx < 1000; ++idx) {
+    if (idx != 0) {
+      array_text.push_back(',');
+    }
+    array_text.append(std::to_string(idx));
+  }
+  array_text.push_back(']');
+
+  auto parsed = json::Parse(array_text);
+  EXPECT_TRUE(parsed != nullptr);
+  EXPECT_EQ(parsed.cast<json::Array>().size(), 1000);
+
+  // {"key0":0,"key1":1,...,"key499":499}
+  std::string object_text = "{";
+  for (int idx = 0; idx < 500; ++idx) {
+    if (idx != 0) {
+      object_text.push_back(',');
+    }
+    const std::string number = std::to_string(idx);
+    object_text.append("\"key").append(number).append("\":").append(number);
+  }
+  object_text.push_back('}');
+
+  parsed = json::Parse(object_text);
+  EXPECT_TRUE(parsed != nullptr);
+  EXPECT_EQ(parsed.cast<json::Object>().size(), 500);
+}
+
+TEST(JSONParser, MixedDataTypes) {  // NOTE(review): indentation inside the raw string is collapsed and one initializer line is split by mail wrapping.
+ // One document exercising null, bools, ints, floats, strings, unicode escapes and nested containers
+ std::string complex_json = R"({
+ "null_value": null,
+ "boolean_true": true,
+ "boolean_false": false,
+ "integer": 42,
+ "negative_integer": -42,
+ "float": 3.14159,
+ "scientific": 1.23e-4,
+ "string": "hello world",
+ "unicode_string": "Hello \u4e16\u754c \ud83c\udf0d",
+ "empty_string": "",
+ "empty_array": [],
+ "empty_object": {},
+ "number_array": [1, 2, 3, 4, 5],
+ "mixed_array": [1, "two", true, null, 3.14],
+ "nested_object": {
+ "level1": {
+ "level2": {
+ "data": [1, 2, {"nested_array": [true, false]}]
+ }
+ }
+ }
+ })";
+
+ auto result = json::Parse(complex_json);
+
+ // Hand-built equivalent of the document above, compared with StructuralEqual
+ json::Object expected{
+ {"null_value", nullptr},
+ {"boolean_true", true},
+ {"boolean_false", false},
+ {"integer", 42},
+ {"negative_integer", -42},
+ {"float", 3.14159},
+ {"scientific", 1.23e-4},
+ {"string", "hello world"},
+ {"unicode_string", u8"Hello \U00004E16\U0000754C \U0001F30D"},
+ {"empty_string", ""},
+ {"empty_array", json::Array{}},
+ {"empty_object", json::Object{}},
+ {"number_array", json::Array{1, 2, 3, 4, 5}},
+ {"mixed_array", json::Array{1, "two", true, nullptr, 3.14}},
+ {"nested_object",
+ json::Object{
+ {"level1",
+ json::Object{
+ {"level2",
+ json::Object{
+ {"data",
+ json::Array{1, 2,
+ json::Object{{"nested_array",
json::Array{true, false}}}}}}}}}}}};
+
+ EXPECT_TRUE(StructuralEqual()(result, expected));
+}
+
+// Anything following a complete top-level value is reported as "Extra data"
+// at the position of the first surplus character.
+TEST(JSONParser, WrongExtraData) {
+  struct Case {
+    const char* input;
+    const char* expected_error;
+  };
+  const Case cases[] = {
+      {"truee", "Extra data: line 1 column 5 (char 4)"},
+      {"true false", "Extra data: line 1 column 6 (char 5)"},
+      {"123 456", "Extra data: line 1 column 5 (char 4)"},
+      {"\"hello\" \"world\"", "Extra data: line 1 column 9 (char 8)"},
+      {"{} []", "Extra data: line 1 column 4 (char 3)"},
+  };
+  // One message buffer is shared by all cases.
+  String error_msg;
+  for (const Case& c : cases) {
+    EXPECT_EQ(json::Parse(c.input, &error_msg), nullptr);
+    EXPECT_EQ(error_msg, c.expected_error);
+  }
+}
+} // namespace
diff --git a/ffi/tests/cpp/extra/test_json_writer.cc b/ffi/tests/cpp/extra/test_json_writer.cc
new file mode 100644
index 0000000000..ae6172c2e5
--- /dev/null
+++ b/ffi/tests/cpp/extra/test_json_writer.cc
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <gtest/gtest.h>
+#include <tvm/ffi/container/array.h>
+#include <tvm/ffi/container/map.h>
+#include <tvm/ffi/extra/json.h>
+
+#include <limits>
+
+namespace {
+
+using namespace tvm::ffi;
+
+// Booleans and null serialize to the bare JSON literals.
+TEST(JSONWriter, BoolNull) {
+  const json::Value true_value(true);
+  EXPECT_EQ(json::Stringify(true_value), "true");
+  const json::Value false_value(false);
+  EXPECT_EQ(json::Stringify(false_value), "false");
+  const json::Value null_value(nullptr);
+  EXPECT_EQ(json::Stringify(null_value), "null");
+}
+
+// Integers serialize as plain decimal digits, including both int64 limits.
+TEST(JSONWriter, Integer) {
+  // Generic so each argument keeps the exact type used to build the json::Value.
+  const auto dump = [](auto number) { return json::Stringify(json::Value(number)); };
+
+  EXPECT_EQ(dump(42), "42");
+  EXPECT_EQ(dump(-123), "-123");
+  EXPECT_EQ(dump(0), "0");
+  EXPECT_EQ(dump(std::numeric_limits<int64_t>::max()), "9223372036854775807");
+  EXPECT_EQ(dump(std::numeric_limits<int64_t>::min()), "-9223372036854775808");
+}
+
+// Doubles: fixed-point and scientific forms, the ".0" suffix on integral
+// values, and the NaN / Infinity / -Infinity spellings.
+TEST(JSONWriter, Float) {
+  const auto dump = [](double number) { return json::Stringify(json::Value(number)); };
+  using DoubleLimits = std::numeric_limits<double>;
+
+  // regular float
+  EXPECT_EQ(dump(2.5), "2.5");
+  // integral values keep a ".0" suffix
+  EXPECT_EQ(dump(5.0), "5.0");
+  EXPECT_EQ(dump(-10.0), "-10.0");
+  EXPECT_EQ(dump(0.0), "0.0");
+  // scientific notation for very small numbers
+  EXPECT_EQ(dump(-7.89e-15), "-7.89e-15");
+  // scientific form chosen when shorter than fixed-point
+  EXPECT_EQ(dump(2e-8), "2e-08");
+  // non-finite values
+  EXPECT_EQ(dump(DoubleLimits::quiet_NaN()), "NaN");
+  EXPECT_EQ(dump(DoubleLimits::infinity()), "Infinity");
+  EXPECT_EQ(dump(-DoubleLimits::infinity()), "-Infinity");
+}
+
+// Strings are quoted; quotes, backslashes, forward slashes and control
+// characters are escaped.
+TEST(JSONWriter, String) {
+  // Takes the argument by reference so literals and std::string both reach
+  // the String constructor unchanged.
+  const auto dump = [](const auto& text) { return json::Stringify(json::Value(String(text))); };
+
+  // simple and empty strings
+  EXPECT_EQ(dump("hello"), "\"hello\"");
+  EXPECT_EQ(dump(""), "\"\"");
+  // characters with dedicated escapes
+  EXPECT_EQ(dump("\"quoted\""), "\"\\\"quoted\\\"\"");
+  EXPECT_EQ(dump("backslash\\"), "\"backslash\\\\\"");
+  EXPECT_EQ(dump("forward/slash"), "\"forward\\/slash\"");
+  EXPECT_EQ(dump("line\nbreak"), "\"line\\nbreak\"");
+  EXPECT_EQ(dump("tab\there"), "\"tab\\there\"");
+  EXPECT_EQ(dump("carriage\rreturn"), "\"carriage\\rreturn\"");
+  // other control characters use the \uXXXX form
+  EXPECT_EQ(dump(std::string("\x01", 1) + "control"), "\"\\u0001control\"");
+}
+
+// Compact array output: empty, single element, mixed types and nesting.
+TEST(JSONWriter, Array) {
+  EXPECT_EQ(json::Stringify(json::Array{}), "[]");
+  EXPECT_EQ(json::Stringify(json::Array{42}), "[42]");
+  EXPECT_EQ(json::Stringify(json::Array{1, "hello", true}), "[1,\"hello\",true]");
+  EXPECT_EQ(json::Stringify(json::Array{json::Array{1, 2}, 3}), "[[1,2],3]");
+}
+
+// Compact object output: empty, single entry, and several entries written in
+// insertion order.
+TEST(JSONWriter, Object) {
+  EXPECT_EQ(json::Stringify(json::Object{}), "{}");
+
+  EXPECT_EQ(json::Stringify(json::Object{{String("key"), String("value")}}),
+            "{\"key\":\"value\"}");
+
+  const json::Object person{{"name", "Alice"}, {"age", 30}, {"active", true}, {"score", 95.5}};
+  EXPECT_EQ(json::Stringify(person),
+            "{\"name\":\"Alice\",\"age\":30,\"active\":true,\"score\":95.5}");
+}
+
+TEST(JSONWriter, InsertionOrderPreservation) {  // NOTE(review): indentation inside the raw-string expectations is collapsed, and some statements are split by mail wrapping.
+ // Keys are written in insertion order, not sorted order
+ json::Object ordered_object{
+ {"zebra", "last"}, {"alpha", "first"}, {"beta", "middle"}, {"gamma",
123}, {"delta", true}};
+ EXPECT_EQ(
+ json::Stringify(ordered_object),
+
"{\"zebra\":\"last\",\"alpha\":\"first\",\"beta\":\"middle\",\"gamma\":123,\"delta\":true}");
+
+ // Same object with indent 2: the order must not change
+ std::string ordered_indented = json::Stringify(ordered_object, 2);
+ EXPECT_EQ(ordered_indented, String(R"({
+ "zebra": "last",
+ "alpha": "first",
+ "beta": "middle",
+ "gamma": 123,
+ "delta": true
+})"));
+
+ // Nested objects keep their own insertion order too
+ json::Object nested_ordered{
+ {"outer1",
+ json::Object{{"inner_z", "z_value"}, {"inner_a", "a_value"},
{"inner_m", "m_value"}}},
+ {"outer2", json::Object{{"third", 3}, {"first", 1}, {"second", 2}}}};
+ std::string nested_ordered_indented = json::Stringify(nested_ordered, 2);
+ EXPECT_EQ(nested_ordered_indented, String(R"({
+ "outer1": {
+ "inner_z": "z_value",
+ "inner_a": "a_value",
+ "inner_m": "m_value"
+ },
+ "outer2": {
+ "third": 3,
+ "first": 1,
+ "second": 2
+ }
+})"));
+}
+
+// Compact output for containers nested inside each other.
+TEST(JSONWriter, NestedStructures) {
+  // object containing array
+  EXPECT_EQ(json::Stringify(json::Object{{String("numbers"), json::Array{1, 2, 3}}}),
+            "{\"numbers\":[1,2,3]}");
+
+  // array containing object
+  EXPECT_EQ(json::Stringify(json::Array{json::Object{{String("key"), String("value")}}}),
+            "[{\"key\":\"value\"}]");
+
+  // object -> array -> object
+  const json::Object innermost{{String("deep"), String("value")}};
+  const json::Object outermost{{String("nested"), json::Array{innermost}}};
+  EXPECT_EQ(json::Stringify(outermost), "{\"nested\":[{\"deep\":\"value\"}]}");
+}
+
+TEST(JSONWriter, Indentation) {  // NOTE(review): indentation inside the raw-string expectations is collapsed, and some lines are split by mail wrapping.
+ // Array with indent 2: one element per line
+ json::Array arr{1, 2};
+ std::string indented = json::Stringify(arr, 2);
+ EXPECT_EQ(indented, String(R"([
+ 1,
+ 2
+])"));
+
+ // Object with indent 2: a space follows the ':'
+ json::Object obj{{"key", "value"}};
+ std::string indented_obj = json::Stringify(obj, 2);
+ EXPECT_EQ(indented_obj, String(R"({
+ "key": "value"
+})"));
+
+ // complex nested structure with multiple data types
+ // keep double as .5 so output is deterministic as they exactly rounds to
power of 2
+ json::Object complex_nested{
+ {"name", "test"},
+ {"count", 42},
+ {"price", 3.5},
+ {"active", true},
+ {"metadata", nullptr},
+ {"numbers", json::Array{1, 2, 3}},
+ {"config", json::Object{{"enabled", false},
+ {"timeout", 30.5},
+ {"tags", json::Array{"production", "critical",
nullptr}}}},
+ {"matrix", json::Array{json::Array{1, 2}, json::Array{3.5, 4.5},
json::Array{"a", "b"}}}};
+ std::string complex_indented = json::Stringify(complex_nested, 2);
+ EXPECT_EQ(complex_indented, String(R"({
+ "name": "test",
+ "count": 42,
+ "price": 3.5,
+ "active": true,
+ "metadata": null,
+ "numbers": [
+ 1,
+ 2,
+ 3
+ ],
+ "config": {
+ "enabled": false,
+ "timeout": 30.5,
+ "tags": [
+ "production",
+ "critical",
+ null
+ ]
+ },
+ "matrix": [
+ [
+ 1,
+ 2
+ ],
+ [
+ 3.5,
+ 4.5
+ ],
+ [
+ "a",
+ "b"
+ ]
+ ]
+})"));
+}
+} // namespace