This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new a440eb67d7 AVRO 4175: [C++] Allow previously parsed schemas to be referenced when parsing a schema (#3475)
a440eb67d7 is described below

commit a440eb67d781322d08544c2096a1b0877aa5e027
Author: Robert Yokota <[email protected]>
AuthorDate: Mon Sep 22 10:34:54 2025 -0700

    AVRO 4175: [C++] Allow previously parsed schemas to be referenced when parsing a schema (#3475)
---
 lang/c++/impl/Compiler.cc         | 29 +++++++++++++++++-
 lang/c++/include/avro/Compiler.hh |  5 ++++
 lang/c++/test/CompilerTests.cc    | 63 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index 5f59fb5bac..d0ac20d020 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -94,7 +94,15 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
 
     auto it = st.find(n);
     if (it != st.end()) {
-        return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second));
+        // Return the raw NodePtr instead of creating a new "NodeSymbolic"
+        // via "NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second))"
+        // in order to support externally resolved named references.
+        // This is safe because the validator canonicalizes duplicates:
+        // when it sees the same named node again (including self-recursion),
+        // it replaces that leaf with a NodeSymbolic via "setLeafToSymbolic".
+        // So even if the raw NodePtr is returned initially, validation
+        // converts repeats to symbolic links.
+        return it->second;
     }
     throw Exception("Unknown type: {}", n);
 }
@@ -638,4 +646,23 @@ AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string &
     }
 }
 
+// Compiles the JSON schema read from "is", making the schemas in "namedReferences"
+// available for resolving type names. Throws Exception if the stream is not good.
+AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
+                                                           const std::map<Name, ValidSchema> &namedReferences) {
+    if (!is.good()) {
+        throw Exception("Input stream is not good");
+    }
+    std::unique_ptr<InputStream> in = istreamInputStream(is);
+    json::Entity e = json::loadEntity(*in);
+
+    // Seed the SymbolTable (map<Name, NodePtr>) with each referenced schema's root node.
+    SymbolTable st;
+    for (const auto &entry : namedReferences) {
+        st[entry.first] = entry.second.root();
+    }
+    NodePtr n = makeNode(e, st, "");
+    return ValidSchema(n);
+}
+
 } // namespace avro
diff --git a/lang/c++/include/avro/Compiler.hh b/lang/c++/include/avro/Compiler.hh
index bdcbb355e2..911a8aed0c 100644
--- a/lang/c++/include/avro/Compiler.hh
+++ b/lang/c++/include/avro/Compiler.hh
@@ -22,6 +22,7 @@
 #include "Config.hh"
 #include <cstdint>
 #include <istream>
+#include <map>
 
 namespace avro {
 
@@ -32,6 +33,7 @@ class AVRO_DECL InputStream;
 /// lexer object for each parse.  The bison parser also uses this class to
 /// build up an avro parse tree as the avro spec is parsed.
 
+class AVRO_DECL Name;
 class AVRO_DECL ValidSchema;
 
 /// Given a stream containing a JSON schema, compiles the schema to a
@@ -58,6 +60,9 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
 
 AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
 
+/// Compiles the JSON schema in the stream; schemas in namedReferences are available for resolving type names.
+AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
+                                                           const std::map<Name, ValidSchema> &namedReferences);
 } // namespace avro
 
 #endif
diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc
index 6fa5344ac1..072b977566 100644
--- a/lang/c++/test/CompilerTests.cc
+++ b/lang/c++/test/CompilerTests.cc
@@ -22,6 +22,7 @@
 #include <boost/test/unit_test.hpp>
 
 #include "Compiler.hh"
+#include "Node.hh"
 #include "ValidSchema.hh"
 
 // Assert that empty defaults don't make json schema compilation violate bounds
@@ -82,6 +83,66 @@ void test2dArray() {
     BOOST_CHECK_EQUAL(expected, actual.str());
 }
 
+// Checks that a type referenced only by name is resolved from a caller-supplied map of compiled schemas.
+void testRecordWithNamedReference() {
+    std::string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}";
+    // The root schema references the nested schema above by name only.
+    // This mimics tools that allow schemas to have references to other schemas.
+    std::string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}";
+
+    // First compile the nested schema
+    avro::ValidSchema nestedRecord = avro::compileJsonSchemaFromString(nestedSchema);
+
+    // Create a map of named references
+    std::map<avro::Name, avro::ValidSchema> namedReferences;
+    namedReferences[avro::Name("NestedRecord")] = nestedRecord;
+
+    // Parse the root schema with named references
+    std::istringstream rootSchemaStream(rootSchema);
+    avro::ValidSchema rootRecord = avro::compileJsonSchemaWithNamedReferences(rootSchemaStream, namedReferences);
+
+    // Verify the schema was compiled correctly
+    BOOST_CHECK_EQUAL("RootRecord", rootRecord.root()->name().simpleName());
+
+    // Get the nested field and verify its type
+    const avro::NodePtr &rootNode = rootRecord.root();
+    BOOST_CHECK_EQUAL(avro::AVRO_RECORD, rootNode->type());
+    BOOST_CHECK_EQUAL(1, rootNode->leaves());
+    const avro::NodePtr &nestedFieldNode = rootNode->leafAt(0);
+    BOOST_CHECK_EQUAL("NestedRecord", nestedFieldNode->name().simpleName());
+}
+
+// Self-referential schemas must not form shared_ptr cycles. A binary tree node
+// whose left/right fields are a union of null and the node type itself is
+// compiled, and its root must be released once the ValidSchema is destroyed.
+void testRecursiveBinaryTreeWeakPtrExpires() {
+    std::weak_ptr<avro::Node> rootObserver;
+
+    {
+        const std::string treeJson = R"({
+            "type": "record",
+            "name": "Node",
+            "fields": [
+                {"name": "value", "type": "int"},
+                {"name": "left",  "type": ["null", "Node"],  "default": null},
+                {"name": "right", "type": ["null", "Node"],  "default": null}
+            ]
+        })";
+
+        // Observe the root without owning it while the compiled schema is alive.
+        avro::ValidSchema compiled = avro::compileJsonSchemaFromString(treeJson);
+        rootObserver = compiled.root();
+
+        // Sanity-check the root of the compiled schema.
+        BOOST_CHECK_EQUAL(avro::AVRO_RECORD, compiled.root()->type());
+        BOOST_CHECK_EQUAL("Node", compiled.root()->name().simpleName());
+    }
+
+    // The inner scope held the only ValidSchema; if the node graph has no
+    // ownership cycle, nothing keeps the root alive and the observer is expired.
+    BOOST_CHECK(rootObserver.expired());
+}
+
 boost::unit_test::test_suite *
 init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
     using namespace boost::unit_test;
@@ -89,5 +150,7 @@ init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
     auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for Compiler.cc");
     ts->add(BOOST_TEST_CASE(&testEmptyBytesDefault));
     ts->add(BOOST_TEST_CASE(&test2dArray));
+    ts->add(BOOST_TEST_CASE(&testRecordWithNamedReference));
+    ts->add(BOOST_TEST_CASE(&testRecursiveBinaryTreeWeakPtrExpires));
     return ts;
 }

Reply via email to