[ 
https://issues.apache.org/jira/browse/AVRO-1256?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16683764#comment-16683764
 ] 

ASF GitHub Bot commented on AVRO-1256:
--------------------------------------

thiru-apache closed pull request #345:  AVRO-1256: C++ API compileJsonSchema 
ignores "doc" and custom attributes on a field/record
URL: https://github.com/apache/avro/pull/345
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/lang/c++/.gitignore b/lang/c++/.gitignore
index 76f012595..4ac073b92 100644
--- a/lang/c++/.gitignore
+++ b/lang/c++/.gitignore
@@ -3,3 +3,5 @@ build.mac/
 doc/
 test.avro
 test6.df
+test8.df
+test9.df
diff --git a/lang/c++/api/Node.hh b/lang/c++/api/Node.hh
index ebba375be..4d54a5edf 100644
--- a/lang/c++/api/Node.hh
+++ b/lang/c++/api/Node.hh
@@ -78,7 +78,7 @@ std::ostream& operator << (std::ostream& os, const Name& n) {
 /// objects.
 ///
 /// The Node object uses reference-counted pointers.  This is so that schemas
-/// may be reused in other other schemas, without needing to worry about memory
+/// may be reused in other schemas, without needing to worry about memory
 /// deallocation for nodes that are added to multiple schema parse trees.
 ///
 /// Node has minimal implementation, serving as an abstract base class for
@@ -117,6 +117,12 @@ class AVRO_DECL Node : private boost::noncopyable
     }
     virtual const Name &name() const = 0;
 
+    virtual const std::string &getDoc() const = 0;
+    void setDoc(const std::string &doc) {
+        checkLock();
+        doSetDoc(doc);
+    }
+
     void addLeaf(const NodePtr &newLeaf) {
         checkLock();
         doAddLeaf(newLeaf);
@@ -170,6 +176,8 @@ class AVRO_DECL Node : private boost::noncopyable
     }
 
     virtual void doSetName(const Name &name) = 0;
+    virtual void doSetDoc(const std::string &name) = 0;
+
     virtual void doAddLeaf(const NodePtr &newLeaf) = 0;
     virtual void doAddName(const std::string &name) = 0;
     virtual void doSetFixedSize(int size) = 0;
diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/api/NodeImpl.hh
index 0f3202368..d4c7639f5 100644
--- a/lang/c++/api/NodeImpl.hh
+++ b/lang/c++/api/NodeImpl.hh
@@ -35,7 +35,7 @@
 namespace avro {
 
 /// Implementation details for Node.  NodeImpl represents all the avro types,
-/// whose properties are enabled are disabled by selecting concept classes.
+/// whose properties are enabled and disabled by selecting concept classes.
 
 template
 <
@@ -52,6 +52,7 @@ class NodeImpl : public Node
     NodeImpl(Type type) :
         Node(type),
         nameAttribute_(),
+        docAttribute_(),
         leafAttributes_(),
         leafNameAttributes_(),
         sizeAttribute_()
@@ -64,13 +65,30 @@ class NodeImpl : public Node
              const SizeConcept &size) :
         Node(type),
         nameAttribute_(name),
+        docAttribute_(),
         leafAttributes_(leaves),
         leafNameAttributes_(leafNames),
         sizeAttribute_(size)
     { }
 
+    // Ctor with "doc"
+    NodeImpl(Type type,
+             const NameConcept &name,
+             const concepts::SingleAttribute<std::string> &doc,
+             const LeavesConcept &leaves,
+             const LeafNamesConcept &leafNames,
+             const SizeConcept &size) :
+        Node(type),
+        nameAttribute_(name),
+        docAttribute_(doc),
+        leafAttributes_(leaves),
+        leafNameAttributes_(leafNames),
+        sizeAttribute_(size)
+    {}
+
     void swap(NodeImpl& impl) {
         std::swap(nameAttribute_, impl.nameAttribute_);
+        std::swap(docAttribute_, impl.docAttribute_);
         std::swap(leafAttributes_, impl.leafAttributes_);
         std::swap(leafNameAttributes_, impl.leafNameAttributes_);
         std::swap(sizeAttribute_, impl.sizeAttribute_);
@@ -78,6 +96,7 @@ class NodeImpl : public Node
     }
 
     bool hasName() const {
+        // e.g.: true for single and multiattributes, false for noattributes.
         return NameConcept::hasAttribute;
     }
 
@@ -89,6 +108,14 @@ class NodeImpl : public Node
         return nameAttribute_.get();
     }
 
+    void doSetDoc(const std::string &doc) {
+      docAttribute_.add(doc);
+    }
+
+    const std::string &getDoc() const {
+      return docAttribute_.get();
+    }
+
     void doAddLeaf(const NodePtr &newLeaf) {
         leafAttributes_.add(newLeaf);
     }
@@ -172,6 +199,10 @@ class NodeImpl : public Node
     }
 
     NameConcept nameAttribute_;
+
+    // Rem: NameConcept type is HasName (= SingleAttribute<Name>), we use 
std::string instead
+    concepts::SingleAttribute<std::string> docAttribute_; /** Doc used to 
compare schemas */
+
     LeavesConcept leafAttributes_;
     LeafNamesConcept leafNameAttributes_;
     SizeConcept sizeAttribute_;
@@ -181,6 +212,8 @@ class NodeImpl : public Node
 typedef concepts::NoAttribute<Name>     NoName;
 typedef concepts::SingleAttribute<Name> HasName;
 
+typedef concepts::SingleAttribute<std::string> HasDoc;
+
 typedef concepts::NoAttribute<NodePtr>      NoLeaves;
 typedef concepts::SingleAttribute<NodePtr>  SingleLeaf;
 typedef concepts::MultiAttribute<NodePtr>   MultiLeaves;
@@ -287,6 +320,20 @@ public:
         }
     }
 
+    NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves 
&fields,
+        const LeafNames &fieldsNames,
+        const std::vector<GenericDatum> &dv) :
+        NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, NoSize()),
+        defaultValues(dv) {
+        for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
+            if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
+                throw Exception(boost::format(
+                    "Cannot add duplicate name: %1%") %
+                    leafNameAttributes_.get(i));
+            }
+        }
+    }
+
     void swap(NodeRecord& r) {
         NodeImplRecord::swap(r);
         defaultValues.swap(r.defaultValues);
diff --git a/lang/c++/api/Schema.hh b/lang/c++/api/Schema.hh
index 8ce5f8df4..646b95e0d 100644
--- a/lang/c++/api/Schema.hh
+++ b/lang/c++/api/Schema.hh
@@ -16,11 +16,12 @@
  * limitations under the License.
  */
 
-#ifndef avro_Schema_hh__ 
-#define avro_Schema_hh__ 
+#ifndef avro_Schema_hh__
+#define avro_Schema_hh__
 
 #include "Config.hh"
 #include "NodeImpl.hh"
+#include <string>
 
 /// \file
 ///
@@ -102,6 +103,9 @@ class AVRO_DECL RecordSchema : public Schema {
 public:
     RecordSchema(const std::string &name);
     void addField(const std::string &name, const Schema &fieldSchema);
+
+    std::string getDoc() const;
+    void setDoc(const std::string &);
 };
 
 class AVRO_DECL EnumSchema : public Schema {
diff --git a/lang/c++/api/ValidSchema.hh b/lang/c++/api/ValidSchema.hh
index 30eb33eb6..d0bbd4e3f 100644
--- a/lang/c++/api/ValidSchema.hh
+++ b/lang/c++/api/ValidSchema.hh
@@ -50,11 +50,15 @@ public:
     }
 
     void toJson(std::ostream &os) const;
+    std::string toJson(bool prettyPrint = true) const;
 
     void toFlatList(std::ostream &os) const;
 
   protected:
     NodePtr root_;
+
+  private:
+    static std::string compactSchema(const std::string &schema);
 };
 
 } // namespace avro
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index 725136d1f..bc0f3cdd5 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -15,6 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <boost/algorithm/string/replace.hpp>
 #include <sstream>
 
 #include "Compiler.hh"
@@ -42,7 +43,7 @@ typedef map<Name, NodePtr> SymbolTable;
 
 // #define DEBUG_VERBOSE
 
-static NodePtr makePrimitive(const std::string& t)
+static NodePtr makePrimitive(const string& t)
 {
     if (t == "null") {
         return NodePtr(new NodePrimitive(AVRO_NULL));
@@ -65,7 +66,7 @@ static NodePtr makePrimitive(const std::string& t)
     }
 }
 
-static NodePtr makeNode(const json::Entity& e, SymbolTable& st, const string& 
ns);
+static NodePtr makeNode(const json::Entity& e, SymbolTable& st, const string 
&ns);
 
 template <typename T>
 concepts::SingleAttribute<T> asSingleAttribute(const T& t)
@@ -75,17 +76,17 @@ concepts::SingleAttribute<T> asSingleAttribute(const T& t)
     return n;
 }
 
-static bool isFullName(const string& s)
+static bool isFullName(const string &s)
 {
     return s.find('.') != string::npos;
 }
 
-static Name getName(const string& name, const string& ns)
+static Name getName(const string &name, const string &ns)
 {
     return (isFullName(name)) ? Name(name) : Name(name, ns);
 }
 
-static NodePtr makeNode(const std::string& t, SymbolTable& st, const string& 
ns)
+static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns)
 {
     NodePtr result = makePrimitive(t);
     if (result) {
@@ -100,8 +101,15 @@ static NodePtr makeNode(const std::string& t, SymbolTable& 
st, const string& ns)
     throw Exception(boost::format("Unknown type: %1%") % n.fullname());
 }
 
-const json::Object::const_iterator findField(const Entity& e,
-    const Object& m, const string& fieldName)
+/** Returns "true" if the field is in the container */
+// e.g.: can be false for non-mandatory fields
+bool containsField(const Object &m, const string &fieldName) {
+    Object::const_iterator it = m.find(fieldName);
+    return it != m.end();
+}
+
+const json::Object::const_iterator findField(const Entity &e,
+    const Object &m, const string &fieldName)
 {
     Object::const_iterator it = m.find(fieldName);
     if (it == m.end()) {
@@ -112,7 +120,7 @@ const json::Object::const_iterator findField(const Entity& 
e,
     }
 }
 
-template <typename T> void ensureType(const Entity& e, const string& name)
+template <typename T> void ensureType(const Entity &e, const string &name)
 {
     if (e.type() != json::type_traits<T>::type()) {
         throw Exception(boost::format("Json field \"%1%\" is not a %2%: %3%") %
@@ -120,8 +128,8 @@ template <typename T> void ensureType(const Entity& e, 
const string& name)
     }
 }
 
-const string& getStringField(const Entity& e, const Object& m,
-                             const string& fieldName)
+const string& getStringField(const Entity &e, const Object &m,
+                             const string &fieldName)
 {
     Object::const_iterator it = findField(e, m, fieldName);
     ensureType<string>(it->second, fieldName);
@@ -144,6 +152,19 @@ const int64_t getLongField(const Entity& e, const Object& 
m,
     return it->second.longValue();
 }
 
+// Unescape double quotes (") for de-serialization.  This method complements 
the
+// method NodeImpl::escape() which is used for serialization.
+static void unescape(string& s) {
+    boost::replace_all(s, "\\\"", "\"");
+}
+
+const string getDocField(const Entity& e, const Object& m)
+{
+    string doc = getStringField(e, m, "doc");
+    unescape(doc);
+    return doc;
+}
+
 struct Field {
     const string& name;
     const NodePtr schema;
@@ -162,7 +183,7 @@ static void assertType(const Entity& e, EntityType et)
     }
 }
 
-static vector<uint8_t> toBin(const std::string& s)
+static vector<uint8_t> toBin(const string& s)
 {
     vector<uint8_t> result(s.size());
     if (s.size() > 0) {
@@ -278,14 +299,18 @@ static Field makeField(const Entity& e, SymbolTable& st, 
const string& ns)
     Object::const_iterator it = findField(e, m, "type");
     map<string, Entity>::const_iterator it2 = m.find("default");
     NodePtr node = makeNode(it->second, st, ns);
+    if (containsField(m, "doc")) {
+        node->setDoc(getDocField(e, m));
+    }
     GenericDatum d = (it2 == m.end()) ? GenericDatum() :
         makeGenericDatum(node, it2->second, st);
     return Field(n, node, d);
 }
 
-static NodePtr makeRecordNode(const Entity& e,
-    const Name& name, const Object& m, SymbolTable& st, const string& ns)
-{
+// Extended makeRecordNode (with doc).
+static NodePtr makeRecordNode(const Entity& e, const Name& name,
+                              const string* doc, const Object& m,
+                              SymbolTable& st, const string& ns) {
     const Array& v = getArrayField(e, m, "fields");
     concepts::MultiAttribute<string> fieldNames;
     concepts::MultiAttribute<NodePtr> fieldValues;
@@ -297,8 +322,15 @@ static NodePtr makeRecordNode(const Entity& e,
         fieldValues.add(f.schema);
         defaultValues.push_back(f.defaultValue);
     }
-    return NodePtr(new NodeRecord(asSingleAttribute(name),
-        fieldValues, fieldNames, defaultValues));
+    NodeRecord* node;
+    if (doc == NULL) {
+        node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames,
+                              defaultValues);
+    } else {
+        node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc),
+                              fieldValues, fieldNames, defaultValues);
+    }
+    return NodePtr(node);
 }
 
 static NodePtr makeEnumNode(const Entity& e,
@@ -313,7 +345,11 @@ static NodePtr makeEnumNode(const Entity& e,
         }
         symbols.add(it->stringValue());
     }
-    return NodePtr(new NodeEnum(asSingleAttribute(name), symbols));
+    NodePtr node = NodePtr(new NodeEnum(asSingleAttribute(name), symbols));
+    if (containsField(m, "doc")) {
+        node->setDoc(getDocField(e, m));
+    }
+    return node;
 }
 
 static NodePtr makeFixedNode(const Entity& e,
@@ -324,16 +360,24 @@ static NodePtr makeFixedNode(const Entity& e,
         throw Exception(boost::format("Size for fixed is not positive: %1%") %
             e.toString());
     }
-    return NodePtr(new NodeFixed(asSingleAttribute(name),
-        asSingleAttribute(v)));
+    NodePtr node =
+        NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(v)));
+    if (containsField(m, "doc")) {
+        node->setDoc(getDocField(e, m));
+    }
+    return node;
 }
 
 static NodePtr makeArrayNode(const Entity& e, const Object& m,
     SymbolTable& st, const string& ns)
 {
     Object::const_iterator it = findField(e, m, "items");
-    return NodePtr(new NodeArray(asSingleAttribute(
-        makeNode(it->second, st, ns))));
+    NodePtr node = NodePtr(new NodeArray(
+        asSingleAttribute(makeNode(it->second, st, ns))));
+    if (containsField(m, "doc")) {
+        node->setDoc(getDocField(e, m));
+    }
+    return node;
 }
 
 static NodePtr makeMapNode(const Entity& e, const Object& m,
@@ -341,8 +385,12 @@ static NodePtr makeMapNode(const Entity& e, const Object& 
m,
 {
     Object::const_iterator it = findField(e, m, "values");
 
-    return NodePtr(new NodeMap(asSingleAttribute(
-        makeNode(it->second, st, ns))));
+    NodePtr node = NodePtr(new NodeMap(
+        asSingleAttribute(makeNode(it->second, st, ns))));
+    if (containsField(m, "doc")) {
+        node->setDoc(getDocField(e, m));
+    }
+    return node;
 }
 
 static Name getName(const Entity& e, const Object& m, const string& ns)
@@ -380,9 +428,19 @@ static NodePtr makeNode(const Entity& e, const Object& m,
         if (type == "record" || type == "error") {
             result = NodePtr(new NodeRecord());
             st[nm] = result;
-            NodePtr r = makeRecordNode(e, nm, m, st, nm.ns());
-            (boost::dynamic_pointer_cast<NodeRecord>(r))->swap(
-                *boost::dynamic_pointer_cast<NodeRecord>(result));
+            // Get field doc
+            if (containsField(m, "doc")) {
+                string doc = getDocField(e, m);
+
+                NodePtr r = makeRecordNode(e, nm, &doc, m, st, nm.ns());
+                (boost::dynamic_pointer_cast<NodeRecord>(r))->swap(
+                    *boost::dynamic_pointer_cast<NodeRecord>(result));
+            } else {  // No doc
+                NodePtr r =
+                    makeRecordNode(e, nm, NULL, m, st, nm.ns());
+                (boost::dynamic_pointer_cast<NodeRecord>(r))
+                    ->swap(*boost::dynamic_pointer_cast<NodeRecord>(result));
+            }
         } else {
             result = (type == "enum") ? makeEnumNode(e, nm, m) :
                 makeFixedNode(e, nm, m);
@@ -447,7 +505,7 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const 
char* input)
         ::strlen(input));
 }
 
-AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string& input)
+AVRO_DECL ValidSchema compileJsonSchemaFromString(const string& input)
 {
     return compileJsonSchemaFromMemory(
         reinterpret_cast<const uint8_t*>(&input[0]), input.size());
@@ -468,7 +526,7 @@ AVRO_DECL void compileJsonSchema(std::istream &is, 
ValidSchema &schema)
     schema = compile(is);
 }
 
-AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, 
std::string &error)
+AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string 
&error)
 {
     try {
         compileJsonSchema(is, schema);
diff --git a/lang/c++/impl/DataFile.cc b/lang/c++/impl/DataFile.cc
index a71860d1f..1777d532e 100644
--- a/lang/c++/impl/DataFile.cc
+++ b/lang/c++/impl/DataFile.cc
@@ -121,7 +121,7 @@ void DataFileWriterBase::init(const ValidSchema &schema, 
size_t syncInterval, co
     } else {
       throw Exception(boost::format("Unknown codec: %1%") % codec);
     }
-    setMetadata(AVRO_SCHEMA_KEY, toString(schema));
+    setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false));
 
     writeHeader();
     encoderPtr_->init(*buffer_);
@@ -296,7 +296,7 @@ void DataFileReaderBase::init()
 void DataFileReaderBase::init(const ValidSchema& readerSchema)
 {
     readerSchema_ = readerSchema;
-    dataDecoder_  = (toString(readerSchema_) != toString(dataSchema_)) ?
+    dataDecoder_  = (readerSchema_.toJson(true) != dataSchema_.toJson(true)) ?
         resolvingDecoder(dataSchema_, readerSchema_, binaryDecoder()) :
         binaryDecoder();
     readDataBlock();
diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc
index 50b7fbaa8..bdb05a0b5 100644
--- a/lang/c++/impl/NodeImpl.cc
+++ b/lang/c++/impl/NodeImpl.cc
@@ -17,6 +17,8 @@
  */
 
 
+#include <sstream>
+#include <iomanip>
 #include <boost/algorithm/string/replace.hpp>
 #include "NodeImpl.hh"
 
@@ -25,6 +27,7 @@ using std::string;
 namespace avro {
 
 namespace {
+
 // Escape string for serialization.
 string escape(const string &unescaped) {
   string s;
@@ -219,12 +222,20 @@ void
 NodePrimitive::printJson(std::ostream &os, int depth) const
 {
     os << '\"' << type() << '\"';
+    if (getDoc().size()) {
+        os << ",\n" << indent(depth) << "\"doc\": \""
+           << escape(getDoc()) << "\"";
+    }
 }
 
 void
 NodeSymbolic::printJson(std::ostream &os, int depth) const
 {
     os << '\"' << nameAttribute_.get() << '\"';
+    if (getDoc().size()) {
+        os << ",\n" << indent(depth) << "\"doc\": \""
+           << escape(getDoc()) << "\"";
+    }
 }
 
 static void printName(std::ostream& os, const Name& n, int depth)
@@ -241,6 +252,10 @@ NodeRecord::printJson(std::ostream &os, int depth) const
     os << "{\n";
     os << indent(++depth) << "\"type\": \"record\",\n";
     printName(os, nameAttribute_.get(), depth);
+    if (getDoc().size()) {
+        os << indent(depth) << "\"doc\": \""
+           << escape(getDoc()) << "\",\n";
+    }
     os << indent(depth) << "\"fields\": [";
 
     size_t fields = leafAttributes_.size();
@@ -430,6 +445,10 @@ NodeEnum::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
     os << indent(++depth) << "\"type\": \"enum\",\n";
+    if (getDoc().size()) {
+        os << indent(depth) << "\"doc\": \""
+           << escape(getDoc()) << "\",\n";
+    }
     printName(os, nameAttribute_.get(), depth);
     os << indent(depth) << "\"symbols\": [\n";
 
@@ -451,6 +470,10 @@ NodeArray::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
     os << indent(depth+1) << "\"type\": \"array\",\n";
+    if (getDoc().size()) {
+        os << indent(depth+1) << "\"doc\": \""
+           << escape(getDoc()) << "\",\n";
+    }
     os << indent(depth+1) <<  "\"items\": ";
     leafAttributes_.get()->printJson(os, depth+1);
     os << '\n';
@@ -462,6 +485,10 @@ NodeMap::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
     os << indent(depth+1) <<"\"type\": \"map\",\n";
+    if (getDoc().size()) {
+        os << indent(depth+1) << "\"doc\": \""
+           << escape(getDoc()) << "\",\n";
+    }
     os << indent(depth+1) << "\"values\": ";
     leafAttributes_.get(1)->printJson(os, depth+1);
     os << '\n';
@@ -490,6 +517,10 @@ NodeFixed::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
     os << indent(++depth) << "\"type\": \"fixed\",\n";
+    if (getDoc().size()) {
+        os << indent(depth) << "\"doc\": \""
+           << escape(getDoc()) << "\",\n";
+    }
     printName(os, nameAttribute_.get(), depth);
     os << indent(depth) << "\"size\": " << sizeAttribute_.get() << "\n";
     os << indent(--depth) << '}';
diff --git a/lang/c++/impl/Schema.cc b/lang/c++/impl/Schema.cc
index b5457aeec..6676574fa 100644
--- a/lang/c++/impl/Schema.cc
+++ b/lang/c++/impl/Schema.cc
@@ -51,6 +51,15 @@ RecordSchema::addField(const std::string &name, const Schema 
&fieldSchema)
     node_->addLeaf(fieldSchema.root());
 }
 
+std::string RecordSchema::getDoc() const
+{
+    return node_->getDoc();
+}
+void RecordSchema::setDoc(const std::string& doc)
+{
+    node_->setDoc(doc);
+}
+
 EnumSchema::EnumSchema(const std::string &name) :
     Schema(new NodeEnum)
 {
diff --git a/lang/c++/impl/ValidSchema.cc b/lang/c++/impl/ValidSchema.cc
index bd28079bc..17ab9949e 100644
--- a/lang/c++/impl/ValidSchema.cc
+++ b/lang/c++/impl/ValidSchema.cc
@@ -24,16 +24,16 @@
 #include "Node.hh"
 
 using std::string;
+using std::ostringstream;
 using std::make_pair;
 using boost::format;
 using boost::shared_ptr;
 using boost::static_pointer_cast;
 
 namespace avro {
-
 typedef std::map<Name, NodePtr> SymbolMap;
 
-static bool validate(const NodePtr &node, SymbolMap &symbolMap) 
+static bool validate(const NodePtr &node, SymbolMap &symbolMap)
 {
     if (! node->isValid()) {
         throw Exception(format("Schema is invalid, due to bad node of type 
%1%")
@@ -77,7 +77,7 @@ static bool validate(const NodePtr &node, SymbolMap 
&symbolMap)
             // map (which could potentially create circular shared pointer
             // links that could not be easily freed), replace this node with a
             // symbolic link to the original one.
-            
+
             node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
         }
     }
@@ -101,7 +101,7 @@ ValidSchema::ValidSchema(const Schema &schema) : 
root_(schema.root())
     validate(root_);
 }
 
-ValidSchema::ValidSchema() : root_(NullSchema().root()) 
+ValidSchema::ValidSchema() : root_(NullSchema().root())
 {
     validate(root_);
 }
@@ -113,18 +113,80 @@ ValidSchema::setSchema(const Schema &schema)
     validate(root_);
 }
 
-void 
+void
 ValidSchema::toJson(std::ostream &os) const
-{ 
+{
     root_->printJson(os, 0);
     os << '\n';
 }
 
-void 
+string
+ValidSchema::toJson(bool prettyPrint) const
+{
+    ostringstream oss;
+    toJson(oss);
+    if (!prettyPrint) {
+        return compactSchema(oss.str());
+    }
+    return oss.str();
+}
+
+void
 ValidSchema::toFlatList(std::ostream &os) const
-{ 
+{
     root_->printBasicInfo(os);
 }
 
+/*
+ * compactSchema compacts and returns a formatted string representation
+ * of a ValidSchema object by removing the whitespaces outside of the quoted
+ * field names and values. It can handle the cases where the quoted value is
+ * in UTF-8 format. Note that this method is not responsible for validating
+ * the schema.
+ */
+string ValidSchema::compactSchema(const string& schema) {
+    bool insideQuote = false;
+    size_t newPos = 0;
+    string data(schema.data());
+
+    for (size_t currentPos = 0; currentPos < schema.size(); currentPos++) {
+        if (!insideQuote && std::isspace(data[currentPos])) {
+            // Skip the white spaces outside quotes.
+            continue;
+        }
+
+        if (data[currentPos] == '\"') {
+            // It is valid for a quote to be part of the value for some fields,
+            // e.g., the "doc" field.  In that case, the quote is expected to 
be
+            // escaped inside the schema.  Since the escape character '\\' 
could
+            // be escaped itself, we need to check whether there are an even
+            // number of consecutive slashes prior to the quote.
+            int leadingSlashes = 0;
+            for (int i = newPos - 1; i >= 0; i--) {
+                if (data[i] == '\\') {
+                    leadingSlashes++;
+                } else {
+                    break;
+                }
+            }
+            if (leadingSlashes % 2 == 0) {
+                // Found a real quote which identifies either the start or the
+                // end of a field name or value.
+                insideQuote = !insideQuote;
+            }
+        }
+        data[newPos++] = data[currentPos];
+    }
+
+    if (insideQuote) {
+        throw Exception("Schema is not well formed with mismatched quotes");
+    }
+
+    if (newPos < schema.size()) {
+        data.resize(newPos);
+    }
+    return data;
+}
+
 } // namespace avro
 
diff --git a/lang/c++/jsonschemas/bigrecord b/lang/c++/jsonschemas/bigrecord
index ba430a035..af8a5ad39 100644
--- a/lang/c++/jsonschemas/bigrecord
+++ b/lang/c++/jsonschemas/bigrecord
@@ -1,9 +1,11 @@
 {
     "type": "record",
+    "doc": "Top level Doc.",
     "name": "RootRecord",
     "fields": [
         {
             "name": "mylong",
+            "doc": "mylong field doc.",
             "type": "long"
         },
         {
diff --git a/lang/c++/test/DataFileTests.cc b/lang/c++/test/DataFileTests.cc
index 8d8c7b08e..bb2efc601 100644
--- a/lang/c++/test/DataFileTests.cc
+++ b/lang/c++/test/DataFileTests.cc
@@ -45,6 +45,7 @@ using boost::unit_test::test_suite;
 using avro::ValidSchema;
 using avro::GenericDatum;
 using avro::GenericRecord;
+using avro::NodePtr;
 
 const int count = 1000;
 
@@ -133,13 +134,29 @@ static const char dsch[] = "{\"type\": \"record\","
         "{\"name\":\"re\", \"type\":\"double\"},"
         "{\"name\":\"im\", \"type\":\"double\"}"
     "]}";
-static const char dblsch[] = "{\"type\": \"record\","
-    "\"name\":\"ComplexDouble\", \"fields\": ["
+static const char dblsch[] =
+    "{\"type\": \"record\","
+    "\"name\":\"ComplexDouble\", "
+    "\"doc\": \"\\\"Quoted_doc_string\\\"\", "
+    "\"fields\": ["
         "{\"name\":\"re\", \"type\":\"double\"}"
     "]}";
 static const char fsch[] = "{\"type\": \"fixed\","
     "\"name\":\"Fixed_32\", \"size\":4}";
-
+static const char ischWithDoc[] =
+    "{\"type\": \"record\","
+    "\"name\":\"ComplexInteger\", "
+    "\"doc\": \"record_doc\", "
+    "\"fields\": ["
+        "{\"name\":\"re1\", \"type\":\"long\", \"doc\": \"field_doc\"},"
+        "{\"name\":\"re2\", \"type\":\"long\"},"
+        "{\"name\":\"re3\", \"type\":\"long\", \"doc\": \"\"},"
+        "{\"name\":\"re4\", \"type\":\"long\", "
+        "\"doc\": \"A_\\\"quoted_doc\\\"\"},"
+        "{\"name\":\"re5\", \"type\":\"long\", \"doc\": \"doc with\nspaces\"},"
+        "{\"name\":\"re6\", \"type\":\"long\", "
+        "\"doc\": \"extra slashes\\\\\\\\\"}"
+    "]}";
 
 string toString(const ValidSchema& s)
 {
@@ -561,18 +578,42 @@ class DataFileTest {
 #endif
 
     void testSchemaReadWrite() {
-    uint32_t a=42;
-    {
+        uint32_t a=42;
+        {
             avro::DataFileWriter<uint32_t> df(filename, writerSchema);
-        df.write(a);
+            df.write(a);
         }
 
         {
-        avro::DataFileReader<uint32_t> df(filename);
-        uint32_t b;
+            avro::DataFileReader<uint32_t> df(filename);
+            uint32_t b;
             df.read(b);
             BOOST_CHECK_EQUAL(b, a);
+        }
     }
+
+    void testSchemaReadWriteWithDoc() {
+        uint32_t a=42;
+        {
+          avro::DataFileWriter<uint32_t> df(filename, writerSchema);
+          df.write(a);
+        }
+
+        {
+          avro::DataFileReader<uint32_t> df(filename);
+          uint32_t b;
+          df.read(b);
+          BOOST_CHECK_EQUAL(b, a);
+
+          const NodePtr& root = df.readerSchema().root();
+          BOOST_CHECK_EQUAL(root->getDoc(), "record_doc");
+          BOOST_CHECK_EQUAL(root->leafAt(0)->getDoc(), "field_doc");
+          BOOST_CHECK_EQUAL(root->leafAt(1)->getDoc(), "");
+          BOOST_CHECK_EQUAL(root->leafAt(2)->getDoc(), "");
+          BOOST_CHECK_EQUAL(root->leafAt(3)->getDoc(), "A_\"quoted_doc\"");
+          BOOST_CHECK_EQUAL(root->leafAt(4)->getDoc(), "doc with\nspaces");
+          BOOST_CHECK_EQUAL(root->leafAt(5)->getDoc(), "extra slashes\\\\");
+        }
     }
 };
 
@@ -677,6 +718,14 @@ init_unit_test_suite(int argc, char *argv[])
         ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t));
         boost::unit_test::framework::master_test_suite().add(ts);
     }
+    {
+        test_suite *ts = BOOST_TEST_SUITE("DataFile tests: test12.df");
+        shared_ptr<DataFileTest> t(new DataFileTest("test12.df", ischWithDoc, 
ischWithDoc));
+        ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testWrite, t));
+        
ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testSchemaReadWriteWithDoc, t));
+        ts->add(BOOST_CLASS_TEST_CASE(&DataFileTest::testCleanup, t));
+        boost::unit_test::framework::master_test_suite().add(ts);
+    }
 
     return 0;
 }
diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 026c4d0b7..f6d6195da 100644
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -47,42 +47,42 @@ const char* basicSchemas[] = {
     "{ \"type\": \"string\" }",
 
     // Record
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": []}",
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
-        "[{\"name\": \"f\",\"type\": \"long\"}]}",
-    "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
-        "[{\"name\": \"f1\",\"type\": \"long\"},"
-        "{\"name\": \"f2\", \"type\": \"int\"}]}",
-    "{\"type\": \"error\",\"name\": \"Test\",\"fields\": "
-        "[{\"name\": \"f1\",\"type\": \"long\"},"
-        "{\"name\": \"f2\", \"type\": \"int\"}]}",
+    
"{\"type\":\"record\",\"name\":\"Test\",\"doc\":\"Doc_string\",\"fields\":[]}",
+    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
+        "[{\"name\":\"f\",\"type\":\"long\"}]}",
+    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
+        "[{\"name\":\"f1\",\"type\":\"long\",\"doc\":\"field_doc\"},"
+        "{\"name\":\"f2\",\"type\":\"int\"}]}",
+    "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
+        "[{\"name\":\"f1\",\"type\":\"long\"},"
+        "{\"name\":\"f2\",\"type\":\"int\"}]}",
 
     // Recursive.
     "{\"type\":\"record\",\"name\":\"LongList\","
-        "\"fields\":[{\"name\":\"value\",\"type\":\"long\"},"
+        
"\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"doc\":\"recursive_doc\"},"
         "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}",
     // Enum
-    "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}",
+    
"{\"type\":\"enum\",\"doc\":\"enum_doc\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}",
 
     // Array
-    "{\"type\": \"array\", \"items\": \"long\"}",
-    "{\"type\": \"array\",\"items\": {\"type\": \"enum\", "
-        "\"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}}",
+    "{\"type\":\"array\",\"doc\":\"array_doc\",\"items\":\"long\"}",
+    "{\"type\":\"array\",\"items\":{\"type\":\"enum\","
+        "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
 
     // Map
-    "{\"type\": \"map\", \"values\": \"long\"}",
-    "{\"type\": \"map\",\"values\": {\"type\": \"enum\", "
-        "\"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}}",
+    "{\"type\":\"map\",\"doc\":\"map_doc\",\"values\":\"long\"}",
+    "{\"type\":\"map\",\"values\":{\"type\":\"enum\", "
+        "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}",
 
     // Union
-    "[\"string\", \"null\", \"long\"]",
+    "[\"string\",\"null\",\"long\"]",
 
     // Fixed
-    "{ \"type\": \"fixed\", \"name\": \"Test\", \"size\": 1}",
-    "{\"type\": \"fixed\", \"name\": \"MyFixed\", "
-        "\"namespace\": \"org.apache.hadoop.avro\", \"size\": 1}",
-    "{ \"type\": \"fixed\", \"name\": \"Test\", \"size\": 1}",
-    "{ \"type\": \"fixed\", \"name\": \"Test\", \"size\": 1}",
+    "{\"type\":\"fixed\",\"doc\":\"fixed_doc\",\"name\":\"Test\",\"size\":1}",
+    "{\"type\":\"fixed\",\"name\":\"MyFixed\","
+        "\"namespace\":\"org.apache.hadoop.avro\",\"size\":1}",
+    "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}",
+    "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}",
 
     // Extra attributes (should be ignored)
     "{\"type\": \"null\", \"extra attribute\": \"should be ignored\"}",
@@ -135,7 +135,7 @@ const char* basicSchemaErrors[] = {
     // Duplicate type
     "[{\"type\": \"array\", \"items\": \"long\"}, "
         "{\"type\": \"array\", \"items\": \"string\"}]",
-        
+
     // Fixed
     // No size
     "{\"type\": \"fixed\", \"name\": \"Missing size\"}",
@@ -166,7 +166,7 @@ const char* roundTripSchemas[] = {
     "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
         "[{\"name\":\"f1\",\"type\":\"long\"},"
         "{\"name\":\"f2\",\"type\":\"int\"}]}",
-/* Avro-C++ cannot do a round-trip on error schemas. 
+/* Avro-C++ cannot do a round-trip on error schemas.
  * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":"
  *       "[{\"name\":\"f1\",\"type\":\"long\"},"
  *       "{\"name\":\"f2\",\"type\":\"int\"}]}"
@@ -199,7 +199,32 @@ const char* roundTripSchemas[] = {
     "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}"
 };
 
+const char* schemasToCompact[] = {
+    // Schema without any whitespace
+    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":[]}",
+
+    // Schema with whitespaces outside of field names/values only.
+    "{\"type\":   \"record\",\n   \n\"name\":\"Test\", \t\t\"fields\":[]}\n 
\n",
 
+    // Schema with whitespaces both inside and outside of field names/values.
+    "{\"type\":   \"record\",  \"name\":               \"ComplexInteger\"\n, "
+    "\"doc\": \"record_doc °C \u00f8 \x1f \\n \n \t\", "
+    "\"fields\": ["
+        "{\"name\":   \"re1\", \"type\":               \"long\", "
+        "\"doc\":   \"A \\\"quoted doc\\\"\"      },                 "
+        "{\"name\":  \"re2\", \"type\":   \"long\", \n\t"
+        "\"doc\": \"extra slashes\\\\\\\\\"}"
+    "]}"};
+
+const char* compactSchemas[] = {
+    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":[]}",
+    "{\"type\":\"record\",\"name\":\"Test\",\"fields\":[]}",
+    "{\"type\":\"record\",\"name\":\"ComplexInteger\","
+    "\"doc\":\"record_doc °C \u00f8 \\u001f \\n \\n \\t\","
+    "\"fields\":["
+        "{\"name\":\"re1\",\"type\":\"long\",\"doc\":\"A \\\"quoted 
doc\\\"\"},"
+        "{\"name\":\"re2\",\"type\":\"long\",\"doc\":\"extra 
slashes\\\\\\\\\"}"
+    "]}"};
 
 static void testBasic(const char* schema)
 {
@@ -219,17 +244,36 @@ static void testCompile(const char* schema)
     compileJsonSchemaFromString(std::string(schema));
 }
 
-// Test that the JSON output from a valid schema matches the JSON that was 
+// Test that the JSON output from a valid schema matches the JSON that was
 // used to construct it, apart from whitespace changes.
 static void testRoundTrip(const char* schema)
 {
     BOOST_TEST_CHECKPOINT(schema);
-    avro::ValidSchema compiledSchema = 
compileJsonSchemaFromString(std::string(schema));
+    avro::ValidSchema compiledSchema =
+        compileJsonSchemaFromString(std::string(schema));
     std::ostringstream os;
     compiledSchema.toJson(os);
     std::string result = os.str();
     result.erase(std::remove_if(result.begin(), result.end(), ::isspace), 
result.end()); // Remove whitespace
     BOOST_CHECK(result == std::string(schema));
+    // Verify that the compact schema from toJson has the same content as the
+    // schema.
+    std::string result2 = compiledSchema.toJson(false);
+    BOOST_CHECK(result2 == std::string(schema));
+}
+
+static void testCompactSchemas()
+{
+  for (size_t i = 0; i < sizeof(schemasToCompact)/ 
sizeof(schemasToCompact[0]); i++)
+  {
+    const char* schema = schemasToCompact[i];
+    BOOST_TEST_CHECKPOINT(schema);
+    avro::ValidSchema compiledSchema =
+        compileJsonSchemaFromString(std::string(schema));
+
+    std::string result = compiledSchema.toJson(false);
+    BOOST_CHECK_EQUAL(result, compactSchemas[i]);
+  }
 }
 
 }
@@ -239,10 +283,10 @@ static void testRoundTrip(const char* schema)
 
 #define ADD_PARAM_TEST(ts, func, data) \
     ts->add(BOOST_PARAM_TEST_CASE(&func, data, ENDOF(data)))
-    
+
 
 boost::unit_test::test_suite*
-init_unit_test_suite(int argc, char* argv[]) 
+init_unit_test_suite(int argc, char* argv[])
 {
     using namespace boost::unit_test;
 
@@ -252,5 +296,6 @@ init_unit_test_suite(int argc, char* argv[])
         avro::schema::basicSchemaErrors);
     ADD_PARAM_TEST(ts, avro::schema::testCompile, avro::schema::basicSchemas);
     ADD_PARAM_TEST(ts, avro::schema::testRoundTrip, 
avro::schema::roundTripSchemas);
+    ts->add(BOOST_TEST_CASE(&avro::schema::testCompactSchemas));
     return ts;
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> C++ API compileJsonSchema ignores "doc" and custom attributes on a 
> field/record
> -------------------------------------------------------------------------------
>
>                 Key: AVRO-1256
>                 URL: https://issues.apache.org/jira/browse/AVRO-1256
>             Project: Apache Avro
>          Issue Type: Improvement
>          Components: c++
>    Affects Versions: 1.7.2
>         Environment: Running on all platforms (Windows, OSX, Linux)
>            Reporter: Tim Menninger
>            Assignee: Aniket Mokashi
>            Priority: Minor
>         Attachments: AVRO-1256.patch
>
>
> It appears that when my JSON is compiled into a valid schema object it is 
> ignoring all types of "documentation" that I am trying to adorn with each 
> field in my record. Reading through the Java issues it seems that this was a 
> bug and fixed (AVRO-601, AVRO-612, AVRO-779) but it seems the C++ 
> implementation has yet to adopt this feature? This is my sample schema, I 
> have attempted to insert both "doc" and "mycustom" in multiple places to see 
> if it is supported at any level. Please excuse if there appears to be a 
> syntax error in the JSON I hand tweaked some of this. The schema is valid and 
> successfully parses.
> {
>       "type": "record",
>       "name": "myschema",
>       "doc": "Doc Meta",
>       "mycustom": "My Custom",
>       "fields": [
>               { "name":"field_a","type":["string","null"], "doc":"Doc Meta", 
> "mycustom":"My Custom A"},
>               { "name":"field_b","type":["string","null"], "doc":"Doc Meta", 
> "mycustom":"My Custom B"},
>               { "name":"field_c","type":["string","null"], "doc":"Doc Meta", 
> "mycustom":"My Custom C"}
>       ]
> }
> I looked through the SchemaTests.cc code for 1.7.3 and there was not a test 
> case for this there so i didn't think this was addressed in that version. I 
> am running 1.7.2. When this schema is used to load with compileJsonSchema and 
> then a file is serialized the file schema looks like this.
> {
>       "type":"record",
>       "name":"myschema",
>       "fields": [
>               { "name":"field_a","type":["string","null"]},
>               { "name":"field_b","type":["string","null"]},
>               { "name":"field_c","type":["string","null"]}
>       ]
> }



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to