[jira] [Commented] (AVRO-1335) C++ should support field default values

ASF GitHub Bot (JIRA) Sun, 14 Oct 2018 11:26:09 -0700


    [ 
https://issues.apache.org/jira/browse/AVRO-1335?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16649483#comment-16649483
 ]


ASF GitHub Bot commented on AVRO-1335:
--------------------------------------

thiru-apache closed pull request #241: AVRO-1335: Adds C++ support for default 
values in schema serialization…
URL: https://github.com/apache/avro/pull/241
 
 
   

This PR was merged from a forked repository. Because GitHub hides the
original diff of a foreign pull request (one from a fork) once it is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/lang/c++/api/Node.hh b/lang/c++/api/Node.hh
index ff227b9e4..ebba375be 100644
--- a/lang/c++/api/Node.hh
+++ b/lang/c++/api/Node.hh
@@ -77,7 +77,7 @@ std::ostream& operator << (std::ostream& os, const Name& n) {
 /// The user does not use the Node object directly, they interface with Schema
 /// objects.
 ///
-/// The Node object uses reference-counted pointers.  This is so that schemas 
+/// The Node object uses reference-counted pointers.  This is so that schemas
 /// may be reused in other schemas, without needing to worry about memory
 /// deallocation for nodes that are added to multiple schema parse trees.
 ///
@@ -152,6 +152,11 @@ class AVRO_DECL Node : private boost::noncopyable
 
     virtual void setLeafToSymbolic(int index, const NodePtr &node) = 0;
 
+    // Serialize the default value GenericDatum g for the node contained
+    // in a record node.
+    virtual void printDefaultToJson(const GenericDatum& g, std::ostream &os,
+                                    int depth) const = 0;
+
   protected:
 
     void checkLock() const {
diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/api/NodeImpl.hh
index cbfcfb51f..0f3202368 100644
--- a/lang/c++/api/NodeImpl.hh
+++ b/lang/c++/api/NodeImpl.hh
@@ -25,6 +25,9 @@
 #include <limits>
 #include <set>
 #include <boost/weak_ptr.hpp>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
 
 #include "Node.hh"
 #include "NodeConcepts.hh"
@@ -34,8 +37,8 @@ namespace avro {
 /// Implementation details for Node.  NodeImpl represents all the avro types,
 /// whose properties are enabled or disabled by selecting concept classes.
 
-template 
-< 
+template
+<
     class NameConcept,
     class LeavesConcept,
     class LeafNamesConcept,
@@ -54,9 +57,9 @@ class NodeImpl : public Node
         sizeAttribute_()
     { }
 
-    NodeImpl(Type type, 
-             const NameConcept &name, 
-             const LeavesConcept &leaves, 
+    NodeImpl(Type type,
+             const NameConcept &name,
+             const LeavesConcept &leaves,
              const LeafNamesConcept &leafNames,
              const SizeConcept &size) :
         Node(type),
@@ -81,12 +84,12 @@ class NodeImpl : public Node
     void doSetName(const Name &name) {
         nameAttribute_.add(name);
     }
-    
+
     const Name &name() const {
         return nameAttribute_.get();
     }
 
-    void doAddLeaf(const NodePtr &newLeaf) { 
+    void doAddLeaf(const NodePtr &newLeaf) {
         leafAttributes_.add(newLeaf);
     }
 
@@ -94,11 +97,11 @@ class NodeImpl : public Node
         return leafAttributes_.size();
     }
 
-    const NodePtr &leafAt(int index) const { 
+    const NodePtr &leafAt(int index) const {
         return leafAttributes_.get(index);
     }
 
-    void doAddName(const std::string &name) { 
+    void doAddName(const std::string &name) {
         if (! nameIndex_.add(name, leafNameAttributes_.size())) {
             throw Exception(boost::format("Cannot add duplicate name: %1%") % 
name);
         }
@@ -109,7 +112,7 @@ class NodeImpl : public Node
         return leafNameAttributes_.size();
     }
 
-    const std::string &nameAt(int index) const { 
+    const std::string &nameAt(int index) const {
         return leafNameAttributes_.get(index);
     }
 
@@ -130,12 +133,12 @@ class NodeImpl : public Node
     void printBasicInfo(std::ostream &os) const;
 
     void setLeafToSymbolic(int index, const NodePtr &node);
-   
+
     SchemaResolution furtherResolution(const Node &reader) const {
         SchemaResolution match = RESOLVE_NO_MATCH;
 
         if (reader.type() == AVRO_SYMBOLIC) {
-    
+
             // resolve the symbolic type, and check again
             const NodePtr &node = reader.leafAt(0);
             match = resolve(*node);
@@ -145,7 +148,7 @@ class NodeImpl : public Node
             // in this case, need to see if there is an exact match for the
             // writer's type, or if not, the first one that can be promoted to 
a
             // match
-        
+
             for(size_t i= 0; i < reader.leaves(); ++i)  {
 
                 const NodePtr &node = reader.leafAt(i);
@@ -157,7 +160,7 @@ class NodeImpl : public Node
                     break;
                 }
 
-                // thisMatch is either no match, or promotable, this will set 
match to 
+                // thisMatch is either no match, or promotable, this will set 
match to
                 // promotable if it hasn't been set already
                 if (match == RESOLVE_NO_MATCH) {
                     match = thisMatch;
@@ -213,6 +216,8 @@ class AVRO_DECL NodePrimitive : public NodeImplPrimitive
     bool isValid() const {
         return true;
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeSymbolic : public NodeImplSymbolic
@@ -240,6 +245,8 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic
         return (nameAttribute_.size() == 1);
     }
 
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
+
     bool isSet() const {
          return (actualNode_.lock() != 0);
     }
@@ -265,12 +272,12 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic
 class AVRO_DECL NodeRecord : public NodeImplRecord {
     std::vector<GenericDatum> defaultValues;
 public:
-    NodeRecord() : NodeImplRecord(AVRO_RECORD) { } 
+    NodeRecord() : NodeImplRecord(AVRO_RECORD) { }
     NodeRecord(const HasName &name, const MultiLeaves &fields,
         const LeafNames &fieldsNames,
         const std::vector<GenericDatum>& dv) :
         NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()),
-        defaultValues(dv) { 
+        defaultValues(dv) {
         for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
             if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
                 throw Exception(boost::format(
@@ -290,13 +297,15 @@ public:
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
-        return ((nameAttribute_.size() == 1) && 
+        return ((nameAttribute_.size() == 1) &&
             (leafAttributes_.size() == leafNameAttributes_.size()));
     }
 
     const GenericDatum& defaultValueAt(int index) {
         return defaultValues[index];
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeEnum : public NodeImplEnum
@@ -304,29 +313,31 @@ class AVRO_DECL NodeEnum : public NodeImplEnum
   public:
 
     NodeEnum() :
-        NodeImplEnum(AVRO_ENUM) 
+        NodeImplEnum(AVRO_ENUM)
     { }
 
     NodeEnum(const HasName &name, const LeafNames &symbols) :
         NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize())
-    { 
+    {
         for(size_t i=0; i < leafNameAttributes_.size(); ++i) {
             if(!nameIndex_.add(leafNameAttributes_.get(i), i)) {
                  throw Exception(boost::format("Cannot add duplicate name: 
%1%") % leafNameAttributes_.get(i));
             }
         }
     }
-        
+
     SchemaResolution resolve(const Node &reader)  const;
 
     void printJson(std::ostream &os, int depth) const;
 
     bool isValid() const {
         return (
-                (nameAttribute_.size() == 1) && 
-                (leafNameAttributes_.size() > 0) 
+                (nameAttribute_.size() == 1) &&
+                (leafNameAttributes_.size() > 0)
                );
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeArray : public NodeImplArray
@@ -348,6 +359,8 @@ class AVRO_DECL NodeArray : public NodeImplArray
     bool isValid() const {
         return (leafAttributes_.size() == 1);
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeMap : public NodeImplMap
@@ -356,14 +369,14 @@ class AVRO_DECL NodeMap : public NodeImplMap
 
     NodeMap() :
         NodeImplMap(AVRO_MAP)
-    { 
+    {
          NodePtr key(new NodePrimitive(AVRO_STRING));
          doAddLeaf(key);
     }
 
     explicit NodeMap(const SingleLeaf &values) :
         NodeImplMap(AVRO_MAP, NoName(), values, NoLeafNames(), NoSize())
-    { 
+    {
         // need to add the key for the map too
         NodePtr key(new NodePrimitive(AVRO_STRING));
         doAddLeaf(key);
@@ -379,6 +392,8 @@ class AVRO_DECL NodeMap : public NodeImplMap
     bool isValid() const {
         return (leafAttributes_.size() == 2);
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeUnion : public NodeImplUnion
@@ -453,6 +468,8 @@ class AVRO_DECL NodeUnion : public NodeImplUnion
         }
         return false;
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 class AVRO_DECL NodeFixed : public NodeImplFixed
@@ -473,19 +490,21 @@ class AVRO_DECL NodeFixed : public NodeImplFixed
 
     bool isValid() const {
         return (
-                (nameAttribute_.size() == 1) && 
-                (sizeAttribute_.size() == 1) 
+                (nameAttribute_.size() == 1) &&
+                (sizeAttribute_.size() == 1)
                );
     }
+
+    void printDefaultToJson(const GenericDatum& g, std::ostream &os, int 
depth) const;
 };
 
 template < class A, class B, class C, class D >
-inline void 
+inline void
 NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const NodePtr &node)
 {
     if(!B::hasAttribute) {
         throw Exception("Cannot change leaf node for nonexistent leaf");
-    } 
+    }
 
     NodePtr &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index));
     if(replaceNode->name() != node->name()) {
@@ -501,7 +520,7 @@ NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const 
NodePtr &node)
 }
 
 template < class A, class B, class C, class D >
-inline void 
+inline void
 NodeImpl<A,B,C,D>::printBasicInfo(std::ostream &os) const
 {
     os << type();
@@ -529,7 +548,7 @@ NodeImpl<A,B,C,D>::printBasicInfo(std::ostream &os) const
 }
 
 
-inline NodePtr resolveSymbol(const NodePtr &node) 
+inline NodePtr resolveSymbol(const NodePtr &node)
 {
     if(node->type() != AVRO_SYMBOLIC) {
         throw Exception("Only symbolic nodes may be resolved");
@@ -538,6 +557,16 @@ inline NodePtr resolveSymbol(const NodePtr &node)
     return symNode->getNode();
 }
 
+template< typename T >
+inline std::string intToHex(T i)
+{
+  std::stringstream stream;
+  stream << "\\u"
+         << std::setfill('0') << std::setw(sizeof(T))
+         << std::hex << i;
+  return stream.str();
+}
+
 } // namespace avro
 
 #endif
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index 1252a717f..f3610651b 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -349,7 +349,7 @@ static NodePtr makeFixedNode(const Entity& e,
 {
     int v = static_cast<int>(getLongField(e, m, "size"));
     if (v <= 0) {
-        throw Exception(boost::format("Size for fixed is not positive: ") %
+        throw Exception(boost::format("Size for fixed is not positive: %1%") %
             e.toString());
     }
     return NodePtr(new NodeFixed(asSingleAttribute(name),
diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc
index 606cd2093..435d2ef5d 100644
--- a/lang/c++/impl/NodeImpl.cc
+++ b/lang/c++/impl/NodeImpl.cc
@@ -17,14 +17,82 @@
  */
 
 
+#include <boost/algorithm/string/replace.hpp>
 #include "NodeImpl.hh"
 
+
 namespace avro {
 
-SchemaResolution 
+namespace {
+// Escape string for serialization.
+std::string escape(const std::string &unescaped) {
+  std::string s;
+  s.reserve(unescaped.length());
+  for (auto c : unescaped) {
+    switch (c) {
+      case '\\':
+      case '"':
+      case '/':
+        s += '\\';
+        s += c;
+        break;
+      case '\b':
+        s += '\\';
+        s += 'b';
+        break;
+      case '\f':
+        s += '\f';
+        break;
+      case '\n':
+        s += '\\';
+        s += 'n';
+        break;
+      case '\r':
+        s += '\\';
+        s += 'r';
+        break;
+      case '\t':
+        s += '\\';
+        s += 't';
+        break;
+      default:
+        if (!std::iscntrl(c, std::locale::classic())) {
+          s += c;
+          continue;
+        }
+        s += intToHex(static_cast<unsigned int>(c));
+        break;
+    }
+  }
+  return s;
+}
+
+// Wrap an indentation in a struct for ostream operator<<
+struct indent {
+    indent(int depth) :
+        d(depth)
+    { }
+    int d;
+};
+
+/// ostream operator for indent
+std::ostream& operator <<(std::ostream &os, indent x)
+{
+    static const std::string spaces("    ");
+    while (x.d--) {
+        os << spaces;
+    }
+    return os;
+}
+
+} // anonymous namespace
+
+const int kByteStringSize = 6;
+
+SchemaResolution
 NodePrimitive::resolve(const Node &reader) const
 {
-    if(type() == reader.type()) {
+    if (type() == reader.type()) {
         return RESOLVE_MATCH;
     }
 
@@ -32,17 +100,17 @@ NodePrimitive::resolve(const Node &reader) const
 
       case AVRO_INT:
 
-        if( reader.type() == AVRO_LONG ) { 
+        if ( reader.type() == AVRO_LONG ) {
             return RESOLVE_PROMOTABLE_TO_LONG;
-        }   
+        }
 
         // fall-through intentional
 
       case AVRO_LONG:
- 
+
         if (reader.type() == AVRO_FLOAT) {
             return RESOLVE_PROMOTABLE_TO_FLOAT;
-        }   
+        }
 
         // fall-through intentional
 
@@ -50,49 +118,49 @@ NodePrimitive::resolve(const Node &reader) const
 
         if (reader.type() == AVRO_DOUBLE) {
             return RESOLVE_PROMOTABLE_TO_DOUBLE;
-        }   
+        }
 
       default:
         break;
-    }   
+    }
 
     return furtherResolution(reader);
 }
 
-SchemaResolution 
+SchemaResolution
 NodeRecord::resolve(const Node &reader) const
 {
-    if(reader.type() == AVRO_RECORD) {
-        if(name() == reader.name()) {
+    if (reader.type() == AVRO_RECORD) {
+        if (name() == reader.name()) {
             return RESOLVE_MATCH;
         }
     }
     return furtherResolution(reader);
 }
 
-SchemaResolution 
+SchemaResolution
 NodeEnum::resolve(const Node &reader) const
 {
-    if(reader.type() == AVRO_ENUM) {
+    if (reader.type() == AVRO_ENUM) {
         return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
     }
     return furtherResolution(reader);
 }
 
-SchemaResolution 
+SchemaResolution
 NodeArray::resolve(const Node &reader) const
 {
-    if(reader.type() == AVRO_ARRAY) {
+    if (reader.type() == AVRO_ARRAY) {
         const NodePtr &arrayType = leafAt(0);
         return arrayType->resolve(*reader.leafAt(0));
     }
     return furtherResolution(reader);
 }
 
-SchemaResolution 
+SchemaResolution
 NodeMap::resolve(const Node &reader) const
 {
-    if(reader.type() == AVRO_MAP) {
+    if (reader.type() == AVRO_MAP) {
         const NodePtr &mapType = leafAt(1);
         return mapType->resolve(*reader.leafAt(1));
     }
@@ -100,7 +168,7 @@ NodeMap::resolve(const Node &reader) const
 }
 
 SchemaResolution
-NodeUnion::resolve(const Node &reader) const 
+NodeUnion::resolve(const Node &reader) const
 {
 
     // If the writer is union, resolution only needs to occur when the selected
@@ -109,67 +177,49 @@ NodeUnion::resolve(const Node &reader) const
     // In this case, this function returns if there is a possible match given
     // any writer type, so just search type by type returning the best match
     // found.
-    
+
     SchemaResolution match = RESOLVE_NO_MATCH;
-    for(size_t i=0; i < leaves(); ++i) {
+    for (size_t i=0; i < leaves(); ++i) {
         const NodePtr &node = leafAt(i);
         SchemaResolution thisMatch = node->resolve(reader);
-        if(thisMatch == RESOLVE_MATCH) {
+        if (thisMatch == RESOLVE_MATCH) {
             match = thisMatch;
             break;
         }
-        if(match == RESOLVE_NO_MATCH) {
+        if (match == RESOLVE_NO_MATCH) {
             match = thisMatch;
         }
     }
     return match;
 }
 
-SchemaResolution 
+SchemaResolution
 NodeFixed::resolve(const Node &reader) const
 {
-    if(reader.type() == AVRO_FIXED) {
+    if (reader.type() == AVRO_FIXED) {
         return (
                 (reader.fixedSize() == fixedSize()) &&
-                (reader.name() == name()) 
-            ) ? 
+                (reader.name() == name())
+            ) ?
             RESOLVE_MATCH : RESOLVE_NO_MATCH;
     }
     return furtherResolution(reader);
 }
 
-SchemaResolution 
+SchemaResolution
 NodeSymbolic::resolve(const Node &reader) const
 {
     const NodePtr &node = leafAt(0);
     return node->resolve(reader);
 }
 
-// Wrap an indentation in a struct for ostream operator<< 
-struct indent { 
-    indent(int depth) :
-        d(depth)
-    { }
-    int d; 
-};
-
-/// ostream operator for indent
-std::ostream& operator <<(std::ostream &os, indent x)
-{
-    static const std::string spaces("    ");
-    while(x.d--) {
-        os << spaces; 
-    }
-    return os;
-}
-
-void 
+void
 NodePrimitive::printJson(std::ostream &os, int depth) const
 {
     os << '\"' << type() << '\"';
 }
 
-void 
+void
 NodeSymbolic::printJson(std::ostream &os, int depth) const
 {
     os << '\"' << nameAttribute_.get() << '\"';
@@ -183,7 +233,7 @@ static void printName(std::ostream& os, const Name& n, int 
depth)
     os << indent(depth) << "\"name\": \"" << n.simpleName() << "\",\n";
 }
 
-void 
+void
 NodeRecord::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
@@ -193,14 +243,27 @@ NodeRecord::printJson(std::ostream &os, int depth) const
 
     int fields = leafAttributes_.size();
     ++depth;
-    for(int i = 0; i < fields; ++i) {
-        if(i > 0) {
+    // Serialize "default" field:
+    assert(defaultValues.empty() || (defaultValues.size() == fields));
+    for (int i = 0; i < fields; ++i) {
+        if (i > 0) {
             os << ',';
         }
         os << '\n' << indent(depth) << "{\n";
         os << indent(++depth) << "\"name\": \"" << leafNameAttributes_.get(i) 
<< "\",\n";
         os << indent(depth) << "\"type\": ";
         leafAttributes_.get(i)->printJson(os, depth);
+
+        if (!defaultValues.empty()) {
+          if (!defaultValues[i].isUnion() &&
+              defaultValues[i].type() == AVRO_NULL) {
+            // No "default" field.
+          } else {
+            os << ",\n" << indent(depth) << "\"default\": ";
+            leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os,
+                                                       depth);
+          }
+        }
         os << '\n';
         os << indent(--depth) << '}';
     }
@@ -208,7 +271,159 @@ NodeRecord::printJson(std::ostream &os, int depth) const
     os << indent(--depth) << '}';
 }
 
-void 
+void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                       int depth) const {
+  assert(isPrimitive(g.type()));
+
+  switch (g.type()) {
+    case AVRO_NULL:
+      os << "null";
+      break;
+    case AVRO_BOOL:
+      os << (g.value<bool>() ? "true" : "false");
+      break;
+    case AVRO_INT:
+      os << g.value<int32_t>();
+      break;
+    case AVRO_LONG:
+      os << g.value<int64_t>();
+      break;
+    case AVRO_FLOAT:
+      os << std::to_string(g.value<float>());
+      break;
+    case AVRO_DOUBLE:
+      os << std::to_string(g.value<double>());
+      break;
+    case AVRO_STRING:
+      os << "\"" << escape(g.value<std::string>()) << "\"";
+      break;
+    case AVRO_BYTES: {
+      // Convert to a string:
+      const std::vector<uint8_t> &vg = g.value<std::vector<uint8_t> >();
+      std::string s;
+      s.resize(vg.size() * kByteStringSize);
+      for (unsigned int i = 0; i < vg.size(); i++) {
+        std::string hex_string = intToHex(static_cast<int>(vg[i]));
+        s.replace(i*kByteStringSize, kByteStringSize, hex_string);
+      }
+      os << "\"" << s << "\"";
+    } break;
+    default:
+      break;
+  }
+}
+
+void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                  int depth) const {
+  assert(g.type() == AVRO_ENUM);
+  os << "\"" << g.value<GenericEnum>().symbol() << "\"";
+}
+
+void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                   int depth) const {
+  assert(g.type() == AVRO_FIXED);
+  // ex: "\u00ff"
+  // Convert to a string
+  const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
+  std::string s;
+  s.resize(vg.size() * kByteStringSize);
+  for (unsigned int i = 0; i < vg.size(); i++) {
+    std::string hex_string = intToHex(static_cast<int>(vg[i]));
+    s.replace(i*kByteStringSize, kByteStringSize, hex_string);
+  }
+  os << "\"" << s << "\"";
+}
+
+void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                   int depth) const {
+  leafAt(0)->printDefaultToJson(g, os, depth);
+}
+
+void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                   int depth) const {
+  assert(g.type() == AVRO_ARRAY);
+  // ex: "default": [1]
+  if (g.value<GenericArray>().value().empty()) {
+    os << "[]";
+  } else {
+    os << "[\n";
+    depth++;
+
+    // Serialize all values of the array with recursive calls:
+    for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
+      if (i > 0) {
+        os << ",\n";
+      }
+      os << indent(depth);
+      leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
+                                    depth);
+    }
+    os << "\n" << indent(--depth) << "]";
+  }
+}
+
+void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                      int depth) const {
+  getNode()->printDefaultToJson(g, os, depth);
+}
+
+void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                    int depth) const {
+  assert(g.type() == AVRO_RECORD);
+  if (g.value<GenericRecord>().fieldCount() == 0) {
+    os << "{}";
+  } else {
+    os << "{\n";
+
+    // Serialize all fields of the record with recursive calls:
+    for (unsigned int i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
+      if (i == 0) {
+        ++depth;
+      } else {  // i > 0
+        os << ",\n";
+      }
+
+      os << indent(depth) << "\"";
+      assert(i < leaves());
+      os << leafNameAttributes_.get(i);
+      os << "\": ";
+
+      // Recursive call on child node to be able to get the name attribute
+      // (In case of a record we need the name of the leaves (contained in
+      // 'this'))
+      leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
+                                    depth);
+    }
+    os << "\n" << indent(--depth) << "}";
+  }
+}
+
+void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
+                                 int depth) const {
+  assert(g.type() == AVRO_MAP);
+  //{"a": 1}
+  if (g.value<GenericMap>().value().empty()) {
+    os << "{}";
+  } else {
+    os << "{\n";
+
+    for (unsigned int i = 0; i < g.value<GenericMap>().value().size(); i++) {
+      if (i == 0) {
+        ++depth;
+      } else {
+        os << ",\n";
+      }
+      os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
+         << "\": ";
+
+      leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, 
os,
+                                    depth);
+    }
+    os << "\n" << indent(--depth) << "}";
+  }
+}
+
+void
 NodeEnum::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
@@ -218,8 +433,8 @@ NodeEnum::printJson(std::ostream &os, int depth) const
 
     int names = leafNameAttributes_.size();
     ++depth;
-    for(int i = 0; i < names; ++i) {
-        if(i > 0) {
+    for (int i = 0; i < names; ++i) {
+        if (i > 0) {
             os << ",\n";
         }
         os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
@@ -229,7 +444,7 @@ NodeEnum::printJson(std::ostream &os, int depth) const
     os << indent(--depth) << '}';
 }
 
-void 
+void
 NodeArray::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
@@ -240,7 +455,7 @@ NodeArray::printJson(std::ostream &os, int depth) const
     os << indent(depth) << '}';
 }
 
-void 
+void
 NodeMap::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
@@ -251,14 +466,14 @@ NodeMap::printJson(std::ostream &os, int depth) const
     os << indent(depth) << '}';
 }
 
-void 
+void
 NodeUnion::printJson(std::ostream &os, int depth) const
 {
     os << "[\n";
     int fields = leafAttributes_.size();
     ++depth;
-    for(int i = 0; i < fields; ++i) {
-        if(i > 0) {
+    for (int i = 0; i < fields; ++i) {
+        if (i > 0) {
             os << ",\n";
         }
         os << indent(depth);
@@ -268,7 +483,7 @@ NodeUnion::printJson(std::ostream &os, int depth) const
     os << indent(--depth) << ']';
 }
 
-void 
+void
 NodeFixed::printJson(std::ostream &os, int depth) const
 {
     os << "{\n";
diff --git a/lang/c++/jsonschemas/bigrecord_r b/lang/c++/jsonschemas/bigrecord_r
index 7c477cd0f..5da05222e 100644
--- a/lang/c++/jsonschemas/bigrecord_r
+++ b/lang/c++/jsonschemas/bigrecord_r
@@ -62,7 +62,7 @@
                 ]
             },
             "default": {
-                "s1": "sval",
+                "s1": "\"sval\"",
                 "d1": 5.67,
                 "i1": 99
             }
@@ -84,6 +84,17 @@
                 "items": "double"
             }
         },
+        {
+            "name": "myarraywithDefaultValue",
+            "type": {
+                "type": "array",
+                "items": "int"
+            },
+            "default": [
+                2,
+                3
+            ]
+        },
         {
             "name": "myenum",
             "type": {
@@ -152,6 +163,15 @@
                 "name": "md5"
             }
         },
+        {
+            "name": "myfixedwithDefaultValue",
+            "type": [{
+                "type": "fixed",
+                "size": 1,
+                "name": "val"
+            }, "null"],
+            "default": "\u0001"
+        },
         {
             "name": "anotherint",
             "type": "int"
@@ -159,6 +179,11 @@
         {
             "name": "bytes",
             "type": "bytes"
+        },
+        {
+            "name": "byteswithDefaultValue",
+            "type": ["bytes", "null"],
+            "default": "\u00ff\u00AA"
         }
     ]
 }
diff --git a/lang/c++/test/AvrogencppTests.cc b/lang/c++/test/AvrogencppTests.cc
index 1b429433c..46b2fc978 100644
--- a/lang/c++/test/AvrogencppTests.cc
+++ b/lang/c++/test/AvrogencppTests.cc
@@ -136,9 +136,14 @@ void checkRecord(const T1& r1, const T2& r2)
 
 void checkDefaultValues(const testgen_r::RootRecord& r)
 {
-    BOOST_CHECK_EQUAL(r.withDefaultValue.s1, "sval");
+    BOOST_CHECK_EQUAL(r.withDefaultValue.s1, "\"sval\"");
     BOOST_CHECK_EQUAL(r.withDefaultValue.i1, 99);
     BOOST_CHECK_CLOSE(r.withDefaultValue.d1, 5.67, 1e-10);
+    BOOST_CHECK_EQUAL(r.myarraywithDefaultValue[0], 2);
+    BOOST_CHECK_EQUAL(r.myarraywithDefaultValue[1], 3);
+    BOOST_CHECK_EQUAL(r.myfixedwithDefaultValue.get_val()[0], 0x01);
+    BOOST_CHECK_EQUAL(r.byteswithDefaultValue.get_bytes()[0], 0xff);
+    BOOST_CHECK_EQUAL(r.byteswithDefaultValue.get_bytes()[1], 0xaa);
 }
 
 
@@ -198,6 +203,24 @@ void testResolution()
     checkRecord(t3, t1);
     checkDefaultValues(t3);
 
+    // Test serialization of default values.
+    // Serialize to string then compile from string.
+    std::ostringstream oss;
+    s_r.toJson(oss);
+    ValidSchema s_rs = avro::compileJsonSchemaFromString(oss.str());
+
+    auto_ptr<InputStream> is2 = memoryInputStream(*os);
+    dd->init(*is2);
+    rd = resolvingDecoder(s_w, s_rs, dd);
+    testgen_r::RootRecord t4;
+    avro::decode(*rd, t4);
+    checkDefaultValues(t4);
+
+    std::ostringstream oss_r;
+    std::ostringstream oss_rs;
+    s_r.toJson(oss_r);
+    s_rs.toJson(oss_rs);
+    BOOST_CHECK_EQUAL(oss_r.str(), oss_rs.str());
 }
 
 void testNamespace()
diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc
index a40d6a1a7..0b63dbb4e 100644
--- a/lang/c++/test/CompilerTests.cc
+++ b/lang/c++/test/CompilerTests.cc
@@ -47,7 +47,8 @@ void testEmptyBytesDefault()
     \"fields\": [\n\
         {\n\
             \"name\": \"testbytes\",\n\
-            \"type\": \"bytes\"\n\
+            \"type\": \"bytes\",\n\
+            \"default\": \"\"\n\
         }\n\
     ]\n\
 }\n\
diff --git a/lang/c++/test/unittest.cc b/lang/c++/test/unittest.cc
index fc81f2b61..83493144b 100644
--- a/lang/c++/test/unittest.cc
+++ b/lang/c++/test/unittest.cc
@@ -46,22 +46,22 @@ static const uint8_t fixeddata[16] = 
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 #endif
 struct TestSchema
 {
-    TestSchema() 
+    TestSchema()
     {}
 
     void createExampleSchema()
     {
         // First construct our complex data type:
         avro::RecordSchema myRecord("complex");
-   
+
         // Now populate my record with fields (each field is another schema):
         myRecord.addField("real", avro::DoubleSchema());
         myRecord.addField("imaginary", avro::DoubleSchema());
 
-        // The complex record is the same as used above, let's make a schema 
+        // The complex record is the same as used above, let's make a schema
         // for an array of these record
-  
-        avro::ArraySchema complexArray(myRecord); 
+
+        avro::ArraySchema complexArray(myRecord);
 
         avro::ValidSchema validComplexArray(complexArray);
         validComplexArray.toJson(std::cout);
@@ -99,14 +99,14 @@ struct TestSchema
         }
         BOOST_CHECK_EQUAL(caught, true);
 
-        record.addField("myenum", myenum); 
+        record.addField("myenum", myenum);
 
         UnionSchema onion;
         onion.addType(NullSchema());
         onion.addType(map);
         onion.addType(FloatSchema());
-       
-        record.addField("myunion", onion); 
+
+        record.addField("myunion", onion);
 
         RecordSchema nestedRecord("NestedRecord");
         nestedRecord.addField("floatInNested", FloatSchema());
@@ -227,7 +227,7 @@ struct TestSchema
         s.writeBool(true);
 
         std::cout << "Fixed16\n";
-        
+
         s.writeFixed(fixeddata);
 
         std::cout << "Long\n";
@@ -253,7 +253,7 @@ struct TestSchema
         std::cout << s.buffer();
     }
 
-    void saveValidatingEncoding(int path) 
+    void saveValidatingEncoding(int path)
     {
         std::ofstream out("test.avro");
         Serializer<ValidatingWriter> s(schema_);
@@ -286,7 +286,7 @@ struct TestSchema
     void readMap(Parser &p)
     {
         int64_t size = 0;
-        do { 
+        do {
             printNext(p);
             size = p.readMapBlockSize();
             std::cout << "Size " << size << '\n';
@@ -477,7 +477,7 @@ struct TestNested
     TestNested()
     {}
 
-    void createSchema() 
+    void createSchema()
     {
         std::cout << "TestNested\n";
         RecordSchema rec("LongListContainer");
@@ -496,7 +496,7 @@ struct TestNested
         arrayTree.addField("children", ArraySchema(
             SymbolicSchema(Name("ArrayTree"), arrayTree.root())));
         rec.addField("array_tree", arrayTree);
-        
+
         schema_.setSchema(rec);
         schema_.toJson(std::cout);
         schema_.toFlatList(std::cout);
@@ -644,8 +644,8 @@ struct TestNested
         readArrayRecord(p);
         p.readRecordEnd();
     }
-  
-    void readListRecord(Parser<ValidatingReader> &p) 
+
+    void readListRecord(Parser<ValidatingReader> &p)
     {
         p.readRecord();
         int64_t val = p.readLong();
@@ -677,7 +677,7 @@ struct TestNested
         p.readRecordEnd();
     }
 
-    void validatingParser(InputBuffer &buf) 
+    void validatingParser(InputBuffer &buf)
     {
         Parser<ValidatingReader> p(schema_, buf);
         readRecord(p);
@@ -695,7 +695,7 @@ struct TestNested
           d.decodeNull();
         }
         bool b = d.decodeBool();
-        std::cout << "bval = " << b << '\n';      
+        std::cout << "bval = " << b << '\n';
     }
 
     void decodeArrayRecord(Decoder& d)
@@ -715,7 +715,7 @@ struct TestNested
       decodeListRecord(d);
       decodeArrayRecord(d);
     }
-  
+
     void testToScreen() {
         InputBuffer buf1 = serializeNoRecurse();
         InputBuffer buf2 = serializeRecurse();
@@ -727,7 +727,7 @@ struct TestNested
     void testParseNoRecurse() {
         std::cout << "ParseNoRecurse\n";
         InputBuffer buf = serializeNoRecurse();
-    
+
         validatingParser(buf);
     }
 
@@ -755,7 +755,7 @@ struct TestNested
        runEncodeDecode(*validatingEncoder(schema_, binaryEncoder()),
                        *validatingDecoder(schema_, binaryDecoder()),
                        encodeNoRecurse);
-       
+
     }
 
     void testDecodeRecurse()
@@ -772,7 +772,7 @@ struct TestNested
        runEncodeDecode(*jsonEncoder(schema_),
                        *jsonDecoder(schema_),
                        encodeNoRecurse);
-       
+
     }
 
     void testDecodeRecurseJson()
@@ -804,7 +804,7 @@ struct TestGenerated
     TestGenerated()
     {}
 
-    void test() 
+    void test()
     {
         std::cout << "TestGenerated\n";
 
@@ -823,7 +823,7 @@ struct TestGenerated
 
 struct TestBadStuff
 {
-    void testBadFile() 
+    void testBadFile()
     {
         std::cout << "TestBadFile\n";
 
@@ -849,7 +849,7 @@ struct TestBadStuff
         std::cout << "(intentional) error: " << error << '\n';
     }
 
-    void test() 
+    void test()
     {
         std::cout << "TestBadStuff\n";
         testBadFile();
@@ -860,10 +860,10 @@ struct TestBadStuff
 struct TestResolution
 {
     TestResolution() :
-        int_(IntSchema()), 
+        int_(IntSchema()),
         long_(LongSchema()),
-        bool_(BoolSchema()), 
-        float_(FloatSchema()), 
+        bool_(BoolSchema()),
+        float_(FloatSchema()),
         double_(DoubleSchema()),
 
         mapOfInt_(MapSchema(IntSchema())),
@@ -881,7 +881,7 @@ struct TestResolution
             two.addSymbol("Y");
             enumTwo_.setSchema(two);
         }
-    
+
         {
             UnionSchema one;
             one.addType(IntSchema());
@@ -900,31 +900,31 @@ struct TestResolution
         return writer.root()->resolve(*reader.root());
     }
 
-    void test() 
+    void test()
     {
         std::cout << "TestResolution\n";
 
-        BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH); 
-        BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH); 
-        BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH);
+        BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH);
+        BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(int_, long_), RESOLVE_PROMOTABLE_TO_LONG); 
-        BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(int_, long_), RESOLVE_PROMOTABLE_TO_LONG);
+        BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); 
-        BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+        BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
-        BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+        BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); 
-        BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT);
+        BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
-        BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+        BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH);
 
-        BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); 
-        BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH); 
+        BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE);
+        BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH);
 
         BOOST_CHECK_EQUAL(resolve(int_, mapOfInt_), RESOLVE_NO_MATCH);
         BOOST_CHECK_EQUAL(resolve(mapOfInt_, int_), RESOLVE_NO_MATCH);
@@ -975,7 +975,7 @@ struct TestResolution
 };
 
 boost::unit_test::test_suite*
-init_unit_test_suite( int argc, char* argv[] ) 
+init_unit_test_suite( int argc, char* argv[] )
 {
     using namespace boost::unit_test;
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> C++ should support field default values
> ---------------------------------------
>
>                 Key: AVRO-1335
>                 URL: https://issues.apache.org/jira/browse/AVRO-1335
>             Project: Avro
>          Issue Type: Improvement
>          Components: c++
>    Affects Versions: 1.7.4
>            Reporter: Bin Guo
>            Assignee: Victor Mota
>            Priority: Major
>         Attachments: AVRO-1335.patch
>
>
> We found that resolvingDecoder cannot provide bidirectional compatibility
> between different versions of a schema.
> This is especially a problem for records. For example:
> {code:title=First schema}
> {
>     "type": "record",
>     "name": "TestRecord",
>     "fields": [
>         {
>             "name": "MyData",
>                       "type": {
>                               "type": "record",
>                               "name": "SubData",
>                               "fields": [
>                                       {
>                                               "name": "Version1",
>                                               "type": "string"
>                                       }
>                               ]
>                       }
>         },
>       {
>             "name": "OtherData",
>             "type": "string"
>         }
>     ]
> }
> {code}
> {code:title=Second schema}
> {
>     "type": "record",
>     "name": "TestRecord",
>     "fields": [
>         {
>             "name": "MyData",
>                       "type": {
>                               "type": "record",
>                               "name": "SubData",
>                               "fields": [
>                                       {
>                                               "name": "Version1",
>                                               "type": "string"
>                                       },
>                                       {
>                                               "name": "Version2",
>                                               "type": "string"
>                                       }
>                               ]
>                       }
>         },
>       {
>             "name": "OtherData",
>             "type": "string"
>         }
>     ]
> }
> {code}
> Say node A knows only the first schema and node B knows the second schema,
> which has more fields.
> Any data generated by node B can be resolved with the first schema because
> the additional field is marked as skipped.
> But data generated by node A cannot be resolved with the second schema; it
> throws the exception *"Don't know how to handle excess fields for reader."*
> This is because the data is resolved exactly according to the auto-generated
> codec_traits, which tries to read the excess field.
> The problem is that we cannot simply ignore the excess field in the record,
> since the data after the troublesome record also needs to be resolved.
> This problem has blocked us for a very long time.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Commented] (AVRO-1335) C++ should support field default values

Reply via email to