[ https://issues.apache.org/jira/browse/AVRO-1335?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16649483#comment-16649483 ]
ASF GitHub Bot commented on AVRO-1335: -------------------------------------- thiru-apache closed pull request #241: AVRO-1335: Adds C++ support for default values in schema serializatio… URL: https://github.com/apache/avro/pull/241 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/lang/c++/api/Node.hh b/lang/c++/api/Node.hh index ff227b9e4..ebba375be 100644 --- a/lang/c++/api/Node.hh +++ b/lang/c++/api/Node.hh @@ -77,7 +77,7 @@ std::ostream& operator << (std::ostream& os, const Name& n) { /// The user does not use the Node object directly, they interface with Schema /// objects. /// -/// The Node object uses reference-counted pointers. This is so that schemas +/// The Node object uses reference-counted pointers. This is so that schemas /// may be reused in other other schemas, without needing to worry about memory /// deallocation for nodes that are added to multiple schema parse trees. /// @@ -152,6 +152,11 @@ class AVRO_DECL Node : private boost::noncopyable virtual void setLeafToSymbolic(int index, const NodePtr &node) = 0; + // Serialize the default value GenericDatum g for the node contained + // in a record node. + virtual void printDefaultToJson(const GenericDatum& g, std::ostream &os, + int depth) const = 0; + protected: void checkLock() const { diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/api/NodeImpl.hh index cbfcfb51f..0f3202368 100644 --- a/lang/c++/api/NodeImpl.hh +++ b/lang/c++/api/NodeImpl.hh @@ -25,6 +25,9 @@ #include <limits> #include <set> #include <boost/weak_ptr.hpp> +#include <iomanip> +#include <iostream> +#include <sstream> #include "Node.hh" #include "NodeConcepts.hh" @@ -34,8 +37,8 @@ namespace avro { /// Implementation details for Node. 
NodeImpl represents all the avro types, /// whose properties are enabled are disabled by selecting concept classes. -template -< +template +< class NameConcept, class LeavesConcept, class LeafNamesConcept, @@ -54,9 +57,9 @@ class NodeImpl : public Node sizeAttribute_() { } - NodeImpl(Type type, - const NameConcept &name, - const LeavesConcept &leaves, + NodeImpl(Type type, + const NameConcept &name, + const LeavesConcept &leaves, const LeafNamesConcept &leafNames, const SizeConcept &size) : Node(type), @@ -81,12 +84,12 @@ class NodeImpl : public Node void doSetName(const Name &name) { nameAttribute_.add(name); } - + const Name &name() const { return nameAttribute_.get(); } - void doAddLeaf(const NodePtr &newLeaf) { + void doAddLeaf(const NodePtr &newLeaf) { leafAttributes_.add(newLeaf); } @@ -94,11 +97,11 @@ class NodeImpl : public Node return leafAttributes_.size(); } - const NodePtr &leafAt(int index) const { + const NodePtr &leafAt(int index) const { return leafAttributes_.get(index); } - void doAddName(const std::string &name) { + void doAddName(const std::string &name) { if (! 
nameIndex_.add(name, leafNameAttributes_.size())) { throw Exception(boost::format("Cannot add duplicate name: %1%") % name); } @@ -109,7 +112,7 @@ class NodeImpl : public Node return leafNameAttributes_.size(); } - const std::string &nameAt(int index) const { + const std::string &nameAt(int index) const { return leafNameAttributes_.get(index); } @@ -130,12 +133,12 @@ class NodeImpl : public Node void printBasicInfo(std::ostream &os) const; void setLeafToSymbolic(int index, const NodePtr &node); - + SchemaResolution furtherResolution(const Node &reader) const { SchemaResolution match = RESOLVE_NO_MATCH; if (reader.type() == AVRO_SYMBOLIC) { - + // resolve the symbolic type, and check again const NodePtr &node = reader.leafAt(0); match = resolve(*node); @@ -145,7 +148,7 @@ class NodeImpl : public Node // in this case, need to see if there is an exact match for the // writer's type, or if not, the first one that can be promoted to a // match - + for(size_t i= 0; i < reader.leaves(); ++i) { const NodePtr &node = reader.leafAt(i); @@ -157,7 +160,7 @@ class NodeImpl : public Node break; } - // thisMatch is either no match, or promotable, this will set match to + // thisMatch is either no match, or promotable, this will set match to // promotable if it hasn't been set already if (match == RESOLVE_NO_MATCH) { match = thisMatch; @@ -213,6 +216,8 @@ class AVRO_DECL NodePrimitive : public NodeImplPrimitive bool isValid() const { return true; } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeSymbolic : public NodeImplSymbolic @@ -240,6 +245,8 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic return (nameAttribute_.size() == 1); } + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; + bool isSet() const { return (actualNode_.lock() != 0); } @@ -265,12 +272,12 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic class AVRO_DECL NodeRecord : public NodeImplRecord { 
std::vector<GenericDatum> defaultValues; public: - NodeRecord() : NodeImplRecord(AVRO_RECORD) { } + NodeRecord() : NodeImplRecord(AVRO_RECORD) { } NodeRecord(const HasName &name, const MultiLeaves &fields, const LeafNames &fieldsNames, const std::vector<GenericDatum>& dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()), - defaultValues(dv) { + defaultValues(dv) { for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { throw Exception(boost::format( @@ -290,13 +297,15 @@ public: void printJson(std::ostream &os, int depth) const; bool isValid() const { - return ((nameAttribute_.size() == 1) && + return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size())); } const GenericDatum& defaultValueAt(int index) { return defaultValues[index]; } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeEnum : public NodeImplEnum @@ -304,29 +313,31 @@ class AVRO_DECL NodeEnum : public NodeImplEnum public: NodeEnum() : - NodeImplEnum(AVRO_ENUM) + NodeImplEnum(AVRO_ENUM) { } NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) - { + { for(size_t i=0; i < leafNameAttributes_.size(); ++i) { if(!nameIndex_.add(leafNameAttributes_.get(i), i)) { throw Exception(boost::format("Cannot add duplicate name: %1%") % leafNameAttributes_.get(i)); } } } - + SchemaResolution resolve(const Node &reader) const; void printJson(std::ostream &os, int depth) const; bool isValid() const { return ( - (nameAttribute_.size() == 1) && - (leafNameAttributes_.size() > 0) + (nameAttribute_.size() == 1) && + (leafNameAttributes_.size() > 0) ); } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeArray : public NodeImplArray @@ -348,6 +359,8 @@ class AVRO_DECL NodeArray : public NodeImplArray bool isValid() const { return 
(leafAttributes_.size() == 1); } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeMap : public NodeImplMap @@ -356,14 +369,14 @@ class AVRO_DECL NodeMap : public NodeImplMap NodeMap() : NodeImplMap(AVRO_MAP) - { + { NodePtr key(new NodePrimitive(AVRO_STRING)); doAddLeaf(key); } explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), values, NoLeafNames(), NoSize()) - { + { // need to add the key for the map too NodePtr key(new NodePrimitive(AVRO_STRING)); doAddLeaf(key); @@ -379,6 +392,8 @@ class AVRO_DECL NodeMap : public NodeImplMap bool isValid() const { return (leafAttributes_.size() == 2); } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeUnion : public NodeImplUnion @@ -453,6 +468,8 @@ class AVRO_DECL NodeUnion : public NodeImplUnion } return false; } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; class AVRO_DECL NodeFixed : public NodeImplFixed @@ -473,19 +490,21 @@ class AVRO_DECL NodeFixed : public NodeImplFixed bool isValid() const { return ( - (nameAttribute_.size() == 1) && - (sizeAttribute_.size() == 1) + (nameAttribute_.size() == 1) && + (sizeAttribute_.size() == 1) ); } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; }; template < class A, class B, class C, class D > -inline void +inline void NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const NodePtr &node) { if(!B::hasAttribute) { throw Exception("Cannot change leaf node for nonexistent leaf"); - } + } NodePtr &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index)); if(replaceNode->name() != node->name()) { @@ -501,7 +520,7 @@ NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const NodePtr &node) } template < class A, class B, class C, class D > -inline void +inline void NodeImpl<A,B,C,D>::printBasicInfo(std::ostream &os) const { os << type(); @@ -529,7 +548,7 @@ 
NodeImpl<A,B,C,D>::printBasicInfo(std::ostream &os) const } -inline NodePtr resolveSymbol(const NodePtr &node) +inline NodePtr resolveSymbol(const NodePtr &node) { if(node->type() != AVRO_SYMBOLIC) { throw Exception("Only symbolic nodes may be resolved"); @@ -538,6 +557,16 @@ inline NodePtr resolveSymbol(const NodePtr &node) return symNode->getNode(); } +template< typename T > +inline std::string intToHex(T i) +{ + std::stringstream stream; + stream << "\\u" + << std::setfill('0') << std::setw(sizeof(T)) + << std::hex << i; + return stream.str(); +} + } // namespace avro #endif diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc index 1252a717f..f3610651b 100644 --- a/lang/c++/impl/Compiler.cc +++ b/lang/c++/impl/Compiler.cc @@ -349,7 +349,7 @@ static NodePtr makeFixedNode(const Entity& e, { int v = static_cast<int>(getLongField(e, m, "size")); if (v <= 0) { - throw Exception(boost::format("Size for fixed is not positive: ") % + throw Exception(boost::format("Size for fixed is not positive: %1%") % e.toString()); } return NodePtr(new NodeFixed(asSingleAttribute(name), diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc index 606cd2093..435d2ef5d 100644 --- a/lang/c++/impl/NodeImpl.cc +++ b/lang/c++/impl/NodeImpl.cc @@ -17,14 +17,82 @@ */ +#include <boost/algorithm/string/replace.hpp> #include "NodeImpl.hh" + namespace avro { -SchemaResolution +namespace { +// Escape string for serialization. 
+std::string escape(const std::string &unescaped) { + std::string s; + s.reserve(unescaped.length()); + for (auto c : unescaped) { + switch (c) { + case '\\': + case '"': + case '/': + s += '\\'; + s += c; + break; + case '\b': + s += '\\'; + s += 'b'; + break; + case '\f': + s += '\f'; + break; + case '\n': + s += '\\'; + s += 'n'; + break; + case '\r': + s += '\\'; + s += 'r'; + break; + case '\t': + s += '\\'; + s += 't'; + break; + default: + if (!std::iscntrl(c, std::locale::classic())) { + s += c; + continue; + } + s += intToHex(static_cast<unsigned int>(c)); + break; + } + } + return s; +} + +// Wrap an indentation in a struct for ostream operator<< +struct indent { + indent(int depth) : + d(depth) + { } + int d; +}; + +/// ostream operator for indent +std::ostream& operator <<(std::ostream &os, indent x) +{ + static const std::string spaces(" "); + while (x.d--) { + os << spaces; + } + return os; +} + +} // anonymous namespace + +const int kByteStringSize = 6; + +SchemaResolution NodePrimitive::resolve(const Node &reader) const { - if(type() == reader.type()) { + if (type() == reader.type()) { return RESOLVE_MATCH; } @@ -32,17 +100,17 @@ NodePrimitive::resolve(const Node &reader) const case AVRO_INT: - if( reader.type() == AVRO_LONG ) { + if ( reader.type() == AVRO_LONG ) { return RESOLVE_PROMOTABLE_TO_LONG; - } + } // fall-through intentional case AVRO_LONG: - + if (reader.type() == AVRO_FLOAT) { return RESOLVE_PROMOTABLE_TO_FLOAT; - } + } // fall-through intentional @@ -50,49 +118,49 @@ NodePrimitive::resolve(const Node &reader) const if (reader.type() == AVRO_DOUBLE) { return RESOLVE_PROMOTABLE_TO_DOUBLE; - } + } default: break; - } + } return furtherResolution(reader); } -SchemaResolution +SchemaResolution NodeRecord::resolve(const Node &reader) const { - if(reader.type() == AVRO_RECORD) { - if(name() == reader.name()) { + if (reader.type() == AVRO_RECORD) { + if (name() == reader.name()) { return RESOLVE_MATCH; } } return furtherResolution(reader); } 
-SchemaResolution +SchemaResolution NodeEnum::resolve(const Node &reader) const { - if(reader.type() == AVRO_ENUM) { + if (reader.type() == AVRO_ENUM) { return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH; } return furtherResolution(reader); } -SchemaResolution +SchemaResolution NodeArray::resolve(const Node &reader) const { - if(reader.type() == AVRO_ARRAY) { + if (reader.type() == AVRO_ARRAY) { const NodePtr &arrayType = leafAt(0); return arrayType->resolve(*reader.leafAt(0)); } return furtherResolution(reader); } -SchemaResolution +SchemaResolution NodeMap::resolve(const Node &reader) const { - if(reader.type() == AVRO_MAP) { + if (reader.type() == AVRO_MAP) { const NodePtr &mapType = leafAt(1); return mapType->resolve(*reader.leafAt(1)); } @@ -100,7 +168,7 @@ NodeMap::resolve(const Node &reader) const } SchemaResolution -NodeUnion::resolve(const Node &reader) const +NodeUnion::resolve(const Node &reader) const { // If the writer is union, resolution only needs to occur when the selected @@ -109,67 +177,49 @@ NodeUnion::resolve(const Node &reader) const // In this case, this function returns if there is a possible match given // any writer type, so just search type by type returning the best match // found. - + SchemaResolution match = RESOLVE_NO_MATCH; - for(size_t i=0; i < leaves(); ++i) { + for (size_t i=0; i < leaves(); ++i) { const NodePtr &node = leafAt(i); SchemaResolution thisMatch = node->resolve(reader); - if(thisMatch == RESOLVE_MATCH) { + if (thisMatch == RESOLVE_MATCH) { match = thisMatch; break; } - if(match == RESOLVE_NO_MATCH) { + if (match == RESOLVE_NO_MATCH) { match = thisMatch; } } return match; } -SchemaResolution +SchemaResolution NodeFixed::resolve(const Node &reader) const { - if(reader.type() == AVRO_FIXED) { + if (reader.type() == AVRO_FIXED) { return ( (reader.fixedSize() == fixedSize()) && - (reader.name() == name()) - ) ? + (reader.name() == name()) + ) ? 
RESOLVE_MATCH : RESOLVE_NO_MATCH; } return furtherResolution(reader); } -SchemaResolution +SchemaResolution NodeSymbolic::resolve(const Node &reader) const { const NodePtr &node = leafAt(0); return node->resolve(reader); } -// Wrap an indentation in a struct for ostream operator<< -struct indent { - indent(int depth) : - d(depth) - { } - int d; -}; - -/// ostream operator for indent -std::ostream& operator <<(std::ostream &os, indent x) -{ - static const std::string spaces(" "); - while(x.d--) { - os << spaces; - } - return os; -} - -void +void NodePrimitive::printJson(std::ostream &os, int depth) const { os << '\"' << type() << '\"'; } -void +void NodeSymbolic::printJson(std::ostream &os, int depth) const { os << '\"' << nameAttribute_.get() << '\"'; @@ -183,7 +233,7 @@ static void printName(std::ostream& os, const Name& n, int depth) os << indent(depth) << "\"name\": \"" << n.simpleName() << "\",\n"; } -void +void NodeRecord::printJson(std::ostream &os, int depth) const { os << "{\n"; @@ -193,14 +243,27 @@ NodeRecord::printJson(std::ostream &os, int depth) const int fields = leafAttributes_.size(); ++depth; - for(int i = 0; i < fields; ++i) { - if(i > 0) { + // Serialize "default" field: + assert(defaultValues.empty() || (defaultValues.size() == fields)); + for (int i = 0; i < fields; ++i) { + if (i > 0) { os << ','; } os << '\n' << indent(depth) << "{\n"; os << indent(++depth) << "\"name\": \"" << leafNameAttributes_.get(i) << "\",\n"; os << indent(depth) << "\"type\": "; leafAttributes_.get(i)->printJson(os, depth); + + if (!defaultValues.empty()) { + if (!defaultValues[i].isUnion() && + defaultValues[i].type() == AVRO_NULL) { + // No "default" field. 
+ } else { + os << ",\n" << indent(depth) << "\"default\": "; + leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os, + depth); + } + } os << '\n'; os << indent(--depth) << '}'; } @@ -208,7 +271,159 @@ NodeRecord::printJson(std::ostream &os, int depth) const os << indent(--depth) << '}'; } -void +void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(isPrimitive(g.type())); + + switch (g.type()) { + case AVRO_NULL: + os << "null"; + break; + case AVRO_BOOL: + os << (g.value<bool>() ? "true" : "false"); + break; + case AVRO_INT: + os << g.value<int32_t>(); + break; + case AVRO_LONG: + os << g.value<int64_t>(); + break; + case AVRO_FLOAT: + os << std::to_string(g.value<float>()); + break; + case AVRO_DOUBLE: + os << std::to_string(g.value<double>()); + break; + case AVRO_STRING: + os << "\"" << escape(g.value<std::string>()) << "\""; + break; + case AVRO_BYTES: { + // Convert to a string: + const std::vector<uint8_t> &vg = g.value<std::vector<uint8_t> >(); + std::string s; + s.resize(vg.size() * kByteStringSize); + for (unsigned int i = 0; i < vg.size(); i++) { + std::string hex_string = intToHex(static_cast<int>(vg[i])); + s.replace(i*kByteStringSize, kByteStringSize, hex_string); + } + os << "\"" << s << "\""; + } break; + default: + break; + } +} + +void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(g.type() == AVRO_ENUM); + os << "\"" << g.value<GenericEnum>().symbol() << "\""; +} + +void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(g.type() == AVRO_FIXED); + // ex: "\uOOff" + // Convert to a string + const std::vector<uint8_t> &vg = g.value<GenericFixed>().value(); + std::string s; + s.resize(vg.size() * kByteStringSize); + for (unsigned int i = 0; i < vg.size(); i++) { + std::string hex_string = intToHex(static_cast<int>(vg[i])); + s.replace(i*kByteStringSize, kByteStringSize, 
hex_string); + } + os << "\"" << s << "\""; +} + +void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + leafAt(0)->printDefaultToJson(g, os, depth); +} + +void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(g.type() == AVRO_ARRAY); + // ex: "default": [1] + if (g.value<GenericArray>().value().empty()) { + os << "[]"; + } else { + os << "[\n"; + depth++; + + // Serialize all values of the array with recursive calls: + for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) { + if (i > 0) { + os << ",\n"; + } + os << indent(depth); + leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os, + depth); + } + os << "\n" << indent(--depth) << "]"; + } +} + +void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + getNode()->printDefaultToJson(g, os, depth); +} + +void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(g.type() == AVRO_RECORD); + if (g.value<GenericRecord>().fieldCount() == 0) { + os << "{}"; + } else { + os << "{\n"; + + // Serialize all fields of the record with recursive calls: + for (unsigned int i = 0; i < g.value<GenericRecord>().fieldCount(); i++) { + if (i == 0) { + ++depth; + } else { // i > 0 + os << ",\n"; + } + + os << indent(depth) << "\""; + assert(i < leaves()); + os << leafNameAttributes_.get(i); + os << "\": "; + + // Recursive call on child node to be able to get the name attribute + // (In case of a record we need the name of the leaves (contained in + // 'this')) + leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os, + depth); + } + os << "\n" << indent(--depth) << "}"; + } +} + +void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os, + int depth) const { + assert(g.type() == AVRO_MAP); + //{"a": 1} + if (g.value<GenericMap>().value().empty()) { + os << "{}"; + } else { + os << 
"{\n"; + + for (unsigned int i = 0; i < g.value<GenericMap>().value().size(); i++) { + if (i == 0) { + ++depth; + } else { + os << ",\n"; + } + os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first + << "\": "; + + leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os, + depth); + } + os << "\n" << indent(--depth) << "}"; + } +} + +void NodeEnum::printJson(std::ostream &os, int depth) const { os << "{\n"; @@ -218,8 +433,8 @@ NodeEnum::printJson(std::ostream &os, int depth) const int names = leafNameAttributes_.size(); ++depth; - for(int i = 0; i < names; ++i) { - if(i > 0) { + for (int i = 0; i < names; ++i) { + if (i > 0) { os << ",\n"; } os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"'; @@ -229,7 +444,7 @@ NodeEnum::printJson(std::ostream &os, int depth) const os << indent(--depth) << '}'; } -void +void NodeArray::printJson(std::ostream &os, int depth) const { os << "{\n"; @@ -240,7 +455,7 @@ NodeArray::printJson(std::ostream &os, int depth) const os << indent(depth) << '}'; } -void +void NodeMap::printJson(std::ostream &os, int depth) const { os << "{\n"; @@ -251,14 +466,14 @@ NodeMap::printJson(std::ostream &os, int depth) const os << indent(depth) << '}'; } -void +void NodeUnion::printJson(std::ostream &os, int depth) const { os << "[\n"; int fields = leafAttributes_.size(); ++depth; - for(int i = 0; i < fields; ++i) { - if(i > 0) { + for (int i = 0; i < fields; ++i) { + if (i > 0) { os << ",\n"; } os << indent(depth); @@ -268,7 +483,7 @@ NodeUnion::printJson(std::ostream &os, int depth) const os << indent(--depth) << ']'; } -void +void NodeFixed::printJson(std::ostream &os, int depth) const { os << "{\n"; diff --git a/lang/c++/jsonschemas/bigrecord_r b/lang/c++/jsonschemas/bigrecord_r index 7c477cd0f..5da05222e 100644 --- a/lang/c++/jsonschemas/bigrecord_r +++ b/lang/c++/jsonschemas/bigrecord_r @@ -62,7 +62,7 @@ ] }, "default": { - "s1": "sval", + "s1": "\"sval\"", "d1": 5.67, "i1": 99 } @@ -84,6 +84,17 @@ 
"items": "double" } }, + { + "name": "myarraywithDefaultValue", + "type": { + "type": "array", + "items": "int" + }, + "default": [ + 2, + 3 + ] + }, { "name": "myenum", "type": { @@ -152,6 +163,15 @@ "name": "md5" } }, + { + "name": "myfixedwithDefaultValue", + "type": [{ + "type": "fixed", + "size": 1, + "name": "val" + }, "null"], + "default": "\u0001" + }, { "name": "anotherint", "type": "int" @@ -159,6 +179,11 @@ { "name": "bytes", "type": "bytes" + }, + { + "name": "byteswithDefaultValue", + "type": ["bytes", "null"], + "default": "\u00ff\u00AA" } ] } diff --git a/lang/c++/test/AvrogencppTests.cc b/lang/c++/test/AvrogencppTests.cc index 1b429433c..46b2fc978 100644 --- a/lang/c++/test/AvrogencppTests.cc +++ b/lang/c++/test/AvrogencppTests.cc @@ -136,9 +136,14 @@ void checkRecord(const T1& r1, const T2& r2) void checkDefaultValues(const testgen_r::RootRecord& r) { - BOOST_CHECK_EQUAL(r.withDefaultValue.s1, "sval"); + BOOST_CHECK_EQUAL(r.withDefaultValue.s1, "\"sval\""); BOOST_CHECK_EQUAL(r.withDefaultValue.i1, 99); BOOST_CHECK_CLOSE(r.withDefaultValue.d1, 5.67, 1e-10); + BOOST_CHECK_EQUAL(r.myarraywithDefaultValue[0], 2); + BOOST_CHECK_EQUAL(r.myarraywithDefaultValue[1], 3); + BOOST_CHECK_EQUAL(r.myfixedwithDefaultValue.get_val()[0], 0x01); + BOOST_CHECK_EQUAL(r.byteswithDefaultValue.get_bytes()[0], 0xff); + BOOST_CHECK_EQUAL(r.byteswithDefaultValue.get_bytes()[1], 0xaa); } @@ -198,6 +203,24 @@ void testResolution() checkRecord(t3, t1); checkDefaultValues(t3); + // Test serialization of default values. + // Serialize to string then compile from string. 
+ std::ostringstream oss; + s_r.toJson(oss); + ValidSchema s_rs = avro::compileJsonSchemaFromString(oss.str()); + + auto_ptr<InputStream> is2 = memoryInputStream(*os); + dd->init(*is2); + rd = resolvingDecoder(s_w, s_rs, dd); + testgen_r::RootRecord t4; + avro::decode(*rd, t4); + checkDefaultValues(t4); + + std::ostringstream oss_r; + std::ostringstream oss_rs; + s_r.toJson(oss_r); + s_rs.toJson(oss_rs); + BOOST_CHECK_EQUAL(oss_r.str(), oss_rs.str()); } void testNamespace() diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc index a40d6a1a7..0b63dbb4e 100644 --- a/lang/c++/test/CompilerTests.cc +++ b/lang/c++/test/CompilerTests.cc @@ -47,7 +47,8 @@ void testEmptyBytesDefault() \"fields\": [\n\ {\n\ \"name\": \"testbytes\",\n\ - \"type\": \"bytes\"\n\ + \"type\": \"bytes\",\n\ + \"default\": \"\"\n\ }\n\ ]\n\ }\n\ diff --git a/lang/c++/test/unittest.cc b/lang/c++/test/unittest.cc index fc81f2b61..83493144b 100644 --- a/lang/c++/test/unittest.cc +++ b/lang/c++/test/unittest.cc @@ -46,22 +46,22 @@ static const uint8_t fixeddata[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; #endif struct TestSchema { - TestSchema() + TestSchema() {} void createExampleSchema() { // First construct our complex data type: avro::RecordSchema myRecord("complex"); - + // Now populate my record with fields (each field is another schema): myRecord.addField("real", avro::DoubleSchema()); myRecord.addField("imaginary", avro::DoubleSchema()); - // The complex record is the same as used above, let's make a schema + // The complex record is the same as used above, let's make a schema // for an array of these record - - avro::ArraySchema complexArray(myRecord); + + avro::ArraySchema complexArray(myRecord); avro::ValidSchema validComplexArray(complexArray); validComplexArray.toJson(std::cout); @@ -99,14 +99,14 @@ struct TestSchema } BOOST_CHECK_EQUAL(caught, true); - record.addField("myenum", myenum); + record.addField("myenum", myenum); UnionSchema onion; 
onion.addType(NullSchema()); onion.addType(map); onion.addType(FloatSchema()); - - record.addField("myunion", onion); + + record.addField("myunion", onion); RecordSchema nestedRecord("NestedRecord"); nestedRecord.addField("floatInNested", FloatSchema()); @@ -227,7 +227,7 @@ struct TestSchema s.writeBool(true); std::cout << "Fixed16\n"; - + s.writeFixed(fixeddata); std::cout << "Long\n"; @@ -253,7 +253,7 @@ struct TestSchema std::cout << s.buffer(); } - void saveValidatingEncoding(int path) + void saveValidatingEncoding(int path) { std::ofstream out("test.avro"); Serializer<ValidatingWriter> s(schema_); @@ -286,7 +286,7 @@ struct TestSchema void readMap(Parser &p) { int64_t size = 0; - do { + do { printNext(p); size = p.readMapBlockSize(); std::cout << "Size " << size << '\n'; @@ -477,7 +477,7 @@ struct TestNested TestNested() {} - void createSchema() + void createSchema() { std::cout << "TestNested\n"; RecordSchema rec("LongListContainer"); @@ -496,7 +496,7 @@ struct TestNested arrayTree.addField("children", ArraySchema( SymbolicSchema(Name("ArrayTree"), arrayTree.root()))); rec.addField("array_tree", arrayTree); - + schema_.setSchema(rec); schema_.toJson(std::cout); schema_.toFlatList(std::cout); @@ -644,8 +644,8 @@ struct TestNested readArrayRecord(p); p.readRecordEnd(); } - - void readListRecord(Parser<ValidatingReader> &p) + + void readListRecord(Parser<ValidatingReader> &p) { p.readRecord(); int64_t val = p.readLong(); @@ -677,7 +677,7 @@ struct TestNested p.readRecordEnd(); } - void validatingParser(InputBuffer &buf) + void validatingParser(InputBuffer &buf) { Parser<ValidatingReader> p(schema_, buf); readRecord(p); @@ -695,7 +695,7 @@ struct TestNested d.decodeNull(); } bool b = d.decodeBool(); - std::cout << "bval = " << b << '\n'; + std::cout << "bval = " << b << '\n'; } void decodeArrayRecord(Decoder& d) @@ -715,7 +715,7 @@ struct TestNested decodeListRecord(d); decodeArrayRecord(d); } - + void testToScreen() { InputBuffer buf1 = serializeNoRecurse(); 
InputBuffer buf2 = serializeRecurse(); @@ -727,7 +727,7 @@ struct TestNested void testParseNoRecurse() { std::cout << "ParseNoRecurse\n"; InputBuffer buf = serializeNoRecurse(); - + validatingParser(buf); } @@ -755,7 +755,7 @@ struct TestNested runEncodeDecode(*validatingEncoder(schema_, binaryEncoder()), *validatingDecoder(schema_, binaryDecoder()), encodeNoRecurse); - + } void testDecodeRecurse() @@ -772,7 +772,7 @@ struct TestNested runEncodeDecode(*jsonEncoder(schema_), *jsonDecoder(schema_), encodeNoRecurse); - + } void testDecodeRecurseJson() @@ -804,7 +804,7 @@ struct TestGenerated TestGenerated() {} - void test() + void test() { std::cout << "TestGenerated\n"; @@ -823,7 +823,7 @@ struct TestGenerated struct TestBadStuff { - void testBadFile() + void testBadFile() { std::cout << "TestBadFile\n"; @@ -849,7 +849,7 @@ struct TestBadStuff std::cout << "(intentional) error: " << error << '\n'; } - void test() + void test() { std::cout << "TestBadStuff\n"; testBadFile(); @@ -860,10 +860,10 @@ struct TestBadStuff struct TestResolution { TestResolution() : - int_(IntSchema()), + int_(IntSchema()), long_(LongSchema()), - bool_(BoolSchema()), - float_(FloatSchema()), + bool_(BoolSchema()), + float_(FloatSchema()), double_(DoubleSchema()), mapOfInt_(MapSchema(IntSchema())), @@ -881,7 +881,7 @@ struct TestResolution two.addSymbol("Y"); enumTwo_.setSchema(two); } - + { UnionSchema one; one.addType(IntSchema()); @@ -900,31 +900,31 @@ struct TestResolution return writer.root()->resolve(*reader.root()); } - void test() + void test() { std::cout << "TestResolution\n"; - BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH); - BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(long_, long_), RESOLVE_MATCH); + BOOST_CHECK_EQUAL(resolve(long_, bool_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(bool_, long_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(int_, long_), 
RESOLVE_PROMOTABLE_TO_LONG); - BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(int_, long_), RESOLVE_PROMOTABLE_TO_LONG); + BOOST_CHECK_EQUAL(resolve(long_, int_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); - BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(int_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); + BOOST_CHECK_EQUAL(resolve(float_, int_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); - BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(int_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); + BOOST_CHECK_EQUAL(resolve(double_, int_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); - BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(long_, float_), RESOLVE_PROMOTABLE_TO_FLOAT); + BOOST_CHECK_EQUAL(resolve(float_, long_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); - BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(long_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); + BOOST_CHECK_EQUAL(resolve(double_, long_), RESOLVE_NO_MATCH); - BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); - BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH); + BOOST_CHECK_EQUAL(resolve(float_, double_), RESOLVE_PROMOTABLE_TO_DOUBLE); + BOOST_CHECK_EQUAL(resolve(double_, float_), RESOLVE_NO_MATCH); BOOST_CHECK_EQUAL(resolve(int_, mapOfInt_), RESOLVE_NO_MATCH); BOOST_CHECK_EQUAL(resolve(mapOfInt_, int_), RESOLVE_NO_MATCH); @@ -975,7 +975,7 @@ struct TestResolution }; boost::unit_test::test_suite* -init_unit_test_suite( int argc, char* argv[] ) +init_unit_test_suite( int argc, char* argv[] ) { using namespace boost::unit_test; 
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > C++ should support field default values > --------------------------------------- > > Key: AVRO-1335 > URL: https://issues.apache.org/jira/browse/AVRO-1335 > Project: Avro > Issue Type: Improvement > Components: c++ > Affects Versions: 1.7.4 > Reporter: Bin Guo > Assignee: Victor Mota > Priority: Major > Attachments: AVRO-1335.patch > > > We found that resolvingDecoder could not provide bidirectional compatibility > between different versions of schemas. > Especially for records, for example: > {code:title=First schema} > { > "type": "record", > "name": "TestRecord", > "fields": [ > { > "name": "MyData", > "type": { > "type": "record", > "name": "SubData", > "fields": [ > { > "name": "Version1", > "type": "string" > } > ] > } > }, > { > "name": "OtherData", > "type": "string" > } > ] > } > {code} > {code:title=Second schema} > { > "type": "record", > "name": "TestRecord", > "fields": [ > { > "name": "MyData", > "type": { > "type": "record", > "name": "SubData", > "fields": [ > { > "name": "Version1", > "type": "string" > }, > { > "name": "Version2", > "type": "string" > } > ] > } > }, > { > "name": "OtherData", > "type": "string" > } > ] > } > {code} > Say, node A knows only the first schema and node B knows the second schema, > and the second schema has more fields. > Any data generated by node B can be resolved by the first schema because the > additional field is marked as skipped. > But data generated by node A cannot be resolved by the second schema and throws > an exception *"Don't know how to handle excess fields for reader."* > This is because data is resolved exactly according to the auto-generated > codec_traits, which tries to read the excess field. 
> The problem is that we cannot simply ignore the excess field in the record, since > the data after the troublesome record also needs to be resolved. > This problem has actually blocked us for a very long time. -- This message was sent by Atlassian JIRA (v7.6.3#76005)