This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.11 by this push:
new 0e89c57b9 AVRO-3547: Add support for custom attributes in Avro Schema
(#1736)
0e89c57b9 is described below
commit 0e89c57b9884d25b47136a3f344a96539c05337e
Author: nileyadav <[email protected]>
AuthorDate: Mon Jul 25 12:36:44 2022 -0700
AVRO-3547: Add support for custom attributes in Avro Schema (#1736)
* Add support for custom attributes in Avro Schema
Custom attributes are allowed at the field level as per the Avro specification:
https://avro.apache.org/docs/current/spec.html#schemas
But the Avro C++ implementation does not support custom attributes at the
field level.
Update the implementation to support custom attributes at the field level.
* Add unit tests
* AVRO-3547: Apply suggestions from code review
Co-authored-by: Martin Grigorov <[email protected]>
* AVRO-3547: Fix build errors
* Update lang/c++/impl/CustomFields.cc
Co-authored-by: Martin Grigorov <[email protected]>
* AVRO-3547: remove unused method
* AVRO-3547: fix build errors and unit test data
* AVRO-3547: Add documentation for CustomFields
Co-authored-by: nileyadav <[email protected]>
Co-authored-by: Martin Grigorov <[email protected]>
(cherry picked from commit 61a1cdcd500356726d4995c19c8a4d5d89989b13)
---
lang/c++/CMakeLists.txt | 1 +
lang/c++/api/CustomFields.hh | 55 ++++++++++++++++++++++
lang/c++/api/Node.hh | 7 +++
lang/c++/api/NodeImpl.hh | 100 ++++++++++++++++++++++++++++------------
lang/c++/api/Schema.hh | 4 ++
lang/c++/impl/Compiler.cc | 38 ++++++++++++++--
lang/c++/impl/CustomFields.cc | 59 ++++++++++++++++++++++++
lang/c++/impl/NodeImpl.cc | 17 ++++++-
lang/c++/impl/Schema.cc | 8 ++++
lang/c++/test/SchemaTests.cc | 16 +++++++
lang/c++/test/unittest.cc | 103 ++++++++++++++++++++++++++++++++++++++++--
11 files changed, 371 insertions(+), 37 deletions(-)
diff --git a/lang/c++/CMakeLists.txt b/lang/c++/CMakeLists.txt
index b02aea2c8..6098613ed 100644
--- a/lang/c++/CMakeLists.txt
+++ b/lang/c++/CMakeLists.txt
@@ -111,6 +111,7 @@ set (AVRO_SOURCE_FILES
impl/json/JsonIO.cc
impl/json/JsonDom.cc
impl/Resolver.cc impl/Validator.cc
+ impl/CustomFields.cc
)
add_library (avrocpp SHARED ${AVRO_SOURCE_FILES})
diff --git a/lang/c++/api/CustomFields.hh b/lang/c++/api/CustomFields.hh
new file mode 100644
index 000000000..01468ff4a
--- /dev/null
+++ b/lang/c++/api/CustomFields.hh
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_CustomFields_hh__
+#define avro_CustomFields_hh__
+
+#include <iostream>
+
+#include "../impl/json/JsonDom.hh"
+
+namespace avro {
+
+// The CustomFields class stores Avro custom attributes.
+// Each field is represented by a unique name and its value.
+// Users are expected to create a CustomFields object and then add it to a Schema.
+class AVRO_DECL CustomFields {
+ public:
+ // Retrieves the custom field's JSON entity for the given fieldName; returns a
+ // null Entity if the field doesn't exist.
+ json::Entity getField(const std::string &fieldName) const;
+
+ // Adds a custom field. If the field already exists, throws an exception.
+ void addField(const std::string &fieldName, const json::Entity
&fieldValue);
+ void addField(const std::string &fieldName, const std::string &fieldValue);
+
+ // Provides a way to iterate over the custom fields or check how many there are.
+ const std::map<std::string, json::Entity> &fields() const {
+ return fields_;
+ }
+
+ // Prints the given field as a JSON "name": value pair; throws if it doesn't exist.
+ void printJson(std::ostream& os, const std::string &fieldName) const;
+
+ private:
+ std::map<std::string, json::Entity> fields_;
+};
+
+} // namespace avro
+
+#endif
diff --git a/lang/c++/api/Node.hh b/lang/c++/api/Node.hh
index 3c9389da5..c9af126f1 100644
--- a/lang/c++/api/Node.hh
+++ b/lang/c++/api/Node.hh
@@ -26,6 +26,7 @@
#include <memory>
#include <utility>
+#include "CustomFields.hh"
#include "Exception.hh"
#include "LogicalType.hh"
#include "SchemaResolution.hh"
@@ -153,6 +154,11 @@ public:
}
virtual size_t fixedSize() const = 0;
+ void addCustomAttributesForField(const CustomFields& customAttributes) {
+ checkLock();
+ doAddCustomAttribute(customAttributes);
+ }
+
virtual bool isValid() const = 0;
virtual SchemaResolution resolve(const Node &reader) const = 0;
@@ -185,6 +191,7 @@ protected:
virtual void doAddLeaf(const NodePtr &newLeaf) = 0;
virtual void doAddName(const std::string &name) = 0;
virtual void doSetFixedSize(size_t size) = 0;
+ virtual void doAddCustomAttribute(const CustomFields& customFields) = 0;
private:
const Type type_;
diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/api/NodeImpl.hh
index c74d39e6b..62e62eb65 100644
--- a/lang/c++/api/NodeImpl.hh
+++ b/lang/c++/api/NodeImpl.hh
@@ -32,6 +32,7 @@
#include "Node.hh"
#include "NodeConcepts.hh"
+#include "CustomFields.hh"
namespace avro {
@@ -42,6 +43,7 @@ template<
class NameConcept,
class LeavesConcept,
class LeafNamesConcept,
+ class MultiAttributesConcept,
class SizeConcept>
class NodeImpl : public Node {
@@ -51,17 +53,20 @@ protected:
docAttribute_(),
leafAttributes_(),
leafNameAttributes_(),
+ customAttributes_(),
sizeAttribute_() {}
NodeImpl(Type type,
const NameConcept &name,
const LeavesConcept &leaves,
const LeafNamesConcept &leafNames,
+ const MultiAttributesConcept &customAttributes,
const SizeConcept &size) : Node(type),
nameAttribute_(name),
docAttribute_(),
leafAttributes_(leaves),
leafNameAttributes_(leafNames),
+ customAttributes_(customAttributes),
sizeAttribute_(size) {}
// Ctor with "doc"
@@ -70,11 +75,13 @@ protected:
const concepts::SingleAttribute<std::string> &doc,
const LeavesConcept &leaves,
const LeafNamesConcept &leafNames,
+ const MultiAttributesConcept &customAttributes,
const SizeConcept &size) : Node(type),
nameAttribute_(name),
docAttribute_(doc),
leafAttributes_(leaves),
leafNameAttributes_(leafNames),
+ customAttributes_(customAttributes),
sizeAttribute_(size) {}
void swap(NodeImpl &impl) {
@@ -83,6 +90,7 @@ protected:
std::swap(leafAttributes_, impl.leafAttributes_);
std::swap(leafNameAttributes_, impl.leafNameAttributes_);
std::swap(sizeAttribute_, impl.sizeAttribute_);
+ std::swap(customAttributes_, impl.customAttributes_);
std::swap(nameIndex_, impl.nameIndex_);
}
@@ -152,6 +160,10 @@ protected:
void setLeafToSymbolic(size_t index, const NodePtr &node) override;
+ void doAddCustomAttribute(const CustomFields &customfields) override {
+ customAttributes_.add(customfields);
+ }
+
SchemaResolution furtherResolution(const Node &reader) const {
SchemaResolution match = RESOLVE_NO_MATCH;
@@ -195,6 +207,7 @@ protected:
LeavesConcept leafAttributes_;
LeafNamesConcept leafNameAttributes_;
+ MultiAttributesConcept customAttributes_;
SizeConcept sizeAttribute_;
concepts::NameIndexConcept<LeafNamesConcept> nameIndex_;
};
@@ -210,19 +223,21 @@ using MultiLeaves = concepts::MultiAttribute<NodePtr>;
using NoLeafNames = concepts::NoAttribute<std::string>;
using LeafNames = concepts::MultiAttribute<std::string>;
+using MultiAttributes = concepts::MultiAttribute<CustomFields>;
+using NoAttributes = concepts::NoAttribute<CustomFields>;
using NoSize = concepts::NoAttribute<int>;
using HasSize = concepts::SingleAttribute<int>;
-using NodeImplPrimitive = NodeImpl<NoName, NoLeaves, NoLeafNames, NoSize>;
-using NodeImplSymbolic = NodeImpl<HasName, NoLeaves, NoLeafNames, NoSize>;
+using NodeImplPrimitive = NodeImpl<NoName, NoLeaves, NoLeafNames,
MultiAttributes, NoSize>;
+using NodeImplSymbolic = NodeImpl<HasName, NoLeaves, NoLeafNames,
NoAttributes, NoSize>;
-using NodeImplRecord = NodeImpl<HasName, MultiLeaves, LeafNames, NoSize>;
-using NodeImplEnum = NodeImpl<HasName, NoLeaves, LeafNames, NoSize>;
-using NodeImplArray = NodeImpl<NoName, SingleLeaf, NoLeafNames, NoSize>;
-using NodeImplMap = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoSize>;
-using NodeImplUnion = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoSize>;
-using NodeImplFixed = NodeImpl<HasName, NoLeaves, NoLeafNames, HasSize>;
+using NodeImplRecord = NodeImpl<HasName, MultiLeaves, LeafNames,
MultiAttributes, NoSize>;
+using NodeImplEnum = NodeImpl<HasName, NoLeaves, LeafNames, NoAttributes,
NoSize>;
+using NodeImplArray = NodeImpl<NoName, SingleLeaf, NoLeafNames, NoAttributes,
NoSize>;
+using NodeImplMap = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes,
NoSize>;
+using NodeImplUnion = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes,
NoSize>;
+using NodeImplFixed = NodeImpl<HasName, NoLeaves, NoLeafNames, NoAttributes,
HasSize>;
class AVRO_DECL NodePrimitive : public NodeImplPrimitive {
public:
@@ -245,9 +260,9 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic {
public:
NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {}
- explicit NodeSymbolic(const HasName &name) :
NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()) {}
+ explicit NodeSymbolic(const HasName &name) :
NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(),
NoAttributes(), NoSize()) {}
- NodeSymbolic(const HasName &name, const NodePtr &n) :
NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()),
actualNode_(n) {}
+ NodeSymbolic(const HasName &name, const NodePtr &n) :
NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(),
NoAttributes(), NoSize()), actualNode_(n) {}
SchemaResolution resolve(const Node &reader) const override;
void printJson(std::ostream &os, size_t depth) const override;
@@ -289,15 +304,27 @@ public:
NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves
&fields,
const LeafNames &fieldsNames,
- std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD,
name, doc, fields, fieldsNames, NoSize()),
+ std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD,
name, doc, fields, fieldsNames, MultiAttributes(), NoSize()),
defaultValues(std::move(dv)) {
- for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
- if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
- throw Exception(boost::format(
- "Cannot add duplicate field: %1%")
- % leafNameAttributes_.get(i));
- }
- }
+ leafNameCheck();
+ }
+
+ NodeRecord(const HasName &name, const MultiLeaves &fields,
+ const LeafNames &fieldsNames,
+ const std::vector<GenericDatum>& dv,
+ const MultiAttributes &customAttributes) :
+ NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames,
customAttributes, NoSize()),
+ defaultValues(dv) {
+ leafNameCheck();
+ }
+
+ NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves
&fields,
+ const LeafNames &fieldsNames,
+ const std::vector<GenericDatum>& dv,
+ const MultiAttributes &customAttributes) :
+ NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames,
customAttributes, NoSize()),
+ defaultValues(dv) {
+ leafNameCheck();
}
void swap(NodeRecord &r) {
@@ -310,7 +337,10 @@ public:
void printJson(std::ostream &os, size_t depth) const override;
bool isValid() const override {
- return ((nameAttribute_.size() == 1) && (leafAttributes_.size() ==
leafNameAttributes_.size()));
+ return ((nameAttribute_.size() == 1) &&
+ (leafAttributes_.size() == leafNameAttributes_.size()) &&
+ (customAttributes_.size() == 0 ||
+ customAttributes_.size() == leafAttributes_.size()));
}
const GenericDatum &defaultValueAt(size_t index) override {
@@ -318,13 +348,25 @@ public:
}
void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t
depth) const override;
+
+private:
+ // Adds every leaf name to the name index; throws if a name is a duplicate.
+ void leafNameCheck() {
+ for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
+ if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
+ throw Exception(boost::format(
+ "Cannot add duplicate field: %1%")
+ % leafNameAttributes_.get(i));
+ }
+ }
+ }
};
class AVRO_DECL NodeEnum : public NodeImplEnum {
public:
NodeEnum() : NodeImplEnum(AVRO_ENUM) {}
- NodeEnum(const HasName &name, const LeafNames &symbols) :
NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) {
+ NodeEnum(const HasName &name, const LeafNames &symbols) :
NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoAttributes(), NoSize()) {
for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
throw Exception(boost::format("Cannot add duplicate enum:
%1%") % leafNameAttributes_.get(i));
@@ -348,7 +390,7 @@ class AVRO_DECL NodeArray : public NodeImplArray {
public:
NodeArray() : NodeImplArray(AVRO_ARRAY) {}
- explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY,
NoName(), items, NoLeafNames(), NoSize()) {}
+ explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY,
NoName(), items, NoLeafNames(), NoAttributes(), NoSize()) {}
SchemaResolution resolve(const Node &reader) const override;
@@ -365,7 +407,7 @@ class AVRO_DECL NodeMap : public NodeImplMap {
public:
NodeMap();
- explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP,
NoName(), MultiLeaves(values), NoLeafNames(), NoSize()) {
+ explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP,
NoName(), MultiLeaves(values), NoLeafNames(), NoAttributes(), NoSize()) {
// need to add the key for the map too
NodePtr key(new NodePrimitive(AVRO_STRING));
doAddLeaf(key);
@@ -389,7 +431,7 @@ class AVRO_DECL NodeUnion : public NodeImplUnion {
public:
NodeUnion() : NodeImplUnion(AVRO_UNION) {}
- explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION,
NoName(), types, NoLeafNames(), NoSize()) {}
+ explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION,
NoName(), types, NoLeafNames(), NoAttributes(), NoSize()) {}
SchemaResolution resolve(const Node &reader) const override;
@@ -458,7 +500,7 @@ class AVRO_DECL NodeFixed : public NodeImplFixed {
public:
NodeFixed() : NodeImplFixed(AVRO_FIXED) {}
- NodeFixed(const HasName &name, const HasSize &size) :
NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size) {}
+ NodeFixed(const HasName &name, const HasSize &size) :
NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), NoAttributes(),
size) {}
SchemaResolution resolve(const Node &reader) const override;
@@ -472,9 +514,9 @@ public:
void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t
depth) const override;
};
-template<class A, class B, class C, class D>
+template<class A, class B, class C, class D, class E>
inline void
-NodeImpl<A, B, C, D>::setLeafToSymbolic(size_t index, const NodePtr &node) {
+NodeImpl<A, B, C, D, E>::setLeafToSymbolic(size_t index, const NodePtr &node) {
if (!B::hasAttribute) {
throw Exception("Cannot change leaf node for nonexistent leaf");
}
@@ -490,15 +532,15 @@ NodeImpl<A, B, C, D>::setLeafToSymbolic(size_t index,
const NodePtr &node) {
replaceNode = symbol;
}
-template<class A, class B, class C, class D>
+template<class A, class B, class C, class D, class E>
inline void
-NodeImpl<A, B, C, D>::printBasicInfo(std::ostream &os) const {
+NodeImpl<A, B, C, D, E>::printBasicInfo(std::ostream &os) const {
os << type();
if (hasName()) {
os << ' ' << nameAttribute_.get();
}
- if (D::hasAttribute) {
+ if (E::hasAttribute) {
os << " " << sizeAttribute_.get();
}
os << '\n';
diff --git a/lang/c++/api/Schema.hh b/lang/c++/api/Schema.hh
index abd646f9f..fa504815a 100644
--- a/lang/c++/api/Schema.hh
+++ b/lang/c++/api/Schema.hh
@@ -21,6 +21,7 @@
#include "Config.hh"
#include "NodeImpl.hh"
+#include "CustomFields.hh"
#include <string>
/// \file
@@ -100,6 +101,9 @@ class AVRO_DECL RecordSchema : public Schema {
public:
explicit RecordSchema(const std::string &name);
void addField(const std::string &name, const Schema &fieldSchema);
+ // Add a field with custom attributes
+ void addField(const std::string &name, const Schema &fieldSchema,
+ const CustomFields &customFields);
std::string getDoc() const;
void setDoc(const std::string &);
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index d76546f31..014229e11 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -17,9 +17,12 @@
*/
#include <boost/algorithm/string/replace.hpp>
#include <sstream>
+#include <unordered_set>
#include <utility>
#include "Compiler.hh"
+#include "CustomFields.hh"
+#include "NodeConcepts.hh"
#include "Schema.hh"
#include "Stream.hh"
#include "Types.hh"
@@ -146,7 +149,8 @@ struct Field {
const string name;
const NodePtr schema;
const GenericDatum defaultValue;
- Field(string n, NodePtr v, GenericDatum dv) : name(std::move(n)),
schema(std::move(v)), defaultValue(std::move(dv)) {}
+ const CustomFields customFields;
+ Field(string n, NodePtr v, GenericDatum dv, const CustomFields& cf) :
name(std::move(n)), schema(std::move(v)), defaultValue(std::move(dv)),
customFields(std::move(cf)) {}
};
static void assertType(const Entity &e, EntityType et) {
@@ -256,6 +260,26 @@ static GenericDatum makeGenericDatum(NodePtr n,
}
}
+static const std::unordered_set<std::string>& getKnownFields() {
+ // return known fields
+ static const std::unordered_set<std::string> kKnownFields =
+ {"name", "type", "default", "doc", "size", "logicalType",
+ "values", "precision", "scale", "namespace"};
+ return kKnownFields;
+}
+
+static void getCustomAttributes(const Object& m, CustomFields
&customAttributes)
+{
+ // Don't add known fields on primitive type and fixed type into custom
+ // fields.
+ const std::unordered_set<std::string>& kKnownFields = getKnownFields();
+ for (const auto &entry : m) {
+ if (kKnownFields.find(entry.first) == kKnownFields.end()) {
+ customAttributes.addField(entry.first, entry.second);
+ }
+ }
+}
+
static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
const Object &m = e.objectValue();
const string &n = getStringField(e, m, "name");
@@ -266,7 +290,11 @@ static Field makeField(const Entity &e, SymbolTable &st,
const string &ns) {
node->setDoc(getDocField(e, m));
}
GenericDatum d = (it2 == m.end()) ? GenericDatum() :
makeGenericDatum(node, it2->second, st);
- return Field(n, node, d);
+ // Get custom attributes
+ CustomFields customAttributes;
+ getCustomAttributes(m, customAttributes);
+
+ return Field(n, node, d, customAttributes);
}
// Extended makeRecordNode (with doc).
@@ -276,6 +304,7 @@ static NodePtr makeRecordNode(const Entity &e, const Name
&name,
const Array &v = getArrayField(e, m, "fields");
concepts::MultiAttribute<string> fieldNames;
concepts::MultiAttribute<NodePtr> fieldValues;
+ concepts::MultiAttribute<CustomFields> customAttributes;
vector<GenericDatum> defaultValues;
for (const auto &it : v) {
@@ -283,14 +312,15 @@ static NodePtr makeRecordNode(const Entity &e, const Name
&name,
fieldNames.add(f.name);
fieldValues.add(f.schema);
defaultValues.push_back(f.defaultValue);
+ customAttributes.add(f.customFields);
}
NodeRecord *node;
if (doc == nullptr) {
node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames,
- defaultValues);
+ defaultValues, customAttributes);
} else {
node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc),
- fieldValues, fieldNames, defaultValues);
+ fieldValues, fieldNames, defaultValues,
customAttributes);
}
return NodePtr(node);
}
diff --git a/lang/c++/impl/CustomFields.cc b/lang/c++/impl/CustomFields.cc
new file mode 100644
index 000000000..04541daae
--- /dev/null
+++ b/lang/c++/impl/CustomFields.cc
@@ -0,0 +1,59 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "CustomFields.hh"
+#include <map>
+#include <memory>
+#include "Exception.hh"
+
+namespace avro {
+
+using json::Entity;
+
+Entity CustomFields::getField(const std::string &fieldName) const {
+ std::map<std::string, Entity>::const_iterator iter =
+ fields_.find(fieldName);
+ if (iter == fields_.end()) {
+ return Entity();
+ }
+ return iter->second;
+}
+
+void CustomFields::addField(const std::string& fieldName,
+ const std::string& fieldValue) {
+ addField(fieldName,
+ json::Entity(std::make_shared<std::string>(fieldValue)));
+}
+
+void CustomFields::addField(const std::string& fieldName,
+ const Entity& fieldValue) {
+ auto iter_and_find =
+ fields_.insert(std::pair<std::string, Entity>(fieldName, fieldValue));
+ if (!iter_and_find.second) {
+ throw Exception(fieldName + " already exists and cannot be added");
+ }
+}
+
+void CustomFields::printJson(std::ostream& os,
+ const std::string& fieldName) const {
+ if (fields_.find(fieldName) == fields_.end()) {
+ throw Exception(fieldName + " doesn't exist");
+ }
+ os << "\"" << fieldName << "\": " << fields_.at(fieldName).toString();
+}
+} // namespace avro
diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc
index 810e1641e..37c855505 100644
--- a/lang/c++/impl/NodeImpl.cc
+++ b/lang/c++/impl/NodeImpl.cc
@@ -83,6 +83,17 @@ std::ostream &operator<<(std::ostream &os, indent x) {
return os;
}
+void printCustomFields(const CustomFields& customFields, int depth,
+ std::ostream &os) {
+ std::map<std::string, json::Entity>::const_iterator iter =
+ customFields.fields().begin();
+ while (iter != customFields.fields().end()) {
+ os << ",\n" << indent(depth);
+ customFields.printJson(os, iter->first);
+ ++iter;
+ }
+}
+
} // anonymous namespace
const int kByteStringSize = 6;
@@ -254,6 +265,7 @@ void NodeRecord::printJson(std::ostream &os, size_t depth)
const {
++depth;
// Serialize "default" field:
assert(defaultValues.empty() || (defaultValues.size() == fields));
+ assert(customAttributes_.size() == 0 || customAttributes_.size() ==
fields);
for (size_t i = 0; i < fields; ++i) {
if (i > 0) {
os << ',';
@@ -274,6 +286,9 @@ void NodeRecord::printJson(std::ostream &os, size_t depth)
const {
depth);
}
}
+ if(customAttributes_.size() == fields) {
+ printCustomFields(customAttributes_.get(i), depth, os);
+ }
os << '\n';
os << indent(--depth) << '}';
}
@@ -412,7 +427,7 @@ void NodeRecord::printDefaultToJson(const GenericDatum &g,
std::ostream &os,
NodeRecord::NodeRecord(const HasName &name,
const MultiLeaves &fields,
const LeafNames &fieldsNames,
- std::vector<GenericDatum> dv) :
NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()),
+ std::vector<GenericDatum> dv) :
NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, MultiAttributes(),
NoSize()),
defaultValues(std::move(dv)) {
for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
diff --git a/lang/c++/impl/Schema.cc b/lang/c++/impl/Schema.cc
index 42245292e..fa90d3422 100644
--- a/lang/c++/impl/Schema.cc
+++ b/lang/c++/impl/Schema.cc
@@ -19,6 +19,7 @@
#include <utility>
#include "Schema.hh"
+#include "CustomFields.hh"
namespace avro {
@@ -27,11 +28,18 @@ RecordSchema::RecordSchema(const std::string &name) :
Schema(new NodeRecord) {
}
void RecordSchema::addField(const std::string &name, const Schema
&fieldSchema) {
+ const CustomFields emptyCustomField;
+ addField(name, fieldSchema, emptyCustomField);
+}
+
+void RecordSchema::addField(const std::string &name, const Schema
&fieldSchema, const CustomFields &customFields) {
// add the name first. it will throw if the name is a duplicate, preventing
// the leaf from being added
node_->addName(name);
node_->addLeaf(fieldSchema.root());
+
+ node_->addCustomAttributesForField(customFields);
}
std::string RecordSchema::getDoc() const {
diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc
index 3195eabd0..b1260f567 100755
--- a/lang/c++/test/SchemaTests.cc
+++ b/lang/c++/test/SchemaTests.cc
@@ -105,6 +105,13 @@ const char *basicSchemas[] = {
// namespace with '$' in it.
"{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"a.b$\",\"fields\":"
"[{\"name\":\"f\",\"type\":\"long\"}]}",
+
+ // Custom attribute(s) for field in record
+ "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
+ "[{\"name\": \"f1\",\"type\": \"long\",\"extra field\": \"1\"}]}",
+ "{\"type\": \"record\",\"name\": \"Test\",\"fields\": "
+ "[{\"name\": \"f1\",\"type\": \"long\","
+ "\"extra field1\": \"1\",\"extra field2\": \"2\"}]}"
};
const char *basicSchemaErrors[] = {
@@ -217,6 +224,15 @@ const char *roundTripSchemas[] = {
// namespace with '$' in it.
"{\"type\":\"record\",\"namespace\":\"a.b$\",\"name\":\"Test\",\"fields\":"
"[{\"name\":\"f\",\"type\":\"long\"}]}",
+
+ // Custom fields
+ "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
+ "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
+ "{\"name\":\"f2\",\"type\":\"int\"}]}",
+ "{\"type\":\"record\",\"name\":\"Test\",\"fields\":"
+ "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"},"
+ "{\"name\":\"f2\",\"type\":\"int\","
+ "\"extra_field1\":\"21\",\"extra_field2\":\"22\"}]}",
};
const char *malformedLogicalTypes[] = {
diff --git a/lang/c++/test/unittest.cc b/lang/c++/test/unittest.cc
index 82b92f520..df7b21b84 100644
--- a/lang/c++/test/unittest.cc
+++ b/lang/c++/test/unittest.cc
@@ -18,6 +18,8 @@
#include <boost/test/included/unit_test_framework.hpp>
#include <iostream>
+#include <memory>
+#include <string>
#include "Compiler.hh"
#include "Decoder.hh"
@@ -36,6 +38,11 @@
#include "buffer/BufferStream.hh"
#include "AvroSerialize.hh"
+#include "CustomFields.hh"
+#include "NodeConcepts.hh"
+#include "NodeImpl.hh"
+#include "Types.hh"
+
using namespace avro;
@@ -67,7 +74,20 @@ struct TestSchema {
void buildSchema() {
RecordSchema record("RootRecord");
- record.addField("mylong", LongSchema());
+ CustomFields customFieldLong;
+ customFieldLong.addField("extra_info_mylong", std::string("it's a long
field"));
+ // Validate that adding a custom attribute with same name is not
allowed
+ bool caught = false;
+ try {
+ customFieldLong.addField("extra_info_mylong",
std::string("duplicate"));
+ }
+ catch(Exception &e) {
+ std::cout << "(intentional) exception: " << e.what() << '\n';
+ caught = true;
+ }
+ BOOST_CHECK_EQUAL(caught, true);
+ // Add custom attribute for the field
+ record.addField("mylong", LongSchema(), customFieldLong);
IntSchema intSchema;
avro::MapSchema map = MapSchema(IntSchema());
@@ -85,7 +105,7 @@ struct TestSchema {
myenum.addSymbol("two");
myenum.addSymbol("three");
- bool caught = false;
+ caught = false;
try {
myenum.addSymbol("three");
} catch (Exception &e) {
@@ -121,7 +141,12 @@ struct TestSchema {
}
BOOST_CHECK_EQUAL(caught, true);
- record.addField("mylong2", LongSchema());
+ CustomFields customFieldLong2;
+ customFieldLong2.addField("extra_info_mylong2",
+ std::string("it's a long field"));
+ customFieldLong2.addField("more_info_mylong2",
+ std::string("it's still a long field"));
+ record.addField("mylong2", LongSchema(), customFieldLong2);
record.addField("anotherint", intSchema);
@@ -387,6 +412,75 @@ struct TestSchema {
readData(p);
}
+ void testNodeRecord(const NodeRecord &nodeRecord,
+ const std::string &expectedJson)
+ {
+ BOOST_CHECK_EQUAL(nodeRecord.isValid(), true);
+
+ std::ostringstream oss;
+ nodeRecord.printJson(oss, 0);
+ std::string actual = oss.str();
+ actual.erase(std::remove_if(actual.begin(), actual.end(),
+ ::isspace), actual.end());
+
+ std::string expected = expectedJson;
+ expected.erase(std::remove_if(expected.begin(), expected.end(),
+ ::isspace), expected.end());
+
+ BOOST_CHECK_EQUAL(actual, expected);
+ }
+
+ // Create NodeRecord with custom attributes at field level
+ // validate json serialization
+ void checkNodeRecordWithCustomField()
+ {
+ Name recordName("Test");
+ HasName nameConcept(recordName);
+ concepts::MultiAttribute<std::string> fieldNames;
+ concepts::MultiAttribute<NodePtr> fieldValues;
+ std::vector<GenericDatum> defaultValues;
+ concepts::MultiAttribute<CustomFields> customFields;
+
+ CustomFields cf;
+ cf.addField("extra field", std::string("1"));
+ fieldNames.add("f1");
+ fieldValues.add(NodePtr( new NodePrimitive(Type::AVRO_LONG)));
+ customFields.add(cf);
+
+ NodeRecord nodeRecordWithCustomField(nameConcept, fieldValues,
+ fieldNames, defaultValues,
+ customFields);
+ std::string expectedJsonWithCustomField =
+ "{\"type\": \"record\", \"name\": \"Test\",\"fields\": "
+ "[{\"name\": \"f1\", \"type\": \"long\",\"extra field\": \"1\"}]}";
+ testNodeRecord(nodeRecordWithCustomField,
+ expectedJsonWithCustomField);
+ }
+
+ // Create NodeRecord without custom attributes at field level
+ // validate json serialization
+ void checkNodeRecordWithoutCustomField()
+ {
+ Name recordName("Test");
+ HasName nameConcept(recordName);
+ concepts::MultiAttribute<std::string> fieldNames;
+ concepts::MultiAttribute<NodePtr> fieldValues;
+ std::vector<GenericDatum> defaultValues;
+
+ CustomFields cf;
+ cf.addField("extra field", std::string("1"));
+ fieldNames.add("f1");
+ fieldValues.add(NodePtr( new NodePrimitive(Type::AVRO_LONG)));
+
+ NodeRecord nodeRecordWithoutCustomField(nameConcept, fieldValues,
+ fieldNames, defaultValues);
+ std::string expectedJsonWithoutCustomField =
+ "{\"type\": \"record\", \"name\": \"Test\",\"fields\": "
+ "[{\"name\": \"f1\", \"type\": \"long\"}]}";
+ testNodeRecord(nodeRecordWithoutCustomField,
+ expectedJsonWithoutCustomField);
+ }
+
void test() {
std::cout << "Before\n";
schema_.toJson(std::cout);
@@ -408,6 +502,9 @@ struct TestSchema {
readValidatedData();
createExampleSchema();
+
+ checkNodeRecordWithoutCustomField();
+ checkNodeRecordWithCustomField();
}
ValidSchema schema_;