Author: sbanacho
Date: Tue Nov 17 01:11:51 2009
New Revision: 881076
URL: http://svn.apache.org/viewvc?rev=881076&view=rev
Log:
AVRO-204. Change the way symbolic references are tracked.
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/c++/api/Compiler.hh
hadoop/avro/trunk/src/c++/api/CompilerNode.hh
hadoop/avro/trunk/src/c++/api/Node.hh
hadoop/avro/trunk/src/c++/api/NodeImpl.hh
hadoop/avro/trunk/src/c++/api/SymbolMap.hh
hadoop/avro/trunk/src/c++/api/Types.hh
hadoop/avro/trunk/src/c++/api/ValidSchema.hh
hadoop/avro/trunk/src/c++/api/Validator.hh
hadoop/avro/trunk/src/c++/impl/Compiler.cc
hadoop/avro/trunk/src/c++/impl/CompilerNode.cc
hadoop/avro/trunk/src/c++/impl/Types.cc
hadoop/avro/trunk/src/c++/impl/ValidSchema.cc
hadoop/avro/trunk/src/c++/impl/Validator.cc
hadoop/avro/trunk/src/c++/scripts/gen-cppcode.py
hadoop/avro/trunk/src/c++/test/unittest.cc
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Nov 17 01:11:51 2009
@@ -66,6 +66,8 @@
AVRO-197. Add mapping of name to index for records and enums. (sbanacho)
+ AVRO-204. Change the way symbolic references are tracked. (sbanacho)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/src/c++/api/Compiler.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Compiler.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Compiler.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Compiler.hh Tue Nov 17 01:11:51 2009
@@ -25,6 +25,7 @@
#include "Types.hh"
#include "Node.hh"
#include "CompilerNode.hh"
+#include "SymbolMap.hh"
namespace avro {
@@ -86,8 +87,9 @@
yyFlexLexer lexer_;
std::string text_;
- NodePtr root_;
- Stack stack_;
+ NodePtr root_;
+ Stack stack_;
+ SymbolMap symbolMap_;
};
class ValidSchema;
Modified: hadoop/avro/trunk/src/c++/api/CompilerNode.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/CompilerNode.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/CompilerNode.hh (original)
+++ hadoop/avro/trunk/src/c++/api/CompilerNode.hh Tue Nov 17 01:11:51 2009
@@ -43,7 +43,8 @@
FIELDS,
VALUES,
ITEMS,
- TYPES
+ TYPES,
+ SYMBOLIC
};
CompilerNode() :
@@ -87,6 +88,9 @@
case TYPES:
typesAttribute_.add(node);
break;
+ case SYMBOLIC:
+ symbolicAttribute_.add(node);
+ break;
default:
throw Exception("Can't add node if the attribute type is not set");
@@ -94,7 +98,7 @@
}
- // attribute used by records, enums, and fixed:
+ // attribute used by records, enums, symbols, and fixed:
concepts::SingleAttribute<std::string> nameAttribute_;
// attribute used by fixed:
@@ -113,6 +117,9 @@
// attribute used by maps:
concepts::SingleAttribute<NodePtr> valuesAttribute_;
+ // attribute used by symbolic names:
+ concepts::SingleAttribute<NodePtr> symbolicAttribute_;
+
// attribute used by unions:
concepts::MultiAttribute<NodePtr> typesAttribute_;
Modified: hadoop/avro/trunk/src/c++/api/Node.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Node.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Node.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Node.hh Tue Nov 17 01:11:51 2009
@@ -113,7 +113,7 @@
friend class ValidSchema;
- virtual void setLeafToSymbolic(int index) = 0;
+ virtual void setLeafToSymbolic(int index, const NodePtr &node) = 0;
void checkLock() const {
if(locked()) {
Modified: hadoop/avro/trunk/src/c++/api/NodeImpl.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeImpl.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeImpl.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeImpl.hh Tue Nov 17 01:11:51 2009
@@ -116,7 +116,7 @@
void printBasicInfo(std::ostream &os) const;
- void setLeafToSymbolic(int index);
+ void setLeafToSymbolic(int index, const NodePtr &node);
NameConcept nameAttribute_;
LeavesConcept leafAttributes_;
@@ -139,7 +139,7 @@
typedef concepts::SingleAttribute<int> HasSize;
typedef NodeImpl< NoName, NoLeaves, NoLeafNames, NoSize >
NodeImplPrimitive;
-typedef NodeImpl< HasName, NoLeaves, NoLeafNames, NoSize >
NodeImplSymbolic;
+typedef NodeImpl< HasName, SingleLeaf, NoLeafNames, NoSize >
NodeImplSymbolic;
typedef NodeImpl< HasName, MultiLeaves, LeafNames, NoSize > NodeImplRecord;
typedef NodeImpl< HasName, NoLeaves, LeafNames, NoSize > NodeImplEnum;
@@ -171,8 +171,8 @@
NodeImplSymbolic(AVRO_SYMBOLIC)
{ }
- explicit NodeSymbolic(const HasName &name) :
- NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(),
NoSize())
+ explicit NodeSymbolic(const HasName &name, const SingleLeaf &node) :
+ NodeImplSymbolic(AVRO_SYMBOLIC, name, node, NoLeafNames(), NoSize())
{ }
void printJson(std::ostream &os, int depth) const;
@@ -319,16 +319,20 @@
template < class A, class B, class C, class D >
inline void
-NodeImpl<A,B,C,D>::setLeafToSymbolic(int index)
+NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const NodePtr &node)
{
if(!B::hasAttribute) {
throw Exception("Cannot change leaf node for nonexistent leaf");
}
NodePtr symbol(new NodeSymbolic);
- NodePtr &node = const_cast<NodePtr &>(leafAttributes_.get(index));
+ NodePtr &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index));
+ if(replaceNode->name() != node->name()) {
+ throw Exception("Symbolic name does not match the name of the schema
it references");
+ }
symbol->setName(node->name());
- node = symbol;
+ symbol->addLeaf(node);
+ replaceNode = symbol;
}
template < class A, class B, class C, class D >
@@ -349,7 +353,7 @@
if( C::hasAttribute ) {
os << "name " << nameAt(i) << '\n';
}
- if( leafAttributes_.hasAttribute) {
+ if( type() != AVRO_SYMBOLIC && leafAttributes_.hasAttribute) {
leafAt(i)->printBasicInfo(os);
}
}
Modified: hadoop/avro/trunk/src/c++/api/SymbolMap.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/SymbolMap.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/SymbolMap.hh (original)
+++ hadoop/avro/trunk/src/c++/api/SymbolMap.hh Tue Nov 17 01:11:51 2009
@@ -42,6 +42,9 @@
bool registerSymbol(const NodePtr &node) {
+ if(node->type() == AVRO_SYMBOLIC) {
+ throw Exception("Node must not be a symbolic name");
+ }
const std::string name = node->name();
if(name.empty()) {
throw Exception("Node must have a name to be registered");
Modified: hadoop/avro/trunk/src/c++/api/Types.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Types.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Types.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Types.hh Tue Nov 17 01:11:51 2009
@@ -41,9 +41,12 @@
AVRO_UNION,
AVRO_FIXED,
- AVRO_SYMBOLIC,
+ AVRO_NUM_TYPES, // marker
+
+ // The following is a pseudo-type used in implementation
+
+ AVRO_SYMBOLIC = AVRO_NUM_TYPES
- AVRO_NUM_TYPES,
};
inline bool isPrimitive(Type t) {
@@ -58,7 +61,12 @@
return (t >= AVRO_STRING) && (t < AVRO_NUM_TYPES);
}
-std::ostream &operator<< (std::ostream &os, const avro::Type type);
+inline bool isAvroTypeOrPseudoType(Type t) {
+ return (t >= AVRO_STRING) && (t <= AVRO_NUM_TYPES);
+}
+
+
+std::ostream &operator<< (std::ostream &os, avro::Type type);
/// define a type to identify Null in template functions
struct Null {};
Modified: hadoop/avro/trunk/src/c++/api/ValidSchema.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/ValidSchema.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/ValidSchema.hh (original)
+++ hadoop/avro/trunk/src/c++/api/ValidSchema.hh Tue Nov 17 01:11:51 2009
@@ -22,11 +22,11 @@
#include <boost/noncopyable.hpp>
#include "Node.hh"
-#include "SymbolMap.hh"
namespace avro {
class Schema;
+class SymbolMap;
/// A ValidSchema is basically a non-mutable Schema that has passed some
/// minumum of sanity checks. Once valididated, any Schema that is part of
@@ -56,15 +56,10 @@
void toFlatList(std::ostream &os) const;
- NodePtr followSymbol(const std::string &name) const {
- return symbolMap_.locateSymbol(name);
- }
-
protected:
- bool validate(const NodePtr &node);
+ bool validate(const NodePtr &node, SymbolMap &symbolMap);
- SymbolMap symbolMap_;
NodePtr root_;
};
Modified: hadoop/avro/trunk/src/c++/api/Validator.hh
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Validator.hh?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Validator.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Validator.hh Tue Nov 17 01:11:51 2009
@@ -39,7 +39,7 @@
class Validator : private boost::noncopyable
{
- typedef uint64_t flag_t;
+ typedef uint32_t flag_t;
public:
Modified: hadoop/avro/trunk/src/c++/impl/Compiler.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Compiler.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Compiler.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Compiler.cc Tue Nov 17 01:11:51 2009
@@ -71,6 +71,13 @@
else {
stack_.back().addNode(node);
}
+
+ if(node->hasName() && node->type() != AVRO_SYMBOLIC) {
+ bool registered = symbolMap_.registerSymbol(node);
+ if(!registered) {
+ throw Exception(boost::format("Symbol %1% already exists") %
node->name());
+ }
+ }
}
void
@@ -120,8 +127,16 @@
#ifdef DEBUG_VERBOSE
std::cerr << "Adding named type " << text_ << '\n';
#endif
- stack_.back().setType(AVRO_SYMBOLIC);
- stack_.back().nameAttribute_.add(text_);
+
+ NodePtr node = symbolMap_.locateSymbol(text_);
+ if(node) {
+ stack_.back().setType(AVRO_SYMBOLIC);
+ stack_.back().nameAttribute_.add(text_);
+ stack_.back().symbolicAttribute_.add(node);
+ }
+ else {
+ throw Exception(boost::format("Could not resolve symbolic name %1%") %
text_);
+ }
}
void
Modified: hadoop/avro/trunk/src/c++/impl/CompilerNode.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/CompilerNode.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/CompilerNode.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/CompilerNode.cc Tue Nov 17 01:11:51 2009
@@ -54,7 +54,7 @@
break;
case AVRO_SYMBOLIC:
- ptr.reset ( new NodeSymbolic(node.nameAttribute_));
+ ptr.reset ( new NodeSymbolic(node.nameAttribute_,
node.symbolicAttribute_));
break;
default:
Modified: hadoop/avro/trunk/src/c++/impl/Types.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Types.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Types.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Types.cc Tue Nov 17 01:11:51 2009
@@ -41,15 +41,18 @@
"symbolic"
};
-BOOST_STATIC_ASSERT( (sizeof(typeToString)/sizeof(std::string)) ==
(AVRO_NUM_TYPES) );
+BOOST_STATIC_ASSERT( (sizeof(typeToString)/sizeof(std::string)) ==
(AVRO_NUM_TYPES+1) );
} // namespace strings
-BOOST_STATIC_ASSERT( AVRO_NUM_TYPES < 64 );
-std::ostream &operator<< (std::ostream &os, const Type type)
+// this static assert exists because a 32 bit integer is used as a bit-flag
for each type,
+// and it would be a problem for this flag if we ever supported more than 32
types
+BOOST_STATIC_ASSERT( AVRO_NUM_TYPES < 32 );
+
+std::ostream &operator<< (std::ostream &os, Type type)
{
- if(isAvroType(type)) {
+ if(isAvroTypeOrPseudoType(type)) {
os << strings::typeToString[type];
}
else {
Modified: hadoop/avro/trunk/src/c++/impl/ValidSchema.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/ValidSchema.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/ValidSchema.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/ValidSchema.cc Tue Nov 17 01:11:51 2009
@@ -19,6 +19,7 @@
#include <boost/format.hpp>
#include "ValidSchema.hh"
+#include "SymbolMap.hh"
#include "Schema.hh"
#include "Node.hh"
@@ -27,7 +28,8 @@
ValidSchema::ValidSchema(const Schema &schema) :
root_(schema.root())
{
- validate(root_);
+ SymbolMap symbolMap;
+ validate(root_, symbolMap);
}
ValidSchema::ValidSchema() :
@@ -38,12 +40,13 @@
ValidSchema::setSchema(const Schema &schema)
{
const NodePtr &node(schema.root());
- validate(schema.root());
+ SymbolMap symbolMap;
+ validate(schema.root(), symbolMap);
root_ = node;
}
bool
-ValidSchema::validate(const NodePtr &node)
+ValidSchema::validate(const NodePtr &node, SymbolMap &symbolMap)
{
if(!node) {
root_.reset(new NodePrimitive(AVRO_NULL));
@@ -54,12 +57,12 @@
}
if(node->hasName()) {
if(node->type() == AVRO_SYMBOLIC) {
- if(!symbolMap_.hasSymbol(node->name())) {
+ if(!symbolMap.hasSymbol(node->name())) {
throw Exception( boost::format("Symbolic name \"%1%\" is
unknown") % node->name());
}
return true;
}
- bool registered = symbolMap_.registerSymbol(node);
+ bool registered = symbolMap.registerSymbol(node);
if(!registered) {
return false;
}
@@ -69,8 +72,16 @@
for(size_t i = 0; i < leaves; ++i) {
const NodePtr &leaf(node->leafAt(i));
- if(! validate(leaf)) {
- node->setLeafToSymbolic(i);
+ if(! validate(leaf, symbolMap)) {
+
+ // if validate returns false it means a node with this name already
+ // existed in the map, instead of keeping this node twice in the
+ // map (which could potentially create circular shared pointer
+ // links that could not be easily freed), replace this node with a
+ // symbolic link to the original one.
+
+ NodePtr redirect = symbolMap.locateSymbol(leaf->name());
+ node->setLeafToSymbolic(i, redirect);
}
}
Modified: hadoop/avro/trunk/src/c++/impl/Validator.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Validator.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Validator.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Validator.cc Tue Nov 17 01:11:51 2009
@@ -173,8 +173,7 @@
&Validator::countingAdvance,
&Validator::countingAdvance,
&Validator::unionAdvance,
- &Validator::fixedAdvance,
- 0 // symbolic
+ &Validator::fixedAdvance
};
BOOST_STATIC_ASSERT( (sizeof(funcs)/sizeof(AdvanceFunc)) ==
(AVRO_NUM_TYPES) );
@@ -231,8 +230,7 @@
typeToFlag(AVRO_ARRAY),
typeToFlag(AVRO_MAP),
typeToFlag(AVRO_UNION),
- typeToFlag(AVRO_FIXED),
- 0
+ typeToFlag(AVRO_FIXED)
};
BOOST_STATIC_ASSERT( (sizeof(flags)/sizeof(flag_t)) == (AVRO_NUM_TYPES) );
@@ -245,13 +243,13 @@
nextType_ = node->type();
if(nextType_ == AVRO_SYMBOLIC) {
- NodePtr symNode ( schema_.followSymbol(node->name()) );
+ NodePtr symNode(node->leafAt(0));
assert(symNode);
setupOperation(symNode);
return;
}
- assert(nextType_ < AVRO_NUM_TYPES);
+ assert(nextType_ < AVRO_SYMBOLIC);
setupFlag(nextType_);
Modified: hadoop/avro/trunk/src/c++/scripts/gen-cppcode.py
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/scripts/gen-cppcode.py?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/scripts/gen-cppcode.py (original)
+++ hadoop/avro/trunk/src/c++/scripts/gen-cppcode.py Tue Nov 17 01:11:51 2009
@@ -49,8 +49,6 @@
return (typeToC[type], type)
def doSymbolic(args):
- line = getNextLine()
- if line[0] != 'end': print 'error'
addForwardDeclare(args[1])
return (args[1], args[1])
@@ -118,8 +116,9 @@
$setfuncs$
#ifdef AVRO_BOOST_NO_ANYREF
template<typename T>
- T getValue() const {
- return boost::any_cast<T>(value);
+ const T &getValue() const {
+ const T *ptr = boost::any_cast<T>(&value);
+ return *ptr;
}
#else
template<typename T>
Modified: hadoop/avro/trunk/src/c++/test/unittest.cc
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/unittest.cc?rev=881076&r1=881075&r2=881076&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/unittest.cc (original)
+++ hadoop/avro/trunk/src/c++/test/unittest.cc Tue Nov 17 01:11:51 2009
@@ -261,7 +261,7 @@
printNext(p);
size = p.readMapBlockSize();
std::cout << "Size " << size << '\n';
- for(int32_t i=0; i < size; ++i) {
+ for(int64_t i=0; i < size; ++i) {
std::string key;
printNext(p);
p.readString(key);
@@ -281,7 +281,7 @@
printNext(p);
size = p.readArrayBlockSize();
std::cout << "Size " << size << '\n';
- for(int32_t i=0; i < size; ++i) {
+ for(int64_t i=0; i < size; ++i) {
printNext(p);
d = p.readDouble();
std::cout << i << ":" << d << '\n';
@@ -602,7 +602,7 @@
{
void testBadFile()
{
- std::cout << "TestBadStuff\n";
+ std::cout << "TestBadFile\n";
avro::ValidSchema schema;
std::ifstream in("agjoewejefkjs");