Author: cutting
Date: Sun Jan 10 17:25:30 2010
New Revision: 897663
URL: http://svn.apache.org/viewvc?rev=897663&view=rev
Log:
AVRO-207. Fix Python to detect duplicate enum symbols. Contributed by Jeff
Hammerbacher.
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/py/avro/schema.py
hadoop/avro/trunk/src/test/py/test_schema.py
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Sun Jan 10 17:25:30 2010
@@ -274,6 +274,9 @@
AVRO-299. Fix Python numeric promotion. (Jeff Hammerbacher via cutting)
+ AVRO-207. Fix Python to detect duplicate enum symbols and add
+ tests for duplicates in unions. (Jeff Hammerbacher via cutting)
+
Avro 1.2.0 (14 October 2009)
INCOMPATIBLE CHANGES
Modified: hadoop/avro/trunk/src/py/avro/schema.py
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/schema.py?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/schema.py (original)
+++ hadoop/avro/trunk/src/py/avro/schema.py Sun Jan 10 17:25:30 2010
@@ -122,7 +122,6 @@
def set_prop(self, key, value):
self.props[key] = value
-
class Name(object):
"""Container class for static methods on Avro names."""
@staticmethod
@@ -304,6 +303,9 @@
elif False in [isinstance(s, basestring) for s in symbols]:
fail_msg = 'Enum Schems requires All symbols to be JSON strings.'
raise AvroException(fail_msg)
+ elif len(set(symbols)) < len(symbols):
+ fail_msg = 'Duplicate symbol: %s' % symbols
+ raise AvroException(fail_msg)
# Call parent ctor
NamedSchema.__init__(self, 'enum', name, namespace, names)
Modified: hadoop/avro/trunk/src/test/py/test_schema.py
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/py/test_schema.py?rev=897663&r1=897662&r2=897663&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/py/test_schema.py (original)
+++ hadoop/avro/trunk/src/test/py/test_schema.py Sun Jan 10 17:25:30 2010
@@ -19,6 +19,12 @@
import unittest
from avro import schema
+def print_test_name(test_name):
+ print ''
+ print test_name
+ print '=' * len(test_name)
+ print ''
+
class ExampleSchema(object):
def __init__(self, schema_string, valid, name='', comment=''):
self._schema_string = schema_string
@@ -90,6 +96,11 @@
{"type": "enum",
"symbols" : ["I", "will", "fail", "no", "name"]}
""", False),
+ ExampleSchema("""\
+ {"type": "enum",
+ "name": "Test"
+ "symbols" : ["AA", "AA"]}
+ """, False),
]
ARRAY_EXAMPLES = [
@@ -111,6 +122,7 @@
UNION_EXAMPLES = [
ExampleSchema('["string", "null", "long"]', True),
ExampleSchema('["null", "null"]', False),
+ ExampleSchema('["long", "long"]', False),
ExampleSchema("""\
[{"type": "array", "items": "long"}
{"type": "array", "items": "string"}]
@@ -252,48 +264,44 @@
# TODO(hammer): use logging module?
class TestSchema(unittest.TestCase):
def test_parse(self):
- debug_msg = "\nTEST PARSE\n"
- print debug_msg
-
- num_correct = 0
+ print_test_name('TEST PARSE')
+ correct = 0
for example in EXAMPLES:
try:
schema.parse(example.schema_string)
- if example.valid: num_correct += 1
+ if example.valid: correct += 1
debug_msg = "%s: PARSE SUCCESS" % example.name
except:
- if not example.valid: num_correct += 1
+ if not example.valid: correct += 1
debug_msg = "%s: PARSE FAILURE" % example.name
finally:
print debug_msg
fail_msg = "Parse behavior correct on %d out of %d schemas." % \
- (num_correct, len(EXAMPLES))
- self.assertEqual(num_correct, len(EXAMPLES), fail_msg)
+ (correct, len(EXAMPLES))
+ self.assertEqual(correct, len(EXAMPLES), fail_msg)
def test_valid_cast_to_string_after_parse(self):
"""
Test that the string generated by an Avro Schema object
is, in fact, a valid Avro schema.
"""
- debug_msg = "\nTEST CAST TO STRING\n"
- print debug_msg
-
- num_correct = 0
+ print_test_name('TEST CAST TO STRING AFTER PARSE')
+ correct = 0
for example in VALID_EXAMPLES:
schema_data = schema.parse(example.schema_string)
try:
schema.parse(str(schema_data))
debug_msg = "%s: STRING CAST SUCCESS" % example.name
- num_correct += 1
+ correct += 1
except:
debug_msg = "%s: STRING CAST FAILURE" % example.name
finally:
print debug_msg
fail_msg = "Cast to string success on %d out of %d schemas" % \
- (num_correct, len(VALID_EXAMPLES))
- self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+ (correct, len(VALID_EXAMPLES))
+ self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
def test_equivalence_after_round_trip(self):
"""
@@ -302,17 +310,14 @@
to generate Avro schema "round trip".
3. Ensure "original" and "round trip" schemas are equivalent.
"""
- debug_msg = "\nTEST ROUND TRIP\n"
- print debug_msg
-
- num_correct = 0
+ print_test_name('TEST ROUND TRIP')
+ correct = 0
for example in VALID_EXAMPLES:
try:
original_schema = schema.parse(example.schema_string)
round_trip_schema = schema.parse(str(original_schema))
-
if original_schema == round_trip_schema:
- num_correct += 1
+ correct += 1
debug_msg = "%s: ROUND TRIP SUCCESS" % example.name
else:
debug_msg = "%s: ROUND TRIP FAILURE" % example.name
@@ -322,15 +327,39 @@
print debug_msg
fail_msg = "Round trip success on %d out of %d schemas" % \
- (num_correct, len(VALID_EXAMPLES))
- self.assertEqual(num_correct, len(VALID_EXAMPLES), fail_msg)
+ (correct, len(VALID_EXAMPLES))
+ self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
# TODO(hammer): more tests
def test_fullname(self):
- """Test process for making full names from name, namespace pairs."""
- debug_msg = '\nTEST FULL NAME\n'
- print debug_msg
-
+ """
+ The fullname is determined in one of the following ways:
+ * A name and namespace are both specified. For example,
+ one might use "name": "X", "namespace": "org.foo"
+ to indicate the fullname "org.foo.X".
+ * A fullname is specified. If the name specified contains
+ a dot, then it is assumed to be a fullname, and any
+ namespace also specified is ignored. For example,
+ use "name": "org.foo.X" to indicate the
+ fullname "org.foo.X".
+ * A name only is specified, i.e., a name that contains no
+ dots. In this case the namespace is taken from the most
+ tightly enclosing schema or protocol. For example,
+ if "name": "X" is specified, and this occurs
+ within a field of the record definition
+ of "org.foo.Y", then the fullname is "org.foo.X".
+
+ References to previously defined names are as in the latter
+ two cases above: if they contain a dot they are a fullname, if
+ they do not contain a dot, the namespace is the namespace of
+ the enclosing definition.
+
+ Primitive type names have no namespace and their names may
+ not be defined in any namespace. A schema may only contain
+ multiple definitions of a fullname if the definitions are
+ equivalent.
+ """
+ print_test_name('TEST FULLNAME')
fullname = schema.Name.make_fullname('a', 'o.a.h')
self.assertEqual(fullname, 'o.a.h.a')