Author: cutting
Date: Fri Dec 4 23:13:09 2009
New Revision: 887439
URL: http://svn.apache.org/viewvc?rev=887439&view=rev
Log:
AVRO-241. In Java, add a union annotation for reflection.
Added:
hadoop/avro/trunk/src/java/org/apache/avro/reflect/Union.java
Removed:
hadoop/avro/trunk/src/java/org/apache/avro/util/WeakIdentityHashMap.java
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
hadoop/avro/trunk/src/java/org/apache/avro/reflect/package.html
hadoop/avro/trunk/src/java/org/apache/avro/specific/package.html
hadoop/avro/trunk/src/test/java/org/apache/avro/TestReflect.java
Modified: hadoop/avro/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Fri Dec 4 23:13:09 2009
@@ -118,6 +118,8 @@
AVRO-242. In Java, add support for extensible string-valued
properties to schemas. (cutting)
+ AVRO-241. In Java, add a union annotation for reflection. (cutting)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html Fri Dec 4
23:13:09 2009
@@ -20,8 +20,10 @@
<body>
A generic representation for Avro data.
-Represent {@link org.apache.avro.Schema} data with generic Java classes.
-<p>Uses the following mapping:
+<p>This representation is best for applications which deal with
+ dynamic data, whose schemas are not known until runtime.
+
+<p>Avro schemas are mapped to Java types as follows:
<ul>
<li>Schema records are implemented as {@link
org.apache.avro.generic.GenericRecord}.
<li>Schema enums are implemented as {@link java.lang.String}.
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
(original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java Fri Dec
4 23:13:09 2009
@@ -41,22 +41,11 @@
import org.apache.avro.specific.SpecificData;
import org.apache.avro.specific.FixedSize;
import org.apache.avro.ipc.AvroRemoteException;
-import org.apache.avro.util.WeakIdentityHashMap;
import com.thoughtworks.paranamer.CachingParanamer;
import com.thoughtworks.paranamer.Paranamer;
-/** Utilities to use existing Java classes and interfaces via reflection.
- *
- * <p><b>Records</b>Fields are not permitted to be null. Fields which are not
- * static or transient are used.
- *
- * <p><b>Arrays</b>Both Java arrays and implementations of {@link
Collection}
- * are mapped to Avro arrays.
- *
- * <p><b>{@link String}</b> is mapped to Avro string.
- * <p><b>byte[]</b> is mapped to Avro bytes.
- */
+/** Utilities to use existing Java classes and interfaces via reflection. */
public class ReflectData extends SpecificData {
/** {@link ReflectData} implementation that permits null field values.
The
@@ -179,18 +168,17 @@
throw new AvroRuntimeException("No field named "+name+" in: "+c);
}
- // Indicates the Java representation for an array schema. If an entry is
- // present, it contains the Java Collection class of this array. If no entry
- // is present, then a Java array should be used to implement this array.
- private static final Map<Schema,Class> COLLECTION_CLASSES =
- new WeakIdentityHashMap<Schema,Class>();
- private static synchronized void setCollectionClass(Schema schema, Class c) {
- COLLECTION_CLASSES.put(schema, c);
- }
+ static final String CLASS_PROP = "java-class";
+ static final String ELEMENT_PROP = "java-element-class";
- /** Return the {@link Collection} subclass that implements this schema.*/
- public static synchronized Class getCollectionClass(Schema schema) {
- return COLLECTION_CLASSES.get(schema);
+ static Class getClassProp(Schema schema, String prop) {
+ String name = schema.getProp(prop);
+ if (name == null) return null;
+ try {
+ return Class.forName(name);
+ } catch (ClassNotFoundException e) {
+ throw new AvroRuntimeException(e);
+ }
}
private static final Class BYTES_CLASS = new byte[0].getClass();
@@ -199,7 +187,7 @@
public Class getClass(Schema schema) {
switch (schema.getType()) {
case ARRAY:
- Class collectionClass = getCollectionClass(schema);
+ Class collectionClass = getClassProp(schema, CLASS_PROP);
if (collectionClass != null)
return collectionClass;
return
java.lang.reflect.Array.newInstance(getClass(schema.getElementType()),0).getClass();
@@ -217,7 +205,9 @@
Type component = ((GenericArrayType)type).getGenericComponentType();
if (component == Byte.TYPE) // byte array
return Schema.create(Schema.Type.BYTES);
- return Schema.createArray(createSchema(component, names));
+ Schema result = Schema.createArray(createSchema(component, names));
+ setElement(result, component);
+ return result;
} else if (type instanceof ParameterizedType) {
ParameterizedType ptype = (ParameterizedType)type;
Class raw = (Class)ptype.getRawType();
@@ -232,7 +222,7 @@
if (params.length != 1)
throw new AvroTypeException("No array type specified.");
Schema schema = Schema.createArray(createSchema(params[0], names));
- setCollectionClass(schema, raw);
+ schema.setProp(CLASS_PROP, raw.getName());
return schema;
}
} else if (type instanceof Class) { // Class
@@ -244,7 +234,9 @@
Class component = c.getComponentType();
if (component == Byte.TYPE) // byte array
return Schema.create(Schema.Type.BYTES);
- return Schema.createArray(createSchema(component, names));
+ Schema result = Schema.createArray(createSchema(component, names));
+ setElement(result, component);
+ return result;
}
if (c == String.class) // String
return Schema.create(Schema.Type.STRING);
@@ -255,7 +247,13 @@
String space = c.getPackage().getName();
if (c.getEnclosingClass() != null) // nested class
space = c.getEnclosingClass().getName() + "$";
- if (c.isEnum()) { // Enum
+ Union union = (Union)c.getAnnotation(Union.class);
+ if (union != null) { // union annotated
+ List<Schema> branches = new ArrayList<Schema>();
+ for (Class branch : union.value())
+ branches.add(createSchema(branch, names));
+ return Schema.createUnion(branches);
+ } else if (c.isEnum()) { // Enum
List<String> symbols = new ArrayList<String>();
Enum[] constants = (Enum[])c.getEnumConstants();
for (int i = 0; i < constants.length; i++)
@@ -284,6 +282,17 @@
return super.createSchema(type, names);
}
+ // if array element type is a class with a union annotation, note it
+ // this is required because we cannot set a property on the union itself
+ @SuppressWarnings(value="unchecked")
+ private void setElement(Schema schema, Type element) {
+ if (!(element instanceof Class)) return;
+ Class c = (Class)element;
+ Union union = (Union)c.getAnnotation(Union.class);
+ if (union != null) // element is annotated union
+ schema.setProp(ELEMENT_PROP, c.getName());
+ }
+
// Return of this class and its superclasses to serialize.
// Not cached, since this is only used to create schemas, which are cached.
private Collection<Field> getFields(Class recordClass) {
Modified:
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
(original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
Fri Dec 4 23:13:09 2009
@@ -73,7 +73,8 @@
@Override
@SuppressWarnings(value="unchecked")
protected Object newArray(Object old, int size, Schema schema) {
- Class collectionClass = ReflectData.get().getCollectionClass(schema);
+ ReflectData data = ReflectData.get();
+ Class collectionClass = data.getClassProp(schema, ReflectData.CLASS_PROP);
if (collectionClass != null) {
if (old instanceof Collection) {
((Collection)old).clear();
@@ -83,7 +84,9 @@
return new ArrayList();
return newInstance(collectionClass);
}
- Class elementClass = ReflectData.get().getClass(schema.getElementType());
+ Class elementClass = data.getClassProp(schema, ReflectData.ELEMENT_PROP);
+ if (elementClass == null)
+ elementClass = data.getClass(schema.getElementType());
return Array.newInstance(elementClass, size);
}
Added: hadoop/avro/trunk/src/java/org/apache/avro/reflect/Union.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/Union.java?rev=887439&view=auto
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/Union.java (added)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/Union.java Fri Dec 4
23:13:09 2009
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.reflect;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Declares that a class should be represented by a union type. Use for base
+ * classes or interfaces whose instantiable subclasses should be listed in the
+ * parameters to the @Union annotation.
+ */
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE})
+@Documented
+public @interface Union {
+ /** The instantiable classes that compose this union. */
+ Class[] value();
+}
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/package.html
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/package.html?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/package.html (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/package.html Fri Dec 4
23:13:09 2009
@@ -18,6 +18,53 @@
-->
<body>
-Use Java reflection to generate schemas and protocols for existing classes.
+Use Java reflection to generate schemas and protocols for existing
+classes.
+
+<p>This API is not recommended except as a stepping stone for
+systems that currently use Java interfaces to define RPC protocols.
+For new RPC systems, the {@link org.apache.avro.specific specific} API
+is preferred. For systems that process dynamic data, the {@link
+org.apache.avro.generic generic} API is probably best.
+
+<p>Java types are mapped to Avro schemas as follows:
+
+<ul>
+
+<li><b>Classes</b> are mapped to Avro records. Only concrete classes
+ with a no-argument constructor are supported. Fields are not
+ permitted to be null. All inherited fields that are not static or
+ transient are used.</li>
+
+<li><b>Arrays</b> are mapped to Avro array schemas. If an array's
+ elements are a union defined by the {@link
+ org.apache.avro.reflect.Union Union} annotation, the "java-element-class"
+ property is set to the union's class, e.g.:
+ <pre>{"type": "array", "java-element-class": "org.acme.Foo"}</pre>
+</li>
+
+<li><b>Collection</b> implementations are mapped to Avro array schemas
+ with the "java-class" property set to the collection
+ implementation, e.g.:
+ <pre>{"type": "array", "java-class": "java.util.ArrayList"}</pre>
+</li>
+
+<li><b>{@link java.lang.String}</b> is mapped to an Avro string schema.</li>
+
+<li><b>byte[]</b> is mapped to an Avro bytes schema.</li>
+
+<li><b>short</b> is mapped to an Avro int schema with the "java-class"
+ property set to "java.lang.Short", e.g.:
+ <pre>{"type": "int", "java-class": "java.lang.Short"}</pre>
+</li>
+
+<li>All other types are mapped as in the {@link org.apache.avro.generic
+ generic} API.</li>
+
+</ul>
+
+The {@link org.apache.avro.reflect.Union Union} annotation can be used
+to support reflection of schemas for interfaces, abstract base classes
+and other uses of polymorphism.
+
</body>
</html>
Modified: hadoop/avro/trunk/src/java/org/apache/avro/specific/package.html
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/specific/package.html?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/specific/package.html (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/specific/package.html Fri Dec 4
23:13:09 2009
@@ -18,6 +18,23 @@
-->
<body>
-Generate specific Java classes for schemas and protocols</i>.
+Generate specific Java classes for schemas and protocols.
+
+<p>This API is recommended for most RPC uses and for data applications
+ that always use the same datatypes, i.e., whose schemas are known at
+ compile time. For data applications that accept dynamic datatypes
+ the {@link org.apache.avro.generic generic} API is recommended.
+
+<p>Avro types are mapped to Java as follows:
+
+<ul>
+
+<li>Record, enum, and fixed schemas generate Java class definitions.</li>
+
+<li>All other types are mapped as in the {@link org.apache.avro.generic
+ generic} API.</li>
+
+</ul>
+
</body>
</html>
Modified: hadoop/avro/trunk/src/test/java/org/apache/avro/TestReflect.java
URL:
http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/TestReflect.java?rev=887439&r1=887438&r2=887439&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/TestReflect.java (original)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/TestReflect.java Fri Dec 4
23:13:09 2009
@@ -36,11 +36,13 @@
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;
+import org.apache.avro.reflect.Union;
import org.junit.Test;
public class TestReflect {
+ // test primitive type inference
@Test public void testVoid() {
check(Void.TYPE, "\"null\"");
check(Void.class, "\"null\"");
@@ -79,6 +81,7 @@
check(new byte[0], "\"bytes\"");
}
+ // test map, array and list type inference
public static class R1 {
private Map<String,String> mapField = new HashMap<String,String>();
private String[] arrayField = new String[] { "foo" };
@@ -109,13 +112,15 @@
}
@Test public void testList() throws Exception {
check(R1.class.getDeclaredField("listField").getGenericType(),
- "{\"type\":\"array\",\"items\":\"string\"}");
+ "{\"type\":\"array\",\"items\":\"string\""
+ +",\"java-class\":\"java.util.List\"}");
}
@Test public void testR1() throws Exception {
checkReadWrite(new R1());
}
+ // test record, array and list i/o
public static class R2 {
private String[] arrayField;
private Collection<String> collectionField;
@@ -136,6 +141,7 @@
checkReadWrite(r2);
}
+ // test array i/o of unboxed type
public static class R3 {
private int[] intArray;
@@ -152,6 +158,7 @@
checkReadWrite(r3);
}
+ // test inherited fields & short datatype
public static class R4 {
public short value;
@@ -169,8 +176,50 @@
checkReadWrite(r5);
}
+ // test union annotation
+ @Union({R7.class, R8.class})
+ public static class R6 {}
+
+ public static class R7 extends R6 {
+ public int value;
+ public boolean equals(Object o) {
+ if (!(o instanceof R7)) return false;
+ return this.value == ((R7)o).value;
+ }
+ }
+ public static class R8 extends R6 {
+ public float value;
+ public boolean equals(Object o) {
+ if (!(o instanceof R8)) return false;
+ return this.value == ((R8)o).value;
+ }
+ }
+
+ // test arrays with union annotation
+ public static class R9 {
+ public R6[] r6s;
+ public boolean equals(Object o) {
+ if (!(o instanceof R9)) return false;
+ return Arrays.equals(this.r6s, ((R9)o).r6s);
+ }
+ }
+
+ @Test public void testR6() throws Exception {
+ R7 r7 = new R7();
+ r7.value = 1;
+ checkReadWrite(r7, ReflectData.get().getSchema(R6.class));
+ R8 r8 = new R8();
+ r8.value = 1;
+ checkReadWrite(r8, ReflectData.get().getSchema(R6.class));
+ R9 r9 = new R9();
+ r9.r6s = new R6[] {r7, r8};
+ checkReadWrite(r9, ReflectData.get().getSchema(R9.class));
+ }
+
void checkReadWrite(Object object) throws Exception {
- Schema s = ReflectData.get().getSchema(object.getClass());
+ checkReadWrite(object, ReflectData.get().getSchema(object.getClass()));
+ }
+ void checkReadWrite(Object object, Schema s) throws Exception {
ReflectDatumWriter writer = new ReflectDatumWriter(s);
ByteArrayOutputStream out = new ByteArrayOutputStream();
writer.write(object, new BinaryEncoder(out));