[
https://issues.apache.org/jira/browse/AVRO-2152?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16386792#comment-16386792
]
ASF GitHub Bot commented on AVRO-2152:
--------------------------------------
phaas closed pull request #291: AVRO-2152: Fix JsonDecoder handling of aliases
in unions.
URL: https://github.com/apache/avro/pull/291
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
opened from a fork) once it is merged, the diff is reproduced below for
the sake of provenance:
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
index 7c5da1169..226c5030a 100644
---
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
+++
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
@@ -24,6 +24,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
@@ -195,6 +196,7 @@ private Symbol resolveUnion(Schema writer, Schema reader,
final int size = alts.size();
Symbol[] symbols = new Symbol[size];
String[] labels = new String[size];
+ Set<String>[] aliases = new Set[size];
/**
* We construct a symbol without filling the arrays. Please see
@@ -204,9 +206,10 @@ private Symbol resolveUnion(Schema writer, Schema reader,
for (Schema w : alts) {
symbols[i] = generate(w, reader, seen);
labels[i] = w.getFullName();
+ aliases[i] = generateAliases(w);
i++;
}
- return Symbol.seq(Symbol.alt(symbols, labels),
+ return Symbol.seq(Symbol.alt(symbols, labels, aliases),
Symbol.writerUnionAction());
}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
index 187942400..dab275e12 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/Symbol.java
@@ -111,8 +111,8 @@ static Symbol repeat(Symbol endSymbol, Symbol...
symsToRepeat) {
/**
* A convenience method to construct a union.
*/
- static Symbol alt(Symbol[] symbols, String[] labels) {
- return new Alternative(symbols, labels);
+ static Symbol alt(Symbol[] symbols, String[] labels, Set<String>[] aliases) {
+ return new Alternative(symbols, labels, aliases);
}
/**
@@ -426,10 +426,13 @@ private static boolean hasErrors(Symbol root, Symbol[]
symbols, Set<Symbol> visi
public static class Alternative extends Symbol {
public final Symbol[] symbols;
public final String[] labels;
- private Alternative(Symbol[] symbols, String[] labels) {
+ public final Set<String>[] aliases;
+
+ private Alternative(Symbol[] symbols, String[] labels, Set<String>[]
aliases) {
super(Kind.ALTERNATIVE);
this.symbols = symbols;
this.labels = labels;
+ this.aliases = aliases;
}
public Symbol getSymbol(int index) {
@@ -450,6 +453,9 @@ public int findLabel(String label) {
if (label.equals(labels[i])) {
return i;
}
+ if (aliases[i].contains(label)) {
+ return i;
+ }
}
}
return -1;
@@ -462,7 +468,7 @@ public Alternative flatten(Map<Sequence, Sequence> map,
for (int i = 0; i < ss.length; i++) {
ss[i] = symbols[i].flatten(map, map2);
}
- return new Alternative(ss, labels);
+ return new Alternative(ss, labels, aliases);
}
}
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
index 6e1a55eae..bbbb33e30 100644
---
a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
+++
b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ValidatingGrammarGenerator.java
@@ -17,9 +17,11 @@
*/
package org.apache.avro.io.parsing;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
@@ -101,21 +103,43 @@ public Symbol generate(Schema sc, Map<LitS, Symbol> seen)
{
List<Schema> subs = sc.getTypes();
Symbol[] symbols = new Symbol[subs.size()];
String[] labels = new String[subs.size()];
+ Set<String>[] aliases = new Set[subs.size()];
int i = 0;
for (Schema b : sc.getTypes()) {
symbols[i] = generate(b, seen);
labels[i] = b.getFullName();
+ aliases[i] = generateAliases(b);
i++;
}
- return Symbol.seq(Symbol.alt(symbols, labels), Symbol.UNION);
+ return Symbol.seq(Symbol.alt(symbols, labels, aliases), Symbol.UNION);
default:
throw new RuntimeException("Unexpected schema type");
}
}
- /** A wrapper around Schema that does "==" equality. */
+ /**
+ * Named types may have alises which must be considered when resolving
+ * unions.
+ * <p>
+ * "Record, enums and fixed are named types." [spec.xml#Names]
+ *
+ * @param schema
+ * @return
+ */
+ protected Set<String> generateAliases(Schema schema) {
+ Schema.Type type = schema.getType();
+ if (Schema.Type.RECORD.equals(type) || Schema.Type.ENUM.equals(type) ||
Schema.Type.FIXED.equals(type)) {
+ return schema.getAliases();
+ } else {
+ return Collections.emptySet();
+ }
+ }
+
+ /**
+ * A wrapper around Schema that does "==" equality.
+ */
static class LitS {
public final Schema actual;
public LitS(Schema actual) { this.actual = actual; }
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java
b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java
index c63749840..a9f497087 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java
@@ -18,12 +18,26 @@
package org.apache.avro.io;
import org.apache.avro.Schema;
+import org.apache.avro.SchemaCompatibility;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecordBuilder;
import org.junit.Test;
import org.junit.Assert;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static java.util.Collections.singletonList;
+import static org.apache.avro.Schema.create;
+import static org.apache.avro.Schema.createRecord;
+import static org.apache.avro.Schema.createUnion;
+import static org.junit.Assert.assertEquals;
+
public class TestJsonDecoder {
@Test public void testInt() throws Exception {
@@ -76,4 +90,60 @@ private void checkNumeric(String type, Object value) throws
Exception {
Assert.assertEquals(200, in.readLong());
in.skipArray();
}
+
+
+ /**
+ * AVRO-2152
+ * JsonDecoder fails when reading record with aliases inside union
+ */
+ @Test public void testJsonDecoderWithAliasesInUnion() throws Exception {
+ Schema writerItem = createRecord("WItem", "writer item", "writer.ns",
false,
+ singletonList(new Schema.Field("value",
+ create(Schema.Type.STRING),
+ "value", (Object) null)));
+ Schema writerSchema = createRecord("WWrapper", "writer", "writer.ns",
false,
+ singletonList(new Schema.Field("item",
+ createUnion(create(Schema.Type.NULL), writerItem),
+ "value", (Object) null)));
+ System.out.println(writerSchema.toString(true));
+
+ Schema readerItem = createRecord("RItem", "reader item", "reader.ns",
false,
+ singletonList(new Schema.Field("value", create(Schema.Type.STRING),
+ "value", (Object) null)));
+ Schema readerSchema = createRecord("RWrapper", "reader", "reader.ns",
false,
+ singletonList(new Schema.Field("item",
+ createUnion(create(Schema.Type.NULL), readerItem),
+ "value", (Object) null)));
+ readerSchema.addAlias("WWrapper", "writer.ns");
+ readerItem.addAlias("WItem", "writer.ns");
+
+ System.out.println(readerSchema.toString(true));
+
+ assertEquals(SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE,
+ SchemaCompatibility.checkReaderWriterCompatibility(readerSchema,
writerSchema).getType());
+
+ // Create an instance for testing
+ GenericData.Record instance = new GenericRecordBuilder(writerSchema)
+ .set("item",
+ new GenericRecordBuilder(writerItem)
+ .set("value", "12345")
+ .build()
+ ).build();
+
+ // Serialize using JSON Encoder
+ final GenericDatumWriter<Object> writer = new
GenericDatumWriter<>(instance.getSchema());
+ final ByteArrayOutputStream out = new ByteArrayOutputStream();
+ final JsonEncoder encoder =
EncoderFactory.get().jsonEncoder(instance.getSchema(), out);
+ writer.write(instance, encoder);
+ encoder.flush();
+
+ // Deserialize using JSON Decoder
+ final GenericDatumReader<GenericRecord> reader = new
GenericDatumReader<>(writerSchema, readerSchema);
+ final JsonDecoder decoder = DecoderFactory.get().jsonDecoder(readerSchema,
+ new String(out.toByteArray(), StandardCharsets.UTF_8));
+
+ GenericRecord deserialized = reader.read(null, decoder);
+ assertEquals(deserialized.toString(), instance.toString());
+ }
+
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> JsonDecoder fails when reading record with aliases inside union
> ---------------------------------------------------------------
>
> Key: AVRO-2152
> URL: https://issues.apache.org/jira/browse/AVRO-2152
> Project: Avro
> Issue Type: Bug
> Components: java
> Affects Versions: 1.8.2
> Environment: JDK 1.8
> Avro 1.8.2
> Reporter: Patrick Haas
> Priority: Trivial
> Attachments: JsonDecoderAliasesInUnion.java
>
>
> The JsonDecoder only handles aliases at the "top level" or "field level".
> Aliased records within a union fail with an "Unknown union branch"
> AvroTypeException.
> The same writer/reader schema combination works fine with the binaryEncoder,
> and the two schemas are considered COMPATIBLE by the SchemaCompatibility
> check.
>
> h1. Writer Schema
> {
> "type" : "record",
> "name" : "WWrapper",
> "namespace" : "writer.ns",
> "doc" : "writer",
> "fields" : [ {
> "name" : "item",
> "type" : [ "null", {
> "type" : "record",
> "name" : "WItem",
> "doc" : "writer item",
> "fields" : [ {
> "name" : "value",
> "type" : "string",
> "doc" : "value"
> } ]
> } ],
> "doc" : "value"
> } ]
> }
> h1. Reader Schema
> {
> "type" : "record",
> "name" : "RWrapper",
> "namespace" : "reader.ns",
> "doc" : "reader",
> "fields" : [ {
> "name" : "item",
> "type" : [ "null", {
> "type" : "record",
> "name" : "RItem",
> "doc" : "reader item",
> "fields" : [ {
> "name" : "value",
> "type" : "string",
> "doc" : "value"
> } ],
> "aliases" : [ "writer.ns.WItem" ]
> } ],
> "doc" : "value"
> } ],
> "aliases" : [ "writer.ns.WWrapper" ]
> }
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)