Repository: avro Updated Branches: refs/heads/master e2d1073bf -> 67bec06db
AVRO-1944: Fixed invalid toString in records with duplicate values Project: http://git-wip-us.apache.org/repos/asf/avro/repo Commit: http://git-wip-us.apache.org/repos/asf/avro/commit/67bec06d Tree: http://git-wip-us.apache.org/repos/asf/avro/tree/67bec06d Diff: http://git-wip-us.apache.org/repos/asf/avro/diff/67bec06d Branch: refs/heads/master Commit: 67bec06dbdc910856fdc33d149aece97c26a59b1 Parents: e2d1073 Author: Niels Basjes <[email protected]> Authored: Wed Oct 26 15:36:02 2016 +0200 Committer: Niels Basjes <[email protected]> Committed: Tue Nov 1 21:32:30 2016 +0100 ---------------------------------------------------------------------- CHANGES.txt | 2 +- .../org/apache/avro/generic/GenericData.java | 35 +++++-- .../apache/avro/generic/TestGenericData.java | 101 ++++++++++++++++++- 3 files changed, 130 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 45b0f61..15c0523 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -76,7 +76,7 @@ Trunk (not yet released) AVRO-1853: C++: Compiler::toBin crashes on empty string. (Zoltan Ivanfi via tomwhite) - AVRO-1923: Stop infinite recursion in GenericData.toString. (Niels Basjes) + AVRO-1923, AVRO-1944: Stop infinite recursion in GenericData.toString. (Niels Basjes) AVRO-1860: Java: Field#DefaultVal() returns Ints for Long fields. (Gabor Szadovszky via cutting) http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java index 0484c03..ff0893c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java @@ -504,14 +504,18 @@ public class GenericData { toString(datum, buffer, new IdentityHashMap<Object, Object>(128) ); return buffer.toString(); } + + private static final String TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT = + " \">>> CIRCULAR REFERENCE CANNOT BE PUT IN JSON STRING, ABORTING RECURSION <<<\" "; + /** Renders a Java datum as <a href="http://www.json.org/">JSON</a>. */ protected void toString(Object datum, StringBuilder buffer, IdentityHashMap<Object, Object> seenObjects) { - if (seenObjects.containsKey(datum)) { - buffer.append(" \">>> CIRCULAR REFERENCE CANNOT BE PUT IN JSON STRING, ABORTING RECURSION<<<\" "); - return; - } - seenObjects.put(datum, datum); if (isRecord(datum)) { + if (seenObjects.containsKey(datum)) { + buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT); + return; + } + seenObjects.put(datum, datum); buffer.append("{"); int count = 0; Schema schema = getRecordSchema(datum); @@ -523,7 +527,13 @@ public class GenericData { buffer.append(", "); } buffer.append("}"); + seenObjects.remove(datum); } else if (isArray(datum)) { + if (seenObjects.containsKey(datum)) { + buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT); + return; + } + seenObjects.put(datum, datum); Collection<?> array = getArrayAsCollection(datum); buffer.append("["); long last = array.size()-1; @@ -534,7 +544,13 @@ public class GenericData { buffer.append(", "); } buffer.append("]"); + seenObjects.remove(datum); } else if (isMap(datum)) { + if (seenObjects.containsKey(datum)) { + buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT); + return; + } + seenObjects.put(datum, datum); buffer.append("{"); int count = 0; @SuppressWarnings(value="unchecked") @@ -547,6 +563,7 @@ public class GenericData { buffer.append(", "); } buffer.append("}"); + seenObjects.remove(datum); } else if (isString(datum)|| isEnum(datum)) { buffer.append("\""); writeEscapedString(datum.toString(), buffer); @@ -564,7 +581,13 @@ public class GenericData { buffer.append(datum); buffer.append("\""); } else if (datum instanceof GenericData) { - toString(datum, buffer, seenObjects); + if (seenObjects.containsKey(datum)) { + buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT); + return; + } + seenObjects.put(datum, datum); + toString(datum, buffer, seenObjects); + seenObjects.remove(datum); } else { buffer.append(datum); } http://git-wip-us.apache.org/repos/asf/avro/blob/67bec06d/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java ---------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java index 09ed66a..fe01ad3 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java @@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Collection; import java.util.ArrayDeque; @@ -30,6 +31,7 @@ import static org.apache.avro.TestCircularReferences.Referenceable; import static org.junit.Assert.*; import java.util.Arrays; +import java.util.Map; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -496,6 +498,104 @@ public class TestGenericData { assertTrue(GenericData.get().validate(unionSchema, record)); } + /* + * The toString has a detection for circular references to abort. + * This detection has the risk of detecting that same value as being a circular reference. + * For Record, Map and Array this is correct, for the rest is is not. + */ + @Test + public void testToStringSameValues() throws IOException { + List<Field> fields = new ArrayList<Field>(); + fields.add(new Field("nullstring1", Schema.create(Type.STRING), null, (Object)null)); + fields.add(new Field("nullstring2", Schema.create(Type.STRING), null, (Object)null)); + + fields.add(new Field("string1", Schema.create(Type.STRING ), null, (Object)null)); + fields.add(new Field("string2", Schema.create(Type.STRING ), null, (Object)null)); + + fields.add(new Field("bytes1", Schema.create(Type.BYTES ), null, (Object)null)); + fields.add(new Field("bytes2", Schema.create(Type.BYTES ), null, (Object)null)); + + fields.add(new Field("int1", Schema.create(Type.INT ), null, (Object)null)); + fields.add(new Field("int2", Schema.create(Type.INT ), null, (Object)null)); + + fields.add(new Field("long1", Schema.create(Type.LONG ), null, (Object)null)); + fields.add(new Field("long2", Schema.create(Type.LONG ), null, (Object)null)); + + fields.add(new Field("float1", Schema.create(Type.FLOAT ), null, (Object)null)); + fields.add(new Field("float2", Schema.create(Type.FLOAT ), null, (Object)null)); + + fields.add(new Field("double1", Schema.create(Type.DOUBLE ), null, (Object)null)); + fields.add(new Field("double2", Schema.create(Type.DOUBLE ), null, (Object)null)); + + fields.add(new Field("boolean1",Schema.create(Type.BOOLEAN ), null, (Object)null)); + fields.add(new Field("boolean2",Schema.create(Type.BOOLEAN ), null, (Object)null)); + + List<String> enumValues = new ArrayList<String>(); + enumValues.add("One"); + enumValues.add("Two"); + Schema enumSchema = Schema.createEnum("myEnum", null, null, enumValues); + fields.add(new Field("enum1", enumSchema, null, (Object)null)); + fields.add(new Field("enum2", enumSchema, null, (Object)null)); + + Schema recordSchema = SchemaBuilder.record("aRecord").fields().requiredString("myString").endRecord(); + fields.add(new Field("record1", recordSchema, null, (Object)null)); + fields.add(new Field("record2", recordSchema, null, (Object)null)); + + Schema arraySchema = Schema.createArray(Schema.create(Type.STRING)); + fields.add(new Field("array1", arraySchema, null, (Object)null)); + fields.add(new Field("array2", arraySchema, null, (Object)null)); + + Schema mapSchema = Schema.createMap(Schema.create(Type.STRING)); + fields.add(new Field("map1", mapSchema, null, (Object)null)); + fields.add(new Field("map2", mapSchema, null, (Object)null)); + + Schema schema = Schema.createRecord("Foo", "test", "mytest", false); + schema.setFields(fields); + + Record testRecord = new Record(schema); + + testRecord.put("nullstring1", null); + testRecord.put("nullstring2", null); + + String fortyTwo = "42"; + testRecord.put("string1", fortyTwo); + testRecord.put("string2", fortyTwo); + testRecord.put("bytes1", 0x42 ); + testRecord.put("bytes2", 0x42 ); + testRecord.put("int1", 42 ); + testRecord.put("int2", 42 ); + testRecord.put("long1", 42L); + testRecord.put("long2", 42L); + testRecord.put("float1", 42F); + testRecord.put("float2", 42F); + testRecord.put("double1", 42D); + testRecord.put("double2", 42D); + testRecord.put("boolean1", true); + testRecord.put("boolean2", true); + + testRecord.put("enum1", "One"); + testRecord.put("enum2", "One"); + + GenericRecord record = new GenericData.Record(recordSchema); + record.put("myString","42"); + testRecord.put("record1", record); + testRecord.put("record2", record); + + GenericArray<String> array = new GenericData.Array<String>(1, arraySchema); + array.clear(); + array.add("42"); + testRecord.put("array1", array); + testRecord.put("array2", array); + + Map<String, String> map = new HashMap<String, String>(); + map.put("42", "42"); + testRecord.put("map1", map); + testRecord.put("map2", map); + + String testString = testRecord.toString(); + assertFalse("Record with duplicated values results in wrong 'toString()'", testString.contains("CIRCULAR REFERENCE")); + } + // Test copied from Apache Parquet: org.apache.parquet.avro.TestCircularReferences @Test public void testToStringRecursive() throws IOException { @@ -551,5 +651,4 @@ public class TestGenericData { fail("StackOverflowError occurred"); } } - }
