[
https://issues.apache.org/jira/browse/AVRO-2267?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16692921#comment-16692921
]
ASF GitHub Bot commented on AVRO-2267:
--------------------------------------
Fokko closed pull request #385: AVRO-2267 Removed copies of RandomData
URL: https://github.com/apache/avro/pull/385
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/lang/java/avro/pom.xml b/lang/java/avro/pom.xml
index c9104fba9..a684b1ccc 100644
--- a/lang/java/avro/pom.xml
+++ b/lang/java/avro/pom.xml
@@ -106,8 +106,7 @@
<id>interop-generate-null-codec</id>
<phase>generate-resources</phase>
<configuration>
- <mainClass>org.apache.avro.RandomData</mainClass>
- <classpathScope>test</classpathScope>
+ <mainClass>org.apache.avro.util.RandomData</mainClass>
<arguments>
<argument>../../../share/test/schemas/interop.avsc</argument>
<argument>../../../build/interop/data/java.avro</argument>
@@ -121,8 +120,7 @@
<id>interop-generate-deflate-codec</id>
<phase>generate-resources</phase>
<configuration>
- <mainClass>org.apache.avro.RandomData</mainClass>
- <classpathScope>test</classpathScope>
+ <mainClass>org.apache.avro.util.RandomData</mainClass>
<arguments>
<argument>../../../share/test/schemas/interop.avsc</argument>
<argument>../../../build/interop/data/java_deflate.avro</argument>
diff --git a/lang/java/avro/src/test/java/org/apache/avro/RandomData.java
b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java
similarity index 72%
rename from lang/java/avro/src/test/java/org/apache/avro/RandomData.java
rename to lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java
index 89a4321d6..b8c7c042d 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/RandomData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java
@@ -15,34 +15,51 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.avro;
+package org.apache.avro.util;
import java.io.File;
import java.nio.ByteBuffer;
-import java.util.*;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.util.Utf8;
/** Generates schema data as Java objects with random values. */
public class RandomData implements Iterable<Object> {
+ public static final String USE_DEFAULT = "use-default";
+
private final Schema root;
private final long seed;
private final int count;
+ private final boolean utf8ForString;
public RandomData(Schema schema, int count) {
- this(schema, count, System.currentTimeMillis());
+ this(schema, count, false);
}
public RandomData(Schema schema, int count, long seed) {
+ this(schema, count, seed, false);
+ }
+
+ public RandomData(Schema schema, int count, boolean utf8ForString) {
+ this(schema, count, System.currentTimeMillis(), utf8ForString);
+ }
+
+ public RandomData(Schema schema, int count, long seed, boolean
utf8ForString) {
this.root = schema;
this.seed = seed;
this.count = count;
+ this.utf8ForString = utf8ForString;
}
public Iterator<Object> iterator() {
@@ -59,12 +76,16 @@ public Object next() {
}
@SuppressWarnings(value="unchecked")
- private static Object generate(Schema schema, Random random, int d) {
+ private Object generate(Schema schema, Random random, int d) {
switch (schema.getType()) {
case RECORD:
GenericRecord record = new GenericData.Record(schema);
- for (Schema.Field field : schema.getFields())
- record.put(field.name(), generate(field.schema(), random, d+1));
+ for (Schema.Field field : schema.getFields()) {
+ Object value = (field.getObjectProp(USE_DEFAULT) == null) ?
+ generate(field.schema(), random, d+1) :
+ GenericData.get().getDefaultValue(field);
+ record.put(field.name(), value);
+ }
return record;
case ENUM:
List<String> symbols = schema.getEnumSymbols();
@@ -72,6 +93,7 @@ private static Object generate(Schema schema, Random random,
int d) {
(schema, symbols.get(random.nextInt(symbols.size())));
case ARRAY:
int length = (random.nextInt(5)+2)-d;
+ @SuppressWarnings("rawtypes")
GenericArray<Object> array =
new GenericData.Array(length<=0?0:length, schema);
for (int i = 0; i < length; i++)
@@ -81,7 +103,7 @@ private static Object generate(Schema schema, Random random,
int d) {
length = (random.nextInt(5)+2)-d;
Map<Object,Object> map = new HashMap<>(length <= 0 ? 0 : length);
for (int i = 0; i < length; i++) {
- map.put(randomUtf8(random, 40),
+ map.put(randomString(random, 40),
generate(schema.getValueType(), random, d+1));
}
return map;
@@ -92,7 +114,7 @@ private static Object generate(Schema schema, Random random,
int d) {
byte[] bytes = new byte[schema.getFixedSize()];
random.nextBytes(bytes);
return new GenericData.Fixed(schema, bytes);
- case STRING: return randomUtf8(random, 40);
+ case STRING: return randomString(random, 40);
case BYTES: return randomBytes(random, 40);
case INT: return random.nextInt();
case LONG: return random.nextLong();
@@ -104,12 +126,15 @@ private static Object generate(Schema schema, Random
random, int d) {
}
}
- private static Utf8 randomUtf8(Random rand, int maxLength) {
- Utf8 utf8 = new Utf8().setLength(rand.nextInt(maxLength));
- for (int i = 0; i < utf8.getLength(); i++) {
- utf8.getBytes()[i] = (byte)('a'+rand.nextInt('z'-'a'));
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+
+ private Object randomString(Random random, int maxLength) {
+ int length = random.nextInt(maxLength);
+ byte[] bytes = new byte[length];
+ for (int i = 0; i < length; i++) {
+ bytes[i] = (byte)('a'+random.nextInt('z'-'a'));
}
- return utf8;
+ return utf8ForString ? new Utf8(bytes) : new String(bytes, UTF8);
}
private static ByteBuffer randomBytes(Random rand, int maxLength) {
@@ -120,11 +145,11 @@ private static ByteBuffer randomBytes(Random rand, int
maxLength) {
}
public static void main(String[] args) throws Exception {
- if(args.length < 3 || args.length > 4) {
+ if (args.length < 3 || args.length > 4) {
System.out.println("Usage: RandomData <schemafile> <outputfile> <count>
[codec]");
System.exit(-1);
}
- Schema sch = Schema.parse(new File(args[0]));
+ Schema sch = new Schema.Parser().parse(new File(args[0]));
DataFileWriter<Object> writer =
new DataFileWriter<>(new GenericDatumWriter<>());
writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] :
"null"));
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java
b/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java
index d5440c2a4..5a9a57208 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/GenerateBlockingData.java
@@ -26,6 +26,7 @@
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.util.RandomData;
/**
* Generates file with objects of a specific schema(that doesn't contain
nesting
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java
index 6e2398dff..d42548af8 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java
@@ -33,6 +33,7 @@
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DatumReader;
+import org.apache.avro.util.RandomData;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java
index dfb68fa36..4312fdffa 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java
@@ -29,6 +29,7 @@
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.util.RandomData;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java
b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java
index 97914c47e..35fdcdf37 100644
---
a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java
+++
b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java
@@ -22,9 +22,9 @@
import java.io.IOException;
import java.io.OutputStream;
-import org.apache.avro.RandomData;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.util.RandomData;
import org.junit.Test;
/*
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
index 76f4df3dc..dd1c55a69 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java
@@ -24,12 +24,12 @@
import java.util.Collection;
import org.apache.avro.AvroRuntimeException;
-import org.apache.avro.RandomData;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.util.ByteBufferInputStream;
import org.apache.avro.util.ByteBufferOutputStream;
+import org.apache.avro.util.RandomData;
import org.apache.avro.util.Utf8;
import org.junit.Assert;
import org.junit.BeforeClass;
diff --git a/lang/java/ipc/src/test/java/org/apache/avro/RandomData.java
b/lang/java/ipc/src/test/java/org/apache/avro/RandomData.java
deleted file mode 100644
index 1d9d4ce5e..000000000
--- a/lang/java/ipc/src/test/java/org/apache/avro/RandomData.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.avro;
-
-import java.io.File;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.util.Utf8;
-
-/** Generates schema data as Java objects with random values. */
-public class RandomData implements Iterable<Object> {
- private final Schema root;
- private final long seed;
- private final int count;
-
- public RandomData(Schema schema, int count) {
- this(schema, count, System.currentTimeMillis());
- }
-
- public RandomData(Schema schema, int count, long seed) {
- this.root = schema;
- this.seed = seed;
- this.count = count;
- }
-
- public Iterator<Object> iterator() {
- return new Iterator<Object>() {
- private int n;
- private Random random = new Random(seed);
- public boolean hasNext() { return n < count; }
- public Object next() {
- n++;
- return generate(root, random, 0);
- }
- public void remove() { throw new UnsupportedOperationException(); }
- };
- }
-
- @SuppressWarnings(value="unchecked")
- private static Object generate(Schema schema, Random random, int d) {
- switch (schema.getType()) {
- case RECORD:
- GenericRecord record = new GenericData.Record(schema);
- for (Schema.Field field : schema.getFields())
- record.put(field.name(), generate(field.schema(), random, d+1));
- return record;
- case ENUM:
- List<String> symbols = schema.getEnumSymbols();
- return new GenericData.EnumSymbol
- (schema, symbols.get(random.nextInt(symbols.size())));
- case ARRAY:
- int length = (random.nextInt(5)+2)-d;
- GenericArray<Object> array =
- new GenericData.Array(length<=0?0:length, schema);
- for (int i = 0; i < length; i++)
- array.add(generate(schema.getElementType(), random, d+1));
- return array;
- case MAP:
- length = (random.nextInt(5)+2)-d;
- Map<Object,Object> map = new HashMap<>(length <= 0 ? 0 : length);
- for (int i = 0; i < length; i++) {
- map.put(randomUtf8(random, 40),
- generate(schema.getValueType(), random, d+1));
- }
- return map;
- case UNION:
- List<Schema> types = schema.getTypes();
- return generate(types.get(random.nextInt(types.size())), random, d);
- case FIXED:
- byte[] bytes = new byte[schema.getFixedSize()];
- random.nextBytes(bytes);
- return new GenericData.Fixed(schema, bytes);
- case STRING: return randomUtf8(random, 40);
- case BYTES: return randomBytes(random, 40);
- case INT: return random.nextInt();
- case LONG: return random.nextLong();
- case FLOAT: return random.nextFloat();
- case DOUBLE: return random.nextDouble();
- case BOOLEAN: return random.nextBoolean();
- case NULL: return null;
- default: throw new RuntimeException("Unknown type: "+schema);
- }
- }
-
- private static Utf8 randomUtf8(Random rand, int maxLength) {
- Utf8 utf8 = new Utf8().setLength(rand.nextInt(maxLength));
- for (int i = 0; i < utf8.getLength(); i++) {
- utf8.getBytes()[i] = (byte)('a'+rand.nextInt('z'-'a'));
- }
- return utf8;
- }
-
- private static ByteBuffer randomBytes(Random rand, int maxLength) {
- ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength));
- bytes.limit(bytes.capacity());
- rand.nextBytes(bytes.array());
- return bytes;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length != 3) {
- System.out.println("Usage: RandomData <schemafile> <outputfile>
<count>");
- System.exit(-1);
- }
- Schema sch = Schema.parse(new File(args[0]));
- DataFileWriter<Object> writer =
- new DataFileWriter<>(new GenericDatumWriter<>())
- .create(sch, new File(args[1]));
- try {
- for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
- writer.append(datum);
- }
- } finally {
- writer.close();
- }
- }
-}
diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
index cc04e3331..525a8b1cd 100644
--- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
@@ -44,6 +44,7 @@
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.util.RandomData;
import org.apache.avro.util.Utf8;
import org.junit.Rule;
import org.junit.Test;
@@ -665,7 +666,7 @@ private static void check(File dst, String jsonSchema,
boolean induce) throws Ex
Schema schema = Schema.parse(jsonSchema);
checkProp(schema);
Object reuse = null;
- for (Object datum : new RandomData(schema, COUNT)) {
+ for (Object datum : new RandomData(schema, COUNT, true)) {
if (induce) {
Schema induced = GenericData.get().induce(datum);
diff --git
a/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
b/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
index a42846d40..3ea2382d9 100644
---
a/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
+++
b/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
@@ -28,7 +28,7 @@
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.trevni.avro.RandomData;
+import org.apache.avro.util.RandomData;
/** Creates a file filled with randomly-generated instances of a schema. */
public class CreateRandomFileTool implements Tool {
@@ -43,6 +43,7 @@ public String getShortDescription() {
return "Creates a file with randomly generated instances of a schema.";
}
+ @SuppressWarnings("unchecked")
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
List<String> args) throws Exception {
@@ -62,6 +63,11 @@ public int run(InputStream stdin, PrintStream out,
PrintStream err,
p.accepts("schema", "Schema")
.withOptionalArg()
.ofType(String.class);
+ OptionSpec<Long> seedOpt =
+ p.accepts("seed", "Seed for random")
+ .withOptionalArg()
+ .ofType(Long.class);
+
OptionSet opts = p.parse(args.toArray(new String[0]));
if (opts.nonOptionArguments().size() != 1) {
err.println("Usage: outFile (filename or '-' for stdout)");
@@ -72,6 +78,7 @@ public int run(InputStream stdin, PrintStream out,
PrintStream err,
String schemastr = inschema.value(opts);
String schemafile = file.value(opts);
+ Long seed = seedOpt.value(opts);
if (schemastr == null && schemafile == null) {
err.println("Need input schema (--schema-file) or (--schema)");
p.printHelpOn(err);
@@ -90,10 +97,13 @@ public int run(InputStream stdin, PrintStream out,
PrintStream err,
if (countValue == null) {
err.println("Need count (--count)");
p.printHelpOn(err);
+ writer.close();
return 1;
}
- for (Object datum : new RandomData(schema, countValue))
+ RandomData rd = seed == null ? new RandomData(schema, countValue) :
+ new RandomData(schema, countValue, seed);
+ for (Object datum : rd)
writer.append(datum);
writer.close();
diff --git
a/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
b/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
index 67245947a..03a474c18 100644
---
a/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
+++
b/lang/java/tools/src/main/java/org/apache/avro/tool/TrevniCreateRandomTool.java
@@ -23,9 +23,9 @@
import java.util.List;
import org.apache.avro.Schema;
+import org.apache.avro.util.RandomData;
import org.apache.trevni.ColumnFileMetaData;
import org.apache.trevni.avro.AvroColumnWriter;
-import org.apache.trevni.avro.RandomData;
/** Tool to create randomly populated Trevni file based on an Avro schema */
public class TrevniCreateRandomTool implements Tool {
@@ -52,7 +52,7 @@ public int run(InputStream stdin, PrintStream out,
PrintStream err,
int count = Integer.parseInt(args.get(1));
File outputFile = new File(args.get(2));
- Schema schema = Schema.parse(schemaFile);
+ Schema schema = new Schema.Parser().parse(schemaFile);
AvroColumnWriter<Object> writer =
new AvroColumnWriter<>(schema, new ColumnFileMetaData());
diff --git
a/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
b/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
index 62d6e9af0..69855c657 100644
---
a/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
+++
b/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
@@ -30,7 +30,7 @@
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
-import org.apache.trevni.avro.RandomData;
+import org.apache.avro.util.RandomData;
import org.apache.trevni.TestUtil;
import org.junit.After;
@@ -48,6 +48,8 @@
private final Schema.Parser schemaParser = new Schema.Parser();
+ private static final long SEED = System.currentTimeMillis();
+
private ByteArrayOutputStream out;
private ByteArrayOutputStream err;
@@ -83,17 +85,18 @@ private void check(String... extraArgs) throws Exception {
args.addAll(Arrays.asList(new String[] {
OUT_FILE.toString(),
"--count", COUNT,
- "--schema-file", SCHEMA_FILE.toString()
+ "--schema-file", SCHEMA_FILE.toString(),
+ "--seed", Long.toString(SEED)
}));
args.addAll(Arrays.asList(extraArgs));
run(args);
DataFileReader<Object> reader =
- new DataFileReader(OUT_FILE, new GenericDatumReader<>());
+ new DataFileReader<Object>(OUT_FILE, new GenericDatumReader<>());
Iterator<Object> found = reader.iterator();
for (Object expected :
- new RandomData(schemaParser.parse(SCHEMA_FILE),
Integer.parseInt(COUNT)))
+ new RandomData(schemaParser.parse(SCHEMA_FILE),
Integer.parseInt(COUNT), SEED))
assertEquals(expected, found.next());
reader.close();
@@ -103,7 +106,8 @@ private void checkMissingCount(String... extraArgs) throws
Exception {
ArrayList<String> args = new ArrayList<>();
args.addAll(Arrays.asList(new String[] {
OUT_FILE.toString(),
- "--schema-file", SCHEMA_FILE.toString()
+ "--schema-file", SCHEMA_FILE.toString(),
+ "--seed", Long.toString(SEED)
}));
args.addAll(Arrays.asList(extraArgs));
run(args);
@@ -129,17 +133,18 @@ public void testMissingCountParameter() throws Exception {
public void testStdOut() throws Exception {
TestUtil.resetRandomSeed();
run(Arrays.asList(new String[]
- { "-", "--count", COUNT, "--schema-file", SCHEMA_FILE.toString()
}));
+ { "-", "--count", COUNT, "--schema-file", SCHEMA_FILE.toString(),
+ "--seed", Long.toString(SEED) }));
byte[] file = out.toByteArray();
DataFileStream<Object> reader =
- new DataFileStream(new ByteArrayInputStream(file),
+ new DataFileStream<Object>(new ByteArrayInputStream(file),
new GenericDatumReader<>());
Iterator<Object> found = reader.iterator();
for (Object expected :
- new RandomData(schemaParser.parse(SCHEMA_FILE),
Integer.parseInt(COUNT)))
+ new RandomData(schemaParser.parse(SCHEMA_FILE),
Integer.parseInt(COUNT), SEED))
assertEquals(expected, found.next());
reader.close();
diff --git
a/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
b/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
index 980885f12..7c222c4e3 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
@@ -26,13 +26,14 @@
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.util.RandomData;
import org.apache.trevni.avro.AvroColumnReader;
-import org.apache.trevni.avro.RandomData;
-
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class TestToTrevniTool {
+ private static final long SEED = System.currentTimeMillis();
+
private static final int COUNT =
Integer.parseInt(System.getProperty("test.count", "200"));
private static final File DIR = new File("/tmp");
@@ -50,12 +51,12 @@ private String run(String... args) throws Exception {
@Test
public void test() throws Exception {
- Schema schema = Schema.parse(SCHEMA_FILE);
+ Schema schema = new Schema.Parser().parse(SCHEMA_FILE);
DataFileWriter<Object> writer =
new DataFileWriter<>(new GenericDatumWriter<>());
writer.create(schema, Util.createFromFS(AVRO_FILE.toString()));
- for (Object datum : new RandomData(schema, COUNT))
+ for (Object datum : new RandomData(schema, COUNT, SEED))
writer.append(datum);
writer.close();
@@ -64,7 +65,7 @@ public void test() throws Exception {
AvroColumnReader<Object> reader =
new AvroColumnReader<>(new AvroColumnReader.Params(TREVNI_FILE));
Iterator<Object> found = reader.iterator();
- for (Object expected : new RandomData(schema, COUNT))
+ for (Object expected : new RandomData(schema, COUNT, SEED))
assertEquals(expected, found.next());
reader.close();
}
diff --git
a/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
b/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
deleted file mode 100644
index 2426d89eb..000000000
--- a/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/RandomData.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.trevni.avro;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-
-import org.apache.trevni.TestUtil;
-
-/** Generates schema data as Java objects with random values. */
-public class RandomData implements Iterable<Object> {
- public static final String USE_DEFAULT = "use-default";
-
- private final Schema root;
- private final int count;
-
- public RandomData(Schema schema, int count) {
- this.root = schema;
- this.count = count;
- }
-
- public Iterator<Object> iterator() {
- return new Iterator<Object>() {
- private int n;
- private Random random = TestUtil.createRandom();
- public boolean hasNext() { return n < count; }
- public Object next() {
- n++;
- return generate(root, random, 0);
- }
- public void remove() { throw new UnsupportedOperationException(); }
- };
- }
-
- @SuppressWarnings(value="unchecked")
- private static Object generate(Schema schema, Random random, int d) {
- switch (schema.getType()) {
- case RECORD:
- GenericRecord record = new GenericData.Record(schema);
- for (Schema.Field field : schema.getFields()) {
- Object value = (field.getObjectProp(USE_DEFAULT) == null)
- ? generate(field.schema(), random, d+1)
- : GenericData.get().getDefaultValue(field);
- record.put(field.name(), value);
- }
- return record;
- case ENUM:
- List<String> symbols = schema.getEnumSymbols();
- return new GenericData.EnumSymbol
- (schema, symbols.get(random.nextInt(symbols.size())));
- case ARRAY:
- int length = (random.nextInt(5)+2)-d;
- GenericArray<Object> array =
- new GenericData.Array(length<=0?0:length, schema);
- for (int i = 0; i < length; i++)
- array.add(generate(schema.getElementType(), random, d+1));
- return array;
- case MAP:
- length = (random.nextInt(5)+2)-d;
- Map<Object,Object> map = new HashMap<>(length <= 0 ? 0 : length);
- for (int i = 0; i < length; i++) {
- map.put(TestUtil.randomString(random),
- generate(schema.getValueType(), random, d+1));
- }
- return map;
- case UNION:
- List<Schema> types = schema.getTypes();
- return generate(types.get(random.nextInt(types.size())), random, d);
- case FIXED:
- byte[] bytes = new byte[schema.getFixedSize()];
- random.nextBytes(bytes);
- return new GenericData.Fixed(schema, bytes);
- case STRING: return TestUtil.randomString(random);
- case BYTES: return TestUtil.randomBytes(random);
- case INT: return random.nextInt();
- case LONG: return random.nextLong();
- case FLOAT: return random.nextFloat();
- case DOUBLE: return random.nextDouble();
- case BOOLEAN: return random.nextBoolean();
- case NULL: return null;
- default: throw new RuntimeException("Unknown type: "+schema);
- }
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length != 3) {
- System.out.println("Usage: RandomData <schemafile> <outputfile>
<count>");
- System.exit(-1);
- }
- Schema sch = Schema.parse(new File(args[0]));
- DataFileWriter<Object> writer =
- new DataFileWriter<>(new GenericDatumWriter<>())
- .create(sch, new File(args[1]));
- try {
- for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
- writer.append(datum);
- }
- } finally {
- writer.close();
- }
- }
-}
diff --git
a/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
b/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
index 39d138e6f..62d58ac70 100644
---
a/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
+++
b/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
@@ -24,13 +24,13 @@
import org.apache.trevni.ValueType;
import org.apache.trevni.ColumnMetaData;
import org.apache.trevni.ColumnFileMetaData;
-
import org.apache.avro.Schema;
-
+import org.apache.avro.util.RandomData;
import org.junit.Test;
import static org.junit.Assert.*;
public class TestShredder {
+ private static final long SEED = System.currentTimeMillis();
private static final int COUNT = 100;
private static final File FILE = new File("target", "test.trv");
@@ -257,7 +257,7 @@ private void checkWrite(Schema schema) throws IOException {
AvroColumnWriter<Object> writer =
new AvroColumnWriter<>(schema, new ColumnFileMetaData());
int count = 0;
- for (Object datum : new RandomData(schema, COUNT)) {
+ for (Object datum : new RandomData(schema, COUNT, SEED)) {
//System.out.println("datum="+datum);
writer.write(datum);
}
@@ -268,7 +268,7 @@ private void checkRead(Schema schema) throws IOException {
AvroColumnReader<Object> reader =
new AvroColumnReader<>(new AvroColumnReader.Params(FILE)
.setSchema(schema));
- for (Object expected : new RandomData(schema, COUNT))
+ for (Object expected : new RandomData(schema, COUNT, SEED))
assertEquals(expected, reader.next());
reader.close();
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Duplicate code RandomData.java and its dependency problem
> ---------------------------------------------------------
>
> Key: AVRO-2267
> URL: https://issues.apache.org/jira/browse/AVRO-2267
> Project: Apache Avro
> Issue Type: Improvement
> Components: java
> Reporter: Thiruvalluvan M. G.
> Assignee: Thiruvalluvan M. G.
> Priority: Major
>
> There are two issues with the {{RandomData}} class:
> * There are almost identical copies of the same code in two modules:
> {{avro}} and {{avro-ipc}}. We should use a single source file.
> * Both copies live in the {{test}} subfolders of their respective modules,
> but the {{avro-tools}} module uses this class in its {{main}} sources.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)