http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/input/stream/TwitterFirehoseStreamFactory.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/input/stream/TwitterFirehoseStreamFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/input/stream/TwitterFirehoseStreamFactory.java new file mode 100644 index 0000000..77b8df1 --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/input/stream/TwitterFirehoseStreamFactory.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.external.input.stream; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.asterix.external.api.AsterixInputStream; +import org.apache.asterix.external.api.IInputStreamFactory; +import org.apache.asterix.external.input.stream.TwitterFirehoseInputStream; +import org.apache.asterix.runtime.util.ClusterStateManager; +import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.HyracksDataException; + +/** + * Factory class for creating @see{TwitterFirehoseFeedAdapter}. The adapter + * simulates a twitter firehose with tweets being "pushed" into Asterix at a + * configurable rate measured in terms of TPS (tweets/second). The stream of + * tweets lasts for a configurable duration (measured in seconds). + */ +public class TwitterFirehoseStreamFactory implements IInputStreamFactory { + + private static final long serialVersionUID = 1L; + + /** + * Degree of parallelism for feed ingestion activity. Defaults to 1. This + * determines the count constraint for the ingestion operator. + **/ + private static final String KEY_INGESTION_CARDINALITY = "ingestion-cardinality"; + + /** + * The absolute locations where ingestion operator instances will be placed. + **/ + private static final String KEY_INGESTION_LOCATIONS = "ingestion-location"; + + private Map<String, String> configuration; + + @Override + public AlgebricksAbsolutePartitionConstraint getPartitionConstraint() { + String ingestionCardinalityParam = configuration.get(KEY_INGESTION_CARDINALITY); + String ingestionLocationParam = configuration.get(KEY_INGESTION_LOCATIONS); + String[] locations = null; + if (ingestionLocationParam != null) { + locations = ingestionLocationParam.split(","); + } + int count = locations != null ? locations.length : 1; + if (ingestionCardinalityParam != null) { + count = Integer.parseInt(ingestionCardinalityParam); + } + + List<String> chosenLocations = new ArrayList<>(); + String[] availableLocations = locations != null ? locations + : ClusterStateManager.INSTANCE.getParticipantNodes().toArray(new String[] {}); + for (int i = 0, k = 0; i < count; i++, k = (k + 1) % availableLocations.length) { + chosenLocations.add(availableLocations[k]); + } + return new AlgebricksAbsolutePartitionConstraint(chosenLocations.toArray(new String[] {})); + } + + @Override + public DataSourceType getDataSourceType() { + return DataSourceType.STREAM; + } + + @Override + public void configure(Map<String, String> configuration) { + this.configuration = configuration; + } + + @Override + public boolean isIndexible() { + return false; + } + + @Override + public AsterixInputStream createInputStream(IHyracksTaskContext ctx, int partition) throws HyracksDataException { + try { + return new TwitterFirehoseInputStream(configuration, ctx, partition); + } catch (IOException e) { + throw new HyracksDataException(e); + } + } +}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-tools/pom.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-tools/pom.xml b/asterixdb/asterix-tools/pom.xml index 011d44b..c039c48 100644 --- a/asterixdb/asterix-tools/pom.xml +++ b/asterixdb/asterix-tools/pom.xml @@ -54,6 +54,14 @@ <include>**/DEPENDENCIES*</include> </includes> </configuration> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + <phase>package</phase> + </execution> + </executions> </plugin> </plugins> </build> @@ -63,19 +71,20 @@ <groupId>org.apache.asterix</groupId> <artifactId>asterix-lang-aql</artifactId> <version>${project.version}</version> - <scope>compile</scope> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.asterix</groupId> <artifactId>asterix-algebra</artifactId> <version>${project.version}</version> - <scope>compile</scope> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.asterix</groupId> <artifactId>asterix-external-data</artifactId> <version>${project.version}</version> - <scope>compile</scope> + <type>test-jar</type> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.asterix</groupId> @@ -88,8 +97,7 @@ <groupId>org.apache.asterix</groupId> <artifactId>asterix-metadata</artifactId> <version>${project.version}</version> - <scope>compile</scope> + <scope>test</scope> </dependency> </dependencies> - </project> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdgClientDriver.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdgClientDriver.java b/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdgClientDriver.java deleted file mode 100644 index 8a1adb0..0000000 --- a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdgClientDriver.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.asterix.tools.datagen; - -import java.io.File; - -import org.kohsuke.args4j.Argument; -import org.kohsuke.args4j.CmdLineParser; - -public class AdgClientDriver { - - public static final int NUMBER_OF_ARGUMENTS = 2; - - public static class AdgClientConfig { - - @Argument(index = 0, required = true, metaVar = "ARG1", usage = "The file containing the annotated schema.") - private File schemaFile; - - @Argument(index = 1, required = true, metaVar = "ARG2", usage = "The output directory path.") - private File outputDir; - } - - public static void main(String[] args) throws Exception { - AdgClientConfig acc = new AdgClientConfig(); - CmdLineParser cmdParser = new CmdLineParser(acc); - try { - cmdParser.parseArgument(args); - } catch (Exception e) { - cmdParser.printUsage(System.err); - throw e; - } - AdmDataGen adg = new AdmDataGen(acc.schemaFile, acc.outputDir); - adg.init(); - adg.dataGen(); - } -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdmDataGen.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdmDataGen.java b/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdmDataGen.java deleted file mode 100644 index 2918575..0000000 --- a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/AdmDataGen.java +++ /dev/null @@ -1,1020 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.asterix.tools.datagen; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import org.apache.asterix.common.annotations.AutoDataGen; -import org.apache.asterix.common.annotations.DateBetweenYearsDataGen; -import org.apache.asterix.common.annotations.DatetimeAddRandHoursDataGen; -import org.apache.asterix.common.annotations.DatetimeBetweenYearsDataGen; -import org.apache.asterix.common.annotations.FieldIntervalDataGen; -import org.apache.asterix.common.annotations.FieldValFileDataGen; -import org.apache.asterix.common.annotations.FieldValFileSameIndexDataGen; -import org.apache.asterix.common.annotations.IRecordFieldDataGen; -import org.apache.asterix.common.annotations.IRecordTypeAnnotation; -import org.apache.asterix.common.annotations.IRecordTypeAnnotation.Kind; -import org.apache.asterix.common.annotations.InsertRandIntDataGen; -import org.apache.asterix.common.annotations.ListDataGen; -import org.apache.asterix.common.annotations.ListValFileDataGen; -import org.apache.asterix.common.annotations.RecordDataGenAnnotation; -import org.apache.asterix.common.annotations.TypeDataGen; -import org.apache.asterix.common.annotations.UndeclaredFieldsDataGen; -import org.apache.asterix.common.exceptions.ACIDException; -import org.apache.asterix.common.exceptions.AsterixException; -import org.apache.asterix.common.transactions.JobId; -import org.apache.asterix.lang.aql.parser.AQLParserFactory; -import org.apache.asterix.lang.aql.parser.ParseException; -import org.apache.asterix.lang.common.base.IParser; -import org.apache.asterix.lang.common.base.IParserFactory; -import org.apache.asterix.lang.common.base.Statement; -import org.apache.asterix.metadata.MetadataException; -import org.apache.asterix.metadata.MetadataTransactionContext; -import org.apache.asterix.om.types.ARecordType; -import org.apache.asterix.om.types.ATypeTag; -import org.apache.asterix.om.types.AUnionType; -import org.apache.asterix.om.types.AbstractCollectionType; -import org.apache.asterix.om.types.BuiltinType; -import org.apache.asterix.om.types.IAType; -import org.apache.asterix.om.types.TypeSignature; -import org.apache.asterix.om.util.NonTaggedFormatUtil; -import org.apache.asterix.tools.translator.ADGenDmlTranslator; -import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; -import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException; -import org.apache.hyracks.algebricks.data.utils.WriteValueTools; - -public class AdmDataGen { - - class DataGeneratorContext { - private Map<File, String[]> fileToLoadedDataMap = new HashMap<File, String[]>(); - - public Map<File, String[]> getFileToLoadedDataMap() { - return fileToLoadedDataMap; - } - } - - public static void printDate(int year, int month, int day, PrintStream out) throws IOException { - WriteValueTools.writeInt(year, out); - out.print("-"); - if (month < 10) { - out.print("0"); - } - WriteValueTools.writeInt(month, out); - out.print("-"); - if (day < 10) { - out.print("0"); - } - WriteValueTools.writeInt(day, out); - } - - abstract class AbstractValueGenerator { - protected PrintStream out; - protected DataGeneratorContext ctx; - - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - this.out = out; - this.ctx = ctx; - } - - public abstract void generate() throws IOException; - } - - abstract class RandomValueGenerator extends AbstractValueGenerator { - protected Random rnd; - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - this.rnd = new Random(51); - } - - } - - class IntAutoGenerator extends AbstractValueGenerator { - - private final int initValue; - private int val; - - public IntAutoGenerator(int initValue) { - this.initValue = initValue; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - val = initValue; - } - - @Override - public void generate() throws IOException { - WriteValueTools.writeInt(val, out); - val++; - } - - } - - class LongAutoGenerator extends AbstractValueGenerator { - - private final long initValue; - private long val; - - public LongAutoGenerator(long initValue) { - this.initValue = initValue; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - val = initValue; - } - - @Override - public void generate() throws IOException { - WriteValueTools.writeLong(val, out); - out.print("i64"); - val++; - } - - } - - class StringFromArrayGenerator extends RandomValueGenerator { - private final String[][] array; - private int lastIndex; - private final String constructor; - private String[] chosen; - - public StringFromArrayGenerator(String[][] array, String constructor) { - this.array = array; - this.constructor = constructor; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - this.lastIndex = -1; - this.chosen = new String[array.length]; - } - - @Override - public void generate() throws IOException { - for (int i = 0; i < array.length; i++) { - lastIndex = Math.abs(rnd.nextInt()) % array[i].length; - chosen[i] = array[i][lastIndex]; - } - if (constructor != null) { - out.print(constructor); - out.print("("); - } - out.print("\""); - for (int i = 0; i < chosen.length; i++) { - out.print(chosen[i]); - } - out.print("\""); - if (constructor != null) { - out.print(")"); - } - } - - public int getLastIndex() { - return lastIndex; - } - } - - abstract class AbstractCollectionDataGenerator extends RandomValueGenerator { - protected final AbstractCollectionType act; - protected final int min; - protected final int max; - protected final String startList; - protected final String endList; - - public AbstractCollectionDataGenerator(AbstractCollectionType act, int min, int max) { - this.act = act; - this.min = min; - this.max = max; - if (act.getTypeTag() == ATypeTag.ORDEREDLIST) { - startList = "["; - endList = "]"; - } else { - startList = "{{"; - endList = "}}"; - } - } - - } - - class ListDataGenerator extends AbstractCollectionDataGenerator { - - private AbstractValueGenerator itemGen; - - public ListDataGenerator(AbstractCollectionType act, int min, int max) { - super(act, min, max); - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - IAType t = act.getItemType(); - if (t.getTypeTag() != ATypeTag.RECORD) { - throw new NotImplementedException("list annotation only works with record item types for now."); - } - ARecordType rt = (ARecordType) t; - RecordDataGenAnnotation dga = firstDataGenAnnotation(rt); - if (dga == null) { - throw new Exception("No annotation on item type " + t); - } - itemGen = new RecordGenerator(rt, dga, null); - itemGen.init(out, ctx); - } - - @Override - public void generate() throws IOException { - out.print(startList); - int numItems = rnd.nextInt(max - min + 1) + min; - for (int i = 0; i < numItems; i++) { - if (i > 0) { - out.print(", "); - } - itemGen.generate(); - } - out.print(endList); - } - - } - - class ListFromArrayGenerator extends AbstractCollectionDataGenerator { - - private final String[] array; - private String constructor; - - public ListFromArrayGenerator(AbstractCollectionType act, String[] array, int min, int max) { - super(act, min, max); - this.array = array; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - if (act.getItemType().getTypeTag() == ATypeTag.STRING) { - constructor = null; - } else { - constructor = getConstructor(act.getItemType()); - } - } - - @Override - public void generate() throws IOException { - out.print(startList); - int numItems = rnd.nextInt(max - min + 1) + min; - for (int i = 0; i < numItems; i++) { - if (i > 0) { - out.print(", "); - } - int c = Math.abs(rnd.nextInt()) % array.length; - if (constructor != null) { - out.print(constructor); - out.print("("); - } - out.print("\""); - out.print(array[c]); - out.print("\""); - if (constructor != null) { - out.print(")"); - } - } - out.print(endList); - } - - } - - class StringFromArraySameIndexGenerator extends AbstractValueGenerator { - private final String[] array; - private final StringFromArrayGenerator sfag; - private final String constructor; - - public StringFromArraySameIndexGenerator(String[] array, StringFromArrayGenerator sfag, String constructor) { - this.array = array; - this.sfag = sfag; - this.constructor = constructor; - } - - @Override - public void generate() throws IOException { - String val = array[sfag.getLastIndex()]; - if (constructor != null) { - out.print(constructor); - out.print("("); - } - out.print("\""); - out.print(val); - out.print("\""); - if (constructor != null) { - out.print(")"); - } - } - } - - class IntIntervalGenerator extends RandomValueGenerator { - - private final int min; - private final int max; - private final String prefix; - private final String suffix; - - public IntIntervalGenerator(int min, int max, String prefix, String suffix) { - this.min = min; - this.max = max; - this.prefix = prefix; - this.suffix = suffix; - } - - @Override - public void generate() throws IOException { - int v = Math.abs(rnd.nextInt() % (max - min + 1)) + min; - if (prefix != null) { - out.print(prefix); - } - WriteValueTools.writeInt(v, out); - if (suffix != null) { - out.print(suffix); - } - } - - } - - class LongIntervalGenerator extends RandomValueGenerator { - - private final long min; - private final long max; - private final String prefix; - private final String suffix; - - public LongIntervalGenerator(long min, long max, String prefix, String suffix) { - this.min = min; - this.max = max; - this.prefix = prefix; - this.suffix = suffix; - } - - @Override - public void generate() throws IOException { - long v = Math.abs(rnd.nextLong() % (max - min + 1)) + min; - if (prefix != null) { - out.print(prefix); - } - WriteValueTools.writeLong(v, out); - if (suffix != null) { - out.print(suffix); - } - } - - } - - class DoubleIntervalGenerator extends RandomValueGenerator { - - private final double min; - private final double max; - private final String prefix; - private final String suffix; - - public DoubleIntervalGenerator(double min, double max, String prefix, String suffix) { - this.min = min; - this.max = max; - this.prefix = prefix; - this.suffix = suffix; - } - - @Override - public void generate() throws IOException { - double v = Math.abs(rnd.nextDouble() % (max - min)) + min; - if (prefix != null) { - out.print(prefix); - } - out.print(v); - if (suffix != null) { - out.print(suffix); - } - } - - } - - class InsertRandIntGenerator extends RandomValueGenerator { - - private final String str1; - private final String str2; - - public InsertRandIntGenerator(String str1, String str2) { - this.str1 = "\"" + str1; - this.str2 = str2 + "\""; - } - - @Override - public void generate() throws IOException { - int v = Math.abs(rnd.nextInt()); - out.print(str1); - WriteValueTools.writeInt(v, out); - out.print(str2); - } - - } - - interface AccessibleDateGenerator { - public int getYear(); - - public int getMonth(); - - public int getDay(); - } - - abstract class DateGenerator extends RandomValueGenerator implements AccessibleDateGenerator { - - private int year; - private int month; - private int day; - - protected void generateDate(int minYear, int maxYear) throws IOException { - year = rnd.nextInt(maxYear - minYear + 1) + minYear; - month = Math.abs(rnd.nextInt()) % 12 + 1; - day = Math.abs(rnd.nextInt()) % 28 + 1; - printDate(year, month, day, out); - } - - @Override - public int getYear() { - return year; - } - - @Override - public int getMonth() { - return month; - } - - @Override - public int getDay() { - return day; - } - } - - class DateBetweenYearsGenerator extends DateGenerator { - - private final int minYear; - private final int maxYear; - - public DateBetweenYearsGenerator(int minYear, int maxYear) { - this.minYear = minYear; - this.maxYear = maxYear; - } - - @Override - public void generate() throws IOException { - out.print("date(\""); - generateDate(minYear, maxYear); - out.print("\")"); - } - - } - - interface AccessibleDatetimeGenerator extends AccessibleDateGenerator { - public int getHour(); - - public int getMinute(); - - public int getSecond(); - } - - class DatetimeBetweenYearsGenerator extends DateGenerator implements AccessibleDatetimeGenerator { - private final int minYear; - private final int maxYear; - private int hour; - - public DatetimeBetweenYearsGenerator(int minYear, int maxYear) { - this.minYear = minYear; - this.maxYear = maxYear; - } - - @Override - public void generate() throws IOException { - out.print("datetime(\""); - generateDate(minYear, maxYear); - out.print("T"); - hour = rnd.nextInt(24); - if (hour < 10) { - out.print("0"); - } - WriteValueTools.writeInt(hour, out); - out.print(":00:00\")"); - } - - @Override - public int getHour() { - return hour; - } - - @Override - public int getMinute() { - return 0; - } - - @Override - public int getSecond() { - return 0; - } - - } - - class DatetimeAddRandHoursGenerator extends RandomValueGenerator { - - private final int minHours; - private final int maxHours; - private final AccessibleDatetimeGenerator adtg; - - public DatetimeAddRandHoursGenerator(int minHours, int maxHours, AccessibleDatetimeGenerator adtg) { - this.minHours = minHours; - this.maxHours = maxHours; - this.adtg = adtg; - } - - @Override - public void generate() throws IOException { - int addHours = rnd.nextInt(maxHours - minHours + 1) + minHours; - - out.print("datetime(\""); - printDate(adtg.getYear(), adtg.getMonth(), adtg.getDay(), out); - out.print("T"); - int h = adtg.getHour() + addHours; - if (h > 23) { - h = 23; - } - if (h < 10) { - out.print("0"); - } - WriteValueTools.writeInt(h, out); - out.print(":"); - int m = adtg.getMinute(); - if (m < 10) { - out.print("0"); - } - WriteValueTools.writeInt(m, out); - out.print(":"); - int s = adtg.getSecond(); - if (s < 10) { - out.print("0"); - } - WriteValueTools.writeInt(s, out); - out.print("\")"); - } - - } - - class GenFieldsIntGenerator extends RandomValueGenerator { - private final int minFields; - private final int maxFields; - private final String prefix; - private final int startIndex; - private String[] fieldNames; - private int[] id; - private int[] x; - - public GenFieldsIntGenerator(int startIndex, int minFields, int maxFields, String prefix) { - this.startIndex = startIndex; - this.minFields = minFields; - this.maxFields = maxFields; - this.prefix = prefix; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - fieldNames = new String[maxFields]; - for (int i = 0; i < maxFields; i++) { - fieldNames[i] = prefix + "_" + i; - } - id = new int[maxFields]; - for (int i = 0; i < maxFields; i++) { - id[i] = i; - } - x = new int[maxFields]; - } - - @Override - public void generate() throws IOException { - int numFields = rnd.nextInt(maxFields - minFields + 1) + minFields; - System.arraycopy(id, 0, x, 0, maxFields); - int n = numFields; - boolean first = startIndex < 1; - while (n > 0) { - int c = rnd.nextInt(n); - if (first) { - first = false; - } else { - out.print(","); - } - out.print(" \""); - out.print(fieldNames[x[c]]); - out.print("\": "); - WriteValueTools.writeInt(Math.abs(rnd.nextInt()), out); - x[c] = x[n - 1]; - n--; - } - } - } - - class RecordGenerator extends RandomValueGenerator { - - private AbstractValueGenerator[] declaredFieldsGenerators; - private boolean[] nullable; - private AbstractValueGenerator undeclaredFieldsGenerator; - private final ARecordType recType; - private final RecordDataGenAnnotation annot; - private final String printAfter; - - public RecordGenerator(ARecordType type, RecordDataGenAnnotation annot, String printAfter) { - this.recType = type; - this.annot = annot; - this.printAfter = printAfter; - } - - @Override - public void init(PrintStream out, DataGeneratorContext ctx) throws Exception { - super.init(out, ctx); - if (declaredFieldsGenerators == null) { - int m = annot.getDeclaredFieldsDatagen().length; - declaredFieldsGenerators = new AbstractValueGenerator[m]; - nullable = new boolean[m]; - for (int i = 0; i < m; i++) { - IAType ti = recType.getFieldTypes()[i]; - if (NonTaggedFormatUtil.isOptional(ti)) { - ti = ((AUnionType) ti).getActualType(); - nullable[i] = true; - } - IRecordFieldDataGen rfdg = annot.getDeclaredFieldsDatagen()[i]; - if (rfdg == null) { - if (ti.getTypeTag() == ATypeTag.RECORD) { - ARecordType rt = (ARecordType) ti; - RecordDataGenAnnotation dga = null; - for (IRecordTypeAnnotation annot : rt.getAnnotations()) { - if (annot.getKind() == Kind.RECORD_DATA_GEN) { - dga = (RecordDataGenAnnotation) annot; - break; - } - } - if (dga != null) { - declaredFieldsGenerators[i] = new RecordGenerator(rt, dga, null); - continue; - } - } - if (declaredFieldsGenerators[i] == null) { - throw new Exception("No data generator annotation for field " + recType.getFieldNames()[i] - + " in type " + recType); - } - } - switch (rfdg.getKind()) { - case VALFILE: { - FieldValFileDataGen vf = (FieldValFileDataGen) rfdg; - int numFiles = vf.getFiles().length; - String[][] a = new String[numFiles][]; - for (int k = 0; k < numFiles; k++) { - File f = vf.getFiles()[k]; - a[k] = ctx.getFileToLoadedDataMap().get(f); - if (a[k] == null) { - a[k] = readFileAsStringArray(f); - ctx.getFileToLoadedDataMap().put(f, a[k]); - } - } - String constructor; - if (ti.getTypeTag() == ATypeTag.STRING) { - constructor = null; - } else { - constructor = getConstructor(ti); - } - declaredFieldsGenerators[i] = new StringFromArrayGenerator(a, constructor); - break; - } - case LISTVALFILE: { - ListValFileDataGen lvf = (ListValFileDataGen) rfdg; - String[] a = ctx.getFileToLoadedDataMap().get(lvf.getFile()); - if (a == null) { - a = readFileAsStringArray(lvf.getFile()); - ctx.getFileToLoadedDataMap().put(lvf.getFile(), a); - } - if (ti.getTypeTag() != ATypeTag.ORDEREDLIST && ti.getTypeTag() != ATypeTag.UNORDEREDLIST) { - throw new Exception( - "list-val-file annotation cannot be used for field of type " + ti.getTypeTag()); - } - AbstractCollectionType act = (AbstractCollectionType) ti; - declaredFieldsGenerators[i] = new ListFromArrayGenerator(act, a, lvf.getMin(), - lvf.getMax()); - break; - } - case VALFILESAMEINDEX: { - FieldValFileSameIndexDataGen vfsi = (FieldValFileSameIndexDataGen) rfdg; - String[] a = ctx.getFileToLoadedDataMap().get(vfsi.getFile()); - if (a == null) { - a = readFileAsStringArray(vfsi.getFile()); - ctx.getFileToLoadedDataMap().put(vfsi.getFile(), a); - } - StringFromArrayGenerator sfag = null; - for (int j = 0; j < i; j++) { - if (recType.getFieldNames()[j].equals(vfsi.getSameAsField())) { - if (declaredFieldsGenerators[j] instanceof StringFromArrayGenerator) { - sfag = (StringFromArrayGenerator) declaredFieldsGenerators[j]; - break; - } else { - throw new Exception("Data generator for field " + recType.getFieldNames()[j] - + " is not based on values from a text file, as required by generator for field " - + recType.getFieldNames()[i]); - } - } - } - if (sfag == null) { - throw new Exception("Couldn't find field " + vfsi.getSameAsField() + " before field " - + recType.getFieldNames()[i]); - } - String constructor; - if (ti.getTypeTag() == ATypeTag.STRING) { - constructor = null; - } else { - constructor = getConstructor(ti); - } - declaredFieldsGenerators[i] = new StringFromArraySameIndexGenerator(a, sfag, constructor); - break; - } - case INTERVAL: { - FieldIntervalDataGen fi = (FieldIntervalDataGen) rfdg; - String prefix = null; - String suffix = null; - if (ti.getTypeTag() == ATypeTag.STRING) { - prefix = "\""; - suffix = "\""; - } - switch (fi.getValueType()) { - case INT: { - declaredFieldsGenerators[i] = new IntIntervalGenerator( - Integer.parseInt(fi.getMin()), Integer.parseInt(fi.getMax()), prefix, - suffix); - break; - } - case LONG: { - declaredFieldsGenerators[i] = new LongIntervalGenerator(Long.parseLong(fi.getMin()), - Long.parseLong(fi.getMax()), prefix, suffix); - break; - } - case DOUBLE: { - declaredFieldsGenerators[i] = new DoubleIntervalGenerator( - Double.parseDouble(fi.getMin()), Double.parseDouble(fi.getMax()), prefix, - suffix); - break; - } - default: { - throw new NotImplementedException(); - } - } - break; - } - case INSERTRANDINT: { - InsertRandIntDataGen iri = (InsertRandIntDataGen) rfdg; - declaredFieldsGenerators[i] = new InsertRandIntGenerator(iri.getStr1(), iri.getStr2()); - break; - } - case LIST: { - ListDataGen l = (ListDataGen) rfdg; - if (ti.getTypeTag() != ATypeTag.ORDEREDLIST && ti.getTypeTag() != ATypeTag.UNORDEREDLIST) { - throw new Exception( - "list-val-file annotation cannot be used for field of type " + ti.getTypeTag()); - } - AbstractCollectionType act = (AbstractCollectionType) ti; - declaredFieldsGenerators[i] = new ListDataGenerator(act, l.getMin(), l.getMax()); - break; - } - case DATEBETWEENYEARS: { - DateBetweenYearsDataGen dby = (DateBetweenYearsDataGen) rfdg; - declaredFieldsGenerators[i] = new DateBetweenYearsGenerator(dby.getMinYear(), - dby.getMaxYear()); - break; - } - case DATETIMEBETWEENYEARS: { - DatetimeBetweenYearsDataGen dtby = (DatetimeBetweenYearsDataGen) rfdg; - declaredFieldsGenerators[i] = new DatetimeBetweenYearsGenerator(dtby.getMinYear(), - dtby.getMaxYear()); - break; - } - case DATETIMEADDRANDHOURS: { - DatetimeAddRandHoursDataGen dtarh = (DatetimeAddRandHoursDataGen) rfdg; - AccessibleDatetimeGenerator adtg = null; - for (int j = 0; j < i; j++) { - if (recType.getFieldNames()[j].equals(dtarh.getAddToField())) { - if (declaredFieldsGenerators[j] instanceof AccessibleDatetimeGenerator) { - adtg = (AccessibleDatetimeGenerator) declaredFieldsGenerators[j]; - break; - } else { - throw new Exception("Data generator for field " + recType.getFieldNames()[j] - + " is not based on accessible datetime values, as required by generator for field " - + recType.getFieldNames()[i]); - } - } - } - if (adtg == null) { - throw new Exception("Couldn't find field " + dtarh.getAddToField() + " before field " - + recType.getFieldNames()[i]); - } - declaredFieldsGenerators[i] = new DatetimeAddRandHoursGenerator(dtarh.getMinHour(), - dtarh.getMaxHour(), adtg); - break; - } - case AUTO: { - AutoDataGen auto = (AutoDataGen) rfdg; - switch (ti.getTypeTag()) { - case INT32: { - declaredFieldsGenerators[i] = new IntAutoGenerator( - Integer.parseInt(auto.getInitValueStr())); - break; - } - case INT64: { - declaredFieldsGenerators[i] = new LongAutoGenerator( - Long.parseLong(auto.getInitValueStr())); - break; - } - default: { - throw new IllegalStateException(rfdg.getKind() - + " annotation is not implemented for type " + ti.getTypeTag()); - } - } - break; - } - default: { - throw new NotImplementedException(rfdg.getKind() + " is not implemented"); - } - } - } - } - for (int i = 0; i < declaredFieldsGenerators.length; i++) { - declaredFieldsGenerators[i].init(out, ctx); - } - if (undeclaredFieldsGenerator == null) { - UndeclaredFieldsDataGen ufdg = annot.getUndeclaredFieldsDataGen(); - if (ufdg != null) { - if (!recType.isOpen()) { - throw new Exception("Cannot generate undeclared fields for closed type " + recType); - } - undeclaredFieldsGenerator = new GenFieldsIntGenerator(declaredFieldsGenerators.length, - ufdg.getMinUndeclaredFields(), ufdg.getMaxUndeclaredFields(), - ufdg.getUndeclaredFieldsPrefix()); - } - } - if (undeclaredFieldsGenerator != null) { - undeclaredFieldsGenerator.init(out, ctx); - } - } - - @Override - public void generate() throws IOException { - out.print("{"); - boolean first = true; - for (int i = 0; i < declaredFieldsGenerators.length; i++) { - boolean pick; - if (nullable[i]) { - pick = rnd.nextBoolean(); - } else { - pick = true; - } - if (pick) { - if (first) { - first = false; - } else { - out.print(","); - } - out.print(" \""); - out.print(this.recType.getFieldNames()[i]); - out.print("\": "); - declaredFieldsGenerators[i].generate(); - } - } - if (undeclaredFieldsGenerator != null) { - undeclaredFieldsGenerator.generate(); - } - out.print(" }"); - if (printAfter != null) { - out.print(printAfter); - } - } - - } - - private final File schemaFile; - private final File outputDir; - private Map<TypeSignature, IAType> typeMap; - private Map<TypeSignature, TypeDataGen> typeAnnotMap; - private DataGeneratorContext dgCtx; - private final IParserFactory parserFactory = new AQLParserFactory(); - - public AdmDataGen(File schemaFile, File outputDir) { - this.schemaFile = schemaFile; - this.outputDir = outputDir; - } - - public void init() throws IOException, ParseException, AsterixException, ACIDException, MetadataException, - AlgebricksException { - FileReader aql = new FileReader(schemaFile); - IParser parser = parserFactory.createParser(aql); - List<Statement> statements = parser.parse(); - aql.close(); - // TODO: Need to fix how to use transactions here. - MetadataTransactionContext mdTxnCtx = new MetadataTransactionContext(new JobId(-1)); - ADGenDmlTranslator dmlt = new ADGenDmlTranslator(mdTxnCtx, statements); - dmlt.translate(); - typeMap = dmlt.getTypeMap(); - typeAnnotMap = dmlt.getTypeDataGenMap(); - dgCtx = new DataGeneratorContext(); - } - - public void dataGen() throws Exception { - for (Map.Entry<TypeSignature, IAType> me : typeMap.entrySet()) { - TypeSignature tn = me.getKey(); - TypeDataGen tdg = typeAnnotMap.get(tn); - if (tdg.isDataGen()) { - IAType t = me.getValue(); - - if (t.getTypeTag() != ATypeTag.RECORD) { - throw new NotImplementedException(); - } - ARecordType rt = (ARecordType) t; - RecordDataGenAnnotation dga = firstDataGenAnnotation(rt); - if (dga == null) { - throw new Exception("No data generator annotations for type " + tn); - } - File outFile = new File(outputDir + File.separator + tdg.getOutputFileName()); - PrintStream outStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outFile))); - RecordGenerator rg = new RecordGenerator(rt, dga, "\n"); - rg.init(outStream, dgCtx); - for (long i = 0; i < tdg.getNumValues(); i++) { - rg.generate(); - } - outStream.close(); - } - } - } - - private static RecordDataGenAnnotation firstDataGenAnnotation(ARecordType rt) { - for (IRecordTypeAnnotation annot : rt.getAnnotations()) { - if (annot.getKind() == Kind.RECORD_DATA_GEN) { - return (RecordDataGenAnnotation) annot; - } - } - return null; - } - - private static String[] readFileAsStringArray(File file) throws IOException { - List<String> tmp = new ArrayList<String>(); - FileInputStream fstream = new FileInputStream(file); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; - while ((strLine = br.readLine()) != null) { - tmp.add(strLine); - } - in.close(); - return tmp.toArray(new String[0]); - } - - private static String getConstructor(IAType t) throws Exception { - if (t instanceof BuiltinType) { - String s = ((BuiltinType) t).getConstructor(); - if (s == null) { - throw new Exception("Type " + t + " has no constructors."); - } - return s; - } else { - throw new Exception("No string constructor for type " + t); - } - } - -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/CustOrdDataGen.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/CustOrdDataGen.java b/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/CustOrdDataGen.java deleted file mode 100644 index b605f73..0000000 --- a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/CustOrdDataGen.java +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.asterix.tools.datagen; - -import java.io.BufferedReader; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; - -public class CustOrdDataGen { - - // for customers - private static final String FIRST_NAMES_FILE_NAME = "/opt/us_census_names/dist.all.first.cleaned"; - private static final String LAST_NAMES_FILE_NAME = "/opt/us_census_names/dist.all.last.cleaned"; - private static final int MIN_AGE = 10; - private static final int MAX_AGE = 90; - - private static final String[] STREETS = { "Main St.", "Oak St.", "7th St.", "Washington St.", "Cedar St.", - "Lake St.", "Hill St.", "Park St.", "View St." }; - private static final int MIN_STREET_NUM = 1; - private static final int MAX_STREET_NUM = 10000; - private static final String[] CITIES = { "Seattle", "San Jose", "Mountain View", "Los Angeles", "Sunnyvale", - "Portland" }; - - private static final int MIN_INTERESTS = 0; - private static final int MAX_INTERESTS = 5; - private String[] INTERESTS = { "Bass", "Music", "Databases", "Fishing", "Tennis", "Squash", "Computers", "Books", - "Movies", "Cigars", "Wine", "Running", "Walking", "Skiing", "Basketball", "Video Games", "Cooking", - "Coffee", "Base Jumping", "Puzzles", "Chess", "Programming", "Reddit", "Soccer", "Hockey", "Money", - "Dancing", "Brewing", "Gardening", "Hacking", "Reading" }; - - private static final int MIN_CHILD_AGE = 0; - - private static final int MIN_CHILDREN = 0; - private static final int MAX_CHILDREN = 5; - - // for orders - private static final int MIN_ORDERS_PER_CUST = 1; - private static final int MAX_ORDERS_PER_CUST = 10; - - private String[] CLERKS = { "Kathrin", "Katherine", "Cathryn", "Catherine", "Cat", "Kathryne", "Cathrin" }; - private String[] ORDER_PRIORITIES = { "LOW", "MEDIUM", "HIGH", "PREMIUM" }; - private String[] ORDER_STATUSES = { "ORDER_PLACED", "PAYMENT_RECEIVED", "ORDER_SHIPPED", "ORDER_DELIVERED" }; - - private String[] firstNames = { "Joe", "John", "Jill", "Gill", "Bill", "William", "Kathy", "Cathey", "Jane", - "Albert" }; - private String[] lastNames = { "Doe", "Smith", "Li", "Singh", "Williams", "Davis", "Brown", "Wilson", "Moore", - "Thomas" }; - - private static final String[] UNDECLARED_FIELD_NAMES = { "param1", "param2", "param3", "param4", "param5", - "param6", "param7", "param8", "param9", "param10" }; - - private int currentCID = 0; - private int currentOID = 0; - - private Random rndValue = new Random(50); - - private Random rndChildAgeField = new Random(50); - private Random rndCustAgeField = new Random(50); - private Random rndCustAddressField = new Random(50); - - private Random[] rndUndeclaredOrderFields; - - private class Child { - public String name; - public int age; - - public void generateFieldValues(String lastName, int maxChildAge) { - // name - int firstNameIx = Math.abs(rndValue.nextInt()) % firstNames.length; - name = firstNames[firstNameIx] + " " + lastName; - - // age - age = -1; - if (rndChildAgeField.nextBoolean()) { - if (maxChildAge >= 0) { - if (maxChildAge == MIN_CHILD_AGE) { - age = maxChildAge; - } else { - age = Math.abs((rndValue.nextInt()) % (maxChildAge - MIN_CHILD_AGE)) + MIN_AGE; - } - } - } - } - - public String getJSON() { - StringBuilder jsonString = new StringBuilder(); - - jsonString.append("{ "); // start child - - // name - jsonString.append(" \"name\": "); - jsonString.append("\"" + name + "\""); - - // age - if (age >= 0) { - jsonString.append(", "); - jsonString.append(" \"age\": "); - jsonString.append(age); - } - - jsonString.append(" }"); // end child - - return jsonString.toString(); - } - - } - - private class Customer { - - private int cid; - private String name; - private int age; - - private String streetName; - private int streetNumber; - private String city; - - private int[] custInterests; - private Child[] custChildren; - - public void generateFieldValues() { - cid = currentCID++; - - int firstNameIx = Math.abs(rndValue.nextInt()) % firstNames.length; - int lastNameIx = Math.abs(rndValue.nextInt()) % lastNames.length; - name = firstNames[firstNameIx] + " " + lastNames[lastNameIx]; - - if (rndCustAgeField.nextBoolean()) { - age = Math.abs((rndValue.nextInt()) % (MAX_AGE - MIN_AGE)) + MIN_AGE; - } else { - age = -1; - } - - if (rndCustAddressField.nextBoolean()) { - streetNumber = Math.abs(rndValue.nextInt()) % (MAX_STREET_NUM - MIN_STREET_NUM) + MIN_STREET_NUM; - - int streetIx = Math.abs(rndValue.nextInt()) % STREETS.length; - streetName = STREETS[streetIx]; - - int cityIx = Math.abs(rndValue.nextInt()) % CITIES.length; - city = CITIES[cityIx]; - } else { - streetNumber = -1; - streetName = null; - city = null; - } - - int numInterests = Math.abs((rndValue.nextInt()) % (MAX_INTERESTS - MIN_INTERESTS)) + MIN_INTERESTS; - custInterests = new int[numInterests]; - for (int i = 0; i < numInterests; i++) { - custInterests[i] = Math.abs(rndValue.nextInt()) % INTERESTS.length; - } - - int numChildren = Math.abs((rndValue.nextInt()) % (MAX_CHILDREN - MIN_CHILDREN)) + MIN_CHILDREN; - custChildren = new Child[numChildren]; - for (int i = 0; i < numChildren; i++) { - Child c = new Child(); - int maxChildAge = age <= 0 ? 50 : (age - 20); - c.generateFieldValues(lastNames[lastNameIx], maxChildAge); - custChildren[i] = c; - } - } - - public String getJSON() { - StringBuilder jsonString = new StringBuilder(); - - jsonString.append("{ "); // start customer - - // customer id - jsonString.append(" \"cid\": "); - jsonString.append(cid); - jsonString.append(", "); - - // name - jsonString.append(" \"name\": "); - jsonString.append("\"" + name + "\""); - - // age - if (age >= 0) { - jsonString.append(", "); - jsonString.append(" \"age\": "); - jsonString.append(age); - } - - // nested address - if (streetNumber >= 0) { - jsonString.append(", "); - jsonString.append(" \"address\": "); - jsonString.append("{ "); // start address - - // number - jsonString.append(" \"number\": "); - jsonString.append(streetNumber); - jsonString.append(", "); - - // street - jsonString.append(" \"street\": "); - jsonString.append("\"" + streetName + "\""); - jsonString.append(", "); - - // city - jsonString.append(" \"city\": "); - jsonString.append("\"" + city + "\""); - - jsonString.append(" }"); // end address - } - - jsonString.append(", "); - - // interests - jsonString.append(" \"interests\": "); - jsonString.append("{{ "); // start interests - for (int i = 0; i < custInterests.length; i++) { - jsonString.append("\"" + INTERESTS[custInterests[i]] + "\""); - if (i != custInterests.length - 1) { - jsonString.append(", "); - } - } - jsonString.append(" }}"); // end interests - jsonString.append(", "); - - // children - jsonString.append(" \"children\": "); - jsonString.append("[ "); // start children - for (int i = 0; i < custChildren.length; i++) { - String jsonChild = custChildren[i].getJSON(); - jsonString.append(jsonChild); - if (i != custChildren.length - 1) { - jsonString.append(", "); - } - } - jsonString.append(" ]"); // end children - - jsonString.append(" }"); // end customer - - return jsonString.toString(); - } - } - - private class Order { - public int oid; - public int cid; - public String orderStatus; - public String orderPriority; - public String clerk; - public String total; - - private int[] undeclaredFields; - private BitSet nullMap; - - public void generateFieldValues(Customer cust) { - cid = cust.cid; - oid = currentOID++; - float t = Math.abs(rndValue.nextFloat()) * 100; - total = t + "f"; - - int orderStatusIx = Math.abs(rndValue.nextInt()) % ORDER_STATUSES.length; - orderStatus = ORDER_STATUSES[orderStatusIx]; - - int orderPriorityIx = Math.abs(rndValue.nextInt()) % ORDER_PRIORITIES.length; - orderPriority = ORDER_PRIORITIES[orderPriorityIx]; - - int clerkIx = Math.abs(rndValue.nextInt()) % CLERKS.length; - clerk = CLERKS[clerkIx]; - - int m = rndUndeclaredOrderFields.length; - undeclaredFields = new int[m]; - nullMap = new BitSet(m); - for (int i = 0; i < m; i++) { - if (rndUndeclaredOrderFields[i].nextBoolean()) { - undeclaredFields[i] = rndValue.nextInt(); - } else { - nullMap.set(i); - } - } - } - - public String getJSON() { - StringBuilder jsonString = new StringBuilder(); - - jsonString.append("{ "); // start order - - // oid - jsonString.append(" \"oid\": "); - jsonString.append(oid); - jsonString.append(", "); - - // cid - jsonString.append(" \"cid\": "); - jsonString.append(cid); - jsonString.append(", "); - - // orderStatus - jsonString.append(" \"orderstatus\": "); - jsonString.append("\"" + orderStatus + "\""); - jsonString.append(", "); - - // orderPriority - jsonString.append(" \"orderpriority\": "); - jsonString.append("\"" + orderPriority + "\""); - jsonString.append(", "); - - // clerk - jsonString.append(" \"clerk\": "); - jsonString.append("\"" + clerk + "\""); - jsonString.append(", "); - - // / cid - jsonString.append(" \"total\": "); - jsonString.append(total); - - for (int i = 0; i < undeclaredFields.length; i++) { - if (!nullMap.get(i)) { - jsonString.append(", "); - jsonString.append(" \"" + UNDECLARED_FIELD_NAMES[i] + "\": "); - jsonString.append(undeclaredFields[i]); - } - } - - jsonString.append(" }"); // end order - return jsonString.toString(); - } - } - - public void init() { - try { - List<String> tmpFirstNames = new ArrayList<String>(); - - FileInputStream fstream = new FileInputStream(FIRST_NAMES_FILE_NAME); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; - while ((strLine = br.readLine()) != null) { - String firstLetter = strLine.substring(0, 1); - String remainder = strLine.substring(1); - String capitalized = firstLetter.toUpperCase() + remainder.toLowerCase(); - tmpFirstNames.add(capitalized); - } - in.close(); - firstNames = tmpFirstNames.toArray(firstNames); - - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - - try { - List<String> tmpLastNames = new ArrayList<String>(); - - FileInputStream fstream = new FileInputStream(LAST_NAMES_FILE_NAME); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; - while ((strLine = br.readLine()) != null) { - String firstLetter = strLine.substring(0, 1); - String remainder = strLine.substring(1); - String capitalized = firstLetter.toUpperCase() + remainder.toLowerCase(); - tmpLastNames.add(capitalized); - } - in.close(); - lastNames = tmpLastNames.toArray(firstNames); - - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - - rndUndeclaredOrderFields = new Random[UNDECLARED_FIELD_NAMES.length]; - for (int i = 0; i < rndUndeclaredOrderFields.length; i++) { - rndUndeclaredOrderFields[i] = new Random(50); - } - } - - public void writeOrdersList(List<Order> ordersList, FileWriter ordersFile) throws IOException { - while (!ordersList.isEmpty()) { - int ix = Math.abs(rndValue.nextInt()) % ordersList.size(); - ordersFile.write(ordersList.get(ix).getJSON() + "\n"); - ordersList.remove(ix); - } - } - - public void writeCustomerList(List<Customer> customerList, Order[] ordersBatch, List<Order> ordersList, - FileWriter customersFile, FileWriter ordersFile) throws IOException { - while (!customerList.isEmpty()) { - int ix = Math.abs(rndValue.nextInt()) % customerList.size(); - customersFile.write(customerList.get(ix).getJSON() + "\n"); - - // generate orders - int numOrders = Math.abs(rndValue.nextInt()) % (MAX_ORDERS_PER_CUST - MIN_ORDERS_PER_CUST) - + MIN_ORDERS_PER_CUST; - for (int i = 0; i < numOrders; i++) { - ordersBatch[i].generateFieldValues(customerList.get(ix)); - ordersList.add(ordersBatch[i]); - } - writeOrdersList(ordersList, ordersFile); - - customerList.remove(ix); - } - } - - public static void main(String[] args) throws IOException { - if (args.length != 2) { - System.err.println("MUST PROVIDE 2 PARAMS, 1. output dir name and 2. number of records to generate."); - System.exit(1); - } - - String outputFile = args[0]; - int numRecords = Integer.parseInt(args[1]); - - FileWriter customersFile = new FileWriter(outputFile + File.separator + "customer.adm"); - FileWriter ordersFile = new FileWriter(outputFile + File.separator + "orders.adm"); - - CustOrdDataGen dataGen = new CustOrdDataGen(); - dataGen.init(); - - int batchSize = 1000; - Customer[] customerBatch = new Customer[batchSize]; - for (int i = 0; i < batchSize; i++) { - customerBatch[i] = dataGen.new Customer(); - } - - Order[] ordersBatch = new Order[MAX_ORDERS_PER_CUST]; - for (int i = 0; i < MAX_ORDERS_PER_CUST; i++) { - ordersBatch[i] = dataGen.new Order(); - } - - List<Customer> customerList = new LinkedList<Customer>(); - List<Order> ordersList = new LinkedList<Order>(); - int custIx = 0; - for (int i = 0; i < numRecords; i++) { - - customerBatch[custIx].generateFieldValues(); - customerList.add(customerBatch[custIx]); - custIx++; - - if (customerList.size() >= batchSize) { - dataGen.writeCustomerList(customerList, ordersBatch, ordersList, customersFile, ordersFile); - custIx = 0; - } - } - dataGen.writeCustomerList(customerList, ordersBatch, ordersList, customersFile, ordersFile); - customersFile.flush(); - customersFile.close(); - - ordersFile.flush(); - ordersFile.close(); - } -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/cb92dad7/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/EventDataGen.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/EventDataGen.java b/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/EventDataGen.java deleted file mode 100644 index b175b42..0000000 --- a/asterixdb/asterix-tools/src/main/java/org/apache/asterix/tools/datagen/EventDataGen.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.asterix.tools.datagen; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -public class EventDataGen { - private static final String FIRST_NAMES_FILE_NAME = "/opt/us_census_names/dist.all.first.cleaned"; - private static final String LAST_NAMES_FILE_NAME = "/opt/us_census_names/dist.all.last.cleaned"; - - private String[] firstNames = new String[10]; - private String[] lastNames = new String[10]; - - private static final int MIN_USER_INTERESTS = 0; - private static final int MAX_USER_INTERESTS = 7; - private String[] INTERESTS = { "bass", "music", "databases", "fishing", "tennis", "squash", "computers", "books", - "movies", "cigars", "wine", "running", "walking", "skiing", "basketball", "video games", "cooking", - "coffee", "base jumping", "puzzles" }; - - private static final String[] STREETS = { "Main St.", "Oak St.", "7th St.", "Washington St.", "Cedar St.", - "Lake St.", "Hill St.", "Park St.", "View St." }; - private static final int MIN_STREET_NUM = 1; - private static final int MAX_STREET_NUM = 10000; - private static final String[] CITIES = { "Seattle", "Irvine", "Laguna Beach", "Los Angeles", "San Clemente", - "Huntington Beach", "Portland" }; - private static final int MIN_ZIP = 100000; - private static final int MAX_ZIP = 999999; - private static final String[] LAT_LONGS = { "47,-122", "33,-117", "33,-117", "34,-118", "33,-117", "33,-117", - "45,-122" }; - - private static final int MIN_MEMBERSHIPS = 1; - private static final int MAX_MEMBERSHIPS = 10; - private static final int MIN_SIG_ID = 1; - private static final int MAX_SIG_ID = 100; - private static final String[] CHAPTER_NAMES = { "Seattle", "Irvine", "Laguna Beach", "Los Angeles", "San Clemente", - "Huntington Beach", "Portland", "Newport Beach", "Kirkland" }; - private static final int MEMBER_SINCE_MIN_YEAR = 1970; - private static final int MEMBER_SINCE_MAX_YEAR = 1998; - - private Random rndValue = new Random(50); - private User user; - - private final class User { - private int firstNameIdx; - private int lastNameIdx; - private int[] interests = new int[MAX_USER_INTERESTS - MIN_USER_INTERESTS]; - int numInterests; - private int streetNumber; - private String street; - private String city; - private int zip; - private String latlong; - int numMemberships; - private int[] member_sigid = new int[MAX_MEMBERSHIPS]; - private String[] member_chap_name = new String[MAX_MEMBERSHIPS]; - private String[] member_since_date = new String[MAX_MEMBERSHIPS]; - - public void generateFieldValues() { - firstNameIdx = Math.abs(rndValue.nextInt()) % firstNames.length; - lastNameIdx = Math.abs(rndValue.nextInt()) % lastNames.length; - // name = firstNames[firstNameIx] + " " + lastNames[lastNameIx]; - numInterests = Math.abs((rndValue.nextInt()) % (MAX_USER_INTERESTS - MIN_USER_INTERESTS)) - + MIN_USER_INTERESTS; - for (int i = 0; i < numInterests; i++) { - interests[i] = Math.abs(rndValue.nextInt()) % INTERESTS.length; - } - streetNumber = Math.abs(rndValue.nextInt()) % (MAX_STREET_NUM - MIN_STREET_NUM) + MIN_STREET_NUM; - street = STREETS[Math.abs(rndValue.nextInt()) % STREETS.length]; - int cityIdx = Math.abs(rndValue.nextInt()) % CITIES.length; - city = CITIES[cityIdx]; - zip = Math.abs(rndValue.nextInt() % (MAX_ZIP - MIN_ZIP)) + MIN_ZIP; - latlong = LAT_LONGS[cityIdx]; - numMemberships = Math.abs(rndValue.nextInt()) % (MAX_MEMBERSHIPS - MIN_MEMBERSHIPS) + MIN_MEMBERSHIPS; - for (int i = 0; i < numMemberships; i++) { - member_sigid[i] = Math.abs(rndValue.nextInt()) % (MAX_SIG_ID - MIN_SIG_ID) + MIN_SIG_ID; - int cnIdx = Math.abs(rndValue.nextInt()) % CHAPTER_NAMES.length; - member_chap_name[i] = CHAPTER_NAMES[cnIdx]; - int msYear = Math.abs(rndValue.nextInt()) % (MEMBER_SINCE_MAX_YEAR - MEMBER_SINCE_MIN_YEAR) - + MEMBER_SINCE_MIN_YEAR; - int msMo = Math.abs(rndValue.nextInt()) % 12 + 1; - int msDay = Math.abs(rndValue.nextInt()) % 28 + 1; - member_since_date[i] = msYear + "-" + (msMo < 10 ? "0" : "") + msMo + "-" + (msDay < 10 ? "0" : "") - + msDay; - } - } - - public void write(Writer writer) throws IOException { - writer.append("{"); - writer.append(" \"name\": \""); - writer.append(firstNames[firstNameIdx]); - writer.append(" "); - writer.append(lastNames[lastNameIdx]); - writer.append("\", "); - - writer.append(" \"email\": \""); - writer.append(firstNames[firstNameIdx]); - writer.append("."); - writer.append(lastNames[lastNameIdx]); - writer.append("@example.com\", "); - - writer.append(" \"interests\": <"); - for (int i = 0; i < numInterests; i++) { - if (i > 0) { - writer.append(", "); - } - writer.append("\""); - writer.append(INTERESTS[interests[i]]); - writer.append("\""); - } - writer.append(">, "); - - writer.append(" \"address\": {"); - writer.append(" \"street\": \""); - writer.append(streetNumber + " " + street); - writer.append("\","); - writer.append(" \"city\": \""); - writer.append(city); - writer.append("\","); - writer.append(" \"zip\": \""); - writer.append(zip + "\","); - writer.append(" \"latlong\": point(\""); - writer.append(latlong); - writer.append("\")"); - writer.append("}, "); - - writer.append(" \"member_of\": <"); - for (int i = 0; i < numMemberships; i++) { - if (i > 0) { - writer.append(", "); - } - writer.append("{"); - writer.append(" \"sig_id\": "); - writer.append(member_sigid[i] + ","); - writer.append(" \"chapter_name\": \""); - writer.append(member_chap_name[i]); - writer.append("\","); - writer.append(" \"member_since\": date(\""); - writer.append(member_since_date[i]); - writer.append("\") }"); - } - writer.append(">"); - - writer.append(" }\n"); - } - } - - public void init() throws IOException { - { - List<String> tmpFirstNames = new ArrayList<String>(); - - FileInputStream fstream = new FileInputStream(FIRST_NAMES_FILE_NAME); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; - while ((strLine = br.readLine()) != null) { - String firstLetter = strLine.substring(0, 1); - String remainder = strLine.substring(1); - String capitalized = firstLetter.toUpperCase() + remainder.toLowerCase(); - tmpFirstNames.add(capitalized); - } - in.close(); - firstNames = tmpFirstNames.toArray(firstNames); - } - { - List<String> tmpLastNames = new ArrayList<String>(); - - FileInputStream fstream = new FileInputStream(LAST_NAMES_FILE_NAME); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; - while ((strLine = br.readLine()) != null) { - String firstLetter = strLine.substring(0, 1); - String remainder = strLine.substring(1); - String capitalized = firstLetter.toUpperCase() + remainder.toLowerCase(); - tmpLastNames.add(capitalized); - } - in.close(); - lastNames = tmpLastNames.toArray(firstNames); - } - user = new User(); - } - - public void generate() { - user.generateFieldValues(); - } - - public void write(Writer w) throws IOException { - user.write(w); - } - - public static void main(String[] args) throws IOException { - if (args.length != 2) { - System.err - .println("MUST PROVIDE 2 PARAMETERS, 1. output directory path and 2. number of records to generate."); - System.exit(1); - } - String outputFile = args[0]; - int numRecords = Integer.parseInt(args[1]); - Writer userFile = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile + File.separator - + "user.adm"))); - EventDataGen dgen = new EventDataGen(); - dgen.init(); - for (int i = 0; i < numRecords; i++) { - dgen.generate(); - dgen.write(userFile); - } - userFile.close(); - } -}