BIGTOP-1985. Extract name generator from BigPetStore data generator
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/502bd784 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/502bd784 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/502bd784 Branch: refs/heads/master Commit: 502bd784abeda6087215a98ca6719213457c6193 Parents: 15af83e Author: RJ Nowling <[email protected]> Authored: Tue Aug 25 09:30:49 2015 -0500 Committer: RJ Nowling <[email protected]> Committed: Tue Aug 25 09:30:49 2015 -0500 ---------------------------------------------------------------------- .../bigpetstore-data-generator/build.gradle | 1 + .../datagenerators/bigpetstore/Constants.java | 2 - .../bigpetstore/CustomerGenerator.java | 2 +- .../datagenerators/bigpetstore/DataLoader.java | 9 +- .../datamodels/inputs/InputData.java | 10 +- .../bigpetstore/datamodels/inputs/Names.java | 46 - .../bigpetstore/datareaders/NameReader.java | 62 - .../generators/customer/CustomerSampler.java | 14 +- .../customer/CustomerSamplerBuilder.java | 9 +- .../resources/input_data/namedb/data/data.dat | 129036 ---------------- .../resources/input_data/namedb/namedb.info | 13 - .../customer/TestCustomerSampler.java | 20 +- .../customer/TestCustomerSamplerBuilder.java | 12 +- .../bigtop-name-generator/README.md | 51 + .../bigtop-name-generator/build.gradle | 63 + .../bigtop-name-generator/settings.gradle | 16 + .../namegenerator/NameGenerator.java | 40 + .../namegenerator/NameReader.java | 68 + .../datagenerators/namegenerator/Names.java | 46 + .../resources/input_data/namedb/data/data.dat | 129036 ++++++++++++++++ .../resources/input_data/namedb/namedb.info | 12 + .../namegenerator/TestNameGenerator.java | 39 + 22 files changed, 129397 insertions(+), 129210 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/build.gradle b/bigtop-data-generators/bigpetstore-data-generator/build.gradle index d18cac5..57f0692 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/build.gradle +++ b/bigtop-data-generators/bigpetstore-data-generator/build.gradle @@ -60,6 +60,7 @@ dependencies { compile 'com.google.code.gson:gson:2.3' compile 'org.apache.commons:commons-lang3:3.4' compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT' + compile 'org.apache.bigtop:bigtop-name-generator:1.1.0-SNAPSHOT' testCompile 'junit:junit:4.+' } http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java index 21827d5..1e8e758 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java @@ -42,8 +42,6 @@ public class Constants public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); public static final File POPULATION_FILE = new File("population_data.csv"); - public static final File NAMEDB_FILE = new File("namedb/data/data.dat"); - public static final ProductsCollectionSize PRODUCTS_COLLECTION = ProductsCollectionSize.MEDIUM; public static final double INCOME_SCALING_FACTOR = 100.0; http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java index 7fc2cbe..4be976a 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java @@ -28,7 +28,7 @@ public class CustomerGenerator { final Sampler<Customer> sampler; - public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory) + public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory) throws Exception { CustomerSamplerBuilder builder = new CustomerSamplerBuilder(stores, inputData, seedFactory); sampler = builder.build(); http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java index d3393fe..ecbd6cf 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java @@ -21,9 +21,7 @@ import java.io.InputStream; import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.datareaders.NameReader; import org.apache.bigtop.datagenerators.bigpetstore.datareaders.ZipcodeReader; public class DataLoader @@ -45,12 +43,7 @@ public class DataLoader List<ZipcodeRecord> zipcodeTable = zipcodeReader.readData(); System.out.println("Read " + zipcodeTable.size() + " zipcode entries"); - System.out.println("Reading name data"); - NameReader nameReader = new NameReader(getResource(Constants.NAMEDB_FILE)); - Names names = nameReader.readData(); - System.out.println("Read " + names.getFirstNames().size() + " first names and " + names.getLastNames().size() + " last names"); - - InputData inputData = new InputData(zipcodeTable, names); + InputData inputData = new InputData(zipcodeTable); return inputData; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java index 7f5eddf..c180136 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java @@ -24,22 +24,14 @@ public class InputData implements Serializable private static final long serialVersionUID = 9078989799806707788L; List<ZipcodeRecord> zipcodeTable; - Names names; - public InputData(List<ZipcodeRecord> zipcodeTable, - Names names) + public InputData(List<ZipcodeRecord> zipcodeTable) { this.zipcodeTable = Collections.unmodifiableList(zipcodeTable); - this.names = names; } public List<ZipcodeRecord> getZipcodeTable() { return zipcodeTable; } - - public Names getNames() - { - return names; - } } http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java deleted file mode 100644 index 2d6da89..0000000 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs; - -import java.io.Serializable; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; - -public class Names implements Serializable -{ - private static final long serialVersionUID = 2731634747628534453L; - - final ImmutableMap<String, Double> firstNames; - final ImmutableMap<String, Double> lastNames; - - public Names(Map<String, Double> firstNames, - Map<String, Double> lastNames) - { - this.firstNames = ImmutableMap.copyOf(firstNames); - this.lastNames = ImmutableMap.copyOf(lastNames); - } - - public ImmutableMap<String, Double> getFirstNames() - { - return firstNames; - } - - public ImmutableMap<String, Double> getLastNames() - { - return lastNames; - } -} http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java deleted file mode 100644 index ec5412a..0000000 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.datagenerators.bigpetstore.datareaders; - -import java.io.FileNotFoundException; -import java.io.InputStream; -import java.util.Map; -import java.util.Scanner; - -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names; - -import com.google.common.collect.Maps; - -public class NameReader -{ - InputStream path; - - public NameReader(InputStream path) - { - this.path = path; - } - - public Names readData() throws FileNotFoundException - { - Scanner scanner = new Scanner(path); - - Map<String, Double> firstNames = Maps.newHashMap(); - Map<String, Double> lastNames = Maps.newHashMap(); - - while(scanner.hasNextLine()) - { - String line = scanner.nextLine(); - String[] cols = line.trim().split(","); - - String name = cols[0]; - double weight = Double.parseDouble(cols[5]); - - if(cols[4].equals("1")) - firstNames.put(name, weight); - if(cols[3].equals("1")) - lastNames.put(name, weight); - } - - scanner.close(); - - return new Names(firstNames, lastNames); - - } -} http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java index 13b69a3..2bfb6e7 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java @@ -25,19 +25,18 @@ import org.apache.commons.lang3.tuple.Pair; public class CustomerSampler implements Sampler<Customer> { private final Sampler<Integer> idSampler; - private final Sampler<String> firstNameSampler; - private final Sampler<String> lastNameSampler; + private final Sampler<Pair<String, String>> nameSampler; private final Sampler<Store> storeSampler; private final ConditionalSampler<ZipcodeRecord, Store> locationSampler; - public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler, - Sampler<String> lastNameSampler, Sampler<Store> storeSampler, + public CustomerSampler(Sampler<Integer> idSampler, + Sampler<Pair<String, String>> nameSampler, + Sampler<Store> storeSampler, ConditionalSampler<ZipcodeRecord, Store> locationSampler) { this.idSampler = idSampler; - this.firstNameSampler = firstNameSampler; - this.lastNameSampler = lastNameSampler; + this.nameSampler = nameSampler; this.storeSampler = storeSampler; this.locationSampler = locationSampler; } @@ -45,8 +44,7 @@ public class CustomerSampler implements Sampler<Customer> public Customer sample() throws Exception { Integer id = idSampler.sample(); - Pair<String, String> name = Pair.of(firstNameSampler.sample(), - lastNameSampler.sample()); + Pair<String, String> name = nameSampler.sample(); Store store = storeSampler.sample(); ZipcodeRecord location = locationSampler.sample(store); http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java index 56ab761..44ffa6a 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java @@ -23,12 +23,14 @@ import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.namegenerator.NameGenerator; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler; import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler; +import org.apache.commons.lang3.tuple.Pair; import com.google.common.collect.Maps; @@ -65,16 +67,15 @@ public class CustomerSamplerBuilder }; } - public Sampler<Customer> build() + public Sampler<Customer> build() throws Exception { ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores); Sampler<Integer> idSampler = new SequenceSampler(); - Sampler<String> firstNameSampler = RouletteWheelSampler.create(inputData.getNames().getFirstNames(), seedFactory); - Sampler<String> lastNameSampler = RouletteWheelSampler.create(inputData.getNames().getLastNames(), seedFactory); + Sampler<Pair<String, String>> nameSampler = new NameGenerator(seedFactory); Sampler<Store> storeSampler = RouletteWheelSampler.create(stores, storePDF, seedFactory); - return new CustomerSampler(idSampler, firstNameSampler, lastNameSampler, storeSampler, buildLocationSampler()); + return new CustomerSampler(idSampler, nameSampler, storeSampler, buildLocationSampler()); } }
