BIGTOP-1985. Extract name generator from BigPetStore data generator

Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/502bd784
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/502bd784
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/502bd784

Branch: refs/heads/master
Commit: 502bd784abeda6087215a98ca6719213457c6193
Parents: 15af83e
Author: RJ Nowling <[email protected]>
Authored: Tue Aug 25 09:30:49 2015 -0500
Committer: RJ Nowling <[email protected]>
Committed: Tue Aug 25 09:30:49 2015 -0500

----------------------------------------------------------------------
 .../bigpetstore-data-generator/build.gradle     |      1 +
 .../datagenerators/bigpetstore/Constants.java   |      2 -
 .../bigpetstore/CustomerGenerator.java          |      2 +-
 .../datagenerators/bigpetstore/DataLoader.java  |      9 +-
 .../datamodels/inputs/InputData.java            |     10 +-
 .../bigpetstore/datamodels/inputs/Names.java    |     46 -
 .../bigpetstore/datareaders/NameReader.java     |     62 -
 .../generators/customer/CustomerSampler.java    |     14 +-
 .../customer/CustomerSamplerBuilder.java        |      9 +-
 .../resources/input_data/namedb/data/data.dat   | 129036 ----------------
 .../resources/input_data/namedb/namedb.info     |     13 -
 .../customer/TestCustomerSampler.java           |     20 +-
 .../customer/TestCustomerSamplerBuilder.java    |     12 +-
 .../bigtop-name-generator/README.md             |     51 +
 .../bigtop-name-generator/build.gradle          |     63 +
 .../bigtop-name-generator/settings.gradle       |     16 +
 .../namegenerator/NameGenerator.java            |     40 +
 .../namegenerator/NameReader.java               |     68 +
 .../datagenerators/namegenerator/Names.java     |     46 +
 .../resources/input_data/namedb/data/data.dat   | 129036 ++++++++++++++++
 .../resources/input_data/namedb/namedb.info     |     12 +
 .../namegenerator/TestNameGenerator.java        |     39 +
 22 files changed, 129397 insertions(+), 129210 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/build.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/build.gradle b/bigtop-data-generators/bigpetstore-data-generator/build.gradle
index d18cac5..57f0692 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/build.gradle
+++ b/bigtop-data-generators/bigpetstore-data-generator/build.gradle
@@ -60,6 +60,7 @@ dependencies {
     compile 'com.google.code.gson:gson:2.3'
     compile 'org.apache.commons:commons-lang3:3.4'
     compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT'
+    compile 'org.apache.bigtop:bigtop-name-generator:1.1.0-SNAPSHOT'
 
     testCompile 'junit:junit:4.+'
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
index 21827d5..1e8e758 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
@@ -42,8 +42,6 @@ public class Constants
        public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv");
        public static final File POPULATION_FILE = new File("population_data.csv");
 
-       public static final File NAMEDB_FILE = new File("namedb/data/data.dat");
-
        public static final ProductsCollectionSize PRODUCTS_COLLECTION = ProductsCollectionSize.MEDIUM;
 
        public static final double INCOME_SCALING_FACTOR = 100.0;

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
index 7fc2cbe..4be976a 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
@@ -28,7 +28,7 @@ public class CustomerGenerator
 {
        final Sampler<Customer> sampler;
 
-       public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory)
+       public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory) throws Exception
        {
                CustomerSamplerBuilder builder = new CustomerSamplerBuilder(stores, inputData, seedFactory);
                sampler = builder.build();

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
index d3393fe..ecbd6cf 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
@@ -21,9 +21,7 @@ import java.io.InputStream;
 import java.util.List;
 
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import org.apache.bigtop.datagenerators.bigpetstore.datareaders.NameReader;
 import org.apache.bigtop.datagenerators.bigpetstore.datareaders.ZipcodeReader;
 
 public class DataLoader
@@ -45,12 +43,7 @@ public class DataLoader
                List<ZipcodeRecord> zipcodeTable = zipcodeReader.readData();
                System.out.println("Read " + zipcodeTable.size() + " zipcode entries");
 
-               System.out.println("Reading name data");
-               NameReader nameReader = new NameReader(getResource(Constants.NAMEDB_FILE));
-               Names names = nameReader.readData();
-               System.out.println("Read " + names.getFirstNames().size() + " first names and " + names.getLastNames().size() + " last names");
-
-               InputData inputData = new InputData(zipcodeTable, names);
+               InputData inputData = new InputData(zipcodeTable);
 
                return inputData;
        }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
index 7f5eddf..c180136 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
@@ -24,22 +24,14 @@ public class InputData implements Serializable
        private static final long serialVersionUID = 9078989799806707788L;
 
        List<ZipcodeRecord> zipcodeTable;
-       Names names;
 
-       public InputData(List<ZipcodeRecord> zipcodeTable,
-                       Names names)
+       public InputData(List<ZipcodeRecord> zipcodeTable)
        {
                this.zipcodeTable = Collections.unmodifiableList(zipcodeTable);
-               this.names = names;
        }
 
        public List<ZipcodeRecord> getZipcodeTable()
        {
                return zipcodeTable;
        }
-
-       public Names getNames()
-       {
-               return names;
-       }
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
deleted file mode 100644
index 2d6da89..0000000
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import com.google.common.collect.ImmutableMap;
-
-public class Names implements Serializable
-{
-       private static final long serialVersionUID = 2731634747628534453L;
-
-       final ImmutableMap<String, Double> firstNames;
-       final ImmutableMap<String, Double> lastNames;
-
-       public Names(Map<String, Double> firstNames,
-                       Map<String, Double> lastNames)
-       {
-               this.firstNames = ImmutableMap.copyOf(firstNames);
-               this.lastNames = ImmutableMap.copyOf(lastNames);
-       }
-
-       public ImmutableMap<String, Double> getFirstNames()
-       {
-               return firstNames;
-       }
-
-       public ImmutableMap<String, Double> getLastNames()
-       {
-               return lastNames;
-       }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
deleted file mode 100644
index ec5412a..0000000
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bigtop.datagenerators.bigpetstore.datareaders;
-
-import java.io.FileNotFoundException;
-import java.io.InputStream;
-import java.util.Map;
-import java.util.Scanner;
-
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
-
-import com.google.common.collect.Maps;
-
-public class NameReader
-{
-       InputStream path;
-
-       public NameReader(InputStream path)
-       {
-               this.path = path;
-       }
-
-       public Names readData() throws FileNotFoundException
-       {
-               Scanner scanner = new Scanner(path);
-
-               Map<String, Double> firstNames = Maps.newHashMap();
-               Map<String, Double> lastNames = Maps.newHashMap();
-
-               while(scanner.hasNextLine())
-               {
-                       String line = scanner.nextLine();
-                       String[] cols = line.trim().split(",");
-
-                       String name = cols[0];
-                       double weight = Double.parseDouble(cols[5]);
-
-                       if(cols[4].equals("1"))
-                               firstNames.put(name, weight);
-                       if(cols[3].equals("1"))
-                               lastNames.put(name, weight);
-               }
-
-               scanner.close();
-
-               return new Names(firstNames, lastNames);
-
-       }
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
index 13b69a3..2bfb6e7 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
@@ -25,19 +25,18 @@ import org.apache.commons.lang3.tuple.Pair;
 public class CustomerSampler implements Sampler<Customer>
 {
        private final Sampler<Integer> idSampler;
-       private final Sampler<String> firstNameSampler;
-       private final Sampler<String> lastNameSampler;
+       private final Sampler<Pair<String, String>> nameSampler;
        private final Sampler<Store> storeSampler;
        private final ConditionalSampler<ZipcodeRecord, Store> locationSampler;
 
 
-       public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler,
-                       Sampler<String> lastNameSampler, Sampler<Store> storeSampler,
+       public CustomerSampler(Sampler<Integer> idSampler,
+                       Sampler<Pair<String, String>> nameSampler,
+                       Sampler<Store> storeSampler,
                        ConditionalSampler<ZipcodeRecord, Store> locationSampler)
        {
                this.idSampler = idSampler;
-               this.firstNameSampler = firstNameSampler;
-               this.lastNameSampler = lastNameSampler;
+               this.nameSampler = nameSampler;
                this.storeSampler = storeSampler;
                this.locationSampler = locationSampler;
        }
@@ -45,8 +44,7 @@ public class CustomerSampler implements Sampler<Customer>
        public Customer sample() throws Exception
        {
                Integer id = idSampler.sample();
-               Pair<String, String> name = Pair.of(firstNameSampler.sample(),
-                               lastNameSampler.sample());
+               Pair<String, String> name = nameSampler.sample();
                Store store = storeSampler.sample();
                ZipcodeRecord location = locationSampler.sample(store);
 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
index 56ab761..44ffa6a 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
@@ -23,12 +23,14 @@ import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.datagenerators.namegenerator.NameGenerator;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction;
 import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler;
+import org.apache.commons.lang3.tuple.Pair;
 
 import com.google.common.collect.Maps;
 
@@ -65,16 +67,15 @@ public class CustomerSamplerBuilder
                                };
        }
 
-       public Sampler<Customer> build()
+       public Sampler<Customer> build() throws Exception
        {
                ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores);
 
                Sampler<Integer> idSampler = new SequenceSampler();
-               Sampler<String> firstNameSampler = RouletteWheelSampler.create(inputData.getNames().getFirstNames(), seedFactory);
-               Sampler<String> lastNameSampler = RouletteWheelSampler.create(inputData.getNames().getLastNames(), seedFactory);
+               Sampler<Pair<String, String>> nameSampler = new NameGenerator(seedFactory);
                Sampler<Store> storeSampler = RouletteWheelSampler.create(stores, storePDF, seedFactory);
 
-               return new CustomerSampler(idSampler, firstNameSampler, lastNameSampler, storeSampler, buildLocationSampler());
+               return new CustomerSampler(idSampler, nameSampler, storeSampler, buildLocationSampler());
        }
 
 }

Reply via email to