http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
deleted file mode 100644
index 1f59f9e..0000000
--- 
a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
+++ /dev/null
@@ -1,13 +0,0 @@
-name = Name DB
-description = Defines a database for maintain a list of names.
-package = Fields
-version = VERSION
-core = 7.x
-dependencies[] = name
-
-; Information added by drupal.org packaging script on 2011-06-08
-version = "7.x-1.0-beta2"
-core = "7.x"
-project = "namedb"
-datestamp = "1307496118"
-

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
 
b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
index 8bb3c87..a176333 100644
--- 
a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
+++ 
b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
@@ -20,7 +20,6 @@ import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
@@ -28,8 +27,6 @@ import org.apache.bigtop.datagenerators.bigpetstore.Constants;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import 
org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import 
org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerLocationPDF;
-import 
org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSampler;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import 
org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction;
 import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler;
@@ -39,6 +36,7 @@ import 
org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
 public class TestCustomerSampler
@@ -69,7 +67,12 @@ public class TestCustomerSampler
        {
                SeedFactory factory = new SeedFactory(1234);
 
-               Collection<String> nameList = Arrays.asList(new String[] 
{"Fred", "Gary", "George", "Fiona"});
+               List<Pair<String, String>> nameList = Lists.newArrayList();
+               nameList.add(Pair.of("Fred", "Fred"));
+               nameList.add(Pair.of("Gary", "Gary"));
+               nameList.add(Pair.of("George", "George"));
+               nameList.add(Pair.of("Fiona", "Fiona"));
+
                List<ZipcodeRecord> zipcodes = Arrays.asList(new 
ZipcodeRecord[] {
                                new ZipcodeRecord("11111", Pair.of(1.0, 1.0), 
"AZ", "Tempte", 30000.0, 100),
                                new ZipcodeRecord("22222", Pair.of(2.0, 2.0), 
"AZ", "Phoenix", 45000.0, 200),
@@ -85,21 +88,18 @@ public class TestCustomerSampler
 
 
                Sampler<Integer> idSampler = new SequenceSampler();
-               Sampler<String> nameSampler = 
RouletteWheelSampler.createUniform(nameList, factory);
+               Sampler<Pair<String, String>> nameSampler = 
RouletteWheelSampler.createUniform(nameList, factory);
                Sampler<Store> storeSampler = 
RouletteWheelSampler.createUniform(stores, factory);
                ConditionalSampler<ZipcodeRecord, Store> zipcodeSampler = 
buildLocationSampler(stores, zipcodes, factory);
 
-               Sampler<Customer> sampler = new CustomerSampler(idSampler, 
nameSampler, nameSampler, storeSampler, zipcodeSampler);
+               Sampler<Customer> sampler = new CustomerSampler(idSampler, 
nameSampler, storeSampler, zipcodeSampler);
 
                Customer customer = sampler.sample();
 
                assertNotNull(customer);
                assertTrue(customer.getId() >= 0);
                assertNotNull(customer.getName());
-               assertNotNull(customer.getName().getLeft());
-               assertTrue(nameList.contains(customer.getName().getLeft()));
-               assertNotNull(customer.getName().getRight());
-               assertTrue(nameList.contains(customer.getName().getRight()));
+               assertTrue(nameList.contains(customer.getName()));
                assertNotNull(customer.getLocation());
                assertTrue(zipcodes.contains(customer.getLocation()));
 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
 
b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
index 786c7fc..6ddaa94 100644
--- 
a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
+++ 
b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
@@ -20,38 +20,30 @@ import static org.junit.Assert.assertTrue;
 
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import 
org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
 import 
org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import 
org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSamplerBuilder;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.Test;
 
-import com.google.common.collect.ImmutableMap;
-
 public class TestCustomerSamplerBuilder
 {
 
        @Test
        public void testSample() throws Exception
        {
-               Map<String, Double> nameList = ImmutableMap.of("Fred", 1.0, 
"George", 1.0, "Gary", 1.0, "Fiona", 1.0);
                List<ZipcodeRecord> zipcodes = Arrays.asList(new 
ZipcodeRecord[] {
                                new ZipcodeRecord("11111", Pair.of(1.0, 1.0), 
"AZ", "Tempte", 30000.0, 100),
                                new ZipcodeRecord("22222", Pair.of(2.0, 2.0), 
"AZ", "Phoenix", 45000.0, 200),
                                new ZipcodeRecord("33333", Pair.of(3.0, 3.0), 
"AZ", "Flagstaff", 60000.0, 300)
                                });
 
-               Names names = new Names(nameList, nameList);
-
                // don't need product categories for building customers
-               InputData inputData = new InputData(zipcodes, names);
+               InputData inputData = new InputData(zipcodes);
 
                List<Store> stores = Arrays.asList(new Store(0, "Store_0", 
zipcodes.get(0)),
                                new Store(1, "Store_1", zipcodes.get(1)),
@@ -68,8 +60,6 @@ public class TestCustomerSamplerBuilder
                assertNotNull(customer);
                assertTrue(customer.getId() >= 0);
                assertNotNull(customer.getName());
-               assertNotNull(customer.getName().getLeft());
-               assertNotNull(customer.getName().getRight());
                assertNotNull(customer.getLocation());
 
        }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/README.md
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/README.md 
b/bigtop-data-generators/bigtop-name-generator/README.md
new file mode 100644
index 0000000..aa204b2
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/README.md
@@ -0,0 +1,51 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+BigTop Name Generator
+=====================
+
+Library for generating first and last names by sampling from the [Drupal
+Name Database](https://www.drupal.org/project/namedb), which itself is based
+on U.S. Census data.  Names are generated in proportion to their
+frequencies according to the U.S. Census.
+
+Building and Testing
+--------------------
+We use the Gradle build system for the name generator, so you'll need
+to install Gradle on your system.
+Once that's done, you can use Gradle to run the included unit tests
+and build the name generator jar.
+
+To build:
+
+    $ gradle build
+
+This will create several directories and a jar located at:
+
+    build/libs/bigtop-name-generator-1.1.0-SNAPSHOT.jar
+
+Building automatically runs the included unit tests.  If you would prefer
+to just run the unit tests, you can do so by:
+
+    $ gradle test
+
+To clean up the build files, run:
+
+    $ gradle clean
+
+To install a jar into your local maven repository:
+
+    $ gradle install

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/build.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/build.gradle 
b/bigtop-data-generators/bigtop-name-generator/build.gradle
new file mode 100644
index 0000000..08d9d34
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/build.gradle
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Build script for the bigtop-name-generator library.
+apply plugin: 'eclipse'
+apply plugin: 'groovy'
+apply plugin: 'java'
+apply plugin: 'maven'
+// Maven coordinates (group and version) of the produced artifact.
+group = 'org.apache.bigtop'
+version = '1.1.0-SNAPSHOT'
+
+jar {
+
+    // Bundle everything on the runtime configuration into the jar:
+    // directories are added as-is, archives are unpacked via zipTree.
+    from {
+        configurations.runtime.collect {
+            it.isDirectory() ? it : zipTree(it)
+        }
+    }
+
+    // Record a title and the project version in the jar manifest.
+    manifest {
+       attributes 'Title': 'BigTop Name Generator', 'Version': version
+    }
+}
+
+// Dependencies are resolved from the local Maven repository and Maven Central.
+repositories {
+       mavenLocal()
+       mavenCentral()
+}
+
+test {
+     // show standard out and error on console
+     testLogging.showStandardStreams = true
+
+     // listen to events in the test execution lifecycle
+     beforeTest { descriptor ->
+       logger.lifecycle("Running test: " + descriptor)
+     }
+
+     // listen to standard out and standard error of the test JVM(s)
+     onOutput { descriptor, event ->
+       logger.lifecycle("Test: " + descriptor + " produced standard out/err: " 
+ event.message )
+     }
+
+}
+
+dependencies {
+    // Libraries needed to compile and run the name generator.
+    compile 'com.google.guava:guava:18.0'
+    compile 'org.apache.commons:commons-lang3:3.4'
+    compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT'
+
+    // NOTE(review): '4.+' is a dynamic version, so the JUnit release used
+    // can change between builds - consider pinning it.
+    testCompile 'junit:junit:4.+'
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/settings.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/settings.gradle 
b/bigtop-data-generators/bigtop-name-generator/settings.gradle
new file mode 100644
index 0000000..e738f38
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/settings.gradle
@@ -0,0 +1,16 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Name of the root Gradle project for this build.
+rootProject.name = "bigtop-name-generator"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
new file mode 100644
index 0000000..4b5f620
--- /dev/null
+++ 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import org.apache.bigtop.datagenerators.samplers.SeedFactory;
+import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler;
+import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
+import org.apache.commons.lang3.tuple.Pair;
+
+/**
+ * Sampler that produces full names as (first name, last name) pairs.
+ *
+ * The name tables are loaded through {@link NameReader}; first and last
+ * names are drawn independently, each from its own
+ * {@link RouletteWheelSampler} built from the corresponding table.
+ */
+public class NameGenerator implements Sampler<Pair<String, String>>
+{
+       private final Sampler<String> firstNameSampler;
+       private final Sampler<String> lastNameSampler;
+
+       /**
+        * Loads the name data and builds the first-name and last-name samplers.
+        *
+        * @param seedFactory seed source handed to both underlying samplers
+        * @throws Exception if the name data cannot be read or a sampler
+        *         cannot be created
+        */
+       public NameGenerator(SeedFactory seedFactory) throws Exception
+       {
+               Names names = new NameReader().readData();
+
+               // NOTE(review): both samplers share one SeedFactory; presumably
+               // each create() call draws from it, so keep this creation order
+               // to preserve reproducibility - confirm against SeedFactory.
+               firstNameSampler = RouletteWheelSampler.create(names.getFirstNames(), seedFactory);
+               lastNameSampler = RouletteWheelSampler.create(names.getLastNames(), seedFactory);
+       }
+
+       /**
+        * Draws one first name and one last name.
+        *
+        * @return pair whose left element is the sampled first name and whose
+        *         right element is the sampled last name
+        * @throws Exception if either underlying sampler fails
+        */
+       @Override
+       public Pair<String, String> sample() throws Exception
+       {
+               return Pair.of(firstNameSampler.sample(), lastNameSampler.sample());
+       }
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
new file mode 100644
index 0000000..d53529f
--- /dev/null
+++ 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Scanner;
+
+import com.google.common.collect.Maps;
+
+public class NameReader
+{
+       InputStream path;
+
+       public NameReader() throws Exception
+       {
+               this.path = getResource(new File("namedb/data/data.dat"));
+       }
+
+       private InputStream getResource(File filename) throws Exception
+       {
+               InputStream stream = 
getClass().getResourceAsStream("/input_data/" + filename);
+               return new BufferedInputStream(stream);
+       }
+
+       public Names readData() throws FileNotFoundException
+       {
+               Scanner scanner = new Scanner(path);
+
+               Map<String, Double> firstNames = Maps.newHashMap();
+               Map<String, Double> lastNames = Maps.newHashMap();
+
+               while(scanner.hasNextLine())
+               {
+                       String line = scanner.nextLine();
+                       String[] cols = line.trim().split(",");
+
+                       String name = cols[0];
+                       double weight = Double.parseDouble(cols[5]);
+
+                       if(cols[4].equals("1"))
+                               firstNames.put(name, weight);
+                       if(cols[3].equals("1"))
+                               lastNames.put(name, weight);
+               }
+
+               scanner.close();
+
+               return new Names(firstNames, lastNames);
+
+       }
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
----------------------------------------------------------------------
diff --git 
a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
new file mode 100644
index 0000000..a4d9e03
--- /dev/null
+++ 
b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * Serializable, immutable holder for two name tables: one for first names
+ * and one for last names. Each table maps a name to a {@code Double} value
+ * and is stored as an immutable copy of the map given to the constructor.
+ */
+public class Names implements Serializable
+{
+       private static final long serialVersionUID = 2731634747628534453L;
+
+       final ImmutableMap<String, Double> firstNames;
+       final ImmutableMap<String, Double> lastNames;
+
+       /**
+        * Creates the holder from the two given tables.
+        *
+        * @param firstNames table of first names
+        * @param lastNames table of last names
+        */
+       public Names(Map<String, Double> firstNames,
+                       Map<String, Double> lastNames)
+       {
+               this.firstNames = snapshot(firstNames);
+               this.lastNames = snapshot(lastNames);
+       }
+
+       /** Returns an immutable copy of the given table. */
+       private static ImmutableMap<String, Double> snapshot(Map<String, Double> table)
+       {
+               return ImmutableMap.copyOf(table);
+       }
+
+       /**
+        * @return the immutable first-name table
+        */
+       public ImmutableMap<String, Double> getFirstNames()
+       {
+               return this.firstNames;
+       }
+
+       /**
+        * @return the immutable last-name table
+        */
+       public ImmutableMap<String, Double> getLastNames()
+       {
+               return this.lastNames;
+       }
+}

Reply via email to