http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info b/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info deleted file mode 100644 index 1f59f9e..0000000 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info +++ /dev/null @@ -1,13 +0,0 @@ -name = Name DB -description = Defines a database for maintain a list of names. -package = Fields -version = VERSION -core = 7.x -dependencies[] = name - -; Information added by drupal.org packaging script on 2011-06-08 -version = "7.x-1.0-beta2" -core = "7.x" -project = "namedb" -datestamp = "1307496118" -
http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java index 8bb3c87..a176333 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java @@ -20,7 +20,6 @@ import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; import java.util.Map; @@ -28,8 +27,6 @@ import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerLocationPDF; -import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSampler; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler; @@ -39,6 +36,7 @@ import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler; import org.apache.commons.lang3.tuple.Pair; 
import org.junit.Test; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; public class TestCustomerSampler @@ -69,7 +67,12 @@ public class TestCustomerSampler { SeedFactory factory = new SeedFactory(1234); - Collection<String> nameList = Arrays.asList(new String[] {"Fred", "Gary", "George", "Fiona"}); + List<Pair<String, String>> nameList = Lists.newArrayList(); + nameList.add(Pair.of("Fred", "Fred")); + nameList.add(Pair.of("Gary", "Gary")); + nameList.add(Pair.of("George", "George")); + nameList.add(Pair.of("Fiona", "Fiona")); + List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), @@ -85,21 +88,18 @@ public class TestCustomerSampler Sampler<Integer> idSampler = new SequenceSampler(); - Sampler<String> nameSampler = RouletteWheelSampler.createUniform(nameList, factory); + Sampler<Pair<String, String>> nameSampler = RouletteWheelSampler.createUniform(nameList, factory); Sampler<Store> storeSampler = RouletteWheelSampler.createUniform(stores, factory); ConditionalSampler<ZipcodeRecord, Store> zipcodeSampler = buildLocationSampler(stores, zipcodes, factory); - Sampler<Customer> sampler = new CustomerSampler(idSampler, nameSampler, nameSampler, storeSampler, zipcodeSampler); + Sampler<Customer> sampler = new CustomerSampler(idSampler, nameSampler, storeSampler, zipcodeSampler); Customer customer = sampler.sample(); assertNotNull(customer); assertTrue(customer.getId() >= 0); assertNotNull(customer.getName()); - assertNotNull(customer.getName().getLeft()); - assertTrue(nameList.contains(customer.getName().getLeft())); - assertNotNull(customer.getName().getRight()); - assertTrue(nameList.contains(customer.getName().getRight())); + assertTrue(nameList.contains(customer.getName())); assertNotNull(customer.getLocation()); assertTrue(zipcodes.contains(customer.getLocation())); 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java index 786c7fc..6ddaa94 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java @@ -20,38 +20,30 @@ import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.List; -import java.util.Map; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSamplerBuilder; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; -import com.google.common.collect.ImmutableMap; - public class TestCustomerSamplerBuilder { @Test public void testSample() throws Exception { - Map<String, Double> nameList = 
ImmutableMap.of("Fred", 1.0, "George", 1.0, "Gary", 1.0, "Fiona", 1.0); List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); - Names names = new Names(nameList, nameList); - // don't need product categories for building customers - InputData inputData = new InputData(zipcodes, names); + InputData inputData = new InputData(zipcodes); List<Store> stores = Arrays.asList(new Store(0, "Store_0", zipcodes.get(0)), new Store(1, "Store_1", zipcodes.get(1)), @@ -68,8 +60,6 @@ public class TestCustomerSamplerBuilder assertNotNull(customer); assertTrue(customer.getId() >= 0); assertNotNull(customer.getName()); - assertNotNull(customer.getName().getLeft()); - assertNotNull(customer.getName().getRight()); assertNotNull(customer.getLocation()); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/README.md ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/README.md b/bigtop-data-generators/bigtop-name-generator/README.md new file mode 100644 index 0000000..aa204b2 --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/README.md @@ -0,0 +1,51 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +BigTop Name Generator +===================== + +Library for generating first and last names by sampling from the [Drupal +Name Database](https://www.drupal.org/project/namedb) which itself is based +on U.S. Census data. Generated names are sampled in proportion to +their frequencies according to the U.S. Census. + +Building and Testing +-------------------- +We use the Gradle build system for the name generator so you'll need +to install Gradle on your system. +Once that's done, you can use gradle to run the included unit tests +and build the name generator jar. + +To build: + + $ gradle build + +This will create several directories and a jar located at: + + build/libs/bigtop-name-generator-1.1.0-SNAPSHOT.jar + +Building automatically runs the included unit tests. If you would prefer +to just run the unit tests, you can do so by: + + $ gradle test + +To clean up the build files, run: + + $ gradle clean + +To install a jar into your local maven repository: + + $ gradle install http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/build.gradle b/bigtop-data-generators/bigtop-name-generator/build.gradle new file mode 100644 index 0000000..08d9d34 --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/build.gradle @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'eclipse' +apply plugin: 'groovy' +apply plugin: 'java' +apply plugin: 'maven' +group = 'org.apache.bigtop' +version = '1.1.0-SNAPSHOT' + +jar { + + from { + configurations.runtime.collect { + it.isDirectory() ? it : zipTree(it) + } + } + + manifest { + attributes 'Title': 'BigTop Name Generator', 'Version': version + } +} + +repositories { + mavenLocal() + mavenCentral() +} + +test { + // show standard out and error on console + testLogging.showStandardStreams = true + + // listen to events in the test execution lifecycle + beforeTest { descriptor -> + logger.lifecycle("Running test: " + descriptor) + } + + // listen to standard out and standard error of the test JVM(s) + onOutput { descriptor, event -> + logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) + } + +} + +dependencies { + compile 'com.google.guava:guava:18.0' + compile 'org.apache.commons:commons-lang3:3.4' + compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT' + + testCompile 'junit:junit:4.+' +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/settings.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/settings.gradle 
b/bigtop-data-generators/bigtop-name-generator/settings.gradle new file mode 100644 index 0000000..e738f38 --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/settings.gradle @@ -0,0 +1,16 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +rootProject.name = "bigtop-name-generator" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java new file mode 100644 index 0000000..4b5f620 --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.namegenerator; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; +import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.apache.commons.lang3.tuple.Pair; + +public class NameGenerator implements Sampler<Pair<String, String>> +{ + private final Sampler<String> firstNameSampler; + private final Sampler<String> lastNameSampler; + + public NameGenerator(SeedFactory seedFactory) throws Exception + { + Names names = new NameReader().readData(); + + firstNameSampler = RouletteWheelSampler.create(names.getFirstNames(), seedFactory); + lastNameSampler = RouletteWheelSampler.create(names.getLastNames(), seedFactory); + } + + public Pair<String, String> sample() throws Exception + { + return Pair.of(firstNameSampler.sample(), lastNameSampler.sample()); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java ---------------------------------------------------------------------- diff --git 
a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java new file mode 100644 index 0000000..d53529f --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.namegenerator; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Map; +import java.util.Scanner; + +import com.google.common.collect.Maps; + +public class NameReader +{ + InputStream path; + + public NameReader() throws Exception + { + this.path = getResource(new File("namedb/data/data.dat")); + } + + private InputStream getResource(File filename) throws Exception + { + InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); + return new BufferedInputStream(stream); + } + + public Names readData() throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + Map<String, Double> firstNames = Maps.newHashMap(); + Map<String, Double> lastNames = Maps.newHashMap(); + + while(scanner.hasNextLine()) + { + String line = scanner.nextLine(); + String[] cols = line.trim().split(","); + + String name = cols[0]; + double weight = Double.parseDouble(cols[5]); + + if(cols[4].equals("1")) + firstNames.put(name, weight); + if(cols[3].equals("1")) + lastNames.put(name, weight); + } + + scanner.close(); + + return new Names(firstNames, lastNames); + + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java new file mode 100644 index 0000000..a4d9e03 --- /dev/null +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.namegenerator; + +import java.io.Serializable; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + +public class Names implements Serializable +{ + private static final long serialVersionUID = 2731634747628534453L; + + final ImmutableMap<String, Double> firstNames; + final ImmutableMap<String, Double> lastNames; + + public Names(Map<String, Double> firstNames, + Map<String, Double> lastNames) + { + this.firstNames = ImmutableMap.copyOf(firstNames); + this.lastNames = ImmutableMap.copyOf(lastNames); + } + + public ImmutableMap<String, Double> getFirstNames() + { + return firstNames; + } + + public ImmutableMap<String, Double> getLastNames() + { + return lastNames; + } +}
