Repository: bigtop Updated Branches: refs/heads/master b25084fa7 -> 431b89516
BIGTOP-1995. Update BigPetStore to use external locations data Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/431b8951 Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/431b8951 Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/431b8951 Branch: refs/heads/master Commit: 431b895166f8f976990c66733cfa210bdaa6fee0 Parents: b25084f Author: RJ Nowling <[email protected]> Authored: Sun Aug 30 01:35:33 2015 -0500 Committer: RJ Nowling <[email protected]> Committed: Sun Aug 30 02:03:40 2015 -0500 ---------------------------------------------------------------------- .../bigpetstore-data-generator/build.gradle | 4 +- .../datagenerators/bigpetstore/Constants.java | 5 -- .../datagenerators/bigpetstore/DataLoader.java | 25 ++------ .../bigpetstore/datamodels/Customer.java | 8 +-- .../bigpetstore/datamodels/Store.java | 8 +-- .../datamodels/inputs/InputData.java | 8 ++- .../customer/CustomerLocationPDF.java | 20 +++--- .../generators/customer/CustomerSampler.java | 8 +-- .../customer/CustomerSamplerBuilder.java | 14 ++-- .../store/StoreLocationIncomePDF.java | 14 ++-- .../store/StoreLocationPopulationPDF.java | 10 +-- .../generators/store/StoreSampler.java | 8 +-- .../generators/store/StoreSamplerBuilder.java | 16 ++--- .../customer/TestCustomerLocationPDF.java | 11 ++-- .../customer/TestCustomerSampler.java | 24 +++---- .../customer/TestCustomerSamplerBuilder.java | 10 +-- .../store/TestStoreLocationIncomePDF.java | 13 ++-- .../store/TestStoreLocationPopulationPDF.java | 13 ++-- .../generators/store/TestStoreSampler.java | 11 ++-- .../store/TestStoreSamplerBuilder.java | 11 ++-- .../namegenerator/NameGenerator.java | 5 +- .../namegenerator/NameReader.java | 2 +- .../samplers/wfs/DiscreteWeightFunction.java | 27 ++++++++ .../samplers/wfs/MultinomialWF.java | 67 ++++++++++++++++++++ bigtop-data-generators/settings.gradle | 2 +- 25 files changed, 208 insertions(+), 136 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/build.gradle b/bigtop-data-generators/bigpetstore-data-generator/build.gradle index 0d2bead..9787512 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/build.gradle +++ b/bigtop-data-generators/bigpetstore-data-generator/build.gradle @@ -59,7 +59,9 @@ dependencies { compile 'com.google.guava:guava:18.0' compile 'com.google.code.gson:gson:2.3' compile 'org.apache.commons:commons-lang3:3.4' - compile project(":bigtop-samplers"), project(":bigtop-name-generator") + compile project(":bigtop-samplers") + compile project(":bigtop-name-generator") + compile project(":bigtop-location-data") testCompile 'junit:junit:4.+' } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java index 1e8e758..ef11299 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java @@ -15,7 +15,6 @@ */ package org.apache.bigtop.datagenerators.bigpetstore; -import java.io.File; import java.util.List; import java.util.Set; @@ -38,10 +37,6 @@ public class Constants MEDIUM; } - public static final File COORDINATES_FILE = new File("zips.csv"); - public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); - public static final File POPULATION_FILE = new File("population_data.csv"); - public static final ProductsCollectionSize PRODUCTS_COLLECTION = ProductsCollectionSize.MEDIUM; public static final double INCOME_SCALING_FACTOR = 100.0; http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java index ecbd6cf..e8a4023 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java @@ -15,35 +15,18 @@ */ package org.apache.bigtop.datagenerators.bigpetstore; -import java.io.BufferedInputStream; -import java.io.File; -import java.io.InputStream; import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.datareaders.ZipcodeReader; +import org.apache.bigtop.datagenerators.locations.Location; +import org.apache.bigtop.datagenerators.locations.LocationReader; public class DataLoader { - private InputStream getResource(File filename) throws Exception - { - InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); - return new BufferedInputStream(stream); - } - public InputData loadData() throws Exception { - - System.out.println("Reading zipcode data"); - ZipcodeReader zipcodeReader = new ZipcodeReader(); - zipcodeReader.setCoordinatesFile(getResource(Constants.COORDINATES_FILE)); - zipcodeReader.setIncomesFile(getResource(Constants.INCOMES_FILE)); - zipcodeReader.setPopulationFile(getResource(Constants.POPULATION_FILE)); - List<ZipcodeRecord> zipcodeTable = zipcodeReader.readData(); - System.out.println("Read " + zipcodeTable.size() + " zipcode entries"); - - InputData inputData = new InputData(zipcodeTable); + List<Location> locations = new LocationReader().readData(); + InputData inputData = new InputData(locations); return inputData; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java index 8847a36..b46c8cd 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Customer.java @@ -17,7 +17,7 @@ package org.apache.bigtop.datagenerators.bigpetstore.datamodels; import java.io.Serializable; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; public class Customer implements Serializable @@ -26,10 +26,10 @@ public class Customer implements Serializable int id; Pair<String, String> name; - ZipcodeRecord location; + Location location; Store store; - public Customer(int id, Pair<String, String> name, Store store, ZipcodeRecord location) + public Customer(int id, Pair<String, String> name, Store store, Location location) { this.id = id; this.name = name; @@ -47,7 +47,7 @@ public class Customer implements Serializable return name; } - public ZipcodeRecord getLocation() + public Location getLocation() { return location; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java index 61730dd..c9c4443 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/Store.java @@ -17,7 +17,7 @@ package org.apache.bigtop.datagenerators.bigpetstore.datamodels; import java.io.Serializable; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; public class Store implements Serializable { @@ -25,9 +25,9 @@ public class Store implements Serializable int id; String name; - ZipcodeRecord location; + Location location; - public Store(int id, String name, ZipcodeRecord location) + public Store(int id, String name, Location location) { this.id = id; this.name = name; @@ -44,7 +44,7 @@ public class Store implements Serializable return name; } - public ZipcodeRecord getLocation() + public Location getLocation() { return location; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java index c180136..074ea7e 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java @@ -19,18 +19,20 @@ import java.io.Serializable; import java.util.Collections; import java.util.List; +import org.apache.bigtop.datagenerators.locations.Location; + public class InputData implements Serializable { private static final long serialVersionUID = 9078989799806707788L; - List<ZipcodeRecord> zipcodeTable; + List<Location> zipcodeTable; - public InputData(List<ZipcodeRecord> zipcodeTable) + public InputData(List<Location> zipcodeTable) { this.zipcodeTable = Collections.unmodifiableList(zipcodeTable); } - public List<ZipcodeRecord> getZipcodeTable() + public List<Location> getZipcodeTable() { return zipcodeTable; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java index 08cbc81..0b5c480 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerLocationPDF.java @@ -19,29 +19,29 @@ import java.util.List; import java.util.Map; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRecord> +public class CustomerLocationPDF implements ProbabilityDensityFunction<Location> { - private final Map<ZipcodeRecord, Double> pdf; + private final Map<Location, Double> pdf; - public CustomerLocationPDF(List<ZipcodeRecord> zipcodes, Store store, double averageDistance) + public CustomerLocationPDF(List<Location> zipcodes, Store store, double averageDistance) { this.pdf = build(zipcodes, store, averageDistance); } - protected ImmutableMap<ZipcodeRecord, Double> build(List<ZipcodeRecord> zipcodeTable, + protected ImmutableMap<Location, Double> build(List<Location> zipcodeTable, Store store, double averageDistance) { double lambda = 1.0 / averageDistance; - Map<ZipcodeRecord, Double> zipcodeWeights = Maps.newHashMap(); + Map<Location, Double> zipcodeWeights = Maps.newHashMap(); double totalWeight = 0.0; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { double dist = record.distance(store.getLocation()); @@ -50,8 +50,8 @@ public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRe zipcodeWeights.put(record, weight); } - Map<ZipcodeRecord, Double> pdf = Maps.newHashMap(); - for(ZipcodeRecord record : zipcodeTable) + Map<Location, Double> pdf = Maps.newHashMap(); + for(Location record : zipcodeTable) { pdf.put(record, zipcodeWeights.get(record) / totalWeight); } @@ -59,7 +59,7 @@ public class CustomerLocationPDF implements ProbabilityDensityFunction<ZipcodeRe return ImmutableMap.copyOf(pdf); } - public double probability(ZipcodeRecord record) + public double probability(Location record) { if(!this.pdf.containsKey(record)) return 0.0; http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java index 2bfb6e7..f0e94c9 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java @@ -17,7 +17,7 @@ package org.apache.bigtop.datagenerators.bigpetstore.generators.customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; @@ -27,13 +27,13 @@ public class CustomerSampler implements Sampler<Customer> private final Sampler<Integer> idSampler; private final Sampler<Pair<String, String>> nameSampler; private final Sampler<Store> storeSampler; - private final ConditionalSampler<ZipcodeRecord, Store> locationSampler; + private final ConditionalSampler<Location, Store> locationSampler; public CustomerSampler(Sampler<Integer> idSampler, Sampler<Pair<String, String>> nameSampler, Sampler<Store> storeSampler, - ConditionalSampler<ZipcodeRecord, Store> locationSampler) + ConditionalSampler<Location, Store> locationSampler) { this.idSampler = idSampler; this.nameSampler = nameSampler; @@ -46,7 +46,7 @@ public class CustomerSampler implements Sampler<Customer> Integer id = idSampler.sample(); Pair<String, String> name = nameSampler.sample(); Store store = storeSampler.sample(); - ZipcodeRecord location = locationSampler.sample(store); + Location location = locationSampler.sample(store); return new Customer(id, name, store, location); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java index 44ffa6a..ae64f0c 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java @@ -22,7 +22,7 @@ import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.namegenerator.NameGenerator; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; @@ -47,20 +47,20 @@ public class CustomerSamplerBuilder this.inputData = inputData; } - protected ConditionalSampler<ZipcodeRecord, Store> buildLocationSampler() + protected ConditionalSampler<Location, Store> buildLocationSampler() { - final Map<Store, Sampler<ZipcodeRecord>> locationSamplers = Maps.newHashMap(); + final Map<Store, Sampler<Location>> locationSamplers = Maps.newHashMap(); for(Store store : stores) { - ProbabilityDensityFunction<ZipcodeRecord> locationPDF = new CustomerLocationPDF(inputData.getZipcodeTable(), + ProbabilityDensityFunction<Location> locationPDF = new CustomerLocationPDF(inputData.getZipcodeTable(), store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE); - Sampler<ZipcodeRecord> locationSampler = RouletteWheelSampler.create(inputData.getZipcodeTable(), locationPDF, seedFactory); + Sampler<Location> locationSampler = RouletteWheelSampler.create(inputData.getZipcodeTable(), locationPDF, seedFactory); locationSamplers.put(store, locationSampler); } - return new ConditionalSampler<ZipcodeRecord, Store>() + return new ConditionalSampler<Location, Store>() { - public ZipcodeRecord sample(Store store) throws Exception + public Location sample(Store store) throws Exception { return locationSamplers.get(store).sample(); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java index 7e56462..d3055b9 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationIncomePDF.java @@ -17,22 +17,22 @@ package org.apache.bigtop.datagenerators.bigpetstore.generators.store; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; -public class StoreLocationIncomePDF implements ProbabilityDensityFunction<ZipcodeRecord> +public class StoreLocationIncomePDF implements ProbabilityDensityFunction<Location> { double incomeNormalizationFactor; double minIncome; double k; - public StoreLocationIncomePDF(List<ZipcodeRecord> zipcodeTable, double incomeScalingFactor) + public StoreLocationIncomePDF(List<Location> zipcodeTable, double incomeScalingFactor) { double maxIncome = 0.0; minIncome = Double.MAX_VALUE; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { maxIncome = Math.max(maxIncome, record.getMedianHouseholdIncome()); minIncome = Math.min(minIncome, record.getMedianHouseholdIncome()); @@ -41,21 +41,21 @@ public class StoreLocationIncomePDF implements ProbabilityDensityFunction<Zipcod k = Math.log(incomeScalingFactor) / (maxIncome - minIncome); incomeNormalizationFactor = 0.0d; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { double weight = incomeWeight(record); incomeNormalizationFactor += weight; } } - private double incomeWeight(ZipcodeRecord record) + private double incomeWeight(Location record) { return Math.exp(k * (record.getMedianHouseholdIncome() - minIncome)); } @Override - public double probability(ZipcodeRecord datum) + public double probability(Location datum) { double weight = incomeWeight(datum); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationPopulationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationPopulationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationPopulationPDF.java index 2623bb5..6dd42bb 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationPopulationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreLocationPopulationPDF.java @@ -17,17 +17,17 @@ package org.apache.bigtop.datagenerators.bigpetstore.generators.store; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; -public class StoreLocationPopulationPDF implements ProbabilityDensityFunction<ZipcodeRecord> +public class StoreLocationPopulationPDF implements ProbabilityDensityFunction<Location> { double populationSum = 0.0; - public StoreLocationPopulationPDF(List<ZipcodeRecord> zipcodeTable) + public StoreLocationPopulationPDF(List<Location> zipcodeTable) { long populationSum = 0L; - for(ZipcodeRecord record : zipcodeTable) + for(Location record : zipcodeTable) { populationSum += record.getPopulation(); } @@ -35,7 +35,7 @@ public class StoreLocationPopulationPDF implements ProbabilityDensityFunction<Zi this.populationSum = ((double) populationSum); } - public double probability(ZipcodeRecord record) + public double probability(Location record) { return ((double) record.getPopulation()) / populationSum; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSampler.java index 7b981c4..85e2b18 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSampler.java @@ -16,16 +16,16 @@ package org.apache.bigtop.datagenerators.bigpetstore.generators.store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; public class StoreSampler implements Sampler<Store> { - private final Sampler<ZipcodeRecord> locationSampler; + private final Sampler<Location> locationSampler; private final Sampler<Integer> idSampler; - public StoreSampler(Sampler<Integer> idSampler, Sampler<ZipcodeRecord> locationSampler) + public StoreSampler(Sampler<Integer> idSampler, Sampler<Location> locationSampler) { this.locationSampler = locationSampler; this.idSampler = idSampler; @@ -35,7 +35,7 @@ public class StoreSampler implements Sampler<Store> { Integer id = idSampler.sample(); String name = "Store_" + id; - ZipcodeRecord location = locationSampler.sample(); + Location location = locationSampler.sample(); Store store = new Store(id, name, location); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java index 74f6698..da795ce 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/StoreSamplerBuilder.java @@ -19,7 +19,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.JointPDF; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; @@ -29,10 +29,10 @@ import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler; public class StoreSamplerBuilder { - private final List<ZipcodeRecord> zipcodeTable; + private final List<Location> zipcodeTable; private final SeedFactory seedFactory; - public StoreSamplerBuilder(List<ZipcodeRecord> zipcodeTable, SeedFactory seedFactory) + public StoreSamplerBuilder(List<Location> zipcodeTable, SeedFactory seedFactory) { this.zipcodeTable = zipcodeTable; this.seedFactory = seedFactory; @@ -42,14 +42,14 @@ public class StoreSamplerBuilder { Sampler<Integer> idSampler = new SequenceSampler(); - ProbabilityDensityFunction<ZipcodeRecord> locationPopulationPDF = + ProbabilityDensityFunction<Location> locationPopulationPDF = new StoreLocationPopulationPDF(zipcodeTable); - ProbabilityDensityFunction<ZipcodeRecord> locationIncomePDF = + ProbabilityDensityFunction<Location> locationIncomePDF = new StoreLocationIncomePDF(zipcodeTable, Constants.INCOME_SCALING_FACTOR); - ProbabilityDensityFunction<ZipcodeRecord> locationJointPDF = - new JointPDF<ZipcodeRecord>(zipcodeTable, locationPopulationPDF, locationIncomePDF); + ProbabilityDensityFunction<Location> locationJointPDF = + new JointPDF<Location>(zipcodeTable, locationPopulationPDF, locationIncomePDF); - Sampler<ZipcodeRecord> locationSampler = RouletteWheelSampler.create(zipcodeTable, locationJointPDF, seedFactory); + Sampler<Location> locationSampler = RouletteWheelSampler.create(zipcodeTable, locationJointPDF, seedFactory); return new StoreSampler(idSampler, locationSampler); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java index 60733a9..ac6b1c2 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerLocationPDF.java @@ -23,8 +23,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerLocationPDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -34,10 +33,10 @@ public class TestCustomerLocationPDF @Test public void testProbability() throws Exception { - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); List<Store> stores = new ArrayList<Store>(); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java index a176333..39857b4 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java @@ -26,7 +26,7 @@ import java.util.Map; import org.apache.bigtop.datagenerators.bigpetstore.Constants; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler; @@ -41,21 +41,21 @@ import com.google.common.collect.Maps; public class TestCustomerSampler { - protected ConditionalSampler<ZipcodeRecord, Store> buildLocationSampler(List<Store> stores, List<ZipcodeRecord> records, + protected ConditionalSampler<Location, Store> buildLocationSampler(List<Store> stores, List<Location> records, SeedFactory factory) { - final Map<Store, Sampler<ZipcodeRecord>> locationSamplers = Maps.newHashMap(); + final Map<Store, Sampler<Location>> locationSamplers = Maps.newHashMap(); for(Store store : stores) { - ProbabilityDensityFunction<ZipcodeRecord> locationPDF = new CustomerLocationPDF(records, + ProbabilityDensityFunction<Location> locationPDF = new CustomerLocationPDF(records, store, Constants.AVERAGE_CUSTOMER_STORE_DISTANCE); - Sampler<ZipcodeRecord> locationSampler = RouletteWheelSampler.create(records, locationPDF, factory); + Sampler<Location> locationSampler = RouletteWheelSampler.create(records, locationPDF, factory); locationSamplers.put(store, locationSampler); } - return new ConditionalSampler<ZipcodeRecord, Store>() + return new ConditionalSampler<Location, Store>() { - public ZipcodeRecord sample(Store store) throws Exception + public Location sample(Store store) throws Exception { return locationSamplers.get(store).sample(); } @@ -73,10 +73,10 @@ public class TestCustomerSampler nameList.add(Pair.of("George", "George")); nameList.add(Pair.of("Fiona", "Fiona")); - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); List<Store> stores = new ArrayList<Store>(); @@ -90,7 +90,7 @@ public class TestCustomerSampler Sampler<Integer> idSampler = new SequenceSampler(); Sampler<Pair<String, String>> nameSampler = RouletteWheelSampler.createUniform(nameList, factory); Sampler<Store> storeSampler = RouletteWheelSampler.createUniform(stores, factory); - ConditionalSampler<ZipcodeRecord, Store> zipcodeSampler = buildLocationSampler(stores, zipcodes, factory); + ConditionalSampler<Location, Store> zipcodeSampler = buildLocationSampler(stores, zipcodes, factory); Sampler<Customer> sampler = new CustomerSampler(idSampler, nameSampler, storeSampler, zipcodeSampler); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java index 6ddaa94..47cd100 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java @@ -24,7 +24,7 @@ import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; @@ -36,10 +36,10 @@ public class TestCustomerSamplerBuilder @Test public void testSample() throws Exception { - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); // don't need product categories for building customers http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java index b194c3a..741698e 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationIncomePDF.java @@ -20,8 +20,7 @@ import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreLocationIncomePDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -31,15 +30,15 @@ public class TestStoreLocationIncomePDF @Test public void testProbability() throws Exception { - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); StoreLocationIncomePDF pdf = new StoreLocationIncomePDF(zipcodes, 100.0); - for(ZipcodeRecord record : zipcodes) + for(Location record : zipcodes) { assertTrue(pdf.probability(record) > 0.0); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java index 002e14f..aa70821 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreLocationPopulationPDF.java @@ -20,8 +20,7 @@ import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.List; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreLocationPopulationPDF; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; @@ -31,15 +30,15 @@ public class TestStoreLocationPopulationPDF @Test public void testProbability() throws Exception { - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); StoreLocationPopulationPDF pdf = new StoreLocationPopulationPDF(zipcodes); - for(ZipcodeRecord record : zipcodes) + for(Location record : zipcodes) { assertTrue(pdf.probability(record) > 0.0); } http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java index 2594509..dc14d77 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSampler.java @@ -22,8 +22,7 @@ import java.util.Arrays; import java.util.Collection; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreSampler; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; @@ -37,10 +36,10 @@ public class TestStoreSampler @Test public void testSampler() throws Exception { - Collection<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + Collection<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); SeedFactory factory = new SeedFactory(1234); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java index f440ff6..19f6cd2 100644 --- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/store/TestStoreSamplerBuilder.java @@ -22,8 +22,7 @@ import java.util.Arrays; import java.util.List; import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store; -import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord; -import org.apache.bigtop.datagenerators.bigpetstore.generators.store.StoreSamplerBuilder; +import org.apache.bigtop.datagenerators.locations.Location; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; import org.apache.commons.lang3.tuple.Pair; @@ -35,10 +34,10 @@ public class TestStoreSamplerBuilder @Test public void testBuild() throws Exception { - List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] { - new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), - new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), - new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) + List<Location> zipcodes = Arrays.asList(new Location[] { + new Location("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100), + new Location("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200), + new Location("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300) }); assertTrue(zipcodes.size() > 0); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java index 4b5f620..e8c9ebf 100644 --- a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java @@ -18,6 +18,7 @@ package org.apache.bigtop.datagenerators.namegenerator; import org.apache.bigtop.datagenerators.samplers.SeedFactory; import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.apache.bigtop.datagenerators.samplers.wfs.MultinomialWF; import org.apache.commons.lang3.tuple.Pair; public class NameGenerator implements Sampler<Pair<String, String>> @@ -29,8 +30,8 @@ public class NameGenerator implements Sampler<Pair<String, String>> { Names names = new NameReader().readData(); - firstNameSampler = RouletteWheelSampler.create(names.getFirstNames(), seedFactory); - lastNameSampler = RouletteWheelSampler.create(names.getLastNames(), seedFactory); + firstNameSampler = RouletteWheelSampler.create(new MultinomialWF<String>(names.getFirstNames()).normalize(), seedFactory); + lastNameSampler = RouletteWheelSampler.create(new MultinomialWF<String>(names.getLastNames()).normalize(), seedFactory); } public Pair<String, String> sample() throws Exception http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java index d53529f..548cf91 100644 --- a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java +++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java @@ -52,7 +52,7 @@ public class NameReader String[] cols = line.trim().split(","); String name = cols[0]; - double weight = Double.parseDouble(cols[5]); + double weight = 1.0 / (Double.parseDouble(cols[5]) + 1.0); if(cols[4].equals("1")) firstNames.put(name, weight); http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java new file mode 100644 index 0000000..a68acb9 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/DiscreteWeightFunction.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.wfs; + +import java.util.Set; + +import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; + +public interface DiscreteWeightFunction<T> extends WeightFunction<T> +{ + public ProbabilityDensityFunction<T> normalize(); + + public Set<T> getData(); +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java new file mode 100644 index 0000000..e4a2a35 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/MultinomialWF.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.wfs; + +import java.util.Map; +import java.util.Set; + +import org.apache.bigtop.datagenerators.samplers.pdfs.MultinomialPDF; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class MultinomialWF<T> implements DiscreteWeightFunction<T> +{ + private final ImmutableMap<T, Double> weights; + + public MultinomialWF(Map<T, Double> probabilities) + { + this.weights = ImmutableMap.copyOf(probabilities); + } + + public Set<T> getData() + { + return weights.keySet(); + } + + public double weight(T value) + { + if(weights.containsKey(value)) + { + return weights.get(value); + } + + return 0.0; + } + + public MultinomialPDF<T> normalize() + { + double sum = 0.0; + for(double w : weights.values()) + { + sum += w; + } + + Map<T, Double> probabilities = Maps.newHashMap(); + for(Map.Entry<T, Double> entry : weights.entrySet()) + { + probabilities.put(entry.getKey(), entry.getValue() / sum); + } + + return new MultinomialPDF<T>(probabilities); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/431b8951/bigtop-data-generators/settings.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/settings.gradle b/bigtop-data-generators/settings.gradle index ad6c61d..a0e23a3 100644 --- a/bigtop-data-generators/settings.gradle +++ b/bigtop-data-generators/settings.gradle @@ -13,4 +13,4 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -include ":bigpetstore-data-generator", ":bigtop-name-generator", ":bigtop-samplers" \ No newline at end of file +include ":bigpetstore-data-generator", ":bigtop-name-generator", ":bigtop-samplers", ":bigtop-location-data"
