http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java new file mode 100644 index 0000000..3fdef68 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Driver.java @@ -0,0 +1,315 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.cli; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Collection; +import java.util.List; + +import org.apache.bigtop.bigpetstore.datagenerator.DataLoader; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel; +import org.apache.commons.lang3.tuple.Pair; + + +public class Driver +{ + int nStores; + int nCustomers; + int nPurchasingModels; + double simulationTime; + long seed; + File outputDir; + + static final int NPARAMS = 6; + + private void printUsage() + { + String usage = "BigPetStore Data Generator\n" + + "\n" + + "Usage: java -jar bps-data-generator-v0.2.java outputDir nStores nCustomers nPurchasingModels simulationLength seed\n" + + "\n" + + "outputDir - (string) directory to write files\n" + + "nStores - (int) number of stores to generate\n" + + "nCustomers - (int) number of customers to generate\n" + + "nPurchasingModels - (int) number of purchasing models to generate\n" + + "simulationLength - (float) number of days to simulate\n" + + "seed - (long) seed for RNG. If not given, one is reandomly generated.\n"; + + System.out.println(usage); + } + + public void parseArgs(String[] args) + { + if(args.length != NPARAMS && args.length != (NPARAMS - 1)) + { + printUsage(); + System.exit(1); + } + + int i = -1; + + outputDir = new File(args[++i]); + if(! 
outputDir.exists()) + { + System.err.println("Given path (" + args[i] + ") does not exist.\n"); + printUsage(); + System.exit(1); + } + + if(! outputDir.isDirectory()) + { + System.err.println("Given path (" + args[i] + ") is not a directory.\n"); + printUsage(); + System.exit(1); + } + + try + { + nStores = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nStores.\n"); + printUsage(); + System.exit(1); + } + + try + { + nCustomers = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nCustomers.\n"); + printUsage(); + System.exit(1); + } + + try + { + nPurchasingModels = Integer.parseInt(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as an integer for nPurchasingModels.\n"); + printUsage(); + System.exit(1); + } + + try + { + simulationTime = Double.parseDouble(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as a float for simulationLength.\n"); + printUsage(); + System.exit(1); + } + + try + { + seed = Long.parseLong(args[++i]); + } + catch(Exception e) + { + System.err.println("Unable to parse '" + args[i] + "' as a long for the seed.\n"); + printUsage(); + System.exit(1); + } + } + + private void writeTransactions(Collection<Transaction> transactions) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "transactions.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(Transaction transaction : transactions) + { + for(Product product : transaction.getProducts()) + { + String record = transaction.getId() + ","; + record += transaction.getDateTime() + ","; + record += transaction.getStore().getId() + ","; + record += transaction.getStore().getLocation().getZipcode() + ","; + 
record += transaction.getStore().getLocation().getCity() + ","; + record += transaction.getStore().getLocation().getState() + ","; + record += transaction.getCustomer().getId() + ","; + Pair<String, String> name = transaction.getCustomer().getName(); + record += name.getLeft() + " " + name.getRight() + ","; + record += transaction.getCustomer().getLocation().getZipcode() + ","; + record += transaction.getCustomer().getLocation().getCity() + ","; + record += transaction.getCustomer().getLocation().getState() + ","; + record += product.toString() + "\n"; + + outputStream.write(record.getBytes()); + } + } + + outputStream.close(); + } + + private void writeCustomers(Collection<Customer> customers) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "customers.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(Customer customer : customers) + { + String record = customer.getId() + ","; + Pair<String, String> name = customer.getName(); + record += name.getLeft() + "," + name.getRight() + ","; + record += customer.getLocation().getZipcode() + ","; + record += customer.getLocation().getCity() + ","; + record += customer.getLocation().getState() + "\n"; + + outputStream.write(record.getBytes()); + } + + outputStream.close(); + } + + private void writeStores(Collection<Store> stores) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "stores.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(Store store : stores) + { + String record = store.getId() + ","; + record += store.getLocation().getZipcode() + ","; + record += store.getLocation().getCity() + ","; + record += store.getLocation().getState() + "\n"; + + outputStream.write(record.getBytes()); + } + + outputStream.close(); + } + + private void 
writeProducts(Collection<ProductCategory> productCategories) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "products.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(ProductCategory category : productCategories) + { + + for(Product product : category.getProducts()) + { + String record = category.getCategoryLabel() + ","; + record += product.toString() + "\n"; + + outputStream.write(record.getBytes()); + } + } + + outputStream.close(); + } + + private void writePurchasingProfiles(List<ProductCategory> productCategories, List<PurchasingModel> profiles) throws Exception + { + File outputFile = new File(outputDir.toString() + File.separator + "purchasing_profiles.txt"); + System.out.println(outputFile.toString()); + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + + for(ProductCategory category : productCategories) + { + int i = 0; + for(PurchasingModel model : profiles) + { + Object productModel = model.getProfile(category.getCategoryLabel()); + String record = productModel.toString(); + + outputStream.write(record.getBytes()); + + i += 1; + } + } + + outputStream.close(); + } + + public Simulation buildSimulation(InputData inputData) + { + return new Simulation(inputData, nStores, nCustomers, nPurchasingModels, simulationTime, seed); + } + + private void run(InputData inputData) throws Exception + { + Simulation simulation = buildSimulation(inputData); + + simulation.simulate(); + + writeStores(simulation.getStores()); + writeCustomers(simulation.getCustomers()); + writeProducts(simulation.getProductCategories()); + writePurchasingProfiles(simulation.getProductCategories(), simulation.getPurchasingProfiles()); + writeTransactions(simulation.getTransactions()); + } + public void run(String[] args) throws Exception + { + parseArgs(args); + + InputData inputData = (new 
DataLoader()).loadData(); + + run(inputData); + } + + public static void main(String[] args) throws Exception + { + Driver driver = new Driver(); + driver.run(args); + } + + public Double getSimulationLength() + { + return simulationTime; + } + + public int getNCustomers() + { + return nCustomers; + } + + public long getSeed() + { + return seed; + } + + public int getNStores() + { + return nStores; + } + + public File getOutputDir() + { + return outputDir; + } +}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java new file mode 100644 index 0000000..4b9b500 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/cli/Simulation.java @@ -0,0 +1,188 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.cli; + +import java.util.Collections; +import java.util.List; +import java.util.Vector; + +import org.apache.bigtop.bigpetstore.datagenerator.Constants; +import org.apache.bigtop.bigpetstore.datagenerator.CustomerGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.ProductGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.PurchasingModelGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.StoreGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.TransactionGenerator; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Customer; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Store; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Transaction; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.InputData; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ProductCategory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; +import org.apache.bigtop.bigpetstore.datagenerator.generators.purchase.PurchasingModel; + +import com.google.common.collect.Lists; + +public class Simulation +{ + InputData inputData; + SeedFactory seedFactory; + int nStores; + int nCustomers; + int nPurchasingModels; + double simulationTime; + + List<Store> stores; + List<Customer> customers; + Sampler<PurchasingModel> purchasingModelSampler; + List<PurchasingModel> purchasingProfiles; + List<Transaction> transactions; + List<ProductCategory> productCategories; + + public Simulation(InputData inputData, int nStores, int nCustomers, int nPurchasingModels, double simulationTime, long seed) + { + this.inputData = inputData; + this.nStores = nStores; + this.nCustomers = nCustomers; + this.nPurchasingModels = nPurchasingModels; + 
this.simulationTime = simulationTime; + seedFactory = new SeedFactory(seed); + } + + public void generateStores() throws Exception + { + System.out.println("Generating stores"); + StoreGenerator storeGenerator = new StoreGenerator(inputData, seedFactory); + + stores = new Vector<Store>(); + for(int i = 0; i < nStores; i++) + { + Store store = storeGenerator.generate(); + stores.add(store); + } + + stores = Collections.unmodifiableList(stores); + + System.out.println("Generated " + stores.size() + " stores"); + } + + public void generateCustomers() throws Exception + { + System.out.println("Generating customers"); + CustomerGenerator generator = new CustomerGenerator(inputData, stores, seedFactory); + + customers = new Vector<Customer>(); + for(int i = 0; i < nCustomers; i++) + { + Customer customer = generator.generate(); + customers.add(customer); + } + + customers = Collections.unmodifiableList(customers); + + System.out.println("Generated " + customers.size() + " customers"); + } + + public void generateProducts() + { + System.out.println("Generating products"); + ProductGenerator generator = new ProductGenerator(Constants.PRODUCTS_COLLECTION); + productCategories = generator.generate(); + } + + public void generatePurchasingProfiles() throws Exception + { + System.out.println("Generating purchasing profiles"); + PurchasingModelGenerator generator = new PurchasingModelGenerator(productCategories, seedFactory); + + purchasingProfiles = new Vector<PurchasingModel>(); + for(int i = 0; i < nPurchasingModels; i++) + { + PurchasingModel profile = generator.generate(); + purchasingProfiles.add(profile); + } + + System.out.println("Generated " + purchasingProfiles.size() + " purchasing profiles"); + + purchasingModelSampler = RouletteWheelSampler.createUniform(purchasingProfiles, seedFactory); + } + + public void generateTransactions() throws Exception + { + System.out.println("Generating transactions"); + transactions = Lists.newArrayList(); + + for(int i = 0; i < 
nCustomers; i++) + { + Customer customer = customers.get(i); + PurchasingModel profile = purchasingModelSampler.sample(); + + TransactionGenerator generator = new TransactionGenerator(customer, + profile, productCategories, seedFactory); + + while(true) + { + Transaction transaction = generator.generate(); + + if(transaction.getDateTime() > simulationTime) + break; + transactions.add(transaction); + } + } + + System.out.println("Generated " + transactions.size() + " transactions"); + } + + public void simulate() throws Exception + { + generateStores(); + generateCustomers(); + generateProducts(); + generatePurchasingProfiles(); + generateTransactions(); + } + + public List<Store> getStores() + { + return stores; + } + + public List<Customer> getCustomers() + { + return customers; + } + + public List<Transaction> getTransactions() + { + return transactions; + } + + public InputData getInputData() + { + return inputData; + } + + public List<ProductCategory> getProductCategories() + { + return this.productCategories; + } + + public List<PurchasingModel> getPurchasingProfiles() + { + return this.purchasingProfiles; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java new file mode 100644 index 0000000..704c965 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Customer.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.commons.lang3.tuple.Pair; + +public class Customer implements Serializable +{ + private static final long serialVersionUID = 5739806281335931258L; + + int id; + Pair<String, String> name; + ZipcodeRecord location; + Store store; + + public Customer(int id, Pair<String, String> name, Store store, ZipcodeRecord location) + { + this.id = id; + this.name = name; + this.location = location; + this.store = store; + } + + public int getId() + { + return id; + } + + public Pair<String, String> getName() + { + return name; + } + + public ZipcodeRecord getLocation() + { + return location; + } + + public Store getStore() + { + return store; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java new file mode 100644 index 0000000..d4bab79 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/PetSpecies.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Pet species known to the data generator.
 */
public enum PetSpecies
{
    /** Dogs. */
    DOG,

    /** Cats. */
    CAT
}
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class Product implements Serializable +{ + private static final long serialVersionUID = 4519472063058037956L; + + ImmutableMap<String, Object> fields; + + public Product(Map<String, Object> fields) + { + this.fields = ImmutableMap.copyOf(fields); + } + + public ImmutableSet<String> getFieldNames() + { + return fields.keySet(); + } + + public Object getFieldValue(String fieldName) + { + return fields.get(fieldName); + } + + public String getFieldValueAsString(String fieldName) + { + return fields.get(fieldName).toString(); + } + + public Double getFieldValueAsDouble(String fieldName) + { + Object value = getFieldValue(fieldName); + try + { + Double doubleValue = (Double) value; + return doubleValue; + } + catch(ClassCastException e) + { + return null; + } + } + + public Long getFieldValueAsLong(String fieldName) + { + Object value = getFieldValue(fieldName); + try + { + Long longValue = (Long) value; + return longValue; + } + catch(ClassCastException e) + { + try + { + Integer intValue = (Integer) value; + return new Long(intValue); + } + catch(ClassCastException f) + { + return null; + } + } + } + + public String toString() + { + String str = ""; + for(Map.Entry<String, Object> entry : fields.entrySet()) + { + str += entry.getKey() + "=" + entry.getValue() + ";"; + } + + return str; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java new file mode 100644 index 0000000..50e333a --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Store.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; + +public class Store implements Serializable +{ + private static final long serialVersionUID = 2347066623022747969L; + + int id; + String name; + ZipcodeRecord location; + + public Store(int id, String name, ZipcodeRecord location) + { + this.id = id; + this.name = name; + this.location = location; + } + + public int getId() + { + return id; + } + + public String getName() + { + return name; + } + + public ZipcodeRecord getLocation() + { + return location; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java new file mode 100644 index 0000000..6a78e44 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/Transaction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels; + +import java.io.Serializable; +import java.util.List; + +import com.google.common.collect.ImmutableList; + +public class Transaction implements Serializable +{ + private static final long serialVersionUID = 103133601154354349L; + + final int id; + final Customer customer; + final Store store; + final Double dateTime; + final ImmutableList<Product> products; + + public Transaction(int id, Customer customer, Store store, Double dateTime, List<Product> products) + { + this.id = id; + this.customer = customer; + this.store = store; + this.dateTime = dateTime; + this.products = ImmutableList.copyOf(products); + } + + public int getId() + { + return id; + } + + public Customer getCustomer() + { + return customer; + } + + public Store getStore() + { + return store; + } + + public Double getDateTime() + { + return dateTime; + } + + public ImmutableList<Product> getProducts() + { + return products; + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java new file mode 100644 index 
0000000..4fad219 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/InputData.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; + +public class InputData implements Serializable +{ + private static final long serialVersionUID = 9078989799806707788L; + + List<ZipcodeRecord> zipcodeTable; + Names names; + + public InputData(List<ZipcodeRecord> zipcodeTable, + Names names) + { + this.zipcodeTable = Collections.unmodifiableList(zipcodeTable); + this.names = names; + } + + public List<ZipcodeRecord> getZipcodeTable() + { + return zipcodeTable; + } + + public Names getNames() + { + return names; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java ---------------------------------------------------------------------- diff --git 
a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java new file mode 100644 index 0000000..9402e02 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/Names.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + +public class Names implements Serializable +{ + private static final long serialVersionUID = 2731634747628534453L; + + final ImmutableMap<String, Double> firstNames; + final ImmutableMap<String, Double> lastNames; + + public Names(Map<String, Double> firstNames, + Map<String, Double> lastNames) + { + this.firstNames = ImmutableMap.copyOf(firstNames); + this.lastNames = ImmutableMap.copyOf(lastNames); + } + + public ImmutableMap<String, Double> getFirstNames() + { + return firstNames; + } + + public ImmutableMap<String, Double> getLastNames() + { + return lastNames; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java new file mode 100644 index 0000000..6fb0572 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ProductCategory.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; +import java.util.List; +import java.util.Set; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.PetSpecies; +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.Product; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +public class ProductCategory implements Serializable +{ + private static final long serialVersionUID = -7638076590334497836L; + + String categoryLabel; + ImmutableSet<PetSpecies> applicableSpecies; + ImmutableSet<String> fieldNames; + boolean triggerTransaction; + double dailyUsageRate; + double amountUsedPerPetAverage; + double amountUsedPerPetVariance; + double triggerTransactionRate; + double triggerPurchaseRate; + ImmutableList<Product> products; + + public ProductCategory(String categoryLabel, Set<PetSpecies> species, Set<String> fieldNames, + boolean triggerTransaction, double dailyUsageRate, double amountUsedPerPetAverage, + double amountUsedPerPetVariance, double triggerTransactionRate, + double triggerPurchaseRate, List<Product> products) + { + this.categoryLabel = categoryLabel; + this.applicableSpecies = ImmutableSet.copyOf(species); + this.fieldNames = ImmutableSet.copyOf(fieldNames); + this.triggerTransaction = triggerTransaction; + this.dailyUsageRate = dailyUsageRate; + this.amountUsedPerPetAverage = amountUsedPerPetAverage; + this.amountUsedPerPetVariance = amountUsedPerPetVariance; + this.triggerTransactionRate = triggerTransactionRate; + 
this.triggerPurchaseRate = triggerPurchaseRate; + this.products = ImmutableList.copyOf(products); + } + + public String getCategoryLabel() + { + return categoryLabel; + } + + public ImmutableSet<PetSpecies> getApplicableSpecies() + { + return applicableSpecies; + } + + public ImmutableSet<String> getFieldNames() + { + return fieldNames; + } + public Boolean getTriggerTransaction() + { + return triggerTransaction; + } + + public Double getDailyUsageRate() + { + return dailyUsageRate; + } + + public Double getBaseAmountUsedAverage() + { + return amountUsedPerPetAverage; + } + + public Double getBaseAmountUsedVariance() + { + return amountUsedPerPetVariance; + } + + public Double getTransactionTriggerRate() + { + return triggerTransactionRate; + } + + public Double getPurchaseTriggerRate() + { + return triggerPurchaseRate; + } + + public ImmutableList<Product> getProducts() + { + return products; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java new file mode 100644 index 0000000..ef54712 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datamodels/inputs/ZipcodeRecord.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs; + +import java.io.Serializable; + +import org.apache.commons.lang3.tuple.Pair; + +public class ZipcodeRecord implements Serializable +{ + private static final long serialVersionUID = 1769986686070108470L; + + final String zipcode; + final Pair<Double, Double> coordinates; + final String city; + final String state; + final double medianHouseholdIncome; + final long population; + + public ZipcodeRecord(String zipcode, Pair<Double, Double> coordinates, + String city, String state, double medianHouseholdIncome, long population) + { + this.city = city; + this.state = state; + this.zipcode = zipcode; + this.coordinates = coordinates; + this.medianHouseholdIncome = medianHouseholdIncome; + this.population = population; + } + + public String getZipcode() + { + return zipcode; + } + + public Pair<Double, Double> getCoordinates() + { + return coordinates; + } + + public double getMedianHouseholdIncome() + { + return medianHouseholdIncome; + } + + public long getPopulation() + { + return population; + } + + public double distance(ZipcodeRecord other) + { + if(other.getZipcode().equals(zipcode)) + return 0.0; + + Pair<Double, Double> otherCoords = other.getCoordinates(); + + double dist = Math.sin(Math.toRadians(coordinates.getLeft())) * + Math.sin(Math.toRadians(otherCoords.getLeft())) + + 
Math.cos(Math.toRadians(coordinates.getLeft())) * + Math.cos(Math.toRadians(otherCoords.getLeft())) * + Math.cos(Math.toRadians(coordinates.getRight() - otherCoords.getRight())); + dist = Math.toDegrees(Math.acos(dist)) * 69.09; + + return dist; + } + + public String getCity() + { + return city; + } + + public String getState() + { + return state; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java new file mode 100644 index 0000000..5a847ea --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/NameReader.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.datareaders; + +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Map; +import java.util.Scanner; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.Names; + +import com.google.common.collect.Maps; + +public class NameReader +{ + InputStream path; + + public NameReader(InputStream path) + { + this.path = path; + } + + public Names readData() throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + Map<String, Double> firstNames = Maps.newHashMap(); + Map<String, Double> lastNames = Maps.newHashMap(); + + while(scanner.hasNextLine()) + { + String line = scanner.nextLine(); + String[] cols = line.trim().split(","); + + String name = cols[0]; + double weight = Double.parseDouble(cols[5]); + + if(cols[4].equals("1")) + firstNames.put(name, weight); + if(cols[3].equals("1")) + lastNames.put(name, weight); + } + + scanner.close(); + + return new Names(firstNames, lastNames); + + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java new file mode 100644 index 0000000..1612a92 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/datareaders/ZipcodeReader.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.datareaders; + +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.Vector; + +import org.apache.bigtop.bigpetstore.datagenerator.datamodels.inputs.ZipcodeRecord; +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; + +public class ZipcodeReader +{ + private static class ZipcodeLocationRecord + { + public final Pair<Double, Double> coordinates; + public final String state; + public final String city; + + public ZipcodeLocationRecord(Pair<Double, Double> coordinates, + String city, String state) + { + this.coordinates = coordinates; + this.city = city; + this.state = state; + } + } + + InputStream zipcodeIncomesFile = null; + InputStream zipcodePopulationFile = null; + InputStream zipcodeCoordinatesFile = null; + + public void setIncomesFile(InputStream path) + { + this.zipcodeIncomesFile = path; + } + + public void setPopulationFile(InputStream path) + { + this.zipcodePopulationFile = path; + } + + public void setCoordinatesFile(InputStream path) 
+ { + this.zipcodeCoordinatesFile = path; + } + + private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip headers + scanner.nextLine(); + scanner.nextLine(); + + Map<String, Double> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + String[] cols = line.split(","); + // zipcodes are in the form "ZCTA5 XXXXX" + String zipcode = cols[2].split(" ")[1].trim(); + try + { + double medianHouseholdIncome = Integer.parseInt(cols[5].trim()); + entries.put(zipcode, medianHouseholdIncome); + } + catch(NumberFormatException e) + { + + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, Long> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + + if(line.length() == 0) + continue; + + String[] cols = line.split(","); + + String zipcode = cols[0].trim(); + Long population = Long.parseLong(cols[1].trim()); + + if(entries.containsKey(zipcode)) + { + entries.put(zipcode, Math.max(entries.get(zipcode), population)); + } + else + { + entries.put(zipcode, population); + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException + { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap(); + while(scanner.hasNextLine()) + { + String line = scanner.nextLine().trim(); + + String[] cols = line.split(", "); + + // remove quote marks + String zipcode = cols[0].substring(1, cols[0].length() - 1); + String state = cols[1].substring(1, 
cols[1].length() - 1); + Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1)); + Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1)); + String city = cols[4].substring(1, cols[4].length() - 1); + + Pair<Double, Double> coords = Pair.of(latitude, longitude); + + ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, state); + + entries.put(zipcode, record); + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException + { + ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile); + ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile); + ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile); + + Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); + zipcodeSubset.retainAll(populations.keySet()); + zipcodeSubset.retainAll(coordinates.keySet()); + + List<ZipcodeRecord> table = new Vector<ZipcodeRecord>(); + for(String zipcode : zipcodeSubset) + { + ZipcodeRecord record = new ZipcodeRecord(zipcode, + coordinates.get(zipcode).coordinates, + coordinates.get(zipcode).city, + coordinates.get(zipcode).state, + incomes.get(zipcode), + populations.get(zipcode)); + table.add(record); + } + + return ImmutableList.copyOf(table); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java new file mode 100644 index 
0000000..aea004e --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/SeedFactory.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework; + +import java.util.Random; + +public class SeedFactory +{ + Random rng; + + public SeedFactory() + { + rng = new Random(); + } + + public SeedFactory(long seed) + { + rng = new Random(seed); + } + + public long getNextSeed() + { + return rng.nextLong(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModel.java new file mode 100644 index 0000000..0b90e2b --- /dev/null +++ 
/**
 * Holder for the weights of a Markov model over states of type {@code T}:
 * a start-state weight table and a per-state transition weight table.
 * The maps are stored as given (no copy is taken).
 */
public class MarkovModel<T> implements Serializable
{
    private static final long serialVersionUID = 8378109656005603192L;

    final Map<T, Map<T, Double>> transitionWeights;
    final Map<T, Double> startWeights;

    /**
     * @param transitionWeights for each state, the weights of the states it
     *                          can transition to
     * @param startWeights      weights of the possible start states
     */
    public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights)
    {
        this.transitionWeights = transitionWeights;
        this.startWeights = startWeights;
    }

    /** @return the transition weight table passed to the constructor */
    public Map<T, Map<T, Double>> getTransitionWeights()
    {
        return transitionWeights;
    }

    /** @return the start weight table passed to the constructor */
    public Map<T, Double> getStartWeights()
    {
        return startWeights;
    }

    /**
     * @return {@code "MarkovModel(<startWeights>,<transitionWeights>)"}
     */
    @Override
    public String toString()
    {
        // The label previously read "MarkModel", which did not match the class name.
        return "MarkovModel(" + startWeights + "," + transitionWeights + ")";
    }

}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java new file mode 100644 index 0000000..861c0ef --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovModelBuilder.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableTable; + +public class MarkovModelBuilder<S> +{ + ImmutableTable.Builder<S, S, Double> transitionWeights; + ImmutableMap.Builder<S, Double> startWeights; + + public MarkovModelBuilder() + { + transitionWeights = ImmutableTable.builder(); + startWeights = ImmutableMap.builder(); + } + + public static <T> MarkovModelBuilder<T> create() + { + return new MarkovModelBuilder<T>(); + } + + public void addStartState(S state, double weight) + { + startWeights.put(state, weight); + } + + public void addTransition(S state1, S state2, double weight) + { + transitionWeights.put(state1, state2, weight); + } + + public MarkovModel<S> build() + { + return new MarkovModel<S>(transitionWeights.build().rowMap(), startWeights.build()); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java new file mode 100644 index 0000000..d0bd6c5 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/markovmodels/MarkovProcess.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.markovmodels; + +import java.util.Map; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.RouletteWheelSampler; +import org.apache.bigtop.bigpetstore.datagenerator.framework.samplers.Sampler; + +import com.google.common.collect.ImmutableMap; + +public class MarkovProcess<T> implements Sampler<T> +{ + final ImmutableMap<T, Sampler<T>> transitionSamplers; + final Sampler<T> startStateSampler; + + T currentState; + + + public MarkovProcess(MarkovModel<T> model, SeedFactory factory) + { + Map<T, Map<T, Double>> transitionTable = model.getTransitionWeights(); + + startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory); + + ImmutableMap.Builder<T, Sampler<T>> builder = ImmutableMap.builder(); + for(Map.Entry<T, Map<T, Double>> entry : transitionTable.entrySet()) + { + builder.put(entry.getKey(), RouletteWheelSampler.create(entry.getValue(), factory)); + } + + + this.transitionSamplers = builder.build(); + + currentState = null; + } + + public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory) + { + return new MarkovProcess<T>(model, factory); + } + + public T sample() throws Exception + { + if(currentState == null) + { + currentState = startStateSampler.sample(); + return currentState; 
+ } + + currentState = transitionSamplers.get(currentState).sample(); + return currentState; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java new file mode 100644 index 0000000..5161761 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ConditionalProbabilityDensityFunction.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * A probability function of a datum of type {@code T} evaluated against a
 * conditioning datum of type {@code S}.
 */
public interface ConditionalProbabilityDensityFunction<T, S>
{
    /**
     * @param datum            value whose probability is requested
     * @param conditionalDatum value the probability is conditioned on
     * @return the probability assigned to {@code datum} given
     *         {@code conditionalDatum}
     */
    double probability(T datum, S conditionalDatum);
}
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class ExponentialPDF implements ProbabilityDensityFunction<Double> +{ + private final double lambda; + + public ExponentialPDF(double lambda) + { + this.lambda = lambda; + } + + public double probability(Double value) + { + return lambda * Math.exp(-1.0 * value * lambda); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java new file mode 100644 index 0000000..55ebc93 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/GaussianPDF.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class GaussianPDF implements ProbabilityDensityFunction<Double> +{ + private double mean; + private double std; + + public GaussianPDF(double mean, double std) + { + this.mean = mean; + this.std = std; + } + + public double probability(Double value) + { + double diff = (mean - value) * (mean - value); + double var = std * std; + double exp = Math.exp(-1.0 * diff / (2.0 * var)); + + return exp / (std * Math.sqrt(2.0 * Math.PI)); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java new file mode 100644 index 0000000..fdf2db0 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/JointPDF.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +import java.util.List; + +public class JointPDF<T> implements ProbabilityDensityFunction<T> +{ + + double normalizationFactor; + ProbabilityDensityFunction<T>[] pdfs; + + public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs) + { + this.pdfs = pdfs; + + normalizationFactor = 0.0d; + for(T datum : data) + { + double prob = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + prob *= pdf.probability(datum); + normalizationFactor += prob; + } + + } + + public double probability(T datum) + { + double weight = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + weight *= pdf.probability(datum); + + return weight / normalizationFactor; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java new file mode 100644 index 0000000..7605156 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/MultinomialPDF.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableMap; + +public class MultinomialPDF<T> implements ProbabilityDensityFunction<T> +{ + private final ImmutableMap<T, Double> probabilities; + + public MultinomialPDF(Map<T, Double> probabilities) + { + this.probabilities = ImmutableMap.copyOf(probabilities); + } + + public Set<T> getData() + { + return probabilities.keySet(); + } + + public double probability(T value) + { + if(probabilities.containsKey(value)) + { + return probabilities.get(value); + } + + return 0.0; + } + + @Override + public String toString() + { + String str = ""; + for(Map.Entry<T, Double> entry: probabilities.entrySet()) + { + str += entry.getValue() + "," + entry.getKey() + "\n"; + } + return str; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java new file mode 100644 index 0000000..1b691ca --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/pdfs/ProbabilityDensityFunction.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * A function that assigns a density (or probability) to values of type
 * {@code T}.
 *
 * @param <T> type of the datum being scored
 */
public interface ProbabilityDensityFunction<T>
{
    /**
     * Evaluates the density at {@code datum}.
     *
     * @param datum the value to score
     * @return the density assigned to {@code datum}
     */
    double probability(T datum);
}
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs; + +public class UniformPDF<T> implements ProbabilityDensityFunction<T> +{ + private final double probability; + + public UniformPDF(long count) + { + probability = 1.0 / ((double) count); + } + + public UniformPDF(double probability) + { + this.probability = probability; + } + + public double probability(T datum) + { + return probability; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java new file mode 100644 index 0000000..348f080 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/BoundedMultiModalGaussianSampler.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.List; +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.collect.ImmutableList; + +public class BoundedMultiModalGaussianSampler implements Sampler<Double> +{ + ImmutableList<Pair<Double, Double>> distributions; + + double min; + double max; + Random rng; + + public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.distributions = ImmutableList.copyOf(distributions); + + this.min = min; + this.max = max; + } + + public Double sample() + { + while(true) + { + int idx = rng.nextInt(distributions.size()); + + double mean = distributions.get(idx).getLeft(); + double std = distributions.get(idx).getRight(); + + double value = mean + rng.nextGaussian() * std; + + if (value >= this.min && value <= this.max) + { + return value; + } + } + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java new file mode 100644 index 0000000..c521333 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ConditionalSampler.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Produces samples of type {@code T} that depend on a conditioning value of
 * type {@code S}.
 *
 * @param <T> type of the generated sample
 * @param <S> type of the value the sample is conditioned on
 */
public interface ConditionalSampler<T, S>
{
    /**
     * Draws one sample given {@code conditional}.
     *
     * @param conditional the value the sample is conditioned on
     * @return the generated sample
     * @throws Exception if the implementation cannot produce a sample
     */
    T sample(S conditional) throws Exception;
}
+ */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +public class DoubleSequenceSampler implements Sampler<Double> +{ + Double start; + Double end; + Double step; + Double next; + + public DoubleSequenceSampler() + { + start = 0.0; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start) + { + this.start = start; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end) + { + this.start = start; + this.end = end; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end, Double step) + { + this.start = start; + this.end = end; + this.step = step; + next = start; + } + + public Double sample() throws Exception + { + if(end == null || next < end) + { + Double current = next; + next = current + step; + return current; + } + + throw new Exception("All values have been sampled"); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java new file mode 100644 index 0000000..082f3ac --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/ExponentialSampler.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class ExponentialSampler implements Sampler<Double> +{ + final private Random rng; + final private double lambda; + + public ExponentialSampler(double lambda, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.lambda = lambda; + } + + public Double sample() + { + return - Math.log(1.0 - rng.nextDouble()) / lambda; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java new file mode 100644 index 0000000..ed40cc8 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/GaussianSampler.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; + +public class GaussianSampler implements Sampler<Double> +{ + double mean; + double std; + Random rng; + + public GaussianSampler(double mean, double std, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.mean = mean; + this.std = std; + } + + public Double sample() + { + return rng.nextGaussian() * std + mean; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/3bbbb557/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java new file mode 100644 index 0000000..0db8200 --- /dev/null +++ 
b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/bigpetstore/datagenerator/framework/samplers/MonteCarloSampler.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.bigpetstore.datagenerator.framework.samplers; + +import java.util.Random; + +import org.apache.bigtop.bigpetstore.datagenerator.framework.SeedFactory; +import org.apache.bigtop.bigpetstore.datagenerator.framework.pdfs.ProbabilityDensityFunction; + + +public class MonteCarloSampler<T> implements Sampler<T> +{ + private final Sampler<T> stateSampler; + private final Random rng; + private final ProbabilityDensityFunction<T> acceptancePDF; + + public MonteCarloSampler(Sampler<T> stateGenerator, + ProbabilityDensityFunction<T> acceptancePDF, + SeedFactory seedFactory) + { + this.acceptancePDF = acceptancePDF; + this.stateSampler = stateGenerator; + + rng = new Random(seedFactory.getNextSeed()); + } + + public T sample() throws Exception + { + while(true) + { + T proposedState = this.stateSampler.sample(); + double probability = acceptancePDF.probability(proposedState); + double r = rng.nextDouble(); + + if(r < probability) + { + return proposedState; + } + } + } + +}
