http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestCustomerTransactionParametersSamplerBuilder.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestCustomerTransactionParametersSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestCustomerTransactionParametersSamplerBuilder.java new file mode 100644 index 0000000..5da4ae3 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestCustomerTransactionParametersSamplerBuilder.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertTrue; + +import org.apache.bigtop.datagenerators.bigpetstore.Constants; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParameters; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParametersSamplerBuilder; +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.junit.Test; + +/** + * Verifies that parameters drawn from a sampler built by + * CustomerTransactionParametersSamplerBuilder fall inside the bounds declared in Constants. + */ +public class TestCustomerTransactionParametersSamplerBuilder +{ + + @Test + public void testBuild() throws Exception + { + SeedFactory seeds = new SeedFactory(1234); + Sampler<CustomerTransactionParameters> paramsSampler = + new CustomerTransactionParametersSamplerBuilder(seeds).build(); + + CustomerTransactionParameters drawn = paramsSampler.sample(); + + // number of pets is bounded on both sides + assertTrue(Constants.MIN_PETS <= drawn.countPets()); + assertTrue(Constants.MAX_PETS >= drawn.countPets()); + // so is the average purchase trigger time + assertTrue(Constants.PURCHASE_TRIGGER_TIME_MIN <= drawn.getAveragePurchaseTriggerTime()); + assertTrue(Constants.PURCHASE_TRIGGER_TIME_MAX >= drawn.getAveragePurchaseTriggerTime()); + // and the average transaction trigger time + assertTrue(Constants.TRANSACTION_TRIGGER_TIME_MIN <= drawn.getAverageTransactionTriggerTime()); + assertTrue(Constants.TRANSACTION_TRIGGER_TIME_MAX >= drawn.getAverageTransactionTriggerTime()); + } + +}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryInventory.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryInventory.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryInventory.java new file mode 100644 index 0000000..6b00eca --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryInventory.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.Map; + +import org.apache.bigtop.datagenerators.bigpetstore.Constants; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.PetSpecies; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Product; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ProductCategory; +import org.apache.bigtop.datagenerators.bigpetstore.generators.products.ProductCategoryBuilder; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParameters; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParametersSamplerBuilder; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.ProductCategoryInventory; +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.junit.Test; + +import com.google.common.collect.Maps; + +/** + * Exercises ProductCategoryInventory before and after a single simulated purchase. + */ +public class TestProductCategoryInventory +{ + + @Test + public void testPurchase() throws Exception + { + SeedFactory seedFactory = new SeedFactory(1234); + + CustomerTransactionParametersSamplerBuilder transParamsBuilder = new CustomerTransactionParametersSamplerBuilder(seedFactory); + Sampler<CustomerTransactionParameters> sampler = transParamsBuilder.build(); + + CustomerTransactionParameters parameters = sampler.sample(); + + ProductCategoryBuilder builder = new ProductCategoryBuilder(); + builder.addApplicableSpecies(PetSpecies.DOG); + builder.setAmountUsedPetPetAverage(1.0); + builder.setAmountUsedPetPetVariance(1.0); + builder.setDailyUsageRate(2.0); + + + ProductCategory category = builder.build(); + + ProductCategoryInventory inventory = new ProductCategoryInventory(category, parameters, seedFactory); + +
// before any purchase: exhaustion time and remaining amount are both expected to be 0 + // (expected value goes first so JUnit failure messages read correctly) + assertEquals(0.0, inventory.findExhaustionTime(), 0.0001); + assertEquals(0.0, inventory.findRemainingAmount(0.0), 0.0001); + + Map<String, Object> fields = Maps.newHashMap(); + fields.put(Constants.PRODUCT_CATEGORY, "dog food"); + fields.put(Constants.PRODUCT_QUANTITY, 30.0); + Product product = new Product(fields); + + // buy a quantity of 30.0 at time 1.0 + inventory.simulatePurchase(1.0, product); + + // the inventory must now last past the purchase time and be non-empty at that time + assertTrue(inventory.findExhaustionTime() > 1.0); + assertTrue(inventory.findRemainingAmount(1.0) > 0.0); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageSimulator.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageSimulator.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageSimulator.java new file mode 100644 index 0000000..22dbfd4 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageSimulator.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.ProductCategoryUsageSimulator; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.ProductCategoryUsageTrajectory; +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.Test; + +/** + * Checks the shape of a trajectory produced by ProductCategoryUsageSimulator: + * it must end at amount 0 and be monotone in both time and remaining amount. + */ +public class TestProductCategoryUsageSimulator +{ + + @Test + public void testSimulate() throws Exception + { + SeedFactory seeds = new SeedFactory(1234); + ProductCategoryUsageSimulator usageSimulator = new ProductCategoryUsageSimulator(2.0, 1.0, 1.0, seeds); + + ProductCategoryUsageTrajectory usage = usageSimulator.simulate(0.0, 30.0); + + // the simulated trajectory ends with nothing left + assertEquals(0.0, usage.getLastAmount(), 0.0001); + + // walk consecutive steps pairwise, starting from step 0 + Pair<Double, Double> earlier = usage.getStep(0); + int step = 1; + while(step < usage.size()) + { + Pair<Double, Double> later = usage.getStep(step); + // time never runs backwards + assertTrue(later.getLeft() >= earlier.getLeft()); + // the remaining amount never increases + assertTrue(later.getRight() <= earlier.getRight()); + earlier = later; + step++; + } + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageTrajectory.java 
---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageTrajectory.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageTrajectory.java new file mode 100644 index 0000000..230f06e --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestProductCategoryUsageTrajectory.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertEquals; + +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.ProductCategoryUsageTrajectory; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.Test; + +/** + * Tests appending to and querying a ProductCategoryUsageTrajectory. + */ +public class TestProductCategoryUsageTrajectory +{ + + @Test + public void testTrajectory() + { + double initialAmount = 30.0; + double initialTime = 0.0; + + ProductCategoryUsageTrajectory trajectory = new ProductCategoryUsageTrajectory(initialTime, initialAmount); + + // a new trajectory holds exactly the initial (time, amount) step; + // expected value goes first so JUnit failure messages read correctly + assertEquals(1, trajectory.size()); + + Pair<Double, Double> entry = trajectory.getStep(0); + assertEquals(initialTime, entry.getLeft(), 0.0001); + assertEquals(initialAmount, entry.getRight(), 0.0001); + + trajectory.append(1.0, 25.0); + + assertEquals(2, trajectory.size()); + + entry = trajectory.getStep(1); + assertEquals(1.0, entry.getLeft(), 0.0001); + assertEquals(25.0, entry.getRight(), 0.0001); + + assertEquals(1.0, trajectory.getLastTime(), 0.0001); + assertEquals(25.0, trajectory.getLastAmount(), 0.0001); + } + + @Test + public void testAmountAtTime() + { + ProductCategoryUsageTrajectory trajectory = new ProductCategoryUsageTrajectory(0.0, 30.0); + trajectory.append(1.0, 25.0); + trajectory.append(2.0, 20.0); + trajectory.append(3.0, 50.0); + trajectory.append(4.0, 40.0); + // two steps share time 4.0; the assertion below expects the later one (50.0) to win + trajectory.append(4.0, 50.0); + trajectory.append(5.0, 30.0); + + assertEquals(30.0, trajectory.amountAtTime(0.5), 0.0001); + assertEquals(50.0, trajectory.amountAtTime(4.0), 0.0001); + assertEquals(30.0, trajectory.amountAtTime(10.0), 0.0001); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionPurchasesHiddenMarkovModel.java ---------------------------------------------------------------------- diff --git 
a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionPurchasesHiddenMarkovModel.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionPurchasesHiddenMarkovModel.java new file mode 100644 index 0000000..984f796 --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionPurchasesHiddenMarkovModel.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; + +import org.apache.bigtop.datagenerators.bigpetstore.Constants; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.PetSpecies; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Product; +import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ProductCategory; +import org.apache.bigtop.datagenerators.bigpetstore.generators.products.ProductCategoryBuilder; +import org.apache.bigtop.datagenerators.bigpetstore.generators.purchase.MarkovPurchasingModel; +import org.apache.bigtop.datagenerators.bigpetstore.generators.purchase.PurchasingProcesses; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CategoryWeightFunction; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerInventory; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerInventoryBuilder; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParameters; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.CustomerTransactionParametersSamplerBuilder; +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.TransactionPurchasesHiddenMarkovModel; +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.markovmodels.MarkovModel; +import org.apache.bigtop.datagenerators.samplers.markovmodels.MarkovModelBuilder; +import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; +import org.apache.bigtop.datagenerators.samplers.wfs.ConditionalWeightFunction; +import org.junit.Test; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Tests TransactionPurchasesHiddenMarkovModel against a small fixture made of two + * product categories ("dog food" and "cat food") with ten products each. + */ +public class TestTransactionPurchasesHiddenMarkovModel +{ + + 
// Builds ten products of the given category with quantities 1.0 through 10.0. + private List<Product> createProducts(String category) + { + List<Product> products = Lists.newArrayList(); + + for(int i = 0; i < 10; i++) + { + Map<String, Object> fields = Maps.newHashMap(); + fields.put(Constants.PRODUCT_CATEGORY, category); + fields.put(Constants.PRODUCT_QUANTITY, (double) (i + 1)); + Product product = new Product(fields); + products.add(product); + } + + return products; + } + + // Builds a model in which every product is a start state and every ordered + // pair of products (including a product with itself) has transition weight 1.0. + private MarkovModel<Product> createMarkovModel(ProductCategory category) + { + MarkovModelBuilder<Product> markovBuilder = new MarkovModelBuilder<Product>(); + + for(Product product1 : category.getProducts()) + { + markovBuilder.addStartState(product1, 1.0); + for(Product product2 : category.getProducts()) + { + markovBuilder.addTransition(product1, product2, 1.0); + } + } + + return markovBuilder.build(); + } + + // Registers one Markov model per category under the keys "dog food" and "cat food". + protected PurchasingProcesses createProcesses(ProductCategory dogFoodCategory, + ProductCategory catFoodCategory, SeedFactory seedFactory) + { + MarkovModel<Product> dogFoodModel = createMarkovModel(dogFoodCategory); + MarkovModel<Product> catFoodModel = createMarkovModel(catFoodCategory); + + Map<String, MarkovModel<Product>> models = Maps.newHashMap(); + models.put("dog food", dogFoodModel); + models.put("cat food", catFoodModel); + + MarkovPurchasingModel profile = new MarkovPurchasingModel(models); + + return profile.buildProcesses(seedFactory); + } + + // Builds a category applicable to dogs for "dog food" and to cats for any other name. + protected ProductCategory createCategory(String category) + { + List<Product> products = createProducts(category); + + ProductCategoryBuilder builder = new ProductCategoryBuilder(); + + if("dog food".equals(category)) + { + builder.addApplicableSpecies(PetSpecies.DOG); + } + else + { + builder.addApplicableSpecies(PetSpecies.CAT); + } + + builder.setAmountUsedPetPetAverage(1.0); + builder.setAmountUsedPetPetVariance(1.0); + builder.setDailyUsageRate(2.0); + builder.setCategory(category); + // NOTE(review): this setter was previously called twice in a row with the same value; + // the duplicate was dropped. One of the two calls may have been meant for a + // different rate setter on ProductCategoryBuilder - confirm. + builder.setTriggerPurchaseRate(1.0 / 10.0); + + for(Product product : 
products) + { + builder.addProduct(product); + } + + return builder.build(); + } + + // Wires the two categories, their purchasing processes, a sampled parameter set and + // a customer inventory into the model under test; everything draws from seed 1234. + protected TransactionPurchasesHiddenMarkovModel createHMM() throws Exception + { + SeedFactory seedFactory = new SeedFactory(1234); + + ProductCategory dogFoodCategory = createCategory("dog food"); + ProductCategory catFoodCategory = createCategory("cat food"); + + PurchasingProcesses processes = createProcesses(dogFoodCategory, catFoodCategory, seedFactory); + + CustomerTransactionParametersSamplerBuilder transParamsBuilder = new CustomerTransactionParametersSamplerBuilder(seedFactory); + Sampler<CustomerTransactionParameters> sampler = transParamsBuilder.build(); + + CustomerTransactionParameters parameters = sampler.sample(); + + CustomerInventoryBuilder inventoryBuilder = new CustomerInventoryBuilder(parameters, seedFactory); + inventoryBuilder.addProductCategory(dogFoodCategory); + inventoryBuilder.addProductCategory(catFoodCategory); + CustomerInventory inventory = inventoryBuilder.build(); + + ConditionalWeightFunction<Double, Double> categoryWF = + new CategoryWeightFunction(parameters.getAveragePurchaseTriggerTime()); + + TransactionPurchasesHiddenMarkovModel hmm = new TransactionPurchasesHiddenMarkovModel(processes, + categoryWF, inventory, seedFactory); + + return hmm; + } + + @Test + public void testChooseCategory() throws Exception + { + TransactionPurchasesHiddenMarkovModel hmm = createHMM(); + + String category = hmm.chooseCategory(1.0, 0); + + // the choice is either the stop state or one of the two registered categories + assertNotNull(category); + assertTrue(category.equals(TransactionPurchasesHiddenMarkovModel.STOP_STATE) || + category.equals("dog food") || + category.equals("cat food")); + } + + @Test + public void testChooseProduct() throws Exception + { + TransactionPurchasesHiddenMarkovModel hmm = createHMM(); + + Product product = hmm.chooseProduct("dog food"); + + // constant-first equals: a missing category field fails the assertion instead of throwing + assertNotNull(product); + assertTrue("dog food".equals(product.getFieldValue(Constants.PRODUCT_CATEGORY))); + + product = hmm.chooseProduct("cat food"); + + 
assertNotNull(product); + assertTrue("cat food".equals(product.getFieldValue(Constants.PRODUCT_CATEGORY))); + } + + @Test + public void testSample() throws Exception + { + TransactionPurchasesHiddenMarkovModel hmm = createHMM(); + + List<Product> purchase = hmm.sample(1.0); + + assertTrue(purchase.size() > 0); + + for(Product product : purchase) + { + // no purchased product may be null, and each must belong to a known category + assertNotNull(product); + Object productCategory = product.getFieldValue(Constants.PRODUCT_CATEGORY); + assertTrue("dog food".equals(productCategory) || + "cat food".equals(productCategory)); + } + + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionTimePDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionTimePDF.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionTimePDF.java new file mode 100644 index 0000000..5bd0c9f --- /dev/null +++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/transaction/TestTransactionTimePDF.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.bigpetstore.generators.transaction; + +import static org.junit.Assert.assertEquals; + +import org.apache.bigtop.datagenerators.bigpetstore.generators.transaction.TransactionTimePDF; +import org.junit.Test; + +public class TestTransactionTimePDF +{ + + @Test + public void testProbability() throws Exception + { + TransactionTimePDF pdf = new TransactionTimePDF(); + + assertEquals(pdf.probability(0.5, 0.75), 0.0, 0.000001); + assertEquals(pdf.probability(0.5, 0.5), 1.0, 0.000001); + assertEquals(pdf.probability(0.75, 0.5), 1.0, 0.000001); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/README.md ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/README.md b/bigtop-data-generators/bigtop-samplers/README.md new file mode 100644 index 0000000..3a91c1b --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/README.md @@ -0,0 +1,49 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. 
You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +BigTop Samplers +=============== + +Library of interfaces and implementations of probability density +functions, probability mass functions, weight functions and samplers. + +Building and Testing +-------------------- +We use the Gradle build system for the BigTop Samplers library, so you'll need +to install Gradle on your system. +Once that's done, you can use Gradle to run the included unit tests +and build the samplers jar. + +To build: + + $ gradle build + +This will create several directories and a jar located at: + + build/libs/bigtop-samplers-1.1.0-SNAPSHOT.jar + +Building automatically runs the included unit tests. If you would prefer +to just run the unit tests, you can do so by: + + $ gradle test + +To clean up the build files, run: + + $ gradle clean + +To install the jar into your local Maven repository: + + $ gradle install http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/build.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/build.gradle b/bigtop-data-generators/bigtop-samplers/build.gradle new file mode 100644 index 0000000..84b5fe0 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/build.gradle @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +apply plugin: 'eclipse' +apply plugin: 'groovy' +apply plugin: 'java' +apply plugin: 'maven' +group = 'org.apache.bigtop' +version = '1.1.0-SNAPSHOT' + +jar { + + from { + configurations.runtime.collect { + it.isDirectory() ? it : zipTree(it) + } + } + + manifest { + attributes 'Title': 'BigTop Samplers', 'Version': version + } +} + +repositories { + mavenLocal() + mavenCentral() +} + +test { + // show standard out and error on console + testLogging.showStandardStreams = true + + // listen to events in the test execution lifecycle + beforeTest { descriptor -> + logger.lifecycle("Running test: " + descriptor) + } + + // listen to standard out and standard error of the test JVM(s) + onOutput { descriptor, event -> + logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) + } + +} + +dependencies { + compile 'com.google.guava:guava:18.0' + + compile 'org.apache.commons:commons-lang3:3.4' + compile 'org.apache.commons:commons-math3:3.5' + + testCompile 'junit:junit:4.+' +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/settings.gradle ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/settings.gradle b/bigtop-data-generators/bigtop-samplers/settings.gradle new file mode 100644 index 0000000..ef1ce16 --- /dev/null +++ 
b/bigtop-data-generators/bigtop-samplers/settings.gradle @@ -0,0 +1,16 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +rootProject.name = "bigtop-samplers" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/SeedFactory.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/SeedFactory.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/SeedFactory.java new file mode 100644 index 0000000..678a3dc --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/SeedFactory.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers; + +import java.util.Random; + +/** + * Hands out seed values drawn from a single underlying java.util.Random. + * + * A factory created with an explicit seed returns the same sequence of seeds on every run. + */ +public class SeedFactory +{ + // assigned exactly once by each constructor, hence final + final Random rng; + + /** + * Creates a factory backed by a Random constructed without an explicit seed. + */ + public SeedFactory() + { + rng = new Random(); + } + + /** + * Creates a factory backed by a Random constructed with the given seed. + * + * @param seed seed passed to the underlying Random + */ + public SeedFactory(long seed) + { + rng = new Random(seed); + } + + /** + * @return the next long drawn from the underlying Random + */ + public long getNextSeed() + { + return rng.nextLong(); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovModel.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovModel.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovModel.java new file mode 100644 index 0000000..ca51e7d --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovModel.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Serializable value holder for the two weight tables that describe a Markov
 * model: the weights used to pick a start state, and, per state, the weights
 * of moving to each following state.
 *
 * <p>The maps passed to the constructor are stored and returned as-is; no
 * copy is made.
 *
 * @param <T> type used to represent a state
 */
public class MarkovModel<T> implements Serializable
{
    private static final long serialVersionUID = 8378109656005603192L;

    final Map<T, Map<T, Double>> transitionWeights;
    final Map<T, Double> startWeights;

    /**
     * @param transitionWeights for each state, the weight of each possible next state
     * @param startWeights weight of each state as the initial state
     */
    public MarkovModel(Map<T, Map<T, Double>> transitionWeights, Map<T, Double> startWeights)
    {
        this.transitionWeights = transitionWeights;
        this.startWeights = startWeights;
    }

    /**
     * @return the transition weight table given at construction
     */
    public Map<T, Map<T, Double>> getTransitionWeights()
    {
        return transitionWeights;
    }

    /**
     * @return the start-state weights given at construction
     */
    public Map<T, Double> getStartWeights()
    {
        return startWeights;
    }

    /**
     * @return {@code MarkovModel(<start weights>,<transition weights>)}
     */
    @Override
    public String toString()
    {
        // The label previously read "MarkModel", which did not match the class name.
        return "MarkovModel(" + startWeights + "," + transitionWeights + ")";
    }

}
+++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovModelBuilder.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.markovmodels; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableTable; + +public class MarkovModelBuilder<S> +{ + ImmutableTable.Builder<S, S, Double> transitionWeights; + ImmutableMap.Builder<S, Double> startWeights; + + public MarkovModelBuilder() + { + transitionWeights = ImmutableTable.builder(); + startWeights = ImmutableMap.builder(); + } + + public static <T> MarkovModelBuilder<T> create() + { + return new MarkovModelBuilder<T>(); + } + + public void addStartState(S state, double weight) + { + startWeights.put(state, weight); + } + + public void addTransition(S state1, S state2, double weight) + { + transitionWeights.put(state1, state2, weight); + } + + public MarkovModel<S> build() + { + return new MarkovModel<S>(transitionWeights.build().rowMap(), startWeights.build()); + } + + +} 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovProcess.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovProcess.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovProcess.java new file mode 100644 index 0000000..99bdb15 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/markovmodels/MarkovProcess.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.samplers.markovmodels; + +import java.util.Map; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler; +import org.apache.bigtop.datagenerators.samplers.samplers.Sampler; + +import com.google.common.collect.ImmutableMap; + +public class MarkovProcess<T> implements Sampler<T> +{ + final ImmutableMap<T, Sampler<T>> transitionSamplers; + final Sampler<T> startStateSampler; + + T currentState; + + + public MarkovProcess(MarkovModel<T> model, SeedFactory factory) + { + Map<T, Map<T, Double>> transitionTable = model.getTransitionWeights(); + + startStateSampler = RouletteWheelSampler.create(model.getStartWeights(), factory); + + ImmutableMap.Builder<T, Sampler<T>> builder = ImmutableMap.builder(); + for(Map.Entry<T, Map<T, Double>> entry : transitionTable.entrySet()) + { + builder.put(entry.getKey(), RouletteWheelSampler.create(entry.getValue(), factory)); + } + + + this.transitionSamplers = builder.build(); + + currentState = null; + } + + public static <T> MarkovProcess<T> create(MarkovModel<T> model, SeedFactory factory) + { + return new MarkovProcess<T>(model, factory); + } + + public T sample() throws Exception + { + if(currentState == null) + { + currentState = startStateSampler.sample(); + return currentState; + } + + currentState = transitionSamplers.get(currentState).sample(); + return currentState; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ConditionalProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ConditionalProbabilityDensityFunction.java 
b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ConditionalProbabilityDensityFunction.java new file mode 100644 index 0000000..7a55b2e --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ConditionalProbabilityDensityFunction.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Contract for a density that is evaluated for one datum in the context of
 * a second, conditioning datum.
 *
 * @param <T> type of the datum whose probability is requested
 * @param <S> type of the datum that is conditioned on
 */
public interface ConditionalProbabilityDensityFunction<T, S> {

    /**
     * Evaluates the density.
     *
     * @param datum value to evaluate
     * @param conditionalDatum value the evaluation is conditioned on
     * @return the probability assigned by the implementation
     */
    double probability(T datum, S conditionalDatum);
}
+ */ +package org.apache.bigtop.datagenerators.samplers.pdfs; + +public class ExponentialPDF implements ProbabilityDensityFunction<Double> +{ + private final double lambda; + + public ExponentialPDF(double lambda) + { + this.lambda = lambda; + } + + public double probability(Double value) + { + return lambda * Math.exp(-1.0 * value * lambda); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/GaussianPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/GaussianPDF.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/GaussianPDF.java new file mode 100644 index 0000000..5cb4b9e --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/GaussianPDF.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.samplers.pdfs; + +public class GaussianPDF implements ProbabilityDensityFunction<Double> +{ + private double mean; + private double std; + + public GaussianPDF(double mean, double std) + { + this.mean = mean; + this.std = std; + } + + public double probability(Double value) + { + double diff = (mean - value) * (mean - value); + double var = std * std; + double exp = Math.exp(-1.0 * diff / (2.0 * var)); + + return exp / (std * Math.sqrt(2.0 * Math.PI)); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/JointPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/JointPDF.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/JointPDF.java new file mode 100644 index 0000000..aca60f4 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/JointPDF.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.samplers.pdfs; + +import java.util.List; + +public class JointPDF<T> implements ProbabilityDensityFunction<T> +{ + + double normalizationFactor; + ProbabilityDensityFunction<T>[] pdfs; + + public JointPDF(List<T> data, ProbabilityDensityFunction<T> ... pdfs) + { + this.pdfs = pdfs; + + normalizationFactor = 0.0d; + for(T datum : data) + { + double prob = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + prob *= pdf.probability(datum); + normalizationFactor += prob; + } + + } + + public double probability(T datum) + { + double weight = 1.0; + for(ProbabilityDensityFunction<T> pdf : pdfs) + weight *= pdf.probability(datum); + + return weight / normalizationFactor; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/MultinomialPDF.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/MultinomialPDF.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/MultinomialPDF.java new file mode 100644 index 0000000..6d39379 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/MultinomialPDF.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.pdfs; + +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ImmutableMap; + +public class MultinomialPDF<T> implements ProbabilityDensityFunction<T> +{ + private final ImmutableMap<T, Double> probabilities; + + public MultinomialPDF(Map<T, Double> probabilities) + { + this.probabilities = ImmutableMap.copyOf(probabilities); + } + + public Set<T> getData() + { + return probabilities.keySet(); + } + + public double probability(T value) + { + if(probabilities.containsKey(value)) + { + return probabilities.get(value); + } + + return 0.0; + } + + @Override + public String toString() + { + String str = ""; + for(Map.Entry<T, Double> entry: probabilities.entrySet()) + { + str += entry.getValue() + "," + entry.getKey() + "\n"; + } + return str; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ProbabilityDensityFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ProbabilityDensityFunction.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ProbabilityDensityFunction.java new file mode 100644 index 0000000..463e98e --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/pdfs/ProbabilityDensityFunction.java @@ 
/**
 * Contract for anything that can assign a probability to a single datum.
 *
 * @param <T> type of the data being evaluated
 */
public interface ProbabilityDensityFunction<T> {

    /**
     * Evaluates the density.
     *
     * @param datum value to evaluate
     * @return the probability assigned by the implementation
     */
    double probability(T datum);
}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.pdfs; + +public class UniformPDF<T> implements ProbabilityDensityFunction<T> +{ + private final double probability; + + public UniformPDF(long count) + { + probability = 1.0 / ((double) count); + } + + public UniformPDF(double probability) + { + this.probability = probability; + } + + public double probability(T datum) + { + return probability; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/BoundedMultiModalGaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/BoundedMultiModalGaussianSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/BoundedMultiModalGaussianSampler.java new file mode 100644 index 0000000..da25c86 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/BoundedMultiModalGaussianSampler.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.List; +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.collect.ImmutableList; + +public class BoundedMultiModalGaussianSampler implements Sampler<Double> +{ + ImmutableList<Pair<Double, Double>> distributions; + + double min; + double max; + Random rng; + + public BoundedMultiModalGaussianSampler(List<Pair<Double, Double>> distributions, double min, double max, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.distributions = ImmutableList.copyOf(distributions); + + this.min = min; + this.max = max; + } + + public Double sample() + { + while(true) + { + int idx = rng.nextInt(distributions.size()); + + double mean = distributions.get(idx).getLeft(); + double std = distributions.get(idx).getRight(); + + double value = mean + rng.nextGaussian() * std; + + if (value >= this.min && value <= this.max) + { + return value; + } + } + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ConditionalSampler.java 
---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ConditionalSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ConditionalSampler.java new file mode 100644 index 0000000..611aea9 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ConditionalSampler.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Contract for a sampler whose draw depends on a conditioning value.
 *
 * @param <T> type of the sampled value
 * @param <S> type of the conditioning value
 */
public interface ConditionalSampler<T, S> {

    /**
     * Draws one value.
     *
     * @param conditional value the draw is conditioned on
     * @return the sampled value
     * @throws Exception if the implementation cannot produce a sample
     */
    T sample(S conditional) throws Exception;
}
+ */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +public class DoubleSequenceSampler implements Sampler<Double> +{ + Double start; + Double end; + Double step; + Double next; + + public DoubleSequenceSampler() + { + start = 0.0; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start) + { + this.start = start; + end = null; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end) + { + this.start = start; + this.end = end; + step = 1.0; + next = start; + } + + public DoubleSequenceSampler(Double start, Double end, Double step) + { + this.start = start; + this.end = end; + this.step = step; + next = start; + } + + public Double sample() throws Exception + { + if(end == null || next < end) + { + Double current = next; + next = current + step; + return current; + } + + throw new Exception("All values have been sampled"); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ExponentialSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ExponentialSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ExponentialSampler.java new file mode 100644 index 0000000..adfbccc --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/ExponentialSampler.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; + +public class ExponentialSampler implements Sampler<Double> +{ + final private Random rng; + final private double lambda; + + public ExponentialSampler(double lambda, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.lambda = lambda; + } + + public Double sample() + { + return - Math.log(1.0 - rng.nextDouble()) / lambda; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/GaussianSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/GaussianSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/GaussianSampler.java new file mode 100644 index 0000000..c6b2ea0 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/GaussianSampler.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; + +public class GaussianSampler implements Sampler<Double> +{ + double mean; + double std; + Random rng; + + public GaussianSampler(double mean, double std, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.mean = mean; + this.std = std; + } + + public Double sample() + { + return rng.nextGaussian() * std + mean; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/MonteCarloSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/MonteCarloSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/MonteCarloSampler.java new file mode 100644 index 0000000..bd060e5 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/MonteCarloSampler.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; + + +public class MonteCarloSampler<T> implements Sampler<T> +{ + private final Sampler<T> stateSampler; + private final Random rng; + private final ProbabilityDensityFunction<T> acceptancePDF; + + public MonteCarloSampler(Sampler<T> stateGenerator, + ProbabilityDensityFunction<T> acceptancePDF, + SeedFactory seedFactory) + { + this.acceptancePDF = acceptancePDF; + this.stateSampler = stateGenerator; + + rng = new Random(seedFactory.getNextSeed()); + } + + public T sample() throws Exception + { + while(true) + { + T proposedState = this.stateSampler.sample(); + double probability = acceptancePDF.probability(proposedState); + double r = rng.nextDouble(); + + if(r < probability) + { + return proposedState; + } + } + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/RouletteWheelSampler.java ---------------------------------------------------------------------- diff --git 
a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/RouletteWheelSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/RouletteWheelSampler.java new file mode 100644 index 0000000..ec97ac5 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/RouletteWheelSampler.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Collection; +import java.util.Map; +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.pdfs.MultinomialPDF; +import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction; +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; + +public class RouletteWheelSampler<T> implements Sampler<T> +{ + Random rng; + final ImmutableList<Pair<T, Double>> wheel; + + public static <T> RouletteWheelSampler<T> create(Map<T, Double> domainWeights, SeedFactory factory) + { + return new RouletteWheelSampler<T>(domainWeights, factory); + } + + public static <T> RouletteWheelSampler<T> create(MultinomialPDF<T> pdf, SeedFactory factory) + { + return new RouletteWheelSampler<T>(pdf.getData(), pdf, factory); + } + + public static <T> RouletteWheelSampler<T> create(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory) + { + return new RouletteWheelSampler<T>(data, pdf, factory); + } + + public static <T> RouletteWheelSampler<T> createUniform(Collection<T> data, SeedFactory factory) + { + Map<T, Double> pdf = Maps.newHashMap(); + for(T datum : data) + { + pdf.put(datum, 1.0); + } + + return create(pdf, factory); + } + + public RouletteWheelSampler(Map<T, Double> domainWeights, SeedFactory factory) + { + this.rng = new Random(factory.getNextSeed()); + this.wheel = this.normalize(domainWeights); + } + + public RouletteWheelSampler(Collection<T> data, ProbabilityDensityFunction<T> pdf, SeedFactory factory) + { + this.rng = new Random(factory.getNextSeed()); + + Map<T, Double> domainWeights = Maps.newHashMap(); + for(T datum : data) + { + double prob = pdf.probability(datum); + domainWeights.put(datum, prob); + } + + this.wheel = this.normalize(domainWeights); + } + + private ImmutableList<Pair<T, 
Double>> normalize(Map<T, Double> domainWeights) + { + double weightSum = 0.0; + for(Map.Entry<T, Double> entry : domainWeights.entrySet()) + { + weightSum += entry.getValue(); + } + + double cumProb = 0.0; + ImmutableList.Builder<Pair<T, Double>> builder = ImmutableList.builder(); + for(Map.Entry<T, Double> entry : domainWeights.entrySet()) + { + double prob = entry.getValue() / weightSum; + cumProb += prob; + + builder.add(Pair.of(entry.getKey(), cumProb)); + } + + return builder.build(); + } + + public T sample() + { + double r = rng.nextDouble(); + for(Pair<T, Double> cumProbPair : wheel) + if(r < cumProbPair.getValue()) + return cumProbPair.getKey(); + + throw new IllegalStateException("Invalid state -- RouletteWheelSampler should never fail to sample!"); + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/Sampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/Sampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/Sampler.java new file mode 100644 index 0000000..a4d5d36 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/Sampler.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * A source of values of type {@code T}; each call to {@link #sample()} yields
 * one value.
 *
 * @param <T> type of the sampled values
 */
public interface Sampler<T>
{
    /**
     * Produces the next value.
     *
     * @return the sampled value
     * @throws Exception if the implementation cannot produce a value
     */
    T sample() throws Exception;
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +public class SequenceSampler implements Sampler<Integer> +{ + Integer start; + Integer end; + Integer step; + Integer next; + + public SequenceSampler() + { + start = 0; + end = null; + step = 1; + next = start; + } + + public SequenceSampler(Integer start) + { + this.start = start; + end = null; + step = 1; + next = start; + } + + public SequenceSampler(Integer start, Integer end) + { + this.start = start; + this.end = end; + step = 1; + next = start; + } + + public SequenceSampler(Integer start, Integer end, Integer step) + { + this.start = start; + this.end = end; + this.step = step; + next = start; + } + + public Integer sample() throws Exception + { + if(end == null || next < end) + { + Integer current = next; + next = current + step; + return current; + } + + throw new Exception("All values have been sampled"); + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/StatefulMonteCarloSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/StatefulMonteCarloSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/StatefulMonteCarloSampler.java new file mode 100644 index 0000000..4f0cf9b --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/StatefulMonteCarloSampler.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; +import org.apache.bigtop.datagenerators.samplers.pdfs.ConditionalProbabilityDensityFunction; + + +public class StatefulMonteCarloSampler<T> implements Sampler<T> +{ + private final Sampler<T> stateSampler; + private final Random rng; + private final ConditionalProbabilityDensityFunction<T, T> acceptancePDF; + private T currentState; + + public StatefulMonteCarloSampler(Sampler<T> stateGenerator, + ConditionalProbabilityDensityFunction<T, T> acceptancePDF, + T initialState, + SeedFactory seedFactory) + { + this.acceptancePDF = acceptancePDF; + this.stateSampler = stateGenerator; + + rng = new Random(seedFactory.getNextSeed()); + + this.currentState = initialState; + } + + public T sample() throws Exception + { + while(true) + { + T proposedState = this.stateSampler.sample(); + double probability = acceptancePDF.probability(proposedState, currentState); + double r = rng.nextDouble(); + + if(r < probability) + { + this.currentState = proposedState; + return proposedState; + } + } + } + +} 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformIntSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformIntSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformIntSampler.java new file mode 100644 index 0000000..0cb7520 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformIntSampler.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; + +public class UniformIntSampler implements Sampler<Integer> +{ + int lowerbound; + int upperbound; + Random rng; + + /* + * Upperbound is inclusive + */ + public UniformIntSampler(int lowerbound, int upperbound, SeedFactory seedFactory) + { + this.lowerbound = lowerbound; + this.upperbound = upperbound; + rng = new Random(seedFactory.getNextSeed()); + } + + public Integer sample() + { + int range = upperbound + 1 - lowerbound; + return rng.nextInt(range) + lowerbound; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformSampler.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformSampler.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformSampler.java new file mode 100644 index 0000000..a90ff89 --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/samplers/UniformSampler.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.bigtop.datagenerators.samplers.samplers; + +import java.util.Random; + +import org.apache.bigtop.datagenerators.samplers.SeedFactory; + +public class UniformSampler implements Sampler<Double> +{ + final Random rng; + final double lowerbound; + final double upperbound; + + public UniformSampler(SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + lowerbound = 0.0; + upperbound = 1.0; + } + + public UniformSampler(double lowerbound, double upperbound, SeedFactory seedFactory) + { + rng = new Random(seedFactory.getNextSeed()); + this.lowerbound = lowerbound; + this.upperbound = upperbound; + } + + public Double sample() + { + return (upperbound - lowerbound) * rng.nextDouble() + lowerbound; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/15af83eb/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/ConditionalWeightFunction.java ---------------------------------------------------------------------- diff --git a/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/ConditionalWeightFunction.java b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/ConditionalWeightFunction.java new file mode 100644 index 0000000..93a5b2c --- /dev/null +++ b/bigtop-data-generators/bigtop-samplers/src/main/java/org/apache/bigtop/datagenerators/samplers/wfs/ConditionalWeightFunction.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
/**
 * Assigns a weight to a datum in the context of a given condition.
 *
 * @param <T> type of the datum being weighted
 * @param <S> type of the conditioning value
 */
public interface ConditionalWeightFunction<T, S>
{
    /**
     * @param datum value to weight
     * @param given condition the weight depends on
     * @return the weight of {@code datum} under {@code given}
     */
    double weight(T datum, S given);
}
/**
 * Assigns a weight to a datum.
 *
 * @param <T> type of the datum being weighted
 */
public interface WeightFunction<T>
{
    /**
     * @param datum value to weight
     * @return the weight of {@code datum}
     */
    double weight(T datum);
}
