http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java deleted file mode 100644 index 0ca7444..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/etl/PigCSVCleaner.java +++ /dev/null @@ -1,156 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.etl; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants.OUTPUTS; -import org.apache.bigtop.bigpetstore.util.DeveloperTools; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; - -/** - * This class operates by ETL'ing the data-set into pig. - * The pigServer is persisted through the life of the class, so that the - * intermediate data sets created in the constructor can be reused. - */ -public class PigCSVCleaner { - - PigServer pigServer; - - private static Path getCleanedTsvPath(Path outputPath) { - return new Path(outputPath, OUTPUTS.tsv.name()); - } - - public PigCSVCleaner(Path inputPath, Path outputPath, ExecType ex, File... scripts) - throws Exception { - FileSystem fs = FileSystem.get(inputPath.toUri(), new Configuration()); - - if(! fs.exists(inputPath)){ - throw new RuntimeException("INPUT path DOES NOT exist : " + inputPath); - } - - if(fs.exists(outputPath)){ - throw new RuntimeException("OUTPUT already exists : " + outputPath); - } - // run pig in local mode - pigServer = new PigServer(ex); - - /** - * First, split the tabs up. - * - * BigPetStore,storeCode_OK,2 1,yang,jay,3,flea collar,69.56,Mon Dec 15 23:33:49 EST 1969 - * - * ("BigPetStore,storeCode_OK,2", "1,yang,jay,3,flea collar,69.56,Mon Dec 15 23:33:49 EST 1969") - */ - pigServer.registerQuery("csvdata = LOAD '<i>' AS (ID,DETAILS);".replaceAll("<i>", inputPath.toString())); - - // currentCustomerId, firstName, lastName, product.id, product.name.toLowerCase, product.price, date - /** - * Now, we want to split the two tab delimited fields into uniform - * fields of comma separated values. 
To do this, we 1) Internally split - * the FIRST and SECOND fields by commas "a,b,c" --> (a,b,c) 2) FLATTEN - * the FIRST and SECOND fields. (d,e) (a,b,c) -> d e a b c - */ - pigServer.registerQuery( - "id_details = FOREACH csvdata GENERATE " - + "FLATTEN(STRSPLIT(ID, ',', 3)) AS " + - "(drop, code, transaction) ," - - + "FLATTEN(STRSPLIT(DETAILS, ',', 7)) AS " + - "(custId, fname, lname, productId, product:chararray, price, date);"); - pigServer.registerQuery("mahout_records = FOREACH id_details GENERATE custId, productId, 1;"); - pigServer.store("id_details", getCleanedTsvPath(outputPath).toString()); - pigServer.store("mahout_records", new Path(outputPath, OUTPUTS.MahoutPaths.Mahout.name()).toString()); - /** - * Now we run scripts... this is where you can add some - * arbitrary analytics. - * - * We add "input" and "output" parameters so that each - * script can read them and use them if they want. - * - * Otherwise, just hardcode your inputs into your pig scripts. - */ - int i = 0; - for(File script : scripts) { - Map<String,String> parameters = new HashMap<>(); - parameters.put("input", getCleanedTsvPath(outputPath).toString()); - - Path dir = outputPath.getParent(); - Path adHocOut = new Path(dir, OUTPUTS.pig_ad_hoc_script.name() + (i++)); - System.out.println("Setting default output to " + adHocOut); - parameters.put("output", adHocOut.toString()); - pigServer.registerScript(script.getAbsolutePath(), parameters); - } - } - - private static File[] files(String[] args,int startIndex) { - List<File> files = new ArrayList<File>(); - for(int i = startIndex ; i < args.length ; i++) { - File f = new File(args[i]); - if(! f.exists()) { - throw new RuntimeException("Pig script arg " + i + " " + f.getAbsolutePath() + " not found. "); - } - files.add(f); - } - System.out.println( - "Ad-hoc analytics:"+ - "Added " + files.size() + " pig scripts to post process. 
"+ - "Each one will be given $input and $output arguments."); - return files.toArray(new File[]{}); - } - - public static void main(final String[] args) throws Exception { - System.out.println("Starting pig etl " + args.length); - Configuration c = new Configuration(); - int res = ToolRunner.run(c, new Tool() { - Configuration conf; - @Override - public void setConf(Configuration conf) { - this.conf=conf; - } - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public int run(String[] args) throws Exception { - DeveloperTools.validate( - args, - "generated data directory", - "pig output directory"); - new PigCSVCleaner( - new Path(args[0]), - new Path(args[1]), - ExecType.MAPREDUCE, - files(args,2)); - return 0; - } - }, args); - System.exit(res); - } -} \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java deleted file mode 100755 index 6c8beef..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/BPSGenerator.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.bigtop.bigpetstore.generator; - -import java.io.IOException; -import java.util.Date; - -import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants; -import org.apache.bigtop.bigpetstore.util.DeveloperTools; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.lib.MultipleOutputs; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Mapper.Context; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import static org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat.props; - -/** - * This is a mapreduce implementation of a generator of a large sentiment - * analysis data set. The scenario is as follows: - * - * The number of records will (roughly) correspond to the output size - each - * record is about 80 bytes. - * - * 1KB set bigpetstore_records=10 1MB set bigpetstore_records=10,000 1GB set - * bigpetstore_records=10,000,000 1TB set bigpetstore_records=10,000,000,000 - */ -public class BPSGenerator { - - public static final int DEFAULT_NUM_RECORDS = 100; - - final static Logger log = LoggerFactory.getLogger(BPSGenerator.class); - - public enum props { - bigpetstore_records - } - - public static Job createJob(Path output, int records) throws IOException { - Configuration c = new Configuration(); - c.setInt(props.bigpetstore_records.name(), DEFAULT_NUM_RECORDS); - return getCreateTransactionRecordsJob(output, c); - } - - public static Job getCreateTransactionRecordsJob(Path outputDir, Configuration conf) - throws IOException { - Job job = new Job(conf, "PetStoreTransaction_ETL_" + System.currentTimeMillis()); - // recursively delete the data set if it exists. 
- FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); - job.setJarByClass(BPSGenerator.class); - job.setMapperClass(MyMapper.class); - // use the default reducer - // job.setReducerClass(PetStoreTransactionGeneratorJob.Red.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(Text.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - job.setInputFormatClass(PetStoreTransactionsInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(job, outputDir); - return job; - } - - public static class MyMapper extends Mapper<Text, Text, Text, Text> { - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - } - - protected void map(Text key, Text value, Context context) - throws java.io.IOException, InterruptedException { - context.write(key, value); - } - } - - public static void main(String args[]) throws Exception { - if (args.length != 2) { - System.err.println("USAGE : [number of records] [output path]"); - System.exit(0); - } else { - Configuration conf = new Configuration(); - DeveloperTools.validate(args, "# of records", "output path"); - conf.setInt(PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), - Integer.parseInt(args[0])); - getCreateTransactionRecordsJob(new Path(args[1]), conf).waitForCompletion(true); - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/CustomerGenerator.scala ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/CustomerGenerator.scala b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/CustomerGenerator.scala deleted file mode 100644 index 0223c8d..0000000 --- 
a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/CustomerGenerator.scala +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p/> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p/> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.generator - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem -import org.apache.bigtop.bigpetstore.generator.util.State -import org.apache.hadoop.fs.Path -import parquet.org.codehaus.jackson.format.DataFormatDetector -import org.slf4j.LoggerFactory -import java.util.{Collection => JavaCollection} -import scala.collection.JavaConversions.asJavaCollection -import java.util.Random -import scala.collection.mutable.{HashMap, Set, MultiMap} -import scala.collection.immutable.NumericRange - -/** - * This class generates random customer data. The generated customer - * ids will be consecutive. The client code that generates the transactions - * records needs to know the available customer ids. If we keep the customer - * ids consecutive here. we don't have to store those ids in memory, or perform - * costly lookups. Once we introduce something that allows efficient lookup - * of data, we can do something else as well. 
- * - * The generated customer ids will start from 1. So, if we have 100 customers, - * the ids will be [1, 100]. - */ -class CustomerGenerator(val desiredCustomerCount: Int, val outputPath: Path) { - private val logger = LoggerFactory.getLogger(getClass) - private val random = new Random; - private val assertion = "The generateCustomerRecords() hasn't been called yet"; - private var customerFileGenerated = false - private val _stateToCustomerIds = new HashMap[State, NumericRange[Long]] - - def isCustomerFileGenrated = customerFileGenerated - - def customerIds(state: State) = { - assert(customerFileGenerated, assertion) - _stateToCustomerIds(state) - } - - def generateCustomerRecords() = { - val config = new Configuration - val fs = FileSystem.getLocal(config) - - assert(!fs.exists(outputPath)) - - val outputStream = fs.create(outputPath) - - var currentId: Long = 1 - logger.info("Generating customer records at: {}", fs.pathToFile(outputPath)) - for (state <- State.values(); - stateCustomerCount = (state.probability * desiredCustomerCount) toLong; - random = new Random(state.hashCode); - i <- 1L to stateCustomerCount) { - val customerRecord = CustomerGenerator.createRecord(currentId, state, random); - logger.info("generated customer: {}", customerRecord) - outputStream.writeBytes(customerRecord) - - if(i == 1) { - val stateCustomerIdRange = currentId until (currentId + stateCustomerCount); - _stateToCustomerIds += (state -> stateCustomerIdRange) - } - currentId += 1 - } - - println(_stateToCustomerIds) - outputStream.flush - outputStream.close - customerFileGenerated = true - } -} - -object CustomerGenerator { - val OUTPUT_FILE_NAME = "customers" - - private def createRecord(id: Long, state: State, r: Random) = { - val firstName = DataForger.firstName - val lastName = DataForger.lastName - s"$id\t${DataForger.firstName(r)}\t${DataForger.lastName(r)}\t${state.name}\n" - } -} \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java deleted file mode 100755 index 71aa6d6..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransaction.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.generator; - -import java.util.Date; - -public interface PetStoreTransaction { - - public String getFirstName(); - - public String getLastName(); - - public String getProduct(); - - public Date getDate(); - - public Integer getPrice(); - -} http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java deleted file mode 100755 index d350cc8..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionInputSplit.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.generator; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.bigtop.bigpetstore.generator.util.State; -import org.apache.commons.lang3.Range; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.InputSplit; - -/** - * What does an `InputSplit` actually do? From the Javadocs, it looks like ... - * absolutely nothing. - * - * Note: for some reason, you *have* to implement Writable, even if your methods - * do nothing, or you will got strange and un-debuggable null pointer - * exceptions. - */ -public class PetStoreTransactionInputSplit extends InputSplit implements - Writable { - - public PetStoreTransactionInputSplit() { - } - - public int records; - public State state; - public Range<Long> customerIdRange; - - public PetStoreTransactionInputSplit(int records, Range<Long> customerIdRange, State state) { - this.records = records; - this.state = state; - this.customerIdRange = customerIdRange; - } - - public void readFields(DataInput dataInputStream) throws IOException { - records = dataInputStream.readInt(); - state = State.valueOf(dataInputStream.readUTF()); - customerIdRange = Range.between(dataInputStream.readLong(), dataInputStream.readLong()); - } - - public void write(DataOutput dataOutputStream) throws IOException { - dataOutputStream.writeInt(records); - dataOutputStream.writeUTF(state.name()); - dataOutputStream.writeLong(customerIdRange.getMinimum()); - dataOutputStream.writeLong(customerIdRange.getMaximum()); - } - - @Override - public String[] getLocations() throws IOException, InterruptedException { - return new String[] {}; - } - - @Override - public long getLength() throws IOException, InterruptedException { - return records; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionsInputFormat.java 
---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionsInputFormat.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionsInputFormat.java deleted file mode 100755 index 4c22e36..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/PetStoreTransactionsInputFormat.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.generator; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.KeyVal; -import org.apache.bigtop.bigpetstore.generator.util.State; -import org.apache.commons.lang3.Range; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; - -/** - * A simple input split that fakes input. - */ -public class PetStoreTransactionsInputFormat extends - FileInputFormat<Text, Text> { - - @Override - public RecordReader<Text, Text> createRecordReader( - final InputSplit inputSplit, TaskAttemptContext arg1) - throws IOException, InterruptedException { - return new RecordReader<Text, Text>() { - - @Override - public void close() throws IOException { - - } - - /** - * We need the "state" information to generate records. - Each state - * has a probability associated with it, so that our data set can be - * realistic (i.e. Colorado should have more transactions than rhode - * island). - * - * - Each state also will its name as part of the key. - * - * - This task would be distributed, for example, into 50 nodes on a - * real cluster, each creating the data for a given state. - */ - - PetStoreTransactionInputSplit bpsInputplit = (PetStoreTransactionInputSplit) inputSplit; - int records = bpsInputplit.records; - // TODO why not send the whole InputSplit there? 
- Iterator<KeyVal<String, String>> data = - (new TransactionIteratorFactory(records, bpsInputplit.customerIdRange, bpsInputplit.state)).data(); - KeyVal<String, String> currentRecord; - - @Override - public Text getCurrentKey() throws IOException, - InterruptedException { - return new Text(currentRecord.key()); - } - - @Override - public Text getCurrentValue() throws IOException, - InterruptedException { - return new Text(currentRecord.value()); - } - - @Override - public void initialize(InputSplit arg0, TaskAttemptContext arg1) - throws IOException, InterruptedException { - } - - @Override - public boolean nextKeyValue() throws IOException, - InterruptedException { - if (data.hasNext()) { - currentRecord = data.next(); - return true; - } - return false; - } - - @Override - public float getProgress() throws IOException, InterruptedException { - return 0f; - } - - }; - } - - public enum props { - bigpetstore_records - } - - @Override - public List<InputSplit> getSplits(JobContext arg) throws IOException { - int numRecordsDesired = arg - .getConfiguration() - .getInt(PetStoreTransactionsInputFormat.props.bigpetstore_records - .name(), -1); - if (numRecordsDesired == -1) { - throw new RuntimeException( - "# of total records not set in configuration object: " - + arg.getConfiguration()); - } - - List<InputSplit> list = new ArrayList<InputSplit>(); - long customerIdStart = 1; - for (State s : State.values()) { - int numRecords = numRecords(numRecordsDesired, s.probability); - // each state is assigned a range of customer-ids from which it can choose. - // The number of customers can be as many as the number of transactions. 
- Range<Long> customerIdRange = Range.between(customerIdStart, customerIdStart + numRecords - 1); - PetStoreTransactionInputSplit split = - new PetStoreTransactionInputSplit(numRecords, customerIdRange, s); - System.out.println(s + " _ " + split.records); - list.add(split); - customerIdStart += numRecords; - } - return list; - } - - private int numRecords(int numRecordsDesired, float probability) { - return (int) (Math.ceil(numRecordsDesired * probability)); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java deleted file mode 100644 index 54ae8fe..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p/> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p/> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.generator.util; - -import java.math.BigDecimal; -import static org.apache.bigtop.bigpetstore.generator.util.ProductType.*; - -public enum Product { - DOG_FOOD(DOG, 10.50), - ORGANIC_DOG_FOOD(DOG, 16.99), - STEEL_LEASH(DOG, 19.99), - FUZZY_COLLAR(DOG, 24.90), - LEATHER_COLLAR(DOG, 18.90), - CHOKE_COLLAR(DOG, 15.50), - DOG_HOUSE(DOG, 109.99), - CHEWY_BONE(DOG, 20.10), - DOG_VEST(DOG, 19.99), - DOG_SOAP(DOG, 5.45), - - CAT_FOOD(CAT, 7.50), - FEEDER_BOWL(CAT, 10.99), - LITTER_BOX(CAT, 24.95), - CAT_COLLAR(CAT, 7.95), - CAT_BLANKET(CAT, 14.49), - - TURTLE_PELLETS(TURTLE, 4.95), - TURTLE_FOOD(TURTLE, 10.90), - TURTLE_TUB(TURTLE, 40.45), - - FISH_FOOD(FISH, 12.50), - SALMON_BAIT(FISH, 29.95), - FISH_BOWL(FISH, 20.99), - AIR_PUMP(FISH, 13.95), - FILTER(FISH, 34.95), - - DUCK_COLLAR(DUCK, 13.25), - DUCK_FOOD(DUCK, 20.25), - WADING_POOL(DUCK, 45.90); - - /* - ANTELOPE_COLLAR(OTHER, 19.90), - ANTELOPE_SNACKS(OTHER, 29.25), - RODENT_CAGE(OTHER, 39.95), - HAY_BALE(OTHER, 4.95), - COW_DUNG(OTHER, 1.95), - SEAL_SPRAY(OTHER, 24.50), - SNAKE_BITE_OINTMENT(OTHER, 29.90); - */ - private final BigDecimal price; - public final ProductType productType; - private Product(ProductType productType, double price) { - this.price = BigDecimal.valueOf(price); - this.productType = productType; - } - - public int id() { - return this.ordinal(); - } - - public BigDecimal price() { - return this.price; - } - - -} http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java deleted file mode 100644 index af9ea7f..0000000 --- 
a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p/> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p/> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.generator.util; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -public enum ProductType { - DOG, CAT, TURTLE, FISH, DUCK; - - private List<Product> products; - - public List<Product> getProducts() { - if(products == null) { - generateProductList(); - } - return products; - } - - private void generateProductList() { - List<Product> products = new ArrayList<>(); - for(Product p : Product.values()) { - if(p.productType == this) { - products.add(p); - } - } - this.products = Collections.unmodifiableList(products); - } - -} http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java deleted file mode 100644 
/**
 * Each "state" has a pet store that receives a certain "proportion" of the
 * transactions.
 */
public enum State {
  // Every constant carries its relative probability.
  AZ(.1f), AK(.1f), CT(.1f), OK(.1f), CO(.1f), CA(.3f), NY(.2f);

  /** Shared random source; public and reassignable, as in the original. */
  public static Random rand = new Random();

  /** Relative probability attached to this state. */
  public float probability;

  State(float weight) {
    probability = weight;
  }
}
// Runs the two Mahout ALS jobs back to back: ParallelALSFactorizationJob
// first, then RecommenderJob on the factorization output. Both are launched
// through ToolRunner.run.
//
// The original note on this class states that both Mahout job classes also
// have their own main() methods, so the same pipeline could be driven from the
// command line by running the two mains one after the other in separate JVMs.
// That would also avoid the shared-static-state problem described on
// resetFlagInSharedAlsMapper below.
class ItemRecommender(private val inputDir: String,
        private val factorizationOutputDir: String,
        private val recommendationsOutputDir: String) {
  private val recommenderJob = new RecommenderJob
  private val factorizationJob = new ParallelALSFactorizationJob

  // Evaluated on every access, so each use gets a fresh timestamped path.
  private def tempDir = "/tmp/mahout_" + System.currentTimeMillis

  // Runs the ALS factorization and returns the exit code reported by ToolRunner.
  private def performAlsFactorization(): Int = {
    ToolRunner.run(factorizationJob, Array(
      "--input", inputDir,
      "--output", factorizationOutputDir,
      "--lambda", "0.1",
      "--tempDir", tempDir,
      "--implicitFeedback", "false",
      "--alpha", "0.8",
      "--numFeatures", "2",
      "--numIterations", "5",
      "--numThreadsPerSolver", "1"))
  }

  // Runs the recommender on the factorization output and returns the exit
  // code reported by ToolRunner.
  private def generateRecommendations(): Int = {
    ToolRunner.run(recommenderJob, Array(
      "--input", factorizationOutputDir + "/userRatings/",
      "--userFeatures", factorizationOutputDir + "/U/",
      "--itemFeatures", factorizationOutputDir + "/M/",
      "--numRecommendations", "1",
      "--output", recommendationsOutputDir,
      "--maxRating", "1"))
  }

  // Runs factorization followed by recommendation inside one JVM.
  //
  // According to the original authors, the two jobs share a static variable
  // that is not handled correctly, which results in a class-cast exception
  // when both run in the same VM. That is why resetFlagInSharedAlsMapper is
  // called between them.
  //
  // Returns the first non-zero exit code, or the recommender's exit code when
  // the factorization succeeded. The recommender is not started on top of a
  // factorization that reported failure.
  def recommend: Int = {
    val factorizationStatus = performAlsFactorization()
    resetFlagInSharedAlsMapper
    if (factorizationStatus != 0) factorizationStatus
    else generateRecommendations()
  }

  // Necessary only for local execution in a single JVM. If
  // performAlsFactorization() and generateRecommendations() ran in separate
  // JVM instances this would be taken care of automatically. To run the two
  // as one task, the static state they leave behind has to be cleaned up, and
  // there is no legitimate way to do that directly; hence the reflective call
  // to the non-public SharingMapper.reset. The original authors note that this
  // clean-up should have been performed by ParallelALSFactorizationJob itself
  // once the job finished.
  // TODO: remove this when a better way comes along, or when
  // ParallelALSFactorizationJob takes responsibility.
  private def resetFlagInSharedAlsMapper {
    val resetMethod = classOf[SharingMapper[_, _, _, _, _]].getDeclaredMethod("reset")
    resetMethod setAccessible true
    resetMethod.invoke(null)
  }
}

// Command-line entry point. Expects three arguments: the input directory, the
// factorization output directory and the recommendations output directory.
object ItemRecommender {
  def main(args: Array[String]): Unit = {
    val res = ToolRunner.run(new Configuration(), new Tool() {
      var conf: Configuration = _

      override def setConf(conf: Configuration): Unit = {
        this.conf = conf
      }

      override def getConf(): Configuration = this.conf

      // Returns the pipeline's exit code so that a failed job is visible in
      // the process exit status instead of always reporting success.
      override def run(toolArgs: Array[String]): Int = {
        if (toolArgs.length < 3) {
          System.err.println("Usage: ItemRecommender <inputDir> " +
            "<factorizationOutputDir> <recommendationsOutputDir>")
          -1
        } else {
          val ir = new ItemRecommender(toolArgs(0), toolArgs(1), toolArgs(2))
          ir.recommend
        }
      }
    }, args)
    System.exit(res)
  }
}
/**
 * Holder for names shared across the BigPetStore code. It contains no
 * behaviour, only the nested enums below.
 */
public class BigPetStoreConstants {

  // Files should be stored in graphviz arch.dot
  /** Names of the outputs; the trailing notes are the original authors' labels. */
  public enum OUTPUTS {
    generated, // generator
    cleaned, // pig
    tsv,
    pig_ad_hoc_script,
    CUSTOMER_PAGE; // crunchhh

    /** Names used for the Mahout-related paths. */
    public enum MahoutPaths {
      Mahout,
      AlsFactorization,
      AlsRecommendations
    }
  }

}
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.util; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.mapreduce.Job; - -/** - * Dev utilities for testing arguments etc... - */ -public class DeveloperTools { - - /** - * Validates that the expected args are present in the "args" array. - * Just some syntactic sugar for good arg error handling. - * @param args - * @param expected arguments. - */ - public static void validate(String[] args, String... 
expected) { - int i=-1; - try{ - for(i = 0 ; i < expected.length ; i++) { - System.out.println("VALUE OF " + expected[i] + " = " + args[i]); - } - } - catch(Throwable t) { - System.out.println("Argument " + i + " not available."); - System.out.println("We expected " + expected.length + " arguments for this phase"); - } - - - } - public static void main(String[] args) throws Exception { - Log LOG = LogFactory.getLog(Job.class); - } - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java deleted file mode 100644 index c652beb..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.bigtop.bigpetstore.util; - -import org.apache.bigtop.bigpetstore.generator.util.State; - -/** - * User and Product IDs need numerical - * identifiers for recommender algorithms - * which attempt to interpolate new - * products. - * - * TODO: Delete this class. Its not necessarily required: We might just use HIVE HASH() as our - * standard for this. - */ -public class NumericalIdUtils { - - /** - * People: Leading with ordinal code for state. - */ - public static long toId(State state, String name){ - String fromRawData = - state==null? - name: - (state.name()+"_"+name); - return fromRawData.hashCode(); - } - /** - * People: Leading with ordinal code for state. - */ - public static long toId(String name){ - return toId(null,name); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java deleted file mode 100755 index 7b6bede..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/PetStoreParseFunctions.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
/**
 * Parses one comma-separated line into a map keyed by fixed column names.
 *
 * TODO: This might be dead code.
 */
public class PetStoreParseFunctions {

    /** Column names, in the order the values appear on a line. */
    String[] headers = { "code", "city", "country", "lat", "lon" };

    /**
     * Splits {@code line} on commas and pairs each value with the header at
     * the same position.
     *
     * A line with fewer values than headers yields a map containing only the
     * leading headers. Values beyond the last header are ignored; previously
     * they caused an ArrayIndexOutOfBoundsException. As with
     * {@code String.split}, trailing empty values are dropped, so a line made
     * only of separators yields an empty map.
     *
     * @param line the raw comma-separated record; must not be null
     * @return a new mutable map from header name to the raw string value
     */
    public Map<String, Object> parse(String line) {
        Map<String, Object> resultMap = new HashMap<String, Object>();

        String[] values = line.split(",");

        // Never index past the known headers, whatever the width of the line.
        int fieldCount = Math.min(values.length, headers.length);
        for (int k = 0; k < fieldCount; k++) {
            resultMap.put(headers[k], values[k]);
        }

        return resultMap;
    }
}
/**
 * Small string helpers borrowed from apache-commons-lang StringUtils; more
 * may be added over time.
 *
 * Keeping a local copy keeps the dependencies on a cluster small, which is
 * sometimes easier than shipping extra jars manually on the hadoop classpath
 * or via DistributedCache.
 */
public class StringUtils {

    /**
     * Returns the part of {@code str} before the first occurrence of
     * {@code separator}.
     *
     * @param str the string to cut; may be null
     * @param separator the text to search for; may be null
     * @return {@code str} unchanged when it is null or empty, when
     *         {@code separator} is null, or when the separator does not
     *         occur; the empty string when {@code separator} is empty;
     *         otherwise the text preceding the first occurrence
     */
    public static String substringBefore(String str, String separator) {
        // Null handling follows the commons-lang contract instead of throwing.
        if (str == null || str.length() == 0 || separator == null) {
            return str;
        }
        int pos = str.indexOf(separator);
        if (pos == -1) {
            return str;
        }
        return str.substring(0, pos);
    }


    /**
     * Returns the part of {@code str} after the first occurrence of
     * {@code separator}.
     *
     * @param str the string to cut; may be null
     * @param separator the text to search for; may be null
     * @return {@code str} unchanged when it is null or empty; the empty
     *         string when {@code separator} is null or does not occur;
     *         otherwise the text following the first occurrence
     */
    public static String substringAfter(String str, String separator) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (separator == null) {
            return "";
        }
        int pos = str.indexOf(separator);
        if (pos == -1) {
            return "";
        }
        return str.substring(pos + separator.length());
    }
}
/**
 * Generic object for generating random data. It exists to provide a uniform
 * API for getting random data, so that the underlying data-generation
 * implementation can be replaced with existing libraries if desired.
 */
object DataForger {
  private val random = new Random
  private val fairy = Fairy.create()

  // Chooses one entry of `pool` at an index drawn from `rng`.
  private def pick(pool: IndexedSeq[String], rng: Random): String =
    pool(rng.nextInt(pool.length))

  // TODO: Jay / Bhashit : refactor to use a random data generator?
  // Returns a first name chosen with the supplied random source.
  def firstName(random: Random): String = pick(firstNames, random)
  // Returns a first name chosen with this object's own random source.
  def firstName: String = firstName(random)

  // TODO: Jay / Bhashit : refactor to use a random data generator?
  // Returns a last name chosen with the supplied random source.
  def lastName(random: Random): String = pick(lastNames, random)
  // Returns a last name chosen with this object's own random source.
  def lastName: String = lastName(random)

  // Asks jfairy's date producer for a random date in the past, bounded by
  // maxYearsEarlier, and converts the result with toDate().
  def randomDateInPastYears(maxYearsEarlier: Int) = {
    val dates = fairy.dateProducer()
    val moment = dates.randomDateInThePast(maxYearsEarlier)
    moment.toDate()
  }

  private val firstNames = IndexedSeq("Aaron", "Abby", "Abigail", "Adam",
    "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia",
    "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda", "Amber",
    "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela", "Angie",
    "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio", "April",
    "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby", "Barb",
    "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty",
    "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie",
    "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon",
    "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian", "Brianna",
    "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan", "Caleb",
    "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol", "Carolyn",
    "Carrie", "Casey", "Cassandra", "Catherine", "Cathy", "Chad",
    "Charlene", "Charles", "Charlie", "Charlotte", "Chase", "Chasity",
    "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne", "Chris",
    "Christian", "Christina", "Christine", "Christoph", "Christopher",
    "Christy", "Chuck", "Cindy", "Clara", "Clarence", "Clayton",
    "Clifford", "Clint", "Cody", "Colton", "Connie", "Corey", "Cory",
    "Courtney", "Craig", "Crystal", "Curtis", "Cynthia", "Dakota",
    "Dale", "Dallas", "Dalton", "Dan", "Dana", "Daniel", "Danielle",
    "Danny", "Darla", "Darlene", "Darrell", "Darren", "Dave", "David",
    "Dawn", "Dean", "Deanna", "Debbie", "Deborah", "Debra", "Denise",
    "Dennis", "Derek", "Derrick", "Destiny", "Devin", "Diana", "Diane",
    "Dillon", "Dixie", "Dominic", "Don", "Donald", "Donna", "Donnie",
    "Doris", "Dorothy", "Doug", "Douglas", "Drew", "Duane", "Dustin",
    "Dusty", "Dylan", "Earl", "Ed", "Eddie", "Edward", "Elaine",
    "Elizabeth", "Ellen", "Emily", "Eric", "Erica", "Erika", "Erin",
    "Ernest", "Ethan", "Eugene", "Eva", "Evelyn", "Everett", "Faith",
    "Father", "Felicia", "Floyd", "Francis", "Frank", "Fred", "Gabriel",
    "Gage", "Gail", "Gary", "Gene", "George", "Gerald", "Gina", "Ginger",
    "Glen", "Glenn", "Gloria", "Grace", "Greg", "Gregory", "Haley",
    "Hannah", "Harley", "Harold", "Harry", "Heath", "Heather", "Heidi",
    "Helen", "Herbert", "Holly", "Hope", "Howard", "Hunter", "Ian",
    "Isaac", "Jack", "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie",
    "Jan", "Jane", "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay",
    "Jean", "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer",
    "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica", "Jessie",
    "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi", "Jody", "Joe",
    "Joel", "Joey", "John", "Johnathan", "Johnny", "Jon", "Jonathan",
    "Jonathon", "Jordan", "Joseph", "Josh", "Joshua", "Joyce", "Juanita",
    "Judy", "Julia", "Julie", "Justin", "Kaitlyn", "Karen", "Katelyn",
    "Katherine", "Kathleen", "Kathryn", "Kathy", "Katie", "Katrina",
    "Kay", "Kayla", "Kaylee", "Keith", "Kelly", "Kelsey", "Ken",
    "Kendra", "Kenneth", "Kenny", "Kevin", "Kim", "Kimberly", "Kris",
    "Krista", "Kristen", "Kristin", "Kristina", "Kristy", "Kyle",
    "Kylie", "Lacey", "Laken", "Lance", "Larry", "Laura", "Lawrence",
    "Leah", "Lee", "Leonard", "Leroy", "Leslie", "Levi", "Lewis",
    "Linda", "Lindsay", "Lindsey", "Lisa", "Lloyd", "Logan", "Lois",
    "Loretta", "Lori", "Louis", "Lynn", "Madison", "Mandy", "Marcus",
    "Margaret", "Maria", "Mariah", "Marie", "Marilyn", "Marion", "Mark",
    "Marlene", "Marsha", "Martha", "Martin", "Marty", "Marvin", "Mary",
    "Mary ann", "Mason", "Matt", "Matthew", "Max", "Megan", "Melanie",
    "Melinda", "Melissa", "Melody", "Michael", "Michelle", "Mickey",
    "Mike", "Mindy", "Miranda", "Misty", "Mitchell", "Molly", "Monica",
    "Morgan", "Mother", "Myron", "Nancy", "Natasha", "Nathan",
    "Nicholas", "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman",
    "Olivia", "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick",
    "Patty", "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip",
    "Phyllis", "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy",
    "Ray", "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda",
    "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert",
    "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger",
    "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy", "Russ",
    "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally", "Sam",
    "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah", "Savannah",
    "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna", "Shannon",
    "Sharon", "Shaun", "Shawn", "Shawna", "Sheila", "Shelly", "Sher",
    "Sherri", "Sherry", "Shirley", "Sierra", "Skyler", "Stacey", "Stacy",
    "Stanley", "Stephanie", "Stephen", "Steve", "Steven", "Sue",
    "Summer", "Susan", "Sydney", "Tabatha", "Tabitha", "Tamara", "Tammy",
    "Tara", "Tasha", "Tashia", "Taylor", "Ted", "Teresa", "Terri",
    "Terry", "Tessa", "Thelma", "Theresa", "Thomas", "Tia", "Tiffany",
    "Tim", "Timmy", "Timothy", "Tina", "Todd", "Tom", "Tommy", "Toni",
    "Tony", "Tonya", "Tracey", "Tracie", "Tracy", "Travis", "Trent",
    "Trevor", "Trey", "Trisha", "Tristan", "Troy", "Tyler", "Tyrone",
    "Unborn", "Valerie", "Vanessa", "Vernon", "Veronica", "Vicki",
    "Vickie", "Vicky", "Victor", "Victoria", "Vincent", "Virginia",
    "Vivian", "Walter", "Wanda", "Wayne", "Wendy", "Wesley", "Whitney",
    "William", "Willie", "Wyatt", "Zachary")

  private val lastNames = IndexedSeq("Abbott", "Acevedo", "Acosta", "Adams",
    "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander", "Alford",
    "Allen", "Allison", "Alston", "Alvarado", "Alvarez", "Anderson",
    "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley", "Atkins",
    "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers", "Bailey",
    "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks", "Barber",
    "Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller",
    "Wilson", "Moore", "Taylor", "Thomas", "Jackson", "Barker", "Barlow",
    "Barnes", "Barnett", "Barr", "Barrera", "Barrett", "Barron", "Barry",
    "Bartlett", "Barton", "Bass", "Bates", "Battle", "Bauer", "Baxter",
    "Beach", "Bean", "Beard", "Beasley", "Beck", "Becker", "Bell",
    "Bender", "Benjamin", "Bennett", "Benson", "Bentley", "Benton",
    "Berg", "Berger", "Bernard", "Berry", "Best", "Bird", "Bishop",
    "Black", "Blackburn", "Blackwell", "Blair", "Blake", "Blanchard",
    "Blankenship", "Blevins", "Bolton", "Bond", "Bonner", "Booker",
    "Boone", "Booth", "Bowen", "Bowers", "Bowman", "Boyd", "Boyer",
    "Boyle", "Bradford", "Bradley", "Bradshaw", "Brady", "Branch",
    "Bray", "Brennan", "Brewer", "Bridges", "Briggs", "Bright", "Britt",
    "Brock", "Brooks", "Browning", "Bruce", "Bryan", "Bryant",
    "Buchanan", "Buck", "Buckley", "Buckner", "Bullock", "Burch",
    "Burgess", "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt",
    "Burton", "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain",
    "Calderon", "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron",
    "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas",
    "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo",
    "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash",
    "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan",
    "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase",
    "Chavez", "Chen", "Cherry", "Christensen", "Christian", "Church",
    "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons",
    "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole",
    "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton",
    "Conley", "Conner", "Conrad", "Contreras", "Conway", "Cook", "Cooke",
    "Cooley", "Cooper", "Copeland", "Cortez", "Cote", "Cotton", "Cox",
    "Craft", "Craig", "Crane", "Crawford", "Crosby", "Cross", "Cruz",
    "Cummings", "Cunningham", "Curry", "Curtis", "Dale", "Dalton",
    "Daniel", "Daniels", "Daugherty", "Davenport", "David", "Davidson",
    "Dawson", "Day", "Dean", "Decker", "Dejesus", "Delacruz", "Delaney",
    "Deleon", "Delgado", "Dennis", "Diaz", "Dickerson", "Dickinson",
    "Dillard", "Dillon", "Dixon", "Dodson", "Dominguez", "Donaldson",
    "Donovan", "Dorsey", "Dotson", "Douglas", "Downs", "Doyle", "Drake",
    "Dudley", "Duffy", "Duke", "Duncan", "Dunlap", "Dunn", "Duran",
    "Durham", "Dyer", "Eaton", "Edwards", "Elliott", "Ellis", "Ellison",
    "Emerson", "England", "English", "Erickson", "Espinoza", "Estes",
    "Estrada", "Evans", "Everett", "Ewing", "Farley", "Farmer",
    "Farrell", "Faulkner", "Ferguson", "Fernandez", "Ferrell", "Fields",
    "Figueroa", "Finch", "Finley", "Fischer", "Fisher", "Fitzgerald",
    "Fitzpatrick", "Fleming", "Fletcher", "Flores", "Flowers", "Floyd",
    "Flynn", "Foley", "Forbes", "Ford", "Foreman", "Foster", "Fowler",
    "Fox", "Francis", "Franco", "Frank", "Franklin", "Franks", "Frazier",
    "Frederick", "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes",
    "Fuller", "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway",
    "Gamble", "Garcia", "Gardner", "Garner", "Garrett", "Garrison",
    "Garza", "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson",
    "Gilbert", "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore",
    "Glass", "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales",
    "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould",
    "Graham", "Grant", "Graves", "Gray", "Green", "Greene", "Greer",
    "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra",
    "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn", "Hale",
    "Haley", "Hall", "Hamilton", "Hammond", "Hampton", "Hancock",
    "Haney", "Hansen", "Hanson", "Hardin", "Harding", "Hardy", "Harmon",
    "Harper", "Harris", "Harrington", "Harrison", "Hart", "Hartman",
    "Harvey", "Hatfield", "Hawkins", "Hayden", "Hayes", "Haynes", "Hays",
    "Head", "Heath", "Hebert", "Henderson", "Hendricks", "Hendrix",
    "Henry", "Hensley", "Henson", "Herman", "Hernandez", "Herrera",
    "Herring", "Hess", "Hester", "Hewitt", "Hickman", "Hicks", "Higgins",
    "Hill", "Hines", "Hinton", "Hobbs", "Hodge", "Hodges", "Hoffman",
    "Hogan", "Holcomb", "Holden", "Holder", "Holland", "Holloway",
    "Holman", "Holmes", "Holt", "Hood", "Hooper", "Hoover", "Hopkins",
    "Hopper", "Horn", "Horne", "Horton", "House", "Houston", "Howard",
    "Howe", "Howell", "Hubbard", "Huber", "Hudson", "Huff", "Huffman",
    "Hughes", "Hull", "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst",
    "Hutchinson", "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson",
    "James", "Jarvis", "Jefferson", "Jenkins", "Jennings", "Jensen",
    "Jimenez", "Johns", "Johnston", "Jordan", "Joseph", "Joyce",
    "Joyner", "Juarez", "Justice", "Kane", "Kaufman", "Keith", "Keller",
    "Kelley", "Kelly", "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd",
    "Kim", "King", "Kinney", "Kirby", "Kirk", "Kirkland", "Klein",
    "Kline", "Knapp", "Knight", "Knowles", "Knox", "Koch", "Kramer",
    "Lamb", "Lambert", "Lancaster", "Landry", "Lane", "Lang", "Langley",
    "Lara", "Larsen", "Larson", "Lawrence", "Lawson", "Le", "Leach",
    "Leblanc", "Lee", "Leon", "Leonard", "Lester", "Levine", "Levy",
    "Lewis", "Lindsay", "Lindsey", "Little", "Livingston", "Lloyd",
    "Logan", "Long", "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas",
    "Luna", "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack",
    "Madden", "Maddox", "Maldonado", "Malone", "Mann", "Manning",
    "Marks", "Marquez", "Marsh", "Marshall", "Martin", "Martinez",
    "Mason", "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May",
    "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall", "McCarthy",
    "McCarty", "McClain", "McClure", "McConnell", "McCormick", "McCoy",
    "McCray", "McCullough", "McDaniel", "McDonald", "McDowell",
    "McFadden", "McFarland", "McGee", "McGowan", "McGuire", "McIntosh",
    "McIntyre", "McKay", "McKee", "McKenzie", "McKinney", "McKnight",
    "McLaughlin", "McLean", "McLeod", "McMahon", "McMillan", "McNeil",
    "McPherson", "Meadows", "Medina", "Mejia", "Melendez", "Melton",
    "Mendez", "Mendoza", "Mercado", "Mercer", "Merrill", "Merritt",
    "Meyer", "Meyers", "Michael", "Middleton", "Miles", "Mills",
    "Miranda", "Mitchell", "Molina", "Monroe", "Montgomery", "Montoya",
    "Moody", "Moon", "Mooney", "Morales", "Moran", "Moreno", "Morgan",
    "Morin", "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses",
    "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz", "Murphy",
    "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson", "Newman",
    "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen", "Nieves",
    "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris", "Norton",
    "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom", "Odonnell", "Oliver",
    "Olsen", "Olson", "O'neal", "O'neil", "O'neill", "Orr", "Ortega",
    "Ortiz", "Osborn", "Osborne", "Owen", "Owens", "Pace", "Pacheco",
    "Padilla", "Page", "Palmer", "Park", "Parker", "Parks", "Parrish",
    "Parsons", "Pate", "Patel", "Patrick", "Patterson", "Patton", "Paul",
    "Payne", "Pearson", "Peck", "Pena", "Pennington", "Perez", "Perkins",
    "Perry", "Peters", "Petersen", "Peterson", "Petty", "Phelps",
    "Phillips", "Pickett", "Pierce", "Pittman", "Pitts", "Pollard",
    "Poole", "Pope", "Porter", "Potter", "Potts", "Powell", "Powers",
    "Pratt", "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh",
    "Quinn", "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph",
    "Rasmussen", "Ratliff", "Ray", "Raymond", "Reed", "Reese", "Reeves",
    "Reid", "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich",
    "Richard", "Richards", "Richardson", "Richmond", "Riddle", "Riggs",
    "Riley", "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins",
    "Roberson", "Roberts", "Robertson", "Robinson", "Robles", "Rocha",
    "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins",
    "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross",
    "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell", "Russo",
    "Rutledge", "Ryan", "Salas", "Salazar", "Salinas", "Sampson",
    "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana", "Santiago",
    "Santos", "Sargent", "Saunders", "Savage", "Sawyer", "Schmidt",
    "Schneider", "Schroeder", "Schultz", "Schwartz", "Scott", "Sears",
    "Sellers", "Serrano", "Sexton", "Shaffer", "Shannon", "Sharp",
    "Sharpe", "Shaw", "Shelton", "Shepard", "Shepherd", "Sheppard",
    "Sherman", "Shields", "Short", "Silva", "Simmons", "Simon",
    "Simpson", "Sims", "Singleton", "Skinner", "Slater", "Sloan",
    "Small", "Snider", "Snow", "Snyder", "Solis", "Solomon", "Sosa",
    "Soto", "Sparks", "Spears", "Spence", "Spencer", "Stafford",
    "Stanley", "Stanton", "Stark", "Steele", "Stein", "Stephens",
    "Stephenson", "Stevens", "Stevenson", "Stewart", "Stokes", "Stone",
    "Stout", "Strickland", "Strong", "Stuart", "Suarez", "Sullivan",
    "Summers", "Sutton", "Swanson", "Sweeney", "Sweet", "Sykes",
    "Talley", "Tanner", "Tate", "Terrell", "Terry", "Thompson",
    "Thornton", "Tillman", "Todd", "Torres", "Townsend", "Tran",
    "Travis", "Trevino", "Trujillo", "Tucker", "Turner", "Tyler",
    "Tyson", "Underwood", "Valdez", "Valencia", "Valentine",
    "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan",
    "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez",
    "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker", "Wall",
    "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters", "Walton",
    "Ward", "Ware", "Warner", "Warren", "Washington", "Waters",
    "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber", "Webster",
    "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler", "Whitaker",
    "White", "Whitehead", "Whitfield", "Whitley", "Whitney", "Wiggins",
    "Wilcox", "Wilder", "Wiley", "Wilkerson", "Wilkins", "Wilkinson",
    "William", "Williamson", "Willis", "Winters", "Wise", "Witt", "Wolf",
    "Wolfe", "Wong", "Wood", "Woodard", "Woods", "Woodward", "Wooten",
    "Workman", "Wright", "Wyatt", "Wynn", "Yang", "Yates", "York",
    "Young", "Zamora", "Zimmerman")
}
- */ -package org.apache.bigtop.bigpetstore.generator; - -import java.util.Date -import org.apache.bigtop.bigpetstore.generator.util.State -import org.apache.commons.lang3.StringUtils -import java.util.Arrays.asList -import java.util.Random -import scala.collection.Iterator -import com.sun.org.apache.xml.internal.serializer.ToStream -import java.util.{Iterator => JavaIterator} -import scala.collection.JavaConversions.asJavaIterator -import org.apache.bigtop.bigpetstore.generator.util.Product -import org.apache.commons.lang3.Range; -import org.apache.bigtop.bigpetstore.generator.util.ProductType - -/** - * This class generates our data. Over time we will use it to embed bias which - * can then be teased out, i.e. by clustering/classifiers. For example: - * - * certain products <--> certain years or days - */ -class TransactionIteratorFactory(private val records: Int, - private val customerIdRange: Range[java.lang.Long], - private val state: State) { - assert(records > 0, "Number of records must be greater than 0 to generate a data iterator!") - private val random = new Random(state.hashCode) - - def data: JavaIterator[TransactionIteratorFactory.KeyVal[String, String]] = { - new TransactionIteratorFactory.DataIterator(records, customerIdRange, state, random) - } -} - -object TransactionIteratorFactory { - class KeyVal[K, V](val key: K, val value: V) - - private class DataIterator(records: Int, - customerIdRange: Range[java.lang.Long], - state: State, - r: Random) extends Iterator[KeyVal[String, String]] { - private var firstName: String = null - private var lastName: String = null - private var elementsProcducedCount = 0 - private var repeatCount = 0 - private var currentCustomerId = customerIdRange.getMinimum - private var currentProductType = selectRandomProductType; - - def hasNext = - elementsProcducedCount < records && currentCustomerId <= customerIdRange.getMaximum - - - def next(): TransactionIteratorFactory.KeyVal[String,String] = { - val date = 
DataForger.randomDateInPastYears(50); - setIteratorState(); - - val product = randomProductOfCurrentlySelectedType - val key = StringUtils.join(asList("BigPetStore", "storeCode_" + state.name(), - elementsProcducedCount.toString), ",") - val value = StringUtils.join(asList(currentCustomerId, firstName, lastName, product.id, - product.name.toLowerCase, product.price, date), ",") - - elementsProcducedCount += 1 - new TransactionIteratorFactory.KeyVal(key, value) - } - - private def setIteratorState() = { - /** Some customers come back for more :) We repeat a customer up to ten times */ - if (repeatCount > 0) { - repeatCount -= 1 - } else { - firstName = DataForger.firstName(r) - lastName = DataForger.lastName(r) - // this sometimes generates numbers much larger than 10. We don't really need Gaussian - // distribution since number of transactions per customer can be truly arbitrary. - repeatCount = (r.nextGaussian * 4f) toInt; - println("####Repeat: " + repeatCount) - currentCustomerId += 1 - currentProductType = selectRandomProductType; - } - } - - private def selectRandomProductType = { - ProductType.values.apply(r.nextInt(ProductType.values.length)) - } - - private def randomProductOfCurrentlySelectedType = { - currentProductType.getProducts.get(r.nextInt(currentProductType.getProducts.size)) - } - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java deleted file mode 100644 index 3292ba5..0000000 --- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/docs/TestDocs.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.docs; - -import static org.junit.Assert.assertTrue; - -import java.io.File; - -import org.apache.bigtop.bigpetstore.util.BigPetStoreConstants.OUTPUTS; -import org.apache.commons.io.FileUtils; -import org.junit.Test; - -public class TestDocs { - - @Test - public void testGraphViz() throws Exception { - // test the graphviz file by grepping out the constants. 
- String graphviz = FileUtils.readFileToString(new File("arch.dot")); - System.out.println(graphviz); - - assertTrue(graphviz.contains(OUTPUTS.generated.name())); - assertTrue(graphviz.contains(OUTPUTS.cleaned.name())); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java deleted file mode 100644 index e2f1f25..0000000 --- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.bigtop.bigpetstore.generator; - -import static org.junit.Assert.assertFalse; - -import org.apache.bigtop.bigpetstore.generator.util.State; -import org.apache.bigtop.bigpetstore.util.NumericalIdUtils; -import org.junit.Test; - -public class TestNumericalIdUtils { - - @Test - public void testName() { - String strId= State.OK.name()+"_"+ "jay vyas"; - long id = NumericalIdUtils.toId(strId); - String strId2= State.CO.name()+"_"+ "jay vyas"; - long id2 = NumericalIdUtils.toId(strId2); - System.out.println(id + " " + id2); - assertFalse(id==id2); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java deleted file mode 100755 index 76de3d0..0000000 --- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.generator; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.BufferedReader; -import java.io.DataInputStream; -import java.io.InputStreamReader; -import java.util.Date; - -import org.apache.bigtop.bigpetstore.generator.BPSGenerator.props; -import org.apache.bigtop.bigpetstore.generator.util.State; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * run this test with vm options -XX:MaxPermSize=256m -Xms512m -Xmx1024m - * - */ -public class TestPetStoreTransactionGeneratorJob { - - final static Logger log = LoggerFactory - .getLogger(TestPetStoreTransactionGeneratorJob.class); - - @Test - public void test() throws Exception { - System.out.println("memory : " + Runtime.getRuntime().freeMemory() - / 1000000); - if (Runtime.getRuntime().freeMemory() / 1000000 < 75) { - // throw new - // RuntimeException("need more memory to run this test !"); - } - int records = 20; - /** - * Setup configuration with prop. - */ - Configuration c = new Configuration(); - c.setInt(props.bigpetstore_records.name(), records); - - /** - * Run the job - */ - Path output = new Path("petstoredata/" + (new Date()).toString()); - Job createInput = BPSGenerator.getCreateTransactionRecordsJob(output, c); - createInput.submit(); - System.out.println(createInput); - createInput.waitForCompletion(true); - - FileSystem fs = FileSystem.getLocal(new Configuration()); - - /** - * Read file output into string. 
- */ - DataInputStream f = fs.open(new Path(output, "part-r-00000")); - BufferedReader br = new BufferedReader(new InputStreamReader(f)); - String s; - int recordsSeen = 0; - boolean CTseen = false; - boolean AZseen = false; - - // confirm that both CT and AZ are seen in the outputs. - while (br.ready()) { - s = br.readLine(); - System.out.println("===>" + s); - recordsSeen++; - if (s.contains(State.CT.name())) { - CTseen = true; - } - if (s.contains(State.AZ.name())) { - AZseen = true; - } - } - - // records seen should = 20 - assertEquals(records, recordsSeen); - // Assert that a couple of the states are seen (todo make it - // comprehensive for all states). - assertTrue(CTseen); - assertTrue(AZseen); - log.info("Created " + records + " , file was " - + fs.getFileStatus(new Path(output, "part-r-00000")).getLen() - + " bytes."); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/6ec6cebf/bigtop-bigpetstore/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/resources/log4j.properties b/bigtop-bigpetstore/src/test/resources/log4j.properties deleted file mode 100644 index 1e33093..0000000 --- a/bigtop-bigpetstore/src/test/resources/log4j.properties +++ /dev/null @@ -1,47 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -hadoop.root.logger=INFO,console -hadoop.log.dir=. 
-hadoop.log.file=hadoop.log - -# -# Job Summary Appender -# -# Use following logger to send summary to separate file defined by -# hadoop.mapreduce.jobsummary.log.file rolled daily: -# hadoop.mapreduce.jobsummary.logger=INFO,JSA -# -hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} -hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.EventCounter=org.apache.log4j.ConsoleAppender -log4j.appender.EventCounter.layout=org.apache.log4j.PatternLayout -# Define the root logger to the system property "hadoop.root.logger". -log4j.rootLogger=${hadoop.root.logger}, EventCounter -log4j.appender.console.layout=org.apache.log4j.PatternLayout -# Logging Threshold -log4j.threshold=ALL - -# -# Daily Rolling File Appender -# - -log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender -log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} - -# Rollver at midnight -log4j.appender.DRFA.DatePattern=.yyyy-MM-dd - -# 30-day backup -#log4j.appender.DRFA.MaxBackupIndex=30 -log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
