http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml b/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml index 0abbdf4..9e0535b 100644 --- a/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml +++ b/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml @@ -100,5 +100,14 @@ <artifactId>hyracks-dataflow-std</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hyracks</groupId> + <artifactId>hyracks-util</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + </dependency> </dependencies> </project>
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java new file mode 100644 index 0000000..e031a51 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; + +/** + * Quick & dirty data generator for multi-thread testing. 
+ */ +@SuppressWarnings("rawtypes") +public class DataGenThread extends Thread { + public final BlockingQueue<TupleBatch> tupleBatchQueue; + private final int maxNumBatches; + private final int maxOutstandingBatches; + private int numBatches = 0; + private final Random rnd; + + // maxOutstandingBatches pre-created tuple-batches for populating the queue. + private TupleBatch[] tupleBatches; + private int ringPos; + + public DataGenThread(int numConsumers, int maxNumBatches, int batchSize, ISerializerDeserializer[] fieldSerdes, + int payloadSize, int rndSeed, int maxOutstandingBatches, boolean sorted) { + this.maxNumBatches = maxNumBatches; + this.maxOutstandingBatches = maxOutstandingBatches; + rnd = new Random(rndSeed); + tupleBatches = new TupleBatch[maxOutstandingBatches]; + IFieldValueGenerator[] fieldGens = DataGenUtils.getFieldGensFromSerdes(fieldSerdes, rnd, sorted); + for (int i = 0; i < maxOutstandingBatches; i++) { + tupleBatches[i] = new TupleBatch(batchSize, fieldGens, fieldSerdes, payloadSize); + } + tupleBatchQueue = new LinkedBlockingQueue<TupleBatch>(maxOutstandingBatches); + ringPos = 0; + } + + public DataGenThread(int numConsumers, int maxNumBatches, int batchSize, ISerializerDeserializer[] fieldSerdes, + IFieldValueGenerator[] fieldGens, int rndSeed, int maxOutstandingBatches) { + this.maxNumBatches = maxNumBatches; + this.maxOutstandingBatches = maxOutstandingBatches; + rnd = new Random(rndSeed); + tupleBatches = new TupleBatch[maxOutstandingBatches]; + for (int i = 0; i < maxOutstandingBatches; i++) { + tupleBatches[i] = new TupleBatch(batchSize, fieldGens, fieldSerdes, 0); + } + tupleBatchQueue = new LinkedBlockingQueue<TupleBatch>(maxOutstandingBatches); + ringPos = 0; + } + + @Override + public void run() { + while (numBatches < maxNumBatches) { + boolean added = false; + try { + if (tupleBatches[ringPos].inUse.compareAndSet(false, true)) { + tupleBatches[ringPos].generate(); + tupleBatchQueue.put(tupleBatches[ringPos]); + added = true; + } 
+ } catch (IOException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + if (added) { + numBatches++; + ringPos++; + if (ringPos >= maxOutstandingBatches) { + ringPos = 0; + } + } + } + } + + public TupleBatch getBatch() throws InterruptedException { + return tupleBatchQueue.take(); + } + + public void releaseBatch(TupleBatch batch) { + batch.inUse.set(false); + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java new file mode 100644 index 0000000..130f7e2 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.util.Random; + +import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer; +import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer; + +@SuppressWarnings("rawtypes") +public class DataGenUtils { + public static IFieldValueGenerator getFieldGenFromSerde(ISerializerDeserializer serde, Random rnd, boolean sorted) { + if (serde instanceof IntegerSerializerDeserializer) { + if (sorted) { + return new SortedIntegerFieldValueGenerator(); + } else { + return new IntegerFieldValueGenerator(rnd); + } + } else if (serde instanceof FloatSerializerDeserializer) { + if (sorted) { + return new SortedFloatFieldValueGenerator(); + } else { + return new FloatFieldValueGenerator(rnd); + } + } else if (serde instanceof DoubleSerializerDeserializer) { + if (sorted) { + return new SortedDoubleFieldValueGenerator(); + } else { + return new DoubleFieldValueGenerator(rnd); + } + } else if (serde instanceof UTF8StringSerializerDeserializer) { + return new StringFieldValueGenerator(20, rnd); + } + return null; + } + + public static IFieldValueGenerator[] getFieldGensFromSerdes(ISerializerDeserializer[] serdes, Random rnd, + boolean sorted) { + IFieldValueGenerator[] fieldValueGens = new IFieldValueGenerator[serdes.length]; + for (int i = 0; i < serdes.length; i++) { + fieldValueGens[i] = getFieldGenFromSerde(serdes[i], rnd, sorted); + } + return fieldValueGens; + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java 
---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java new file mode 100644 index 0000000..6163b48 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.hyracks.util.MathUtil; + +public class DocumentStringFieldValueGenerator implements IFieldValueGenerator<String> { + private static final String FIRST_NAMES_FILE = "dist.all.first.cleaned"; + private static final String LAST_NAMES_FILE = "dist.all.last.cleaned"; + + private final int docMinWords; + private final int docMaxWords; + private final int maxDictionarySize; + private final Random rnd; + private int[] cumulIntRanges; + + private List<String> tokenDict = new ArrayList<>(); + + public DocumentStringFieldValueGenerator(int docMinWords, int docMaxWords, int maxDictionarySize, Random rnd) + throws IOException { + this.docMinWords = docMinWords; + this.docMaxWords = docMaxWords; + this.maxDictionarySize = maxDictionarySize; + this.rnd = rnd; + initDictionary(); + double[] zipfProbDist = ProbabilityHelper.getZipfProbDist(tokenDict.size(), 1); + cumulIntRanges = ProbabilityHelper.getCumulIntRanges(zipfProbDist); + } + + private void initDictionary() throws IOException { + String line; + int count = 0; + + // Read first names from data file. + InputStream firstNamesIn = this.getClass().getClassLoader().getResourceAsStream(FIRST_NAMES_FILE); + try (BufferedReader firstNamesReader = new BufferedReader(new InputStreamReader(firstNamesIn))) { + while (count < maxDictionarySize && (line = firstNamesReader.readLine()) != null) { + if (!line.startsWith(";")) { + tokenDict.add(line.trim()); + count++; + } + } + } + + // Read last names from data file. 
+ InputStream lastNamesIn = this.getClass().getClassLoader().getResourceAsStream(LAST_NAMES_FILE); + try (BufferedReader lastNamesReader = new BufferedReader(new InputStreamReader(lastNamesIn))) { + while (count < maxDictionarySize && (line = lastNamesReader.readLine()) != null) { + if (!line.startsWith(";")) { + tokenDict.add(line.trim()); + count++; + } + } + } + } + + @Override + public String next() { + StringBuilder strBuilder = new StringBuilder(); + int numWords = MathUtil.stripSignBit(rnd.nextInt()) % (docMaxWords - docMinWords + 1) + docMinWords; + for (int i = 0; i < numWords; i++) { + int ix = ProbabilityHelper.choose(cumulIntRanges, rnd.nextInt()); + strBuilder.append(tokenDict.get(ix)); + if (i != numWords - 1) { + strBuilder.append(" "); + } + } + return strBuilder.toString(); + } + + public List<String> getTokenDictionary() { + return tokenDict; + } + + @Override + public void reset() { + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java new file mode 100644 index 0000000..8e36335 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.util.Random; + +public class DoubleFieldValueGenerator implements IFieldValueGenerator<Double> { + protected final Random rnd; + + public DoubleFieldValueGenerator(Random rnd) { + this.rnd = rnd; + } + + @Override + public Double next() { + return rnd.nextDouble(); + } + + @Override + public void reset() { + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java new file mode 100644 index 0000000..851a0f4 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.util.Random; + +public class FloatFieldValueGenerator implements IFieldValueGenerator<Float> { + protected final Random rnd; + + public FloatFieldValueGenerator(Random rnd) { + this.rnd = rnd; + } + + @Override + public Float next() { + return rnd.nextFloat(); + } + + @Override + public void reset() { + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java new file mode 100644 index 0000000..dbd4bfc --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
/**
 * A source of field values of type {@code T} for test data generation.
 *
 * @param <T> type of the generated values
 */
public interface IFieldValueGenerator<T> {
    /**
     * @return the next generated value
     */
    T next();

    /**
     * Returns the generator to its initial state, for implementations that keep state.
     */
    void reset();
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.util.Random; + +public class IntegerFieldValueGenerator implements IFieldValueGenerator<Integer> { + protected final Random rnd; + + public IntegerFieldValueGenerator(Random rnd) { + this.rnd = rnd; + } + + @Override + public Integer next() { + return rnd.nextInt(); + } + + @Override + public void reset() { + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java new file mode 100644 index 0000000..256eaf5 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.hyracks.util.MathUtil; + +public class PersonNameFieldValueGenerator implements IFieldValueGenerator<String> { + private static final String FIRST_NAMES_FILE = "dist.all.first.cleaned"; + private static final String LAST_NAMES_FILE = "dist.all.last.cleaned"; + private static final String LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + private final Random rnd; + private final double middleInitialProb; + + private List<String> firstNames = new ArrayList<>(); + private List<String> lastNames = new ArrayList<>(); + + public PersonNameFieldValueGenerator(Random rnd, double middleInitialProb) + throws IOException { + this.rnd = rnd; + this.middleInitialProb = middleInitialProb; + initNames(); + } + + private void initNames() throws IOException { + String line; + + // Read first names from data file. 
+ InputStream firstNamesIn = this.getClass().getClassLoader().getResourceAsStream(FIRST_NAMES_FILE); + try (BufferedReader firstNamesReader = new BufferedReader(new InputStreamReader(firstNamesIn))) { + while ((line = firstNamesReader.readLine()) != null) { + if (!line.startsWith(";")) { + firstNames.add(line.trim()); + } + } + } + + // Read last names from data file. + InputStream lastNamesIn = this.getClass().getClassLoader().getResourceAsStream(LAST_NAMES_FILE); + try (BufferedReader lastNamesReader = new BufferedReader(new InputStreamReader(lastNamesIn))) { + while ((line = lastNamesReader.readLine()) != null) { + if (!line.startsWith(";")) { + lastNames.add(line.trim()); + } + } + } + } + + @Override + public String next() { + StringBuilder strBuilder = new StringBuilder(); + + // First name. + int fix = MathUtil.stripSignBit(rnd.nextInt()) % firstNames.size(); + strBuilder.append(firstNames.get(fix)); + strBuilder.append(" "); + + // Optional middle initial. + double d = Math.abs(rnd.nextDouble()); + if (d <= middleInitialProb) { + int mix = MathUtil.stripSignBit(rnd.nextInt()) % LETTERS.length(); + strBuilder.append(LETTERS.charAt(mix)); + strBuilder.append(". "); + } + + // Last name. 
+ int lix = MathUtil.stripSignBit(rnd.nextInt()) % lastNames.size(); + strBuilder.append(lastNames.get(lix)); + + return strBuilder.toString(); + } + + @Override + public void reset() { + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java new file mode 100644 index 0000000..c7fbd3f --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Helpers for building discrete probability distributions and for sampling from them with a
 * random {@code int}: a distribution is turned into cumulative upper bounds over
 * {@code [0, Integer.MAX_VALUE]}, and a random value is mapped to the first bucket whose bound
 * is not smaller than it.
 */
public class ProbabilityHelper {
    /**
     * @param numChoices number of choices
     * @return an array of length {@code numChoices} in which every entry is {@code 1 / numChoices}
     */
    public static double[] getUniformProbDist(int numChoices) {
        double[] probDist = new double[numChoices];
        Arrays.fill(probDist, 1.0 / numChoices);
        return probDist;
    }

    /**
     * @param numChoices number of choices
     * @param zipfSkew exponent of the distribution
     * @return probabilities proportional to {@code 1 / rank^zipfSkew} for ranks
     *         {@code 1..numChoices}, normalized by the sum of those weights
     */
    public static double[] getZipfProbDist(int numChoices, int zipfSkew) {
        double[] probDist = new double[numChoices];
        double divisor = 0;
        // First pass: unnormalized weights and their sum.
        for (int i = 0; i < numChoices; i++) {
            probDist[i] = 1.0 / Math.pow(i + 1, zipfSkew);
            divisor += probDist[i];
        }
        // Second pass: normalize.
        for (int i = 0; i < numChoices; i++) {
            probDist[i] /= divisor;
        }
        return probDist;
    }

    /**
     * Converts a probability distribution into cumulative upper bounds.
     *
     * @param probDist probabilities of the choices
     * @return for every choice the running sum of {@code floor(Integer.MAX_VALUE * probability)};
     *         the last entry is always {@code Integer.MAX_VALUE}. Empty for an empty input.
     */
    public static int[] getCumulIntRanges(double[] probDist) {
        int[] opRanges = new int[probDist.length];
        if (opRanges.length == 0) {
            return opRanges;
        }
        int cumul = 0;
        for (int i = 0; i < opRanges.length - 1; i++) {
            cumul += (int) Math.floor(Integer.MAX_VALUE * probDist[i]);
            opRanges[i] = cumul;
        }
        // Pin the last bound so that rounding losses can never leave a value without a bucket.
        opRanges[opRanges.length - 1] = Integer.MAX_VALUE;
        return opRanges;
    }

    /**
     * Maps a random int to a choice.
     *
     * @param cumulIntRanges bounds produced by {@link #getCumulIntRanges(double[])}
     * @param randomInt any int; its absolute value is looked up
     * @return index of the bucket the value falls into (0 if {@code cumulIntRanges} is empty)
     */
    public static int choose(int[] cumulIntRanges, int randomInt) {
        // Math.abs(Integer.MIN_VALUE) is still negative, so clamp that one value explicitly.
        int rndVal = randomInt == Integer.MIN_VALUE ? Integer.MAX_VALUE : Math.abs(randomInt);
        int ix = Arrays.binarySearch(cumulIntRanges, rndVal);
        if (ix < 0) {
            // Not an exact bound: binarySearch encodes the insertion point as -(point) - 1.
            ix = -ix - 1;
        }
        return ix;
    }

}
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java new file mode 100644 index 0000000..ddca6f3 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hyracks.storage.am.common.datagen; + +public class SortedDoubleFieldValueGenerator implements IFieldValueGenerator<Double> { + private double val; + private final double startVal; + + public SortedDoubleFieldValueGenerator() { + startVal = 0.0d; + reset(); + } + + public SortedDoubleFieldValueGenerator(double startVal) { + this.startVal = startVal; + reset(); + } + + @Override + public Double next() { + return val++; + } + + @Override + public void reset() { + val = startVal; + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java new file mode 100644 index 0000000..1e5dd19 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hyracks.storage.am.common.datagen; + +public class SortedFloatFieldValueGenerator implements IFieldValueGenerator<Float> { + private float val = 0.0f; + private final float startVal; + + public SortedFloatFieldValueGenerator() { + startVal = 0.0f; + reset(); + } + + public SortedFloatFieldValueGenerator(float startVal) { + this.startVal = startVal; + reset(); + } + + @Override + public Float next() { + return val++; + } + + @Override + public void reset() { + val = startVal; + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java new file mode 100644 index 0000000..3ab17ca --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

package org.apache.hyracks.storage.am.common.datagen;

/**
 * Field value generator that hands out ascending {@code int} values.
 * Every call to {@link #next()} returns the current value and then advances
 * it by one (ordinary {@code int} arithmetic, so it wraps on overflow);
 * {@link #reset()} rewinds the sequence to its starting value.
 */
public class SortedIntegerFieldValueGenerator implements IFieldValueGenerator<Integer> {
    /** Value restored by {@link #reset()}. */
    private final int startVal;
    /** Value the next call to {@link #next()} will return. */
    private int current;

    /** Creates a generator whose sequence starts at {@code 0}. */
    public SortedIntegerFieldValueGenerator() {
        this(0);
    }

    /**
     * Creates a generator whose sequence starts at the given value.
     *
     * @param startVal value the sequence starts from, and restarts from on {@link #reset()}
     */
    public SortedIntegerFieldValueGenerator(int startVal) {
        this.startVal = startVal;
        reset();
    }

    /**
     * Returns the current value and moves the sequence forward by one.
     *
     * @return the value held before the increment
     */
    @Override
    public Integer next() {
        final int result = current;
        current = result + 1;
        return result;
    }

    /** Rewinds the sequence to the starting value. */
    @Override
    public void reset() {
        current = startVal;
    }
}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java new file mode 100644 index 0000000..9418447 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

package org.apache.hyracks.storage.am.common.datagen;

import java.util.Random;

import org.apache.hyracks.util.MathUtil;

/**
 * Field value generator that produces random strings.
 * Each string is assembled from characters sampled (with repetition) out of
 * the hexadecimal rendering of a random double's bit pattern, and is never
 * longer than the configured maximum length nor longer than that hex string.
 */
public class StringFieldValueGenerator implements IFieldValueGenerator<String> {
    /** Source of randomness, supplied by the caller. */
    private final Random rnd;
    /** Upper bound on the length of generated strings. */
    private int maxLen;

    /**
     * @param maxLen upper bound on the length of generated strings
     * @param rnd random source used for every generated string
     */
    public StringFieldValueGenerator(int maxLen, Random rnd) {
        this.rnd = rnd;
        this.maxLen = maxLen;
    }

    /**
     * Changes the upper bound applied to subsequently generated strings.
     *
     * @param maxLen new upper bound on the length of generated strings
     */
    public void setMaxLength(int maxLen) {
        this.maxLen = maxLen;
    }

    /**
     * Generates one random string.
     *
     * @return a string of {@code min(hex length, maxLen)} characters; empty when
     *         {@code maxLen} is zero or negative
     */
    @Override
    public String next() {
        // Hex digits of a random double's raw bits serve as the character pool.
        final String pool = Long.toHexString(Double.doubleToLongBits(rnd.nextDouble()));
        final int poolSize = pool.length();
        final StringBuilder out = new StringBuilder();
        int produced = 0;
        while (produced < poolSize && produced < maxLen) {
            // stripSignBit presumably makes the random int non-negative so the
            // remainder is a valid index - confirm against MathUtil.
            final int pick = MathUtil.stripSignBit(rnd.nextInt()) % poolSize;
            out.append(pool.charAt(pick));
            produced++;
        }
        return out.toString();
    }

    /** No state to rewind; intentionally empty. */
    @Override
    public void reset() {
    }
}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java 
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java new file mode 100644 index 0000000..c34c7bc --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */

package org.apache.hyracks.storage.am.common.datagen;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;

/**
 * Fixed-size group of {@link TupleGenerator}s that are all built from the same
 * field generators, serializers and payload size. {@link #generate()} advances
 * every generator once; {@link #get(int)} exposes the tuple held by one of them.
 */
@SuppressWarnings("rawtypes")
public class TupleBatch {
    /**
     * Public flag, initially {@code false}. This class never reads or writes it;
     * presumably callers use it to mark the batch as claimed - verify against users.
     */
    public final AtomicBoolean inUse = new AtomicBoolean(false);
    /** One generator per slot of the batch. */
    private final TupleGenerator[] tupleGens;
    /** Number of slots, as passed to the constructor. */
    private final int size;

    /**
     * Creates {@code size} tuple generators, each receiving the same arguments.
     *
     * @param size number of tuples in the batch
     * @param fieldGens field value generators handed to every tuple generator
     * @param fieldSerdes field serializers handed to every tuple generator
     * @param payloadSize payload size handed to every tuple generator
     */
    public TupleBatch(int size, IFieldValueGenerator[] fieldGens, ISerializerDeserializer[] fieldSerdes,
            int payloadSize) {
        this.size = size;
        this.tupleGens = new TupleGenerator[size];
        int slot = 0;
        while (slot < size) {
            tupleGens[slot] = new TupleGenerator(fieldGens, fieldSerdes, payloadSize);
            slot++;
        }
    }

    /**
     * Produces a fresh tuple in every slot, in slot order.
     *
     * @throws IOException if a tuple generator fails while building its tuple
     */
    public void generate() throws IOException {
        for (int slot = 0; slot < tupleGens.length; slot++) {
            tupleGens[slot].next();
        }
    }

    /**
     * @return the number of slots in the batch
     */
    public int size() {
        return size;
    }

    /**
     * @param ix slot index
     * @return the tuple reference held by the generator in slot {@code ix}
     */
    public ITupleReference get(int ix) {
        final TupleGenerator gen = tupleGens[ix];
        return gen.get();
    }
}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java new file mode 100644 index 0000000..4f26065 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */

package org.apache.hyracks.storage.am.common.datagen;

import java.io.DataOutput;
import java.io.IOException;

import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;

/**
 * Builds tuples by asking one {@link IFieldValueGenerator} per field for a
 * value and serializing it with the matching {@link ISerializerDeserializer}.
 * When a positive payload size is given, one extra trailing field holding a
 * byte array of that size is appended to every tuple. The array is allocated
 * once and never written by this class, so it stays zero-filled.
 * <p>
 * The same {@link ArrayTupleReference} instance is returned on every call, so
 * a tuple obtained earlier is overwritten by the next call to {@link #next()}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class TupleGenerator {
    protected final ISerializerDeserializer[] fieldSerdes;
    protected final IFieldValueGenerator[] fieldGens;
    protected final ArrayTupleBuilder tb;
    protected final ArrayTupleReference tuple;
    /** Extra trailing field content, or {@code null} when no payload was requested. */
    protected final byte[] payload;
    /** Output stream of {@link #tb}; all field bytes are written through it. */
    protected final DataOutput tbDos;

    /**
     * @param fieldGens value generators, indexed by field
     * @param fieldSerdes serializers, indexed by field; their count determines the
     *            number of generated fields
     * @param payloadSize size in bytes of the extra payload field; no payload field
     *            is added when this is zero or negative
     */
    public TupleGenerator(IFieldValueGenerator[] fieldGens, ISerializerDeserializer[] fieldSerdes, int payloadSize) {
        this.fieldGens = fieldGens;
        this.fieldSerdes = fieldSerdes;
        this.tuple = new ArrayTupleReference();
        final boolean withPayload = payloadSize > 0;
        // The builder needs one more field slot when a payload is appended.
        this.tb = new ArrayTupleBuilder(withPayload ? fieldSerdes.length + 1 : fieldSerdes.length);
        this.payload = withPayload ? new byte[payloadSize] : null;
        this.tbDos = tb.getDataOutput();
    }

    /**
     * Generates the next tuple: one freshly generated value per field, followed
     * by the payload field if one was configured.
     *
     * @return the shared tuple reference, now pointing at the new tuple's data
     * @throws IOException if serializing a field or writing the payload fails
     */
    public ITupleReference next() throws IOException {
        tb.reset();
        int field = 0;
        while (field < fieldSerdes.length) {
            final Object value = fieldGens[field].next();
            fieldSerdes[field].serialize(value, tbDos);
            tb.addFieldEndOffset();
            field++;
        }
        if (payload != null) {
            tbDos.write(payload);
            tb.addFieldEndOffset();
        }
        tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
        return tuple;
    }

    /**
     * @return the shared tuple reference, without generating a new tuple
     */
    public ITupleReference get() {
        return tuple;
    }

    /** Resets every field value generator, in field order. */
    public void reset() {
        for (int field = 0; field < fieldGens.length; field++) {
            fieldGens[field].reset();
        }
    }

    /**
     * @return the serializer array passed to the constructor (not a copy)
     */
    public ISerializerDeserializer[] getFieldSerdes() {
        return fieldSerdes;
    }

    /**
     * @return the field value generator array passed to the constructor (not a copy)
     */
    public IFieldValueGenerator[] getFieldGens() {
        return fieldGens;
    }
}
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java index ab87f93..039cf7d 100644 --- a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.Executors; +import org.apache.commons.lang3.StringUtils; import org.apache.hyracks.api.application.INCApplicationContext; import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.dataflow.ActivityId; @@ -53,4 +54,8 @@ public class TestUtils { devices.add(new IODeviceHandle(new File(System.getProperty("java.io.tmpdir")), ".")); return new IOManager(devices, Executors.newCachedThreadPool()); } + + public static String joinPath(String... pathElements) { + return StringUtils.join(pathElements, File.separatorChar); + } }
