http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
new file mode 100644
index 0000000..c069a5f
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * ColumnVector contains the shared structure for the sub-types,
+ * including NULL information, and whether this vector
+ * repeats, i.e. has all values the same, so only the first
+ * one is set. This is used to accelerate query performance
+ * by handling a whole vector in O(1) time when applicable.
+ *
+ * The fields are public by design since this is a performance-critical
+ * structure that is used in the inner loop of query execution.
+ */
+public abstract class ColumnVector {
+
+  /*
+   * The current kinds of column vectors.
+   */
+  public enum Type {
+    NONE, // Useful when the type of column vector has not been determined yet.
+    LONG,
+    DOUBLE,
+    BYTES,
+    DECIMAL,
+    TIMESTAMP,
+    INTERVAL_DAY_TIME,
+    STRUCT,
+    LIST,
+    MAP,
+    UNION
+  }
+
+  /*
+   * If hasNulls is true, then this array contains true if the value
+   * is null, otherwise false. The array is always allocated, so a batch can be re-used
+   * later and nulls added.
+   */
+  public boolean[] isNull;
+
+  // If the whole column vector has no nulls, this is true, otherwise false.
+  public boolean noNulls;
+
+  /*
+   * True if same value repeats for whole column vector.
+   * If so, vector[0] holds the repeating value.
+   */
+  public boolean isRepeating;
+
+  // Variables to hold state from before flattening so it can be easily restored.
+  private boolean preFlattenIsRepeating;
+  private boolean preFlattenNoNulls;
+
+  /**
+   * Constructor for super-class ColumnVector. This is not called directly,
+   * but used to initialize inherited fields.
+   *
+   * @param len Vector length
+   */
+  public ColumnVector(int len) {
+    isNull = new boolean[len];
+    noNulls = true;
+    isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Resets the column to default state
+   *  - fills the isNull array with false
+   *  - sets noNulls to true
+   *  - sets isRepeating to false
+   */
+  public void reset() {
+    if (!noNulls) {
+      Arrays.fill(isNull, false);
+    }
+    noNulls = true;
+    isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Sets the isRepeating flag. Recurses over structs and unions so that the
+   * flags are set correctly.
+   * @param isRepeating true if the whole vector holds a single repeated value
+   */
+  public void setRepeating(boolean isRepeating) {
+    this.isRepeating = isRepeating;
+  }
+
+  public abstract void flatten(boolean selectedInUse, int[] sel, int size);
+
+  // Simplify vector by brute-force flattening noNulls if isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
+      int size) {
+
+    boolean nullFillValue;
+
+    if (noNulls) {
+      nullFillValue = false;
+    } else {
+      nullFillValue = isNull[0];
+    }
+
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        isNull[i] = nullFillValue;
+      }
+    } else {
+      Arrays.fill(isNull, 0, size, nullFillValue);
+    }
+
+    // all nulls are now explicit
+    noNulls = false;
+  }
+
+  protected void flattenNoNulls(boolean selectedInUse, int[] sel,
+      int size) {
+    if (noNulls) {
+      noNulls = false;
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          isNull[sel[j]] = false;
+        }
+      } else {
+        Arrays.fill(isNull, 0, size, false);
+      }
+    }
+  }
+
+  /**
+   * Restore the state of isRepeating and noNulls to what it was
+   * before flattening. This must only be called just after flattening
+   * and then evaluating a VectorExpression on the column vector.
+   * It is an optimization that allows other operations on the same
+   * column to continue to benefit from the isRepeating and noNulls
+   * indicators.
+   */
+  public void unFlatten() {
+    isRepeating = preFlattenIsRepeating;
+    noNulls = preFlattenNoNulls;
+  }
+
+  // Record repeating and no nulls state to be restored later.
+  protected void flattenPush() {
+    preFlattenIsRepeating = isRepeating;
+    preFlattenNoNulls = noNulls;
+  }
+
+  /**
+   * Set the element in this column vector from the given input vector.
+   * This method can assume that the output does not have isRepeating set.
+   */
+  public abstract void setElement(int outElementNum, int inputElementNum,
+      ColumnVector inputVector);
+
+  /**
+   * Initialize the column vector. This method can be overridden by specific column vector types.
+   * Use this method only if the individual type of the column vector is not known, otherwise its
+   * preferable to call specific initialization methods.
+   */
+  public void init() {
+    // Do nothing by default
+  }
+
+  /**
+   * Ensure the ColumnVector can hold at least size values.
+   * This method is deliberately *not* recursive because the complex types
+   * can easily have more (or less) children than the upper levels.
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  public void ensureSize(int size, boolean preserveData) {
+    if (isNull.length < size) {
+      boolean[] oldArray = isNull;
+      isNull = new boolean[size];
+      if (preserveData && !noNulls) {
+        if (isRepeating) {
+          isNull[0] = oldArray[0];
+        } else {
+          System.arraycopy(oldArray, 0, isNull, 0, oldArray.length);
+        }
+      }
+    }
+  }
+
+  /**
+   * Print the value for this column into the given string builder.
+   * @param buffer the buffer to print into
+   * @param row the id of the row to print
+   */
+  public abstract void stringifyValue(StringBuilder buffer,
+      int row);
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
new file mode 100644
index 0000000..0c52210
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+public class DecimalColumnVector extends ColumnVector {
+
+  /**
+   * A vector of HiveDecimalWritable objects.
+   *
+   * For high performance and easy access to this low-level structure,
+   * the fields are public by design (as they are in other ColumnVector
+   * types).
+   */
+  public HiveDecimalWritable[] vector;
+  public short scale;
+  public short precision;
+
+  public DecimalColumnVector(int precision, int scale) {
+    this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
+  }
+
+  public DecimalColumnVector(int size, int precision, int scale) {
+    super(size);
+    this.precision = (short) precision;
+    this.scale = (short) scale;
+    vector = new HiveDecimalWritable[size];
+    for (int i = 0; i < size; i++) {
+      vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+    }
+  }
+
+  // Fill all the vector entries with provided value
+  public void fill(HiveDecimal value) {
+    noNulls = true;
+    isRepeating = true;
+    if (vector[0] == null) {
+      vector[0] = new HiveDecimalWritable(value);
+    } else {
+      vector[0].set(value);
+    }
+  }
+
+  // Fill the column vector with nulls
+  public void fillWithNulls() {
+    noNulls = false;
+    isRepeating = true;
+    vector[0] = null;
+    isNull[0] = true;
+  }
+
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      // Copy the repeated value into each flattened position so that
+      // per-row access is valid after flattening.
+      if (noNulls || !isNull[0]) {
+        HiveDecimal repeatVal = vector[0].getHiveDecimal();
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            vector[sel[j]].set(repeatVal);
+          }
+        } else {
+          for (int i = 0; i < size; i++) {
+            vector[i].set(repeatVal);
+          }
+        }
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      HiveDecimal hiveDec =
+          ((DecimalColumnVector) inputVector).vector[inputElementNum]
+              .getHiveDecimal(precision, scale);
+      if (hiveDec == null) {
+        isNull[outElementNum] = true;
+        noNulls = false;
+      } else {
+        isNull[outElementNum] = false;
+        vector[outElementNum].set(hiveDec);
+      }
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row].toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  public void set(int elementNum, HiveDecimalWritable writable) {
+    if (writable == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      HiveDecimal hiveDec = writable.getHiveDecimal(precision, scale);
+      if (hiveDec == null) {
+        noNulls = false;
+        isNull[elementNum] = true;
+      } else {
+        vector[elementNum].set(hiveDec);
+      }
+    }
+  }
+
+  public void set(int elementNum, HiveDecimal hiveDec) {
+    HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale);
+    if (checkedDec == null) {
+      noNulls = false;
+      isNull[elementNum] = true;
+    } else {
+      vector[elementNum].set(checkedDec);
+    }
+  }
+
+  public void setNullDataValue(int elementNum) {
+    // E.g. For scale 2 the minimum is "0.01"
+    HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale);
+    vector[elementNum].set(minimumNonZeroValue);
+  }
+
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size > vector.length) {
+      HiveDecimalWritable[] oldArray = vector;
+      vector = new HiveDecimalWritable[size];
+      if (preserveData) {
+        // we copy all of the values to avoid creating more objects
+        System.arraycopy(oldArray, 0, vector, 0, oldArray.length);
+        for (int i = oldArray.length; i < vector.length; ++i) {
+          vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
+        }
+      }
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
new file mode 100644
index 0000000..bd421f4
--- /dev/null
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable double precision floating point column vector.
+ * This class will be used for operations on all floating point types (float, double)
+ * and as such will use a 64-bit double value to hold the biggest possible value.
+ * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. This will
+ * reduce the amount of code that needs to be generated and also will run fast since the
+ * machine operates with 64-bit words.
+ *
+ * The vector[] field is public by design for high-performance access in the inner
+ * loop of query execution.
+ */
+public class DoubleColumnVector extends ColumnVector {
+  public double[] vector;
+  public static final double NULL_VALUE = Double.NaN;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public DoubleColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len
+   */
+  public DoubleColumnVector(int len) {
+    super(len);
+    vector = new double[len];
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the column vector with the provided value
+  public void fill(double value) {
+    noNulls = true;
+    isRepeating = true;
+    vector[0] = value;
+  }
+
+  // Fill the column vector with nulls
+  public void fillWithNulls() {
+    noNulls = false;
+    isRepeating = true;
+    vector[0] = NULL_VALUE;
+    isNull[0] = true;
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      double repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      vector[outElementNum] =
+          ((DoubleColumnVector) inputVector).vector[inputElementNum];
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      buffer.append(vector[row]);
+    } else {
+      buffer.append("null");
+    }
+  }
+
+  @Override
+  public void ensureSize(int size, boolean preserveData) {
+    super.ensureSize(size, preserveData);
+    if (size > vector.length) {
+      double[] oldArray = vector;
+      vector = new double[size];
+      if (preserveData) {
+        if (isRepeating) {
+          vector[0] = oldArray[0];
+        } else {
+          System.arraycopy(oldArray, 0, vector, 0, oldArray.length);
+        }
+      }
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
new file mode 100644
index 0000000..39ccea8
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -0,0 +1,348 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.io.Writable; + +/** + * This class represents a nullable interval day time column vector capable of handing a + * wide range of interval day time values. + * + * We store the 2 (value) fields of a HiveIntervalDayTime class in primitive arrays. + * + * We do this to avoid an array of Java HiveIntervalDayTime objects which would have poor storage + * and memory access characteristics. + * + * Generally, the caller will fill in a scratch HiveIntervalDayTime object with values from a row, + * work using the scratch HiveIntervalDayTime, and then perhaps update the column vector row + * with a result. + */ +public class IntervalDayTimeColumnVector extends ColumnVector { + + /* + * The storage arrays for this column vector corresponds to the storage of a HiveIntervalDayTime: + */ + private long[] totalSeconds; + // The values from HiveIntervalDayTime.getTotalSeconds(). 
+ + private int[] nanos; + // The values from HiveIntervalDayTime.getNanos(). + + /* + * Scratch objects. + */ + private final HiveIntervalDayTime scratchIntervalDayTime; + + private Writable scratchWritable; + // Supports keeping a HiveIntervalDayTimeWritable object without having to import + // that definition... + + /** + * Use this constructor by default. All column vectors + * should normally be the default size. + */ + public IntervalDayTimeColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Don't use this except for testing purposes. + * + * @param len the number of rows + */ + public IntervalDayTimeColumnVector(int len) { + super(len); + + totalSeconds = new long[len]; + nanos = new int[len]; + + scratchIntervalDayTime = new HiveIntervalDayTime(); + + scratchWritable = null; // Allocated by caller. + } + + /** + * Return the number of rows. + * @return + */ + public int getLength() { + return totalSeconds.length; + } + + /** + * Return a row's HiveIntervalDayTime.getTotalSeconds() value. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getTotalSeconds(int elementNum) { + return totalSeconds[elementNum]; + } + + /** + * Return a row's HiveIntervalDayTime.getNanos() value. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getNanos(int elementNum) { + return nanos[elementNum]; + } + + /** + * Return a row's HiveIntervalDayTime.getDouble() value. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public double getDouble(int elementNum) { + return asScratchIntervalDayTime(elementNum).getDouble(); + } + + /** + * Set a HiveIntervalDayTime object from a row of the column. + * We assume the entry has already been NULL checked and isRepeated adjusted. 
* @param intervalDayTime
+   * @param elementNum
+   */
+  public void intervalDayTimeUpdate(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    intervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+  }
+
+
+  /**
+   * Return the scratch HiveIntervalDayTime object set from a row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public HiveIntervalDayTime asScratchIntervalDayTime(int elementNum) {
+    scratchIntervalDayTime.set(totalSeconds[elementNum], nanos[elementNum]);
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Return the scratch HiveIntervalDayTime (contents undefined).
+   * @return
+   */
+  public HiveIntervalDayTime getScratchIntervalDayTime() {
+    return scratchIntervalDayTime;
+  }
+
+  /**
+   * Compare row to HiveIntervalDayTime.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @param intervalDayTime
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum, HiveIntervalDayTime intervalDayTime) {
+    return asScratchIntervalDayTime(elementNum).compareTo(intervalDayTime);
+  }
+
+  /**
+   * Compare HiveIntervalDayTime to row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param intervalDayTime
+   * @param elementNum
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(HiveIntervalDayTime intervalDayTime, int elementNum) {
+    return intervalDayTime.compareTo(asScratchIntervalDayTime(elementNum));
+  }
+
+  /**
+   * Compare a row to another IntervalDayTimeColumnVector's row.
+   * @param elementNum1
+   * @param intervalDayTimeColVector2
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(int elementNum1, IntervalDayTimeColumnVector intervalDayTimeColVector2,
+      int elementNum2) {
+    return asScratchIntervalDayTime(elementNum1).compareTo(
+        intervalDayTimeColVector2.asScratchIntervalDayTime(elementNum2));
+  }
+
+  /**
+   * Compare another IntervalDayTimeColumnVector's row to a row.
+   * @param intervalDayTimeColVector1
+   * @param elementNum1
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(IntervalDayTimeColumnVector intervalDayTimeColVector1, int elementNum1,
+      int elementNum2) {
+    return intervalDayTimeColVector1.asScratchIntervalDayTime(elementNum1).compareTo(
+        asScratchIntervalDayTime(elementNum2));
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+
+    IntervalDayTimeColumnVector input = (IntervalDayTimeColumnVector) inputVector;
+
+    // Handle repeating and null inputs the same way the sibling column
+    // vector types (e.g. DoubleColumnVector) do.
+    if (input.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (input.noNulls || !input.isNull[inputElementNum]) {
+      isNull[outElementNum] = false;
+      totalSeconds[outElementNum] = input.totalSeconds[inputElementNum];
+      nanos[outElementNum] = input.nanos[inputElementNum];
+    } else {
+      isNull[outElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatFastTime = totalSeconds[0];
+      int repeatNanos = nanos[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          totalSeconds[i] = repeatFastTime;
+          nanos[i] = repeatNanos;
+        }
+      } else {
+        Arrays.fill(totalSeconds, 0, size, repeatFastTime);
+        Arrays.fill(nanos, 0, size, repeatNanos);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+
+  /**
+   * Set a row from a HiveIntervalDayTime.
+   * We assume the entry has already been isRepeated adjusted.
+ * @param elementNum + * @param intervalDayTime + */ + public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { + this.totalSeconds[elementNum] = intervalDayTime.getTotalSeconds(); + this.nanos[elementNum] = intervalDayTime.getNanos(); + } + + /** + * Set a row from the current value in the scratch interval day time. + * @param elementNum + */ + public void setFromScratchIntervalDayTime(int elementNum) { + this.totalSeconds[elementNum] = scratchIntervalDayTime.getTotalSeconds(); + this.nanos[elementNum] = scratchIntervalDayTime.getNanos(); + } + + /** + * Set row to standard null value(s). + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + */ + public void setNullValue(int elementNum) { + totalSeconds[elementNum] = 0; + nanos[elementNum] = 1; + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) { + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.totalSeconds[0] = totalSeconds[0]; + output.nanos[0] = nanos[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } + } + else { + System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + /** + * Fill all the vector entries with a HiveIntervalDayTime. + * @param intervalDayTime + */ + public void fill(HiveIntervalDayTime intervalDayTime) { + noNulls = true; + isRepeating = true; + totalSeconds[0] = intervalDayTime.getTotalSeconds(); + nanos[0] = intervalDayTime.getNanos(); + } + + /** + * Return a convenience writable object stored by this column vector. + * Supports keeping a TimestampWritable object without having to import that definition... 
+ * @return + */ + public Writable getScratchWritable() { + return scratchWritable; + } + + /** + * Set the convenience writable object stored by this column vector + * @param scratchWritable + */ + public void setScratchWritable(Writable scratchWritable) { + this.scratchWritable = scratchWritable; + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + scratchIntervalDayTime.set(totalSeconds[row], nanos[row]); + buffer.append(scratchIntervalDayTime.toString()); + } else { + buffer.append("null"); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java new file mode 100644 index 0000000..66240dd --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of list objects. + * + * Each list is composed of a range of elements in the underlying child + * ColumnVector. The range for list i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. + */ +public class ListColumnVector extends MultiValuedColumnVector { + + public ColumnVector child; + + public ListColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null); + } + + /** + * Constructor for ListColumnVector. + * + * @param len Vector length + * @param child The child vector + */ + public ListColumnVector(int len, ColumnVector child) { + super(len); + this.child = child; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + child.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + ListColumnVector input = (ListColumnVector) inputVector; + if (input.isRepeating) { + inputElementNum = 0; + } + if (!input.noNulls && input.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += length; + lengths[outElementNum] = length; + child.ensureSize(childCount, true); + for (int i = 0; i < length; ++i) { + child.setElement(i + offset, inputOffset + i, input.child); + } + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + boolean isFirst = true; + for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) { + if (isFirst) { + isFirst = false; + } 
else { + buffer.append(", "); + } + child.stringifyValue(buffer, (int) i); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void init() { + super.init(); + child.init(); + } + + @Override + public void reset() { + super.reset(); + child.reset(); + } + + @Override + public void unFlatten() { + super.unFlatten(); + if (!isRepeating || noNulls || !isNull[0]) { + child.unFlatten(); + } + } + +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java new file mode 100644 index 0000000..80d4731 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -0,0 +1,224 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.IOException; +import java.util.Arrays; + +/** + * This class represents a nullable int column vector. 
package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Arrays;

/**
 * This class represents a nullable int column vector.
 * It is used for operations on all integer types (tinyint, smallint, int,
 * bigint) and therefore stores a 64-bit long, wide enough for the biggest
 * possible value. Smaller int types are converted as needed during
 * copy-in/copy-out, which keeps the amount of generated code small and runs
 * fast since the machine operates with 64-bit words.
 *
 * The vector[] field is public by design for high-performance access in the
 * inner loop of query execution.
 */
public class LongColumnVector extends ColumnVector {
  public long[] vector;
  public static final long NULL_VALUE = 1;

  /**
   * Use this constructor by default. All column vectors
   * should normally be the default size.
   */
  public LongColumnVector() {
    this(VectorizedRowBatch.DEFAULT_SIZE);
  }

  /**
   * Don't use this except for testing purposes.
   *
   * @param len the number of rows
   */
  public LongColumnVector(int len) {
    super(len);
    vector = new long[len];
  }

  /**
   * Copy the current object contents into the output. Only copy selected
   * entries, as indicated by selectedInUse and the sel array.
   */
  public void copySelected(
      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {

    // Output has nulls if and only if input has nulls.
    output.noNulls = noNulls;

    // A repeating input stays repeating: one value and one null flag suffice.
    if (isRepeating) {
      output.isRepeating = true;
      output.vector[0] = vector[0];
      output.isNull[0] = isNull[0];
      return;
    }
    output.isRepeating = false;

    // Copy data values over.
    if (selectedInUse) {
      for (int idx = 0; idx < size; idx++) {
        int row = sel[idx];
        output.vector[row] = vector[row];
      }
    } else {
      System.arraycopy(vector, 0, output.vector, 0, size);
    }

    // Copy nulls over if needed.
    if (!noNulls) {
      if (selectedInUse) {
        for (int idx = 0; idx < size; idx++) {
          int row = sel[idx];
          output.isNull[row] = isNull[row];
        }
      } else {
        System.arraycopy(isNull, 0, output.isNull, 0, size);
      }
    }
  }

  /**
   * Copy the current object contents into the output, widening each long to
   * a double. Only copy selected entries, as indicated by selectedInUse and
   * the sel array.
   */
  public void copySelected(
      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {

    // Output has nulls if and only if input has nulls.
    output.noNulls = noNulls;

    if (isRepeating) {
      output.isRepeating = true;
      output.vector[0] = vector[0]; // automatic conversion to double is done here
      output.isNull[0] = isNull[0];
      return;
    }
    output.isRepeating = false;

    // Copy data values over; each assignment widens long -> double.
    if (selectedInUse) {
      for (int idx = 0; idx < size; idx++) {
        int row = sel[idx];
        output.vector[row] = vector[row];
      }
    } else {
      for (int row = 0; row < size; ++row) {
        output.vector[row] = vector[row];
      }
    }

    // Copy nulls over if needed.
    if (!noNulls) {
      if (selectedInUse) {
        for (int idx = 0; idx < size; idx++) {
          int row = sel[idx];
          output.isNull[row] = isNull[row];
        }
      } else {
        System.arraycopy(isNull, 0, output.isNull, 0, size);
      }
    }
  }

  /** Fill the column vector with the provided value. */
  public void fill(long value) {
    noNulls = true;
    isRepeating = true;
    vector[0] = value;
  }

  /** Fill the column vector with nulls. */
  public void fillWithNulls() {
    noNulls = false;
    isRepeating = true;
    vector[0] = NULL_VALUE;
    isNull[0] = true;
  }

  /**
   * Simplify vector by brute-force flattening noNulls and isRepeating.
   * This can be used to reduce combinatorial explosion of code paths in
   * VectorExpressions with many arguments.
   */
  public void flatten(boolean selectedInUse, int[] sel, int size) {
    flattenPush();
    if (isRepeating) {
      isRepeating = false;
      long repeated = vector[0];
      if (selectedInUse) {
        for (int idx = 0; idx < size; idx++) {
          vector[sel[idx]] = repeated;
        }
      } else {
        Arrays.fill(vector, 0, size, repeated);
      }
      flattenRepeatingNulls(selectedInUse, sel, size);
    }
    flattenNoNulls(selectedInUse, sel, size);
  }

  @Override
  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
    int srcRow = inputVector.isRepeating ? 0 : inputElementNum;
    if (!inputVector.noNulls && inputVector.isNull[srcRow]) {
      isNull[outElementNum] = true;
      noNulls = false;
    } else {
      isNull[outElementNum] = false;
      vector[outElementNum] = ((LongColumnVector) inputVector).vector[srcRow];
    }
  }

  @Override
  public void stringifyValue(StringBuilder buffer, int row) {
    int r = isRepeating ? 0 : row;
    if (!noNulls && isNull[r]) {
      buffer.append("null");
    } else {
      buffer.append(vector[r]);
    }
  }

  @Override
  public void ensureSize(int size, boolean preserveData) {
    super.ensureSize(size, preserveData);
    if (size <= vector.length) {
      return;
    }
    long[] grown = new long[size];
    if (preserveData) {
      if (isRepeating) {
        grown[0] = vector[0];
      } else {
        System.arraycopy(vector, 0, grown, 0, vector.length);
      }
    }
    vector = grown;
  }
}
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java new file mode 100644 index 0000000..e8421e3 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of map objects. + * + * Each map is composed of a range of elements in the underlying child + * ColumnVector. The range for map i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. 
+ */ +public class MapColumnVector extends MultiValuedColumnVector { + + public ColumnVector keys; + public ColumnVector values; + + public MapColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null, null); + } + + /** + * Constructor for MapColumnVector + * + * @param len Vector length + * @param keys The keys column vector + * @param values The values column vector + */ + public MapColumnVector(int len, ColumnVector keys, ColumnVector values) { + super(len); + this.keys = keys; + this.values = values; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + keys.flatten(useSelected, selected, size); + values.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + MapColumnVector input = (MapColumnVector) inputVector; + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += length; + lengths[outElementNum] = length; + keys.ensureSize(childCount, true); + values.ensureSize(childCount, true); + for (int i = 0; i < length; ++i) { + keys.setElement(i + offset, inputOffset + i, input.keys); + values.setElement(i + offset, inputOffset + i, input.values); + } + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + boolean isFirst = true; + for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) { + if (isFirst) { + isFirst = false; + } else { + buffer.append(", "); + } + buffer.append("{\"key\": "); + keys.stringifyValue(buffer, (int) i); + 
buffer.append(", \"value\": "); + values.stringifyValue(buffer, (int) i); + buffer.append('}'); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void init() { + super.init(); + keys.init(); + values.init(); + } + + @Override + public void reset() { + super.reset(); + keys.reset(); + values.reset(); + } + + @Override + public void unFlatten() { + super.unFlatten(); + if (!isRepeating || noNulls || !isNull[0]) { + keys.unFlatten(); + values.unFlatten(); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java new file mode 100644 index 0000000..1aeff83 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Arrays;

/**
 * The representation of a vectorized column of multi-valued objects, such
 * as lists and maps.
 *
 * Each object is composed of a range of elements in the underlying child
 * ColumnVector. The range for list i is
 * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
 */
public abstract class MultiValuedColumnVector extends ColumnVector {

  // Start index, per row, into the child vector(s).
  public long[] offsets;
  // Number of child elements, per row.
  public long[] lengths;
  // the number of children slots used
  public int childCount;

  /**
   * Constructor for MultiValuedColumnVector.
   *
   * @param len Vector length
   */
  public MultiValuedColumnVector(int len) {
    super(len);
    childCount = 0;
    offsets = new long[len];
    lengths = new long[len];
  }

  /**
   * Flatten the child vector(s); subclasses forward to each child they own.
   *
   * @param useSelected whether selected is in use
   * @param selected child-level selection indices (when useSelected)
   * @param size number of child elements to flatten
   */
  protected abstract void childFlatten(boolean useSelected, int[] selected,
      int size);

  /**
   * Brute-force flatten isRepeating/noNulls for this vector and its children.
   * A repeating row is materialized into every (selected) row; child
   * flattening is restricted to the child elements actually referenced.
   */
  @Override
  public void flatten(boolean selectedInUse, int[] sel, int size) {
    flattenPush();

    if (isRepeating) {
      if (noNulls || !isNull[0]) {
        // Repeating non-null: copy row 0's range into every (selected) row.
        if (selectedInUse) {
          for (int i = 0; i < size; ++i) {
            int row = sel[i];
            offsets[row] = offsets[0];
            lengths[row] = lengths[0];
            isNull[row] = false;
          }
        } else {
          Arrays.fill(offsets, 0, size, offsets[0]);
          Arrays.fill(lengths, 0, size, lengths[0]);
          Arrays.fill(isNull, 0, size, false);
        }
        // We optimize by assuming that a repeating list/map will run from
        // from 0 .. lengths[0] in the child vector.
        // Sanity check the assumption that we can start at 0.
        if (offsets[0] != 0) {
          throw new IllegalArgumentException("Repeating offset isn't 0, but "
              + offsets[0]);
        }
        childFlatten(false, null, (int) lengths[0]);
      } else {
        // Repeating null: mark every (selected) row null; children untouched.
        if (selectedInUse) {
          for(int i=0; i < size; ++i) {
            isNull[sel[i]] = true;
          }
        } else {
          Arrays.fill(isNull, 0, size, true);
        }
      }
      isRepeating = false;
      // NOTE(review): noNulls is cleared even when the repeating value was
      // non-null; isNull was filled above so readers still get correct
      // results — presumably deliberate conservatism, verify upstream.
      noNulls = false;
    } else {
      if (selectedInUse) {
        // Build a child-level selection covering exactly the elements of the
        // selected rows, then flatten only those child elements.
        int childSize = 0;
        for(int i=0; i < size; ++i) {
          childSize += lengths[sel[i]];
        }
        int[] childSelection = new int[childSize];
        int idx = 0;
        for(int i=0; i < size; ++i) {
          int row = sel[i];
          for(int elem=0; elem < lengths[row]; ++elem) {
            childSelection[idx++] = (int) (offsets[row] + elem);
          }
        }
        childFlatten(true, childSelection, childSize);
      } else {
        childFlatten(false, null, childCount);
      }
      flattenNoNulls(selectedInUse, sel, size);
    }
  }

  @Override
  public void ensureSize(int size, boolean preserveData) {
    super.ensureSize(size, preserveData);
    if (size > offsets.length) {
      long[] oldOffsets = offsets;
      offsets = new long[size];
      long oldLengths[] = lengths;
      lengths = new long[size];
      if (preserveData) {
        if (isRepeating) {
          // Only row 0 is meaningful when repeating.
          offsets[0] = oldOffsets[0];
          lengths[0] = oldLengths[0];
        } else {
          System.arraycopy(oldOffsets, 0, offsets, 0 , oldOffsets.length);
          System.arraycopy(oldLengths, 0, lengths, 0, oldLengths.length);
        }
      }
    }
  }

  /**
   * Initializee the vector
   */
  @Override
  public void init() {
    super.init();
    childCount = 0;
  }

  /**
   * Reset the vector for the next batch.
   */
  @Override
  public void reset() {
    super.reset();
    childCount = 0;
  }

}
+ */ +public class StructColumnVector extends ColumnVector { + + public ColumnVector[] fields; + + public StructColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for StructColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public StructColumnVector(int len, ColumnVector... fields) { + super(len); + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields; + for (int i = 0; i < inputFields.length; ++i) { + fields[i].setElement(outElementNum, inputElementNum, inputFields[i]); + } + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + for(int i=0; i < fields.length; ++i) { + if (i != 0) { + buffer.append(", "); + } + fields[i].stringifyValue(buffer, row); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } 
+ } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + fields[i].unFlatten(); + } + } + + @Override + public void setRepeating(boolean isRepeating) { + super.setRepeating(isRepeating); + for(int i=0; i < fields.length; ++i) { + fields[i].setRepeating(isRepeating); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java new file mode 100644 index 0000000..228461a --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -0,0 +1,400 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.hadoop.hive.ql.exec.vector;

import java.sql.Timestamp;
import java.util.Arrays;

import org.apache.hadoop.io.Writable;

/**
 * This class represents a nullable timestamp column vector capable of handling a wide range of
 * timestamp values.
 *
 * We store the 2 (value) fields of a Timestamp class in primitive arrays.
 *
 * We do this to avoid an array of Java Timestamp objects which would have poor storage
 * and memory access characteristics.
 *
 * Generally, the caller will fill in a scratch timestamp object with values from a row, work
 * using the scratch timestamp, and then perhaps update the column vector row with a result.
 */
public class TimestampColumnVector extends ColumnVector {

  // The values from Timestamp.getTime() (milliseconds since epoch).
  public long[] time;

  // The values from Timestamp.getNanos() (sub-second nanoseconds).
  public int[] nanos;

  // Scratch object reused by asScratchTimestamp() and friends.
  private final Timestamp scratchTimestamp;

  // Supports keeping a TimestampWritable object without having to import that definition...
  private Writable scratchWritable;

  /**
   * Use this constructor by default. All column vectors
   * should normally be the default size.
   */
  public TimestampColumnVector() {
    this(VectorizedRowBatch.DEFAULT_SIZE);
  }

  /**
   * Don't use this except for testing purposes.
   *
   * @param len the number of rows
   */
  public TimestampColumnVector(int len) {
    super(len);

    time = new long[len];
    nanos = new int[len];

    scratchTimestamp = new Timestamp(0);

    scratchWritable = null; // Allocated by caller.
  }

  /**
   * Return the number of rows.
   * @return the allocated vector length
   */
  public int getLength() {
    return time.length;
  }

  /**
   * Return a row's Timestamp.getTime() value.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param elementNum row index
   * @return milliseconds since epoch
   */
  public long getTime(int elementNum) {
    return time[elementNum];
  }

  /**
   * Return a row's Timestamp.getNanos() value.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param elementNum row index
   * @return sub-second nanoseconds
   */
  public int getNanos(int elementNum) {
    return nanos[elementNum];
  }

  /**
   * Set a Timestamp object from a row of the column.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param timestamp the object to update
   * @param elementNum row index
   */
  public void timestampUpdate(Timestamp timestamp, int elementNum) {
    timestamp.setTime(time[elementNum]);
    timestamp.setNanos(nanos[elementNum]);
  }

  /**
   * Return the scratch Timestamp object set from a row.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param elementNum row index
   * @return the shared scratch timestamp (overwritten by the next call)
   */
  public Timestamp asScratchTimestamp(int elementNum) {
    scratchTimestamp.setTime(time[elementNum]);
    scratchTimestamp.setNanos(nanos[elementNum]);
    return scratchTimestamp;
  }

  /**
   * Return the scratch timestamp (contents undefined).
   * @return the shared scratch timestamp
   */
  public Timestamp getScratchTimestamp() {
    return scratchTimestamp;
  }

  /**
   * Return a long representation (whole seconds) of a row's Timestamp.
   * @param elementNum row index
   * @return seconds since epoch, rounded down
   */
  public long getTimestampAsLong(int elementNum) {
    scratchTimestamp.setTime(time[elementNum]);
    scratchTimestamp.setNanos(nanos[elementNum]);
    return getTimestampAsLong(scratchTimestamp);
  }

  /**
   * Return a long representation (whole seconds) of a Timestamp.
   * @param timestamp the value to convert
   * @return seconds since epoch, rounded down
   */
  public static long getTimestampAsLong(Timestamp timestamp) {
    return millisToSeconds(timestamp.getTime());
  }

  // Copy of TimestampWritable.millisToSeconds
  /**
   * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of
   * seconds. 500 would round to 0, -500 would round to -1.
   */
  private static long millisToSeconds(long millis) {
    if (millis >= 0) {
      return millis / 1000;
    } else {
      return (millis - 999) / 1000;
    }
  }

  /**
   * Return a double representation of a row's Timestamp.
   * @param elementNum row index
   * @return seconds since epoch with fractional nanoseconds
   */
  public double getDouble(int elementNum) {
    scratchTimestamp.setTime(time[elementNum]);
    scratchTimestamp.setNanos(nanos[elementNum]);
    return getDouble(scratchTimestamp);
  }

  /**
   * Return a double representation of a Timestamp.
   * @param timestamp the value to convert
   * @return seconds since epoch with fractional nanoseconds
   */
  public static double getDouble(Timestamp timestamp) {
    // Same algorithm as TimestampWritable (not currently import-able here).
    double seconds, nanos;
    seconds = millisToSeconds(timestamp.getTime());
    nanos = timestamp.getNanos();
    return seconds + nanos / 1000000000;
  }

  /**
   * Compare row to Timestamp.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param elementNum row index
   * @param timestamp value to compare against
   * @return -1, 0, 1 standard compareTo values.
   */
  public int compareTo(int elementNum, Timestamp timestamp) {
    return asScratchTimestamp(elementNum).compareTo(timestamp);
  }

  /**
   * Compare Timestamp to row.
   * We assume the entry has already been NULL checked and isRepeated adjusted.
   * @param timestamp value to compare
   * @param elementNum row index
   * @return -1, 0, 1 standard compareTo values.
   */
  public int compareTo(Timestamp timestamp, int elementNum) {
    return timestamp.compareTo(asScratchTimestamp(elementNum));
  }

  /**
   * Compare a row to another TimestampColumnVector's row.
   * @param elementNum1 row index in this vector
   * @param timestampColVector2 the other vector
   * @param elementNum2 row index in the other vector
   * @return -1, 0, 1 standard compareTo values.
   */
  public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2,
      int elementNum2) {
    return asScratchTimestamp(elementNum1).compareTo(
        timestampColVector2.asScratchTimestamp(elementNum2));
  }

  /**
   * Compare another TimestampColumnVector's row to a row.
   * @param timestampColVector1 the other vector
   * @param elementNum1 row index in the other vector
   * @param elementNum2 row index in this vector
   * @return -1, 0, 1 standard compareTo values.
   */
  public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
      int elementNum2) {
    return timestampColVector1.asScratchTimestamp(elementNum1).compareTo(
        asScratchTimestamp(elementNum2));
  }

  @Override
  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {

    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector;

    // BUG FIX: the original ignored a repeating input and silently dropped
    // null information. Mirror the null/repeating handling used by every
    // other ColumnVector subclass (e.g. LongColumnVector.setElement).
    if (timestampColVector.isRepeating) {
      inputElementNum = 0;
    }
    if (timestampColVector.noNulls || !timestampColVector.isNull[inputElementNum]) {
      isNull[outElementNum] = false;
      time[outElementNum] = timestampColVector.time[inputElementNum];
      nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
    } else {
      isNull[outElementNum] = true;
      noNulls = false;
    }
  }

  /**
   * Simplify vector by brute-force flattening noNulls and isRepeating.
   * This can be used to reduce combinatorial explosion of code paths in
   * VectorExpressions with many arguments.
   */
  public void flatten(boolean selectedInUse, int[] sel, int size) {
    flattenPush();
    if (isRepeating) {
      isRepeating = false;
      long repeatFastTime = time[0];
      int repeatNanos = nanos[0];
      if (selectedInUse) {
        for (int j = 0; j < size; j++) {
          int i = sel[j];
          time[i] = repeatFastTime;
          nanos[i] = repeatNanos;
        }
      } else {
        Arrays.fill(time, 0, size, repeatFastTime);
        Arrays.fill(nanos, 0, size, repeatNanos);
      }
      flattenRepeatingNulls(selectedInUse, sel, size);
    }
    flattenNoNulls(selectedInUse, sel, size);
  }

  /**
   * Set a row from a timestamp; a null timestamp marks the row null.
   * We assume the entry has already been isRepeated adjusted.
   * @param elementNum row index
   * @param timestamp value to store, or null
   */
  public void set(int elementNum, Timestamp timestamp) {
    if (timestamp == null) {
      this.noNulls = false;
      this.isNull[elementNum] = true;
    } else {
      this.time[elementNum] = timestamp.getTime();
      this.nanos[elementNum] = timestamp.getNanos();
    }
  }

  /**
   * Set a row from the current value in the scratch timestamp.
   * @param elementNum row index
   */
  public void setFromScratchTimestamp(int elementNum) {
    this.time[elementNum] = scratchTimestamp.getTime();
    this.nanos[elementNum] = scratchTimestamp.getNanos();
  }

  /**
   * Set row to standard null value(s).
   * We assume the entry has already been isRepeated adjusted.
   * @param elementNum row index
   */
  public void setNullValue(int elementNum) {
    time[elementNum] = 0;
    nanos[elementNum] = 1;
  }

  /**
   * Copy the current object contents into the output. Only copy selected
   * entries, as indicated by selectedInUse and the sel array.
   */
  public void copySelected(
      boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) {

    // Output has nulls if and only if input has nulls.
    output.noNulls = noNulls;
    output.isRepeating = false;

    // Handle repeating case
    if (isRepeating) {
      output.time[0] = time[0];
      output.nanos[0] = nanos[0];
      output.isNull[0] = isNull[0];
      output.isRepeating = true;
      return;
    }

    // Handle normal case

    // Copy data values over
    if (selectedInUse) {
      for (int j = 0; j < size; j++) {
        int i = sel[j];
        output.time[i] = time[i];
        output.nanos[i] = nanos[i];
      }
    } else {
      System.arraycopy(time, 0, output.time, 0, size);
      System.arraycopy(nanos, 0, output.nanos, 0, size);
    }

    // Copy nulls over if needed
    if (!noNulls) {
      if (selectedInUse) {
        for (int j = 0; j < size; j++) {
          int i = sel[j];
          output.isNull[i] = isNull[i];
        }
      } else {
        System.arraycopy(isNull, 0, output.isNull, 0, size);
      }
    }
  }

  /**
   * Fill all the vector entries with a timestamp.
   * @param timestamp value to repeat
   */
  public void fill(Timestamp timestamp) {
    noNulls = true;
    isRepeating = true;
    time[0] = timestamp.getTime();
    nanos[0] = timestamp.getNanos();
  }

  /**
   * Return a convenience writable object stored by this column vector.
   * Supports keeping a TimestampWritable object without having to import that definition...
   * @return the writable, or null if never set
   */
  public Writable getScratchWritable() {
    return scratchWritable;
  }

  /**
   * Set the convenience writable object stored by this column vector
   * @param scratchWritable the writable to keep
   */
  public void setScratchWritable(Writable scratchWritable) {
    this.scratchWritable = scratchWritable;
  }

  @Override
  public void stringifyValue(StringBuilder buffer, int row) {
    if (isRepeating) {
      row = 0;
    }
    if (noNulls || !isNull[row]) {
      scratchTimestamp.setTime(time[row]);
      scratchTimestamp.setNanos(nanos[row]);
      buffer.append(scratchTimestamp.toString());
    } else {
      buffer.append("null");
    }
  }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of struct objects. + * + * Each field is represented by a separate inner ColumnVector. Since this + * ColumnVector doesn't own any per row data other that the isNull flag, the + * isRepeating only covers the isNull array. + */ +public class UnionColumnVector extends ColumnVector { + + public int[] tags; + public ColumnVector[] fields; + + public UnionColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for UnionColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public UnionColumnVector(int len, ColumnVector... fields) { + super(len); + tags = new int[len]; + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + UnionColumnVector input = (UnionColumnVector) inputVector; + tags[outElementNum] = input.tags[inputElementNum]; + fields[tags[outElementNum]].setElement(outElementNum, inputElementNum, + input.fields[tags[outElementNum]]); + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append("{\"tag\": "); + buffer.append(tags[row]); + buffer.append(", \"value\": "); + fields[tags[row]].stringifyValue(buffer, row); + buffer.append('}'); + 
} else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + if (tags.length < size) { + if (preserveData) { + int[] oldTags = tags; + tags = new int[size]; + System.arraycopy(oldTags, 0, tags, 0, oldTags.length); + } else { + tags = new int[size]; + } + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } + } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + fields[i].unFlatten(); + } + } + + @Override + public void setRepeating(boolean isRepeating) { + super.setRepeating(isRepeating); + for(int i=0; i < fields.length; ++i) { + fields[i].setRepeating(isRepeating); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java new file mode 100644 index 0000000..9c066e0 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
package org.apache.hadoop.hive.ql.exec.vector;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;

/**
 * A VectorizedRowBatch is a set of rows, organized with each column
 * as a vector. It is the unit of query execution, organized to minimize
 * the cost per row and achieve high cycles-per-instruction.
 * The major fields are public by design to allow fast and convenient
 * access by the vectorized query execution code.
 */
public class VectorizedRowBatch implements Writable {
  public int numCols;           // number of columns
  public ColumnVector[] cols;   // a vector for each column
  public int size;              // number of rows that qualify (i.e. haven't been filtered out)
  public int[] selected;        // array of positions of selected values
  public int[] projectedColumns;
  public int projectionSize;

  // Partition metadata; both stay -1 until setPartitionInfo() is called.
  private int dataColumnCount;
  private int partitionColumnCount;

  /*
   * If no filtering has been applied yet, selectedInUse is false,
   * meaning that all rows qualify. If it is true, then the selected[] array
   * records the offsets of qualifying rows.
   */
  public boolean selectedInUse;

  // If this is true, then there is no data in the batch -- we have hit the end of input.
  public boolean endOfFile;

  /*
   * This number is carefully chosen to minimize overhead and typically allows
   * one VectorizedRowBatch to fit in cache.
   */
  public static final int DEFAULT_SIZE = 1024;

  /**
   * Return a batch with the specified number of columns.
   * This is the standard constructor -- all batches should be the same size.
   *
   * @param numCols the number of columns to include in the batch
   */
  public VectorizedRowBatch(int numCols) {
    this(numCols, DEFAULT_SIZE);
  }

  /**
   * Return a batch with the specified number of columns and rows.
   * Only call this constructor directly for testing purposes.
   * Batch size should normally always be DEFAULT_SIZE.
   *
   * @param numCols the number of columns to include in the batch
   * @param size the number of rows to include in the batch
   */
  public VectorizedRowBatch(int numCols, int size) {
    this.numCols = numCols;
    this.size = size;
    selected = new int[size];
    selectedInUse = false;
    this.cols = new ColumnVector[numCols];
    projectedColumns = new int[numCols];

    // Initially all columns are projected and in the same order
    projectionSize = numCols;
    for (int i = 0; i < numCols; i++) {
      projectedColumns[i] = i;
    }

    dataColumnCount = -1;
    partitionColumnCount = -1;
  }

  /**
   * Record how many of the columns are data columns vs. partition columns.
   *
   * @param dataColumnCount number of data columns
   * @param partitionColumnCount number of partition columns
   */
  public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) {
    this.dataColumnCount = dataColumnCount;
    this.partitionColumnCount = partitionColumnCount;
  }

  /** @return the data column count, or -1 if setPartitionInfo was never called */
  public int getDataColumnCount() {
    return dataColumnCount;
  }

  /** @return the partition column count, or -1 if setPartitionInfo was never called */
  public int getPartitionColumnCount() {
    return partitionColumnCount;
  }

  /**
   * Returns the maximum size of the batch (number of rows it can hold).
   */
  public int getMaxSize() {
    return selected.length;
  }

  /**
   * Return count of qualifying rows.
   *
   * @return number of rows that have not been filtered out
   */
  public long count() {
    return size;
  }

  private static String toUTF8(Object o) {
    if(o == null || o instanceof NullWritable) {
      return "\\N"; /* as found in LazySimpleSerDe's nullSequence */
    }
    return o.toString();
  }

  /**
   * Renders the qualifying rows, one bracketed row per line, honoring
   * the selected[] indirection and the column projection.
   */
  @Override
  public String toString() {
    if (size == 0) {
      return "";
    }
    StringBuilder b = new StringBuilder();
    if (this.selectedInUse) {
      for (int j = 0; j < size; j++) {
        int i = selected[j];
        b.append('[');
        for (int k = 0; k < projectionSize; k++) {
          int projIndex = projectedColumns[k];
          ColumnVector cv = cols[projIndex];
          if (k > 0) {
            b.append(", ");
          }
          // Guard against unpopulated columns, matching the branch below;
          // previously only the non-selected path null-checked cv.
          if (cv != null) {
            cv.stringifyValue(b, i);
          }
        }
        b.append(']');
        if (j < size - 1) {
          b.append('\n');
        }
      }
    } else {
      for (int i = 0; i < size; i++) {
        b.append('[');
        for (int k = 0; k < projectionSize; k++) {
          int projIndex = projectedColumns[k];
          ColumnVector cv = cols[projIndex];
          if (k > 0) {
            b.append(", ");
          }
          if (cv != null) {
            cv.stringifyValue(b, i);
          }
        }
        b.append(']');
        if (i < size - 1) {
          b.append('\n');
        }
      }
    }
    return b.toString();
  }

  @Override
  public void readFields(DataInput arg0) throws IOException {
    // Serialization is intentionally unsupported for row batches.
    throw new UnsupportedOperationException("Do you really need me?");
  }

  @Override
  public void write(DataOutput arg0) throws IOException {
    // Serialization is intentionally unsupported for row batches.
    throw new UnsupportedOperationException("Don't call me");
  }

  /**
   * Resets the row batch to default state
   * - sets selectedInUse to false
   * - sets size to 0
   * - sets endOfFile to false
   * - resets each column
   * - inits each column
   */
  public void reset() {
    selectedInUse = false;
    size = 0;
    endOfFile = false;
    for (ColumnVector vc : cols) {
      if (vc != null) {
        vc.reset();
        vc.init();
      }
    }
  }

  /**
   * Set the maximum number of rows in the batch.
   * Data is not preserved.
   *
   * @param rows the new minimum capacity of every column
   */
  public void ensureSize(int rows) {
    for(int i=0; i < cols.length; ++i) {
      // Skip unpopulated slots, consistent with reset() and toString();
      // previously a null entry here caused an NPE.
      if (cols[i] != null) {
        cols[i].ensureSize(rows, false);
      }
    }
  }
}
