http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/StripeStatistics.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/StripeStatistics.java b/orc/src/java/org/apache/orc/StripeStatistics.java deleted file mode 100644 index 8fc91cb..0000000 --- a/orc/src/java/org/apache/orc/StripeStatistics.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.orc; - -import org.apache.orc.impl.ColumnStatisticsImpl; - -import java.util.List; - -public class StripeStatistics { - private final List<OrcProto.ColumnStatistics> cs; - - public StripeStatistics(List<OrcProto.ColumnStatistics> list) { - this.cs = list; - } - - /** - * Return list of column statistics - * - * @return column stats - */ - public ColumnStatistics[] getColumnStatistics() { - ColumnStatistics[] result = new ColumnStatistics[cs.size()]; - for (int i = 0; i < result.length; ++i) { - result[i] = ColumnStatisticsImpl.deserialize(cs.get(i)); - } - return result; - } -}
http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/TimestampColumnStatistics.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java b/orc/src/java/org/apache/orc/TimestampColumnStatistics.java deleted file mode 100644 index 27dc49f..0000000 --- a/orc/src/java/org/apache/orc/TimestampColumnStatistics.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.orc; - -import java.sql.Timestamp; - -/** - * Statistics for Timestamp columns. - */ -public interface TimestampColumnStatistics extends ColumnStatistics { - /** - * Get the minimum value for the column. - * @return minimum value - */ - Timestamp getMinimum(); - - /** - * Get the maximum value for the column. - * @return maximum value - */ - Timestamp getMaximum(); -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/TypeDescription.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/TypeDescription.java b/orc/src/java/org/apache/orc/TypeDescription.java deleted file mode 100644 index 2e9328b..0000000 --- a/orc/src/java/org/apache/orc/TypeDescription.java +++ /dev/null @@ -1,870 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.orc; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * This is the description of the types in an ORC file. - */ -public class TypeDescription - implements Comparable<TypeDescription>, Serializable { - private static final int MAX_PRECISION = 38; - private static final int MAX_SCALE = 38; - private static final int DEFAULT_PRECISION = 38; - private static final int DEFAULT_SCALE = 10; - private static final int DEFAULT_LENGTH = 256; - - @Override - public int compareTo(TypeDescription other) { - if (this == other) { - return 0; - } else if (other == null) { - return -1; - } else { - int result = category.compareTo(other.category); - if (result == 0) { - switch (category) { - case CHAR: - case VARCHAR: - return maxLength - other.maxLength; - case DECIMAL: - if (precision != other.precision) { - return precision - other.precision; - } - return scale - other.scale; - case UNION: - case LIST: - case MAP: - if (children.size() != other.children.size()) { - return children.size() - other.children.size(); - } - for(int c=0; result == 0 && c < children.size(); ++c) { - result = children.get(c).compareTo(other.children.get(c)); - } - break; - case STRUCT: - if (children.size() != other.children.size()) { - return children.size() - other.children.size(); - } - for(int c=0; result == 0 && c < children.size(); ++c) { - result = fieldNames.get(c).compareTo(other.fieldNames.get(c)); - if (result == 0) { - result = children.get(c).compareTo(other.children.get(c)); - } - } - break; - default: - // PASS - } - } - return result; - } - } - - public enum Category { - BOOLEAN("boolean", true), - BYTE("tinyint", true), - SHORT("smallint", true), - INT("int", true), - LONG("bigint", true), - FLOAT("float", true), - DOUBLE("double", true), - STRING("string", true), - DATE("date", true), - TIMESTAMP("timestamp", true), - BINARY("binary", true), - DECIMAL("decimal", true), - VARCHAR("varchar", true), - CHAR("char", true), - LIST("array", false), - MAP("map", false), - STRUCT("struct", false), - UNION("uniontype", false); - - Category(String name, boolean isPrimitive) { - this.name = name; - this.isPrimitive = isPrimitive; - } - - final boolean isPrimitive; - final String name; - - public boolean isPrimitive() { - return isPrimitive; - } - - public String getName() { - return name; - } - } - - public static TypeDescription createBoolean() { - return new TypeDescription(Category.BOOLEAN); - } - - public static TypeDescription createByte() { - return new TypeDescription(Category.BYTE); - } - - public static TypeDescription createShort() { - return new TypeDescription(Category.SHORT); - } - - public static TypeDescription createInt() { - return new TypeDescription(Category.INT); - } - - public static TypeDescription createLong() { - return new TypeDescription(Category.LONG); - } - - public static TypeDescription createFloat() { - return new TypeDescription(Category.FLOAT); - } - - public static TypeDescription createDouble() { - return new TypeDescription(Category.DOUBLE); - } - - public static TypeDescription createString() { - return new TypeDescription(Category.STRING); - } - - public static TypeDescription createDate() { - return new TypeDescription(Category.DATE); - } - - public static TypeDescription createTimestamp() { - return new TypeDescription(Category.TIMESTAMP); - } - - public static TypeDescription createBinary() { - return new TypeDescription(Category.BINARY); - } - - public static TypeDescription createDecimal() { - return new TypeDescription(Category.DECIMAL); - } - - static class StringPosition { - final String value; - int position; - final int length; - - StringPosition(String value) { - this.value = value; - position = 0; - length = value.length(); - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - buffer.append('\''); - buffer.append(value.substring(0, position)); - buffer.append('^'); - buffer.append(value.substring(position)); - buffer.append('\''); - return buffer.toString(); - } - } - - static Category parseCategory(StringPosition source) { - int start = source.position; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (!Character.isLetter(ch)) { - break; - } - source.position += 1; - } - if (source.position != start) { - String word = source.value.substring(start, source.position).toLowerCase(); - for (Category cat : Category.values()) { - if (cat.getName().equals(word)) { - return cat; - } - } - } - throw new IllegalArgumentException("Can't parse category at " + source); - } - - static int parseInt(StringPosition source) { - int start = source.position; - int result = 0; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (!Character.isDigit(ch)) { - break; - } - result = result * 10 + (ch - '0'); - source.position += 1; - } - if (source.position == start) { - throw new IllegalArgumentException("Missing integer at " + source); - } - return result; - } - - static String parseName(StringPosition source) { - int start = source.position; - while (source.position < source.length) { - char ch = source.value.charAt(source.position); - if (!Character.isLetterOrDigit(ch) && ch != '.' && ch != '_') { - break; - } - source.position += 1; - } - if (source.position == start) { - throw new IllegalArgumentException("Missing name at " + source); - } - return source.value.substring(start, source.position); - } - - static void requireChar(StringPosition source, char required) { - if (source.position >= source.length || - source.value.charAt(source.position) != required) { - throw new IllegalArgumentException("Missing required char '" + - required + "' at " + source); - } - source.position += 1; - } - - static boolean consumeChar(StringPosition source, char ch) { - boolean result = source.position < source.length && - source.value.charAt(source.position) == ch; - if (result) { - source.position += 1; - } - return result; - } - - static void parseUnion(TypeDescription type, StringPosition source) { - requireChar(source, '<'); - do { - type.addUnionChild(parseType(source)); - } while (consumeChar(source, ',')); - requireChar(source, '>'); - } - - static void parseStruct(TypeDescription type, StringPosition source) { - requireChar(source, '<'); - do { - String fieldName = parseName(source); - requireChar(source, ':'); - type.addField(fieldName, parseType(source)); - } while (consumeChar(source, ',')); - requireChar(source, '>'); - } - - static TypeDescription parseType(StringPosition source) { - TypeDescription result = new TypeDescription(parseCategory(source)); - switch (result.getCategory()) { - case BINARY: - case BOOLEAN: - case BYTE: - case DATE: - case DOUBLE: - case FLOAT: - case INT: - case LONG: - case SHORT: - case STRING: - case TIMESTAMP: - break; - case CHAR: - case VARCHAR: - requireChar(source, '('); - result.withMaxLength(parseInt(source)); - requireChar(source, ')'); - break; - case DECIMAL: { - requireChar(source, '('); - int precision = parseInt(source); - requireChar(source, ','); - result.withScale(parseInt(source)); - result.withPrecision(precision); - requireChar(source, ')'); - break; - } - case LIST: - requireChar(source, '<'); - result.children.add(parseType(source)); - requireChar(source, '>'); - break; - case MAP: - requireChar(source, '<'); - result.children.add(parseType(source)); - requireChar(source, ','); - result.children.add(parseType(source)); - requireChar(source, '>'); - break; - case UNION: - parseUnion(result, source); - break; - case STRUCT: - parseStruct(result, source); - break; - default: - throw new IllegalArgumentException("Unknown type " + - result.getCategory() + " at " + source); - } - return result; - } - - /** - * Parse TypeDescription from the Hive type names. This is the inverse - * of TypeDescription.toString() - * @param typeName the name of the type - * @return a new TypeDescription or null if typeName was null - * @throws IllegalArgumentException if the string is badly formed - */ - public static TypeDescription fromString(String typeName) { - if (typeName == null) { - return null; - } - StringPosition source = new StringPosition(typeName); - TypeDescription result = parseType(source); - if (source.position != source.length) { - throw new IllegalArgumentException("Extra characters at " + source); - } - return result; - } - - /** - * For decimal types, set the precision. - * @param precision the new precision - * @return this - */ - public TypeDescription withPrecision(int precision) { - if (category != Category.DECIMAL) { - throw new IllegalArgumentException("precision is only allowed on decimal"+ - " and not " + category.name); - } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){ - throw new IllegalArgumentException("precision " + precision + - " is out of range 1 .. " + scale); - } - this.precision = precision; - return this; - } - - /** - * For decimal types, set the scale. - * @param scale the new scale - * @return this - */ - public TypeDescription withScale(int scale) { - if (category != Category.DECIMAL) { - throw new IllegalArgumentException("scale is only allowed on decimal"+ - " and not " + category.name); - } else if (scale < 0 || scale > MAX_SCALE || scale > precision) { - throw new IllegalArgumentException("scale is out of range at " + scale); - } - this.scale = scale; - return this; - } - - public static TypeDescription createVarchar() { - return new TypeDescription(Category.VARCHAR); - } - - public static TypeDescription createChar() { - return new TypeDescription(Category.CHAR); - } - - /** - * Set the maximum length for char and varchar types. - * @param maxLength the maximum value - * @return this - */ - public TypeDescription withMaxLength(int maxLength) { - if (category != Category.VARCHAR && category != Category.CHAR) { - throw new IllegalArgumentException("maxLength is only allowed on char" + - " and varchar and not " + category.name); - } - this.maxLength = maxLength; - return this; - } - - public static TypeDescription createList(TypeDescription childType) { - TypeDescription result = new TypeDescription(Category.LIST); - result.children.add(childType); - childType.parent = result; - return result; - } - - public static TypeDescription createMap(TypeDescription keyType, - TypeDescription valueType) { - TypeDescription result = new TypeDescription(Category.MAP); - result.children.add(keyType); - result.children.add(valueType); - keyType.parent = result; - valueType.parent = result; - return result; - } - - public static TypeDescription createUnion() { - return new TypeDescription(Category.UNION); - } - - public static TypeDescription createStruct() { - return new TypeDescription(Category.STRUCT); - } - - /** - * Add a child to a union type. - * @param child a new child type to add - * @return the union type. - */ - public TypeDescription addUnionChild(TypeDescription child) { - if (category != Category.UNION) { - throw new IllegalArgumentException("Can only add types to union type" + - " and not " + category); - } - children.add(child); - child.parent = this; - return this; - } - - /** - * Add a field to a struct type as it is built. - * @param field the field name - * @param fieldType the type of the field - * @return the struct type - */ - public TypeDescription addField(String field, TypeDescription fieldType) { - if (category != Category.STRUCT) { - throw new IllegalArgumentException("Can only add fields to struct type" + - " and not " + category); - } - fieldNames.add(field); - children.add(fieldType); - fieldType.parent = this; - return this; - } - - /** - * Get the id for this type. - * The first call will cause all of the the ids in tree to be assigned, so - * it should not be called before the type is completely built. - * @return the sequential id - */ - public int getId() { - // if the id hasn't been assigned, assign all of the ids from the root - if (id == -1) { - TypeDescription root = this; - while (root.parent != null) { - root = root.parent; - } - root.assignIds(0); - } - return id; - } - - public TypeDescription clone() { - TypeDescription result = new TypeDescription(category); - result.maxLength = maxLength; - result.precision = precision; - result.scale = scale; - if (fieldNames != null) { - result.fieldNames.addAll(fieldNames); - } - if (children != null) { - for(TypeDescription child: children) { - TypeDescription clone = child.clone(); - clone.parent = result; - result.children.add(clone); - } - } - return result; - } - - @Override - public int hashCode() { - long result = category.ordinal() * 4241 + maxLength + precision * 13 + scale; - if (children != null) { - for(TypeDescription child: children) { - result = result * 6959 + child.hashCode(); - } - } - return (int) result; - } - - @Override - public boolean equals(Object other) { - if (other == null || !(other instanceof TypeDescription)) { - return false; - } - if (other == this) { - return true; - } - TypeDescription castOther = (TypeDescription) other; - if (category != castOther.category || - maxLength != castOther.maxLength || - scale != castOther.scale || - precision != castOther.precision) { - return false; - } - if (children != null) { - if (children.size() != castOther.children.size()) { - return false; - } - for (int i = 0; i < children.size(); ++i) { - if (!children.get(i).equals(castOther.children.get(i))) { - return false; - } - } - } - if (category == Category.STRUCT) { - for(int i=0; i < fieldNames.size(); ++i) { - if (!fieldNames.get(i).equals(castOther.fieldNames.get(i))) { - return false; - } - } - } - return true; - } - - /** - * Get the maximum id assigned to this type or its children. - * The first call will cause all of the the ids in tree to be assigned, so - * it should not be called before the type is completely built. - * @return the maximum id assigned under this type - */ - public int getMaximumId() { - // if the id hasn't been assigned, assign all of the ids from the root - if (maxId == -1) { - TypeDescription root = this; - while (root.parent != null) { - root = root.parent; - } - root.assignIds(0); - } - return maxId; - } - - private ColumnVector createColumn(int maxSize) { - switch (category) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - return new LongColumnVector(maxSize); - case TIMESTAMP: - return new TimestampColumnVector(maxSize); - case FLOAT: - case DOUBLE: - return new DoubleColumnVector(maxSize); - case DECIMAL: - return new DecimalColumnVector(maxSize, precision, scale); - case STRING: - case BINARY: - case CHAR: - case VARCHAR: - return new BytesColumnVector(maxSize); - case STRUCT: { - ColumnVector[] fieldVector = new ColumnVector[children.size()]; - for(int i=0; i < fieldVector.length; ++i) { - fieldVector[i] = children.get(i).createColumn(maxSize); - } - return new StructColumnVector(maxSize, - fieldVector); - } - case UNION: { - ColumnVector[] fieldVector = new ColumnVector[children.size()]; - for(int i=0; i < fieldVector.length; ++i) { - fieldVector[i] = children.get(i).createColumn(maxSize); - } - return new UnionColumnVector(maxSize, - fieldVector); - } - case LIST: - return new ListColumnVector(maxSize, - children.get(0).createColumn(maxSize)); - case MAP: - return new MapColumnVector(maxSize, - children.get(0).createColumn(maxSize), - children.get(1).createColumn(maxSize)); - default: - throw new IllegalArgumentException("Unknown type " + category); - } - } - - public VectorizedRowBatch createRowBatch(int maxSize) { - VectorizedRowBatch result; - if (category == Category.STRUCT) { - result = new VectorizedRowBatch(children.size(), maxSize); - for(int i=0; i < result.cols.length; ++i) { - result.cols[i] = children.get(i).createColumn(maxSize); - } - } else { - result = new VectorizedRowBatch(1, maxSize); - result.cols[0] = createColumn(maxSize); - } - result.reset(); - return result; - } - - public VectorizedRowBatch createRowBatch() { - return createRowBatch(VectorizedRowBatch.DEFAULT_SIZE); - } - - /** - * Get the kind of this type. - * @return get the category for this type. - */ - public Category getCategory() { - return category; - } - - /** - * Get the maximum length of the type. Only used for char and varchar types. - * @return the maximum length of the string type - */ - public int getMaxLength() { - return maxLength; - } - - /** - * Get the precision of the decimal type. - * @return the number of digits for the precision. - */ - public int getPrecision() { - return precision; - } - - /** - * Get the scale of the decimal type. - * @return the number of digits for the scale. - */ - public int getScale() { - return scale; - } - - /** - * For struct types, get the list of field names. - * @return the list of field names. - */ - public List<String> getFieldNames() { - return Collections.unmodifiableList(fieldNames); - } - - /** - * Get the subtypes of this type. - * @return the list of children types - */ - public List<TypeDescription> getChildren() { - return children == null ? null : Collections.unmodifiableList(children); - } - - /** - * Assign ids to all of the nodes under this one. - * @param startId the lowest id to assign - * @return the next available id - */ - private int assignIds(int startId) { - id = startId++; - if (children != null) { - for (TypeDescription child : children) { - startId = child.assignIds(startId); - } - } - maxId = startId - 1; - return startId; - } - - private TypeDescription(Category category) { - this.category = category; - if (category.isPrimitive) { - children = null; - } else { - children = new ArrayList<>(); - } - if (category == Category.STRUCT) { - fieldNames = new ArrayList<>(); - } else { - fieldNames = null; - } - } - - private int id = -1; - private int maxId = -1; - private TypeDescription parent; - private final Category category; - private final List<TypeDescription> children; - private final List<String> fieldNames; - private int maxLength = DEFAULT_LENGTH; - private int precision = DEFAULT_PRECISION; - private int scale = DEFAULT_SCALE; - - public void printToBuffer(StringBuilder buffer) { - buffer.append(category.name); - switch (category) { - case DECIMAL: - buffer.append('('); - buffer.append(precision); - buffer.append(','); - buffer.append(scale); - buffer.append(')'); - break; - case CHAR: - case VARCHAR: - buffer.append('('); - buffer.append(maxLength); - buffer.append(')'); - break; - case LIST: - case MAP: - case UNION: - buffer.append('<'); - for(int i=0; i < children.size(); ++i) { - if (i != 0) { - buffer.append(','); - } - children.get(i).printToBuffer(buffer); - } - buffer.append('>'); - break; - case STRUCT: - buffer.append('<'); - for(int i=0; i < children.size(); ++i) { - if (i != 0) { - buffer.append(','); - } - buffer.append(fieldNames.get(i)); - buffer.append(':'); - children.get(i).printToBuffer(buffer); - } - buffer.append('>'); - break; - default: - break; - } - } - - public String toString() { - StringBuilder buffer = new StringBuilder(); - printToBuffer(buffer); - return buffer.toString(); - } - - private void printJsonToBuffer(String prefix, StringBuilder buffer, - int indent) { - for(int i=0; i < indent; ++i) { - buffer.append(' '); - } - buffer.append(prefix); - buffer.append("{\"category\": \""); - buffer.append(category.name); - buffer.append("\", \"id\": "); - buffer.append(getId()); - buffer.append(", \"max\": "); - buffer.append(maxId); - switch (category) { - case DECIMAL: - buffer.append(", \"precision\": "); - buffer.append(precision); - buffer.append(", \"scale\": "); - buffer.append(scale); - break; - case CHAR: - case VARCHAR: - buffer.append(", \"length\": "); - buffer.append(maxLength); - break; - case LIST: - case MAP: - case UNION: - buffer.append(", \"children\": ["); - for(int i=0; i < children.size(); ++i) { - buffer.append('\n'); - children.get(i).printJsonToBuffer("", buffer, indent + 2); - if (i != children.size() - 1) { - buffer.append(','); - } - } - buffer.append("]"); - break; - case STRUCT: - buffer.append(", \"fields\": ["); - for(int i=0; i < children.size(); ++i) { - buffer.append('\n'); - children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ", - buffer, indent + 2); - if (i != children.size() - 1) { - buffer.append(','); - } - } - buffer.append(']'); - break; - default: - break; - } - buffer.append('}'); - } - - public String toJson() { - StringBuilder buffer = new StringBuilder(); - printJsonToBuffer("", buffer, 0); - return buffer.toString(); - } - - /** - * Locate a subtype by its id. - * @param goal the column id to look for - * @return the subtype - */ - public TypeDescription findSubtype(int goal) { - // call getId method to make sure the ids are assigned - int id = getId(); - if (goal < id || goal > maxId) { - throw new IllegalArgumentException("Unknown type id " + id + " in " + - toJson()); - } - if (goal == id) { - return this; - } else { - TypeDescription prev = null; - for(TypeDescription next: children) { - if (next.id > goal) { - return prev.findSubtype(goal); - } - prev = next; - } - return prev.findSubtype(goal); - } - }} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/Writer.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/Writer.java b/orc/src/java/org/apache/orc/Writer.java deleted file mode 100644 index 4492062..0000000 --- a/orc/src/java/org/apache/orc/Writer.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.orc; - -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.List; - -import org.apache.orc.OrcProto; -import org.apache.orc.StripeInformation; -import org.apache.orc.TypeDescription; - -/** - * The interface for writing ORC files. - */ -public interface Writer { - - /** - * Get the schema for this writer - * @return the file schema - */ - TypeDescription getSchema(); - - /** - * Add arbitrary meta-data to the ORC file. This may be called at any point - * until the Writer is closed. If the same key is passed a second time, the - * second value will replace the first. - * @param key a key to label the data with. - * @param value the contents of the metadata. - */ - void addUserMetadata(String key, ByteBuffer value); - - /** - * Add a row batch to the ORC file. - * @param batch the rows to add - */ - void addRowBatch(VectorizedRowBatch batch) throws IOException; - - /** - * Flush all of the buffers and close the file. No methods on this writer - * should be called afterwards. - * @throws IOException - */ - void close() throws IOException; - - /** - * Return the deserialized data size. Raw data size will be compute when - * writing the file footer. Hence raw data size value will be available only - * after closing the writer. - * - * @return raw data size - */ - long getRawDataSize(); - - /** - * Return the number of rows in file. Row count gets updated when flushing - * the stripes. To get accurate row count this method should be called after - * closing the writer. - * - * @return row count - */ - long getNumberOfRows(); - - /** - * Write an intermediate footer on the file such that if the file is - * truncated to the returned offset, it would be a valid ORC file. - * @return the offset that would be a valid end location for an ORC file - */ - long writeIntermediateFooter() throws IOException; - - /** - * Fast stripe append to ORC file. This interface is used for fast ORC file - * merge with other ORC files. When merging, the file to be merged should pass - * stripe in binary form along with stripe information and stripe statistics. - * After appending last stripe of a file, use appendUserMetadata() to append - * any user metadata. - * @param stripe - stripe as byte array - * @param offset - offset within byte array - * @param length - length of stripe within byte array - * @param stripeInfo - stripe information - * @param stripeStatistics - stripe statistics (Protobuf objects can be - * merged directly) - * @throws IOException - */ - public void appendStripe(byte[] stripe, int offset, int length, - StripeInformation stripeInfo, - OrcProto.StripeStatistics stripeStatistics) throws IOException; - - /** - * When fast stripe append is used for merging ORC stripes, after appending - * the last stripe from a file, this interface must be used to merge any - * user metadata. - * @param userMetadata - user metadata - */ - public void appendUserMetadata(List<OrcProto.UserMetadataItem> userMetadata); -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/AcidStats.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/impl/AcidStats.java b/orc/src/java/org/apache/orc/impl/AcidStats.java deleted file mode 100644 index 6657fe9..0000000 --- a/orc/src/java/org/apache/orc/impl/AcidStats.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p/> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p/> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.orc.impl; - -/** - * Statistics about the ACID operations in an ORC file - */ -public class AcidStats { - public long inserts; - public long updates; - public long deletes; - - public AcidStats() { - inserts = 0; - updates = 0; - deletes = 0; - } - - public AcidStats(String serialized) { - String[] parts = serialized.split(","); - inserts = Long.parseLong(parts[0]); - updates = Long.parseLong(parts[1]); - deletes = Long.parseLong(parts[2]); - } - - public String serialize() { - StringBuilder builder = new StringBuilder(); - builder.append(inserts); - builder.append(","); - builder.append(updates); - builder.append(","); - builder.append(deletes); - return builder.toString(); - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - builder.append(" inserts: ").append(inserts); - builder.append(" updates: ").append(updates); - builder.append(" deletes: ").append(deletes); - return builder.toString(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BitFieldReader.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/impl/BitFieldReader.java b/orc/src/java/org/apache/orc/impl/BitFieldReader.java deleted file mode 100644 index dda7355..0000000 --- a/orc/src/java/org/apache/orc/impl/BitFieldReader.java +++ /dev/null @@ -1,217 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.orc.impl; - -import java.io.EOFException; -import java.io.IOException; - -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.orc.impl.InStream; -import org.apache.orc.impl.PositionProvider; -import org.apache.orc.impl.RunLengthByteReader; - -public class BitFieldReader { - private final RunLengthByteReader input; - /** The number of bits in one item. Non-test code always uses 1. */ - private final int bitSize; - private int current; - private int bitsLeft; - private final int mask; - - public BitFieldReader(InStream input, - int bitSize) throws IOException { - this.input = new RunLengthByteReader(input); - this.bitSize = bitSize; - mask = (1 << bitSize) - 1; - } - - public void setInStream(InStream inStream) { - this.input.setInStream(inStream); - } - - private void readByte() throws IOException { - if (input.hasNext()) { - current = 0xff & input.next(); - bitsLeft = 8; - } else { - throw new EOFException("Read past end of bit field from " + this); - } - } - - public int next() throws IOException { - int result = 0; - int bitsLeftToRead = bitSize; - while (bitsLeftToRead > bitsLeft) { - result <<= bitsLeft; - result |= current & ((1 << bitsLeft) - 1); - bitsLeftToRead -= bitsLeft; - readByte(); - } - if (bitsLeftToRead > 0) { - result <<= bitsLeftToRead; - bitsLeft -= bitsLeftToRead; - result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1); - } - return result & mask; - } - - /** - * Unlike integer readers, where runs are encoded explicitly, in this one we have to read ahead - * to figure out whether we have a run. Given that runs in booleans are likely it's worth it. - * However it means we'd need to keep track of how many bytes we read, and next/nextVector won't - * work anymore once this is called. These is trivial to fix, but these are never interspersed. - */ - private boolean lastRunValue; - private int lastRunLength = -1; - private void readNextRun(int maxRunLength) throws IOException { - assert bitSize == 1; - if (lastRunLength > 0) return; // last run is not exhausted yet - if (bitsLeft == 0) { - readByte(); - } - // First take care of the partial bits. - boolean hasVal = false; - int runLength = 0; - if (bitsLeft != 8) { - int partialBitsMask = (1 << bitsLeft) - 1; - int partialBits = current & partialBitsMask; - if (partialBits == partialBitsMask || partialBits == 0) { - lastRunValue = (partialBits == partialBitsMask); - if (maxRunLength <= bitsLeft) { - lastRunLength = maxRunLength; - return; - } - maxRunLength -= bitsLeft; - hasVal = true; - runLength = bitsLeft; - bitsLeft = 0; - } else { - // There's no run in partial bits. Return whatever we have. - int prefixBitsCount = 32 - bitsLeft; - runLength = Integer.numberOfLeadingZeros(partialBits) - prefixBitsCount; - lastRunValue = (runLength > 0); - lastRunLength = Math.min(maxRunLength, lastRunValue ? runLength : - (Integer.numberOfLeadingZeros(~(partialBits | ~partialBitsMask)) - prefixBitsCount)); - return; - } - assert bitsLeft == 0; - readByte(); - } - if (!hasVal) { - lastRunValue = ((current >> 7) == 1); - hasVal = true; - } - // Read full bytes until the run ends. - assert bitsLeft == 8; - while (maxRunLength >= 8 - && ((lastRunValue && (current == 0xff)) || (!lastRunValue && (current == 0)))) { - runLength += 8; - maxRunLength -= 8; - readByte(); - } - if (maxRunLength > 0) { - int extraBits = Integer.numberOfLeadingZeros( - lastRunValue ? (~(current | ~255)) : current) - 24; - bitsLeft -= extraBits; - runLength += extraBits; - } - lastRunLength = runLength; - } - - public void nextVector(LongColumnVector previous, - long previousLen) throws IOException { - previous.isRepeating = true; - for (int i = 0; i < previousLen; i++) { - if (previous.noNulls || !previous.isNull[i]) { - previous.vector[i] = next(); - } else { - // The default value of null for int types in vectorized - // processing is 1, so set that if the value is null - previous.vector[i] = 1; - } - - // The default value for nulls in Vectorization for int types is 1 - // and given that non null value can also be 1, we need to check for isNull also - // when determining the isRepeating flag. - if (previous.isRepeating - && i > 0 - && ((previous.vector[0] != previous.vector[i]) || - (previous.isNull[0] != previous.isNull[i]))) { - previous.isRepeating = false; - } - } - } - - public void seek(PositionProvider index) throws IOException { - input.seek(index); - int consumed = (int) index.getNext(); - if (consumed > 8) { - throw new IllegalArgumentException("Seek past end of byte at " + - consumed + " in " + input); - } else if (consumed != 0) { - readByte(); - bitsLeft = 8 - consumed; - } else { - bitsLeft = 0; - } - } - - public void skip(long items) throws IOException { - long totalBits = bitSize * items; - if (bitsLeft >= totalBits) { - bitsLeft -= totalBits; - } else { - totalBits -= bitsLeft; - input.skip(totalBits / 8); - current = input.next(); - bitsLeft = (int) (8 - (totalBits % 8)); - } - } - - @Override - public String toString() { - return "bit reader current: " + current + " bits left: " + bitsLeft + - " bit size: " + bitSize + " from " + input; - } - - boolean hasFullByte() { - return bitsLeft == 8 || bitsLeft == 0; - } - - int peekOneBit() throws IOException { - assert bitSize == 1; - if (bitsLeft == 0) { - readByte(); - } - return (current >>> (bitsLeft - 1)) & 1; - } - - int peekFullByte() throws IOException { - assert bitSize == 1; - assert bitsLeft == 8 || bitsLeft == 0; - if (bitsLeft == 0) { - readByte(); - } - return current; - } - - void skipInCurrentByte(int bits) throws IOException { - assert bitsLeft >= bits; - bitsLeft -= bits; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BitFieldWriter.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java b/orc/src/java/org/apache/orc/impl/BitFieldWriter.java deleted file mode 100644 index aa5f886..0000000 --- a/orc/src/java/org/apache/orc/impl/BitFieldWriter.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.orc.impl; - -import org.apache.orc.impl.PositionRecorder; -import org.apache.orc.impl.PositionedOutputStream; -import org.apache.orc.impl.RunLengthByteWriter; - -import java.io.IOException; - -public class BitFieldWriter { - private RunLengthByteWriter output; - private final int bitSize; - private byte current = 0; - private int bitsLeft = 8; - - public BitFieldWriter(PositionedOutputStream output, - int bitSize) throws IOException { - this.output = new RunLengthByteWriter(output); - this.bitSize = bitSize; - } - - private void writeByte() throws IOException { - output.write(current); - current = 0; - bitsLeft = 8; - } - - public void flush() throws IOException { - if (bitsLeft != 8) { - writeByte(); - } - output.flush(); - } - - public void write(int value) throws IOException { - int bitsToWrite = bitSize; - while (bitsToWrite > bitsLeft) { - // add the bits to the bottom of the current word - current |= value >>> (bitsToWrite - bitsLeft); - // subtract out the bits we just added - bitsToWrite -= bitsLeft; - // zero out the bits above bitsToWrite - value &= (1 << bitsToWrite) - 1; - writeByte(); - } - bitsLeft -= bitsToWrite; - current |= value << bitsLeft; - if (bitsLeft == 0) { - writeByte(); - } - } - - public void getPosition(PositionRecorder recorder) throws IOException { - output.getPosition(recorder); - recorder.addPosition(8 - bitsLeft); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/BufferChunk.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/impl/BufferChunk.java b/orc/src/java/org/apache/orc/impl/BufferChunk.java deleted file mode 100644 index da43b96..0000000 --- a/orc/src/java/org/apache/orc/impl/BufferChunk.java +++ /dev/null @@ -1,85 +0,0 @@ -package org.apache.orc.impl; - -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.hadoop.hive.common.io.DiskRange; -import org.apache.hadoop.hive.common.io.DiskRangeList; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.nio.ByteBuffer; - -/** - * The sections of stripe that we have read. - * This might not match diskRange - 1 disk range can be multiple buffer chunks, - * depending on DFS block boundaries. - */ -public class BufferChunk extends DiskRangeList { - - private static final Logger LOG = - LoggerFactory.getLogger(BufferChunk.class); - final ByteBuffer chunk; - - public BufferChunk(ByteBuffer chunk, long offset) { - super(offset, offset + chunk.remaining()); - this.chunk = chunk; - } - - public ByteBuffer getChunk() { - return chunk; - } - - @Override - public boolean hasData() { - return chunk != null; - } - - @Override - public final String toString() { - boolean makesSense = chunk.remaining() == (end - offset); - return "data range [" + offset + ", " + end + "), size: " + chunk.remaining() - + (makesSense ? "" : "(!)") + " type: " + - (chunk.isDirect() ? "direct" : "array-backed"); - } - - @Override - public DiskRange sliceAndShift(long offset, long end, long shiftBy) { - assert offset <= end && offset >= this.offset && end <= this.end; - assert offset + shiftBy >= 0; - ByteBuffer sliceBuf = chunk.slice(); - int newPos = (int) (offset - this.offset); - int newLimit = newPos + (int) (end - offset); - try { - sliceBuf.position(newPos); - sliceBuf.limit(newLimit); - } catch (Throwable t) { - LOG.error("Failed to slice buffer chunk with range" + " [" + this.offset + ", " + this.end - + "), position: " + chunk.position() + " limit: " + chunk.limit() + ", " - + (chunk.isDirect() ? "direct" : "array") + "; to [" + offset + ", " + end + ") " - + t.getClass()); - throw new RuntimeException(t); - } - return new BufferChunk(sliceBuf, offset + shiftBy); - } - - @Override - public ByteBuffer getData() { - return chunk; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/d7f71fb4/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java ---------------------------------------------------------------------- diff --git a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java deleted file mode 100644 index 1118c5c..0000000 --- a/orc/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ /dev/null @@ -1,1101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.orc.impl; - -import java.sql.Date; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparator; -import org.apache.orc.BinaryColumnStatistics; -import org.apache.orc.BooleanColumnStatistics; -import org.apache.orc.ColumnStatistics; -import org.apache.orc.DateColumnStatistics; -import org.apache.orc.DecimalColumnStatistics; -import org.apache.orc.DoubleColumnStatistics; -import org.apache.orc.IntegerColumnStatistics; -import org.apache.orc.OrcProto; -import org.apache.orc.StringColumnStatistics; -import org.apache.orc.TimestampColumnStatistics; -import org.apache.orc.TypeDescription; - -public class ColumnStatisticsImpl implements ColumnStatistics { - - private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl - implements BooleanColumnStatistics { - private long trueCount = 0; - - BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.BucketStatistics bkt = stats.getBucketStatistics(); - trueCount = bkt.getCount(0); - } - - BooleanStatisticsImpl() { - } - - @Override - public void reset() { - super.reset(); - trueCount = 0; - } - - @Override - public void updateBoolean(boolean value, int repetitions) { - if (value) { - trueCount += repetitions; - } - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof BooleanStatisticsImpl) { - BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other; - trueCount += bkt.trueCount; - } else { - if (isStatsExists() && trueCount != 0) { - throw new IllegalArgumentException("Incompatible merging of boolean column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.BucketStatistics.Builder bucket = - OrcProto.BucketStatistics.newBuilder(); - bucket.addCount(trueCount); - builder.setBucketStatistics(bucket); - return builder; - } - - @Override - public long getFalseCount() { - return getNumberOfValues() - trueCount; - } - - @Override - public long getTrueCount() { - return trueCount; - } - - @Override - public String toString() { - return super.toString() + " true: " + trueCount; - } - } - - private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl - implements IntegerColumnStatistics { - - private long minimum = Long.MAX_VALUE; - private long maximum = Long.MIN_VALUE; - private long sum = 0; - private boolean hasMinimum = false; - private boolean overflow = false; - - IntegerStatisticsImpl() { - } - - IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.IntegerStatistics intStat = stats.getIntStatistics(); - if (intStat.hasMinimum()) { - hasMinimum = true; - minimum = intStat.getMinimum(); - } - if (intStat.hasMaximum()) { - maximum = intStat.getMaximum(); - } - if (intStat.hasSum()) { - sum = intStat.getSum(); - } else { - overflow = true; - } - } - - @Override - public void reset() { - super.reset(); - hasMinimum = false; - minimum = Long.MAX_VALUE; - maximum = Long.MIN_VALUE; - sum = 0; - overflow = false; - } - - @Override - public void updateInteger(long value, int repetitions) { - if (!hasMinimum) { - hasMinimum = true; - minimum = value; - maximum = value; - } else if (value < minimum) { - minimum = value; - } else if (value > maximum) { - maximum = value; - } - if (!overflow) { - boolean wasPositive = sum >= 0; - sum += value * repetitions; - if ((value >= 0) == wasPositive) { - overflow = (sum >= 0) != wasPositive; - } - } - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof IntegerStatisticsImpl) { - IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = otherInt.hasMinimum; - minimum = otherInt.minimum; - maximum = otherInt.maximum; - } else if (otherInt.hasMinimum) { - if (otherInt.minimum < minimum) { - minimum = otherInt.minimum; - } - if (otherInt.maximum > maximum) { - maximum = otherInt.maximum; - } - } - - overflow |= otherInt.overflow; - if (!overflow) { - boolean wasPositive = sum >= 0; - sum += otherInt.sum; - if ((otherInt.sum >= 0) == wasPositive) { - overflow = (sum >= 0) != wasPositive; - } - } - } else { - if (isStatsExists() && hasMinimum) { - throw new IllegalArgumentException("Incompatible merging of integer column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.IntegerStatistics.Builder intb = - OrcProto.IntegerStatistics.newBuilder(); - if (hasMinimum) { - intb.setMinimum(minimum); - intb.setMaximum(maximum); - } - if (!overflow) { - intb.setSum(sum); - } - builder.setIntStatistics(intb); - return builder; - } - - @Override - public long getMinimum() { - return minimum; - } - - @Override - public long getMaximum() { - return maximum; - } - - @Override - public boolean isSumDefined() { - return !overflow; - } - - @Override - public long getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (hasMinimum) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - } - if (!overflow) { - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl - implements DoubleColumnStatistics { - private boolean hasMinimum = false; - private double minimum = Double.MAX_VALUE; - private double maximum = Double.MIN_VALUE; - private double sum = 0; - - DoubleStatisticsImpl() { - } - - DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics(); - if (dbl.hasMinimum()) { - hasMinimum = true; - minimum = dbl.getMinimum(); - } - if (dbl.hasMaximum()) { - maximum = dbl.getMaximum(); - } - if (dbl.hasSum()) { - sum = dbl.getSum(); - } - } - - @Override - public void reset() { - super.reset(); - hasMinimum = false; - minimum = Double.MAX_VALUE; - maximum = Double.MIN_VALUE; - sum = 0; - } - - @Override - public void updateDouble(double value) { - if (!hasMinimum) { - hasMinimum = true; - minimum = value; - maximum = value; - } else if (value < minimum) { - minimum = value; - } else if (value > maximum) { - maximum = value; - } - sum += value; - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof DoubleStatisticsImpl) { - DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other; - if (!hasMinimum) { - hasMinimum = dbl.hasMinimum; - minimum = dbl.minimum; - maximum = dbl.maximum; - } else if (dbl.hasMinimum) { - if (dbl.minimum < minimum) { - minimum = dbl.minimum; - } - if (dbl.maximum > maximum) { - maximum = dbl.maximum; - } - } - sum += dbl.sum; - } else { - if (isStatsExists() && hasMinimum) { - throw new IllegalArgumentException("Incompatible merging of double column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = super.serialize(); - OrcProto.DoubleStatistics.Builder dbl = - OrcProto.DoubleStatistics.newBuilder(); - if (hasMinimum) { - dbl.setMinimum(minimum); - dbl.setMaximum(maximum); - } - dbl.setSum(sum); - builder.setDoubleStatistics(dbl); - return builder; - } - - @Override - public double getMinimum() { - return minimum; - } - - @Override - public double getMaximum() { - return maximum; - } - - @Override - public double getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (hasMinimum) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - } - buf.append(" sum: "); - buf.append(sum); - return buf.toString(); - } - } - - protected static final class StringStatisticsImpl extends ColumnStatisticsImpl - implements StringColumnStatistics { - private Text minimum = null; - private Text maximum = null; - private long sum = 0; - - StringStatisticsImpl() { - } - - StringStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.StringStatistics str = stats.getStringStatistics(); - if (str.hasMaximum()) { - maximum = new Text(str.getMaximum()); - } - if (str.hasMinimum()) { - minimum = new Text(str.getMinimum()); - } - if(str.hasSum()) { - sum = str.getSum(); - } - } - - @Override - public void reset() { - super.reset(); - minimum = null; - maximum = null; - sum = 0; - } - - @Override - public void updateString(Text value) { - if (minimum == null) { - maximum = minimum = new Text(value); - } else if (minimum.compareTo(value) > 0) { - minimum = new Text(value); - } else if (maximum.compareTo(value) < 0) { - maximum = new Text(value); - } - sum += value.getLength(); - } - - @Override - public void updateString(byte[] bytes, int offset, int length, - int repetitions) { - if (minimum == null) { - maximum = minimum = new Text(); - maximum.set(bytes, offset, length); - } else if (WritableComparator.compareBytes(minimum.getBytes(), 0, - minimum.getLength(), bytes, offset, length) > 0) { - minimum = new Text(); - minimum.set(bytes, offset, length); - } else if (WritableComparator.compareBytes(maximum.getBytes(), 0, - maximum.getLength(), bytes, offset, length) < 0) { - maximum = new Text(); - maximum.set(bytes, offset, length); - } - sum += length * repetitions; - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof StringStatisticsImpl) { - StringStatisticsImpl str = (StringStatisticsImpl) other; - if (minimum == null) { - if (str.minimum != null) { - maximum = new Text(str.getMaximum()); - minimum = new Text(str.getMinimum()); - } else { - /* both are empty */ - maximum = minimum = null; - } - } else if (str.minimum != null) { - if (minimum.compareTo(str.minimum) > 0) { - minimum = new Text(str.getMinimum()); - } - if (maximum.compareTo(str.maximum) < 0) { - maximum = new Text(str.getMaximum()); - } - } - sum += str.sum; - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of string column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.StringStatistics.Builder str = - OrcProto.StringStatistics.newBuilder(); - if (getNumberOfValues() != 0) { - str.setMinimum(getMinimum()); - str.setMaximum(getMaximum()); - str.setSum(sum); - } - result.setStringStatistics(str); - return result; - } - - @Override - public String getMinimum() { - return minimum == null ? null : minimum.toString(); - } - - @Override - public String getMaximum() { - return maximum == null ? null : maximum.toString(); - } - - @Override - public long getSum() { - return sum; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - protected static final class BinaryStatisticsImpl extends ColumnStatisticsImpl implements - BinaryColumnStatistics { - - private long sum = 0; - - BinaryStatisticsImpl() { - } - - BinaryStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.BinaryStatistics binStats = stats.getBinaryStatistics(); - if (binStats.hasSum()) { - sum = binStats.getSum(); - } - } - - @Override - public void reset() { - super.reset(); - sum = 0; - } - - @Override - public void updateBinary(BytesWritable value) { - sum += value.getLength(); - } - - @Override - public void updateBinary(byte[] bytes, int offset, int length, - int repetitions) { - sum += length * repetitions; - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof BinaryColumnStatistics) { - BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other; - sum += bin.sum; - } else { - if (isStatsExists() && sum != 0) { - throw new IllegalArgumentException("Incompatible merging of binary column statistics"); - } - } - super.merge(other); - } - - @Override - public long getSum() { - return sum; - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.BinaryStatistics.Builder bin = OrcProto.BinaryStatistics.newBuilder(); - bin.setSum(sum); - result.setBinaryStatistics(bin); - return result; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" sum: "); - buf.append(sum); - } - return buf.toString(); - } - } - - private static final class DecimalStatisticsImpl extends ColumnStatisticsImpl - implements DecimalColumnStatistics { - - // These objects are mutable for better performance. - private HiveDecimalWritable minimum = null; - private HiveDecimalWritable maximum = null; - private HiveDecimalWritable sum = new HiveDecimalWritable(0); - - DecimalStatisticsImpl() { - } - - DecimalStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DecimalStatistics dec = stats.getDecimalStatistics(); - if (dec.hasMaximum()) { - maximum = new HiveDecimalWritable(dec.getMaximum()); - } - if (dec.hasMinimum()) { - minimum = new HiveDecimalWritable(dec.getMinimum()); - } - if (dec.hasSum()) { - sum = new HiveDecimalWritable(dec.getSum()); - } else { - sum = null; - } - } - - @Override - public void reset() { - super.reset(); - minimum = null; - maximum = null; - sum = new HiveDecimalWritable(0); - } - - @Override - public void updateDecimal(HiveDecimalWritable value) { - if (minimum == null) { - minimum = new HiveDecimalWritable(value); - maximum = new HiveDecimalWritable(value); - } else if (minimum.compareTo(value) > 0) { - minimum.set(value); - } else if (maximum.compareTo(value) < 0) { - maximum.set(value); - } - if (sum != null) { - sum.mutateAdd(value); - } - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof DecimalStatisticsImpl) { - DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other; - if (minimum == null) { - minimum = (dec.minimum != null ? new HiveDecimalWritable(dec.minimum) : null); - maximum = (dec.maximum != null ? new HiveDecimalWritable(dec.maximum) : null); - sum = dec.sum; - } else if (dec.minimum != null) { - if (minimum.compareTo(dec.minimum) > 0) { - minimum.set(dec.minimum); - } - if (maximum.compareTo(dec.maximum) < 0) { - maximum.set(dec.maximum); - } - if (sum == null || dec.sum == null) { - sum = null; - } else { - sum.mutateAdd(dec.sum); - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.DecimalStatistics.Builder dec = - OrcProto.DecimalStatistics.newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - dec.setMinimum(minimum.toString()); - dec.setMaximum(maximum.toString()); - } - // Check isSet for overflow. - if (sum != null && sum.isSet()) { - dec.setSum(sum.toString()); - } - result.setDecimalStatistics(dec); - return result; - } - - @Override - public HiveDecimal getMinimum() { - return minimum.getHiveDecimal(); - } - - @Override - public HiveDecimal getMaximum() { - return maximum.getHiveDecimal(); - } - - @Override - public HiveDecimal getSum() { - return sum.getHiveDecimal(); - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(minimum); - buf.append(" max: "); - buf.append(maximum); - if (sum != null) { - buf.append(" sum: "); - buf.append(sum); - } - } - return buf.toString(); - } - } - - private static final class DateStatisticsImpl extends ColumnStatisticsImpl - implements DateColumnStatistics { - private Integer minimum = null; - private Integer maximum = null; - - DateStatisticsImpl() { - } - - DateStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.DateStatistics dateStats = stats.getDateStatistics(); - // min,max values serialized/deserialized as int (days since epoch) - if (dateStats.hasMaximum()) { - maximum = dateStats.getMaximum(); - } - if (dateStats.hasMinimum()) { - minimum = dateStats.getMinimum(); - } - } - - @Override - public void reset() { - super.reset(); - minimum = null; - maximum = null; - } - - @Override - public void updateDate(DateWritable value) { - if (minimum == null) { - minimum = value.getDays(); - maximum = value.getDays(); - } else if (minimum > value.getDays()) { - minimum = value.getDays(); - } else if (maximum < value.getDays()) { - maximum = value.getDays(); - } - } - - @Override - public void updateDate(int value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum > value) { - minimum = value; - } else if (maximum < value) { - maximum = value; - } - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof DateStatisticsImpl) { - DateStatisticsImpl dateStats = (DateStatisticsImpl) other; - if (minimum == null) { - minimum = dateStats.minimum; - maximum = dateStats.maximum; - } else if (dateStats.minimum != null) { - if (minimum > dateStats.minimum) { - minimum = dateStats.minimum; - } - if (maximum < dateStats.maximum) { - maximum = dateStats.maximum; - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of date column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.DateStatistics.Builder dateStats = - OrcProto.DateStatistics.newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - dateStats.setMinimum(minimum); - dateStats.setMaximum(maximum); - } - result.setDateStatistics(dateStats); - return result; - } - - private transient final DateWritable minDate = new DateWritable(); - private transient final DateWritable maxDate = new DateWritable(); - - @Override - public Date getMinimum() { - if (minimum == null) { - return null; - } - minDate.set(minimum); - return minDate.get(); - } - - @Override - public Date getMaximum() { - if (maximum == null) { - return null; - } - maxDate.set(maximum); - return maxDate.get(); - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - } - return buf.toString(); - } - } - - private static final class TimestampStatisticsImpl extends ColumnStatisticsImpl - implements TimestampColumnStatistics { - private Long minimum = null; - private Long maximum = null; - - TimestampStatisticsImpl() { - } - - TimestampStatisticsImpl(OrcProto.ColumnStatistics stats) { - super(stats); - OrcProto.TimestampStatistics timestampStats = stats.getTimestampStatistics(); - // min,max values serialized/deserialized as int (milliseconds since epoch) - if (timestampStats.hasMaximum()) { - maximum = timestampStats.getMaximum(); - } - if (timestampStats.hasMinimum()) { - minimum = timestampStats.getMinimum(); - } - } - - @Override - public void reset() { - super.reset(); - minimum = null; - maximum = null; - } - - @Override - public void updateTimestamp(Timestamp value) { - if (minimum == null) { - minimum = value.getTime(); - maximum = value.getTime(); - } else if (minimum > value.getTime()) { - minimum = value.getTime(); - } else if (maximum < value.getTime()) { - maximum = value.getTime(); - } - } - - @Override - public void updateTimestamp(long value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum > value) { - minimum = value; - } else if (maximum < value) { - maximum = value; - } - } - - @Override - public void merge(ColumnStatisticsImpl other) { - if (other instanceof TimestampStatisticsImpl) { - TimestampStatisticsImpl timestampStats = (TimestampStatisticsImpl) other; - if (minimum == null) { - minimum = timestampStats.minimum; - maximum = timestampStats.maximum; - } else if (timestampStats.minimum != null) { - if (minimum > timestampStats.minimum) { - minimum = timestampStats.minimum; - } - if (maximum < timestampStats.maximum) { - maximum = timestampStats.maximum; - } - } - } else { - if (isStatsExists() && minimum != null) { - throw new IllegalArgumentException("Incompatible merging of timestamp column statistics"); - } - } - super.merge(other); - } - - @Override - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder result = super.serialize(); - OrcProto.TimestampStatistics.Builder timestampStats = OrcProto.TimestampStatistics - .newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { - timestampStats.setMinimum(minimum); - timestampStats.setMaximum(maximum); - } - result.setTimestampStatistics(timestampStats); - return result; - } - - @Override - public Timestamp getMinimum() { - return minimum == null ? null : new Timestamp(minimum); - } - - @Override - public Timestamp getMaximum() { - return maximum == null ? null : new Timestamp(maximum); - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(super.toString()); - if (getNumberOfValues() != 0) { - buf.append(" min: "); - buf.append(getMinimum()); - buf.append(" max: "); - buf.append(getMaximum()); - } - return buf.toString(); - } - } - - private long count = 0; - private boolean hasNull = false; - - ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) { - if (stats.hasNumberOfValues()) { - count = stats.getNumberOfValues(); - } - - if (stats.hasHasNull()) { - hasNull = stats.getHasNull(); - } else { - hasNull = true; - } - } - - ColumnStatisticsImpl() { - } - - public void increment() { - count += 1; - } - - public void increment(int count) { - this.count += count; - } - - public void setNull() { - hasNull = true; - } - - public void updateBoolean(boolean value, int repetitions) { - throw new UnsupportedOperationException("Can't update boolean"); - } - - public void updateInteger(long value, int repetitions) { - throw new UnsupportedOperationException("Can't update integer"); - } - - public void updateDouble(double value) { - throw new UnsupportedOperationException("Can't update double"); - } - - public void updateString(Text value) { - throw new UnsupportedOperationException("Can't update string"); - } - - public void updateString(byte[] bytes, int offset, int length, - int repetitions) { - throw new UnsupportedOperationException("Can't update string"); - } - - public void updateBinary(BytesWritable value) { - throw new UnsupportedOperationException("Can't update binary"); - } - - public void updateBinary(byte[] bytes, int offset, int length, - int repetitions) { - throw new UnsupportedOperationException("Can't update string"); - } - - public void updateDecimal(HiveDecimalWritable value) { - throw new UnsupportedOperationException("Can't update decimal"); - } - - public void updateDate(DateWritable value) { - throw new UnsupportedOperationException("Can't update date"); - } - - public void updateDate(int value) { - throw new UnsupportedOperationException("Can't update date"); - } - - public void updateTimestamp(Timestamp value) { - throw new UnsupportedOperationException("Can't update timestamp"); - } - - public void updateTimestamp(long value) { - throw new UnsupportedOperationException("Can't update timestamp"); - } - - public boolean isStatsExists() { - return (count > 0 || hasNull == true); - } - - public void merge(ColumnStatisticsImpl stats) { - count += stats.count; - hasNull |= stats.hasNull; - } - - public void reset() { - count = 0; - hasNull = false; - } - - @Override - public long getNumberOfValues() { - return count; - } - - @Override - public boolean hasNull() { - return hasNull; - } - - @Override - public String toString() { - return "count: " + count + " hasNull: " + hasNull; - } - - public OrcProto.ColumnStatistics.Builder serialize() { - OrcProto.ColumnStatistics.Builder builder = - OrcProto.ColumnStatistics.newBuilder(); - builder.setNumberOfValues(count); - builder.setHasNull(hasNull); - return builder; - } - - public static ColumnStatisticsImpl create(TypeDescription schema) { - switch (schema.getCategory()) { - case BOOLEAN: - return new BooleanStatisticsImpl(); - case BYTE: - case SHORT: - case INT: - case LONG: - return new IntegerStatisticsImpl(); - case FLOAT: - case DOUBLE: - return new DoubleStatisticsImpl(); - case STRING: - case CHAR: - case VARCHAR: - return new StringStatisticsImpl(); - case DECIMAL: - return new DecimalStatisticsImpl(); - case DATE: - return new DateStatisticsImpl(); - case TIMESTAMP: - return new TimestampStatisticsImpl(); - case BINARY: - return new BinaryStatisticsImpl(); - default: - return new ColumnStatisticsImpl(); - } - } - - public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) { - if (stats.hasBucketStatistics()) { - return new BooleanStatisticsImpl(stats); - } else if (stats.hasIntStatistics()) { - return new IntegerStatisticsImpl(stats); - } else if (stats.hasDoubleStatistics()) { - return new DoubleStatisticsImpl(stats); - } else if (stats.hasStringStatistics()) { - return new StringStatisticsImpl(stats); - } else if (stats.hasDecimalStatistics()) { - return new DecimalStatisticsImpl(stats); - } else if (stats.hasDateStatistics()) { - return new DateStatisticsImpl(stats); - } else if (stats.hasTimestampStatistics()) { - return new TimestampStatisticsImpl(stats); - } else if(stats.hasBinaryStatistics()) { - return new BinaryStatisticsImpl(stats); - } else { - return new ColumnStatisticsImpl(stats); - } - } -}
