http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/common/src/main/java/org/apache/kylin/common/util/Bytes.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/kylin/common/util/Bytes.java b/common/src/main/java/org/apache/kylin/common/util/Bytes.java deleted file mode 100644 index 0bb0af2..0000000 --- a/common/src/main/java/org/apache/kylin/common/util/Bytes.java +++ /dev/null @@ -1,2203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kylin.common.util; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.base.Preconditions.checkPositionIndex; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.lang.reflect.Field; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.Charset; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.security.SecureRandom; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.io.RawComparator; -import org.apache.hadoop.io.WritableComparator; -import org.apache.hadoop.io.WritableUtils; - -import sun.misc.Unsafe; - -/** - * Utility class that handles byte arrays, conversions to/from other types, - * comparisons, hash code generation, manufacturing keys for HashMaps or - * HashSets, etc. 
- * - * Copied from org.apache.hadoop.hbase.util.Bytes - */ -public class Bytes { - //HConstants.UTF8_ENCODING should be updated if this changed - /** - * When we encode strings, we always specify UTF8 encoding - */ - private static final String UTF8_ENCODING = "UTF-8"; - - //HConstants.UTF8_CHARSET should be updated if this changed - /** - * When we encode strings, we always specify UTF8 encoding - */ - private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING); - - //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed - private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; - - private static final Log LOG = LogFactory.getLog(Bytes.class); - - /** - * Size of boolean in bytes - */ - public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE; - - /** - * Size of byte in bytes - */ - public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN; - - /** - * Size of char in bytes - */ - public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE; - - /** - * Size of double in bytes - */ - public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE; - - /** - * Size of float in bytes - */ - public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE; - - /** - * Size of int in bytes - */ - public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE; - - /** - * Size of long in bytes - */ - public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE; - - /** - * Size of short in bytes - */ - public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; - - /** - * Estimate of size cost to pay beyond payload in jvm for instance of byte []. - * Estimate based on study of jhat and jprofiler numbers. - */ - // JHat says BU is 56 bytes. - // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?) - public static final int ESTIMATED_HEAP_TAX = 16; - - /** - * Returns length of the byte array, returning 0 if the array is null. - * Useful for calculating sizes. 
- * - * @param b byte array, which can be null - * @return 0 if b is null, otherwise returns length - */ - final public static int len(byte[] b) { - return b == null ? 0 : b.length; - } - - /** - * Byte array comparator class. - */ - public static class ByteArrayComparator implements RawComparator<byte[]> { - /** - * Constructor - */ - public ByteArrayComparator() { - super(); - } - - @Override - public int compare(byte[] left, byte[] right) { - return compareTo(left, right); - } - - @Override - public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - return LexicographicalComparerHolder.BEST_COMPARER.compareTo(b1, s1, l1, b2, s2, l2); - } - } - - /** - * A {@link ByteArrayComparator} that treats the empty array as the largest value. - * This is useful for comparing row end keys for regions. - */ - // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region - // boundaries. Thus semantically, we should treat empty byte array as the smallest value - // while comparing row keys, start keys etc; but as the largest value for comparing - // region boundaries for endKeys. - public static class RowEndKeyComparator extends ByteArrayComparator { - @Override - public int compare(byte[] left, byte[] right) { - return compare(left, 0, left.length, right, 0, right.length); - } - - @Override - public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - if (b1 == b2 && s1 == s2 && l1 == l2) { - return 0; - } - if (l1 == 0) { - return l2; //0 or positive - } - if (l2 == 0) { - return -1; - } - return super.compare(b1, s1, l1, b2, s2, l2); - } - } - - /** - * Pass this to TreeMaps where byte [] are keys. - */ - public final static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator(); - - /** - * Use comparing byte arrays, byte-by-byte - */ - public final static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator(); - - /** - * Read byte-array written with a WritableableUtils.vint prefix. 
- * - * @param in Input to read from. - * @return byte array read off <code>in</code> - * @throws java.io.IOException e - */ - public static byte[] readByteArray(final DataInput in) throws IOException { - int len = WritableUtils.readVInt(in); - if (len < 0) { - throw new NegativeArraySizeException(Integer.toString(len)); - } - byte[] result = new byte[len]; - in.readFully(result, 0, len); - return result; - } - - /** - * Read byte-array written with a WritableableUtils.vint prefix. - * IOException is converted to a RuntimeException. - * - * @param in Input to read from. - * @return byte array read off <code>in</code> - */ - public static byte[] readByteArrayThrowsRuntime(final DataInput in) { - try { - return readByteArray(in); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Write byte-array with a WritableableUtils.vint prefix. - * - * @param out output stream to be written to - * @param b array to write - * @throws IOException e - */ - public static void writeByteArray(final DataOutput out, final byte[] b) throws IOException { - if (b == null) { - WritableUtils.writeVInt(out, 0); - } else { - writeByteArray(out, b, 0, b.length); - } - } - - /** - * Write byte-array to out with a vint length prefix. - * - * @param out output stream - * @param b array - * @param offset offset into array - * @param length length past offset - * @throws IOException e - */ - public static void writeByteArray(final DataOutput out, final byte[] b, final int offset, final int length) throws IOException { - WritableUtils.writeVInt(out, length); - out.write(b, offset, length); - } - - /** - * Write byte-array from src to tgt with a vint length prefix. - * - * @param tgt target array - * @param tgtOffset offset into target array - * @param src source array - * @param srcOffset source offset - * @param srcLength source length - * @return New offset in src array. 
- */ - public static int writeByteArray(final byte[] tgt, final int tgtOffset, final byte[] src, final int srcOffset, final int srcLength) { - byte[] vint = vintToBytes(srcLength); - System.arraycopy(vint, 0, tgt, tgtOffset, vint.length); - int offset = tgtOffset + vint.length; - System.arraycopy(src, srcOffset, tgt, offset, srcLength); - return offset + srcLength; - } - - /** - * Put bytes at the specified byte array position. - * - * @param tgtBytes the byte array - * @param tgtOffset position in the array - * @param srcBytes array to write out - * @param srcOffset source offset - * @param srcLength source length - * @return incremented offset - */ - public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, int srcOffset, int srcLength) { - System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength); - return tgtOffset + srcLength; - } - - /** - * Write a single byte out to the specified byte array position. - * - * @param bytes the byte array - * @param offset position in the array - * @param b byte to write out - * @return incremented offset - */ - public static int putByte(byte[] bytes, int offset, byte b) { - bytes[offset] = b; - return offset + 1; - } - - /** - * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified. - * - * @param bytes the byte array - * @param offset position in the array - * @param buf ByteBuffer to write out - * @return incremented offset - */ - public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) { - int len = buf.remaining(); - buf.get(bytes, offset, len); - return offset + len; - } - - /** - * Returns a new byte array, copied from the given {@code buf}, - * from the index 0 (inclusive) to the limit (exclusive), - * regardless of the current position. - * The position and the other index parameters are not changed. 
- * - * @param buf a byte buffer - * @return the byte array - * @see #getBytes(ByteBuffer) - */ - public static byte[] toBytes(ByteBuffer buf) { - ByteBuffer dup = buf.duplicate(); - dup.position(0); - return readBytes(dup); - } - - private static byte[] readBytes(ByteBuffer buf) { - byte[] result = new byte[buf.remaining()]; - buf.get(result); - return result; - } - - /** - * @param b Presumed UTF-8 encoded byte array. - * @return String made from <code>b</code> - */ - public static String toString(final byte[] b) { - if (b == null) { - return null; - } - return toString(b, 0, b.length); - } - - /** - * Joins two byte arrays together using a separator. - * - * @param b1 The first byte array. - * @param sep The separator to use. - * @param b2 The second byte array. - */ - public static String toString(final byte[] b1, String sep, final byte[] b2) { - return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length); - } - - /** - * This method will convert utf8 encoded bytes into a string. If - * the given byte array is null, this method will return null. - * - * @param b Presumed UTF-8 encoded byte array. - * @param off offset into array - * @param len length of utf-8 sequence - * @return String made from <code>b</code> or null - */ - public static String toString(final byte[] b, int off, int len) { - if (b == null) { - return null; - } - if (len == 0) { - return ""; - } - return new String(b, off, len, UTF8_CHARSET); - } - - /** - * Write a printable representation of a byte array. - * - * @param b byte array - * @return string - * @see #toStringBinary(byte[], int, int) - */ - public static String toStringBinary(final byte[] b) { - if (b == null) - return "null"; - return toStringBinary(b, 0, b.length); - } - - /** - * Converts the given byte buffer to a printable representation, - * from the index 0 (inclusive) to the limit (exclusive), - * regardless of the current position. - * The position and the other index parameters are not changed. 
- * - * @param buf a byte buffer - * @return a string representation of the buffer's binary contents - * @see #toBytes(ByteBuffer) - * @see #getBytes(ByteBuffer) - */ - public static String toStringBinary(ByteBuffer buf) { - if (buf == null) - return "null"; - if (buf.hasArray()) { - return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); - } - return toStringBinary(toBytes(buf)); - } - - /** - * Write a printable representation of a byte array. Non-printable - * characters are hex escaped in the format \\x%02X, eg: - * \x00 \x05 etc - * - * @param b array to write out - * @param off offset to start at - * @param len length to write - * @return string output - */ - public static String toStringBinary(final byte[] b, int off, int len) { - StringBuilder result = new StringBuilder(); - // Just in case we are passed a 'len' that is > buffer length... - if (off >= b.length) - return result.toString(); - if (off + len > b.length) - len = b.length - off; - for (int i = off; i < off + len; ++i) { - int ch = b[i] & 0xFF; - if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || " `~!@#$%^&*()-_=+[]{}|;:'\",.<>/?".indexOf(ch) >= 0) { - result.append((char) ch); - } else { - result.append(String.format("\\x%02X", ch)); - } - } - return result.toString(); - } - - private static boolean isHexDigit(char c) { - return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9'); - } - - /** - * Takes a ASCII digit in the range A-F0-9 and returns - * the corresponding integer/ordinal value. - * - * @param ch The hex digit. - * @return The converted hex value as a byte. - */ - public static byte toBinaryFromHex(byte ch) { - if (ch >= 'A' && ch <= 'F') - return (byte) ((byte) 10 + (byte) (ch - 'A')); - // else - return (byte) (ch - '0'); - } - - public static byte[] toBytesBinary(String in) { - // this may be bigger than we need, but let's be safe. 
- byte[] b = new byte[in.length()]; - int size = 0; - for (int i = 0; i < in.length(); ++i) { - char ch = in.charAt(i); - if (ch == '\\' && in.length() > i + 1 && in.charAt(i + 1) == 'x') { - // ok, take next 2 hex digits. - char hd1 = in.charAt(i + 2); - char hd2 = in.charAt(i + 3); - - // they need to be A-F0-9: - if (!isHexDigit(hd1) || !isHexDigit(hd2)) { - // bogus escape code, ignore: - continue; - } - // turn hex ASCII digit -> number - byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2)); - - b[size++] = d; - i += 3; // skip 3 - } else { - b[size++] = (byte) ch; - } - } - // resize: - byte[] b2 = new byte[size]; - System.arraycopy(b, 0, b2, 0, size); - return b2; - } - - /** - * Converts a string to a UTF-8 byte array. - * - * @param s string - * @return the byte array - */ - public static byte[] toBytes(String s) { - return s.getBytes(UTF8_CHARSET); - } - - /** - * Convert a boolean to a byte array. True becomes -1 - * and false becomes 0. - * - * @param b value - * @return <code>b</code> encoded in a byte array. - */ - public static byte[] toBytes(final boolean b) { - return new byte[] { b ? (byte) -1 : (byte) 0 }; - } - - /** - * Reverses {@link #toBytes(boolean)} - * - * @param b array - * @return True or false. - */ - public static boolean toBoolean(final byte[] b) { - if (b.length != 1) { - throw new IllegalArgumentException("Array has wrong size: " + b.length); - } - return b[0] != (byte) 0; - } - - /** - * Convert a long value to a byte array using big-endian. - * - * @param val value to convert - * @return the byte array - */ - public static byte[] toBytes(long val) { - byte[] b = new byte[8]; - for (int i = 7; i > 0; i--) { - b[i] = (byte) val; - val >>>= 8; - } - b[0] = (byte) val; - return b; - } - - /** - * Converts a byte array to a long value. 
Reverses - * {@link #toBytes(long)} - * - * @param bytes array - * @return the long value - */ - public static long toLong(byte[] bytes) { - return toLong(bytes, 0, SIZEOF_LONG); - } - - /** - * Converts a byte array to a long value. Assumes there will be - * {@link #SIZEOF_LONG} bytes available. - * - * @param bytes bytes - * @param offset offset - * @return the long value - */ - public static long toLong(byte[] bytes, int offset) { - return toLong(bytes, offset, SIZEOF_LONG); - } - - /** - * Converts a byte array to a long value. - * - * @param bytes array of bytes - * @param offset offset into array - * @param length length of data (must be {@link #SIZEOF_LONG}) - * @return the long value - * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or - * if there's not enough room in the array at the offset indicated. - */ - public static long toLong(byte[] bytes, int offset, final int length) { - if (length != SIZEOF_LONG || offset + length > bytes.length) { - throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return toLongUnsafe(bytes, offset); - } else { - long l = 0; - for (int i = offset; i < offset + length; i++) { - l <<= 8; - l ^= bytes[i] & 0xFF; - } - return l; - } - } - - private static IllegalArgumentException explainWrongLengthOrOffset(final byte[] bytes, final int offset, final int length, final int expectedLength) { - String reason; - if (length != expectedLength) { - reason = "Wrong length: " + length + ", expected " + expectedLength; - } else { - reason = "offset (" + offset + ") + length (" + length + ") exceed the" + " capacity of the array: " + bytes.length; - } - return new IllegalArgumentException(reason); - } - - /** - * Put a long value out to the specified byte array position. 
- * - * @param bytes the byte array - * @param offset position in the array - * @param val long to write out - * @return incremented offset - * @throws IllegalArgumentException if the byte array given doesn't have - * enough room at the offset specified. - */ - public static int putLong(byte[] bytes, int offset, long val) { - if (bytes.length - offset < SIZEOF_LONG) { - throw new IllegalArgumentException("Not enough room to put a long at" + " offset " + offset + " in a " + bytes.length + " byte array"); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return putLongUnsafe(bytes, offset, val); - } else { - for (int i = offset + 7; i > offset; i--) { - bytes[i] = (byte) val; - val >>>= 8; - } - bytes[offset] = (byte) val; - return offset + SIZEOF_LONG; - } - } - - /** - * Put a long value out to the specified byte array position (Unsafe). - * - * @param bytes the byte array - * @param offset position in the array - * @param val long to write out - * @return incremented offset - */ - public static int putLongUnsafe(byte[] bytes, int offset, long val) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - val = Long.reverseBytes(val); - } - org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.putLong(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET, val); - return offset + SIZEOF_LONG; - } - - /** - * Presumes float encoded as IEEE 754 floating-point "single format" - * - * @param bytes byte array - * @return Float made from passed byte array. - */ - public static float toFloat(byte[] bytes) { - return toFloat(bytes, 0); - } - - /** - * Presumes float encoded as IEEE 754 floating-point "single format" - * - * @param bytes array to convert - * @param offset offset into array - * @return Float made from passed byte array. 
- */ - public static float toFloat(byte[] bytes, int offset) { - return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT)); - } - - /** - * @param bytes byte array - * @param offset offset to write to - * @param f float value - * @return New offset in <code>bytes</code> - */ - public static int putFloat(byte[] bytes, int offset, float f) { - return putInt(bytes, offset, Float.floatToRawIntBits(f)); - } - - /** - * @param f float value - * @return the float represented as byte [] - */ - public static byte[] toBytes(final float f) { - // Encode it as int - return Bytes.toBytes(Float.floatToRawIntBits(f)); - } - - /** - * @param bytes byte array - * @return Return double made from passed bytes. - */ - public static double toDouble(final byte[] bytes) { - return toDouble(bytes, 0); - } - - /** - * @param bytes byte array - * @param offset offset where double is - * @return Return double made from passed bytes. - */ - public static double toDouble(final byte[] bytes, final int offset) { - return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG)); - } - - /** - * @param bytes byte array - * @param offset offset to write to - * @param d value - * @return New offset into array <code>bytes</code> - */ - public static int putDouble(byte[] bytes, int offset, double d) { - return putLong(bytes, offset, Double.doubleToLongBits(d)); - } - - /** - * Serialize a double as the IEEE 754 double format output. The resultant - * array will be 8 bytes long. - * - * @param d value - * @return the double represented as byte [] - */ - public static byte[] toBytes(final double d) { - // Encode it as a long - return Bytes.toBytes(Double.doubleToRawLongBits(d)); - } - - /** - * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt - * does. 
- * - * @param val value - * @return the byte array - */ - public static byte[] toBytes(int val) { - byte[] b = new byte[4]; - for (int i = 3; i > 0; i--) { - b[i] = (byte) val; - val >>>= 8; - } - b[0] = (byte) val; - return b; - } - - /** - * Converts a byte array to an int value - * - * @param bytes byte array - * @return the int value - */ - public static int toInt(byte[] bytes) { - return toInt(bytes, 0, SIZEOF_INT); - } - - /** - * Converts a byte array to an int value - * - * @param bytes byte array - * @param offset offset into array - * @return the int value - */ - public static int toInt(byte[] bytes, int offset) { - return toInt(bytes, offset, SIZEOF_INT); - } - - /** - * Converts a byte array to an int value - * - * @param bytes byte array - * @param offset offset into array - * @param length length of int (has to be {@link #SIZEOF_INT}) - * @return the int value - * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or - * if there's not enough room in the array at the offset indicated. 
- */ - public static int toInt(byte[] bytes, int offset, final int length) { - if (length != SIZEOF_INT || offset + length > bytes.length) { - throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return toIntUnsafe(bytes, offset); - } else { - int n = 0; - for (int i = offset; i < (offset + length); i++) { - n <<= 8; - n ^= bytes[i] & 0xFF; - } - return n; - } - } - - /** - * Converts a byte array to an int value (Unsafe version) - * - * @param bytes byte array - * @param offset offset into array - * @return the int value - */ - public static int toIntUnsafe(byte[] bytes, int offset) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - return Integer.reverseBytes(org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getInt(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET)); - } else { - return org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getInt(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET); - } - } - - /** - * Converts a byte array to an short value (Unsafe version) - * - * @param bytes byte array - * @param offset offset into array - * @return the short value - */ - public static short toShortUnsafe(byte[] bytes, int offset) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - return Short.reverseBytes(org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getShort(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET)); - } else { - return 
org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getShort(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET); - } - } - - /** - * Converts a byte array to an long value (Unsafe version) - * - * @param bytes byte array - * @param offset offset into array - * @return the long value - */ - public static long toLongUnsafe(byte[] bytes, int offset) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - return Long.reverseBytes(org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getLong(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET)); - } else { - return org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.getLong(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET); - } - } - - /** - * Converts a byte array to an int value - * - * @param bytes byte array - * @param offset offset into array - * @param length how many bytes should be considered for creating int - * @return the int value - * @throws IllegalArgumentException if there's not enough room in the array at the offset - * indicated. - */ - public static int readAsInt(byte[] bytes, int offset, final int length) { - if (offset + length > bytes.length) { - throw new IllegalArgumentException("offset (" + offset + ") + length (" + length + ") exceed the" + " capacity of the array: " + bytes.length); - } - int n = 0; - for (int i = offset; i < (offset + length); i++) { - n <<= 8; - n ^= bytes[i] & 0xFF; - } - return n; - } - - /** - * Put an int value out to the specified byte array position. 
- * - * @param bytes the byte array - * @param offset position in the array - * @param val int to write out - * @return incremented offset - * @throws IllegalArgumentException if the byte array given doesn't have - * enough room at the offset specified. - */ - public static int putInt(byte[] bytes, int offset, int val) { - if (bytes.length - offset < SIZEOF_INT) { - throw new IllegalArgumentException("Not enough room to put an int at" + " offset " + offset + " in a " + bytes.length + " byte array"); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return putIntUnsafe(bytes, offset, val); - } else { - for (int i = offset + 3; i > offset; i--) { - bytes[i] = (byte) val; - val >>>= 8; - } - bytes[offset] = (byte) val; - return offset + SIZEOF_INT; - } - } - - /** - * Put an int value out to the specified byte array position (Unsafe). - * - * @param bytes the byte array - * @param offset position in the array - * @param val int to write out - * @return incremented offset - */ - public static int putIntUnsafe(byte[] bytes, int offset, int val) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - val = Integer.reverseBytes(val); - } - org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.putInt(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET, val); - return offset + SIZEOF_INT; - } - - /** - * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long. 
- * - * @param val value - * @return the byte array - */ - public static byte[] toBytes(short val) { - byte[] b = new byte[SIZEOF_SHORT]; - b[1] = (byte) val; - val >>= 8; - b[0] = (byte) val; - return b; - } - - /** - * Converts a byte array to a short value - * - * @param bytes byte array - * @return the short value - */ - public static short toShort(byte[] bytes) { - return toShort(bytes, 0, SIZEOF_SHORT); - } - - /** - * Converts a byte array to a short value - * - * @param bytes byte array - * @param offset offset into array - * @return the short value - */ - public static short toShort(byte[] bytes, int offset) { - return toShort(bytes, offset, SIZEOF_SHORT); - } - - /** - * Converts a byte array to a short value - * - * @param bytes byte array - * @param offset offset into array - * @param length length, has to be {@link #SIZEOF_SHORT} - * @return the short value - * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT} - * or if there's not enough room in the array at the offset indicated. - */ - public static short toShort(byte[] bytes, int offset, final int length) { - if (length != SIZEOF_SHORT || offset + length > bytes.length) { - throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return toShortUnsafe(bytes, offset); - } else { - short n = 0; - n ^= bytes[offset] & 0xFF; - n <<= 8; - n ^= bytes[offset + 1] & 0xFF; - return n; - } - } - - /** - * Returns a new byte array, copied from the given {@code buf}, - * from the position (inclusive) to the limit (exclusive). - * The position and the other index parameters are not changed. - * - * @param buf a byte buffer - * @return the byte array - * @see #toBytes(ByteBuffer) - */ - public static byte[] getBytes(ByteBuffer buf) { - return readBytes(buf.duplicate()); - } - - /** - * Put a short value out to the specified byte array position. 
- * - * @param bytes the byte array - * @param offset position in the array - * @param val short to write out - * @return incremented offset - * @throws IllegalArgumentException if the byte array given doesn't have - * enough room at the offset specified. - */ - public static int putShort(byte[] bytes, int offset, short val) { - if (bytes.length - offset < SIZEOF_SHORT) { - throw new IllegalArgumentException("Not enough room to put a short at" + " offset " + offset + " in a " + bytes.length + " byte array"); - } - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.isAvailable()) { - return putShortUnsafe(bytes, offset, val); - } else { - bytes[offset + 1] = (byte) val; - val >>= 8; - bytes[offset] = (byte) val; - return offset + SIZEOF_SHORT; - } - } - - /** - * Put a short value out to the specified byte array position (Unsafe). - * - * @param bytes the byte array - * @param offset position in the array - * @param val short to write out - * @return incremented offset - */ - public static int putShortUnsafe(byte[] bytes, int offset, short val) { - if (org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.littleEndian) { - val = Short.reverseBytes(val); - } - org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.theUnsafe.putShort(bytes, (long) offset + org.apache.kylin.common.util.Bytes.LexicographicalComparerHolder.UnsafeComparer.BYTE_ARRAY_BASE_OFFSET, val); - return offset + SIZEOF_SHORT; - } - - /** - * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of - * the short will be put into the array. The caller of the API need to make sure they will not - * loose the value by doing so. This is useful to store an unsigned short which is represented as - * int in other parts. 
- * - * @param bytes the byte array - * @param offset position in the array - * @param val value to write out - * @return incremented offset - * @throws IllegalArgumentException if the byte array given doesn't have - * enough room at the offset specified. - */ - public static int putAsShort(byte[] bytes, int offset, int val) { - if (bytes.length - offset < SIZEOF_SHORT) { - throw new IllegalArgumentException("Not enough room to put a short at" + " offset " + offset + " in a " + bytes.length + " byte array"); - } - bytes[offset + 1] = (byte) val; - val >>= 8; - bytes[offset] = (byte) val; - return offset + SIZEOF_SHORT; - } - - /** - * Convert a BigDecimal value to a byte array - * - * @param val - * @return the byte array - */ - public static byte[] toBytes(BigDecimal val) { - byte[] valueBytes = val.unscaledValue().toByteArray(); - byte[] result = new byte[valueBytes.length + SIZEOF_INT]; - int offset = putInt(result, 0, val.scale()); - putBytes(result, offset, valueBytes, 0, valueBytes.length); - return result; - } - - /** - * Converts a byte array to a BigDecimal - * - * @param bytes - * @return the char value - */ - public static BigDecimal toBigDecimal(byte[] bytes) { - return toBigDecimal(bytes, 0, bytes.length); - } - - /** - * Converts a byte array to a BigDecimal value - * - * @param bytes - * @param offset - * @param length - * @return the char value - */ - public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) { - if (bytes == null || length < SIZEOF_INT + 1 || (offset + length > bytes.length)) { - return null; - } - - int scale = toInt(bytes, offset); - byte[] tcBytes = new byte[length - SIZEOF_INT]; - System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); - return new BigDecimal(new BigInteger(tcBytes), scale); - } - - /** - * Put a BigDecimal value out to the specified byte array position. 
- * - * @param bytes the byte array - * @param offset position in the array - * @param val BigDecimal to write out - * @return incremented offset - */ - public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { - if (bytes == null) { - return offset; - } - - byte[] valueBytes = val.unscaledValue().toByteArray(); - byte[] result = new byte[valueBytes.length + SIZEOF_INT]; - offset = putInt(result, offset, val.scale()); - return putBytes(result, offset, valueBytes, 0, valueBytes.length); - } - - /** - * @param vint Integer to make a vint of. - * @return Vint as bytes array. - */ - public static byte[] vintToBytes(final long vint) { - long i = vint; - int size = WritableUtils.getVIntSize(i); - byte[] result = new byte[size]; - int offset = 0; - if (i >= -112 && i <= 127) { - result[offset] = (byte) i; - return result; - } - - int len = -112; - if (i < 0) { - i ^= -1L; // take one's complement' - len = -120; - } - - long tmp = i; - while (tmp != 0) { - tmp = tmp >> 8; - len--; - } - - result[offset++] = (byte) len; - - len = (len < -120) ? -(len + 120) : -(len + 112); - - for (int idx = len; idx != 0; idx--) { - int shiftbits = (idx - 1) * 8; - long mask = 0xFFL << shiftbits; - result[offset++] = (byte) ((i & mask) >> shiftbits); - } - return result; - } - - /** - * @param buffer buffer to convert - * @return vint bytes as an integer. - */ - public static long bytesToVint(final byte[] buffer) { - int offset = 0; - byte firstByte = buffer[offset++]; - int len = WritableUtils.decodeVIntSize(firstByte); - if (len == 1) { - return firstByte; - } - long i = 0; - for (int idx = 0; idx < len - 1; idx++) { - byte b = buffer[offset++]; - i = i << 8; - i = i | (b & 0xFF); - } - return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); - } - - /** - * Reads a zero-compressed encoded long from input stream and returns it. - * - * @param buffer Binary array - * @param offset Offset into array at which vint begins. - * @return deserialized long from stream. 
- * @throws java.io.IOException e - */ - public static long readVLong(final byte[] buffer, final int offset) throws IOException { - byte firstByte = buffer[offset]; - int len = WritableUtils.decodeVIntSize(firstByte); - if (len == 1) { - return firstByte; - } - long i = 0; - for (int idx = 0; idx < len - 1; idx++) { - byte b = buffer[offset + 1 + idx]; - i = i << 8; - i = i | (b & 0xFF); - } - return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); - } - - /** - * @param left left operand - * @param right right operand - * @return 0 if equal, < 0 if left is less than right, etc. - */ - public static int compareTo(final byte[] left, final byte[] right) { - return LexicographicalComparerHolder.BEST_COMPARER.compareTo(left, 0, left.length, right, 0, right.length); - } - - /** - * Lexicographically compare two arrays. - * - * @param buffer1 left operand - * @param buffer2 right operand - * @param offset1 Where to start comparing in the left buffer - * @param offset2 Where to start comparing in the right buffer - * @param length1 How much to compare from the left buffer - * @param length2 How much to compare from the right buffer - * @return 0 if equal, < 0 if left is less than right, etc. - */ - public static int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) { - return LexicographicalComparerHolder.BEST_COMPARER.compareTo(buffer1, offset1, length1, buffer2, offset2, length2); - } - - interface Comparer<T> { - int compareTo(T buffer1, int offset1, int length1, T buffer2, int offset2, int length2); - } - - static Comparer<byte[]> lexicographicalComparerJavaImpl() { - return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; - } - - /** - * Provides a lexicographical comparer implementation; either a Java - * implementation or a faster implementation based on {@link sun.misc.Unsafe}. - * <p/> - * <p>Uses reflection to gracefully fall back to the Java implementation if - * {@code Unsafe} isn't available. 
- */ - static class LexicographicalComparerHolder { - static final String UNSAFE_COMPARER_NAME = LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; - - static final Comparer<byte[]> BEST_COMPARER = getBestComparer(); - - /** - * Returns the Unsafe-using Comparer, or falls back to the pure-Java - * implementation if unable to do so. - */ - static Comparer<byte[]> getBestComparer() { - try { - Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME); - - // yes, UnsafeComparer does implement Comparer<byte[]> - @SuppressWarnings("unchecked") - Comparer<byte[]> comparer = (Comparer<byte[]>) theClass.getEnumConstants()[0]; - return comparer; - } catch (Throwable t) { // ensure we really catch *everything* - return lexicographicalComparerJavaImpl(); - } - } - - enum PureJavaComparer implements Comparer<byte[]> { - INSTANCE; - - @Override - public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) { - // Short circuit equal case - if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { - return 0; - } - // Bring WritableComparator code local - int end1 = offset1 + length1; - int end2 = offset2 + length2; - for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { - int a = (buffer1[i] & 0xff); - int b = (buffer2[j] & 0xff); - if (a != b) { - return a - b; - } - } - return length1 - length2; - } - } - - enum UnsafeComparer implements Comparer<byte[]> { - INSTANCE; - - static final Unsafe theUnsafe; - - /** - * The offset to the first element in a byte array. - */ - static final int BYTE_ARRAY_BASE_OFFSET; - - static { - theUnsafe = (Unsafe) AccessController.doPrivileged(new PrivilegedAction<Object>() { - @Override - public Object run() { - try { - Field f = Unsafe.class.getDeclaredField("theUnsafe"); - f.setAccessible(true); - return f.get(null); - } catch (NoSuchFieldException e) { - // It doesn't matter what we throw; - // it's swallowed in getBestComparer(). 
- throw new Error(); - } catch (IllegalAccessException e) { - throw new Error(); - } - } - }); - - BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class); - - // sanity check - this should never fail - if (theUnsafe.arrayIndexScale(byte[].class) != 1) { - throw new AssertionError(); - } - } - - static final boolean littleEndian = ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); - - /** - * Returns true if x1 is less than x2, when both values are treated as - * unsigned long. - */ - static boolean lessThanUnsignedLong(long x1, long x2) { - return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE); - } - - /** - * Returns true if x1 is less than x2, when both values are treated as - * unsigned int. - */ - static boolean lessThanUnsignedInt(int x1, int x2) { - return (x1 & 0xffffffffL) < (x2 & 0xffffffffL); - } - - /** - * Returns true if x1 is less than x2, when both values are treated as - * unsigned short. - */ - static boolean lessThanUnsignedShort(short x1, short x2) { - return (x1 & 0xffff) < (x2 & 0xffff); - } - - /** - * Checks if Unsafe is available - * - * @return true, if available, false - otherwise - */ - public static boolean isAvailable() { - return theUnsafe != null; - } - - /** - * Lexicographically compare two arrays. - * - * @param buffer1 left operand - * @param buffer2 right operand - * @param offset1 Where to start comparing in the left buffer - * @param offset2 Where to start comparing in the right buffer - * @param length1 How much to compare from the left buffer - * @param length2 How much to compare from the right buffer - * @return 0 if equal, < 0 if left is less than right, etc. 
- */ - @Override - public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) { - - // Short circuit equal case - if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { - return 0; - } - final int minLength = Math.min(length1, length2); - final int minWords = minLength / SIZEOF_LONG; - final long offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET; - final long offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; - - /* - * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a - * time is no slower than comparing 4 bytes at a time even on 32-bit. - * On the other hand, it is substantially faster on 64-bit. - */ - for (int i = 0; i < minWords * SIZEOF_LONG; i += SIZEOF_LONG) { - long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); - long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); - long diff = lw ^ rw; - if (littleEndian) { - lw = Long.reverseBytes(lw); - rw = Long.reverseBytes(rw); - } - if (diff != 0) { - return lessThanUnsignedLong(lw, rw) ? -1 : 1; - } - } - int offset = minWords * SIZEOF_LONG; - - if (minLength - offset >= SIZEOF_INT) { - int il = theUnsafe.getInt(buffer1, offset1Adj + offset); - int ir = theUnsafe.getInt(buffer2, offset2Adj + offset); - if (littleEndian) { - il = Integer.reverseBytes(il); - ir = Integer.reverseBytes(ir); - } - if (il != ir) { - return lessThanUnsignedInt(il, ir) ? -1 : 1; - } - offset += SIZEOF_INT; - } - if (minLength - offset >= SIZEOF_SHORT) { - short sl = theUnsafe.getShort(buffer1, offset1Adj + offset); - short sr = theUnsafe.getShort(buffer2, offset2Adj + offset); - if (littleEndian) { - sl = Short.reverseBytes(sl); - sr = Short.reverseBytes(sr); - } - if (sl != sr) { - return lessThanUnsignedShort(sl, sr) ? 
-1 : 1; - } - offset += SIZEOF_SHORT; - } - if (minLength - offset == 1) { - int a = (buffer1[(int) (offset1 + offset)] & 0xff); - int b = (buffer2[(int) (offset2 + offset)] & 0xff); - if (a != b) { - return a - b; - } - } - return length1 - length2; - } - } - } - - /** - * @param left left operand - * @param right right operand - * @return True if equal - */ - public static boolean equals(final byte[] left, final byte[] right) { - // Could use Arrays.equals? - //noinspection SimplifiableConditionalExpression - if (left == right) - return true; - if (left == null || right == null) - return false; - if (left.length != right.length) - return false; - if (left.length == 0) - return true; - - // Since we're often comparing adjacent sorted data, - // it's usual to have equal arrays except for the very last byte - // so check that first - if (left[left.length - 1] != right[right.length - 1]) - return false; - - return compareTo(left, right) == 0; - } - - public static boolean equals(final byte[] left, int leftOffset, int leftLen, final byte[] right, int rightOffset, int rightLen) { - // short circuit case - if (left == right && leftOffset == rightOffset && leftLen == rightLen) { - return true; - } - // different lengths fast check - if (leftLen != rightLen) { - return false; - } - if (leftLen == 0) { - return true; - } - - // Since we're often comparing adjacent sorted data, - // it's usual to have equal arrays except for the very last byte - // so check that first - if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) - return false; - - return LexicographicalComparerHolder.BEST_COMPARER.compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0; - } - - /** - * @param a left operand - * @param buf right operand - * @return True if equal - */ - public static boolean equals(byte[] a, ByteBuffer buf) { - if (a == null) - return buf == null; - if (buf == null) - return false; - if (a.length != buf.remaining()) - return false; - - // Thou 
shalt not modify the original byte buffer in what should be read only operations. - ByteBuffer b = buf.duplicate(); - for (byte anA : a) { - if (anA != b.get()) { - return false; - } - } - return true; - } - - /** - * Return true if the byte array on the right is a prefix of the byte - * array on the left. - */ - public static boolean startsWith(byte[] bytes, byte[] prefix) { - return bytes != null && prefix != null && bytes.length >= prefix.length && LexicographicalComparerHolder.BEST_COMPARER.compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0; - } - - /** - * @param b bytes to hash - * @return Runs {@link org.apache.hadoop.io.WritableComparator#hashBytes(byte[], int)} on the - * passed in array. This method is what {@link org.apache.hadoop.io.Text} and - * {@link org.apache.hadoop.hbase.io.ImmutableBytesWritable} use calculating hash code. - */ - public static int hashCode(final byte[] b) { - return hashCode(b, b.length); - } - - /** - * @param b value - * @param length length of the value - * @return Runs {@link org.apache.hadoop.io.WritableComparator#hashBytes(byte[], int)} on the - * passed in array. This method is what {@link org.apache.hadoop.io.Text} and - * {@link org.apache.hadoop.hbase.io.ImmutableBytesWritable} use calculating hash code. - */ - public static int hashCode(final byte[] b, final int length) { - return WritableComparator.hashBytes(b, length); - } - - /** - * @param b bytes to hash - * @return A hash of <code>b</code> as an Integer that can be used as key in - * Maps. - */ - public static Integer mapKey(final byte[] b) { - return hashCode(b); - } - - /** - * @param b bytes to hash - * @param length length to hash - * @return A hash of <code>b</code> as an Integer that can be used as key in - * Maps. - */ - public static Integer mapKey(final byte[] b, final int length) { - return hashCode(b, length); - } - - /** - * @param a lower half - * @param b upper half - * @return New array that has a in lower half and b in upper half. 
- */ - public static byte[] add(final byte[] a, final byte[] b) { - return add(a, b, EMPTY_BYTE_ARRAY); - } - - /** - * @param a first third - * @param b second third - * @param c third third - * @return New array made from a, b and c - */ - public static byte[] add(final byte[] a, final byte[] b, final byte[] c) { - byte[] result = new byte[a.length + b.length + c.length]; - System.arraycopy(a, 0, result, 0, a.length); - System.arraycopy(b, 0, result, a.length, b.length); - System.arraycopy(c, 0, result, a.length + b.length, c.length); - return result; - } - - /** - * @param a array - * @param length amount of bytes to grab - * @return First <code>length</code> bytes from <code>a</code> - */ - public static byte[] head(final byte[] a, final int length) { - if (a.length < length) { - return null; - } - byte[] result = new byte[length]; - System.arraycopy(a, 0, result, 0, length); - return result; - } - - /** - * @param a array - * @param length amount of bytes to snarf - * @return Last <code>length</code> bytes from <code>a</code> - */ - public static byte[] tail(final byte[] a, final int length) { - if (a.length < length) { - return null; - } - byte[] result = new byte[length]; - System.arraycopy(a, a.length - length, result, 0, length); - return result; - } - - /** - * @param a array - * @param length new array size - * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes - */ - public static byte[] padHead(final byte[] a, final int length) { - byte[] padding = new byte[length]; - for (int i = 0; i < length; i++) { - padding[i] = 0; - } - return add(padding, a); - } - - /** - * @param a array - * @param length new array size - * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes - */ - public static byte[] padTail(final byte[] a, final int length) { - byte[] padding = new byte[length]; - for (int i = 0; i < length; i++) { - padding[i] = 0; - } - return add(a, padding); - } - - /** - * Split passed range. 
Expensive operation relatively. Uses BigInteger math. - * Useful splitting ranges for MapReduce jobs. - * - * @param a Beginning of range - * @param b End of range - * @param num Number of times to split range. Pass 1 if you want to split - * the range in two; i.e. one split. - * @return Array of dividing values - */ - public static byte[][] split(final byte[] a, final byte[] b, final int num) { - return split(a, b, false, num); - } - - /** - * Split passed range. Expensive operation relatively. Uses BigInteger math. - * Useful splitting ranges for MapReduce jobs. - * - * @param a Beginning of range - * @param b End of range - * @param inclusive Whether the end of range is prefix-inclusive or is - * considered an exclusive boundary. Automatic splits are generally exclusive - * and manual splits with an explicit range utilize an inclusive end of range. - * @param num Number of times to split range. Pass 1 if you want to split - * the range in two; i.e. one split. - * @return Array of dividing values - */ - public static byte[][] split(final byte[] a, final byte[] b, boolean inclusive, final int num) { - byte[][] ret = new byte[num + 2][]; - int i = 0; - Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num); - if (iter == null) - return null; - for (byte[] elem : iter) { - ret[i++] = elem; - } - return ret; - } - - /** - * Iterate over keys within the passed range, splitting at an [a,b) boundary. - */ - public static Iterable<byte[]> iterateOnSplits(final byte[] a, final byte[] b, final int num) { - return iterateOnSplits(a, b, false, num); - } - - /** - * Iterate over keys within the passed range. 
- */ - public static Iterable<byte[]> iterateOnSplits(final byte[] a, final byte[] b, boolean inclusive, final int num) { - byte[] aPadded; - byte[] bPadded; - if (a.length < b.length) { - aPadded = padTail(a, b.length - a.length); - bPadded = b; - } else if (b.length < a.length) { - aPadded = a; - bPadded = padTail(b, a.length - b.length); - } else { - aPadded = a; - bPadded = b; - } - if (compareTo(aPadded, bPadded) >= 0) { - throw new IllegalArgumentException("b <= a"); - } - if (num <= 0) { - throw new IllegalArgumentException("num cannot be <= 0"); - } - byte[] prependHeader = { 1, 0 }; - final BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); - final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); - BigInteger diffBI = stopBI.subtract(startBI); - if (inclusive) { - diffBI = diffBI.add(BigInteger.ONE); - } - final BigInteger splitsBI = BigInteger.valueOf(num + 1); - if (diffBI.compareTo(splitsBI) < 0) { - return null; - } - final BigInteger intervalBI; - try { - intervalBI = diffBI.divide(splitsBI); - } catch (Exception e) { - LOG.error("Exception caught during division", e); - return null; - } - - final Iterator<byte[]> iterator = new Iterator<byte[]>() { - private int i = -1; - - @Override - public boolean hasNext() { - return i < num + 1; - } - - @Override - public byte[] next() { - i++; - if (i == 0) - return a; - if (i == num + 1) - return b; - - BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); - byte[] padded = curBI.toByteArray(); - if (padded[1] == 0) - padded = tail(padded, padded.length - 2); - else - padded = tail(padded, padded.length - 1); - return padded; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - }; - - return new Iterable<byte[]>() { - @Override - public Iterator<byte[]> iterator() { - return iterator; - } - }; - } - - /** - * @param bytes array to hash - * @param offset offset to start from - * @param length length to hash - */ - 
public static int hashCode(byte[] bytes, int offset, int length) { - int hash = 1; - for (int i = offset; i < offset + length; i++) - hash = (31 * hash) + (int) bytes[i]; - return hash; - } - - /** - * @param t operands - * @return Array of byte arrays made from passed array of Text - */ - public static byte[][] toByteArrays(final String[] t) { - byte[][] result = new byte[t.length][]; - for (int i = 0; i < t.length; i++) { - result[i] = Bytes.toBytes(t[i]); - } - return result; - } - - /** - * @param column operand - * @return A byte array of a byte array where first and only entry is - * <code>column</code> - */ - public static byte[][] toByteArrays(final String column) { - return toByteArrays(toBytes(column)); - } - - /** - * @param column operand - * @return A byte array of a byte array where first and only entry is - * <code>column</code> - */ - public static byte[][] toByteArrays(final byte[] column) { - byte[][] result = new byte[1][]; - result[0] = column; - return result; - } - - /** - * Binary search for keys in indexes. - * - * @param arr array of byte arrays to search for - * @param key the key you want to find - * @param offset the offset in the key you want to find - * @param length the length of the key - * @param comparator a comparator to compare. - * @return zero-based index of the key, if the key is present in the array. - * Otherwise, a value -(i + 1) such that the key is between arr[i - - * 1] and arr[i] non-inclusively, where i is in [0, i], if we define - * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above - * means that this function can return 2N + 1 different values - * ranging from -(N + 1) to N - 1. 
- */ - public static int binarySearch(byte[][] arr, byte[] key, int offset, int length, RawComparator<?> comparator) { - int low = 0; - int high = arr.length - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - // we have to compare in this order, because the comparator order - // has special logic when the 'left side' is a special key. - int cmp = comparator.compare(key, offset, length, arr[mid], 0, arr[mid].length); - // key lives above the midpoint - if (cmp > 0) - low = mid + 1; - // key lives below the midpoint - else if (cmp < 0) - high = mid - 1; - // BAM. how often does this really happen? - else - return mid; - } - return -(low + 1); - } - - /** - * Bytewise binary increment/deincrement of long contained in byte array - * on given amount. - * - * @param value - array of bytes containing long (length <= SIZEOF_LONG) - * @param amount value will be incremented on (deincremented if negative) - * @return array of bytes containing incremented long (length == SIZEOF_LONG) - */ - public static byte[] incrementBytes(byte[] value, long amount) { - byte[] val = value; - if (val.length < SIZEOF_LONG) { - // Hopefully this doesn't happen too often. 
- byte[] newvalue; - if (val[0] < 0) { - newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 }; - } else { - newvalue = new byte[SIZEOF_LONG]; - } - System.arraycopy(val, 0, newvalue, newvalue.length - val.length, val.length); - val = newvalue; - } else if (val.length > SIZEOF_LONG) { - throw new IllegalArgumentException("Increment Bytes - value too big: " + val.length); - } - if (amount == 0) - return val; - if (val[0] < 0) { - return binaryIncrementNeg(val, amount); - } - return binaryIncrementPos(val, amount); - } - - /* increment/deincrement for positive value */ - private static byte[] binaryIncrementPos(byte[] value, long amount) { - long amo = amount; - int sign = 1; - if (amount < 0) { - amo = -amount; - sign = -1; - } - for (int i = 0; i < value.length; i++) { - int cur = ((int) amo % 256) * sign; - amo = (amo >> 8); - int val = value[value.length - i - 1] & 0x0ff; - int total = val + cur; - if (total > 255) { - amo += sign; - total %= 256; - } else if (total < 0) { - amo -= sign; - } - value[value.length - i - 1] = (byte) total; - if (amo == 0) - return value; - } - return value; - } - - /* increment/deincrement for negative value */ - private static byte[] binaryIncrementNeg(byte[] value, long amount) { - long amo = amount; - int sign = 1; - if (amount < 0) { - amo = -amount; - sign = -1; - } - for (int i = 0; i < value.length; i++) { - int cur = ((int) amo % 256) * sign; - amo = (amo >> 8); - int val = ((~value[value.length - i - 1]) & 0x0ff) + 1; - int total = cur - val; - if (total >= 0) { - amo += sign; - } else if (total < -256) { - amo -= sign; - total %= 256; - } - value[value.length - i - 1] = (byte) total; - if (amo == 0) - return value; - } - return value; - } - - /** - * Writes a string as a fixed-size field, padded with zeros. 
- */ - public static void writeStringFixedSize(final DataOutput out, String s, int size) throws IOException { - byte[] b = toBytes(s); - if (b.length > size) { - throw new IOException("Trying to write " + b.length + " bytes (" + toStringBinary(b) + ") into a field of length " + size); - } - - out.writeBytes(s); - for (int i = 0; i < size - s.length(); ++i) - out.writeByte(0); - } - - /** - * Reads a fixed-size field and interprets it as a string padded with zeros. - */ - public static String readStringFixedSize(final DataInput in, int size) throws IOException { - byte[] b = new byte[size]; - in.readFully(b); - int n = b.length; - while (n > 0 && b[n - 1] == 0) - --n; - - return toString(b, 0, n); - } - - /** - * Copy the byte array given in parameter and return an instance - * of a new byte array with the same length and the same content. - * - * @param bytes the byte array to duplicate - * @return a copy of the given byte array - */ - public static byte[] copy(byte[] bytes) { - if (bytes == null) - return null; - byte[] result = new byte[bytes.length]; - System.arraycopy(bytes, 0, result, 0, bytes.length); - return result; - } - - /** - * Copy the byte array given in parameter and return an instance - * of a new byte array with the same length and the same content. - * - * @param bytes the byte array to copy from - * @param offset - * @param length - * @return a copy of the given designated byte array - */ - public static byte[] copy(byte[] bytes, final int offset, final int length) { - if (bytes == null) - return null; - byte[] result = new byte[length]; - System.arraycopy(bytes, offset, result, 0, length); - return result; - } - - /** - * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from - * somewhere. (mcorgan) - * - * @param a Array to search. Entries must be sorted and unique. - * @param fromIndex First index inclusive of "a" to include in the search. 
- * @param toIndex Last index exclusive of "a" to include in the search. - * @param key The byte to search for. - * @return The index of key if found. If not found, return -(index + 1), where negative indicates - * "not found" and the "index + 1" handles the "-0" case. - */ - public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { - int unsignedKey = key & 0xff; - int low = fromIndex; - int high = toIndex - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - int midVal = a[mid] & 0xff; - - if (midVal < unsignedKey) { - low = mid + 1; - } else if (midVal > unsignedKey) { - high = mid - 1; - } else { - return mid; // key found - } - } - return -(low + 1); // key not found. - } - - /** - * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding - * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. - * - * @param input The byte[] to increment. - * @return The incremented copy of "in". May be same length or 1 byte longer. 
- */ - public static byte[] unsignedCopyAndIncrement(final byte[] input) { - byte[] copy = copy(input); - if (copy == null) { - throw new IllegalArgumentException("cannot increment null array"); - } - for (int i = copy.length - 1; i >= 0; --i) { - if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum - copy[i] = 0; - } else { - ++copy[i]; - return copy; - } - } - // we maxed out the array - byte[] out = new byte[copy.length + 1]; - out[0] = 1; - System.arraycopy(copy, 0, out, 1, copy.length); - return out; - } - - public static boolean equals(List<byte[]> a, List<byte[]> b) { - if (a == null) { - if (b == null) { - return true; - } - return false; - } - if (b == null) { - return false; - } - if (a.size() != b.size()) { - return false; - } - for (int i = 0; i < a.size(); ++i) { - if (!Bytes.equals(a.get(i), b.get(i))) { - return false; - } - } - return true; - } - - /** - * Returns the index of the first appearance of the value {@code target} in - * {@code array}. - * - * @param array an array of {@code byte} values, possibly empty - * @param target a primitive {@code byte} value - * @return the least index {@code i} for which {@code array[i] == target}, or - * {@code -1} if no such index exists. - */ - public static int indexOf(byte[] array, byte target) { - for (int i = 0; i < array.length; i++) { - if (array[i] == target) { - return i; - } - } - return -1; - } - - /** - * Returns the start position of the first occurrence of the specified {@code - * target} within {@code array}, or {@code -1} if there is no such occurrence. - * <p/> - * <p>More formally, returns the lowest index {@code i} such that {@code - * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly - * the same elements as {@code target}. 
- * - * @param array the array to search for the sequence {@code target} - * @param target the array to search for as a sub-sequence of {@code array} - */ - public static int indexOf(byte[] array, byte[] target) { - checkNotNull(array, "array"); - checkNotNull(target, "target"); - if (target.length == 0) { - return 0; - } - - outer: for (int i = 0; i < array.length - target.length + 1; i++) { - for (int j = 0; j < target.length; j++) { - if (array[i + j] != target[j]) { - continue outer; - } - } - return i; - } - return -1; - } - - /** - * @param array an array of {@code byte} values, possibly empty - * @param target a primitive {@code byte} value - * @return {@code true} if {@code target} is present as an element anywhere in {@code array}. - */ - public static boolean contains(byte[] array, byte target) { - return indexOf(array, target) > -1; - } - - /** - * @param array an array of {@code byte} values, possibly empty - * @param target an array of {@code byte} - * @return {@code true} if {@code target} is present anywhere in {@code array} - */ - public static boolean contains(byte[] array, byte[] target) { - return indexOf(array, target) > -1; - } - - /** - * Fill given array with zeros. - * - * @param b array which needs to be filled with zeros - */ - public static void zero(byte[] b) { - zero(b, 0, b.length); - } - - /** - * Fill given array with zeros at the specified position. - * - * @param b - * @param offset - * @param length - */ - public static void zero(byte[] b, int offset, int length) { - checkPositionIndex(offset, b.length, "offset"); - checkArgument(length > 0, "length must be greater than 0"); - checkPositionIndex(offset + length, b.length, "offset + length"); - Arrays.fill(b, offset, offset + length, (byte) 0); - } - - private static final SecureRandom RNG = new SecureRandom(); - - /** - * Fill given array with random bytes. 
- * - * @param b array which needs to be filled with random bytes - */ - public static void random(byte[] b) { - RNG.nextBytes(b); - } - - /** - * Fill given array with random bytes at the specified position. - * - * @param b - * @param offset - * @param length - */ - public static void random(byte[] b, int offset, int length) { - checkPositionIndex(offset, b.length, "offset"); - checkArgument(length > 0, "length must be greater than 0"); - checkPositionIndex(offset + length, b.length, "offset + length"); - byte[] buf = new byte[length]; - RNG.nextBytes(buf); - System.arraycopy(buf, 0, b, offset, length); - } - - /** - * Create a max byte array with the specified max byte count - * - * @param maxByteCount the length of returned byte array - * @return the created max byte array - */ - public static byte[] createMaxByteArray(int maxByteCount) { - byte[] maxByteArray = new byte[maxByteCount]; - for (int i = 0; i < maxByteArray.length; i++) { - maxByteArray[i] = (byte) 0xff; - } - return maxByteArray; - } - - /** - * Create a byte array which is multiple given bytes - * - * @param srcBytes - * @param multiNum - * @return byte array - */ - public static byte[] multiple(byte[] srcBytes, int multiNum) { - if (multiNum <= 0) { - return new byte[0]; - } - byte[] result = new byte[srcBytes.length * multiNum]; - for (int i = 0; i < multiNum; i++) { - System.arraycopy(srcBytes, 0, result, i * srcBytes.length, srcBytes.length); - } - return result; - } - - /** - * Convert a byte array into a hex string - * - * @param b - */ - public static String toHex(byte[] b) { - checkArgument(b.length > 0, "length must be greater than 0"); - return String.format("%x", new BigInteger(1, b)); - } - - /** - * Create a byte array from a string of hash digits. 
The length of the - * string must be a multiple of 2 - * - * @param hex - */ - public static byte[] fromHex(String hex) { - checkArgument(hex.length() > 0, "length must be greater than 0"); - checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2"); - // Make sure letters are upper case - hex = hex.toUpperCase(); - byte[] b = new byte[hex.length() / 2]; - for (int i = 0; i < b.length; i++) { - b[i] = (byte) ((toBinaryFromHex((byte) hex.charAt(2 * i)) << 4) + toBinaryFromHex((byte) hex.charAt((2 * i + 1)))); - } - return b; - } - -}
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/common/src/main/java/org/apache/kylin/common/util/BytesSerializer.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesSerializer.java b/common/src/main/java/org/apache/kylin/common/util/BytesSerializer.java deleted file mode 100644 index b192348..0000000 --- a/common/src/main/java/org/apache/kylin/common/util/BytesSerializer.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - package org.apache.kylin.common.util; - - import java.nio.ByteBuffer; - - /** - * Contract for writing a value of type {@code T} into a {@link ByteBuffer} and reading one back. - * - * @param <T> the value type handled by an implementation - * - * @author yangli9 - * - */ - public interface BytesSerializer<T> { - - /** Buffer size constant with value 65536; its unit and its consumers are not visible in this file. */ public static final int SERIALIZE_BUFFER_SIZE = 65536; - - /** Implementations are expected to write {@code value} into {@code out}. */ abstract public void serialize(T value, ByteBuffer out); - - /** Implementations are expected to read a value back from {@code in} and return it. */ abstract public T deserialize(ByteBuffer in); - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/common/src/main/java/org/apache/kylin/common/util/BytesSplitter.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/kylin/common/util/BytesSplitter.java b/common/src/main/java/org/apache/kylin/common/util/BytesSplitter.java deleted file mode 100644 index ef0d827..0000000 --- a/common/src/main/java/org/apache/kylin/common/util/BytesSplitter.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
*/

package org.apache.kylin.common.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Splits a row of bytes on a single-byte delimiter into a preallocated array of
 * {@link SplittedBytes} buffers. Each field is copied into the next buffer and is
 * truncated when it is longer than that buffer's {@code value} array.
 *
 * @author xjiang
 */
public class BytesSplitter {
    private static final Logger logger = LoggerFactory.getLogger(BytesSplitter.class);

    /** Candidate delimiters tried by {@link #detectDelim}: DEL (octal 177), '|', tab and ','. */
    private static final int[] COMMON_DELIMS = new int[] { "\177".codePointAt(0), "|".codePointAt(0), "\t".codePointAt(0), ",".codePointAt(0) };

    private SplittedBytes[] splitBuffers;
    private int bufferSize;

    /**
     * @return the backing buffer array; only the first {@link #getBufferSize()} entries
     *         were written by the most recent {@link #split} call
     */
    public SplittedBytes[] getSplitBuffers() {
        return splitBuffers;
    }

    /**
     * @param index position of the wanted buffer
     * @return the buffer at {@code index}
     */
    public SplittedBytes getSplitBuffer(int index) {
        return splitBuffers[index];
    }

    /**
     * @return the number of fields produced by the most recent {@link #split} call
     */
    public int getBufferSize() {
        return bufferSize;
    }

    /**
     * Allocates {@code splitLen} buffers up front.
     *
     * @param splitLen number of buffers, i.e. the maximum number of fields a row may have
     * @param bytesLen argument passed to each {@code SplittedBytes} constructor
     *                 (presumably the per-field capacity; confirm against SplittedBytes)
     */
    public BytesSplitter(int splitLen, int bytesLen) {
        this.splitBuffers = new SplittedBytes[splitLen];
        for (int i = 0; i < splitLen; i++) {
            this.splitBuffers[i] = new SplittedBytes(bytesLen);
        }
        this.bufferSize = 0;
    }

    /**
     * Splits the first {@code byteLen} bytes of {@code bytes} on {@code delimiter},
     * overwriting the buffers from the start. A trailing field is always emitted, so a
     * row with {@code k} delimiters yields {@code k + 1} fields.
     *
     * @param bytes the row to split
     * @param byteLen number of valid bytes in {@code bytes}
     * @param delimiter the byte separating fields
     * @return the number of fields written
     * @throws ArrayIndexOutOfBoundsException if the row has more fields than there are buffers
     */
    public int split(byte[] bytes, int byteLen, byte delimiter) {
        this.bufferSize = 0;
        int fieldStart = 0;
        int fieldLength = 0;
        for (int i = 0; i < byteLen; i++) {
            if (bytes[i] == delimiter) {
                storeField(bytes, fieldStart, fieldLength);
                fieldStart = i + 1;
                fieldLength = 0;
            } else {
                fieldLength++;
            }
        }
        // The bytes after the last delimiter (possibly none) form the final field.
        storeField(bytes, fieldStart, fieldLength);

        return bufferSize;
    }

    /** Copies one field into the next free buffer, truncating it to that buffer's capacity. */
    private void storeField(byte[] bytes, int offset, int length) {
        SplittedBytes split = this.splitBuffers[this.bufferSize++];
        int copied = Math.min(length, split.value.length);
        System.arraycopy(bytes, offset, split.value, 0, copied);
        split.length = copied;
    }

    /**
     * Infers the row delimiter by trying every byte of the row that is not a letter or
     * digit and keeping the one that splits the row into exactly {@code expectedSplits}
     * fields.
     *
     * Only the first {@code byteLen} bytes are considered as candidates, matching the
     * range that {@link #split} reads. Every candidate is tried with {@link #split}, so
     * on return the buffers hold the result for the last candidate tried, which is not
     * necessarily the returned delimiter.
     *
     * @param bytes the row to inspect
     * @param byteLen number of valid bytes in {@code bytes}
     * @param expectedSplits the number of fields the row must split into
     * @return the only byte that splits the row into {@code expectedSplits} fields
     * @throws IOException if {@code expectedSplits} exceeds the number of buffers, if two
     *         different bytes both qualify, or if no byte qualifies
     */
    public byte inferByteRowDelimiter(byte[] bytes, int byteLen, int expectedSplits) throws IOException {

        if (expectedSplits > this.splitBuffers.length)
            throw new IOException("expectSplits can not be greater than split buffer size");

        boolean delimiterFound = false;
        byte foundDelimiter = 0;
        // Never look past the valid data, and never past the array itself.
        final int scanEnd = Math.min(byteLen, bytes.length);
        for (int i = 0; i < scanEnd; ++i) {
            byte c = bytes[i];
            if (Character.isLetterOrDigit((char) c)) {
                continue;
            }

            int actualSplits;
            try {
                actualSplits = this.split(bytes, byteLen, c);
            } catch (IndexOutOfBoundsException e) {
                // The candidate produced more fields than there are buffers: not the delimiter.
                logger.info("Unqualified delimiter pruned, value is " + c);
                continue;
            }

            if (actualSplits != expectedSplits) {
                continue;
            }
            if (!delimiterFound) {
                logger.info("Delimiter found, value is : " + c);
                delimiterFound = true;
                foundDelimiter = c;
            } else if (c != foundDelimiter) {
                // Raised outside the try block so the ambiguity reaches the caller
                // instead of being logged as a pruned candidate.
                throw new IOException("Duplicate delimiter found, found delimiter is : " + foundDelimiter + " new delimiter is " + c);
            }
        }

        if (delimiterFound)
            return foundDelimiter;
        else
            throw new IOException("No delimiter found");
    }

    /**
     * Tries each of the common delimiters in turn and returns the first one that splits
     * the line into {@code expectedParts} fields.
     *
     * @param value the line to inspect
     * @param expectedParts the number of fields the line must split into
     * @return the code point of the matching delimiter
     * @throws RuntimeException if none of the common delimiters produces {@code expectedParts} fields
     */
    public int detectDelim(Text value, int expectedParts) {
        for (int i = 0; i < COMMON_DELIMS.length; i++) {
            int nParts = split(value.getBytes(), value.getLength(), (byte) COMMON_DELIMS[i]);
            if (nParts == expectedParts)
                return COMMON_DELIMS[i];
        }
        throw new RuntimeException("Cannot detect delimeter from first line -- " + value.toString() + " -- expect " + expectedParts + " columns");
    }

    /**
     * @return the fields of the most recent split, comma separated and enclosed in
     *         square brackets
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        buf.append("[");
        for (int i = 0; i < bufferSize; i++) {
            if (i > 0)
                buf.append(", ");

            buf.append(Bytes.toString(splitBuffers[i].value, 0, splitBuffers[i].length));
        }
        buf.append("]");
        return buf.toString();
    }

    /**
     * Splits {@code bytes}, from {@code offset} to the end of the array, on
     * {@code delimiter} and converts every field to a string with {@code Bytes.toString}.
     *
     * @param bytes the bytes to split
     * @param offset index of the first byte to consider
     * @param delimiter the byte separating fields
     * @return the fields in order; always contains at least the trailing field
     */
    public static List<String> splitToString(byte[] bytes, int offset, byte delimiter) {
        List<String> splitStrings = new ArrayList<String>();
        // The first field begins at offset, not at the start of the array.
        int splitOffset = offset;
        int splitLength = 0;
        for (int i = offset; i < bytes.length; i++) {
            if (bytes[i] == delimiter) {
                splitStrings.add(Bytes.toString(bytes, splitOffset, splitLength));
                splitOffset = i + 1;
                splitLength = 0;
            } else {
                splitLength++;
            }
        }
        splitStrings.add(Bytes.toString(bytes, splitOffset, splitLength));
        return splitStrings;
    }

}
