http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index f44a84b..b375d26 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -18,24 +18,15 @@ package org.apache.hadoop.hive.serde2.lazy.fast; -import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.charset.CharacterCodingException; import java.sql.Date; -import java.sql.Timestamp; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyBinary; import org.apache.hadoop.hive.serde2.lazy.LazyByte; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; @@ -43,11 +34,8 @@ import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; import org.apache.hive.common.util.TimestampParser; @@ -66,17 +54,17 @@ import org.apache.hive.common.util.TimestampParser; * other type specific buffers. So, those references are only valid until the next time set is * called. */ -public final class LazySimpleDeserializeRead implements DeserializeRead { +public final class LazySimpleDeserializeRead extends DeserializeRead { public static final Logger LOG = LoggerFactory.getLogger(LazySimpleDeserializeRead.class.getName()); - private TypeInfo[] typeInfos; - + private int[] startPosition; private byte separator; private boolean isEscaped; private byte escapeChar; private byte[] nullSequenceBytes; private boolean isExtendedBooleanLiteral; + private boolean lastColumnTakesRest; private byte[] bytes; private int start; @@ -87,34 +75,18 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { private int fieldStart; private int fieldLength; - private boolean saveBool; - private byte saveByte; - private short saveShort; - private int saveInt; - private long saveLong; - private float saveFloat; - private double saveDouble; - private byte[] saveBytes; - private int saveBytesStart; - private int saveBytesLength; - private Date saveDate; - private Timestamp saveTimestamp; - private HiveIntervalYearMonth saveIntervalYearMonth; - private HiveIntervalDayTime saveIntervalDayTime; - private HiveDecimal saveDecimal; - private DecimalTypeInfo saveDecimalTypeInfo; - private Text tempText; private TimestampParser timestampParser; - private boolean readBeyondConfiguredFieldsWarned; - private boolean readBeyondBufferRangeWarned; - private boolean bufferRangeHasExtraDataWarned; + private boolean extraFieldWarned; + private boolean missingFieldWarned; public LazySimpleDeserializeRead(TypeInfo[] typeInfos, byte separator, LazySerDeParameters lazyParams) { + super(typeInfos); - this.typeInfos = typeInfos; + // Field length is difference between positions hence one extra. + startPosition = new int[typeInfos.length + 1]; this.separator = separator; @@ -122,24 +94,21 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { escapeChar = lazyParams.getEscapeChar(); nullSequenceBytes = lazyParams.getNullSequence().getBytes(); isExtendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral(); + lastColumnTakesRest = lazyParams.isLastColumnTakesRest(); fieldCount = typeInfos.length; tempText = new Text(); - readBeyondConfiguredFieldsWarned = false; - readBeyondBufferRangeWarned = false; - bufferRangeHasExtraDataWarned = false; + extraFieldWarned = false; + missingFieldWarned = false; } - // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. - private LazySimpleDeserializeRead() { + public LazySimpleDeserializeRead(TypeInfo[] typeInfos, LazySerDeParameters lazyParams) { + this(typeInfos, lazyParams.getSeparators()[0], lazyParams); } - /* - * The type information for all fields. - */ - @Override - public TypeInfo[] typeInfos() { - return typeInfos; + // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. + private LazySimpleDeserializeRead() { + super(); } /* @@ -154,6 +123,64 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { fieldIndex = -1; } + /** + * Parse the byte[] and fill each field. + * + * This is an adapted version of the parse method in the LazyStruct class. + * They should parse things the same way. + */ + private void parse() { + + int structByteEnd = end; + int fieldId = 0; + int fieldByteBegin = start; + int fieldByteEnd = start; + + // Go through all bytes in the byte[] + while (fieldByteEnd <= structByteEnd) { + if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) { + // Reached the end of a field? + if (lastColumnTakesRest && fieldId == fieldCount - 1) { + fieldByteEnd = structByteEnd; + } + startPosition[fieldId] = fieldByteBegin; + fieldId++; + if (fieldId == fieldCount || fieldByteEnd == structByteEnd) { + // All fields have been parsed, or bytes have been parsed. + // We need to set the startPosition of fields.length to ensure we + // can use the same formula to calculate the length of each field. + // For missing fields, their starting positions will all be the same, + // which will make their lengths to be -1 and uncheckedGetField will + // return these fields as NULLs. + for (int i = fieldId; i <= fieldCount; i++) { + startPosition[i] = fieldByteEnd + 1; + } + break; + } + fieldByteBegin = fieldByteEnd + 1; + fieldByteEnd++; + } else { + if (isEscaped && bytes[fieldByteEnd] == escapeChar + && fieldByteEnd + 1 < structByteEnd) { + // ignore the char after escape_char + fieldByteEnd += 2; + } else { + fieldByteEnd++; + } + } + } + + // Extra bytes at the end? + if (!extraFieldWarned && fieldByteEnd < structByteEnd) { + doExtraFieldWarned(); + } + + // Missing fields? + if (!missingFieldWarned && fieldId < fieldCount) { + doMissingFieldWarned(fieldId); + } + } + /* * Reads the NULL information for a field. * @@ -162,57 +189,24 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { */ @Override public boolean readCheckNull() { - if (++fieldIndex >= fieldCount) { - // Reading beyond the specified field count produces NULL. - if (!readBeyondConfiguredFieldsWarned) { - // Warn only once. - LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " - + " reading more (NULLs returned). Ignoring similar problems."); - readBeyondConfiguredFieldsWarned = true; - } + if (fieldIndex == -1) { + parse(); + fieldIndex = 0; + } else if (fieldIndex + 1 >= fieldCount) { return true; + } else { + fieldIndex++; } - if (offset > end) { - // We must allow for an empty field at the end, so no strict >= checking. - if (!readBeyondBufferRangeWarned) { - // Warn only once. - int length = end - start; - LOG.info("Reading beyond buffer range! Buffer range " + start - + " for length " + length + " but reading more (NULLs returned)." - + " Ignoring similar problems."); - readBeyondBufferRangeWarned = true; - } - - // char[] charsBuffer = new char[end - start]; - // for (int c = 0; c < charsBuffer.length; c++) { - // charsBuffer[c] = (char) (bytes[start + c] & 0xFF); - // } + // Do we want this field? + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { return true; } - fieldStart = offset; - while (true) { - if (offset >= end) { - fieldLength = offset - fieldStart; - break; - } - if (bytes[offset] == separator) { - fieldLength = (offset++ - fieldStart); - break; - } - if (isEscaped && bytes[offset] == escapeChar - && offset + 1 < end) { - // Ignore the char after escape char. - offset += 2; - } else { - offset++; - } - } - - char[] charField = new char[fieldLength]; - for (int c = 0; c < charField.length; c++) { - charField[c] = (char) (bytes[fieldStart + c] & 0xFF); + fieldStart = startPosition[fieldIndex]; + fieldLength = startPosition[fieldIndex + 1] - startPosition[fieldIndex] - 1; + if (fieldLength < 0) { + return true; } // Is the field the configured string representing NULL? @@ -231,7 +225,10 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { } } - switch (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory()) { + /* + * We have a field and are positioned to it. Read it. + */ + switch (primitiveCategories[fieldIndex]) { case BOOLEAN: { int i = fieldStart; @@ -240,7 +237,7 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { (bytes[i + 1] == 'R' || bytes[i + 1] == 'r') && (bytes[i + 2] == 'U' || bytes[i + 1] == 'u') && (bytes[i + 3] == 'E' || bytes[i + 3] == 'e')) { - saveBool = true; + currentBoolean = true; } else { // No boolean value match for 5 char field. return true; @@ -251,7 +248,7 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { (bytes[i + 2] == 'L' || bytes[i + 2] == 'l') && (bytes[i + 3] == 'S' || bytes[i + 3] == 's') && (bytes[i + 4] == 'E' || bytes[i + 4] == 'e')) { - saveBool = false; + currentBoolean = false; } else { // No boolean value match for 4 char field. return true; @@ -259,9 +256,9 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { } else if (isExtendedBooleanLiteral && fieldLength == 1) { byte b = bytes[fieldStart]; if (b == '1' || b == 't' || b == 'T') { - saveBool = true; + currentBoolean = true; } else if (b == '0' || b == 'f' || b == 'F') { - saveBool = false; + currentBoolean = false; } else { // No boolean value match for extended 1 char field. return true; @@ -274,66 +271,42 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { break; case BYTE: try { - saveByte = LazyByte.parseByte(bytes, fieldStart, fieldLength, 10); + currentByte = LazyByte.parseByte(bytes, fieldStart, fieldLength, 10); } catch (NumberFormatException e) { logExceptionMessage(bytes, fieldStart, fieldLength, "TINYINT"); return true; } -// if (!parseLongFast()) { -// return true; -// } -// saveShort = (short) saveLong; -// if (saveShort != saveLong) { -// return true; -// } break; case SHORT: try { - saveShort = LazyShort.parseShort(bytes, fieldStart, fieldLength, 10); + currentShort = LazyShort.parseShort(bytes, fieldStart, fieldLength, 10); } catch (NumberFormatException e) { logExceptionMessage(bytes, fieldStart, fieldLength, "SMALLINT"); return true; } -// if (!parseLongFast()) { -// return true; -// } -// saveShort = (short) saveLong; -// if (saveShort != saveLong) { -// return true; -// } break; case INT: try { - saveInt = LazyInteger.parseInt(bytes, fieldStart, fieldLength, 10); + currentInt = LazyInteger.parseInt(bytes, fieldStart, fieldLength, 10); } catch (NumberFormatException e) { logExceptionMessage(bytes, fieldStart, fieldLength, "INT"); return true; } -// if (!parseLongFast()) { -// return true; -// } -// saveInt = (int) saveLong; -// if (saveInt != saveLong) { -// return true; -// } break; case LONG: try { - saveLong = LazyLong.parseLong(bytes, fieldStart, fieldLength, 10); + currentLong = LazyLong.parseLong(bytes, fieldStart, fieldLength, 10); } catch (NumberFormatException e) { logExceptionMessage(bytes, fieldStart, fieldLength, "BIGINT"); return true; } -// if (!parseLongFast()) { -// return true; -// } break; case FLOAT: { String byteData = null; try { byteData = Text.decode(bytes, fieldStart, fieldLength); - saveFloat = Float.parseFloat(byteData); + currentFloat = Float.parseFloat(byteData); } catch (NumberFormatException e) { LOG.debug("Data not in the Float data type range so converted to null. Given data is :" + byteData, e); @@ -343,16 +316,13 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { return true; } } -// if (!parseFloat()) { -// return true; -// } break; case DOUBLE: { String byteData = null; try { byteData = Text.decode(bytes, fieldStart, fieldLength); - saveDouble = Double.parseDouble(byteData); + currentDouble = Double.parseDouble(byteData); } catch (NumberFormatException e) { LOG.debug("Data not in the Double data type range so converted to null. Given data is :" + byteData, e); @@ -362,9 +332,6 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { return true; } } -// if (!parseDouble()) { -// return true; -// } break; case STRING: @@ -372,14 +339,14 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { case VARCHAR: if (isEscaped) { LazyUtils.copyAndEscapeStringDataToText(bytes, fieldStart, fieldLength, escapeChar, tempText); - saveBytes = tempText.getBytes(); - saveBytesStart = 0; - saveBytesLength = tempText.getLength(); + currentBytes = tempText.getBytes(); + currentBytesStart = 0; + currentBytesLength = tempText.getLength(); } else { // if the data is not escaped, simply copy the data. - saveBytes = bytes; - saveBytesStart = fieldStart; - saveBytesLength = fieldLength; + currentBytes = bytes; + currentBytesStart = fieldStart; + currentBytesLength = fieldLength; } break; case BINARY: @@ -389,9 +356,9 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { byte[] decoded = LazyBinary.decodeIfNeeded(recv); // use the original bytes in case decoding should fail decoded = decoded.length > 0 ? decoded : recv; - saveBytes = decoded; - saveBytesStart = 0; - saveBytesLength = decoded.length; + currentBytes = decoded; + currentBytesStart = 0; + currentBytesLength = decoded.length; } break; case DATE: @@ -399,15 +366,12 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { String s = null; try { s = Text.decode(bytes, fieldStart, fieldLength); - saveDate = Date.valueOf(s); + currentDateWritable.set(Date.valueOf(s)); } catch (Exception e) { logExceptionMessage(bytes, fieldStart, fieldLength, "DATE"); return true; } } -// if (!parseDate()) { -// return true; -// } break; case TIMESTAMP: { @@ -427,46 +391,37 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { if (timestampParser == null) { timestampParser = new TimestampParser(); } - saveTimestamp = timestampParser.parseTimestamp(s); + currentTimestampWritable.set(timestampParser.parseTimestamp(s)); } catch (IllegalArgumentException e) { logExceptionMessage(bytes, fieldStart, fieldLength, "TIMESTAMP"); return true; } } } -// if (!parseTimestamp()) { -// return true; -// } break; case INTERVAL_YEAR_MONTH: { String s = null; try { s = Text.decode(bytes, fieldStart, fieldLength); - saveIntervalYearMonth = HiveIntervalYearMonth.valueOf(s); + currentHiveIntervalYearMonthWritable.set(HiveIntervalYearMonth.valueOf(s)); } catch (Exception e) { logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_YEAR_MONTH"); return true; } } -// if (!parseIntervalYearMonth()) { -// return true; -// } break; case INTERVAL_DAY_TIME: { String s = null; try { s = Text.decode(bytes, fieldStart, fieldLength); - saveIntervalDayTime = HiveIntervalDayTime.valueOf(s); + currentHiveIntervalDayTimeWritable.set(HiveIntervalDayTime.valueOf(s)); } catch (Exception e) { logExceptionMessage(bytes, fieldStart, fieldLength, "INTERVAL_DAY_TIME"); return true; } } -// if (!parseIntervalDayTime()) { -// return true; -// } break; case DECIMAL: { @@ -478,25 +433,23 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { return true; } - saveDecimal = HiveDecimal.create(byteData); - saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; - int precision = saveDecimalTypeInfo.getPrecision(); - int scale = saveDecimalTypeInfo.getScale(); - saveDecimal = HiveDecimal.enforcePrecisionScale(saveDecimal, precision, - scale); - if (saveDecimal == null) { + HiveDecimal decimal = HiveDecimal.create(byteData); + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + decimal = HiveDecimal.enforcePrecisionScale( + decimal, precision, scale); + if (decimal == null) { LOG.debug("Data not in the HiveDecimal data type range so converted to null. Given data is :" + byteData); return true; } + currentHiveDecimalWritable.set(decimal); } -// if (!parseDecimal()) { -// return true; -// } break; default: - throw new Error("Unexpected primitive category " + ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory()); + throw new Error("Unexpected primitive category " + primitiveCategories[fieldIndex].name()); } return false; @@ -520,17 +473,7 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { */ @Override public void extraFieldsCheck() { - if (offset < end) { - // We did not consume all of the byte range. - if (!bufferRangeHasExtraDataWarned) { - // Warn only once. - int length = end - start; - LOG.info("Not all fields were read in the buffer range! Buffer range " + start - + " for length " + length + " but reading more (NULLs returned)." - + " Ignoring similar problems."); - bufferRangeHasExtraDataWarned = true; - } - } + // UNDONE: Get rid of... } /* @@ -538,407 +481,30 @@ public final class LazySimpleDeserializeRead implements DeserializeRead { */ @Override public boolean readBeyondConfiguredFieldsWarned() { - return readBeyondConfiguredFieldsWarned; + return missingFieldWarned; } @Override public boolean readBeyondBufferRangeWarned() { - return readBeyondBufferRangeWarned; + return extraFieldWarned; } @Override public boolean bufferRangeHasExtraDataWarned() { - return bufferRangeHasExtraDataWarned; - } - - /* - * BOOLEAN. - */ - @Override - public boolean readBoolean() { - return saveBool; - } - - /* - * BYTE. - */ - @Override - public byte readByte() { - return saveByte; - } - - /* - * SHORT. - */ - @Override - public short readShort() { - return saveShort; - } - - /* - * INT. - */ - @Override - public int readInt() { - return saveInt; - } - - /* - * LONG. - */ - @Override - public long readLong() { - return saveLong; - } - - /* - * FLOAT. - */ - @Override - public float readFloat() { - return saveFloat; - } - - /* - * DOUBLE. - */ - @Override - public double readDouble() { - return saveDouble; - } - - /* - * STRING. - * - * Can be used to read CHAR and VARCHAR when the caller takes responsibility for - * truncation/padding issues. - */ - - // This class is for internal use. - private class LazySimpleReadStringResults extends ReadStringResults { - public LazySimpleReadStringResults() { - super(); - } - } - - // Reading a STRING field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. - @Override - public ReadStringResults createReadStringResults() { - return new LazySimpleReadStringResults(); - } - - @Override - public void readString(ReadStringResults readStringResults) { - readStringResults.bytes = saveBytes; - readStringResults.start = saveBytesStart; - readStringResults.length = saveBytesLength; - } - - /* - * CHAR. - */ - - // This class is for internal use. - private static class LazySimpleReadHiveCharResults extends ReadHiveCharResults { - - // Use our STRING reader. - public LazySimpleReadStringResults readStringResults; - - public LazySimpleReadHiveCharResults() { - super(); - } - - public HiveCharWritable getHiveCharWritable() { - return hiveCharWritable; - } - } - - // Reading a CHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different CHAR field. - @Override - public ReadHiveCharResults createReadHiveCharResults() { - return new LazySimpleReadHiveCharResults(); - } - - @Override - public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { - LazySimpleReadHiveCharResults LazySimpleReadHiveCharResults = (LazySimpleReadHiveCharResults) readHiveCharResults; - - if (!LazySimpleReadHiveCharResults.isInit()) { - LazySimpleReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]); - } - - if (LazySimpleReadHiveCharResults.readStringResults == null) { - LazySimpleReadHiveCharResults.readStringResults = new LazySimpleReadStringResults(); - } - LazySimpleReadStringResults readStringResults = LazySimpleReadHiveCharResults.readStringResults; - - // Read the bytes using our basic method. - readString(readStringResults); - - // Copy the bytes into our Text object, then truncate. - HiveCharWritable hiveCharWritable = LazySimpleReadHiveCharResults.getHiveCharWritable(); - hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); - hiveCharWritable.enforceMaxLength(LazySimpleReadHiveCharResults.getMaxLength()); - - readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); - readHiveCharResults.start = 0; - readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); - } - - /* - * VARCHAR. - */ - - // This class is for internal use. - private static class LazySimpleReadHiveVarcharResults extends ReadHiveVarcharResults { - - // Use our bytes reader. - public LazySimpleReadStringResults readStringResults; - - public LazySimpleReadHiveVarcharResults() { - super(); - } - - public HiveVarcharWritable getHiveVarcharWritable() { - return hiveVarcharWritable; - } - } - - // Reading a VARCHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different VARCHAR field. - @Override - public ReadHiveVarcharResults createReadHiveVarcharResults() { - return new LazySimpleReadHiveVarcharResults(); - } - - @Override - public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { - LazySimpleReadHiveVarcharResults lazySimpleReadHiveVarvarcharResults = (LazySimpleReadHiveVarcharResults) readHiveVarcharResults; - - if (!lazySimpleReadHiveVarvarcharResults.isInit()) { - lazySimpleReadHiveVarvarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]); - } - - if (lazySimpleReadHiveVarvarcharResults.readStringResults == null) { - lazySimpleReadHiveVarvarcharResults.readStringResults = new LazySimpleReadStringResults(); - } - LazySimpleReadStringResults readStringResults = lazySimpleReadHiveVarvarcharResults.readStringResults; - - // Read the bytes using our basic method. - readString(readStringResults); - - // Copy the bytes into our Text object, then truncate. - HiveVarcharWritable hiveVarcharWritable = lazySimpleReadHiveVarvarcharResults.getHiveVarcharWritable(); - hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); - hiveVarcharWritable.enforceMaxLength(lazySimpleReadHiveVarvarcharResults.getMaxLength()); - - readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); - readHiveVarcharResults.start = 0; - readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); - } - - /* - * BINARY. - */ - - // This class is for internal use. - private class LazySimpleReadBinaryResults extends ReadBinaryResults { - public LazySimpleReadBinaryResults() { - super(); - } - } - - // Reading a BINARY field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. - @Override - public ReadBinaryResults createReadBinaryResults() { - return new LazySimpleReadBinaryResults(); - } - - @Override - public void readBinary(ReadBinaryResults readBinaryResults) { - readBinaryResults.bytes = saveBytes; - readBinaryResults.start = saveBytesStart; - readBinaryResults.length = saveBytesLength; - } - - /* - * DATE. - */ - - // This class is for internal use. - private static class LazySimpleReadDateResults extends ReadDateResults { - - public LazySimpleReadDateResults() { - super(); - } - - public DateWritable getDateWritable() { - return dateWritable; - } - } - - // Reading a DATE field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DATE field. - @Override - public ReadDateResults createReadDateResults() { - return new LazySimpleReadDateResults(); - } - - @Override - public void readDate(ReadDateResults readDateResults) { - LazySimpleReadDateResults lazySimpleReadDateResults = (LazySimpleReadDateResults) readDateResults; - - DateWritable dateWritable = lazySimpleReadDateResults.getDateWritable(); - dateWritable.set(saveDate); - saveDate = null; - } - - - /* - * INTERVAL_YEAR_MONTH. - */ - - // This class is for internal use. - private static class LazySimpleReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { - - public LazySimpleReadIntervalYearMonthResults() { - super(); - } - - public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { - return hiveIntervalYearMonthWritable; - } - } - - // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_YEAR_MONTH field. - @Override - public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { - return new LazySimpleReadIntervalYearMonthResults(); - } - - @Override - public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) - throws IOException { - LazySimpleReadIntervalYearMonthResults lazySimpleReadIntervalYearMonthResults = - (LazySimpleReadIntervalYearMonthResults) readIntervalYearMonthResults; - - HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = - lazySimpleReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); - hiveIntervalYearMonthWritable.set(saveIntervalYearMonth); - saveIntervalYearMonth = null; - } - - /* - * INTERVAL_DAY_TIME. - */ - - // This class is for internal use. - private static class LazySimpleReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { - - public LazySimpleReadIntervalDayTimeResults() { - super(); - } - - public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { - return hiveIntervalDayTimeWritable; - } + return false; // UNDONE: Get rid of... } - // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_DAY_TIME field. - @Override - public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { - return new LazySimpleReadIntervalDayTimeResults(); + private void doExtraFieldWarned() { + extraFieldWarned = true; + LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + + "problems."); } - @Override - public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) - throws IOException { - LazySimpleReadIntervalDayTimeResults lazySimpleReadIntervalDayTimeResults = - (LazySimpleReadIntervalDayTimeResults) readIntervalDayTimeResults; - - HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = - lazySimpleReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); - hiveIntervalDayTimeWritable.set(saveIntervalDayTime); - saveIntervalDayTime = null; + private void doMissingFieldWarned(int fieldId) { + missingFieldWarned = true; + LOG.info("Missing fields! Expected " + fieldCount + " fields but " + + "only got " + fieldId + "! Ignoring similar problems."); } - /* - * TIMESTAMP. - */ - - // This class is for internal use. - private static class LazySimpleReadTimestampResults extends ReadTimestampResults { - - public LazySimpleReadTimestampResults() { - super(); - } - - public TimestampWritable getTimestampWritable() { - return timestampWritable; - } - } - - // Reading a TIMESTAMP field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different TIMESTAMP field. - @Override - public ReadTimestampResults createReadTimestampResults() { - return new LazySimpleReadTimestampResults(); - } - - @Override - public void readTimestamp(ReadTimestampResults readTimestampResults) { - LazySimpleReadTimestampResults lazySimpleReadTimestampResults = - (LazySimpleReadTimestampResults) readTimestampResults; - - TimestampWritable timestampWritable = lazySimpleReadTimestampResults.getTimestampWritable(); - timestampWritable.set(saveTimestamp); - saveTimestamp = null; - } - - /* - * DECIMAL. - */ - - // This class is for internal use. - private static class LazySimpleReadDecimalResults extends ReadDecimalResults { - - HiveDecimal hiveDecimal; - - public LazySimpleReadDecimalResults() { - super(); - } - - @Override - public HiveDecimal getHiveDecimal() { - return hiveDecimal; - } - } - - // Reading a DECIMAL field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DECIMAL field. - @Override - public ReadDecimalResults createReadDecimalResults() { - return new LazySimpleReadDecimalResults(); - } - - @Override - public void readHiveDecimal(ReadDecimalResults readDecimalResults) { - LazySimpleReadDecimalResults lazySimpleReadDecimalResults = (LazySimpleReadDecimalResults) readDecimalResults; - - if (!lazySimpleReadDecimalResults.isInit()) { - lazySimpleReadDecimalResults.init(saveDecimalTypeInfo); - } - - lazySimpleReadDecimalResults.hiveDecimal = saveDecimal; - - saveDecimal = null; - saveDecimalTypeInfo = null; - } + //------------------------------------------------------------------------------------------------ private static byte[] maxLongBytes = ((Long) Long.MAX_VALUE).toString().getBytes(); private static int maxLongDigitsCount = maxLongBytes.length;
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index c5f0730..4415431 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -20,28 +20,17 @@ package org.apache.hadoop.hive.serde2.lazybinary.fast; import java.io.EOFException; import java.io.IOException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; /* * Directly deserialize with the caller reading field-by-field the LazyBinary serialization format. @@ -57,11 +46,9 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; * other type specific buffers. So, those references are only valid until the next time set is * called. */ -public final class LazyBinaryDeserializeRead implements DeserializeRead { +public final class LazyBinaryDeserializeRead extends DeserializeRead { public static final Logger LOG = LoggerFactory.getLogger(LazyBinaryDeserializeRead.class.getName()); - private TypeInfo[] typeInfos; - private byte[] bytes; private int start; private int offset; @@ -70,20 +57,16 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { private int fieldIndex; private byte nullByte; - private DecimalTypeInfo saveDecimalTypeInfo; - private HiveDecimal saveDecimal; - // Object to receive results of reading a decoded variable length int or long. private VInt tempVInt; private VLong tempVLong; - private HiveDecimalWritable tempHiveDecimalWritable; private boolean readBeyondConfiguredFieldsWarned; private boolean readBeyondBufferRangeWarned; private boolean bufferRangeHasExtraDataWarned; public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) { - this.typeInfos = typeInfos; + super(typeInfos); fieldCount = typeInfos.length; tempVInt = new VInt(); tempVLong = new VLong(); @@ -94,13 +77,7 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { // Not public since we must have the field count so every 8 fields NULL bytes can be navigated. private LazyBinaryDeserializeRead() { - } - - /* - * The type information for all fields. - */ - public TypeInfo[] typeInfos() { - return typeInfos; + super(); } /* @@ -144,7 +121,11 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { } // NOTE: The bit is set to 1 if a field is NOT NULL. - if ((nullByte & (1 << (fieldIndex % 8))) != 0) { + boolean isNull; + if ((nullByte & (1 << (fieldIndex % 8))) == 0) { + isNull = true; + } else { + isNull = false; // Assume. // Make sure there is at least one byte that can be read for a value. if (offset >= end) { @@ -153,18 +134,192 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { warnBeyondEof(); } - // We have a field and are positioned to it. + /* + * We have a field and are positioned to it. Read it. + */ + switch (primitiveCategories[fieldIndex]) { + case BOOLEAN: + // No check needed for single byte read. + currentBoolean = (bytes[offset++] != 0); + break; + case BYTE: + // No check needed for single byte read. + currentByte = bytes[offset++]; + break; + case SHORT: + // Last item -- ok to be at end. + if (offset + 2 > end) { + warnBeyondEof(); + } + currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset); + offset += 2; + break; + case INT: + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + currentInt = tempVInt.value; + break; + case LONG: + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + currentLong = tempVLong.value; + break; + case FLOAT: + // Last item -- ok to be at end. + if (offset + 4 > end) { + warnBeyondEof(); + } + currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset)); + offset += 4; + break; + case DOUBLE: + // Last item -- ok to be at end. + if (offset + 8 > end) { + warnBeyondEof(); + } + currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset)); + offset += 8; + break; + + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + { + // using vint instead of 4 bytes + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Could be last item for empty string -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + int saveStart = offset; + int length = tempVInt.value; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + currentBytes = bytes; + currentBytesStart = saveStart; + currentBytesLength = length; + } + break; + case DATE: + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + currentDateWritable.set(tempVInt.value); + break; + case TIMESTAMP: + { + int length = TimestampWritable.getTotalLength(bytes, offset); + int saveStart = offset; + offset += length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + currentTimestampWritable.set(bytes, saveStart); + } + break; + case INTERVAL_YEAR_MONTH: + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + currentHiveIntervalYearMonthWritable.set(tempVInt.value); + break; + case INTERVAL_DAY_TIME: + LazyBinaryUtils.readVLong(bytes, offset, tempVLong); + offset += tempVLong.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + + currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value); + break; + case DECIMAL: + { + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. + + // These calls are to see how much data there is. The setFromBytes call below will do the same + // readVInt reads but actually unpack the decimal. + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + int saveStart = offset; + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + LazyBinaryUtils.readVInt(bytes, offset, tempVInt); + offset += tempVInt.length; + if (offset >= end) { + // Overshoot or not enough for next item. + warnBeyondEof(); + } + offset += tempVInt.value; + // Last item -- ok to be at end. + if (offset > end) { + warnBeyondEof(); + } + int length = offset - saveStart; + + LazyBinarySerDe.setFromBytes(bytes, saveStart, length, + currentHiveDecimalWritable); + + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; + + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + + HiveDecimal decimal = currentHiveDecimalWritable.getHiveDecimal(precision, scale); + if (decimal == null) { + isNull = true; + } else { + // Put value back into writable. + currentHiveDecimalWritable.set(decimal); + } + } + break; - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; + default: + throw new Error("Unexpected primitive category " + primitiveCategories[fieldIndex].name()); } - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. - return earlyReadHiveDecimal(); + /* + * Now that we have read through the field -- did we really want it? + */ + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + isNull = true; + } } - // When NULL, we need to move past this field. + // Logically move past this field. fieldIndex++; // Every 8 fields we read a new NULL byte. @@ -178,7 +333,7 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { } } - return true; + return isNull; } /* @@ -191,7 +346,7 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { // Warn only once. int length = end - start; int remaining = end - offset; - LOG.info("Not all fields were read in the buffer range! Buffer range " + start + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + " for length " + length + " but " + remaining + " bytes remain. " + "(total buffer length " + bytes.length + ")" + " Ignoring similar problems."); @@ -220,726 +375,11 @@ public final class LazyBinaryDeserializeRead implements DeserializeRead { if (!readBeyondBufferRangeWarned) { // Warn only once. int length = end - start; - LOG.info("Reading beyond buffer range! Buffer range " + start + LOG.info("Reading beyond buffer range! Buffer range " + start + " for length " + length + " but reading more... " + "(total buffer length " + bytes.length + ")" + " Ignoring similar problems."); readBeyondBufferRangeWarned = true; } } - - /* - * BOOLEAN. - */ - @Override - public boolean readBoolean() throws IOException { - // No check needed for single byte read. - byte result = bytes[offset++]; - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return (result != 0); - } - - /* - * BYTE. - */ - @Override - public byte readByte() throws IOException { - // No check needed for single byte read. - byte result = bytes[offset++]; - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - if ((fieldIndex % 8) == 0) { - nullByte = bytes[offset++]; - } - } - - return result; - } - - /* - * SHORT. - */ - @Override - public short readShort() throws IOException { - // Last item -- ok to be at end. - if (offset + 2 > end) { - warnBeyondEof(); - } - short result = LazyBinaryUtils.byteArrayToShort(bytes, offset); - offset += 2; - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return result; - } - - /* - * INT. - */ - @Override - public int readInt() throws IOException { - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return tempVInt.value; - } - - /* - * LONG. - */ - @Override - public long readLong() throws IOException { - LazyBinaryUtils.readVLong(bytes, offset, tempVLong); - offset += tempVLong.length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return tempVLong.value; - } - - /* - * FLOAT. - */ - @Override - public float readFloat() throws IOException { - // Last item -- ok to be at end. - if (offset + 4 > end) { - warnBeyondEof(); - } - float result = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset)); - offset += 4; - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return result; - } - - /* - * DOUBLE. - */ - @Override - public double readDouble() throws IOException { - // Last item -- ok to be at end. - if (offset + 8 > end) { - warnBeyondEof(); - } - double result = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset)); - offset += 8; - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - return result; - } - - /* - * STRING. - * - * Can be used to read CHAR and VARCHAR when the caller takes responsibility for - * truncation/padding issues. - */ - - // This class is for internal use. - private class LazyBinaryReadStringResults extends ReadStringResults { - public LazyBinaryReadStringResults() { - super(); - } - } - - // Reading a STRING field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. - @Override - public ReadStringResults createReadStringResults() { - return new LazyBinaryReadStringResults(); - } - - @Override - public void readString(ReadStringResults readStringResults) throws IOException { - // using vint instead of 4 bytes - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - // Could be last item for empty string -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - int saveStart = offset; - int length = tempVInt.value; - offset += length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - readStringResults.bytes = bytes; - readStringResults.start = saveStart; - readStringResults.length = length; - } - - /* - * CHAR. - */ - - // This class is for internal use. - private static class LazyBinaryReadHiveCharResults extends ReadHiveCharResults { - - // Use our STRING reader. - public LazyBinaryReadStringResults readStringResults; - - public LazyBinaryReadHiveCharResults() { - super(); - } - - public HiveCharWritable getHiveCharWritable() { - return hiveCharWritable; - } - } - - // Reading a CHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different CHAR field. - @Override - public ReadHiveCharResults createReadHiveCharResults() { - return new LazyBinaryReadHiveCharResults(); - } - - public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { - LazyBinaryReadHiveCharResults lazyBinaryReadHiveCharResults = (LazyBinaryReadHiveCharResults) readHiveCharResults; - - if (!lazyBinaryReadHiveCharResults.isInit()) { - lazyBinaryReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]); - } - - if (lazyBinaryReadHiveCharResults.readStringResults == null) { - lazyBinaryReadHiveCharResults.readStringResults = new LazyBinaryReadStringResults(); - } - LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveCharResults.readStringResults; - - // Read the bytes using our basic method. - readString(readStringResults); - - // Copy the bytes into our Text object, then truncate. - HiveCharWritable hiveCharWritable = lazyBinaryReadHiveCharResults.getHiveCharWritable(); - hiveCharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); - hiveCharWritable.enforceMaxLength(lazyBinaryReadHiveCharResults.getMaxLength()); - - readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); - readHiveCharResults.start = 0; - readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); - } - - /* - * VARCHAR. - */ - - // This class is for internal use. - private static class LazyBinaryReadHiveVarcharResults extends ReadHiveVarcharResults { - - // Use our STRING reader. - public LazyBinaryReadStringResults readStringResults; - - public LazyBinaryReadHiveVarcharResults() { - super(); - } - - public HiveVarcharWritable getHiveVarcharWritable() { - return hiveVarcharWritable; - } - } - - // Reading a VARCHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different VARCHAR field. - @Override - public ReadHiveVarcharResults createReadHiveVarcharResults() { - return new LazyBinaryReadHiveVarcharResults(); - } - - public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { - LazyBinaryReadHiveVarcharResults lazyBinaryReadHiveVarcharResults = (LazyBinaryReadHiveVarcharResults) readHiveVarcharResults; - - if (!lazyBinaryReadHiveVarcharResults.isInit()) { - lazyBinaryReadHiveVarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]); - } - - if (lazyBinaryReadHiveVarcharResults.readStringResults == null) { - lazyBinaryReadHiveVarcharResults.readStringResults = new LazyBinaryReadStringResults(); - } - LazyBinaryReadStringResults readStringResults = lazyBinaryReadHiveVarcharResults.readStringResults; - - // Read the bytes using our basic method. - readString(readStringResults); - - // Copy the bytes into our Text object, then truncate. - HiveVarcharWritable hiveVarcharWritable = lazyBinaryReadHiveVarcharResults.getHiveVarcharWritable(); - hiveVarcharWritable.getTextValue().set(readStringResults.bytes, readStringResults.start, readStringResults.length); - hiveVarcharWritable.enforceMaxLength(lazyBinaryReadHiveVarcharResults.getMaxLength()); - - readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); - readHiveVarcharResults.start = 0; - readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); - } - - /* - * BINARY. - */ - - // This class is for internal use. - private class LazyBinaryReadBinaryResults extends ReadBinaryResults { - - // Use our STRING reader. - public LazyBinaryReadStringResults readStringResults; - - public LazyBinaryReadBinaryResults() { - super(); - } - } - - // Reading a BINARY field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. - @Override - public ReadBinaryResults createReadBinaryResults() { - return new LazyBinaryReadBinaryResults(); - } - - public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { - LazyBinaryReadBinaryResults lazyBinaryReadBinaryResults = (LazyBinaryReadBinaryResults) readBinaryResults; - - if (lazyBinaryReadBinaryResults.readStringResults == null) { - lazyBinaryReadBinaryResults.readStringResults = new LazyBinaryReadStringResults(); - } - LazyBinaryReadStringResults readStringResults = lazyBinaryReadBinaryResults.readStringResults; - - // Read the bytes using our basic method. - readString(readStringResults); - - readBinaryResults.bytes = readStringResults.bytes; - readBinaryResults.start = readStringResults.start; - readBinaryResults.length = readStringResults.length; - } - - /* - * DATE. - */ - - // This class is for internal use. - private static class LazyBinaryReadDateResults extends ReadDateResults { - - public LazyBinaryReadDateResults() { - super(); - } - - public DateWritable getDateWritable() { - return dateWritable; - } - } - - // Reading a DATE field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DATE field. - @Override - public ReadDateResults createReadDateResults() { - return new LazyBinaryReadDateResults(); - } - - @Override - public void readDate(ReadDateResults readDateResults) throws IOException { - LazyBinaryReadDateResults lazyBinaryReadDateResults = (LazyBinaryReadDateResults) readDateResults; - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - DateWritable dateWritable = lazyBinaryReadDateResults.getDateWritable(); - dateWritable.set(tempVInt.value); - } - - /* - * INTERVAL_YEAR_MONTH. - */ - - // This class is for internal use. - private static class LazyBinaryReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { - - public LazyBinaryReadIntervalYearMonthResults() { - super(); - } - - public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { - return hiveIntervalYearMonthWritable; - } - } - - // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_YEAR_MONTH field. - @Override - public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { - return new LazyBinaryReadIntervalYearMonthResults(); - } - - @Override - public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) - throws IOException { - LazyBinaryReadIntervalYearMonthResults lazyBinaryReadIntervalYearMonthResults = - (LazyBinaryReadIntervalYearMonthResults) readIntervalYearMonthResults; - - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = - lazyBinaryReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); - hiveIntervalYearMonthWritable.set(tempVInt.value); - } - - /* - * INTERVAL_DAY_TIME. - */ - - // This class is for internal use. - private static class LazyBinaryReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { - - public LazyBinaryReadIntervalDayTimeResults() { - super(); - } - - public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { - return hiveIntervalDayTimeWritable; - } - } - - // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_DAY_TIME field. - @Override - public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { - return new LazyBinaryReadIntervalDayTimeResults(); - } - - @Override - public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) - throws IOException { - LazyBinaryReadIntervalDayTimeResults lazyBinaryReadIntervalDayTimeResults = - (LazyBinaryReadIntervalDayTimeResults) readIntervalDayTimeResults; - LazyBinaryUtils.readVLong(bytes, offset, tempVLong); - offset += tempVLong.length; - if (offset >= end) { - // Overshoot or not enough for next item. - warnBeyondEof(); - } - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = - lazyBinaryReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); - hiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value); - } - - /* - * TIMESTAMP. - */ - - // This class is for internal use. - private static class LazyBinaryReadTimestampResults extends ReadTimestampResults { - - public LazyBinaryReadTimestampResults() { - super(); - } - - public TimestampWritable getTimestampWritable() { - return timestampWritable; - } - } - - // Reading a TIMESTAMP field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different TIMESTAMP field. - @Override - public ReadTimestampResults createReadTimestampResults() { - return new LazyBinaryReadTimestampResults(); - } - - @Override - public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { - LazyBinaryReadTimestampResults lazyBinaryReadTimestampResults = (LazyBinaryReadTimestampResults) readTimestampResults; - int length = TimestampWritable.getTotalLength(bytes, offset); - int saveStart = offset; - offset += length; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - - // Move past this NOT NULL field. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - TimestampWritable timestampWritable = lazyBinaryReadTimestampResults.getTimestampWritable(); - timestampWritable.set(bytes, saveStart); - } - - /* - * DECIMAL. - */ - - // This class is for internal use. - private static class LazyBinaryReadDecimalResults extends ReadDecimalResults { - - public HiveDecimal hiveDecimal; - - public void init(DecimalTypeInfo decimalTypeInfo) { - super.init(decimalTypeInfo); - } - - @Override - public HiveDecimal getHiveDecimal() { - return hiveDecimal; - } - } - - // Reading a DECIMAL field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DECIMAL field. - @Override - public ReadDecimalResults createReadDecimalResults() { - return new LazyBinaryReadDecimalResults(); - } - - @Override - public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { - LazyBinaryReadDecimalResults lazyBinaryReadDecimalResults = (LazyBinaryReadDecimalResults) readDecimalResults; - - if (!lazyBinaryReadDecimalResults.isInit()) { - lazyBinaryReadDecimalResults.init(saveDecimalTypeInfo); - } - - lazyBinaryReadDecimalResults.hiveDecimal = saveDecimal; - - saveDecimal = null; - saveDecimalTypeInfo = null; - } - - /** - * We read the whole HiveDecimal value and then enforce precision and scale, which may - * make it a NULL. - * @return Returns true if this HiveDecimal enforced to a NULL. - */ - private boolean earlyReadHiveDecimal() throws EOFException { - - // Since enforcing precision and scale can cause a HiveDecimal to become NULL, - // we must read it, enforce it here, and either return NULL or buffer the result. - - // These calls are to see how much data there is. The setFromBytes call below will do the same - // readVInt reads but actually unpack the decimal. - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - int saveStart = offset; - offset += tempVInt.length; - if (offset >= end) { - // Overshoot or not enough for next item. - warnBeyondEof(); - } - LazyBinaryUtils.readVInt(bytes, offset, tempVInt); - offset += tempVInt.length; - if (offset >= end) { - // Overshoot or not enough for next item. - warnBeyondEof(); - } - offset += tempVInt.value; - // Last item -- ok to be at end. - if (offset > end) { - warnBeyondEof(); - } - int length = offset - saveStart; - - if (tempHiveDecimalWritable == null) { - tempHiveDecimalWritable = new HiveDecimalWritable(); - } - LazyBinarySerDe.setFromBytes(bytes, saveStart, length, - tempHiveDecimalWritable); - - saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; - - int precision = saveDecimalTypeInfo.getPrecision(); - int scale = saveDecimalTypeInfo.getScale(); - - saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); - - // Move past this field whether it is NULL or NOT NULL. - fieldIndex++; - - // Every 8 fields we read a new NULL byte. - if (fieldIndex < fieldCount) { - if ((fieldIndex % 8) == 0) { - // Get next null byte. - if (offset >= end) { - warnBeyondEof(); - } - nullByte = bytes[offset++]; - } - } - - // Now return whether it is NULL or NOT NULL. - return (saveDecimal == null); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java index fc845a5..e27c6b1 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java @@ -32,8 +32,11 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; /** @@ -58,7 +61,7 @@ public class VerifyFast { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { - boolean value = deserializeRead.readBoolean(); + boolean value = deserializeRead.currentBoolean; if (!(object instanceof Boolean)) { TestCase.fail("Boolean expected object not Boolean"); } @@ -70,7 +73,7 @@ public class VerifyFast { break; case BYTE: { - byte value = deserializeRead.readByte(); + byte value = deserializeRead.currentByte; if (!(object instanceof Byte)) { TestCase.fail("Byte expected object not Byte"); } @@ -82,7 +85,7 @@ public class VerifyFast { break; case SHORT: { - short value = deserializeRead.readShort(); + short value = deserializeRead.currentShort; if (!(object instanceof Short)) { TestCase.fail("Short expected object not Short"); } @@ -94,7 +97,7 @@ public class VerifyFast { break; case INT: { - int value = deserializeRead.readInt(); + int value = deserializeRead.currentInt; if (!(object instanceof Integer)) { TestCase.fail("Integer expected object not Integer"); } @@ -106,7 +109,7 @@ public class VerifyFast { break; case LONG: { - long value = deserializeRead.readLong(); + long value = deserializeRead.currentLong; if (!(object instanceof Long)) { TestCase.fail("Long expected object not Long"); } @@ -118,7 +121,7 @@ public class VerifyFast { break; case FLOAT: { - float value = deserializeRead.readFloat(); + float value = deserializeRead.currentFloat; Float expected = (Float) object; if (!(object instanceof Float)) { TestCase.fail("Float expected object not Float"); @@ -130,7 +133,7 @@ public class VerifyFast { break; case DOUBLE: { - double value = deserializeRead.readDouble(); + double value = deserializeRead.currentDouble; Double expected = (Double) object; if (!(object instanceof Double)) { TestCase.fail("Double expected object not Double"); @@ -142,9 +145,10 @@ public class VerifyFast { break; case STRING: { - DeserializeRead.ReadStringResults readStringResults = deserializeRead.createReadStringResults(); - deserializeRead.readString(readStringResults); - byte[] stringBytes = Arrays.copyOfRange(readStringResults.bytes, readStringResults.start, readStringResults.start + readStringResults.length); + byte[] stringBytes = Arrays.copyOfRange( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); Text text = new Text(stringBytes); String string = text.toString(); String expected = (String) object; @@ -155,9 +159,15 @@ public class VerifyFast { break; case CHAR: { - DeserializeRead.ReadHiveCharResults readHiveCharResults = deserializeRead.createReadHiveCharResults(); - deserializeRead.readHiveChar(readHiveCharResults); - HiveChar hiveChar = readHiveCharResults.getHiveChar(); + byte[] stringBytes = Arrays.copyOfRange( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); + Text text = new Text(stringBytes); + String string = text.toString(); + + HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength()); + HiveChar expected = (HiveChar) object; if (!hiveChar.equals(expected)) { TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')"); @@ -166,9 +176,15 @@ public class VerifyFast { break; case VARCHAR: { - DeserializeRead.ReadHiveVarcharResults readHiveVarcharResults = deserializeRead.createReadHiveVarcharResults(); - deserializeRead.readHiveVarchar(readHiveVarcharResults); - HiveVarchar hiveVarchar = readHiveVarcharResults.getHiveVarchar(); + byte[] stringBytes = Arrays.copyOfRange( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); + Text text = new Text(stringBytes); + String string = text.toString(); + + HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); + HiveVarchar expected = (HiveVarchar) object; if (!hiveVarchar.equals(expected)) { TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')"); @@ -177,9 +193,7 @@ public class VerifyFast { break; case DECIMAL: { - DeserializeRead.ReadDecimalResults readDecimalResults = deserializeRead.createReadDecimalResults(); - deserializeRead.readHiveDecimal(readDecimalResults); - HiveDecimal value = readDecimalResults.getHiveDecimal(); + HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal(); if (value == null) { TestCase.fail("Decimal field evaluated to NULL"); } @@ -194,9 +208,7 @@ public class VerifyFast { break; case DATE: { - DeserializeRead.ReadDateResults readDateResults = deserializeRead.createReadDateResults(); - deserializeRead.readDate(readDateResults); - Date value = readDateResults.getDate(); + Date value = deserializeRead.currentDateWritable.get(); Date expected = (Date) object; if (!value.equals(expected)) { TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); @@ -205,9 +217,7 @@ public class VerifyFast { break; case TIMESTAMP: { - DeserializeRead.ReadTimestampResults readTimestampResults = deserializeRead.createReadTimestampResults(); - deserializeRead.readTimestamp(readTimestampResults); - Timestamp value = readTimestampResults.getTimestamp(); + Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp(); Timestamp expected = (Timestamp) object; if (!value.equals(expected)) { TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); @@ -216,9 +226,7 @@ public class VerifyFast { break; case INTERVAL_YEAR_MONTH: { - DeserializeRead.ReadIntervalYearMonthResults readIntervalYearMonthResults = deserializeRead.createReadIntervalYearMonthResults(); - deserializeRead.readIntervalYearMonth(readIntervalYearMonthResults); - HiveIntervalYearMonth value = readIntervalYearMonthResults.getHiveIntervalYearMonth(); + HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object; if (!value.equals(expected)) { TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); @@ -227,9 +235,7 @@ public class VerifyFast { break; case INTERVAL_DAY_TIME: { - DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults = deserializeRead.createReadIntervalDayTimeResults(); - deserializeRead.readIntervalDayTime(readIntervalDayTimeResults); - HiveIntervalDayTime value = readIntervalDayTimeResults.getHiveIntervalDayTime(); + HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime(); HiveIntervalDayTime expected = (HiveIntervalDayTime) object; if (!value.equals(expected)) { TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); @@ -238,9 +244,10 @@ public class VerifyFast { break; case BINARY: { - DeserializeRead.ReadBinaryResults readBinaryResults = deserializeRead.createReadBinaryResults(); - deserializeRead.readBinary(readBinaryResults); - byte[] byteArray = Arrays.copyOfRange(readBinaryResults.bytes, readBinaryResults.start, readBinaryResults.start + readBinaryResults.length); + byte[] byteArray = Arrays.copyOfRange( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); byte[] expected = (byte[]) object; if (byteArray.length != expected.length){ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected)
