Repository: hive Updated Branches: refs/heads/master fbeee6236 -> 4533d21b0
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java index e27c6b1..52dd5a3 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java @@ -32,12 +32,27 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; /** * TestBinarySortableSerDe. 
@@ -45,27 +60,28 @@ import org.apache.hadoop.io.Text; */ public class VerifyFast { - public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { + public static void verifyDeserializeRead(DeserializeRead deserializeRead, + PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException { boolean isNull; isNull = deserializeRead.readCheckNull(); if (isNull) { - if (object != null) { + if (writable != null) { TestCase.fail("Field reports null but object is not null"); } return; - } else if (object == null) { + } else if (writable == null) { TestCase.fail("Field report not null but object is null"); } switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { boolean value = deserializeRead.currentBoolean; - if (!(object instanceof Boolean)) { - TestCase.fail("Boolean expected object not Boolean"); + if (!(writable instanceof BooleanWritable)) { + TestCase.fail("Boolean expected writable not Boolean"); } - Boolean expected = (Boolean) object; + boolean expected = ((BooleanWritable) writable).get(); if (value != expected) { TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); } @@ -74,10 +90,10 @@ public class VerifyFast { case BYTE: { byte value = deserializeRead.currentByte; - if (!(object instanceof Byte)) { - TestCase.fail("Byte expected object not Byte"); + if (!(writable instanceof ByteWritable)) { + TestCase.fail("Byte expected writable not Byte"); } - Byte expected = (Byte) object; + byte expected = ((ByteWritable) writable).get(); if (value != expected) { TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); } @@ -86,10 +102,10 @@ public class VerifyFast { case SHORT: { short value = deserializeRead.currentShort; - if (!(object instanceof Short)) { - TestCase.fail("Short expected object not Short"); + if (!(writable instanceof ShortWritable)) { + TestCase.fail("Short expected 
writable not Short"); } - Short expected = (Short) object; + short expected = ((ShortWritable) writable).get(); if (value != expected) { TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); } @@ -98,10 +114,10 @@ public class VerifyFast { case INT: { int value = deserializeRead.currentInt; - if (!(object instanceof Integer)) { - TestCase.fail("Integer expected object not Integer"); + if (!(writable instanceof IntWritable)) { + TestCase.fail("Integer expected writable not Integer"); } - Integer expected = (Integer) object; + int expected = ((IntWritable) writable).get(); if (value != expected) { TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); } @@ -110,10 +126,10 @@ public class VerifyFast { case LONG: { long value = deserializeRead.currentLong; - if (!(object instanceof Long)) { - TestCase.fail("Long expected object not Long"); + if (!(writable instanceof LongWritable)) { + TestCase.fail("Long expected writable not Long"); } - Long expected = (Long) object; + Long expected = ((LongWritable) writable).get(); if (value != expected) { TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); } @@ -122,10 +138,10 @@ public class VerifyFast { case FLOAT: { float value = deserializeRead.currentFloat; - Float expected = (Float) object; - if (!(object instanceof Float)) { - TestCase.fail("Float expected object not Float"); + if (!(writable instanceof FloatWritable)) { + TestCase.fail("Float expected writable not Float"); } + float expected = ((FloatWritable) writable).get(); if (value != expected) { TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); } @@ -134,10 +150,10 @@ public class VerifyFast { case DOUBLE: { double value = deserializeRead.currentDouble; - Double expected = (Double) object; - if (!(object instanceof Double)) { - TestCase.fail("Double expected object not Double"); + if (!(writable instanceof DoubleWritable)) { + 
TestCase.fail("Double expected writable not Double"); } + double expected = ((DoubleWritable) writable).get(); if (value != expected) { TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); } @@ -151,7 +167,7 @@ public class VerifyFast { deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); Text text = new Text(stringBytes); String string = text.toString(); - String expected = (String) object; + String expected = ((Text) writable).toString(); if (!string.equals(expected)) { TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')"); } @@ -168,7 +184,7 @@ public class VerifyFast { HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength()); - HiveChar expected = (HiveChar) object; + HiveChar expected = ((HiveCharWritable) writable).getHiveChar(); if (!hiveChar.equals(expected)) { TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')"); } @@ -185,7 +201,7 @@ public class VerifyFast { HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); - HiveVarchar expected = (HiveVarchar) object; + HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar(); if (!hiveVarchar.equals(expected)) { TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')"); } @@ -197,7 +213,7 @@ public class VerifyFast { if (value == null) { TestCase.fail("Decimal field evaluated to NULL"); } - HiveDecimal expected = (HiveDecimal) object; + HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal(); if (!value.equals(expected)) { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; int precision = decimalTypeInfo.getPrecision(); @@ -209,7 +225,7 @@ public class VerifyFast { case DATE: { Date value = deserializeRead.currentDateWritable.get(); - Date expected = (Date) object; + Date expected = ((DateWritable) writable).get(); 
if (!value.equals(expected)) { TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -218,7 +234,7 @@ public class VerifyFast { case TIMESTAMP: { Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp(); - Timestamp expected = (Timestamp) object; + Timestamp expected = ((TimestampWritable) writable).getTimestamp(); if (!value.equals(expected)) { TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -227,7 +243,7 @@ public class VerifyFast { case INTERVAL_YEAR_MONTH: { HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); - HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object; + HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth(); if (!value.equals(expected)) { TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -236,7 +252,7 @@ public class VerifyFast { case INTERVAL_DAY_TIME: { HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime(); - HiveIntervalDayTime expected = (HiveIntervalDayTime) object; + HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime(); if (!value.equals(expected)) { TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -248,7 +264,8 @@ public class VerifyFast { deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); - byte[] expected = (byte[]) object; + BytesWritable bytesWritable = (BytesWritable) writable; + byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); if (byteArray.length != expected.length){ TestCase.fail("Byte Array field 
mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")"); @@ -266,57 +283,58 @@ public class VerifyFast { } } - public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { - if (object == null) { + public static void serializeWrite(SerializeWrite serializeWrite, + PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException { + if (writable == null) { serializeWrite.writeNull(); return; } switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { - boolean value = (Boolean) object; + boolean value = ((BooleanWritable) writable).get(); serializeWrite.writeBoolean(value); } break; case BYTE: { - byte value = (Byte) object; + byte value = ((ByteWritable) writable).get(); serializeWrite.writeByte(value); } break; case SHORT: { - short value = (Short) object; + short value = ((ShortWritable) writable).get(); serializeWrite.writeShort(value); } break; case INT: { - int value = (Integer) object; + int value = ((IntWritable) writable).get(); serializeWrite.writeInt(value); } break; case LONG: { - long value = (Long) object; + long value = ((LongWritable) writable).get(); serializeWrite.writeLong(value); } break; case FLOAT: { - float value = (Float) object; + float value = ((FloatWritable) writable).get(); serializeWrite.writeFloat(value); } break; case DOUBLE: { - double value = (Double) object; + double value = ((DoubleWritable) writable).get(); serializeWrite.writeDouble(value); } break; case STRING: { - String value = (String) object; + Text value = (Text) writable; byte[] stringBytes = value.getBytes(); int stringLength = stringBytes.length; serializeWrite.writeString(stringBytes, 0, stringLength); @@ -324,51 +342,52 @@ public class VerifyFast { break; case CHAR: { - HiveChar value = (HiveChar) object; + HiveChar value = ((HiveCharWritable) writable).getHiveChar(); serializeWrite.writeHiveChar(value); } break; case VARCHAR: { - 
HiveVarchar value = (HiveVarchar) object; + HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar(); serializeWrite.writeHiveVarchar(value); } break; case DECIMAL: { - HiveDecimal value = (HiveDecimal) object; + HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal(); DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo; serializeWrite.writeHiveDecimal(value, decTypeInfo.scale()); } break; case DATE: { - Date value = (Date) object; + Date value = ((DateWritable) writable).get(); serializeWrite.writeDate(value); } break; case TIMESTAMP: { - Timestamp value = (Timestamp) object; + Timestamp value = ((TimestampWritable) writable).getTimestamp(); serializeWrite.writeTimestamp(value); } break; case INTERVAL_YEAR_MONTH: { - HiveIntervalYearMonth value = (HiveIntervalYearMonth) object; + HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth(); serializeWrite.writeHiveIntervalYearMonth(value); } break; case INTERVAL_DAY_TIME: { - HiveIntervalDayTime value = (HiveIntervalDayTime) object; + HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime(); serializeWrite.writeHiveIntervalDayTime(value); } break; case BINARY: { - byte[] binaryBytes = (byte[]) object; - int length = binaryBytes.length; + BytesWritable byteWritable = (BytesWritable) writable; + byte[] binaryBytes = byteWritable.getBytes(); + int length = byteWritable.getLength(); serializeWrite.writeBinary(binaryBytes, 0, length); } break; http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java index 14fc38e..1349f74 100644 --- 
a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java @@ -23,6 +23,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Random; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -30,6 +32,28 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +import com.sun.jdi.PrimitiveType; public class MyTestClass { @@ -202,4 +226,66 @@ public class MyTestClass { static Object[] nrIntervalDayTime = { HiveIntervalDayTime.valueOf("1 0:0:0") }; + + public static void nonRandomRowFill(Object[][] rows, PrimitiveCategory[] primitiveCategories) { + int minCount = Math.min(rows.length, nrDecimal.length); + for (int i = 0; i < minCount; i++) { + Object[] row = rows[i]; + for (int c = 0; c < primitiveCategories.length; c++) { + Object object = row[c]; // Current value. + switch (primitiveCategories[c]) { + case BOOLEAN: + // Use current for now. + break; + case BYTE: + object = nrByte; + break; + case SHORT: + object = nrShort; + break; + case INT: + object = nrInt; + break; + case LONG: + object = nrLong; + break; + case DATE: + object = nrDate; + break; + case FLOAT: + object = nrFloat; + break; + case DOUBLE: + object = nrDouble; + break; + case STRING: + object = nrString; + break; + case CHAR: + // Use current for now. + break; + case VARCHAR: + // Use current for now. + break; + case BINARY: + // Use current for now. + break; + case TIMESTAMP: + // Use current for now. 
+ break; + case INTERVAL_YEAR_MONTH: + object = nrIntervalYearMonth; + break; + case INTERVAL_DAY_TIME: + object = nrIntervalDayTime; + break; + case DECIMAL: + object = nrDecimal[i]; + break; + default: + throw new Error("Unknown primitive category " + primitiveCategories[c]); + } + } + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java index 0be3213..58937db 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -17,53 +17,72 @@ */ package org.apache.hadoop.hive.serde2.binarysortable; +import java.io.EOFException; import java.util.Arrays; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Random; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.VerifyFast; -import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; import junit.framework.TestCase; public class TestBinarySortableFast extends TestCase { - private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, + private void testBinarySortableFast( + RandomRowObjectSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, - SerDe serde, StructObjectInspector rowOI, boolean ascending, - Map<Object, PrimitiveTypeInfo[]> primitiveTypeInfoMap) throws Throwable { + SerDe serde, StructObjectInspector rowOI, + SerDe serde_fewer, StructObjectInspector writeRowOI, + boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { + + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + + int writeColumnCount = columnCount; + if (doWriteFewerColumns) { + writeColumnCount = writeRowOI.getAllStructFieldRefs().size(); + } BinarySortableSerializeWrite binarySortableSerializeWrite = - new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); + new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); // Try to serialize // One Writable per row. 
- BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - - int[][] perFieldWriteLengthsArray = new int[myTestPrimitiveClasses.length][]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + + int[][] perFieldWriteLengthsArray = new int[rowCount][]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); binarySortableSerializeWrite.set(output); - int[] perFieldWriteLengths = new int[MyTestPrimitiveClass.primitiveCount]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfoMap.get(t)[index], object); + int[] perFieldWriteLengths = new int[columnCount]; + for (int index = 0; index < writeColumnCount; index++) { + + Writable writable = (Writable) row[index]; + + VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable); perFieldWriteLengths[index] = output.getLength(); } perFieldWriteLengthsArray[i] = perFieldWriteLengths; @@ -90,34 +109,87 @@ public class TestBinarySortableFast extends TestCase { // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); - BinarySortableDeserializeRead binarySortableDeserializeRead = + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + if (useIncludeColumns) { + binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude); + } + BytesWritable bytesWritable = serializeWriteBytes[i]; - binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + binarySortableDeserializeRead.set( + bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable); + } } binarySortableDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + + /* + * Clip off one byte and expect to get an EOFException on the write field. + */ + BinarySortableDeserializeRead binarySortableDeserializeRead2 = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + if (useIncludeColumns) { + binarySortableDeserializeRead2.setColumnsToInclude(columnsToInclude); + } + + binarySortableDeserializeRead2.set( + bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte. + + for (int index = 0; index < writeColumnCount; index++) { + Writable writable = (Writable) row[index]; + if (index == writeColumnCount - 1) { + boolean threw = false; + try { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable); + } catch (EOFException e) { + threw = true; + } + TestCase.assertTrue(threw); + } else { + if (useIncludeColumns && !columnsToInclude[index]) { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], null); + } else { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable); + } + } + } + } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - List<Object> deserializedRow = (List<Object>) serde.deserialize(bytesWritable); - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + // Note that regular SerDe doesn't tolerate fewer columns. + List<Object> deserializedRow; + if (doWriteFewerColumns) { + deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable); + } else { + deserializedRow = (List<Object>) serde.deserialize(bytesWritable); + } + + Object[] row = rows[i]; + for (int index = 0; index < writeColumnCount; index++) { + Object expected = row[index]; Object object = deserializedRow.get(index); if (expected == null || object == null) { if (expected != null || object != null) { @@ -132,14 +204,19 @@ public class TestBinarySortableFast extends TestCase { } // One Writable per row. - BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - + BytesWritable serdeBytes[] = new BytesWritable[rowCount]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; // Since SerDe reuses memory, we will need to make a copy. 
- BytesWritable serialized = (BytesWritable) serde.serialize(t, rowOI); + BytesWritable serialized; + if (doWriteFewerColumns) { + serialized = (BytesWritable) serde_fewer.serialize(row, rowOI); + } else { + serialized = (BytesWritable) serde.serialize(row, rowOI);; + } BytesWritable bytesWritable = new BytesWritable(); bytesWritable.set(serialized); byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); @@ -167,85 +244,188 @@ public class TestBinarySortableFast extends TestCase { } // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); - BinarySortableDeserializeRead binarySortableDeserializeRead = + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + if (useIncludeColumns) { + binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude); + } + BytesWritable bytesWritable = serdeBytes[i]; binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable); + } } binarySortableDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); } } + private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r) + throws Throwable { + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + int rowCount = 1000; + Object[][] rows = source.randomRows(rowCount); + + if (doNonRandomFill) { + MyTestClass.nonRandomRowFill(rows, source.primitiveCategories()); + } + + // We need to operate on sorted data to fully test BinarySortable. 
+ source.sort(rows); + + StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector(); + + PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); + int columnCount = primitiveTypeInfos.length; + + int writeColumnCount = columnCount; + StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector; + boolean doWriteFewerColumns = r.nextBoolean(); + if (doWriteFewerColumns) { + writeColumnCount = 1 + r.nextInt(columnCount); + if (writeColumnCount == columnCount) { + doWriteFewerColumns = false; + } else { + writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount); + } + } + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector); + String order; + order = StringUtils.leftPad("", columnCount, '+'); + String nullOrder; + nullOrder = StringUtils.leftPad("", columnCount, 'a'); + SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); + + SerDe serde_ascending_fewer = null; + if (doWriteFewerColumns) { + String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector); + String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector); + + serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder); + } + + order = StringUtils.leftPad("", columnCount, '-'); + nullOrder = StringUtils.leftPad("", columnCount, 'z'); + SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); + + SerDe serde_descending_fewer = null; + if (doWriteFewerColumns) { + String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector); + String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector); + + serde_descending_fewer = 
TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder); + } + + boolean[] columnSortOrderIsDesc = new boolean[columnCount]; + Arrays.fill(columnSortOrderIsDesc, false); + byte[] columnNullMarker = new byte[columnCount]; + Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + byte[] columnNotNullMarker = new byte[columnCount]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); + + /* + * Ascending. + */ + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + + /* + * Descending. 
+ */ + Arrays.fill(columnSortOrderIsDesc, true); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_descending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_descending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + + } + public void testBinarySortableFast() throws Throwable { + try { + Random r = new Random(35790); + + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testBinarySortableFastCase(caseNum, (i % 2 == 0), r); + caseNum++; + } - int num = 1000; - Random r = new Random(1234); - MyTestPrimitiveClass myTestPrimitiveClasses[] = new MyTestPrimitiveClass[num]; - // Need a map because we sort. 
- Map<Object, PrimitiveTypeInfo[]> primitiveTypeInfoMap = new HashMap<Object, PrimitiveTypeInfo[]>(); - - int i; - // First try non-random values - for (i = 0; i < MyTestClass.nrDecimal.length; i++) { - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.nonRandomFill(i, extraTypeInfo); - myTestPrimitiveClasses[i] = t; - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - primitiveTypeInfoMap.put(t, primitiveTypeInfos); - } - - for ( ; i < num; i++) { - int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - int field = 0; - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.randomFill(r, randField, field, extraTypeInfo); - myTestPrimitiveClasses[i] = t; - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - primitiveTypeInfoMap.put(t, primitiveTypeInfos); - } - - StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory - .getReflectionObjectInspector(MyTestPrimitiveClass.class, - ObjectInspectorOptions.JAVA); - - TestBinarySortableSerDe.sort(myTestPrimitiveClasses, rowOI); - - String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); - String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); - String order; - order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); - String nullOrder; - nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'a'); - SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); - order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); - nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'z'); - SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); - - boolean[] columnSortOrderIsDesc = new 
boolean[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnSortOrderIsDesc, false); - byte[] columnNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); - byte[] columnNotNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, - columnNotNullMarker, serde_ascending, rowOI, true, primitiveTypeInfoMap); - Arrays.fill(columnSortOrderIsDesc, true); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, - columnNotNullMarker, serde_descending, rowOI, false, primitiveTypeInfoMap); } catch (Throwable e) { e.printStackTrace(); throw e; http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java ---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java index 7ebe7ae..76b93c6 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.EOFException; import java.util.Arrays; import java.util.Properties; import java.util.Random; @@ -24,11 +25,15 @@ import java.util.Random; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import 
org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -37,31 +42,55 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; import junit.framework.TestCase; public class TestLazySimpleFast extends TestCase { - private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, LazySimpleSerDe[] serdes, - StructObjectInspector[] rowOIs, byte separator, LazySerDeParameters[] serdeParams, - PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + private void testLazySimpleFast( + RandomRowObjectSource source, Object[][] rows, + LazySimpleSerDe serde, StructObjectInspector rowOI, + LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, + byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, + PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { + + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + int writeColumnCount = columnCount; + PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos; + if (doWriteFewerColumns) { + 
writeColumnCount = writeRowOI.getAllStructFieldRefs().size(); + writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount); + } // Try to serialize - BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); - LazySimpleSerializeWrite lazySimpleSerializeWrite = - new LazySimpleSerializeWrite(MyTestPrimitiveClass.primitiveCount, - separator, serdeParams[i]); + LazySimpleSerializeWrite lazySimpleSerializeWrite = + new LazySimpleSerializeWrite(columnCount, + separator, serdeParams); lazySimpleSerializeWrite.set(output); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfosArray[i][index], object); + for (int index = 0; index < columnCount; index++) { + + Writable writable = (Writable) row[index]; + + VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable); } BytesWritable bytesWritable = new BytesWritable(); @@ -70,12 +99,15 @@ public class TestLazySimpleFast extends TestCase { } // Try to deserialize - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazySimpleDeserializeRead lazySimpleDeserializeRead = - new LazySimpleDeserializeRead(primitiveTypeInfos, - separator, serdeParams[i]); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(writePrimitiveTypeInfos, + separator, serdeParams); + + if (useIncludeColumns) { + 
lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude); + } BytesWritable bytesWritable = serializeWriteBytes[i]; byte[] bytes = bytesWritable.getBytes(); @@ -87,28 +119,37 @@ public class TestLazySimpleFast extends TestCase { chars[c] = (char) (bytes[c] & 0xFF); } - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); - VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable); + } } lazySimpleDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - LazyStruct lazySimpleStruct = (LazyStruct) serdes[i].deserialize(bytesWritable); + LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable); - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] row = rows[i]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + for (int index = 0; index < columnCount; index++) { PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Writable writable = (Writable) row[index]; LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index); Object object; if (lazyPrimitive != null) { @@ -116,12 +157,12 @@ public class TestLazySimpleFast extends TestCase { } else { object = null; } - if (expected == null || object == null) { - if (expected != null || object != null) { + if (writable == null || object == null) { + if (writable != null || object != null) { fail("SerDe deserialized NULL column mismatch"); } } else { - if (!object.equals(expected)) { + if (!object.equals(writable)) { fail("SerDe deserialized value does not match"); } } @@ -129,21 +170,19 @@ public class TestLazySimpleFast extends TestCase { } // One Writable per row. - byte[][] serdeBytes = new byte[myTestPrimitiveClasses.length][]; - + byte[][] serdeBytes = new byte[rowCount][]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. 
- Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] serdeRow = new Object[columnCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; // LazySimple seems to work better with an row object array instead of a Java object... - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); - row[index] = object; + for (int index = 0; index < columnCount; index++) { + serdeRow[index] = row[index]; } - Text serialized = (Text) serdes[i].serialize(row, rowOIs[i]); + Text serialized = (Text) serde.serialize(serdeRow, rowOI); byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()); byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); @@ -154,23 +193,37 @@ public class TestLazySimpleFast extends TestCase { } // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazySimpleDeserializeRead lazySimpleDeserializeRead = - new LazySimpleDeserializeRead(primitiveTypeInfos, - separator, serdeParams[i]); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(writePrimitiveTypeInfos, + separator, serdeParams); + + if (useIncludeColumns) { + lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude); + } byte[] bytes = serdeBytes[i]; lazySimpleDeserializeRead.set(bytes, 0, bytes.length); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable); + } } lazySimpleDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); } } @@ -186,7 +239,7 @@ public class TestLazySimpleFast extends TestCase { // Set the configuration parameters tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - + tbl.setProperty("columns", fieldNames); tbl.setProperty("columns.types", fieldTypes); @@ -210,44 +263,95 @@ public class TestLazySimpleFast extends TestCase { return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); } - public void testLazySimpleFast() throws Throwable { - try { + public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r) + throws Throwable { + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); - int num = 1000; - Random r = new Random(1234); - MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; - PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; - for (int i = 0; i < num; i++) { - int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - int field = 0; - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.randomFill(r, randField, field, extraTypeInfo); - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - rows[i] = 
t; - primitiveTypeInfosArray[i] = primitiveTypeInfos; + int rowCount = 1000; + Object[][] rows = source.randomRows(rowCount); + + if (doNonRandomFill) { + MyTestClass.nonRandomRowFill(rows, source.primitiveCategories()); + } + + StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector(); + + PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); + int columnCount = primitiveTypeInfos.length; + + int writeColumnCount = columnCount; + StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector; + boolean doWriteFewerColumns = r.nextBoolean(); + if (doWriteFewerColumns) { + writeColumnCount = 1 + r.nextInt(columnCount); + if (writeColumnCount == columnCount) { + doWriteFewerColumns = false; + } else { + writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount); } + } - // To get the specific type information for CHAR and VARCHAR, seems like we need an - // inspector and SerDe per row... - StructObjectInspector[] rowOIs = new StructObjectInspector[num]; - LazySimpleSerDe[] serdes = new LazySimpleSerDe[num]; - LazySerDeParameters[] serdeParams = new LazySerDeParameters[num]; - for (int i = 0; i < num; i++) { - MyTestPrimitiveClass t = rows[i]; + String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector); - StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes); + LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes); - String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); - String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + LazySimpleSerDe serde_fewer = null; + LazySerDeParameters serdeParams_fewer = null; + if (doWriteFewerColumns) { + String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector); + String partialFieldTypes = 
ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector); + + serde_fewer = getSerDe(fieldNames, fieldTypes); + serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes); + } + + byte separator = (byte) '\t'; + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + } + + public void testLazySimpleFast() throws Throwable { + + try { + Random r = new Random(35790); - rowOIs[i] = rowOI; - serdes[i] = getSerDe(fieldNames, fieldTypes); - serdeParams[i] = getSerDeParams(fieldNames, fieldTypes); + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testLazySimpleFastCase(caseNum, (i % 2 == 0), r); + caseNum++; } - byte separator = (byte) '\t'; - testLazySimpleFast(rows, serdes, rowOIs, separator, serdeParams, primitiveTypeInfosArray); } catch (Throwable e) { e.printStackTrace(); throw e; http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java 
---------------------------------------------------------------------- diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java index 4032743..d7c4999 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; +import java.io.EOFException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,9 +28,13 @@ import junit.framework.TestCase; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -43,24 +48,50 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; public class TestLazyBinaryFast extends TestCase { - private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, 
SerDe[] serdes, StructObjectInspector[] rowOIs, - PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + private void testLazyBinaryFast( + RandomRowObjectSource source, Object[][] rows, + SerDe serde, StructObjectInspector rowOI, + SerDe serde_fewer, StructObjectInspector writeRowOI, + PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { - LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(MyTestPrimitiveClass.primitiveCount); + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + + int writeColumnCount = columnCount; + PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos; + if (doWriteFewerColumns) { + writeColumnCount = writeRowOI.getAllStructFieldRefs().size(); + writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount); + } + + LazyBinarySerializeWrite lazyBinarySerializeWrite = + new LazyBinarySerializeWrite(writeColumnCount); // Try to serialize - BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); lazyBinarySerializeWrite.set(output); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfosArray[i][index], object); + for (int index = 0; index < writeColumnCount; index++) { + + Writable writable = (Writable) row[index]; + + 
VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable); } BytesWritable bytesWritable = new BytesWritable(); @@ -69,44 +100,63 @@ public class TestLazyBinaryFast extends TestCase { } // Try to deserialize - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazyBinaryDeserializeRead lazyBinaryDeserializeRead = - new LazyBinaryDeserializeRead(primitiveTypeInfos); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last + // column. + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(writePrimitiveTypeInfos); + + if (useIncludeColumns) { + lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude); + } BytesWritable bytesWritable = serializeWriteBytes[i]; lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); - VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable); + } } lazyBinaryDeserializeRead.extraFieldsCheck(); - TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + } else { + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + } TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - LazyBinaryStruct lazyBinaryStruct = (LazyBinaryStruct) serdes[i].deserialize(bytesWritable); + LazyBinaryStruct lazyBinaryStruct; + if (doWriteFewerColumns) { + lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable); + } else { + lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable); + } - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] row = rows[i]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + for (int index = 0; index < writeColumnCount; index++) { PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Writable writable = (Writable) row[index]; Object object = lazyBinaryStruct.getField(index); - if (expected == null || object == null) { - if (expected != null || object != null) { + if (writable == null || object == null) { + 
if (writable != null || object != null) { fail("SerDe deserialized NULL column mismatch"); } } else { - if (!object.equals(expected)) { + if (!object.equals(writable)) { fail("SerDe deserialized value does not match"); } } @@ -114,88 +164,167 @@ public class TestLazyBinaryFast extends TestCase { } // One Writable per row. - BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - + BytesWritable serdeBytes[] = new BytesWritable[rowCount]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. - Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] serdeRow = new Object[writeColumnCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; // LazyBinary seems to work better with an row object array instead of a Java object... - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); - row[index] = object; + for (int index = 0; index < writeColumnCount; index++) { + serdeRow[index] = row[index]; } - BytesWritable serialized = (BytesWritable) serdes[i].serialize(row, rowOIs[i]); - BytesWritable bytesWritable = new BytesWritable(); - bytesWritable.set(serialized); - byte[] bytes1 = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + BytesWritable serialized; + if (doWriteFewerColumns) { + serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI); + } else { + serialized = (BytesWritable) serde.serialize(serdeRow, rowOI); + } + + BytesWritable bytesWritable = + new BytesWritable( + Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength())); + byte[] bytes1 = bytesWritable.getBytes(); - byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, 
serializeWriteBytes[i].getLength()); + BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i]; + byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength()); + if (bytes1.length != bytes2.length) { + fail("SerializeWrite length " + bytes2.length + " and " + + "SerDe serialization length " + bytes1.length + + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")"); + } if (!Arrays.equals(bytes1, bytes2)) { - fail("SerializeWrite and SerDe serialization does not match"); + fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")"); } serdeBytes[i] = bytesWritable; } // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + // When doWriteFewerColumns, try to read more fields than exist in buffer. + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos); + if (useIncludeColumns) { + lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude); + } + BytesWritable bytesWritable = serdeBytes[i]; lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable); + } } lazyBinaryDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + // The nullByte may cause this to not be true... + // TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); } } - public void testLazyBinaryFast() throws Throwable { - try { + public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable { + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); + + int rowCount = 1000; + Object[][] rows = source.randomRows(rowCount); + + if (doNonRandomFill) { + MyTestClass.nonRandomRowFill(rows, source.primitiveCategories()); + } + + StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector(); + + PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); + int columnCount = primitiveTypeInfos.length; - int num = 1000; - Random r = new Random(1234); - MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; - PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; - for (int i = 0; i < num; i++) { - int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - int field = 0; - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.randomFill(r, randField, field, extraTypeInfo); - PrimitiveTypeInfo[] primitiveTypeInfos = 
MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - rows[i] = t; - primitiveTypeInfosArray[i] = primitiveTypeInfos; + int writeColumnCount = columnCount; + StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector; + boolean doWriteFewerColumns = r.nextBoolean(); + if (doWriteFewerColumns) { + writeColumnCount = 1 + r.nextInt(columnCount); + if (writeColumnCount == columnCount) { + doWriteFewerColumns = false; + } else { + writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount); } + } + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector); + + SerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes); + + SerDe serde_fewer = null; + if (doWriteFewerColumns) { + String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector); + String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector); + + serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);; + } - // To get the specific type information for CHAR and VARCHAR, seems like we need an - // inspector and SerDe per row... 
- StructObjectInspector[] rowOIs = new StructObjectInspector[num]; - SerDe[] serdes = new SerDe[num]; - for (int i = 0; i < num; i++) { - MyTestPrimitiveClass t = rows[i]; + testLazyBinaryFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); - StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + testLazyBinaryFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); - String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); - String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + /* + * Can the LazyBinary format really tolerate writing fewer columns? + */ + // if (doWriteFewerColumns) { + // testLazyBinaryFast( + // source, rows, + // serde, rowStructObjectInspector, + // serde_fewer, writeRowStructObjectInspector, + // primitiveTypeInfos, + // /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + // testLazyBinaryFast( + // source, rows, + // serde, rowStructObjectInspector, + // serde_fewer, writeRowStructObjectInspector, + // primitiveTypeInfos, + // /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + // } + } + + public void testLazyBinaryFast() throws Throwable { + + try { + Random r = new Random(35790); - rowOIs[i] = rowOI; - serdes[i] = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes); + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testLazyBinaryFastCase(caseNum, (i % 2 == 0), r); + caseNum++; } - testLazyBinaryFast(rows, serdes, rowOIs, primitiveTypeInfosArray); } catch (Throwable e) { e.printStackTrace(); throw e; http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java 
---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java index 3fb0cfd..53a7823 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.common.type; +import java.sql.Date; import java.sql.Timestamp; import java.text.DateFormat; import java.text.ParseException; @@ -26,6 +27,100 @@ import java.util.concurrent.TimeUnit; public class RandomTypeUtil { + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (characters == null) { + sb.append((char) (r.nextInt(128))); + } else { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + } + return sb.toString(); + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static class HiveDecimalAndPrecisionScale { + public HiveDecimal hiveDecimal; + public int precision; + public int scale; + + HiveDecimalAndPrecisionScale(HiveDecimal hiveDecimal, int precision, int scale) { + this.hiveDecimal = hiveDecimal; + this.precision = precision; + this.scale = scale; + } + } + + public static HiveDecimalAndPrecisionScale getRandHiveDecimal(Random r) { + int precision; + int scale; + while (true) { + StringBuilder sb = new StringBuilder(); + precision = 1 + r.nextInt(18); + scale = 0 + 
r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + precision = bd.precision(); + scale = bd.scale(); + if (scale > precision) { + // Sometimes weird decimals are produced? + continue; + } + + // For now, punt. + precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + return new HiveDecimalAndPrecisionScale(bd, precision, scale); + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + /** + * TIMESTAMP. + */ + public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1); public static final long NANOSECONDS_PER_MILLISSECOND = TimeUnit.MILLISECONDS.toNanos(1);
