Repository: hive Updated Branches: refs/heads/HIVE-20445 [created] a4ef8df46
Working Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4ef8df4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4ef8df4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4ef8df4 Branch: refs/heads/HIVE-20445 Commit: a4ef8df46a5b881dde9895aab7c60ff24185b10a Parents: 611770d Author: Teddy Choi <[email protected]> Authored: Fri Aug 24 13:21:35 2018 +0900 Committer: Teddy Choi <[email protected]> Committed: Fri Aug 24 13:21:35 2018 +0900 ---------------------------------------------------------------------- .../ql/io/arrow/ArrowColumnarBatchSerDe.java | 17 +- .../hadoop/hive/ql/io/arrow/Deserializer.java | 22 +- .../hadoop/hive/ql/io/arrow/Serializer.java | 33 +-- .../ql/exec/vector/VectorRandomRowSource.java | 55 ++++- .../io/arrow/TestArrowColumnarBatchSerDe.java | 213 +++++++------------ 5 files changed, 134 insertions(+), 206 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java index ed82d2d..0dd959e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/ArrowColumnarBatchSerDe.java @@ -233,17 +233,16 @@ public class ArrowColumnarBatchSerDe extends AbstractSerDe { } static ListColumnVector toStructListVector(MapColumnVector mapVector) { - final StructColumnVector structVector; - final ListColumnVector structListVector; - structVector = new StructColumnVector(); - structVector.fields = new ColumnVector[] {mapVector.keys, mapVector.values}; - structListVector = new ListColumnVector(); - structListVector.child = structVector; - structListVector.childCount = mapVector.childCount; + final StructColumnVector structVector = + new StructColumnVector(mapVector.childCount, mapVector.keys, mapVector.values); + final ListColumnVector structListVector = + new ListColumnVector(mapVector.isNull.length, structVector); structListVector.isRepeating = mapVector.isRepeating; structListVector.noNulls = mapVector.noNulls; - System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.childCount); - System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.childCount); + + System.arraycopy(mapVector.offsets, 0, structListVector.offsets, 0, mapVector.offsets.length); + System.arraycopy(mapVector.lengths, 0, structListVector.lengths, 0, mapVector.lengths.length); + System.arraycopy(mapVector.isNull, 0, structListVector.isNull, 0, mapVector.isNull.length); return structListVector; } http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java index edc4b39..51de786 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Deserializer.java @@ -136,15 +136,13 @@ class Deserializer { case LIST: readList(arrowVector, (ListColumnVector) hiveVector, (ListTypeInfo) typeInfo); break; - case MAP: - readMap(arrowVector, (MapColumnVector) hiveVector, (MapTypeInfo) typeInfo); - break; case STRUCT: readStruct(arrowVector, (StructColumnVector) hiveVector, (StructTypeInfo) typeInfo); break; case UNION: readUnion(arrowVector, (UnionColumnVector) hiveVector, (UnionTypeInfo) typeInfo); break; + case MAP: default: throw new IllegalArgumentException(); } @@ -407,24 +405,6 @@ class Deserializer { } } - private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) { - final int size = arrowVector.getValueCount(); - final ListTypeInfo mapStructListTypeInfo = toStructListTypeInfo(typeInfo); - final ListColumnVector mapStructListVector = toStructListVector(hiveVector); - final StructColumnVector mapStructVector = (StructColumnVector) mapStructListVector.child; - - read(arrowVector, mapStructListVector, mapStructListTypeInfo); - - hiveVector.isRepeating = mapStructListVector.isRepeating; - hiveVector.childCount = mapStructListVector.childCount; - hiveVector.noNulls = mapStructListVector.noNulls; - hiveVector.keys = mapStructVector.fields[0]; - hiveVector.values = mapStructVector.fields[1]; - System.arraycopy(mapStructListVector.offsets, 0, hiveVector.offsets, 0, size); - System.arraycopy(mapStructListVector.lengths, 0, hiveVector.lengths, 0, size); - System.arraycopy(mapStructListVector.isNull, 0, hiveVector.isNull, 0, size); - } - private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) { final int size = arrowVector.getValueCount(); final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos(); http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java index 08e0fb2..0add293 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java @@ -181,6 +181,8 @@ public class Serializer { public ArrowWrapperWritable serializeBatch(VectorizedRowBatch vectorizedRowBatch, boolean isNative) { rootVector.setValueCount(0); + final int size = isNative ? vectorizedRowBatch.size : batchSize; + for (int fieldIndex = 0; fieldIndex < vectorizedRowBatch.projectionSize; fieldIndex++) { final int projectedColumn = vectorizedRowBatch.projectedColumns[fieldIndex]; final ColumnVector hiveVector = vectorizedRowBatch.cols[projectedColumn]; @@ -195,20 +197,18 @@ public class Serializer { } final FieldVector arrowVector = rootVector.addOrGet(fieldName, fieldType, FieldVector.class); if(fieldExists) { - arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize); + arrowVector.setValueCount(size); } else { - arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize); + arrowVector.setInitialCapacity(size); arrowVector.allocateNew(); } - write(arrowVector, hiveVector, fieldTypeInfo, isNative ? vectorizedRowBatch.size : batchSize, vectorizedRowBatch, isNative); + write(arrowVector, hiveVector, fieldTypeInfo, size, vectorizedRowBatch, isNative); } if(!isNative) { //Only mutate batches that are constructed by this serde vectorizedRowBatch.reset(); - rootVector.setValueCount(batchSize); - } else { - rootVector.setValueCount(vectorizedRowBatch.size); } + rootVector.setValueCount(size); batchSize = 0; VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(rootVector); @@ -266,7 +266,6 @@ public class Serializer { case STRUCT: return ArrowType.Struct.INSTANCE; case MAP: - return ArrowType.List.INSTANCE; case UNION: default: throw new IllegalArgumentException(); @@ -288,31 +287,11 @@ public class Serializer { case UNION: writeUnion(arrowVector, hiveVector, typeInfo, size, vectorizedRowBatch, isNative); break; - case MAP: - writeMap((ListVector) arrowVector, (MapColumnVector) hiveVector, (MapTypeInfo) typeInfo, size, vectorizedRowBatch, isNative); - break; default: throw new IllegalArgumentException(); } } - private static void writeMap(ListVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo, - int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) { - final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo); - final ListColumnVector structListVector = toStructListVector(hiveVector); - - write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative); - - final ArrowBuf validityBuffer = arrowVector.getValidityBuffer(); - for (int rowIndex = 0; rowIndex < size; rowIndex++) { - if (hiveVector.isNull[rowIndex]) { - BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0); - } else { - BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex); - } - } - } - private static void writeUnion(FieldVector arrowVector, ColumnVector hiveVector, TypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) { final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index af73ee6..5b3ca43 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -40,7 +40,6 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.RandomTypeUtil; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -86,7 +85,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hive.common.util.DateUtils; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; @@ -286,7 +284,7 @@ public class VectorRandomRowSource { } public enum SupportedTypes { - ALL, PRIMITIVES, ALL_EXCEPT_MAP + ALL, PRIMITIVES, ALL_EXCEPT_MAP, ALL_EXCEPT_MAP_UNION } public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth) { @@ -363,6 +361,17 @@ public class VectorRandomRowSource { "map" }; + private static String[] possibleHiveComplexTypeNamesWithoutMap = { + "array", + "struct", + "uniontype" + }; + + private static String[] possibleHiveComplexTypeNamesWithoutMapUnion = { + "array", + "struct" + }; + public static String getRandomTypeName(Random random, SupportedTypes supportedTypes, Set<String> allowedTypeNameSet) { @@ -376,7 +385,12 @@ public class VectorRandomRowSource { typeName = possibleHivePrimitiveTypeNames[random.nextInt(possibleHivePrimitiveTypeNames.length)]; break; case ALL_EXCEPT_MAP: - typeName = possibleHiveComplexTypeNames[random.nextInt(possibleHiveComplexTypeNames.length - 1)]; + typeName = possibleHiveComplexTypeNamesWithoutMap[random.nextInt( + possibleHiveComplexTypeNamesWithoutMap.length)]; + break; + case ALL_EXCEPT_MAP_UNION: + typeName = possibleHiveComplexTypeNamesWithoutMapUnion[random.nextInt( + possibleHiveComplexTypeNamesWithoutMapUnion.length)]; break; case ALL: typeName = possibleHiveComplexTypeNames[random.nextInt(possibleHiveComplexTypeNames.length)]; @@ -583,10 +597,16 @@ public class VectorRandomRowSource { if (allTypes) { switch (supportedTypes) { case ALL: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; + columnCount = possibleHivePrimitiveTypeNames.length + + possibleHiveComplexTypeNames.length; break; case ALL_EXCEPT_MAP: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; + columnCount = possibleHivePrimitiveTypeNames.length + + possibleHiveComplexTypeNamesWithoutMap.length; + break; + case ALL_EXCEPT_MAP_UNION: + columnCount = possibleHivePrimitiveTypeNames.length + + possibleHiveComplexTypeNamesWithoutMapUnion.length; break; case PRIMITIVES: columnCount = possibleHivePrimitiveTypeNames.length; @@ -627,7 +647,10 @@ public class VectorRandomRowSource { maxTypeNum = possibleHivePrimitiveTypeNames.length; break; case ALL_EXCEPT_MAP: - maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; + maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNamesWithoutMap.length - 1; + break; + case ALL_EXCEPT_MAP_UNION: + maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNamesWithoutMapUnion.length; break; case ALL: maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; @@ -656,9 +679,23 @@ public class VectorRandomRowSource { if (typeNum < possibleHivePrimitiveTypeNames.length) { typeName = possibleHivePrimitiveTypeNames[typeNum]; } else { - typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length]; + switch (supportedTypes) { + case ALL: + typeName = possibleHiveComplexTypeNames[typeNum - + possibleHivePrimitiveTypeNames.length]; + break; + case ALL_EXCEPT_MAP: + typeName = possibleHiveComplexTypeNamesWithoutMap[typeNum - + possibleHivePrimitiveTypeNames.length]; + break; + case ALL_EXCEPT_MAP_UNION: + typeName = possibleHiveComplexTypeNamesWithoutMapUnion[typeNum - + possibleHivePrimitiveTypeNames.length]; + break; + default: + throw new IllegalArgumentException(); + } } - } String decoratedTypeName = http://git-wip-us.apache.org/repos/asf/hive/blob/a4ef8df4/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java index c9a5812..057f2ad 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/arrow/TestArrowColumnarBatchSerDe.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; @@ -58,6 +59,9 @@ import org.apache.hadoop.io.Text; import org.junit.Before; import org.junit.Test; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -204,11 +208,30 @@ public class TestArrowColumnarBatchSerDe { serializeAndDeserialize(serDe, rows, rowOI); } + private StructObjectInspector initSerDe(AbstractSerDe serDe, TypeInfo[] typeInfos) + throws SerDeException { + List<String> fieldNameList = new ArrayList<>(); + List<String> fieldTypeList = new ArrayList<>(); + + for (int i = 0; i < typeInfos.length; i++) { + fieldNameList.add("col" + i); + fieldTypeList.add(typeInfos[i].getTypeName()); + } + + Properties schemaProperties = new Properties(); + schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(fieldNameList)); + schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + Joiner.on(',').join(fieldTypeList)); + SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null); + return (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + TypeInfoFactory.getStructTypeInfo(fieldNameList, Arrays.asList(typeInfos))); + } + private StructObjectInspector initSerDe(AbstractSerDe serDe, String[][] schema) throws SerDeException { - List<String> fieldNameList = newArrayList(); - List<String> fieldTypeList = newArrayList(); - List<TypeInfo> typeInfoList = newArrayList(); + List<String> fieldNameList = new ArrayList<>(); + List<String> fieldTypeList = new ArrayList<>(); + List<TypeInfo> typeInfoList = new ArrayList<>(); for (String[] nameAndType : schema) { String name = nameAndType[0]; @@ -218,12 +241,10 @@ public class TestArrowColumnarBatchSerDe { typeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(type)); } - String fieldNames = Joiner.on(',').join(fieldNameList); - String fieldTypes = Joiner.on(',').join(fieldTypeList); - Properties schemaProperties = new Properties(); - schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); - schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); + schemaProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(fieldNameList)); + schemaProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + Joiner.on(',').join(fieldTypeList)); SerDeUtils.initializeSerDe(serDe, conf, schemaProperties, null); return (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( TypeInfoFactory.getStructTypeInfo(fieldNameList, typeInfoList)); @@ -251,6 +272,9 @@ public class TestArrowColumnarBatchSerDe { for (int fieldIndex = 0; fieldIndex < fields.size(); fieldIndex++) { final StructField field = fields.get(fieldIndex); final ObjectInspector fieldObjInspector = field.getFieldObjectInspector(); + if (row[fieldIndex] == null && deserializedRow[fieldIndex] == null) { + continue; + } switch (fieldObjInspector.getCategory()) { case PRIMITIVE: final PrimitiveObjectInspector primitiveObjInspector = @@ -262,13 +286,38 @@ public class TestArrowColumnarBatchSerDe { assertEquals(Objects.toString(row[fieldIndex]), Objects.toString(deserializedRow[fieldIndex])); break; + case TIMESTAMP: { + TimestampWritableV2 source = (TimestampWritableV2) row[fieldIndex]; + TimestampWritableV2 deserialized = + (TimestampWritableV2) deserializedRow[fieldIndex]; + long sourceMilli = source.getTimestamp().toEpochMilli(); + long deserializedMilli = deserialized.getTimestamp().toEpochMilli(); + assertEquals(sourceMilli, deserializedMilli); + break; + } + case INTERVAL_DAY_TIME: { + HiveIntervalDayTimeWritable source = + (HiveIntervalDayTimeWritable) row[fieldIndex]; + HiveIntervalDayTimeWritable deserialized = + (HiveIntervalDayTimeWritable) deserializedRow[fieldIndex]; + assertEquals(source.getHiveIntervalDayTime().getTotalSeconds(), + deserialized.getHiveIntervalDayTime().getTotalSeconds()); + assertEquals(source.getHiveIntervalDayTime().getNanos() / 1_000_000, + deserialized.getHiveIntervalDayTime().getNanos() / 1_000_000); + break; + } default: assertEquals(row[fieldIndex], deserializedRow[fieldIndex]); break; } break; case STRUCT: - final Object[] rowStruct = (Object[]) row[fieldIndex]; + final Object[] rowStruct; + if (row[fieldIndex] instanceof ArrayList) { + rowStruct = ((ArrayList) row[fieldIndex]).toArray(); + } else { + rowStruct = (Object[]) row[fieldIndex]; + } final List deserializedRowStruct = (List) deserializedRow[fieldIndex]; if (rowStruct == null) { assertNull(deserializedRowStruct); @@ -280,26 +329,26 @@ public class TestArrowColumnarBatchSerDe { case UNION: assertEquals(row[fieldIndex], deserializedRow[fieldIndex]); break; - case MAP: - final Map rowMap = (Map) row[fieldIndex]; - final Map deserializedRowMap = (Map) deserializedRow[fieldIndex]; - if (rowMap == null) { - assertNull(deserializedRowMap); - } else { - final Set rowMapKeySet = rowMap.keySet(); - final Set deserializedRowMapKeySet = deserializedRowMap.keySet(); - assertEquals(rowMapKeySet, deserializedRowMapKeySet); - for (Object key : rowMapKeySet) { - assertEquals(rowMap.get(key), deserializedRowMap.get(key)); - } - } - break; } } } } @Test + public void testRandom() throws SerDeException { + Random random = new Random(3); + for (int i = 0; i < 1; i++) { + VectorRandomRowSource source = new VectorRandomRowSource(); + source.init(random, VectorRandomRowSource.SupportedTypes.ALL_EXCEPT_MAP_UNION, 0); + Object[][] rows = source.randomRows(100); + + ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe(); + StructObjectInspector structObjectInspector = initSerDe(serDe, source.typeInfos()); + serializeAndDeserialize(serDe, rows, structObjectInspector); + } + } + + @Test public void testComprehensive() throws SerDeException { String[][] schema = { {"datatypes.c1", "int"}, @@ -307,17 +356,13 @@ public class TestArrowColumnarBatchSerDe { {"datatypes.c3", "double"}, {"datatypes.c4", "string"}, {"datatypes.c5", "array<int>"}, - {"datatypes.c6", "map<int,string>"}, - {"datatypes.c7", "map<string,string>"}, {"datatypes.c8", "struct<r:string,s:int,t:double>"}, {"datatypes.c9", "tinyint"}, {"datatypes.c10", "smallint"}, {"datatypes.c11", "float"}, {"datatypes.c12", "bigint"}, {"datatypes.c13", "array<array<string>>"}, - {"datatypes.c14", "map<int,map<int,int>>"}, {"datatypes.c15", "struct<r:int,s:struct<a:int,b:string>>"}, - {"datatypes.c16", "array<struct<m:map<string,string>,n:int>>"}, {"datatypes.c17", "timestamp"}, {"datatypes.c18", "decimal(16,7)"}, {"datatypes.c19", "binary"}, @@ -334,12 +379,6 @@ public class TestArrowColumnarBatchSerDe { doubleW(0), // c3:double text("Hello"), // c4:string newArrayList(intW(0), intW(1), intW(2)), // c5:array<int> - Maps.toMap( - newArrayList(intW(0), intW(1), intW(2)), - input -> text("Number " + input)), // c6:map<int,string> - Maps.toMap( - newArrayList(text("apple"), text("banana"), text("carrot")), - input -> text(input.toString().toUpperCase())), // c7:map<string,string> new Object[] {text("0"), intW(1), doubleW(2)}, // c8:struct<r:string,s:int,t:double> byteW(0), // c9:tinyint shortW(0), // c10:smallint @@ -348,22 +387,11 @@ public class TestArrowColumnarBatchSerDe { newArrayList( newArrayList(text("a"), text("b"), text("c")), newArrayList(text("A"), text("B"), text("C"))), // c13:array<array<string>> - Maps.toMap( - newArrayList(intW(0), intW(1), intW(2)), - x -> Maps.toMap( - newArrayList(x, intW(x.get() * 2)), - y -> y)), // c14:map<int,map<int,int>> new Object[] { intW(0), newArrayList( intW(1), text("Hello"))}, // c15:struct<r:int,s:struct<a:int,b:string>> - Collections.singletonList( - newArrayList( - Maps.toMap( - newArrayList(text("hello")), - input -> text(input.toString().toUpperCase())), - intW(0))), // c16:array<struct<m:map<string,string>,n:int>> new TimestampWritableV2(TIMESTAMP), // c17:timestamp decimalW(HiveDecimal.create(0, 0)), // c18:decimal(16,7) new BytesWritable("Hello".getBytes()), // c19:binary @@ -373,8 +401,7 @@ public class TestArrowColumnarBatchSerDe { new BytesWritable("world!".getBytes()), // c23:binary }, { null, null, null, null, null, null, null, null, null, null, // c1-c10 - null, null, null, null, null, null, null, null, null, null, // c11-c20 - null, null, null, // c21-c23 + null, null, null, null, null, null, null, null, null, // c21-c23 } }; @@ -690,98 +717,4 @@ public class TestArrowColumnarBatchSerDe { initAndSerializeAndDeserialize(schema, toStruct(BINARY_ROWS)); } - - private Object[][] toMap(Object[][] rows) { - Map[][] array = new Map[rows.length][]; - for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) { - Object[] row = rows[rowIndex]; - array[rowIndex] = new Map[row.length]; - for (int fieldIndex = 0; fieldIndex < row.length; fieldIndex++) { - Map map = Maps.newHashMap(); - map.put(new Text(String.valueOf(row[fieldIndex])), row[fieldIndex]); - array[rowIndex][fieldIndex] = map; - } - } - return array; - } - - @Test - public void testMapInteger() throws SerDeException { - String[][] schema = { - {"tinyint_map", "map<string,tinyint>"}, - {"smallint_map", "map<string,smallint>"}, - {"int_map", "map<string,int>"}, - {"bigint_map", "map<string,bigint>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(INTEGER_ROWS)); - } - - @Test - public void testMapFloat() throws SerDeException { - String[][] schema = { - {"float_map", "map<string,float>"}, - {"double_map", "map<string,double>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(FLOAT_ROWS)); - } - - @Test - public void testMapString() throws SerDeException { - String[][] schema = { - {"string_map", "map<string,string>"}, - {"char_map", "map<string,char(10)>"}, - {"varchar_map", "map<string,varchar(10)>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(STRING_ROWS)); - } - - @Test - public void testMapDTI() throws SerDeException { - String[][] schema = { - {"date_map", "map<string,date>"}, - {"timestamp_map", "map<string,timestamp>"}, - {"interval_year_month_map", "map<string,interval_year_month>"}, - {"interval_day_time_map", "map<string,interval_day_time>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(DTI_ROWS)); - } - - @Test - public void testMapBoolean() throws SerDeException { - String[][] schema = { - {"boolean_map", "map<string,boolean>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(BOOLEAN_ROWS)); - } - - @Test - public void testMapBinary() throws SerDeException { - String[][] schema = { - {"binary_map", "map<string,binary>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(BINARY_ROWS)); - } - - public void testMapDecimal() throws SerDeException { - String[][] schema = { - {"decimal_map", "map<string,decimal(38,10)>"}, - }; - - initAndSerializeAndDeserialize(schema, toMap(DECIMAL_ROWS)); - } - - public void testListDecimal() throws SerDeException { - String[][] schema = { - {"decimal_list", "array<decimal(38,10)>"}, - }; - - initAndSerializeAndDeserialize(schema, toList(DECIMAL_ROWS)); - } - }
