paul-rogers commented on a change in pull request #1870: DRILL-7359: Add support for DICT type in RowSet Framework URL: https://github.com/apache/drill/pull/1870#discussion_r347115215
########## File path: exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestRowSet.java ########## @@ -586,6 +595,490 @@ public void testRepeatedMapStructure() { RowSetUtilities.verify(expected, actual); } + @Test + public void testDictStructure() { + final String dictName = "d"; + + final TupleMetadata schema = new SchemaBuilder() + .add("id", MinorType.INT) + .addDict(dictName, MinorType.INT) + .value(MinorType.VARCHAR) // required int + .resumeSchema() + .buildSchema(); + final ExtendableRowSet rowSet = fixture.rowSet(schema); + final RowSetWriter writer = rowSet.writer(); + + // Dict + // Pick out components and lightly test. (Assumes structure + // tested earlier is still valid, so no need to exhaustively + // test again.) + + assertEquals(ObjectType.ARRAY, writer.column(dictName).type()); + assertTrue(writer.column(dictName).schema().isDict()); + + final ScalarWriter idWriter = writer.column(0).scalar(); + final DictWriter dictWriter = writer.column(1).dict(); + + assertEquals(ValueType.INTEGER, dictWriter.keyType()); + assertEquals(ObjectType.SCALAR, dictWriter.valueType()); + + final ScalarWriter keyWriter = dictWriter.keyWriter(); + final ScalarWriter valueWriter = dictWriter.valueWriter().scalar(); + + assertEquals(ValueType.INTEGER, keyWriter.valueType()); + assertEquals(ValueType.STRING, valueWriter.valueType()); + + // Write data + idWriter.setInt(1); + + keyWriter.setInt(11); + valueWriter.setString("a"); + dictWriter.save(); // Advance to next entry position + keyWriter.setInt(12); + valueWriter.setString("b"); + dictWriter.save(); + writer.save(); + + idWriter.setInt(2); + + keyWriter.setInt(21); + valueWriter.setString("c"); + dictWriter.save(); + writer.save(); + + idWriter.setInt(3); + + keyWriter.setInt(31); + valueWriter.setString("d"); + dictWriter.save(); + keyWriter.setInt(32); + valueWriter.setString("e"); + dictWriter.save(); + writer.save(); + + // Finish the row set and get a reader. 
+ + final SingleRowSet actual = writer.done(); + final RowSetReader reader = actual.reader(); + + // Verify reader structure + + assertEquals(ObjectType.ARRAY, reader.column(dictName).type()); + + final DictReader dictReader = reader.dict(1); + assertEquals(ObjectType.ARRAY, dictReader.type()); + + assertEquals(ValueType.INTEGER, dictReader.keyColumnType()); + assertEquals(ObjectType.SCALAR, dictReader.valueColumnType()); + + // Row 1: get value reader with its position set to entry corresponding to a key + + assertTrue(reader.next()); + assertFalse(dictReader.isNull()); // dict itself is not null + + dictReader.getAsString(); + assertEquals("b", dictReader.getValueReader(12).scalar().getString()); + assertEquals("a", dictReader.getValueReader(11).scalar().getString()); + + // compare entire dict + Map<Object, Object> map = map(11, "a", 12, "b"); + assertEquals(map, dictReader.getObject()); + + // Row 2: get Object representation of value directly + + assertTrue(reader.next()); + assertEquals("c", dictReader.get(21)); + assertTrue(dictReader.getValueReader(22).isNull()); // the dict does not contain an entry with the key + + map = map(21, "c"); + assertEquals(map, dictReader.getObject()); + + // Row 3 + + assertTrue(reader.next()); + + assertEquals("d", dictReader.get(31)); + assertEquals("e", dictReader.get(32)); + + map = map(31, "d", 32, "e"); + assertEquals(map, dictReader.getObject()); + + assertFalse(reader.next()); + + // Verify that the dict accessor's value count was set. + + final DictVector dictVector = (DictVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(3, dictVector.getAccessor().getValueCount()); + + final SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(1, objArray(11, "a", 12, "b")) + .addRow(2, objArray(21, "c")) + .addRow(3, objArray(31, "d", 32, "e")) + .build(); + RowSetUtilities.verify(expected, actual); + } + + /** + * Utility method to bootstrap a map object easily. 
+ * + * @param entry key-value sequence + * @return map containing key-value pairs from passed sequence + */ + private Map<Object, Object> map(Object... entry) { + if (entry.length % 2 == 1) { + throw new IllegalArgumentException("Array length should be even."); + } + + Map<Object, Object> map = new HashMap<>(); + for (int i = 0; i < entry.length; i += 2) { + map.put(entry[i], entry[i + 1]); + } + return map; + } + + @Test + public void testDictStructureMapValue() { + final String dictName = "d"; + final int bScale = 1; + + final TupleMetadata schema = new SchemaBuilder() + .add("id", MinorType.INT) + .addDict(dictName, MinorType.INT) + .mapValue() + .add("a", MinorType.INT) + .add("b", MinorType.VARDECIMAL, 8, bScale) + .resumeDict() + .resumeSchema() + .buildSchema(); + final ExtendableRowSet rowSet = fixture.rowSet(schema); + final RowSetWriter writer = rowSet.writer(); + + // Dict with Map value + + assertEquals(ObjectType.ARRAY, writer.column(dictName).type()); + + final ScalarWriter idWriter = writer.scalar(0); + final DictWriter dictWriter = writer.column(1).dict(); + + assertEquals(ValueType.INTEGER, dictWriter.keyType()); + assertEquals(ObjectType.TUPLE, dictWriter.valueType()); + + final ScalarWriter keyWriter = dictWriter.keyWriter(); + final TupleWriter valueWriter = dictWriter.valueWriter().tuple(); + + assertEquals(ValueType.INTEGER, keyWriter.valueType()); + + ScalarWriter aWriter = valueWriter.scalar("a"); + ScalarWriter bWriter = valueWriter.scalar("b"); + assertEquals(ValueType.INTEGER, aWriter.valueType()); + assertEquals(ValueType.DECIMAL, bWriter.valueType()); + + // Write data + + idWriter.setInt(1); + + keyWriter.setInt(11); + aWriter.setInt(10); + bWriter.setDecimal(BigDecimal.valueOf(1)); + dictWriter.save(); // advance to next entry position + + keyWriter.setInt(12); + aWriter.setInt(11); + bWriter.setDecimal(BigDecimal.valueOf(2)); + dictWriter.save(); + + writer.save(); + + idWriter.setInt(2); + + keyWriter.setInt(21); + 
aWriter.setInt(20); + bWriter.setDecimal(BigDecimal.valueOf(3)); + dictWriter.save(); + + writer.save(); + + idWriter.setInt(3); + + keyWriter.setInt(31); + aWriter.setInt(30); + bWriter.setDecimal(BigDecimal.valueOf(4)); + dictWriter.save(); + + keyWriter.setInt(32); + aWriter.setInt(31); + bWriter.setDecimal(BigDecimal.valueOf(5)); + dictWriter.save(); + + keyWriter.setInt(33); + aWriter.setInt(32); + bWriter.setDecimal(BigDecimal.valueOf(6)); + dictWriter.save(); + + writer.save(); + + // Finish the row set and get a reader. + + final SingleRowSet actual = writer.done(); + final RowSetReader reader = actual.reader(); + + // Verify reader structure + + assertEquals(ObjectType.ARRAY, reader.column(dictName).type()); + + final DictReader dictReader = reader.dict(1); + assertEquals(ObjectType.ARRAY, dictReader.type()); + + assertEquals(ValueType.INTEGER, dictReader.keyColumnType()); + assertEquals(ObjectType.TUPLE, dictReader.valueColumnType()); + + // Row 1: get value reader with its position set to entry corresponding to a key + + assertTrue(reader.next()); + assertFalse(dictReader.isNull()); // dict itself is not null + + TupleReader valueReader = dictReader.getValueReader(12).tuple(); + assertEquals(11, valueReader.scalar("a").getInt()); + assertEquals(BigDecimal.valueOf(2.0), valueReader.scalar("b").getDecimal()); + + // MapReader#getObject() returns a List containing values for each column + // rather than mapping of column name to its value, hence List is expected for Dict's value.
+ Map<Object, Object> map = map( + 11, Arrays.asList(10, BigDecimal.valueOf(1.0)), + 12, Arrays.asList(11, BigDecimal.valueOf(2.0)) + ); + assertEquals(map, dictReader.getObject()); + + // Row 2: get value by key directly + + assertTrue(reader.next()); + assertEquals(Arrays.asList(20, BigDecimal.valueOf(3.0)), dictReader.get(21)); + assertTrue(dictReader.getValueReader(22).isNull()); + + map = map(21, Arrays.asList(20, BigDecimal.valueOf(3.0))); + assertEquals(map, dictReader.getObject()); + + // Row 3 + + assertTrue(reader.next()); + + valueReader = dictReader.getValueReader(32).tuple(); + assertFalse(valueReader.isNull()); + assertEquals(31, valueReader.scalar("a").getInt()); + assertEquals(BigDecimal.valueOf(5.0), valueReader.scalar("b").getDecimal()); + + valueReader = dictReader.getValueReader(31).tuple(); + assertEquals(30, valueReader.scalar("a").getInt()); + assertEquals(BigDecimal.valueOf(4.0), valueReader.scalar("b").getDecimal()); + + valueReader = dictReader.getValueReader(404).tuple(); + assertTrue(valueReader.isNull()); + + map = map( + 31, Arrays.asList(30, BigDecimal.valueOf(4.0)), + 32, Arrays.asList(31, BigDecimal.valueOf(5.0)), + 33, Arrays.asList(32, BigDecimal.valueOf(6.0)) + ); + assertEquals(map, dictReader.getObject()); + + assertFalse(reader.next()); + + // Verify that the dict accessor's value count was set. 
+ + final DictVector dictVector = (DictVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(3, dictVector.getAccessor().getValueCount()); + + final SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(1, objArray( + 11, objArray(10, BigDecimal.valueOf(1.0)), + 12, objArray(11, BigDecimal.valueOf(2.0)) + )) + .addRow(2, objArray(21, objArray(20, BigDecimal.valueOf(3.0)))) + .addRow(3, objArray( + 31, objArray(30, BigDecimal.valueOf(4.0)), + 32, objArray(31, BigDecimal.valueOf(5.0)), + 33, objArray(32, BigDecimal.valueOf(6.0)) + )) + .build(); + RowSetUtilities.verify(expected, actual); + } + + @Test + public void testRepeatedDictStructure() { + final String dictName = "d"; + final TupleMetadata schema = new SchemaBuilder() + .add("id", MinorType.INT) + .addDictArray(dictName, MinorType.INT) + .value(MinorType.VARCHAR) + .resumeSchema() + .buildSchema(); + final ExtendableRowSet rowSet = fixture.rowSet(schema); + final RowSetWriter writer = rowSet.writer(); + + // Repeated dict + + assertEquals(ObjectType.ARRAY, writer.column(dictName).type()); + + final ScalarWriter idWriter = writer.scalar(0); + + final ArrayWriter dictArrayWriter = writer.column(1).array(); + assertEquals(ObjectType.ARRAY, dictArrayWriter.entryType()); + + DictWriter dictWriter = (DictWriter) dictArrayWriter.array(); + + assertEquals(ValueType.INTEGER, dictWriter.keyType()); + assertEquals(ObjectType.SCALAR, dictWriter.valueType()); + + final ScalarWriter keyWriter = dictWriter.keyWriter(); + final ScalarWriter valueWriter = dictWriter.valueWriter().scalar(); + assertEquals(ValueType.INTEGER, keyWriter.valueType()); + assertEquals(ValueType.STRING, valueWriter.valueType()); + + // Write data + + idWriter.setInt(1); + + keyWriter.setInt(1); + valueWriter.setString("a"); + dictWriter.save(); // advance to next entry position + keyWriter.setInt(2); + valueWriter.setString("b"); + dictWriter.save(); + dictArrayWriter.save(); // advance to next array position + + 
keyWriter.setInt(3); + valueWriter.setString("c"); + dictWriter.save(); + dictArrayWriter.save(); + + writer.save(); // advance to next row + + idWriter.setInt(2); + + keyWriter.setInt(11); + valueWriter.setString("d"); + dictWriter.save(); + keyWriter.setInt(12); + valueWriter.setString("e"); + dictWriter.save(); + dictArrayWriter.save(); + + writer.save(); + + idWriter.setInt(3); + + keyWriter.setInt(21); + valueWriter.setString("f"); + dictWriter.save(); + keyWriter.setInt(22); + valueWriter.setString("g"); + dictWriter.save(); + keyWriter.setInt(23); + valueWriter.setString("h"); + dictWriter.save(); + dictArrayWriter.save(); + + keyWriter.setInt(24); + valueWriter.setString("i"); + dictWriter.save(); + keyWriter.setInt(25); + valueWriter.setString("j"); + dictWriter.save(); + keyWriter.setInt(26); + valueWriter.setString("k"); + dictWriter.save(); + keyWriter.setInt(27); + valueWriter.setString("l"); + dictWriter.save(); + keyWriter.setInt(28); + valueWriter.setString("m"); + dictWriter.save(); + dictArrayWriter.save(); + + writer.save(); + + // Finish the row set and get a reader. 
+ + final SingleRowSet actual = writer.done(); + final RowSetReader reader = actual.reader(); + + // Verify reader structure + + assertEquals(ObjectType.ARRAY, reader.column(dictName).type()); + + final ArrayReader dictArrayReader = reader.array(1); + assertEquals(ObjectType.ARRAY, dictArrayReader.entryType()); + + final DictReader dictReader = dictArrayReader.entry().dict(); + assertEquals(ValueType.INTEGER, dictReader.keyColumnType()); + assertEquals(ObjectType.SCALAR, dictReader.valueColumnType()); + + // Row 1 + + assertTrue(reader.next()); + assertFalse(dictArrayReader.isNull()); // array is not null + + assertTrue(dictArrayReader.next()); + assertFalse(dictArrayReader.isNull()); // first dict is not null + assertEquals("b", dictReader.getValueReader(2).getObject()); + assertEquals("a", dictReader.getValueReader(1).getObject()); + assertTrue(dictReader.getValueReader(404).isNull()); // no entry for given key + + assertTrue(dictArrayReader.next()); + assertEquals("c", dictReader.getValueReader(3).getObject()); + assertTrue(dictReader.getValueReader(1).isNull()); + + assertEquals(Arrays.asList(map(1, "a", 2, "b"), map(3, "c")), dictArrayReader.getObject()); + + // Row 2 + + assertTrue(reader.next()); + + assertTrue(dictArrayReader.next()); + assertEquals("d", dictReader.getValueReader(11).scalar().getString()); + assertTrue(dictReader.getValueReader(1).scalar().isNull()); + assertEquals("e", dictReader.getValueReader(12).scalar().getString()); + + // Row 3: use explicit positioning + + assertTrue(reader.next()); + dictArrayReader.setPosn(1); + assertEquals("i", dictReader.get(24)); + assertEquals("k", dictReader.get(26)); + assertEquals("m", dictReader.get(28)); + assertNull(dictReader.get(35)); + assertEquals("l", dictReader.get(27)); + + Map<Object, Object> element1 = map(24, "i", 25, "j", 26, "k", 27, "l", 28, "m"); + assertEquals(element1, dictReader.getObject()); + + dictArrayReader.setPosn(0); + assertEquals("h", dictReader.getValueReader(23).getObject()); 
+ assertEquals("f", dictReader.getValueReader(21).getObject()); + assertNull(dictReader.getValueReader(24).getObject()); + + Map<Object, Object> element0 = map(21, "f", 22, "g", 23, "h"); + assertEquals(element0, dictReader.getObject()); + + assertEquals(Arrays.asList(element0, element1), dictArrayReader.getObject()); + + assertFalse(reader.next()); + + // Verify that the dict accessor's value count was set. + + final RepeatedDictVector vector = (RepeatedDictVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(3, vector.getAccessor().getValueCount()); + + final SingleRowSet expected = fixture.rowSetBuilder(schema) + .addRow(1, objArray(objArray(1, "a", 2, "b"), objArray(3, "c"))) + .addRow(2, objArray(singleObjArray(objArray(11, "d", 12, "e")))) + .addRow(3, objArray( + objArray(21, "f", 22, "g", 23, "h"), + objArray(24, "i", 25, "j", 26, "k", 27, "l", 28, "m"))) + .build(); + RowSetUtilities.verify(expected, actual); + } + Review comment: Thanks for the complete unit tests. Lots of work, but they pay off: they let us make changes without worrying about breaking anything. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services