marton-bod commented on a change in pull request #2126:
URL: https://github.com/apache/iceberg/pull/2126#discussion_r561108864
##########
File path:
mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -296,6 +300,45 @@ public void testInsert() throws IOException {
HiveIcebergTestUtils.validateData(table, new
ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), 0);
}
+ @Test
+ public void testInsertSupportedTypes() throws IOException {
+ Assume.assumeTrue("Tez write is not implemented yet",
executionEngine.equals("mr"));
+ for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
+ Type type = SUPPORTED_TYPES.get(i);
+ // TODO: remove this filter when issue #1881 is resolved
+ if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
+ continue;
+ }
+ // TODO: remove this filter when we figure out how we could test binary
types
+ if (type.equals(Types.BinaryType.get()) ||
type.equals(Types.FixedType.ofLength(11))) {
+ continue;
+ }
+ String tableName = type.typeId().toString().toLowerCase() + "_table_" +
i;
+ String columnName = type.typeId().toString().toLowerCase() + "_column";
+
+ Schema schema = new Schema(required(1, "id", Types.LongType.get()),
required(2, columnName, type));
+ List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
+ List<Record> records = new ArrayList<>();
+ if (type == Types.TimestampType.withoutZone()) {
+ expected.forEach(r -> records.add(r.copy()));
+ records.forEach(r -> r.set(1, Timestamp.valueOf((LocalDateTime)
r.get(1))));
+ } else if (type == Types.TimestampType.withZone()) {
+ expected.forEach(r -> records.add(r.copy()));
+ records.forEach(r -> r.set(1, Timestamp.from(((OffsetDateTime)
r.get(1)).toInstant())));
+ } else {
+ records.addAll(expected);
+ }
+
+ Table table = testTables.createTable(shell, tableName, schema,
fileFormat, ImmutableList.of());
+ StringBuilder query = new StringBuilder("INSERT INTO
").append(tableName).append(" VALUES")
+ .append(String.join(",", records.stream().map(r ->
+ String.format(type == Types.BooleanType.get() ? "(%s,%s)" :
"(%s,'%s')", r.get(0),
+ r.get(1))).collect(Collectors.toList())));
Review comment:
I think you can use `Collectors.joining(",")` here; then you wouldn't need
the extra `String.join` step or the intermediate list.
##########
File path:
mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -296,6 +300,45 @@ public void testInsert() throws IOException {
HiveIcebergTestUtils.validateData(table, new
ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), 0);
}
+ @Test
+ public void testInsertSupportedTypes() throws IOException {
+ Assume.assumeTrue("Tez write is not implemented yet",
executionEngine.equals("mr"));
+ for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
+ Type type = SUPPORTED_TYPES.get(i);
+ // TODO: remove this filter when issue #1881 is resolved
+ if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
+ continue;
+ }
+ // TODO: remove this filter when we figure out how we could test binary
types
+ if (type.equals(Types.BinaryType.get()) ||
type.equals(Types.FixedType.ofLength(11))) {
+ continue;
+ }
+ String tableName = type.typeId().toString().toLowerCase() + "_table_" +
i;
+ String columnName = type.typeId().toString().toLowerCase() + "_column";
+
+ Schema schema = new Schema(required(1, "id", Types.LongType.get()),
required(2, columnName, type));
+ List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
+ List<Record> records = new ArrayList<>();
+ if (type == Types.TimestampType.withoutZone()) {
+ expected.forEach(r -> records.add(r.copy()));
+ records.forEach(r -> r.set(1, Timestamp.valueOf((LocalDateTime)
r.get(1))));
+ } else if (type == Types.TimestampType.withZone()) {
+ expected.forEach(r -> records.add(r.copy()));
+ records.forEach(r -> r.set(1, Timestamp.from(((OffsetDateTime)
r.get(1)).toInstant())));
+ } else {
+ records.addAll(expected);
+ }
+
+ Table table = testTables.createTable(shell, tableName, schema,
fileFormat, ImmutableList.of());
+ StringBuilder query = new StringBuilder("INSERT INTO
").append(tableName).append(" VALUES")
+ .append(String.join(",", records.stream().map(r ->
+ String.format(type == Types.BooleanType.get() ? "(%s,%s)" :
"(%s,'%s')", r.get(0),
Review comment:
Can you please add a small clarifying comment as to why we need the
special case for the boolean type?
##########
File path:
mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -296,6 +300,45 @@ public void testInsert() throws IOException {
HiveIcebergTestUtils.validateData(table, new
ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), 0);
}
+ @Test
+ public void testInsertSupportedTypes() throws IOException {
+ Assume.assumeTrue("Tez write is not implemented yet",
executionEngine.equals("mr"));
+ for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
+ Type type = SUPPORTED_TYPES.get(i);
+ // TODO: remove this filter when issue #1881 is resolved
+ if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
+ continue;
+ }
+ // TODO: remove this filter when we figure out how we could test binary
types
+ if (type.equals(Types.BinaryType.get()) ||
type.equals(Types.FixedType.ofLength(11))) {
+ continue;
+ }
+ String tableName = type.typeId().toString().toLowerCase() + "_table_" +
i;
+ String columnName = type.typeId().toString().toLowerCase() + "_column";
+
+ Schema schema = new Schema(required(1, "id", Types.LongType.get()),
required(2, columnName, type));
+ List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
+ List<Record> records = new ArrayList<>();
Review comment:
Minor nit: you can set the list's initial capacity to `expected.size()`.
##########
File path:
hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java
##########
@@ -69,4 +69,8 @@ public Object copyObject(Object o) {
}
}
+ @Override
+ public LocalDate convert(Object o) {
+ return o == null ? null : LocalDate.of(((Date) o).getYear(), ((Date)
o).getMonth(), ((Date) o).getDay());
Review comment:
nit: can we cast to `Date` only once and reuse the result? It might help
slightly with readability too.
##########
File path:
mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -88,9 +91,10 @@
private static final List<Type> SUPPORTED_TYPES =
ImmutableList.of(Types.BooleanType.get(), Types.IntegerType.get(),
Types.LongType.get(),
- Types.FloatType.get(), Types.DoubleType.get(),
Types.DateType.get(), Types.TimestampType.withZone(),
+ Types.FloatType.get(), Types.DoubleType.get(),
Types.DateType.get(),
+ Types.TimestampType.withZone(),
Types.TimestampType.withoutZone(), Types.StringType.get(),
Types.BinaryType.get(),
- Types.DecimalType.of(3, 1), Types.UUIDType.get(),
Types.FixedType.ofLength(5),
+ Types.DecimalType.of(3, 1), Types.UUIDType.get(),
Types.FixedType.ofLength(11),
Review comment:
Can we write different decimal scales/precisions? If so, can we add a
second decimal type here with a different scale, so that it is tested too?
##########
File path:
mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimeObjectInspector.java
##########
@@ -49,8 +50,11 @@ public Text getPrimitiveWritableObject(Object o) {
}
@Override
- public Object convert(Object o) {
- return o == null ? null : o.toString();
+ public LocalTime convert(Object o) {
+ if (o == null) {
+ return null;
+ }
+ return LocalTime.parse(o.toString());
Review comment:
Can we rely on the string representation of `o` always being in a format
that `LocalTime.parse` accepts here?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]