codope commented on a change in pull request #5004:
URL: https://github.com/apache/hudi/pull/5004#discussion_r832421782



##########
File path: hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
##########
@@ -160,6 +187,89 @@ public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithM
     }
   }
 
+  @Override
+  @Test
+  public void testWriteReadWithEvolvedSchema() throws Exception {
+    // Disable the test with evolved schema for HFile since it's not supported
+  }
+
+  @Test
+  public void testReadHFileFormatRecords() throws Exception {
+    writeFileWithSimpleSchema();
+    FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration());
+    byte[] content = FileIOUtils.readAsByteArray(
+        fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen());
+    // Reading byte array in HFile format, without actual file path
+    HoodieHFileReader<GenericRecord> hfileReader =
+        new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content);
+    Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
+    assertEquals(NUM_RECORDS, hfileReader.getTotalRecords());
+    verifySimpleRecords(hfileReader.getRecordIterator(avroSchema));
+  }
+
+  @Test
+  public void testReaderGetRecordIterator() throws Exception {
+    writeFileWithSimpleSchema();
+    HoodieHFileReader<GenericRecord> hfileReader =
+        (HoodieHFileReader<GenericRecord>) createReader(new Configuration());
+    List<String> keys =
+        IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20))
+            .mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList());
+    Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
+    Iterator<GenericRecord> iterator = hfileReader.getRecordIterator(keys, avroSchema);
+
+    List<Integer> expectedIds =
+        IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20))
+            .boxed().collect(Collectors.toList());
+    int index = 0;
+    while (iterator.hasNext()) {
+      GenericRecord record = iterator.next();
+      String key = "key" + String.format("%02d", expectedIds.get(index));
+      assertEquals(key, record.get("_row_key").toString());
+      assertEquals(Integer.toString(expectedIds.get(index)), record.get("time").toString());
+      assertEquals(expectedIds.get(index), record.get("number"));
+      index++;
+    }
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = {
+      "/hudi_0_9_hbase_1_2_3", "/hudi_0_10_hbase_1_2_3", 
"/hudi_0_11_hbase_2_4_9"})
+  public void testHoodieHFileCompatibility(String hfilePrefix) throws 
IOException {

Review comment:
       I like the way it's being tested, but we will need to update the fixtures as and when there are changes to the sources from which they were generated. Those changes shouldn't be too frequent, but it's just something to keep in mind.
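
For illustration, one way such a fixture check could be kept self-describing (the ".hfile" resource suffix and the expected fixture contents below are assumptions on my part, not necessarily what this PR ships):

    // Rough sketch: read a pre-generated fixture from test resources and
    // verify the current reader still parses it. Assumes each fixture is a
    // simple-schema HFile with NUM_RECORDS records, stored as <prefix>.hfile.
    @ParameterizedTest
    @ValueSource(strings = {"/hudi_0_9_hbase_1_2_3", "/hudi_0_10_hbase_1_2_3", "/hudi_0_11_hbase_2_4_9"})
    public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException {
      FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration());
      byte[] content = FileIOUtils.readAsByteArray(
          TestHoodieHFileReaderWriter.class.getResourceAsStream(hfilePrefix + ".hfile"));
      // Byte-array-based reader, as used elsewhere in this test class
      HoodieHFileReader<GenericRecord> reader =
          new HoodieHFileReader<>(fs, new Path(DUMMY_BASE_PATH), content);
      assertEquals(NUM_RECORDS, reader.getTotalRecords());
      Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc");
      verifySimpleRecords(reader.getRecordIterator(avroSchema));
    }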

##########
File path: hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
##########
@@ -160,6 +187,89 @@ public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithM
     }
   }
 
+  @Override
+  @Test
+  public void testWriteReadWithEvolvedSchema() throws Exception {
+    // Disable the test with evolved schema for HFile since it's not supported

Review comment:
       Let's remove it if it's not needed or track it in a JIRA if you plan to 
add support later.
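
If it stays, one option is to mark it explicitly rather than leave an empty body, e.g. (the JIRA id below is a placeholder, not a real ticket):

    @Override
    @Test
    @Disabled("Evolved-schema read is not supported for HFile; tracked in HUDI-XXXX (placeholder)")
    public void testWriteReadWithEvolvedSchema() {
      // Intentionally skipped until HFile supports schema evolution.
    }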

##########
File path: hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java
##########
@@ -73,189 +60,41 @@ private HoodieOrcWriter createOrcWriter(Schema avroSchema) throws Exception {
     HoodieOrcConfig config = new HoodieOrcConfig(conf, CompressionKind.ZLIB, orcStripSize, orcBlockSize, maxFileSize, filter);
     TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class);
     String instantTime = "000";
-    return new HoodieOrcWriter(instantTime, filePath, config, avroSchema, mockTaskContextSupplier);
+    return new HoodieOrcWriter<>(instantTime, getFilePath(), config, avroSchema, mockTaskContextSupplier);
   }
 
-  @Test
-  public void testWriteReadMetadata() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
+  @Override
+  protected HoodieFileReader<GenericRecord> createReader(
+      Configuration conf) throws Exception {
+    return HoodieFileReaderFactory.getFileReader(conf, getFilePath());
+  }
 
-    Configuration conf = new Configuration();
-    Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
+  @Override
+  protected void verifyMetadata(Configuration conf) throws IOException {
+    Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf));
     assertEquals(4, orcReader.getMetadataKeys().size());
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MIN_RECORD_KEY_FOOTER));
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MAX_RECORD_KEY_FOOTER));
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY));
     assertTrue(orcReader.getMetadataKeys().contains(AVRO_SCHEMA_METADATA_KEY));
     assertEquals(CompressionKind.ZLIB.name(), orcReader.getCompressionKind().toString());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    BloomFilter filter = hoodieReader.readBloomFilter();
-    for (int i = 0; i < 3; i++) {
-      assertTrue(filter.mightContain("key" + i));
-    }
-    assertFalse(filter.mightContain("non-existent-key"));
-    assertEquals(3, hoodieReader.getTotalRecords());
-    String[] minMaxRecordKeys = hoodieReader.readMinMaxRecordKeys();
-    assertEquals(2, minMaxRecordKeys.length);
-    assertEquals("key0", minMaxRecordKeys[0]);
-    assertEquals("key2", minMaxRecordKeys[1]);
-  }
-
-  @Test
-  public void testWriteReadPrimitiveRecord() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
-    assertEquals("struct<_row_key:string,time:string,number:int>", orcReader.getSchema().toString());
-    assertEquals(3, orcReader.getNumberOfRows());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator();
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      index++;
-    }
+    assertEquals(NUM_RECORDS, orcReader.getNumberOfRows());
   }
 
-  @Test
-  public void testWriteReadComplexRecord() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithUDT.avsc");
-    Schema udtSchema = avroSchema.getField("driver").schema().getTypes().get(1);
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      GenericRecord innerRecord = new GenericData.Record(udtSchema);
-      innerRecord.put("driver_name", "driver" + i);
-      innerRecord.put("list", Collections.singletonList(i));
-      innerRecord.put("map", Collections.singletonMap("key" + i, "value" + i));
-      record.put("driver", innerRecord);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
-    assertEquals("struct<_row_key:string,time:string,number:int,driver:struct<driver_name:string,list:array<int>,map:map<string,string>>>",
-        reader.getSchema().toString());
-    assertEquals(3, reader.getNumberOfRows());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator();
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      GenericRecord innerRecord = (GenericRecord) record.get("driver");
-      assertEquals("driver" + index, 
innerRecord.get("driver_name").toString());
-      assertEquals(1, ((List<?>)innerRecord.get("list")).size());
-      assertEquals(index, ((List<?>)innerRecord.get("list")).get(0));
-      assertEquals("value" + index, 
((Map<?,?>)innerRecord.get("map")).get("key" + index).toString());
-      index++;
+  @Override
+  protected void verifySchema(Configuration conf, String schemaPath) throws IOException {
+    Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf));
+    if ("/exampleSchema.avsc".equals(schemaPath)) {
+      assertEquals("struct<_row_key:string,time:string,number:int>",
+          orcReader.getSchema().toString());
+    } else if ("/exampleSchemaWithUDT.avsc".equals(schemaPath)) {
+      assertEquals("struct<_row_key:string,time:string,number:int,driver:struct<driver_name:string,list:array<int>,map:map<string,string>>>",
+          orcReader.getSchema().toString());
     }
   }
 
-  @Test
-  public void testWriteReadWithEvolvedSchema() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Schema evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchema.avsc");
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator(evolvedSchema);
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaChangeOrder.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaColumnRequire.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaColumnType.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(Integer.toString(index), record.get("number").toString());
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaDeleteColumn.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertNull(record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
+  @Override
+  public void testReaderFilterRowKeys() {
+    // TODO: fix filterRowKeys test for ORC due to a bug in ORC logic

Review comment:
       What's the bug here? Are we tracking it?
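
For context, a minimal sketch of what the skipped check would exercise, assuming the reader exposes a filterRowKeys(Set<String>) lookup like the other format readers (signature assumed here for illustration):

    // Sketch: the reader should return exactly the candidate keys present
    // in the file; the ORC path reportedly gets this wrong today.
    Set<String> candidates = new HashSet<>(Arrays.asList("key00", "key01", "non-existent-key"));
    Set<String> found = createReader(new Configuration()).filterRowKeys(candidates);
    assertEquals(new HashSet<>(Arrays.asList("key00", "key01")), found);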



