yihua commented on a change in pull request #5004:
URL: https://github.com/apache/hudi/pull/5004#discussion_r832497509



##########
File path: hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java
##########
@@ -73,189 +60,41 @@ private HoodieOrcWriter createOrcWriter(Schema avroSchema) throws Exception {
     HoodieOrcConfig config = new HoodieOrcConfig(conf, CompressionKind.ZLIB, orcStripSize, orcBlockSize, maxFileSize, filter);
     TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class);
     String instantTime = "000";
-    return new HoodieOrcWriter(instantTime, filePath, config, avroSchema, mockTaskContextSupplier);
+    return new HoodieOrcWriter<>(instantTime, getFilePath(), config, avroSchema, mockTaskContextSupplier);
   }
 
-  @Test
-  public void testWriteReadMetadata() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
+  @Override
+  protected HoodieFileReader<GenericRecord> createReader(
+      Configuration conf) throws Exception {
+    return HoodieFileReaderFactory.getFileReader(conf, getFilePath());
+  }
 
-    Configuration conf = new Configuration();
-    Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
+  @Override
+  protected void verifyMetadata(Configuration conf) throws IOException {
+    Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf));
     assertEquals(4, orcReader.getMetadataKeys().size());
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MIN_RECORD_KEY_FOOTER));
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_MAX_RECORD_KEY_FOOTER));
     assertTrue(orcReader.getMetadataKeys().contains(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY));
     assertTrue(orcReader.getMetadataKeys().contains(AVRO_SCHEMA_METADATA_KEY));
     assertEquals(CompressionKind.ZLIB.name(), orcReader.getCompressionKind().toString());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    BloomFilter filter = hoodieReader.readBloomFilter();
-    for (int i = 0; i < 3; i++) {
-      assertTrue(filter.mightContain("key" + i));
-    }
-    assertFalse(filter.mightContain("non-existent-key"));
-    assertEquals(3, hoodieReader.getTotalRecords());
-    String[] minMaxRecordKeys = hoodieReader.readMinMaxRecordKeys();
-    assertEquals(2, minMaxRecordKeys.length);
-    assertEquals("key0", minMaxRecordKeys[0]);
-    assertEquals("key2", minMaxRecordKeys[1]);
-  }
-
-  @Test
-  public void testWriteReadPrimitiveRecord() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    Reader orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
-    assertEquals("struct<_row_key:string,time:string,number:int>", orcReader.getSchema().toString());
-    assertEquals(3, orcReader.getNumberOfRows());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator();
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      index++;
-    }
+    assertEquals(NUM_RECORDS, orcReader.getNumberOfRows());
   }
 
-  @Test
-  public void testWriteReadComplexRecord() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithUDT.avsc");
-    Schema udtSchema = avroSchema.getField("driver").schema().getTypes().get(1);
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      GenericRecord innerRecord = new GenericData.Record(udtSchema);
-      innerRecord.put("driver_name", "driver" + i);
-      innerRecord.put("list", Collections.singletonList(i));
-      innerRecord.put("map", Collections.singletonMap("key" + i, "value" + i));
-      record.put("driver", innerRecord);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
-    assertEquals("struct<_row_key:string,time:string,number:int,driver:struct<driver_name:string,list:array<int>,map:map<string,string>>>",
-        reader.getSchema().toString());
-    assertEquals(3, reader.getNumberOfRows());
-
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator();
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      GenericRecord innerRecord = (GenericRecord) record.get("driver");
-      assertEquals("driver" + index, 
innerRecord.get("driver_name").toString());
-      assertEquals(1, ((List<?>)innerRecord.get("list")).size());
-      assertEquals(index, ((List<?>)innerRecord.get("list")).get(0));
-      assertEquals("value" + index, 
((Map<?,?>)innerRecord.get("map")).get("key" + index).toString());
-      index++;
+  @Override
+  protected void verifySchema(Configuration conf, String schemaPath) throws IOException {
+    Reader orcReader = OrcFile.createReader(getFilePath(), OrcFile.readerOptions(conf));
+    if ("/exampleSchema.avsc".equals(schemaPath)) {
+      assertEquals("struct<_row_key:string,time:string,number:int>",
+          orcReader.getSchema().toString());
+    } else if ("/exampleSchemaWithUDT.avsc".equals(schemaPath)) {
+      assertEquals("struct<_row_key:string,time:string,number:int,driver:struct<driver_name:string,list:array<int>,map:map<string,string>>>",
+          orcReader.getSchema().toString());
     }
   }
 
-  @Test
-  public void testWriteReadWithEvolvedSchema() throws Exception {
-    Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchema.avsc");
-    HoodieOrcWriter writer = createOrcWriter(avroSchema);
-    for (int i = 0; i < 3; i++) {
-      GenericRecord record = new GenericData.Record(avroSchema);
-      record.put("_row_key", "key" + i);
-      record.put("time", Integer.toString(i));
-      record.put("number", i);
-      writer.writeAvro("key" + i, record);
-    }
-    writer.close();
-
-    Configuration conf = new Configuration();
-    HoodieFileReader<GenericRecord> hoodieReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
-    Schema evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchema.avsc");
-    Iterator<GenericRecord> iter = hoodieReader.getRecordIterator(evolvedSchema);
-    int index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaChangeOrder.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaColumnRequire.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(index, record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaColumnType.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertEquals(Integer.toString(index), record.get("number").toString());
-      assertNull(record.get("added_field"));
-      index++;
-    }
-
-    evolvedSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleEvolvedSchemaDeleteColumn.avsc");
-    iter = hoodieReader.getRecordIterator(evolvedSchema);
-    index = 0;
-    while (iter.hasNext()) {
-      GenericRecord record = iter.next();
-      assertEquals("key" + index, record.get("_row_key").toString());
-      assertEquals(Integer.toString(index), record.get("time").toString());
-      assertNull(record.get("number"));
-      assertNull(record.get("added_field"));
-      index++;
-    }
+  @Override
+  public void testReaderFilterRowKeys() {
+    // TODO: fix filterRowKeys test for ORC due to a bug in ORC logic

Review comment:
       Tracked here: HUDI-3682
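
       For context, one way to make the skip explicit rather than a silent no-op is JUnit 5's `@Disabled`. A minimal sketch, assuming junit-jupiter is on the test classpath; the base class `AbstractReaderWriterTest` here is a hypothetical stand-in, not the actual class in this PR:

       ```java
       import org.junit.jupiter.api.Disabled;
       import org.junit.jupiter.api.Test;

       // Hypothetical stand-in for the shared reader/writer test base class.
       abstract class AbstractReaderWriterTest {
         @Test
         public void testReaderFilterRowKeys() {
           // Shared filterRowKeys assertions would live here.
         }
       }

       class OrcReaderWriterTestSketch extends AbstractReaderWriterTest {
         @Override
         @Disabled("HUDI-3682: filterRowKeys is broken for the ORC reader")
         public void testReaderFilterRowKeys() {
           // Intentionally empty: skipped for ORC until HUDI-3682 is fixed,
           // and reported as skipped instead of passing vacuously.
         }
       }
       ```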



