arina-ielchiieva commented on a change in pull request #1836: DRILL-7156: 
Support empty Parquet files creation
URL: https://github.com/apache/drill/pull/1836#discussion_r313803449
 
 

 ##########
 File path: 
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriterEmptyFiles.java
 ##########
 @@ -43,47 +49,99 @@ public void testWriteEmptyFile() throws Exception {
     final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
     test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
+    Assert.assertTrue(outputFile.exists());
+  }
+
+  @Test
+  public void testWriteEmptyFileWithEmptySchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyfileemptyschema";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`empty.json`", 
outputFileName);
     Assert.assertFalse(outputFile.exists());
   }
 
   @Test
-  public void testMultipleWriters() throws Exception {
-    final String outputFile = 
"testparquetwriteremptyfiles_testmultiplewriters";
+  public void testWriteEmptySchemaChange() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyschemachange";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
-    runSQL("alter session set `planner.slice_target` = 1");
+    test("CREATE TABLE dfs.tmp.%s AS select id, a, b from 
dfs.`schemachange/multi/*.json` WHERE id = 0", outputFileName);
 
-    try {
-      final String query = "SELECT position_id FROM cp.`employee.json` WHERE 
position_id IN (15, 16) GROUP BY position_id";
+    // Only the last scan scheme is written
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+      .addNullable("id", TypeProtos.MinorType.BIGINT)
+      .addNullable("a", TypeProtos.MinorType.BIGINT)
+      .addNullable("b", TypeProtos.MinorType.BIT);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+      .withSchemaBuilder(schemaBuilder)
+      .build();
 
-      test("CREATE TABLE dfs.tmp.%s AS %s", outputFile, query);
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+      .schemaBaseLine(expectedSchema)
+      .go();
 
-      // this query will fail if an "empty" file was created
-      testBuilder()
-        .unOrdered()
-        .sqlQuery("SELECT * FROM dfs.tmp.%s", outputFile)
-        .sqlBaselineQuery(query)
-        .go();
-    } finally {
-      runSQL("alter session set `planner.slice_target` = " + 
ExecConstants.SLICE_TARGET_DEFAULT);
-    }
+    // Make sure that only 1 parquet file was created
+    Assert.assertEquals(1, outputFile.list((dir, name) -> 
name.endsWith("parquet")).length);
+  }
+
+  @Test
+  public void testEmptyFileSchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testemptyfileschema";
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
+
+    // end_date column is null, so it missing in result schema.
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+            .addNullable("employee_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("full_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("first_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("last_name", TypeProtos.MinorType.VARCHAR)
+            .addNullable("position_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("position_title", TypeProtos.MinorType.VARCHAR)
+            .addNullable("store_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("department_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("birth_date", TypeProtos.MinorType.VARCHAR)
+            .addNullable("hire_date", TypeProtos.MinorType.VARCHAR)
+            .addNullable("salary", TypeProtos.MinorType.FLOAT8)
+            .addNullable("supervisor_id", TypeProtos.MinorType.BIGINT)
+            .addNullable("education_level", TypeProtos.MinorType.VARCHAR)
+            .addNullable("marital_status", TypeProtos.MinorType.VARCHAR)
+            .addNullable("gender", TypeProtos.MinorType.VARCHAR)
+            .addNullable("management_role", TypeProtos.MinorType.VARCHAR);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+            .withSchemaBuilder(schemaBuilder)
+            .build();
+
+    testBuilder()
+            .unOrdered()
+            .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+            .schemaBaseLine(expectedSchema)
+            .go();
   }
 
   @Test // see DRILL-2408
 
 Review comment:
   Please remove the `see DRILL-2408` references in the class; they no longer 
make sense.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to