[ 
https://issues.apache.org/jira/browse/DRILL-7156?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16907151#comment-16907151
 ] 

ASF GitHub Bot commented on DRILL-7156:
---------------------------------------

arina-ielchiieva commented on pull request #1836: DRILL-7156: Support empty 
Parquet files creation
URL: https://github.com/apache/drill/pull/1836#discussion_r313805760
 
 

 ##########
 File path: 
exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriterEmptyFiles.java
 ##########
 @@ -43,47 +49,99 @@ public void testWriteEmptyFile() throws Exception {
     final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
     test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
+    Assert.assertTrue(outputFile.exists());
+  }
+
+  @Test
+  public void testWriteEmptyFileWithEmptySchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyfileemptyschema";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`empty.json`", 
outputFileName);
     Assert.assertFalse(outputFile.exists());
   }
 
   @Test
-  public void testMultipleWriters() throws Exception {
-    final String outputFile = 
"testparquetwriteremptyfiles_testmultiplewriters";
+  public void testWriteEmptySchemaChange() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testwriteemptyschemachange";
+    final File outputFile = 
FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
-    runSQL("alter session set `planner.slice_target` = 1");
+    test("CREATE TABLE dfs.tmp.%s AS select id, a, b from 
dfs.`schemachange/multi/*.json` WHERE id = 0", outputFileName);
 
-    try {
-      final String query = "SELECT position_id FROM cp.`employee.json` WHERE 
position_id IN (15, 16) GROUP BY position_id";
+    // Only the last scan scheme is written
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+      .addNullable("id", TypeProtos.MinorType.BIGINT)
+      .addNullable("a", TypeProtos.MinorType.BIGINT)
+      .addNullable("b", TypeProtos.MinorType.BIT);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+      .withSchemaBuilder(schemaBuilder)
+      .build();
 
-      test("CREATE TABLE dfs.tmp.%s AS %s", outputFile, query);
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+      .schemaBaseLine(expectedSchema)
+      .go();
 
-      // this query will fail if an "empty" file was created
-      testBuilder()
-        .unOrdered()
-        .sqlQuery("SELECT * FROM dfs.tmp.%s", outputFile)
-        .sqlBaselineQuery(query)
-        .go();
-    } finally {
-      runSQL("alter session set `planner.slice_target` = " + 
ExecConstants.SLICE_TARGET_DEFAULT);
-    }
+    // Make sure that only 1 parquet file was created
+    Assert.assertEquals(1, outputFile.list((dir, name) -> 
name.endsWith("parquet")).length);
+  }
+
+  @Test
+  public void testEmptyFileSchema() throws Exception {
+    final String outputFileName = 
"testparquetwriteremptyfiles_testemptyfileschema";
+
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
 
 Review comment:
   Please replace select from JSON file with select from Parquet, we need to 
test that schema is created correctly from already known schema.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Empty Parquet is not getting created if 0 records in result
> -----------------------------------------------------------
>
>                 Key: DRILL-7156
>                 URL: https://issues.apache.org/jira/browse/DRILL-7156
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Parquet
>    Affects Versions: 1.16.0
>            Reporter: Sayalee Bhanavase
>            Assignee: Oleg Zinoviev
>            Priority: Major
>             Fix For: 1.17.0
>
>
> I am creating parquet tables out of joins. If there is no record in join, it 
> does not create empty. table and when I reused the table my further script 
> fails. 
> Has anyone faced this issue? Any suggestion or workaround?



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

Reply via email to