[ https://issues.apache.org/jira/browse/DRILL-7156?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16907297#comment-16907297 ]

ASF GitHub Bot commented on DRILL-7156:
---------------------------------------

arina-ielchiieva commented on pull request #1836: DRILL-7156: Support empty Parquet files creation
URL: https://github.com/apache/drill/pull/1836#discussion_r313897281
 
 

 ##########
 File path: exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriterEmptyFiles.java
 ##########
 @@ -18,72 +18,142 @@
 package org.apache.drill.exec.physical.impl.writer;
 
 import org.apache.commons.io.FileUtils;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.BatchSchemaBuilder;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
 import org.apache.drill.test.BaseTestQuery;
 import org.apache.drill.categories.ParquetTest;
 import org.apache.drill.categories.UnlikelyTest;
 import org.apache.drill.exec.ExecConstants;
-import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import java.io.File;
+import java.nio.file.Paths;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 @Category({ParquetTest.class, UnlikelyTest.class})
 public class TestParquetWriterEmptyFiles extends BaseTestQuery {
 
   @BeforeClass
   public static void initFs() throws Exception {
     updateTestCluster(3, null);
+    dirTestWatcher.copyResourceToRoot(Paths.get("schemachange"));
+    dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "empty"));
   }
 
-  @Test // see DRILL-2408
+  @Test
   public void testWriteEmptyFile() throws Exception {
     final String outputFileName = "testparquetwriteremptyfiles_testwriteemptyfile";
     final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
     test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`employee.json` WHERE 
1=0", outputFileName);
-    Assert.assertFalse(outputFile.exists());
+    assertTrue(outputFile.exists());
   }
 
   @Test
-  public void testMultipleWriters() throws Exception {
-    final String outputFile = "testparquetwriteremptyfiles_testmultiplewriters";
+  public void testWriteEmptyFileWithEmptySchema() throws Exception {
+    final String outputFileName = "testparquetwriteremptyfiles_testwriteemptyfileemptyschema";
+    final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
-    runSQL("alter session set `planner.slice_target` = 1");
+    test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`empty.json`", 
outputFileName);
+    assertFalse(outputFile.exists());
+  }
 
-    try {
-      final String query = "SELECT position_id FROM cp.`employee.json` WHERE position_id IN (15, 16) GROUP BY position_id";
+  @Test
+  public void testWriteEmptySchemaChange() throws Exception {
+    final String outputFileName = "testparquetwriteremptyfiles_testwriteemptyschemachange";
+    final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
 
-      test("CREATE TABLE dfs.tmp.%s AS %s", outputFile, query);
+    test("CREATE TABLE dfs.tmp.%s AS select id, a, b from 
dfs.`schemachange/multi/*.json` WHERE id = 0", outputFileName);
 
-      // this query will fail if an "empty" file was created
-      testBuilder()
-        .unOrdered()
-        .sqlQuery("SELECT * FROM dfs.tmp.%s", outputFile)
-        .sqlBaselineQuery(query)
-        .go();
-    } finally {
-      runSQL("alter session set `planner.slice_target` = " + 
ExecConstants.SLICE_TARGET_DEFAULT);
-    }
+    // Only the last scan schema is written
+    SchemaBuilder schemaBuilder = new SchemaBuilder()
+      .addNullable("id", TypeProtos.MinorType.BIGINT)
+      .addNullable("a", TypeProtos.MinorType.BIGINT)
+      .addNullable("b", TypeProtos.MinorType.BIT);
+    BatchSchema expectedSchema = new BatchSchemaBuilder()
+      .withSchemaBuilder(schemaBuilder)
+      .build();
+
+    testBuilder()
+      .unOrdered()
+      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
+      .schemaBaseLine(expectedSchema)
+      .go();
+
+    // Make sure that only 1 parquet file was created
+    assertEquals(1, outputFile.list((dir, name) -> name.endsWith("parquet")).length);
   }
 
-  @Test // see DRILL-2408
+  @Test
+  public void testSimpleEmptyFileSchema() throws Exception {
 
 Review comment:
   Also, we need to add a test where we select from a non-empty Parquet file but the filter condition eliminates all rows, similar to what you have for JSON.
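   A minimal sketch of such a test, written in the same style as the other tests in this class, might look like the following; the test name, the `cp.`tpch/nation.parquet`` sample file, and the always-false predicate are illustrative assumptions, not part of the patch:
   
     @Test
     public void testWriteEmptyFileAfterFilteringAllRows() throws Exception {
       // Hypothetical name; cp.`tpch/nation.parquet` is assumed to be a non-empty
       // Parquet sample available on the test classpath.
       final String outputFileName = "testparquetwriteremptyfiles_testfilterallrows";
       final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);
   
       // The predicate removes every row, so the writer receives zero records
       // while the schema is still known from the Parquet reader.
       test("CREATE TABLE dfs.tmp.%s AS SELECT * FROM cp.`tpch/nation.parquet` WHERE 1 = 0", outputFileName);
   
       // With DRILL-7156 an empty Parquet file carrying that schema should still be written.
       assertTrue(outputFile.exists());
     }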
   
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Empty Parquet is not getting created if 0 records in result
> -----------------------------------------------------------
>
>                 Key: DRILL-7156
>                 URL: https://issues.apache.org/jira/browse/DRILL-7156
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Parquet
>    Affects Versions: 1.16.0
>            Reporter: Sayalee Bhanavase
>            Assignee: Oleg Zinoviev
>            Priority: Major
>             Fix For: 1.17.0
>
>
> I am creating Parquet tables out of joins. If there are no records in the join, it 
> does not create an empty table, and when I reuse the table my further script 
> fails. 
> Has anyone faced this issue? Any suggestions or workarounds?
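
For illustration only, a hedged sketch of the reported workflow as a test in the same class; the test name, the self-join on cp.`employee.json`, and the column names are assumptions chosen solely to produce an empty join result:

  @Test
  public void testReuseEmptyJoinResult() throws Exception {
    // Hypothetical reproduction: the join predicate matches no rows, so before the
    // fix no Parquet table was created and the follow-up query failed.
    final String outputFileName = "testparquetwriteremptyfiles_emptyjoin";
    final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);

    test("CREATE TABLE dfs.tmp.%s AS SELECT e.full_name FROM cp.`employee.json` e " +
      "INNER JOIN cp.`employee.json` m ON e.employee_id = m.employee_id WHERE 1 = 0", outputFileName);

    // With the fix, the (empty) table exists and can be queried again without an error.
    assertTrue(outputFile.exists());
    test("SELECT * FROM dfs.tmp.%s", outputFileName);
  }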



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)
