[ https://issues.apache.org/jira/browse/DRILL-7156?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16907147#comment-16907147 ]
ASF GitHub Bot commented on DRILL-7156: --------------------------------------- arina-ielchiieva commented on pull request #1836: DRILL-7156: Support empty Parquet files creation URL: https://github.com/apache/drill/pull/1836#discussion_r313800525 ########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java ########## @@ -486,11 +467,54 @@ public void abort() throws IOException { @Override public void cleanup() throws IOException { - flush(); + flush(true); codecFactory.release(); } + private void createParquetFileWriter() throws IOException { + assert parquetFileWriter == null; + + Path path = new Path(location, prefix + "_" + index + ".parquet"); + // to ensure that our writer was the first to create output file, we create empty file first and fail if file exists + Path firstCreatedPath = storageStrategy.createFileAndApply(fs, path); + + // since parquet reader supports partitions, it means that several output files may be created + // if this writer was the one to create table folder, we store only folder and delete it with its content in case of abort + // if table location was created before, we store only files created by this writer and delete them in case of abort + addCleanUpLocation(fs, firstCreatedPath); + + // since ParquetFileWriter will overwrite empty output file (append is not supported) + // we need to re-apply file permission + if (useSingleFSBlock) { + // Passing blockSize creates files with this blockSize instead of filesystem default blockSize. + // Currently, this is supported only by filesystems included in + // BLOCK_FS_SCHEMES (ParquetFileWriter.java in parquet-mr), which includes HDFS. + // For other filesystems, it uses default blockSize configured for the file system. 
+ parquetFileWriter = new ParquetFileWriter(conf, schema, path, ParquetFileWriter.Mode.OVERWRITE, blockSize, 0); + } else { + parquetFileWriter = new ParquetFileWriter(conf, schema, path, ParquetFileWriter.Mode.OVERWRITE); + } + storageStrategy.applyToFile(fs, path); + parquetFileWriter.start(); + } + + private void flushParquetFileWriter() throws IOException { + assert parquetFileWriter != null; Review comment: Same here. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Empty Parquet is not getting created if 0 records in result > ----------------------------------------------------------- > > Key: DRILL-7156 > URL: https://issues.apache.org/jira/browse/DRILL-7156 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet > Affects Versions: 1.16.0 > Reporter: Sayalee Bhanavase > Assignee: Oleg Zinoviev > Priority: Major > Fix For: 1.17.0 > > > I am creating parquet tables out of joins. If there is no record in the join, it > does not create an empty table, and when I reuse the table my further script > fails. > Has anyone faced this issue? Any suggestion or workaround? -- This message was sent by Atlassian JIRA (v7.6.14#76016)