This is an automated email from the ASF dual-hosted git repository.

sorabh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit de74eabe013d9249712a14e1f0acabdc8d3ebc16 Author: Venkata Jyothsna Donapati <[email protected]> AuthorDate: Thu Apr 11 14:16:36 2019 -0700 DRILL-7171: Create metadata directories cache file in the leaf level directories to support ConvertCountToDirectScan optimization. closes #1748 --- .../exec/store/parquet/metadata/Metadata.java | 18 ++-- .../logical/TestConvertCountToDirectScan.java | 98 ++++++++++++++++++---- 2 files changed, 87 insertions(+), 29 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java index 59849e7..5459a8a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata.java @@ -352,23 +352,15 @@ public class Metadata { writeFile(metadataTableWithRelativePaths.fileMetadata, new Path(path, METADATA_FILENAME), fs); writeFile(metadataTableWithRelativePaths.getSummary(), new Path(path, METADATA_SUMMARY_FILENAME), fs); Metadata_V4.MetadataSummary metadataSummaryWithRelativePaths = metadataTableWithRelativePaths.getSummary(); - - if (directoryList.size() > 0 && childFiles.size() == 0) { - ParquetTableMetadataDirs parquetTableMetadataDirsRelativePaths = - new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories); - writeFile(parquetTableMetadataDirsRelativePaths, new Path(path, METADATA_DIRECTORIES_FILENAME), fs); - if (timer != null) { - logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS)); - } - ParquetTableMetadataDirs parquetTableMetadataDirs = new ParquetTableMetadataDirs(directoryList); - return Pair.of(parquetTableMetadata, parquetTableMetadataDirs); - } - List<Path> emptyDirList = new ArrayList<>(); + // Directories list will be empty at the leaf level directories. 
For sub-directories with both files and directories, + // only the directories will be included in the list. + writeFile(new ParquetTableMetadataDirs(metadataSummaryWithRelativePaths.directories), + new Path(path, METADATA_DIRECTORIES_FILENAME), fs); if (timer != null) { logger.debug("Creating metadata files recursively took {} ms", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } - return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(emptyDirList)); + return Pair.of(parquetTableMetadata, new ParquetTableMetadataDirs(directoryList)); } /** diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java index 4bd3a0f..eaf9257 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java @@ -190,11 +190,11 @@ public class TestConvertCountToDirectScan extends PlanTestBase { testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern}); testBuilder() - .sqlQuery(sql) - .unOrdered() - .baselineColumns("star_count", "int_column_count", "vrchr_column_count") - .baselineValues(24L, 8L, 12L) - .go(); + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count", "int_column_count", "vrchr_column_count") + .baselineValues(24L, 8L, 12L) + .go(); } finally { test("drop table if exists %s", tableName); @@ -222,17 +222,17 @@ public class TestConvertCountToDirectScan extends PlanTestBase { int expectedNumFiles = 1; String numFilesPattern = "numFiles = " + expectedNumFiles; - String usedMetaSummaryPattern = "usedMetadataSummaryFile = false"; + String usedMetaSummaryPattern = "usedMetadataSummaryFile = true"; String recordReaderPattern = "DynamicPojoRecordReader"; testPlanMatchingPatterns(sql, new 
String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern}); testBuilder() - .sqlQuery(sql) - .unOrdered() - .baselineColumns("star_count", "int_column_count", "vrchr_column_count") - .baselineValues(6L, 2L, 3L) - .go(); + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count", "int_column_count", "vrchr_column_count") + .baselineValues(6L, 2L, 3L) + .go(); } finally { test("drop table if exists %s", tableName); @@ -264,11 +264,77 @@ public class TestConvertCountToDirectScan extends PlanTestBase { testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern}); testBuilder() - .sqlQuery(sql) - .unOrdered() - .baselineColumns("star_count") - .baselineValues(250L) - .go(); + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count") + .baselineValues(250L) + .go(); + + } finally { + test("drop table if exists %s", tableName); + } + } + + @Test + public void testCountsForLeafDirectories() throws Exception { + test("use dfs.tmp"); + String tableName = "parquet_table_counts"; + + try { + test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName); + test("create table `%s/2` as select * from cp.`tpch/nation.parquet`", tableName); + test("create table `%s/3` as select * from cp.`tpch/nation.parquet`", tableName); + test("refresh table metadata %s", tableName); + + String sql = String.format("select\n" + + "count(*) as star_count\n" + + "from `%s/1`", tableName); + + int expectedNumFiles = 1; + String numFilesPattern = "numFiles = " + expectedNumFiles; + String usedMetaSummaryPattern = "usedMetadataSummaryFile = true"; + String recordReaderPattern = "DynamicPojoRecordReader"; + + testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern}); + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count") + .baselineValues(25L) + .go(); + + } finally { + test("drop table if exists %s", tableName); + } + } + + @Test + public void 
testCountsForDirWithFilesAndDir() throws Exception { + test("use dfs.tmp"); + String tableName = "parquet_table_counts"; + + try { + test("create table `%s/1` as select * from cp.`tpch/nation.parquet`", tableName); + test("create table `%s/1/2` as select * from cp.`tpch/nation.parquet`", tableName); + test("create table `%s/1/3` as select * from cp.`tpch/nation.parquet`", tableName); + test("refresh table metadata %s", tableName); + + String sql = String.format("select count(*) as star_count from `%s/1`", tableName); + + int expectedNumFiles = 1; + String numFilesPattern = "numFiles = " + expectedNumFiles; + String usedMetaSummaryPattern = "usedMetadataSummaryFile = true"; + String recordReaderPattern = "DynamicPojoRecordReader"; + + testPlanMatchingPatterns(sql, new String[]{numFilesPattern, usedMetaSummaryPattern, recordReaderPattern}); + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count") + .baselineValues(75L) + .go(); } finally { test("drop table if exists %s", tableName);
