This is an automated email from the ASF dual-hosted git repository. sorabh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit a801e1330cdc665edb4efa6569646ac29fcef17b Author: kr-arjun <arjun...@outlook.com> AuthorDate: Thu Jul 26 16:52:48 2018 -0700 DRILL-6640: Drill takes long time in planning when there are large number of files in views/tables DFS parent directory Modifying DotDrillUtil implementation to avoid using globStatus calls with GLOB for dot drill files Includes - Modified DotDrillUtil.getDotDrills implementation to avoid using DFS globStatus call with GLOB for a given base file name. - Added unit test cases for the new method. - Code refactoring to include additional comments. - Updated logic to use globStatus call for path with wildcards and not ending with .drill - Modified Testcase implementation to use BaseDirTestWatcher. closes #1405 --- .../apache/drill/exec/dotdrill/DotDrillType.java | 22 +++++ .../apache/drill/exec/dotdrill/DotDrillUtil.java | 92 +++++++++++++++++-- .../drill/exec/dotdrill/TestDotDrillUtil.java | 102 +++++++++++++++++++++ 3 files changed, 209 insertions(+), 7 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java index a8b5f4b..673e1c7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillType.java @@ -56,6 +56,28 @@ public enum DotDrillType { return ending; } + /** + * Return Glob pattern for given Dot Drill Types. + * @param types + * @return Glob pattern representing For Dot Drill Types provided as types param + */ + public static String getDrillFileGlobPattern(DotDrillType[] types) { + if (types.length == 1) { + return "." 
+ types[0].name().toLowerCase() + ".drill"; + } + + StringBuffer b = new StringBuffer(); + b.append(".{"); + for (DotDrillType d : types) { + if (b.length() > 2) { + b.append(','); + } + b.append(d.name().toLowerCase()); + } + b.append("}.drill"); + return b.toString(); + } + public static final String DOT_DRILL_GLOB; static{ diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java index 226aa24..b6571df 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/DotDrillUtil.java @@ -18,10 +18,14 @@ package org.apache.drill.exec.dotdrill; import java.io.IOException; +import java.io.FileNotFoundException; import java.util.List; +import java.util.Arrays; +import java.util.ArrayList; import org.apache.drill.exec.store.dfs.DrillFileSystem; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.GlobPattern; import org.apache.hadoop.fs.Path; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; @@ -29,7 +33,15 @@ import org.apache.drill.shaded.guava.com.google.common.collect.Lists; public class DotDrillUtil { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DotDrillUtil.class); - private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, FileStatus[] statuses, DotDrillType... types){ + /** + * Returns List of DotDrillFile objects for given list of FileStatus objects matching the given Dot Drill File Types. + * Return an empty list if no FileStatus matches the given Dot Drill File Types. + * @param fs DrillFileSystem instance + * @param statuses List of FileStatus objects + * @param types Dot Drill Types to be matched + * @return List of matched DotDrillFile objects + */ + private static List<DotDrillFile> getDrillFiles(DrillFileSystem fs, List<FileStatus> statuses, DotDrillType... 
types){ List<DotDrillFile> files = Lists.newArrayList(); for(FileStatus s : statuses){ DotDrillFile f = DotDrillFile.create(fs, s); @@ -48,16 +60,82 @@ public class DotDrillUtil { } return files; } - + /** + * Return list of DotDrillFile objects whose file name ends with .drill and matches the provided Drill Dot files types + * in a given parent Path. + * Return an empty list if no files match the given Dot Drill File Types. + * @param fs DrillFileSystem instance + * @param root parent Path + * @param types Dot Drill Types to be matched + * @return List of matched DotDrillFile objects + * @throws IOException + */ public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, DotDrillType... types) throws IOException{ - return getDrillFiles(fs, fs.globStatus(new Path(root, "*.drill")), types); + return getDrillFiles(fs, getDrillFileStatus(fs, root,"*.drill"), types); } + /** + * Return list of DotDrillFile objects whose file name matches the provided name pattern and Drill Dot files types + * in a given parent Path. + * Return an empty list if no files match the given file name and Dot Drill File Types. + * @param fs DrillFileSystem instance + * @param root parent Path + * @param name name/pattern of the file + * @param types Dot Drill Types to be matched + * @return List of matched DotDrillFile objects + * @throws IOException + */ public static List<DotDrillFile> getDotDrills(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException{ - if(!name.endsWith(".drill")) { - name = name + DotDrillType.DOT_DRILL_GLOB; - } + return getDrillFiles(fs, getDrillFileStatus(fs, root, name, types), types); + } - return getDrillFiles(fs, fs.globStatus(new Path(root, name)), types); + /** + * Return list of FileStatus objects matching '.drill' files for a given name in the parent path. + * a) If given name ends with '.drill', it returns the statuses of all '.drill' files matching the name pattern.
+ * b) If given name does not end with '.drill', it returns file statuses starting with name + * and ending with pattern matching + * 1) all the valid DotDrillTypes if no DotDrillType is provided. + * 2) given DotDrillTypes if DotDrillType is provided. + * Return an empty list if no files match the pattern and Drill Dot file types. + * @param fs DrillFileSystem instance + * @param root parent Path + * @param name name/pattern of the file + * @param types Dot Drill Types to be matched. Applies type matching only if name does not end with '.drill' + * @return List of FileStatuses for files matching name and Drill Dot file types. + * @throws IOException if any I/O error occurs when fetching file status + */ + private static List<FileStatus> getDrillFileStatus(DrillFileSystem fs, Path root, String name, DotDrillType... types) throws IOException { + List<FileStatus> statuses = new ArrayList<FileStatus>(); + + if (name.endsWith(".drill")) { + FileStatus[] status = fs.globStatus(new Path(root, name)); + if (status != null) { + statuses.addAll(Arrays.asList(status)); + } + } else { + // If no DotDrillTypes are provided, check file status for all DotDrillTypes available. + // Else check the file status for provided types. + if (types.length == 0) { + types = DotDrillType.values(); + } + // Check if path has glob pattern or wildcards. If yes, use globStatus with globPattern for given types.
+ GlobPattern pathGlob = new GlobPattern((new Path(root, name)).toString()); + if (pathGlob.hasWildcard()) { + String patternAppliedName = name + DotDrillType.getDrillFileGlobPattern(types); + FileStatus[] status = fs.globStatus(new Path(root, patternAppliedName)); + if (status != null) { + statuses.addAll(Arrays.asList(status)); + } + } else { // use list status if no glob_pattern/wildcards exist in path + for (DotDrillType dotType : types) { + try { + FileStatus[] status = fs.listStatus(new Path(root, name + dotType.getEnding())); + statuses.addAll(Arrays.asList(status)); + } catch (FileNotFoundException ex) { + } + } + } + } + return statuses; } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java new file mode 100644 index 0000000..1866c9c --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/dotdrill/TestDotDrillUtil.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.dotdrill; + +import java.io.File; +import java.nio.file.Paths; +import java.nio.file.Files; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +import org.apache.drill.exec.store.dfs.DrillFileSystem; +import org.apache.drill.test.BaseDirTestWatcher; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +public class TestDotDrillUtil { + + private static File tempDir; + private static Path tempPath; + private static DrillFileSystem dfs; + + @ClassRule + public static final BaseDirTestWatcher dirTestWatcher = new BaseDirTestWatcher(); + + @BeforeClass + public static void setup() throws Exception { + Configuration conf = new Configuration(); + conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS); + dfs = new DrillFileSystem(conf); + tempDir = dirTestWatcher.getTmpDir(); + tempPath = new Path(tempDir.getAbsolutePath()); + } + + + @Test //DRILL-6640 + public void testViewFileStatus() throws Exception { + List<DotDrillFile> dotDrillFiles; + + Files.createFile(Paths.get(tempDir + "/test1.view.drill")); + Files.createFile(Paths.get(tempDir + "/test2.view.drill")); + Files.createFile(Paths.get(tempDir + "/test1.txt")); + + + // Check for view file by passing file name without extension + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1", DotDrillType.VIEW); + assertTrue(dotDrillFiles.size() == 1); + + // Check for dot drill file by passing full name + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.view.drill"); + assertTrue(dotDrillFiles.size() == 1); + + // Check for dot drill files by passing pattern *.drill + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "*.drill"); + assertTrue(dotDrillFiles.size() >= 2); + + // Check for non existent file + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, 
"junkfile", DotDrillType.VIEW); + assertTrue(dotDrillFiles.size() == 0); + + // Check for existing file which is not a drill view file + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test1.txt", DotDrillType.VIEW); + assertTrue(dotDrillFiles.size() == 0); + + // Check for views files by passing file name having glob without any extension + dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "test*", DotDrillType.VIEW); + assertTrue(dotDrillFiles.size() >= 2); + } + + @Test //DRILL-6640 + public void testDotFilesStatus() throws Exception { + String filePrefix = "sample"; + //Creating different Dot Drill files supported for base file name "sample" + for (DotDrillType dotType : DotDrillType.values()) { + Files.createFile(Paths.get(tempDir + "/" + filePrefix + dotType.getEnding())); + } + // Check Dot File count for "sample" file created for available Drill dot types + List<DotDrillFile> dotDrillFiles = DotDrillUtil.getDotDrills(dfs, tempPath, "sample"); + assertTrue(dotDrillFiles.size() == DotDrillType.values().length); + } + +}