Author: tomwhite
Date: Thu Dec 4 06:11:23 2008
New Revision: 723332
URL: http://svn.apache.org/viewvc?rev=723332&view=rev
Log:
HADOOP-3497. Fix bug in overly restrictive file globbing with a PathFilter.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Dec 4 06:11:23 2008
@@ -47,6 +47,9 @@
HADOOP-4035. Support memory based scheduling in capacity scheduler.
(Vinod Kumar Vavilapalli via yhemanth)
+ HADOOP-3497. Fix bug in overly restrictive file globbing with a
+ PathFilter. (tomwhite)
+
NEW FEATURES
HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java Thu Dec 4 06:11:23 2008
@@ -879,26 +879,16 @@
// glob the paths that match the parent path, i.e., [0, components.length-1]
boolean[] hasGlob = new boolean[]{false};
- Path[] parentPaths =
- globPathsLevel(parents, components, level, filter, hasGlob);
+ Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob);
FileStatus[] results;
if (parentPaths == null || parentPaths.length == 0) {
results = null;
} else {
// Now work on the last component of the path
GlobFilter fp = new GlobFilter(components[components.length - 1],
filter);
+ results = listStatus(parentPaths, fp);
if (fp.hasPattern()) { // last component has a pattern
- // list parent directories and then glob the results
- results = listStatus(parentPaths, fp);
hasGlob[0] = true;
- } else { // last component does not have a pattern
- // get all the path names
- for (int i = 0; i < parentPaths.length; i++) {
- parentPaths[i] = new Path(parentPaths[i],
- components[components.length - 1]);
- }
- // get all their statuses
- results = getFileStatus(parentPaths);
}
}
@@ -924,13 +914,13 @@
* components [<code>level</code>, <code>N-1</code>].
*/
private Path[] globPathsLevel(Path[] parents, String[] filePattern,
- int level, PathFilter filter, boolean[] hasGlob) throws IOException {
+ int level, boolean[] hasGlob) throws IOException {
if (level == filePattern.length - 1)
return parents;
if (parents == null || parents.length == 0) {
return null;
}
- GlobFilter fp = new GlobFilter(filePattern[level], filter);
+ GlobFilter fp = new GlobFilter(filePattern[level]);
if (fp.hasPattern()) {
parents = FileUtil.stat2Paths(listStatus(parents, fp));
hasGlob[0] = true;
@@ -939,7 +929,7 @@
parents[i] = new Path(parents[i], filePattern[level]);
}
}
- return globPathsLevel(parents, filePattern, level + 1, filter, hasGlob);
+ return globPathsLevel(parents, filePattern, level + 1, hasGlob);
}
/* A class that could decide if a string matches the glob or not */
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Thu Dec 4 06:11:23 2008
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs;
import java.io.IOException;
+import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@@ -26,6 +27,19 @@
public class TestGlobPaths extends TestCase {
+ static class RegexPathFilter implements PathFilter {
+
+ private final String regex;
+ public RegexPathFilter(String regex) {
+ this.regex = regex;
+ }
+
+ public boolean accept(Path path) {
+ return path.toString().matches(regex);
+ }
+
+ }
+
static private MiniDFSCluster dfsCluster;
static private FileSystem fs;
static final private int NUM_OF_PATHS = 4;
@@ -48,6 +62,31 @@
}
}
+ public void testPathFilter() throws IOException {
+ try {
+ String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b" };
+ Path[] matchedPath = prepareTesting(USER_DIR + "/*/*", files,
+ new RegexPathFilter("^.*" + Pattern.quote(USER_DIR) + "/a/b"));
+ assertEquals(matchedPath.length, 1);
+ assertEquals(matchedPath[0], path[1]);
+ } finally {
+ cleanupDFS();
+ }
+ }
+
+ public void testPathFilterWithFixedLastComponent() throws IOException {
+ try {
+ String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b",
+ USER_DIR + "/c", USER_DIR + "/c/b", };
+ Path[] matchedPath = prepareTesting(USER_DIR + "/*/b", files,
+ new RegexPathFilter("^.*" + Pattern.quote(USER_DIR) + "/a/b"));
+ assertEquals(matchedPath.length, 1);
+ assertEquals(matchedPath[0], path[1]);
+ } finally {
+ cleanupDFS();
+ }
+ }
+
public void testGlob() throws Exception {
//pTestEscape(); // need to wait until HADOOP-1995 is fixed
pTestJavaRegexSpecialChars();
@@ -368,6 +407,23 @@
return globResults;
}
+ private Path[] prepareTesting(String pattern, String[] files,
+ PathFilter filter) throws IOException {
+ for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
+ path[i] = new Path(files[i]).makeQualified(fs);
+ if (!fs.mkdirs(path[i])) {
+ throw new IOException("Mkdirs failed to create " + path[i].toString());
+ }
+ }
+ Path patternPath = new Path(pattern);
+ Path[] globResults = FileUtil.stat2Paths(fs.globStatus(patternPath, filter),
+ patternPath);
+ for(int i=0; i<globResults.length; i++) {
+ globResults[i] = globResults[i].makeQualified(fs);
+ }
+ return globResults;
+ }
+
private void cleanupDFS() throws IOException {
fs.delete(new Path("/user"), true);
}
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java?rev=723332&r1=723331&r2=723332&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestFileInputFormatPathFilter.java Thu Dec 4 06:11:23 2008
@@ -90,7 +90,8 @@
public static class TestPathFilter implements PathFilter {
public boolean accept(Path path) {
- return path.getName().length() == 1;
+ String name = path.getName();
+ return name.equals("TestFileInputFormatPathFilter") || name.length() == 1;
}
}