Author: dhruba
Date: Fri Oct 5 10:57:53 2007
New Revision: 582366

URL: http://svn.apache.org/viewvc?rev=582366&view=rev
Log:
HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
(Hairong Kuang via dhruba)

Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=582366&r1=582365&r2=582366&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Fri Oct 5 10:57:53 2007 @@ -81,6 +81,9 @@ factories. Provide a StandardSocketFactory, and a SocksSocketFactory to allow the use of SOCKS proxies. (taton). + HADOOP-1968. FileSystem supports wildcard input syntax "{ }". + (Hairong Kuang via dhruba) + OPTIMIZATIONS HADOOP-1910. Reduce the number of RPCs that DistributedFileSystem.create() Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=582366&r1=582365&r2=582366&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Fri Oct 5 10:57:53 2007 @@ -574,6 +574,14 @@ * <dt> <tt> \<i>c</i> </tt> * <dd> Removes (escapes) any special meaning of character <i>c</i>. * + * <p> + * <dt> <tt> {ab,cd} </tt> + * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt> + * + * <p> + * <dt> <tt> {ab,c{de,fh}} </tt> + * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt> + * * </dl> * </dd> * </dl> @@ -652,11 +660,18 @@ setRegex(filePattern); } + private boolean isJavaRegexSpecialChar(char pChar) { + return pChar == '.' 
|| pChar == '$' || pChar == '(' || pChar == ')' || + pChar == '|' || pChar == '+'; + } void setRegex(String filePattern) throws IOException { int len; int setOpen; + int curlyOpen; boolean setRange; - StringBuffer fileRegex = new StringBuffer(); + boolean expectGroup; + + StringBuilder fileRegex = new StringBuilder(); // Validate the pattern len = filePattern.length(); @@ -665,7 +680,9 @@ setOpen = 0; setRange = false; - + curlyOpen = 0; + expectGroup = false; + for (int i = 0; i < len; i++) { char pCh; @@ -677,7 +694,7 @@ if (i >= len) error("An escaped character does not present", filePattern, i); pCh = filePattern.charAt(i); - } else if (pCh == '.') { + } else if (isJavaRegexSpecialChar(pCh)) { fileRegex.append(PAT_ESCAPE); } else if (pCh == '*') { fileRegex.append(PAT_ANY); @@ -685,6 +702,21 @@ } else if (pCh == '?') { pCh = PAT_ANY; hasPattern = true; + } else if (pCh == '{') { + fileRegex.append('('); + pCh = '('; + curlyOpen++; + } else if (pCh == ',' && curlyOpen > 0) { + fileRegex.append(")|"); + pCh = '('; + expectGroup = true; + } else if (pCh == '}' && curlyOpen > 0) { + // End of a group + if (expectGroup) + error("Unexpected end of a group", filePattern, i); + curlyOpen--; + fileRegex.append(")"); + pCh = ')'; } else if (pCh == '[' && setOpen == 0) { setOpen++; hasPattern = true; @@ -704,15 +736,17 @@ // Normal character, or the end of a character set range setOpen++; setRange = false; + } else if (curlyOpen > 0) { + expectGroup = false; } fileRegex.append(pCh); } // Check for a well-formed pattern - if (setOpen > 0 || setRange) { + if (setOpen > 0 || setRange || curlyOpen > 0) { // Incomplete character set or character range - error("Expecting set closure character or end of range", filePattern, - len); + error("Expecting set closure character or end of range, or }", + filePattern, len); } regex = Pattern.compile(fileRegex.toString()); } Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=582366&r1=582365&r2=582366&view=diff ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Fri Oct 5 10:57:53 2007 @@ -43,30 +43,29 @@ } protected void tearDown() throws Exception { - dfsCluster.shutdown(); + if(dfsCluster!=null) { + dfsCluster.shutdown(); + } } - public void testGlob() { - try { - pTestLiteral(); - pTestAny(); - pTestClosure(); - pTestSet(); - pTestRange(); - pTestSetExcl(); - pTestCombination(); - pTestRelativePath(); - } catch(IOException e) { - e.printStackTrace(); - } + public void testGlob() throws Exception { + //pTestEscape(); // need to wait until HADOOP-1995 is fixed + pTestJavaRegexSpecialChars(); + pTestCurlyBracket(); + pTestLiteral(); + pTestAny(); + pTestClosure(); + pTestSet(); + pTestRange(); + pTestSetExcl(); + pTestCombination(); + pTestRelativePath(); } private void pTestLiteral() throws IOException { try { - String [] files = new String[2]; - files[0] = USER_DIR+"/a2c"; - files[1] = USER_DIR+"/ab\\[c.d"; - Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files); + String [] files = new String[] {USER_DIR+"/a2c", USER_DIR+"/abc.d"}; + Path[] matchedPath = prepareTesting(USER_DIR+"/abc.d", files); assertEquals(matchedPath.length, 1); assertEquals(matchedPath[0], path[1]); } finally { @@ -74,13 +73,21 @@ } } + private void pTestEscape() throws IOException { + try { + String [] files = new String[] {USER_DIR+"/ab\\[c.d"}; + Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files); + assertEquals(matchedPath.length, 1); + assertEquals(matchedPath[0], path[0]); + } finally { + cleanupDFS(); + } + } + private void pTestAny() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/abc"; - files[1] = 
USER_DIR+"/a2c"; - files[2] = USER_DIR+"/a.c"; - files[3] = USER_DIR+"/abcd"; + String [] files = new String[] { USER_DIR+"/abc", USER_DIR+"/a2c", + USER_DIR+"/a.c", USER_DIR+"/abcd"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a?c", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[2]); @@ -99,11 +106,8 @@ private void pTestClosure1() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a"; - files[1] = USER_DIR+"/abc"; - files[2] = USER_DIR+"/abc.p"; - files[3] = USER_DIR+"/bacd"; + String [] files = new String[] {USER_DIR+"/a", USER_DIR+"/abc", + USER_DIR+"/abc.p", USER_DIR+"/bacd"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a*", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[0]); @@ -116,11 +120,8 @@ private void pTestClosure2() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a."; - files[1] = USER_DIR+"/a.txt"; - files[2] = USER_DIR+"/a.old.java"; - files[3] = USER_DIR+"/.java"; + String [] files = new String[] {USER_DIR+"/a.", USER_DIR+"/a.txt", + USER_DIR+"/a.old.java", USER_DIR+"/.java"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a.*", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[0]); @@ -133,11 +134,8 @@ private void pTestClosure3() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a.txt.x"; - files[1] = USER_DIR+"/ax"; - files[2] = USER_DIR+"/ab37x"; - files[3] = USER_DIR+"/bacd"; + String [] files = new String[] {USER_DIR+"/a.txt.x", USER_DIR+"/ax", + USER_DIR+"/ab37x", USER_DIR+"/bacd"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a*x", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[0]); @@ -150,11 +148,8 @@ private void pTestSet() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a.c"; - files[1] = USER_DIR+"/a.cpp"; - files[2] = USER_DIR+"/a.hlp"; - files[3] = 
USER_DIR+"/a.hxy"; + String [] files = new String[] {USER_DIR+"/a.c", USER_DIR+"/a.cpp", + USER_DIR+"/a.hlp", USER_DIR+"/a.hxy"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a.[ch]??", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[1]); @@ -167,11 +162,8 @@ private void pTestRange() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a.d"; - files[1] = USER_DIR+"/a.e"; - files[2] = USER_DIR+"/a.f"; - files[3] = USER_DIR+"/a.h"; + String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e", + USER_DIR+"/a.f", USER_DIR+"/a.h"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a.[d-fm]", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], path[0]); @@ -184,11 +176,8 @@ private void pTestSetExcl() throws IOException { try { - String [] files = new String[4]; - files[0] = USER_DIR+"/a.d"; - files[1] = USER_DIR+"/a.e"; - files[2] = USER_DIR+"/a.0"; - files[3] = USER_DIR+"/a.h"; + String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e", + USER_DIR+"/a.0", USER_DIR+"/a.h"}; Path[] matchedPath = prepareTesting(USER_DIR+"/a.[^a-cg-z0-9]", files); assertEquals(matchedPath.length, 2); assertEquals(matchedPath[0], path[0]); @@ -200,15 +189,11 @@ private void pTestCombination() throws IOException { try { - String [] files = new String[4]; - files[0] = "/user/aa/a.c"; - files[1] = "/user/bb/a.cpp"; - files[2] = "/user1/cc/b.hlp"; - files[3] = "/user/dd/a.hxy"; - Path[] matchedPath = prepareTesting("/use?/*/a.[ch]??", files); - assertEquals(matchedPath.length, 2); - assertEquals(matchedPath[0], path[1]); - assertEquals(matchedPath[1], path[3]); + String [] files = new String[] {"/user/aa/a.c", "/user/bb/a.cpp", + "/user1/cc/b.hlp", "/user/dd/a.hxy"}; + Path[] matchedPath = prepareTesting("/use?/*/a.[ch]{lp,xy}", files); + assertEquals(matchedPath.length, 1); + assertEquals(matchedPath[0], path[3]); } finally { cleanupDFS(); } @@ -216,11 +201,7 @@ private void pTestRelativePath() 
throws IOException { try { - String [] files = new String[4]; - files[0] = "a"; - files[1] = "abc"; - files[2] = "abc.p"; - files[3] = "bacd"; + String [] files = new String[] {"a", "abc", "abc.p", "bacd"}; Path[] matchedPath = prepareTesting("a*", files); assertEquals(matchedPath.length, 3); assertEquals(matchedPath[0], new Path(USER_DIR, path[0])); @@ -231,15 +212,91 @@ } } + /* Test {xx,yy} */ + private void pTestCurlyBracket() throws IOException { + Path[] matchedPath; + String [] files; + try { + files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abxy", + USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy"}; + matchedPath = prepareTesting(USER_DIR+"/a.{abc,jh}??", files); + assertEquals(matchedPath.length, 2); + assertEquals(matchedPath[0], path[0]); + assertEquals(matchedPath[1], path[3]); + } finally { + cleanupDFS(); + } + // nested curlies + try { + files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abdxy", + USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy" }; + matchedPath = prepareTesting(USER_DIR+"/a.{ab{c,d},jh}??", files); + assertEquals(matchedPath.length, 3); + assertEquals(matchedPath[0], path[0]); + assertEquals(matchedPath[1], path[1]); + assertEquals(matchedPath[2], path[3]); + } finally { + cleanupDFS(); + } + try { + // test standalone } + files = new String[] {USER_DIR+"/}bc"}; + matchedPath = prepareTesting(USER_DIR+"/}{a,b}c", files); + assertEquals(matchedPath.length, 1); + // test {b} + matchedPath = prepareTesting(USER_DIR+"/}{b}c", files); + assertEquals(matchedPath.length, 1); + // test {} + matchedPath = prepareTesting(USER_DIR+"}{}bc", files); + assertEquals(matchedPath.length, 1); + + // test ill-formed curly + boolean hasException = false; + try { + prepareTesting(USER_DIR+"}{b,}c", files); + } catch (IOException e) { + assertTrue(e.getMessage().startsWith("Illegal file pattern:") ); + hasException = true; + } + assertTrue(hasException); + hasException = false; + try { + prepareTesting(USER_DIR+"}{bc", files); + } catch (IOException e) { + 
assertTrue(e.getMessage().startsWith("Illegal file pattern:") ); + hasException = true; + } + assertTrue(hasException); + } finally { + cleanupDFS(); + } + } + + /* test that a path name can contain Java regex special characters */ + private void pTestJavaRegexSpecialChars() throws IOException { + try { + String[] files = new String[] {USER_DIR+"/($.|+)bc", USER_DIR+"/abc"}; + Path[] matchedPath = prepareTesting(USER_DIR+"/($.|+)*", files); + assertEquals(matchedPath.length, 1); + assertEquals(matchedPath[0], path[0]); + } finally { + cleanupDFS(); + } + + } private Path[] prepareTesting(String pattern, String[] files) throws IOException { for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) { - path[i] = new Path(files[i]); + path[i] = new Path(files[i]).makeQualified(fs); if (!fs.mkdirs(path[i])) { throw new IOException("Mkdirs failed to create " + path[i].toString()); } } - return fs.globPaths(new Path(pattern)); + Path[] globResults = fs.globPaths(new Path(pattern)); + for(int i=0; i<globResults.length; i++) { + globResults[i] = globResults[i].makeQualified(fs); + } + return globResults; } private void cleanupDFS() throws IOException {