Author: kturner
Date: Mon Oct 15 18:56:00 2012
New Revision: 1398451

URL: http://svn.apache.org/viewvc?rev=1398451&view=rev
Log:
ACCUMULO-720 Applied patch, from Chris Bennight, that adds subsequence matching 
to egrep shell command

Modified:
    
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
    
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java
    
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java
    
accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java

Modified: 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
URL: 
http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java?rev=1398451&r1=1398450&r2=1398451&view=diff
==============================================================================
--- 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
 (original)
+++ 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java
 Mon Oct 15 18:56:00 2012
@@ -46,12 +46,15 @@ public class RegExFilter extends Filter 
     return result;
   }
   
+  
+  
   public static final String ROW_REGEX = "rowRegex";
   public static final String COLF_REGEX = "colfRegex";
   public static final String COLQ_REGEX = "colqRegex";
   public static final String VALUE_REGEX = "valueRegex";
   public static final String OR_FIELDS = "orFields";
   public static final String ENCODING = "encoding";
+  public static final String MATCH_SUBSTRING = "matchSubstring";
   
   public static final String ENCODING_DEFAULT = "UTF-8";
   
@@ -60,6 +63,7 @@ public class RegExFilter extends Filter 
   private Matcher colqMatcher;
   private Matcher valueMatcher;
   private boolean orFields = false;
+  private boolean matchSubstring = false;
   
   private String encoding = ENCODING_DEFAULT;
   
@@ -74,7 +78,7 @@ public class RegExFilter extends Filter 
     if (matcher != null) {
       try {
         matcher.reset(new String(bs.getBackingArray(), bs.offset(), 
bs.length(), encoding));
-        return matcher.matches();
+        return matchSubstring ? matcher.find() : matcher.matches();
       } catch (UnsupportedEncodingException e) {
         e.printStackTrace();
       }
@@ -86,7 +90,7 @@ public class RegExFilter extends Filter 
     if (matcher != null) {
       try {
         matcher.reset(new String(data, offset, len, encoding));
-        return matcher.matches();
+        return matchSubstring ? matcher.find() : matcher.matches();
       } catch (UnsupportedEncodingException e) {
         e.printStackTrace();
       }
@@ -144,6 +148,12 @@ public class RegExFilter extends Filter 
       orFields = false;
     }
     
+    if (options.containsKey(MATCH_SUBSTRING)) {
+       matchSubstring = Boolean.parseBoolean(options.get(MATCH_SUBSTRING));
+    } else {
+       matchSubstring = false;
+    }
+    
     if (options.containsKey(ENCODING)) {
       encoding = options.get(ENCODING);
     }
@@ -159,6 +169,7 @@ public class RegExFilter extends Filter 
     io.addNamedOption(RegExFilter.COLQ_REGEX, "regular expression on column 
qualifier");
     io.addNamedOption(RegExFilter.VALUE_REGEX, "regular expression on value");
     io.addNamedOption(RegExFilter.OR_FIELDS, "use OR instread of AND when 
multiple regexes given");
+    io.addNamedOption(RegExFilter.MATCH_SUBSTRING, "match on substrings");
     io.addNamedOption(RegExFilter.ENCODING, "character encoding of byte array 
value (default is " + ENCODING_DEFAULT + ")");
     return io;
   }
@@ -194,6 +205,8 @@ public class RegExFilter extends Filter 
   
   /**
    * Encode the terms to match against in the iterator
+   * Same as calling setRegexs(IteratorSetting si, String rowTerm, String 
cfTerm, String cqTerm, String valueTerm, boolean orFields, boolean 
matchSubstring)
+   * with matchSubstring set to false
    * 
    * @param si
    *          ScanIterator config to be updated
@@ -208,7 +221,28 @@ public class RegExFilter extends Filter 
    * @param orFields
    *          if true, any of the non-null terms can match to return the entry
    */
-  public static void setRegexs(IteratorSetting si, String rowTerm, String 
cfTerm, String cqTerm, String valueTerm, boolean orFields) {
+  public static void setRegexs(IteratorSetting si, String rowTerm, String 
cfTerm, String cqTerm, String valueTerm, boolean orFields) {  
+    setRegexs(si, rowTerm, cfTerm, cqTerm, valueTerm, orFields, false);
+  }
+  
+  /**
+   * Encode the terms to match against in the iterator
+   * 
+   * @param si
+   *          ScanIterator config to be updated
+   * @param rowTerm
+   *          the pattern to match against the Key's row. Not used if null.
+   * @param cfTerm
+   *          the pattern to match against the Key's column family. Not used 
if null.
+   * @param cqTerm
+   *          the pattern to match against the Key's column qualifier. Not 
used if null.
+   * @param valueTerm
+   *          the pattern to match against the Key's value. Not used if null.
+   * @param matchSubstring
+   *          if true then search expressions will match on partial strings
+   */
+  public static void setRegexs(IteratorSetting si, String rowTerm, String 
cfTerm, String cqTerm, String valueTerm, boolean orFields, boolean 
matchSubstring) {
+        
     if (rowTerm != null)
       si.addOption(RegExFilter.ROW_REGEX, rowTerm);
     if (cfTerm != null)
@@ -217,11 +251,14 @@ public class RegExFilter extends Filter 
       si.addOption(RegExFilter.COLQ_REGEX, cqTerm);
     if (valueTerm != null)
       si.addOption(RegExFilter.VALUE_REGEX, valueTerm);
-    if (orFields) {
-      si.addOption(RegExFilter.OR_FIELDS, "true");
-    }
+    si.addOption(RegExFilter.OR_FIELDS, String.valueOf(orFields));
+         si.addOption(RegExFilter.MATCH_SUBSTRING, 
String.valueOf(matchSubstring));
+         
   }
   
+  
+  
+  
   /**
    * Set the encoding string to use when interpreting characters
    * 

Modified: 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java
URL: 
http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java?rev=1398451&r1=1398450&r2=1398451&view=diff
==============================================================================
--- 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java
 (original)
+++ 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java
 Mon Oct 15 18:56:00 2012
@@ -21,15 +21,21 @@ import java.io.IOException;
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
 
 public class EGrepCommand extends GrepCommand {
+  
+  private Option matchSubstringOption;
+  
   @Override
-  protected void setUpIterator(final int prio, final String name, final String 
term, final BatchScanner scanner) throws IOException {
+  protected void setUpIterator(final int prio, final String name, final String 
term, final BatchScanner scanner, CommandLine cl) throws IOException {
     if (prio < 0) {
       throw new IllegalArgumentException("Priority < 0 " + prio);
     }
     final IteratorSetting si = new IteratorSetting(prio, name, 
RegExFilter.class);
-    RegExFilter.setRegexs(si, term, term, term, term, true);
+    RegExFilter.setRegexs(si, term, term, term, term, true, 
cl.hasOption(matchSubstringOption.getOpt()));
     scanner.addScanIterator(si);
   }
   
@@ -42,4 +48,12 @@ public class EGrepCommand extends GrepCo
   public String usage() {
     return getName() + " <regex>{ <regex>}";
   }
+  
+  @Override
+  public Options getOptions() {
+    final Options opts = super.getOptions();
+    matchSubstringOption = new Option("g", "global", false, "forces the use of 
the find() expression matcher, causing substring matches to return true");
+    opts.addOption(matchSubstringOption);
+    return opts;
+  }
 }

Modified: 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java
URL: 
http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java?rev=1398451&r1=1398450&r2=1398451&view=diff
==============================================================================
--- 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java
 (original)
+++ 
accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java
 Mon Oct 15 18:56:00 2012
@@ -36,6 +36,7 @@ public class GrepCommand extends ScanCom
   
   private Option numThreadsOpt;
   
+  @Override
   public int execute(final String fullCommand, final CommandLine cl, final 
Shell shellState) throws Exception {
     
     final String tableName = OptUtil.getTableOpt(cl, shellState);
@@ -45,7 +46,7 @@ public class GrepCommand extends ScanCom
     }
     final Class<? extends Formatter> formatter = getFormatter(cl, tableName, 
shellState);
     final ScanInterpreter interpeter = getInterpreter(cl, tableName, 
shellState);
-
+    
     // handle first argument, if present, the authorizations list to
     // scan with
     int numThreads = 20;
@@ -57,10 +58,10 @@ public class GrepCommand extends ScanCom
     scanner.setRanges(Collections.singletonList(getRange(cl, interpeter)));
     
     scanner.setTimeout(getTimeout(cl), TimeUnit.MILLISECONDS);
-
+    
     for (int i = 0; i < cl.getArgs().length; i++) {
-      setUpIterator(Integer.MAX_VALUE - cl.getArgs().length + i, "grep" + i, 
cl.getArgs()[i], scanner);
-    }    
+      setUpIterator(Integer.MAX_VALUE - cl.getArgs().length + i, "grep" + i, 
cl.getArgs()[i], scanner, cl);
+    }
     try {
       // handle columns
       fetchColumns(cl, scanner, interpeter);
@@ -74,10 +75,10 @@ public class GrepCommand extends ScanCom
     return 0;
   }
   
-  protected void setUpIterator(final int prio, final String name, final String 
term, final BatchScanner scanner) throws IOException {
+  protected void setUpIterator(final int prio, final String name, final String 
term, final BatchScanner scanner, CommandLine cl) throws IOException {
     if (prio < 0) {
       throw new IllegalArgumentException("Priority < 0 " + prio);
-    }    
+    }
     final IteratorSetting grep = new IteratorSetting(prio, name, 
GrepIterator.class);
     GrepIterator.setTerm(grep, term);
     scanner.addScanIterator(grep);

Modified: 
accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
URL: 
http://svn.apache.org/viewvc/accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java?rev=1398451&r1=1398450&r2=1398451&view=diff
==============================================================================
--- 
accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
 (original)
+++ 
accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java
 Mon Oct 15 18:56:00 2012
@@ -67,6 +67,21 @@ public class RegExFilterTest extends Tes
     assertTrue(rei.getTopKey().equals(k3));
     rei.next();
     assertFalse(rei.hasTop());
+        
+    // -----------------------------------------------------
+    // Test substring regex
+    is.clearOptions();
+    
+    RegExFilter.setRegexs(is, null, null, null, "amst", false, true); // 
Should only match hamster
+    
+    rei.validateOptions(is.getOptions());
+    rei.init(new SortedMapIterator(tm), is.getOptions(), new 
DefaultIteratorEnvironment());
+    rei.seek(new Range(), EMPTY_COL_FAMS, false);
+    
+    assertTrue(rei.hasTop());
+    assertTrue(rei.getTopKey().equals(k3));
+    rei.next();
+    assertFalse(rei.hasTop());
     
     // -----------------------------------------------------
     is.clearOptions();


Reply via email to