Author: kturner Date: Mon Oct 15 18:56:00 2012 New Revision: 1398451 URL: http://svn.apache.org/viewvc?rev=1398451&view=rev Log: ACCUMULO-720 Applied patch, from Chris Bennight, that adds subsequence matching to egrep shell command
Modified: accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java Modified: accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java URL: http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java?rev=1398451&r1=1398450&r2=1398451&view=diff ============================================================================== --- accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java (original) +++ accumulo/trunk/core/src/main/java/org/apache/accumulo/core/iterators/user/RegExFilter.java Mon Oct 15 18:56:00 2012 @@ -46,12 +46,15 @@ public class RegExFilter extends Filter return result; } + + public static final String ROW_REGEX = "rowRegex"; public static final String COLF_REGEX = "colfRegex"; public static final String COLQ_REGEX = "colqRegex"; public static final String VALUE_REGEX = "valueRegex"; public static final String OR_FIELDS = "orFields"; public static final String ENCODING = "encoding"; + public static final String MATCH_SUBSTRING = "matchSubstring"; public static final String ENCODING_DEFAULT = "UTF-8"; @@ -60,6 +63,7 @@ public class RegExFilter extends Filter private Matcher colqMatcher; private Matcher valueMatcher; private boolean orFields = false; + private boolean matchSubstring = false; private String encoding = ENCODING_DEFAULT; @@ -74,7 +78,7 @@ public class RegExFilter extends Filter if (matcher != null) { try { matcher.reset(new String(bs.getBackingArray(), bs.offset(), bs.length(), encoding)); - return matcher.matches(); + return matchSubstring ? matcher.find() : matcher.matches(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } @@ -86,7 +90,7 @@ public class RegExFilter extends Filter if (matcher != null) { try { matcher.reset(new String(data, offset, len, encoding)); - return matcher.matches(); + return matchSubstring ? matcher.find() : matcher.matches(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } @@ -144,6 +148,12 @@ public class RegExFilter extends Filter orFields = false; } + if (options.containsKey(MATCH_SUBSTRING)) { + matchSubstring = Boolean.parseBoolean(options.get(MATCH_SUBSTRING)); + } else { + matchSubstring = false; + } + if (options.containsKey(ENCODING)) { encoding = options.get(ENCODING); } @@ -159,6 +169,7 @@ public class RegExFilter extends Filter io.addNamedOption(RegExFilter.COLQ_REGEX, "regular expression on column qualifier"); io.addNamedOption(RegExFilter.VALUE_REGEX, "regular expression on value"); io.addNamedOption(RegExFilter.OR_FIELDS, "use OR instread of AND when multiple regexes given"); + io.addNamedOption(RegExFilter.MATCH_SUBSTRING, "match on substrings"); io.addNamedOption(RegExFilter.ENCODING, "character encoding of byte array value (default is " + ENCODING_DEFAULT + ")"); return io; } @@ -194,6 +205,8 @@ public class RegExFilter extends Filter /** * Encode the terms to match against in the iterator + * Same as calling setRegexs(IteratorSetting si, String rowTerm, String cfTerm, String cqTerm, String valueTerm, boolean orFields, boolean matchSubstring) + * with matchSubstring set to false * * @param si * ScanIterator config to be updated @@ -208,7 +221,28 @@ public class RegExFilter extends Filter * @param orFields * if true, any of the non-null terms can match to return the entry */ - public static void setRegexs(IteratorSetting si, String rowTerm, String cfTerm, String cqTerm, String valueTerm, boolean orFields) { + public static void setRegexs(IteratorSetting si, String rowTerm, String cfTerm, String cqTerm, String valueTerm, boolean orFields) { + setRegexs(si, rowTerm, cfTerm, cqTerm, valueTerm, orFields, false); + } + + /** + * Encode the terms to match against in the iterator + * + * @param si + * ScanIterator config to be updated + * @param rowTerm + * the pattern to match against the Key's row. Not used if null. + * @param cfTerm + * the pattern to match against the Key's column family. Not used if null. + * @param cqTerm + * the pattern to match against the Key's column qualifier. Not used if null. + * @param valueTerm + * the pattern to match against the Key's value. Not used if null. + * @param matchSubstring + * if true then search expressions will match on partial strings + */ + public static void setRegexs(IteratorSetting si, String rowTerm, String cfTerm, String cqTerm, String valueTerm, boolean orFields, boolean matchSubstring) { + if (rowTerm != null) si.addOption(RegExFilter.ROW_REGEX, rowTerm); if (cfTerm != null) @@ -217,11 +251,14 @@ public class RegExFilter extends Filter si.addOption(RegExFilter.COLQ_REGEX, cqTerm); if (valueTerm != null) si.addOption(RegExFilter.VALUE_REGEX, valueTerm); - if (orFields) { - si.addOption(RegExFilter.OR_FIELDS, "true"); - } + si.addOption(RegExFilter.OR_FIELDS, String.valueOf(orFields)); + si.addOption(RegExFilter.MATCH_SUBSTRING, String.valueOf(matchSubstring)); + } + + + /** * Set the encoding string to use when interpreting characters * Modified: accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java URL: http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java?rev=1398451&r1=1398450&r2=1398451&view=diff ============================================================================== --- accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java (original) +++ accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/EGrepCommand.java Mon Oct 15 18:56:00 2012 @@ -21,15 +21,21 @@ import java.io.IOException; import org.apache.accumulo.core.client.BatchScanner; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.iterators.user.RegExFilter; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; public class EGrepCommand extends GrepCommand { + + private Option matchSubstringOption; + @Override - protected void setUpIterator(final int prio, final String name, final String term, final BatchScanner scanner) throws IOException { + protected void setUpIterator(final int prio, final String name, final String term, final BatchScanner scanner, CommandLine cl) throws IOException { if (prio < 0) { throw new IllegalArgumentException("Priority < 0 " + prio); } final IteratorSetting si = new IteratorSetting(prio, name, RegExFilter.class); - RegExFilter.setRegexs(si, term, term, term, term, true); + RegExFilter.setRegexs(si, term, term, term, term, true, cl.hasOption(matchSubstringOption.getOpt())); scanner.addScanIterator(si); } @@ -42,4 +48,12 @@ public class EGrepCommand extends GrepCo public String usage() { return getName() + " <regex>{ <regex>}"; } + + @Override + public Options getOptions() { + final Options opts = super.getOptions(); + matchSubstringOption = new Option("g", "global", false, "forces the use of the find() expression matcher, causing substring matches to return true"); + opts.addOption(matchSubstringOption); + return opts; + } } Modified: accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java URL: http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java?rev=1398451&r1=1398450&r2=1398451&view=diff ============================================================================== --- accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java (original) +++ accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/shell/commands/GrepCommand.java Mon Oct 15 18:56:00 2012 @@ -36,6 +36,7 @@ public class GrepCommand extends ScanCom private Option numThreadsOpt; + @Override public int execute(final String fullCommand, final CommandLine cl, final Shell shellState) throws Exception { final String tableName = OptUtil.getTableOpt(cl, shellState); @@ -45,7 +46,7 @@ public class GrepCommand extends ScanCom } final Class<? extends Formatter> formatter = getFormatter(cl, tableName, shellState); final ScanInterpreter interpeter = getInterpreter(cl, tableName, shellState); - + // handle first argument, if present, the authorizations list to // scan with int numThreads = 20; @@ -57,10 +58,10 @@ public class GrepCommand extends ScanCom scanner.setRanges(Collections.singletonList(getRange(cl, interpeter))); scanner.setTimeout(getTimeout(cl), TimeUnit.MILLISECONDS); - + for (int i = 0; i < cl.getArgs().length; i++) { - setUpIterator(Integer.MAX_VALUE - cl.getArgs().length + i, "grep" + i, cl.getArgs()[i], scanner); - } + setUpIterator(Integer.MAX_VALUE - cl.getArgs().length + i, "grep" + i, cl.getArgs()[i], scanner, cl); + } try { // handle columns fetchColumns(cl, scanner, interpeter); @@ -74,10 +75,10 @@ public class GrepCommand extends ScanCom return 0; } - protected void setUpIterator(final int prio, final String name, final String term, final BatchScanner scanner) throws IOException { + protected void setUpIterator(final int prio, final String name, final String term, final BatchScanner scanner, CommandLine cl) throws IOException { if (prio < 0) { throw new IllegalArgumentException("Priority < 0 " + prio); - } + } final IteratorSetting grep = new IteratorSetting(prio, name, GrepIterator.class); GrepIterator.setTerm(grep, term); scanner.addScanIterator(grep); Modified: accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java URL: http://svn.apache.org/viewvc/accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java?rev=1398451&r1=1398450&r2=1398451&view=diff ============================================================================== --- accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java (original) +++ accumulo/trunk/core/src/test/java/org/apache/accumulo/core/iterators/user/RegExFilterTest.java Mon Oct 15 18:56:00 2012 @@ -67,6 +67,21 @@ public class RegExFilterTest extends Tes assertTrue(rei.getTopKey().equals(k3)); rei.next(); assertFalse(rei.hasTop()); + + // ----------------------------------------------------- + // Test substring regex + is.clearOptions(); + + RegExFilter.setRegexs(is, null, null, null, "amst", false, true); // Should only match hamster + + rei.validateOptions(is.getOptions()); + rei.init(new SortedMapIterator(tm), is.getOptions(), new DefaultIteratorEnvironment()); + rei.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(rei.hasTop()); + assertTrue(rei.getTopKey().equals(k3)); + rei.next(); + assertFalse(rei.hasTop()); // ----------------------------------------------------- is.clearOptions();