ACCUMULO-1267 Added check to see if a user actually sets a regex before applying the RegExFilter to the record reader's scanner.
git-svn-id: https://svn.apache.org/repos/asf/accumulo/branches/1.4@1499198 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/1725ec3a Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/1725ec3a Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/1725ec3a Branch: refs/heads/1.5.1-SNAPSHOT Commit: 1725ec3a6f8a7f56fb167e7b24686b1848520aa9 Parents: 4fd1066 Author: Bill Slacum <[email protected]> Authored: Wed Jul 3 04:04:20 2013 +0000 Committer: Bill Slacum <[email protected]> Committed: Wed Jul 3 04:04:20 2013 +0000 ---------------------------------------------------------------------- .../core/client/mapreduce/InputFormatBase.java | 10 ++++-- .../mapreduce/AccumuloInputFormatTest.java | 33 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/1725ec3a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java ---------------------------------------------------------------------- diff --git a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java index 174df66..8e238f1 100644 --- a/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java +++ b/src/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java @@ -1131,9 +1131,13 @@ public abstract class InputFormatBase<K,V> extends InputFormat<K,V> { scanner = new ClientSideIteratorScanner(scanner); } setupMaxVersions(conf, scanner); - IteratorSetting is = new IteratorSetting(50, RegExFilter.class); - RegExFilter.setRegexs(is, conf.get(ROW_REGEX), conf.get(COLUMN_FAMILY_REGEX), conf.get(COLUMN_QUALIFIER_REGEX), null, false); - scanner.addScanIterator(is); + if (conf.get(ROW_REGEX) != null || conf.get(COLUMN_FAMILY_REGEX) != null || conf.get(COLUMN_QUALIFIER_REGEX) != null || + conf.get(VALUE_REGEX) != null) { + IteratorSetting is = new IteratorSetting(50, RegExFilter.class); + RegExFilter.setRegexs(is, conf.get(ROW_REGEX), conf.get(COLUMN_FAMILY_REGEX), conf.get(COLUMN_QUALIFIER_REGEX), + conf.get(VALUE_REGEX), false); + scanner.addScanIterator(is); + } setupIterators(conf, scanner); } catch (Exception e) { throw new IOException(e); http://git-wip-us.apache.org/repos/asf/accumulo/blob/1725ec3a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java ---------------------------------------------------------------------- diff --git a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java index d6df099..ba647e9 100644 --- a/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java +++ b/src/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormatTest.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.List; +import java.util.regex.Pattern; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.Connector; @@ -46,6 +47,7 @@ import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.junit.After; +import org.junit.Assert; import org.junit.Test; public class AccumuloInputFormatTest { @@ -353,4 +355,35 @@ public class AccumuloInputFormatTest { mapper.map(rr.getCurrentKey(), rr.getCurrentValue(), context); } } + + @SuppressWarnings("deprecation") + @Test + public void testRegex() throws Exception { + MockInstance mockInstance = new MockInstance("testmapinstance"); + Connector c = mockInstance.getConnector("root", new byte[] {}); + c.tableOperations().create("testtable3"); + BatchWriter bw = c.createBatchWriter("testtable3", 10000L, 1000L, 4); + for (int i = 0; i < 100; i++) { + Mutation m = new Mutation(new Text(String.format("%09x", i + 1))); + m.put(new Text(), new Text(), new Value(String.format("%09x", i).getBytes())); + bw.addMutation(m); + } + bw.close(); + + JobContext job = new JobContext(new Configuration(), new JobID()); + AccumuloInputFormat.setInputInfo(job.getConfiguration(), "root", "".getBytes(), "testtable3", new Authorizations()); + AccumuloInputFormat.setMockInstance(job.getConfiguration(), "testmapinstance"); + final String regex = ".*1.*"; + AccumuloInputFormat.setRegex(job, RegexType.ROW, regex); + AccumuloInputFormat input = new AccumuloInputFormat(); + RangeInputSplit ris = new RangeInputSplit(); + TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()); + RecordReader<Key,Value> rr = input.createRecordReader(ris, tac); + rr.initialize(ris, tac); + + Pattern p = Pattern.compile(regex); + while (rr.nextKeyValue()) { + Assert.assertTrue( p.matcher( rr.getCurrentKey().getRow().toString()).matches()); + } + } }
