ACCUMULO-1639 Adding ColumnSliceFilter implementation from Jeffrey Principe.
The implementation filters out records which do not fall within a given column qualifier range. It is intended to provide functionality equivalent to HBase's ColumnRangeFilter. Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c617032b Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/c617032b Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c617032b Branch: refs/heads/master Commit: c617032b868ce06177b30367c34918916289be3d Parents: ac4a027 Author: Josh Elser <[email protected]> Authored: Tue Sep 10 22:09:18 2013 -0400 Committer: Josh Elser <[email protected]> Committed: Tue Sep 10 22:09:18 2013 -0400 ---------------------------------------------------------------------- .../core/iterators/user/ColumnSliceFilter.java | 115 ++++++++ .../iterators/user/ColumnSliceFilterTest.java | 277 +++++++++++++++++++ 2 files changed, 392 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/c617032b/src/core/src/main/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilter.java ---------------------------------------------------------------------- diff --git a/src/core/src/main/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilter.java b/src/core/src/main/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilter.java new file mode 100644 index 0000000..5dfcd17 --- /dev/null +++ b/src/core/src/main/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilter.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.core.iterators.user; + +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.Filter; +import org.apache.accumulo.core.iterators.IteratorEnvironment; +import org.apache.accumulo.core.iterators.SortedKeyValueIterator; + +import java.io.IOException; +import java.util.Map; + +public class ColumnSliceFilter extends Filter { + public static final String START_BOUND = "startBound"; + public static final String START_INCLUSIVE = "startInclusive"; + public static final String END_BOUND = "endBound"; + public static final String END_INCLUSIVE = "endInclusive"; + + private String startBound; + private String endBound; + private boolean startInclusive; + private boolean endInclusive; + + @Override + public boolean accept(Key key, Value value) { + String colQ = key.getColumnQualifier().toString(); + return (startBound == null || (startInclusive ? (colQ.compareTo(startBound) >= 0) : (colQ.compareTo(startBound) > 0))) + && (endBound == null || (endInclusive ? 
(colQ.compareTo(endBound) <= 0) : (colQ.compareTo(endBound) < 0))); + } + + @Override + public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException { + super.init(source, options, env); + if (options.containsKey(START_BOUND)) { + startBound = options.get(START_BOUND); + } else { + startBound = null; + } + + if (options.containsKey(START_INCLUSIVE)) { + startInclusive = Boolean.parseBoolean(options.get(START_INCLUSIVE)); + } else { + startInclusive = true; + } + + if (options.containsKey(END_BOUND)) { + endBound = options.get(END_BOUND); + } else { + endBound = null; + } + + if (options.containsKey(END_INCLUSIVE)) { + endInclusive = Boolean.parseBoolean(options.get(END_INCLUSIVE)); + } else { + endInclusive = false; + } + } + + @Override + public IteratorOptions describeOptions() { + IteratorOptions io = super.describeOptions(); + io.setName("columnSlice"); + io.setDescription("The ColumnSliceFilter/Iterator allows you to filter for key/value pairs based on a lexicographic range of column qualifier names"); + io.addNamedOption(START_BOUND, "start string in slice"); + io.addNamedOption(END_BOUND, "end string in slice"); + io.addNamedOption(START_INCLUSIVE, "include the start bound in the result set"); + io.addNamedOption(END_INCLUSIVE, "include the end bound in the result set"); + return io; + } + + public static void setSlice(IteratorSetting si, String start, String end) { + setSlice(si, start, true, end, false); + } + + public static void setSlice(IteratorSetting si, String start, boolean startInclusive, String end, boolean endInclusive) { + if (start != null && end != null && (start.compareTo(end) > 0 || (start.compareTo(end) == 0 && (!startInclusive || !endInclusive)))) { + throw new IllegalArgumentException("Start key must be less than end key or equal with both sides inclusive in range (" + start + ", " + end + ")"); + } + + if (start != null) { + si.addOption(START_BOUND, start); + } + if 
(end != null) { + si.addOption(END_BOUND, end); + } + si.addOption(START_INCLUSIVE, String.valueOf(startInclusive)); + si.addOption(END_INCLUSIVE, String.valueOf(endInclusive)); + } + + @Override + public SortedKeyValueIterator<Key, Value> deepCopy(IteratorEnvironment env) { + ColumnSliceFilter result = (ColumnSliceFilter) super.deepCopy(env); + result.startBound = startBound; + result.startInclusive = startInclusive; + result.endBound = endBound; + result.endInclusive = endInclusive; + return result; + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c617032b/src/core/src/test/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilterTest.java ---------------------------------------------------------------------- diff --git a/src/core/src/test/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilterTest.java b/src/core/src/test/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilterTest.java new file mode 100644 index 0000000..44fe00f --- /dev/null +++ b/src/core/src/test/java/org/apache/accumulo/core/iterators/user/ColumnSliceFilterTest.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.accumulo.core.iterators.user; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.DefaultIteratorEnvironment; +import org.apache.accumulo.core.iterators.IteratorEnvironment; +import org.apache.accumulo.core.iterators.SortedMapIterator; +import org.apache.hadoop.io.Text; +import org.junit.Before; +import org.junit.Test; + +public class ColumnSliceFilterTest { + + private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>(); + + private static final SortedMap<Key,Value> TEST_DATA = new TreeMap<Key,Value>(); + private static final Key KEY_1 = nkv(TEST_DATA, "boo1", "yup", "20080201", "dog"); + private static final Key KEY_2 = nkv(TEST_DATA, "boo1", "yap", "20080202", "cat"); + private static final Key KEY_3 = nkv(TEST_DATA, "boo2", "yap", "20080203", "hamster"); + private static final Key KEY_4 = nkv(TEST_DATA, "boo2", "yop", "20080204", "lion"); + private static final Key KEY_5 = nkv(TEST_DATA, "boo2", "yup", "20080206", "tiger"); + private static final Key KEY_6 = nkv(TEST_DATA, "boo2", "yip", "20080203", "tiger"); + + private static IteratorEnvironment iteratorEnvironment; + + private ColumnSliceFilter columnSliceFilter = new ColumnSliceFilter(); + private IteratorSetting is; + + private static Key nkv(SortedMap<Key,Value> tm, String row, String cf, String cq, String val) { + Key k = nk(row, cf, cq); + tm.put(k, new Value(val.getBytes())); + return k; + } + + private static Key nk(String row, String cf, 
String cq) { + return new Key(new Text(row), new Text(cf), new Text(cq)); + } + + @Before + public void setUp() throws Exception { + columnSliceFilter.describeOptions(); + iteratorEnvironment = new DefaultIteratorEnvironment(); + is = new IteratorSetting(1, ColumnSliceFilter.class); + } + + @Test + public void testBasic() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", "20080204"); + + assertTrue(columnSliceFilter.validateOptions(is.getOptions())); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, true); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testBothInclusive() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", true, "20080204", true); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_4)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + 
public void testBothExclusive() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", false, "20080204", false); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testStartExclusiveEndInclusive() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", false, "20080204", true); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_4)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testNullStart() throws IOException { + ColumnSliceFilter.setSlice(is, null, "20080204"); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + 
assertTrue(columnSliceFilter.getTopKey().equals(KEY_1)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testNullEnd() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", null); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_4)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_5)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testBothNull() throws IOException { + ColumnSliceFilter.setSlice(is, null, null); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_1)); + columnSliceFilter.next(); + 
assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_3)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_6)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_4)); + columnSliceFilter.next(); + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_5)); + columnSliceFilter.next(); + assertFalse(columnSliceFilter.hasTop()); + } + + @Test + public void testStartAfterEnd() throws IOException { + try { + ColumnSliceFilter.setSlice(is, "20080204", "20080202"); + fail("IllegalArgumentException expected but not thrown"); + } catch(IllegalArgumentException expectedException) { + // Exception successfully thrown + } + } + + @Test + public void testStartEqualToEndStartInclusiveEndExclusive() throws IOException { + try { + ColumnSliceFilter.setSlice(is, "20080202", "20080202"); + fail("IllegalArgumentException expected but not thrown"); + } catch(IllegalArgumentException expectedException) { + // Exception successfully thrown + } + } + + @Test + public void testStartEqualToEndStartExclusiveEndInclusive() throws IOException { + try { + ColumnSliceFilter.setSlice(is, "20080202", false, "20080202", true); + fail("IllegalArgumentException expected but not thrown"); + } catch(IllegalArgumentException expectedException) { + // Exception successfully thrown + } + } + + @Test + public void testStartEqualToEndBothInclusive() throws IOException { + ColumnSliceFilter.setSlice(is, "20080202", true, "20080202", true); + + columnSliceFilter.validateOptions(is.getOptions()); + columnSliceFilter.init(new SortedMapIterator(TEST_DATA), is.getOptions(), iteratorEnvironment); + columnSliceFilter.seek(new Range(), EMPTY_COL_FAMS, false); + + assertTrue(columnSliceFilter.hasTop()); + assertTrue(columnSliceFilter.getTopKey().equals(KEY_2)); + columnSliceFilter.next(); + 
assertFalse(columnSliceFilter.hasTop()); + } +} +
