DRILL-1508: Implement pushdown for LIKE operator in HBase storage engine
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/71f5ad44 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/71f5ad44 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/71f5ad44 Branch: refs/heads/master Commit: 71f5ad447823cba96cf73fb6709f8ed6cc0b63e9 Parents: 67df8cf Author: Aditya Kishore <adi...@maprtech.com> Authored: Tue Oct 21 23:54:01 2014 -0700 Committer: Aditya Kishore <adi...@maprtech.com> Committed: Wed Oct 22 18:20:08 2014 -0700 ---------------------------------------------------------------------- .../java/org/apache/drill/test/DrillTest.java | 9 + .../store/hbase/CompareFunctionsProcessor.java | 6 +- .../exec/store/hbase/HBaseFilterBuilder.java | 68 +++++++- .../exec/store/hbase/HBaseRegexParser.java | 165 +++++++++++++++++++ .../org/apache/drill/hbase/BaseHBaseTest.java | 10 -- .../org/apache/drill/hbase/HBaseTestsSuite.java | 7 +- .../drill/hbase/TestHBaseFilterPushDown.java | 36 ++++ .../drill/hbase/TestHBaseRegexParser.java | 65 ++++++++ .../apache/drill/hbase/TestTableGenerator.java | 45 +++++ 9 files changed, 393 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/common/src/test/java/org/apache/drill/test/DrillTest.java ---------------------------------------------------------------------- diff --git a/common/src/test/java/org/apache/drill/test/DrillTest.java b/common/src/test/java/org/apache/drill/test/DrillTest.java index 55f89b3..8abcb6a 100644 --- a/common/src/test/java/org/apache/drill/test/DrillTest.java +++ b/common/src/test/java/org/apache/drill/test/DrillTest.java @@ -25,8 +25,10 @@ import java.util.List; import org.apache.drill.common.util.DrillStringUtils; import org.apache.drill.common.util.TestTools; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; +import org.junit.rules.TestName; import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; import org.junit.runner.Description; @@ -51,6 +53,13 @@ public class DrillTest { @Rule public final TestRule TIMEOUT = TestTools.getTimeoutRule(50000); @Rule public final TestLogReporter logOutcome = LOG_OUTCOME; + @Rule public TestName TEST_NAME = new TestName(); + + @Before + public void printID() throws Exception { + System.out.printf("Running %s#%s\n", getClass().getName(), TEST_NAME.getMethodName()); + } + @BeforeClass public static void initDrillTest() throws Exception { memWatcher = new MemWatcher(); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/CompareFunctionsProcessor.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/CompareFunctionsProcessor.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/CompareFunctionsProcessor.java index 6810f81..1635c5d 100644 --- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/CompareFunctionsProcessor.java +++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/CompareFunctionsProcessor.java @@ -37,6 +37,7 @@ import org.apache.drill.common.expression.ValueExpressions.QuotedString; import org.apache.drill.common.expression.ValueExpressions.TimeExpression; import org.apache.drill.common.expression.visitors.AbstractExprVisitor; +import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -54,7 +55,7 @@ class CompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpr public static CompareFunctionsProcessor process(FunctionCall call, boolean nullComparatorSupported) { String functionName = call.getName(); LogicalExpression nameArg = call.args.get(0); - LogicalExpression valueArg = call.args.size() == 2 ? call.args.get(1) : null; + LogicalExpression valueArg = call.args.size() >= 2 ? call.args.get(1) : null; CompareFunctionsProcessor evaluator = new CompareFunctionsProcessor(functionName); if (valueArg != null) { // binary function @@ -186,7 +187,7 @@ class CompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpr @Override public Boolean visitSchemaPath(SchemaPath path, LogicalExpression valueArg) throws RuntimeException { if (valueArg instanceof QuotedString) { - this.value = ((QuotedString) valueArg).value.getBytes(); + this.value = ((QuotedString) valueArg).value.getBytes(Charsets.UTF_8); this.path = path; return true; } @@ -220,6 +221,7 @@ class CompareFunctionsProcessor extends AbstractExprVisitor<Boolean, LogicalExpr .put("isNull", "isNull") .put("is null", "is null") // binary functions + .put("like", "like") .put("equal", "equal") .put("not_equal", "not_equal") .put("greater_than_or_equal_to", "less_than_or_equal_to") http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseFilterBuilder.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseFilterBuilder.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseFilterBuilder.java index 9d74b69..5c49f18 100644 --- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseFilterBuilder.java +++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseFilterBuilder.java @@ -29,10 +29,12 @@ import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.NullComparator; +import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; +import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, RuntimeException> implements DrillHBaseConstants { @@ -43,6 +45,8 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, private boolean allExpressionsConverted = true; + private static Boolean nullComparatorSupported = null; + HBaseFilterBuilder(HBaseGroupScan groupScan, LogicalExpression le) { this.groupScan = groupScan; this.le = le; @@ -56,7 +60,8 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, * If RowFilter is THE filter attached to the scan specification, * remove it since its effect is also achieved through startRow and stopRow. */ - if (parsedSpec.filter instanceof RowFilter) { + if (parsedSpec.filter instanceof RowFilter && + ((RowFilter)parsedSpec.filter).getComparator() instanceof BinaryComparator) { parsedSpec.filter = null; } } @@ -90,11 +95,13 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, * causes a filter with NullComparator to fail. Enable only if specified in * the configuration (after ensuring that the HBase cluster has the fix). */ - boolean nullComparatorSupported = groupScan.getHBaseConf().getBoolean("drill.hbase.supports.null.comparator", false); + if (nullComparatorSupported == null) { + nullComparatorSupported = groupScan.getHBaseConf().getBoolean("drill.hbase.supports.null.comparator", false); + } CompareFunctionsProcessor processor = CompareFunctionsProcessor.process(call, nullComparatorSupported); if (processor.isSuccess()) { - nodeScanSpec = createHBaseScanSpec(processor.getFunctionName(), processor.getPath(), processor.getValue()); + nodeScanSpec = createHBaseScanSpec(call, processor); } } else { switch (functionName) { @@ -143,10 +150,17 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, return new HBaseScanSpec(groupScan.getTableName(), startRow, stopRow, newFilter); } - private HBaseScanSpec createHBaseScanSpec(String functionName, SchemaPath field, byte[] fieldValue) { + private HBaseScanSpec createHBaseScanSpec(FunctionCall call, CompareFunctionsProcessor processor) { + String functionName = processor.getFunctionName(); + SchemaPath field = processor.getPath(); + byte[] fieldValue = processor.getValue(); boolean isRowKey = field.getAsUnescapedPath().equals(ROW_KEY); if (!(isRowKey - || (field.getRootSegment().getChild() != null && field.getRootSegment().getChild().isNamed()))) { + || (!field.getRootSegment().isLastPath() + && field.getRootSegment().getChild().isLastPath() + && field.getRootSegment().getChild().isNamed()) + ) + ) { /* * if the field in this function is neither the row_key nor a qualified HBase column, return. */ @@ -163,6 +177,7 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, compareOp = CompareOp.EQUAL; if (isRowKey) { startRow = stopRow = fieldValue; + compareOp = null; } break; case "not_equal": @@ -213,6 +228,49 @@ public class HBaseFilterBuilder extends AbstractExprVisitor<HBaseScanSpec, Void, compareOp = CompareOp.NOT_EQUAL; comparator = new NullComparator(); break; + case "like": + /* + * Convert the LIKE operand to Regular Expression pattern so that we can + * apply RegexStringComparator() + */ + HBaseRegexParser parser = new HBaseRegexParser(call).parse(); + compareOp = CompareOp.EQUAL; + comparator = new RegexStringComparator(parser.getRegexString()); + + /* + * We can possibly do better if the LIKE operator is on the row_key + */ + if (isRowKey) { + String prefix = parser.getPrefixString(); + if (prefix != null) { // group 3 is literal + /* + * If there is a literal prefix, it can help us prune the scan to a sub range + */ + if (prefix.equals(parser.getLikeString())) { + /* The operand value is literal. This turns the LIKE operator to EQUAL operator */ + startRow = stopRow = fieldValue; + compareOp = null; + } else { + startRow = prefix.getBytes(Charsets.UTF_8); + stopRow = startRow.clone(); + boolean isMaxVal = true; + for (int i = stopRow.length - 1; i >= 0 ; --i) { + int nextByteValue = (0xff & stopRow[i]) + 1; + if (nextByteValue < 0xff) { + stopRow[i] = (byte) nextByteValue; + isMaxVal = false; + break; + } else { + stopRow[i] = 0; + } + } + if (isMaxVal) { + stopRow = HConstants.EMPTY_END_ROW; + } + } + } + } + break; } if (compareOp != null || startRow != HConstants.EMPTY_START_ROW || stopRow != HConstants.EMPTY_END_ROW) { http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRegexParser.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRegexParser.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRegexParser.java new file mode 100644 index 0000000..c05baaa --- /dev/null +++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRegexParser.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.hbase; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.ValueExpressions.QuotedString; + +public class HBaseRegexParser { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HBaseRegexParser.class); + + /** + * Regular expression pattern to parse the value operand of the SQL LIKE operator. + * The tokens could be one of the 3 types.<br/> + * <ol> + * <li>Wildcards, i.e. "%" or "_" ==> first regex group ([%_])</li> + * <li>Character ranges, i.e. "[]" or "[^]" ==> second regex group (\[[^]]*\])</li> + * <li>Literals ==> third regex group ([^%_\[]+)</li> + * </ol> + */ + private static final Pattern SQL_LIKE_REGEX = Pattern.compile("([%_])|(\\[[^]]*\\])|([^%_\\[]+)"); + + private static final String SQL_LIKE_ESCAPE_REGEX_STR = "(%s.?)|([%%_])|(\\[[^]]*\\])|([^%%_\\[%s]+)"; + + private static final String JAVA_REGEX_SPECIALS = ".()[]{}<>|^-+=*?!$\\"; + + private final String likeString_; + + private final String escapeChar_; + + private String regexString_ = null; + + private String prefixString_ = null; + + public HBaseRegexParser(FunctionCall call) { + this(likeString(call), escapeString(call)); + } + + public HBaseRegexParser(String likeString) { + this(likeString, null); + } + + public HBaseRegexParser(String likeString, Character escapeChar) { + likeString_ = likeString; + if (escapeChar == null) { + escapeChar_ = null; + } else { + escapeChar_ = JAVA_REGEX_SPECIALS.indexOf(escapeChar) == -1 + ? String.valueOf(escapeChar) : ("\\" + escapeChar); + } + } + + /** + * Convert a SQL LIKE operator Value to a Regular Expression. + */ + public HBaseRegexParser parse() { + if (regexString_ != null) { + return this; + } + + Matcher matcher = null; + StringBuilder prefixSB = new StringBuilder(); + StringBuilder regexSB = new StringBuilder("^"); // starts with + if (escapeChar_ == null) { + matcher = SQL_LIKE_REGEX.matcher(likeString_); + } else { + /* + * When an escape character is specified, add another capturing group + * with the escape character in the front for the escape sequence and + * add the escape character to the exclusion list of literals + */ + matcher = Pattern.compile( + String.format(SQL_LIKE_ESCAPE_REGEX_STR, escapeChar_, escapeChar_)) + .matcher(likeString_); + } + String fragment = null; + boolean literalsSoFar = true; + while (matcher.find()) { + if (escapeChar_ != null && matcher.group(1) != null) { + fragment = matcher.group(1); + if (fragment.length() != 2) { + throw new IllegalArgumentException("Invalid fragment '" + + fragment + "' at index " + matcher.start() + + " in the LIKE operand '" + likeString_ + "'"); + } + String escapedChar = fragment.substring(1); + if (literalsSoFar) { + prefixSB.append(escapedChar); + } + regexSB.append(Pattern.quote(escapedChar)); + } else { + fragment = matcher.group(); + switch (fragment) { + case "_": // LIKE('_') => REGEX('.') + literalsSoFar = false; + regexSB.append("."); + break; + case "%": // LIKE('%') => REGEX('.*') + literalsSoFar = false; + regexSB.append(".*"); + break; + default: // ALL other including character ranges + if (fragment.startsWith("[") && fragment.endsWith("]")) { + literalsSoFar = false; + regexSB.append(fragment); + } else { + if (literalsSoFar) { + prefixSB.append(fragment); + } + // found literal, just quote it. + regexSB.append(Pattern.quote(fragment)); + } + break; + } + } + } + prefixString_ = prefixSB.toString(); + regexString_ = regexSB.append("$") // ends with + .toString(); + + logger.debug("Converted LIKE string '{}' to REGEX string '{}'.", likeString_, regexString_); + return this; + } + + public String getRegexString() { + return regexString_; + } + + public String getPrefixString() { + return prefixString_; + } + + public String getLikeString() { + return likeString_; + } + + private static String likeString(FunctionCall call) { + return ((QuotedString) call.args.get(1)).value; + } + + private static Character escapeString(FunctionCall call) { + if (call.args.size() > 2) { + return ((QuotedString) call.args.get(2)).value.charAt(0); + } + return null; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/BaseHBaseTest.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/BaseHBaseTest.java b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/BaseHBaseTest.java index 086900a..1152b7b 100644 --- a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/BaseHBaseTest.java +++ b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/BaseHBaseTest.java @@ -30,10 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.junit.AfterClass; import org.junit.Assert; -import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.rules.TestName; import com.google.common.base.Charsets; import com.google.common.io.Files; @@ -48,13 +45,6 @@ public class BaseHBaseTest extends BaseTestQuery { protected static HBaseStoragePluginConfig storagePluginConfig; - @Rule public TestName TEST_NAME = new TestName(); - - @Before - public void printID() throws Exception { - System.out.printf("Running %s#%s\n", getClass().getName(), TEST_NAME.getMethodName()); - } - @BeforeClass public static void setUpBeforeClass() throws Exception { /* http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/HBaseTestsSuite.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/HBaseTestsSuite.java b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/HBaseTestsSuite.java index 18cf87c..888a9f5 100644 --- a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/HBaseTestsSuite.java +++ b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/HBaseTestsSuite.java @@ -34,6 +34,7 @@ import org.junit.runners.Suite.SuiteClasses; @RunWith(Suite.class) @SuiteClasses({ + TestHBaseRegexParser.class, HBaseRecordReaderTest.class, TestHBaseFilterPushDown.class, TestHBaseProjectPushDown.class, @@ -47,6 +48,7 @@ public class HBaseTestsSuite { private static final boolean IS_DEBUG = ManagementFactory.getRuntimeMXBean().getInputArguments().toString().indexOf("-agentlib:jdwp") > 0; protected static final String TEST_TABLE_1 = "TestTable1"; + protected static final String TEST_TABLE_3 = "TestTable3"; private static Configuration conf; @@ -128,7 +130,7 @@ public class HBaseTestsSuite { } private static boolean tablesExist() throws IOException { - return admin.tableExists(TEST_TABLE_1); + return admin.tableExists(TEST_TABLE_1) && admin.tableExists(TEST_TABLE_3); } private static void createTestTables() throws Exception { @@ -138,11 +140,14 @@ public class HBaseTestsSuite { * Will revert to multiple region once the issue is resolved. */ TestTableGenerator.generateHBaseDataset1(admin, TEST_TABLE_1, 1); + TestTableGenerator.generateHBaseDataset3(admin, TEST_TABLE_3, 1); } private static void cleanupTestTables() throws IOException { admin.disableTable(TEST_TABLE_1); admin.deleteTable(TEST_TABLE_1); + admin.disableTable(TEST_TABLE_3); + admin.deleteTable(TEST_TABLE_3); } public static int getZookeeperPort() { http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseFilterPushDown.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseFilterPushDown.java b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseFilterPushDown.java index 29e7033..4e63a3d 100644 --- a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseFilterPushDown.java +++ b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseFilterPushDown.java @@ -34,6 +34,42 @@ public class TestHBaseFilterPushDown extends BaseHBaseTest { } @Test + public void testFilterPushDownRowKeyLike() throws Exception { + setColumnWidths(new int[] {8, 22}); + runHBaseSQLVerifyCount("SELECT\n" + + " row_key, convert_from(tableName.f.c, 'UTF8') `f.c`\n" + + "FROM\n" + + " hbase.`TestTable3` tableName\n" + + "WHERE\n" + + " row_key LIKE '08%0' OR row_key LIKE '%70'" + , 21); + } + + @Test + public void testFilterPushDownRowKeyLikeWithEscape() throws Exception { + setColumnWidths(new int[] {8, 22}); + runHBaseSQLVerifyCount("SELECT\n" + + " row_key, convert_from(tableName.f.c, 'UTF8') `f.c`\n" + + "FROM\n" + + " hbase.`TestTable3` tableName\n" + + "WHERE\n" + + " row_key LIKE '!%!_AS!_PREFIX!_%' ESCAPE '!'" + , 2); + } + + @Test + public void testFilterPushDownRowKeyRangeAndColumnValueLike() throws Exception { + setColumnWidths(new int[] {8, 22}); + runHBaseSQLVerifyCount("SELECT\n" + + " row_key, convert_from(tableName.f.c, 'UTF8') `f.c`\n" + + "FROM\n" + + " hbase.`TestTable3` tableName\n" + + "WHERE\n" + + " row_key >= '07' AND row_key < '09' AND tableName.f.c LIKE 'value 0%9'" + , 22); + } + + @Test public void testFilterPushDownRowKeyGreaterThan() throws Exception { setColumnWidths(new int[] {8, 38, 38}); runHBaseSQLVerifyCount("SELECT\n" http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseRegexParser.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseRegexParser.java b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseRegexParser.java new file mode 100644 index 0000000..a925d0e --- /dev/null +++ b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestHBaseRegexParser.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.hbase; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.regex.Pattern; + +import org.apache.drill.exec.store.hbase.HBaseRegexParser; +import org.apache.drill.test.DrillTest; +import org.junit.Test; + +public class TestHBaseRegexParser extends DrillTest { + + @Test + public void testLikeExprToRegex() throws Exception { + HBaseRegexParser parser = new HBaseRegexParser("ABC%[0-7][0-9A-Fa-f]").parse(); + assertEquals("^\\QABC\\E.*[0-7][0-9A-Fa-f]$", parser.getRegexString()); + assertEquals("ABC", parser.getPrefixString()); + Pattern pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL); + assertTrue(pattern.matcher("ABC79").matches()); + assertTrue(pattern.matcher("ABCxxxxxxx79").matches()); + + parser = new HBaseRegexParser("ABC%[0-8]%_").parse(); + assertEquals("^\\QABC\\E.*[0-8].*.$", parser.getRegexString()); + assertEquals("ABC", parser.getPrefixString()); + pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL); + assertTrue(pattern.matcher("ABC79").matches()); + assertTrue(pattern.matcher("ABCxxxx79").matches()); + assertTrue(pattern.matcher("ABCxxxx7xxxxx9").matches()); + assertTrue(pattern.matcher("ABC[0-8]_").matches()); + + parser = new HBaseRegexParser("ABC%[0-8]%_", '%').parse(); + assertEquals("^\\QABC\\E\\Q[\\E\\Q0-8]\\E\\Q_\\E$", parser.getRegexString()); + assertEquals("ABC[0-8]_", parser.getPrefixString()); + pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL); + assertFalse(pattern.matcher("ABC79").matches()); + assertTrue(pattern.matcher("ABC[0-8]_").matches()); + + try { + parser = new HBaseRegexParser("ABC%[0-8][^a-f]%", '%').parse(); + fail("Parsed an illegal LIKE expression."); + } catch (IllegalArgumentException e) { + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71f5ad44/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestTableGenerator.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestTableGenerator.java b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestTableGenerator.java index ccf5ad5..c244f9e 100644 --- a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestTableGenerator.java +++ b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/TestTableGenerator.java @@ -34,6 +34,9 @@ public class TestTableGenerator { {'r'}, {'s'}, {'t'}, {'u'}, {'v'}, {'w'}, {'x'}, {'y'}, {'z'} }; + static final byte[] FAMILY_F = {'f'}; + static final byte[] COLUMN_C = {'c'}; + public static void generateHBaseDataset1(HBaseAdmin admin, String tableName, int numberRegions) throws Exception { if (admin.tableExists(tableName)) { admin.disableTable(tableName); @@ -152,4 +155,46 @@ public class TestTableGenerator { admin.flush(tableName); } + public static void generateHBaseDataset3(HBaseAdmin admin, String tableName, int numberRegions) throws Exception { + if (admin.tableExists(tableName)) { + admin.disableTable(tableName); + admin.deleteTable(tableName); + } + + HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILY_F)); + + if (numberRegions > 1) { + admin.createTable(desc, Arrays.copyOfRange(SPLIT_KEYS, 0, numberRegions-1)); + } else { + admin.createTable(desc); + } + + HTable table = new HTable(admin.getConfiguration(), tableName); + + for (int i = 0; i <= 100; ++i) { + Put p = new Put((String.format("%03d", i)).getBytes()); + p.add(FAMILY_F, COLUMN_C, String.format("value %03d", i).getBytes()); + table.put(p); + } + for (int i = 0; i <= 1000; ++i) { + Put p = new Put((String.format("%04d", i)).getBytes()); + p.add(FAMILY_F, COLUMN_C, String.format("value %04d", i).getBytes()); + table.put(p); + } + + Put p = new Put("%_AS_PREFIX_ROW1".getBytes()); + p.add(FAMILY_F, COLUMN_C, "dummy".getBytes()); + table.put(p); + + p = new Put("%_AS_PREFIX_ROW2".getBytes()); + p.add(FAMILY_F, COLUMN_C, "dummy".getBytes()); + table.put(p); + + table.flushCommits(); + table.close(); + + admin.flush(tableName); + } + }