Author: jimk
Date: Sat Mar 22 15:35:36 2008
New Revision: 640106

URL: http://svn.apache.org/viewvc?rev=640106&view=rev
Log:
HBASE-476 RegexpRowFilter behaves incorectly when there are multiple store files (Clint Morgan via Jim Kellerman)
HBASE-527 RegexpRowFilter does not work when there are columns from multiple families (Clint Morgan via Jim Kellerman)
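For context on the two issues: RegExpRowFilter lets a scan require that given columns equal given values (or be null), and the bugs surfaced when the matching cells were spread across multiple store files (HBASE-476) or multiple column families (HBASE-527). Below is a minimal sketch of the client-side usage the new tests exercise; the table and column names are taken from the added TestRowFilterOnMultipleFamilies, while the standalone main() wrapper and HBaseConfiguration setup around it are illustrative, not part of this commit.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.TreeMap;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.HScannerInterface;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.filter.RegExpRowFilter;
    import org.apache.hadoop.io.Text;

    public class RegExpRowFilterScanSketch {
      public static void main(String[] args) throws Exception {
        // Assumes "TestTable" already exists with families "A:" and "B:",
        // populated as in TestRowFilterOnMultipleFamilies below.
        HBaseConfiguration conf = new HBaseConfiguration();
        HTable table = new HTable(conf, new Text("TestTable"));

        // Row-key pattern is null (match any key); only rows whose A:col1
        // cell equals "HELLO" should come back from the scan.
        Map<Text, byte[]> columnCriteria = new HashMap<Text, byte[]>();
        columnCriteria.put(new Text("A:col1"), "HELLO".getBytes());
        RegExpRowFilter filter = new RegExpRowFilter(null, columnCriteria);

        Text[] columns = { new Text("A:col1"), new Text("B:col2") };
        HScannerInterface scanner =
            table.obtainScanner(columns, HConstants.EMPTY_START_ROW, filter);
        try {
          HStoreKey key = new HStoreKey();
          TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
          while (scanner.next(key, results)) {
            System.out.println("row: " + key.getRow()
                + ", columns returned: " + results.size());
          }
        } finally {
          scanner.close();
        }
      }
    }

Before this change, a scan like this could filter rows incorrectly once the criteria column and the other returned columns lived in different column families or different store files; the two tests added below assert that all NUM_ROWS rows still come back through the filter.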
Added:
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterAfterWrite.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterOnMultipleFamilies.java
Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/InclusiveStopRowFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/StopRowFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/WhileMatchRowFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Sat Mar 22 15:35:36 2008
@@ -50,6 +50,10 @@
    HBASE-531 Merge tool won't merge two overlapping regions (port HBASE-483
              to trunk)
    HBASE-537 Wait for hdfs to exit safe mode
+   HBASE-476 RegexpRowFilter behaves incorectly when there are multiple store
+             files (Clint Morgan via Jim Kellerman)
+   HBASE-527 RegexpRowFilter does not work when there are columns from
+             multiple families (Clint Morgan via Jim Kellerman)

  IMPROVEMENTS
    HBASE-415 Rewrite leases to use DelayedBlockingQueue instead of polling

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/InclusiveStopRowFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/InclusiveStopRowFilter.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/InclusiveStopRowFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/InclusiveStopRowFilter.java Sat Mar 22 15:35:36 2008
@@ -19,17 +19,9 @@
  */
 package org.apache.hadoop.hbase.filter;

-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
-
-/*
+/**
  * Subclass of StopRowFilter that filters rows > the stop row,
  * making it include up to the last row but no further.
  */
@@ -49,6 +41,8 @@
     super(stopRowKey);
   }

+  /** {@inheritDoc} */
+  @Override
   public boolean filter(final Text rowKey) {
     if (rowKey == null) {
       if (this.stopRowKey == null) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java Sat Mar 22 15:35:36 2008
@@ -22,7 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.TreeMap;
+import java.util.SortedMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -137,7 +137,7 @@
    * {@inheritDoc}
    */
   public boolean filterNotNull(@SuppressWarnings("unused")
-      final TreeMap<Text, byte[]> columns) {
+      final SortedMap<Text, byte[]> columns) {
     return filterAllRemaining();
   }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java Sat Mar 22 15:35:36 2008
@@ -27,7 +27,7 @@
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.TreeMap;
+import java.util.SortedMap;
 import java.util.Map.Entry;
 import java.util.regex.Pattern;

@@ -196,7 +196,7 @@
    *
    * {@inheritDoc}
    */
-  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
+  public boolean filterNotNull(final SortedMap<Text, byte[]> columns) {
     for (Entry<Text, byte[]> col : columns.entrySet()) {
       if (nullColumns.contains(col.getKey())
           && !HLogEdit.isDeleted(col.getValue())) {
@@ -211,7 +211,7 @@
       if (!columns.containsKey(col)) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("filterNotNull returning true for colKey: " + col +
-            ", column not found in given TreeMap<Text, byte[]>.");
+            ", column not found in given SortedMap<Text, byte[]>.");
         }
         return true;
       }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java Sat Mar 22 15:35:36 2008
@@ -19,7 +19,7 @@
  */
 package org.apache.hadoop.hbase.filter;

-import java.util.TreeMap;
+import java.util.SortedMap;

 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -108,7 +108,7 @@
    * @param columns
    * @return true if null/non-null criteria not met.
    */
-  boolean filterNotNull(final TreeMap<Text, byte[]> columns);
+  boolean filterNotNull(final SortedMap<Text, byte[]> columns);

   /**
    * Validates that this filter applies only to a subset of the given columns.
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java Sat Mar 22 15:35:36 2008
@@ -24,7 +24,7 @@
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
-import java.util.TreeMap;
+import java.util.SortedMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -232,7 +232,7 @@
   }

   /** {@inheritDoc} */
-  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
+  public boolean filterNotNull(final SortedMap<Text, byte[]> columns) {
     boolean resultFound = false;
     boolean result = operator == Operator.MUST_PASS_ONE;
     for (RowFilterInterface filter : filters) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/StopRowFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/StopRowFilter.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/StopRowFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/StopRowFilter.java Sat Mar 22 15:35:36 2008
@@ -22,7 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.TreeMap;
+import java.util.SortedMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -132,7 +132,7 @@
    * @param columns
    */
   public boolean filterNotNull(@SuppressWarnings("unused")
-      final TreeMap<Text, byte[]> columns) {
+      final SortedMap<Text, byte[]> columns) {
     return filterAllRemaining();
   }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/WhileMatchRowFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/WhileMatchRowFilter.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/WhileMatchRowFilter.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/filter/WhileMatchRowFilter.java Sat Mar 22 15:35:36 2008
@@ -22,7 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.TreeMap;
+import java.util.SortedMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -31,7 +31,7 @@

 /**
  * WhileMatchRowFilter is a wrapper filter that filters everything after the
  * first filtered row. Once the nested filter returns true for either of it's
- * filter(..) methods or filterNotNull(TreeMap<Text, byte[]>), this wrapper's
+ * filter(..) methods or filterNotNull(SortedMap<Text, byte[]>), this wrapper's
  * filterAllRemaining() will return true. All filtering methods will
  * thereafter defer to the result of filterAllRemaining().
  */
@@ -115,7 +115,7 @@
   }

   /** {@inheritDoc} */
-  public boolean filterNotNull(final TreeMap<Text, byte[]> columns) {
+  public boolean filterNotNull(final SortedMap<Text, byte[]> columns) {
     changeFAR(this.filter.filterNotNull(columns));
     boolean result = filterAllRemaining();
     if (LOG.isDebugEnabled()) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Sat Mar 22 15:35:36 2008
@@ -1721,12 +1721,14 @@
     private HInternalScannerInterface[] scanners;
     private TreeMap<Text, byte []>[] resultSets;
     private HStoreKey[] keys;
+    private RowFilterInterface filter;

     /** Create an HScanner with a handle on many HStores. */
     @SuppressWarnings("unchecked")
     HScanner(Text[] cols, Text firstRow, long timestamp, HStore[] stores,
         RowFilterInterface filter)
     throws IOException {
+      this.filter = filter;
       this.scanners = new HInternalScannerInterface[stores.length];
       try {
         for (int i = 0; i < stores.length; i++) {
@@ -1737,8 +1739,8 @@
           // At least WhileMatchRowFilter will mess up the scan if only
           // one shared across many rows. See HADOOP-2467.
           scanners[i] = stores[i].getScanner(timestamp, cols, firstRow,
-            (i > 0 && filter != null)?
-              (RowFilterInterface)WritableUtils.clone(filter, conf): filter);
+            filter != null ?
+              (RowFilterInterface)WritableUtils.clone(filter, conf) : filter);
         }
       } catch(IOException e) {
         for (int i = 0; i < this.scanners.length; i++) {
@@ -1839,6 +1841,12 @@
             }
           }
         }
+
+        if (filter != null && filter.filterNotNull(results)) {
+          LOG.warn("Filter return true on assembled Results in hstore");
+          return moreToFollow == true && this.next(key, results);
+        }
+
         return moreToFollow;
       }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java?rev=640106&r1=640105&r2=640106&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java Sat Mar 22 15:35:36 2008
@@ -32,7 +32,6 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.hbase.HStoreKey;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.filter.RowFilterInterface;
 import org.apache.hadoop.hbase.HConstants;

@@ -165,11 +164,6 @@
             && !multipleMatchers
             && (keys[i].getTimestamp() != chosenTimestamp)) {
           break;
-        }
-
-        // Filter out null criteria columns that are not null
-        if (dataFilter != null) {
-          filtered = dataFilter.filterNotNull(resultSets[i]);
         }

         // NOTE: We used to do results.putAll(resultSets[i]);

Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterAfterWrite.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterAfterWrite.java?rev=640106&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterAfterWrite.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterAfterWrite.java Sat Mar 22 15:35:36 2008
@@ -0,0 +1,225 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseClusterTestCase;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HScannerInterface;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.io.BatchUpdate;
+import org.apache.hadoop.io.Text;
+
+/** Test regexp filters HBASE-476 */
+public class TestRowFilterAfterWrite extends HBaseClusterTestCase {
+
+  @SuppressWarnings("hiding")
+  private static final Log LOG = LogFactory.getLog(TestRowFilterAfterWrite.class.getName());
+
+  static final String TABLE_NAME = "TestTable";
+  static final String FAMILY = "C:";
+  static final String COLUMN1 = FAMILY + "col1";
+  static final Text TEXT_COLUMN1 = new Text(COLUMN1);
+  static final String COLUMN2 = FAMILY + "col2";
+  static final Text TEXT_COLUMN2 = new Text(COLUMN2);
+
+  private static final Text[] columns = {
+    TEXT_COLUMN1, TEXT_COLUMN2
+  };
+
+  private static final int NUM_ROWS = 10;
+  private static final int VALUE_SIZE = 1000;
+  private static final byte[] VALUE = new byte[VALUE_SIZE];
+  private static final int COL_2_SIZE = 5;
+  private static final int KEY_SIZE = 9;
+  private static final int NUM_REWRITES = 10;
+  static {
+    Arrays.fill(VALUE, (byte) 'a');
+  }
+
+  /** constructor */
+  public TestRowFilterAfterWrite() {
+    super();
+
+    // Make sure the cache gets flushed so we get multiple stores
+    conf.setInt("hbase.hregion.memcache.flush.size", (NUM_ROWS * (VALUE_SIZE + COL_2_SIZE + KEY_SIZE)));
+    LOG.info("memcach flush : " + conf.get("hbase.hregion.memcache.flush.size"));
+    conf.setInt("hbase.regionserver.optionalcacheflushinterval", 100000000);
+    // Avoid compaction to keep multiple stores
+    conf.setInt("hbase.hstore.compactionThreshold", 10000);
+
+    // Make lease timeout longer, lease checks less frequent
+    conf.setInt("hbase.master.lease.period", 10 * 1000);
+    conf.setInt("hbase.master.lease.thread.wakefrequency", 5 * 1000);
+
+    // For debugging
conf.setInt("hbase.regionserver.lease.period", 20 * 60 * 1000); + conf.setInt("ipc.client.timeout", 20 * 60 * 1000); + } + + /** + * [EMAIL PROTECTED] + */ + @Override + public void setUp() throws Exception { + // this.conf.set(HConstants.HBASE_DIR, "file:///opt/benchmark/hadoop/hbase"); + this.conf.set(HConstants.MASTER_ADDRESS, "0.0.0.0:60100"); + // Must call super.setup() after starting mini dfs cluster. Otherwise + // we get a local file system instead of hdfs + + super.setUp(); + } + + /** + * [EMAIL PROTECTED] + */ + @Override + public void tearDown() throws Exception { + super.tearDown(); + } + + /** + * Test hbase mapreduce jobs against single region and multi-region tables. + * + * @throws IOException + * @throws InterruptedException + */ + public void testAfterWrite() throws IOException, InterruptedException { + singleTableTest(); + } + + /* + * Test against a single region. @throws IOException + */ + private void singleTableTest() throws IOException, InterruptedException { + HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); + desc.addFamily(new HColumnDescriptor(FAMILY)); + + // Create a table. + HBaseAdmin admin = new HBaseAdmin(this.conf); + admin.createTable(desc); + + // insert some data into the test table + HTable table = new HTable(conf, new Text(TABLE_NAME)); + + for (int i = 0; i < NUM_ROWS; i++) { + BatchUpdate b = + new BatchUpdate(new Text("row_" + String.format("%1$05d", i))); + + b.put(TEXT_COLUMN1, VALUE); + b.put(TEXT_COLUMN2, String.format("%1$05d", i).getBytes()); + table.commit(b); + } + + // LOG.info("Print table contents using scanner before map/reduce for " + TABLE_NAME); + // scanTable(TABLE_NAME, false); + // LOG.info("Print table contents using scanner+filter before map/reduce for " + TABLE_NAME); + // scanTableWithRowFilter(TABLE_NAME, false); + + // Do some identity write operations on one column of the data. + for (int n = 0; n < NUM_REWRITES; n++) { + for (int i = 0; i < NUM_ROWS; i++) { + BatchUpdate b = + new BatchUpdate(new Text("row_" + String.format("%1$05d", i))); + + b.put(TEXT_COLUMN2, String.format("%1$05d", i).getBytes()); + table.commit(b); + } + } + + // Wait for the flush to happen + LOG.info("Waiting, for flushes to complete"); + Thread.sleep(5 * 1000); + // Wait for the flush to happen + LOG.info("Done. 
+    LOG.info("Done. No flush should happen after this");
+
+    // Do another round so to populate the mem cache
+    for (int i = 0; i < NUM_ROWS; i++) {
+      BatchUpdate b =
+        new BatchUpdate(new Text("row_" + String.format("%1$05d", i)));
+
+      b.put(TEXT_COLUMN2, String.format("%1$05d", i).getBytes());
+      table.commit(b);
+    }
+
+    LOG.info("Print table contents using scanner after map/reduce for " + TABLE_NAME);
+    scanTable(TABLE_NAME, true);
+    LOG.info("Print table contents using scanner+filter after map/reduce for " + TABLE_NAME);
+    scanTableWithRowFilter(TABLE_NAME, true);
+  }
+
+  private void scanTable(final String tableName, final boolean printValues) throws IOException {
+    HTable table = new HTable(conf, new Text(tableName));
+
+    HScannerInterface scanner = table.obtainScanner(columns, HConstants.EMPTY_START_ROW);
+    int numFound = doScan(scanner, printValues);
+    Assert.assertEquals(NUM_ROWS, numFound);
+  }
+
+  private void scanTableWithRowFilter(final String tableName, final boolean printValues) throws IOException {
+    HTable table = new HTable(conf, new Text(tableName));
+    Map<Text, byte[]> columnMap = new HashMap<Text, byte[]>();
+    columnMap.put(TEXT_COLUMN1, VALUE);
+    RegExpRowFilter filter = new RegExpRowFilter(null, columnMap);
+    HScannerInterface scanner = table.obtainScanner(columns, HConstants.EMPTY_START_ROW, filter);
+    int numFound = doScan(scanner, printValues);
+    Assert.assertEquals(NUM_ROWS, numFound);
+  }
+
+  private int doScan(final HScannerInterface scanner, final boolean printValues) throws IOException {
+    {
+      int count = 0;
+
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (scanner.next(key, results)) {
+          if (printValues) {
+            LOG.info("row: " + key.getRow());
+
+            for (Map.Entry<Text, byte[]> e : results.entrySet()) {
+              LOG.info(" column: " + e.getKey() + " value: " +
+                new String(e.getValue(), HConstants.UTF8_ENCODING));
+            }
+          }
+          count++;
+        }
+
+      } finally {
+        scanner.close();
+      }
+      return count;
+    }
+  }
+}

Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterOnMultipleFamilies.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterOnMultipleFamilies.java?rev=640106&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterOnMultipleFamilies.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/filter/TestRowFilterOnMultipleFamilies.java Sat Mar 22 15:35:36 2008
@@ -0,0 +1,133 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseClusterTestCase;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HScannerInterface;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.io.BatchUpdate;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Test for regexp filters (HBASE-527)
+ */
+public class TestRowFilterOnMultipleFamilies extends HBaseClusterTestCase {
+  private static final Log LOG = LogFactory.getLog(TestRowFilterOnMultipleFamilies.class.getName());
+
+  static final String TABLE_NAME = "TestTable";
+  static final String COLUMN1 = "A:col1";
+  static final Text TEXT_COLUMN1 = new Text(COLUMN1);
+  static final String COLUMN2 = "B:col2";
+  static final Text TEXT_COLUMN2 = new Text(COLUMN2);
+
+  private static final Text[] columns = {
+    TEXT_COLUMN1, TEXT_COLUMN2
+  };
+
+  private static final int NUM_ROWS = 10;
+  private static final byte[] VALUE = "HELLO".getBytes();
+
+  /** @throws IOException */
+  public void testMultipleFamilies() throws IOException {
+    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
+    desc.addFamily(new HColumnDescriptor("A:"));
+    desc.addFamily(new HColumnDescriptor("B:"));
+
+    // Create a table.
+    HBaseAdmin admin = new HBaseAdmin(this.conf);
+    admin.createTable(desc);
+
+    // insert some data into the test table
+    HTable table = new HTable(conf, new Text(TABLE_NAME));
+
+    for (int i = 0; i < NUM_ROWS; i++) {
+      BatchUpdate b =
+        new BatchUpdate(new Text("row_" + String.format("%1$05d", i)));
+      b.put(TEXT_COLUMN1, VALUE);
+      b.put(TEXT_COLUMN2, String.format("%1$05d", i).getBytes());
+      table.commit(b);
+    }
+
+    LOG.info("Print table contents using scanner before map/reduce for " + TABLE_NAME);
+    scanTable(TABLE_NAME, true);
+    LOG.info("Print table contents using scanner+filter before map/reduce for " + TABLE_NAME);
+    scanTableWithRowFilter(TABLE_NAME, true);
+  }
+
+  private void scanTable(final String tableName, final boolean printValues) throws IOException {
+    HTable table = new HTable(conf, new Text(tableName));
+
+    HScannerInterface scanner = table.obtainScanner(columns, HConstants.EMPTY_START_ROW);
+    int numFound = doScan(scanner, printValues);
+    Assert.assertEquals(NUM_ROWS, numFound);
+  }
+
+  private void scanTableWithRowFilter(final String tableName, final boolean printValues) throws IOException {
+    HTable table = new HTable(conf, new Text(tableName));
+    Map<Text, byte[]> columnMap = new HashMap<Text, byte[]>();
+    columnMap.put(TEXT_COLUMN1, VALUE);
+    RegExpRowFilter filter = new RegExpRowFilter(null, columnMap);
+    HScannerInterface scanner = table.obtainScanner(columns, HConstants.EMPTY_START_ROW, filter);
+    int numFound = doScan(scanner, printValues);
+    Assert.assertEquals(NUM_ROWS, numFound);
+  }
+
+  private int doScan(final HScannerInterface scanner, final boolean printValues) throws IOException {
+    {
+      int count = 0;
+
+      try {
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        while (scanner.next(key, results)) {
+          if (printValues) {
+            LOG.info("row: " + key.getRow());
+
(Map.Entry<Text, byte[]> e : results.entrySet()) { + LOG.info(" column: " + e.getKey() + " value: " + + new String(e.getValue(), HConstants.UTF8_ENCODING)); + } + } + Assert.assertEquals(2, results.size()); + count++; + } + + } finally { + scanner.close(); + } + return count; + } + } +}