Author: nspiegelberg
Date: Thu Jan  5 01:41:01 2012
New Revision: 1227429

URL: http://svn.apache.org/viewvc?rev=1227429&view=rev
Log:
[jira] [HBase-5021] Enforce upper bound on timestamp

Summary:
We have been getting hit with performance problems on the ODS
side due to invalid timestamps being inserted by the app server.  ODS is
working on adding proper checks to app server, but production
performance could be severely impacted with significant recovery time if
something slips past.  Therefore, we should also allow the option to
check the upper bound in HBase.

This is the first draft.  Probably should allow per-CF customization.

Test Plan:  - mvn test -Dtest=TestHRegion#testPutWithTsSlop

Reviewers: Kannan, Liyin, JIRA

CC: stack, nspiegelberg, tedyu, Kannan, mbautin

Differential Revision: 849

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java

Modified: 
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1227429&r1=1227428&r2=1227429&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Thu Jan  5 01:41:01 2012
@@ -32,7 +32,7 @@ public final class HConstants {
   public enum OperationStatusCode {
     NOT_RUN,
     SUCCESS,
-    BAD_FAMILY,
+    SANITY_CHECK_FAILURE,
     FAILURE;
   }
 

Modified: 
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1227429&r1=1227428&r2=1227429&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Thu Jan  5 01:41:01 2012
@@ -248,6 +248,7 @@ public class HRegion implements HeapSize
 
   final WriteState writestate = new WriteState();
 
+  final long timestampTooNew;
   final long memstoreFlushSize;
   private volatile long lastFlushTime;
   private List<Pair<Long,Long>> recentFlushes
@@ -386,6 +387,7 @@ public class HRegion implements HeapSize
     this.conf = null;
     this.flushListener = null;
     this.fs = null;
+    this.timestampTooNew = HConstants.LATEST_TIMESTAMP;
     this.memstoreFlushSize = 0L;
     this.log = null;
     this.regiondir = null;
@@ -437,6 +439,17 @@ public class HRegion implements HeapSize
       // Write out region name as string and its encoded name.
       LOG.debug("Creating region " + this);
     }
+
+    /*
+     * timestamp.slop provides a server-side constraint on the timestamp. This
+     * assumes that you base your TS around currentTimeMillis(). In this case,
+     * throw an error to the user if the user-specified TS is newer than now +
+     * slop. LATEST_TIMESTAMP == don't use this functionality
+     */
+    this.timestampTooNew = conf.getLong(
+        "hbase.hregion.keyvalue.timestamp.slop.millisecs",
+        HConstants.LATEST_TIMESTAMP);
+
     long flushSize = regionInfo.getTableDesc().getMemStoreFlushSize();
     if (flushSize == HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE) {
       flushSize = conf.getLong("hbase.hregion.memstore.flush.size",
@@ -1825,9 +1838,10 @@ public class HRegion implements HeapSize
         // Check the families in the put. If bad, skip this one.
         try {
           checkFamilies(put.getFamilyMap().keySet());
-        } catch (NoSuchColumnFamilyException nscf) {
-          LOG.warn("No such column family in batch put", nscf);
-          batchOp.retCodes[lastIndexExclusive] = OperationStatusCode.BAD_FAMILY;
+          checkTimestamps(put, now);
+        } catch (DoNotRetryIOException dnrioe) {
+          LOG.warn("Sanity check error in batch put", dnrioe);
+          batchOp.retCodes[lastIndexExclusive] = OperationStatusCode.SANITY_CHECK_FAILURE;
           lastIndexExclusive++;
           continue;
         }
@@ -2122,6 +2136,7 @@ public class HRegion implements HeapSize
     this.updatesLock.readLock().lock();
     try {
       checkFamilies(familyMap.keySet());
+      checkTimestamps(familyMap, now);
       updateKVTimestamps(familyMap.values(), byteNow);
       // write/sync to WAL should happen before we touch memstore.
       //
@@ -2201,6 +2216,26 @@ public class HRegion implements HeapSize
       checkFamily(family);
     }
   }
+  private void checkTimestamps(Put p, long now) throws DoNotRetryIOException {
+    checkTimestamps(p.getFamilyMap(), now);
+  }
+
+  private void checkTimestamps(final Map<byte[], List<KeyValue>> familyMap,
+      long now) throws DoNotRetryIOException {
+    if (timestampTooNew == HConstants.LATEST_TIMESTAMP) {
+      return;
+    }
+    long maxTs = now + timestampTooNew;
+    for (List<KeyValue> kvs : familyMap.values()) {
+      for (KeyValue kv : kvs) {
+        // see if the user-side TS is out of range. latest = server-side
+        if (!kv.isLatestTimestamp() && kv.getTimestamp() > maxTs) {
+          throw new DoNotRetryIOException("Timestamp for KV out of range "
+              + kv + " (too.new=" + timestampTooNew + ")");
+        }
+      }
+    }
+  }
 
   /**
    * Append the given map of family->edits to a WALEdit data structure.

Modified: 
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java?rev=1227429&r1=1227428&r2=1227429&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java Thu Jan  5 01:41:01 2012
@@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.regionse
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestCase;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -377,7 +378,7 @@ public class TestHRegion extends HBaseTe
     codes = this.region.put(puts);
     assertEquals(10, codes.length);
     for (int i = 0; i < 10; i++) {
-      assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY :
+      assertEquals((i == 5) ? OperationStatusCode.SANITY_CHECK_FAILURE :
         OperationStatusCode.SUCCESS, codes[i]);
     }
     assertEquals(1, HLog.getSyncTime().count);
@@ -415,7 +416,7 @@ public class TestHRegion extends HBaseTe
     assertEquals(1, HLog.getSyncTime().count);
     codes = retFromThread.get();
     for (int i = 0; i < 10; i++) {
-      assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY :
+      assertEquals((i == 5) ? OperationStatusCode.SANITY_CHECK_FAILURE :
         OperationStatusCode.SUCCESS, codes[i]);
     }
 
@@ -432,7 +433,7 @@ public class TestHRegion extends HBaseTe
     codes = region.put(putsAndLocks.toArray(new Pair[0]));
     LOG.info("...performed put");
     for (int i = 0; i < 10; i++) {
-      assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY :
+      assertEquals((i == 5) ? OperationStatusCode.SANITY_CHECK_FAILURE :
         OperationStatusCode.SUCCESS, codes[i]);
     }
     // Make sure we didn't do an extra batch
@@ -892,6 +893,35 @@ public class TestHRegion extends HBaseTe
 
   }
 
+  /**
+   * Tests that there is server-side filtering for invalid timestamp upper
+   * bound. Note that the timestamp lower bound is automatically handled for us
+   * by the TTL field.
+   */
+  public void testPutWithTsSlop() throws IOException {
+    byte[] tableName = Bytes.toBytes("testtable");
+    byte[] fam = Bytes.toBytes("info");
+    byte[][] families = { fam };
+    String method = this.getName();
+    HBaseConfiguration conf = new HBaseConfiguration();
+
+    // add data with a timestamp that is too recent for range. Ensure assert
+    conf.setInt("hbase.hregion.keyvalue.timestamp.slop.millisecs", 1000);
+    initHRegion(tableName, method, conf, families);
+    try {
+      // no TS specified == use latest. should not error
+      region.put(new Put(row).add(fam, Bytes.toBytes("qual"), Bytes
+          .toBytes("value")), false);
+      // TS out of range. should error
+      region.put(new Put(row).add(fam, Bytes.toBytes("qual"),
+                 System.currentTimeMillis() + 2000,
+                 Bytes.toBytes("value")), false);
+      fail("Expected IOE for TS out of configured timerange");
+    } catch (DoNotRetryIOException ioe) {
+      LOG.debug("Received expected exception", ioe);
+    }
+  }
+
   public void testScanner_DeleteOneFamilyNotAnother() throws IOException {
     byte [] tableName = Bytes.toBytes("test_table");
     byte [] fam1 = Bytes.toBytes("columnA");


Reply via email to