HBASE-17276 Only log stacktraces for exceptions once for updates in a batch
For large batches of updates, repeatedly logging WrongRegionExceptions, FailedSanityCheckExceptions, and/or NoSuchColumnFamilyExceptions can easily dominate the contents of a RegionServer log. After the first occurrence of logging the full exception, switch to logging only the message on the exception. Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a1ca7234 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a1ca7234 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a1ca7234 Branch: refs/heads/0.98 Commit: a1ca72344498731e2751fe74d4387797610a9ab0 Parents: dc85895 Author: Josh Elser <els...@apache.org> Authored: Wed Dec 7 13:11:16 2016 -0500 Committer: Josh Elser <els...@apache.org> Committed: Sun Dec 11 15:40:53 2016 -0500 ---------------------------------------------------------------------- .../hadoop/hbase/regionserver/HRegion.java | 76 +++++++++++++++++++- .../TestObservedExceptionsInBatch.java | 58 +++++++++++++++ 2 files changed, 131 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/a1ca7234/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 24851e0..6b98b20 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -492,6 +492,57 @@ public class HRegion implements HeapSize { // , Writable{ } } + /** + * A class that tracks exceptions that have been observed in one batch. Not thread safe. 
+ */ + static class ObservedExceptionsInBatch { + private boolean wrongRegion = false; + private boolean failedSanityCheck = false; + private boolean wrongFamily = false; + + /** + * @return If a {@link WrongRegionException} has been observed. + */ + boolean hasSeenWrongRegion() { + return wrongRegion; + } + + /** + * Records that a {@link WrongRegionException} has been observed. + */ + void sawWrongRegion() { + wrongRegion = true; + } + + /** + * @return If a {@link FailedSanityCheckException} has been observed. + */ + boolean hasSeenFailedSanityCheck() { + return failedSanityCheck; + } + + /** + * Records that a {@link FailedSanityCheckException} has been observed. + */ + void sawFailedSanityCheck() { + failedSanityCheck = true; + } + + /** + * @return If a {@link NoSuchColumnFamilyException} has been observed. + */ + boolean hasSeenNoSuchFamily() { + return wrongFamily; + } + + /** + * Records that a {@link NoSuchColumnFamilyException} has been observed. + */ + void sawNoSuchFamily() { + wrongFamily = true; + } + } + final WriteState writestate = new WriteState(); long memstoreFlushSize; @@ -2437,6 +2488,7 @@ public class HRegion implements HeapSize { // , Writable{ boolean success = false; int noOfPuts = 0, noOfDeletes = 0; long addedSize = 0; + final ObservedExceptionsInBatch observedExceptions = new ObservedExceptionsInBatch(); try { // ------------------------------------ // STEP 1. Try to acquire as many locks as we can, and ensure @@ -2473,19 +2525,37 @@ public class HRegion implements HeapSize { // , Writable{ } checkRow(mutation.getRow(), "doMiniBatchMutation"); } catch (NoSuchColumnFamilyException nscf) { - LOG.warn("No such column family in batch mutation", nscf); + final String msg = "No such column family in batch mutation. 
"; + if (observedExceptions.hasSeenNoSuchFamily()) { + LOG.warn(msg + nscf.getMessage()); + } else { + LOG.warn(msg, nscf); + observedExceptions.sawNoSuchFamily(); + } batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus( OperationStatusCode.BAD_FAMILY, nscf.getMessage()); lastIndexExclusive++; continue; } catch (FailedSanityCheckException fsce) { - LOG.warn("Batch Mutation did not pass sanity check", fsce); + final String msg = "Batch Mutation did not pass sanity check. "; + if (observedExceptions.hasSeenFailedSanityCheck()) { + LOG.warn(msg + fsce.getMessage()); + } else { + LOG.warn(msg, fsce); + observedExceptions.sawFailedSanityCheck(); + } batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus( OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage()); lastIndexExclusive++; continue; } catch (WrongRegionException we) { - LOG.warn("Batch mutation had a row that does not belong to this region", we); + final String msg = "Batch mutation had a row that does not belong to this region. 
"; + if (observedExceptions.hasSeenWrongRegion()) { + LOG.warn(msg + we.getMessage()); + } else { + LOG.warn(msg, we); + observedExceptions.sawWrongRegion(); + } batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus( OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage()); lastIndexExclusive++; http://git-wip-us.apache.org/repos/asf/hbase/blob/a1ca7234/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestObservedExceptionsInBatch.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestObservedExceptionsInBatch.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestObservedExceptionsInBatch.java new file mode 100644 index 0000000..64237fd --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestObservedExceptionsInBatch.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.apache.hadoop.hbase.regionserver.HRegion.ObservedExceptionsInBatch; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test class for {@link ObservedExceptionsInBatch}. + */ +@Category(SmallTests.class) +public class TestObservedExceptionsInBatch { + + private ObservedExceptionsInBatch observedExceptions; + + @Before + public void setup() { + observedExceptions = new ObservedExceptionsInBatch(); + } + + @Test + public void testNoObservationsOnCreation() { + assertFalse(observedExceptions.hasSeenFailedSanityCheck()); + assertFalse(observedExceptions.hasSeenNoSuchFamily()); + assertFalse(observedExceptions.hasSeenWrongRegion()); + } + + @Test + public void testObservedAfterRecording() { + observedExceptions.sawFailedSanityCheck(); + assertTrue(observedExceptions.hasSeenFailedSanityCheck()); + observedExceptions.sawNoSuchFamily(); + assertTrue(observedExceptions.hasSeenNoSuchFamily()); + observedExceptions.sawWrongRegion(); + assertTrue(observedExceptions.hasSeenWrongRegion()); + } +}