Repository: hive Updated Branches: refs/heads/master 6908198df -> 4536dcd68
HIVE-12637 : make retryable SQLExceptions in TxnHandler configurable (Wei Zheng, reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4536dcd6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4536dcd6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4536dcd6 Branch: refs/heads/master Commit: 4536dcd686ba136dc7b462e4c605ef16ba981a9f Parents: 6908198 Author: Wei Zheng <[email protected]> Authored: Mon Apr 25 11:17:11 2016 -0700 Committer: Wei Zheng <[email protected]> Committed: Mon Apr 25 11:17:11 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 8 ++++++++ .../hadoop/hive/metastore/txn/TxnHandler.java | 18 +++++++++++++++--- .../hadoop/hive/metastore/txn/TestTxnHandler.java | 15 +++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4536dcd6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 5b5b350..8ccc262 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -214,6 +214,7 @@ public class HiveConf extends Configuration { HiveConf.ConfVars.HIVE_TXN_TIMEOUT, HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE, HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, + HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE, @@ -1679,6 +1680,13 @@ public class HiveConf extends Configuration { "transactions that Hive has to track at any given time, which may negatively affect\n" + "read performance."), + HIVE_TXN_RETRYABLE_SQLEX_REGEX("hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" + + "of regular expression patterns for SQL state, error code, and error message of\n" + + "retryable SQLExceptions, that's suitable for the metastore DB.\n" + + "For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" + + "The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" + + "ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""), + HIVE_COMPACTOR_INITIATOR_ON("hive.compactor.initiator.on", false, "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" + "Set this to true on one instance of the Thrift metastore service as part of turning\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/4536dcd6/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index be3c6de..df6591f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -49,6 +49,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; +import java.util.regex.Pattern; /** * A handler to answer transaction related calls that come into the metastore @@ -1521,7 +1522,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { } else { LOG.error("Too many repeated deadlocks in " + caller + ", giving up."); } - } else if (isRetryable(e)) { + } else if (isRetryable(conf, e)) { //in MSSQL this means Communication Link Failure if (retryNum++ < retryLimit) { LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval + @@ -2620,7 +2621,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { /** * Returns true if {@code ex} should be retried */ - private static boolean isRetryable(Exception ex) { + static boolean isRetryable(HiveConf conf, Exception ex) { if(ex instanceof SQLException) { SQLException sqlException = (SQLException)ex; if("08S01".equalsIgnoreCase(sqlException.getSQLState())) { @@ -2631,6 +2632,17 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { sqlException.getMessage().contains("consistent read failure; rollback data not available")) { return true; } + + String regex = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX); + if (regex != null && !regex.isEmpty()) { + String[] patterns = regex.split(",(?=\\S)"); + String message = getMessage((SQLException)ex); + for (String p : patterns) { + if (Pattern.matches(p, message)) { + return true; + } + } + } //see also https://issues.apache.org/jira/browse/HIVE-9938 } return false; @@ -2670,7 +2682,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { return false; } private static String getMessage(SQLException ex) { - return ex.getMessage() + "(SQLState=" + ex.getSQLState() + ",ErrorCode=" + ex.getErrorCode() + ")"; + return ex.getMessage() + " (SQLState=" + ex.getSQLState() + ", ErrorCode=" + ex.getErrorCode() + ")"; } /** * Given a {@code selectStatement}, decorated it with FOR UPDATE or semantically equivalent http://git-wip-us.apache.org/repos/asf/hive/blob/4536dcd6/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java index 37eacde..28d0269 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java @@ -1346,6 +1346,21 @@ public class TestTxnHandler { error = e; } } + + @Test + public void testRetryableRegex() throws Exception { + SQLException sqlException = new SQLException("ORA-08177: can't serialize access for this transaction", "72000"); + // Note that we have 3 regex'es below + conf.setVar(HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, "^Deadlock detected, roll back,.*08177.*,.*08178.*"); + boolean result = TxnHandler.isRetryable(conf, sqlException); + Assert.assertTrue("regex should be retryable", result); + + sqlException = new SQLException("This error message, has comma in it"); + conf.setVar(HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, ".*comma.*"); + result = TxnHandler.isRetryable(conf, sqlException); + Assert.assertTrue("regex should be retryable", result); + } + private void updateTxns(Connection conn) throws SQLException { Statement stmt = conn.createStatement(); stmt.executeUpdate("update TXNS set txn_last_heartbeat = txn_last_heartbeat + 1");
