Repository: hive Updated Branches: refs/heads/branch-1 edf89a6a0 -> 648f19307
HIVE-12637 : make retryable SQLExceptions in TxnHandler configurable (Wei Zheng, reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/648f1930 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/648f1930 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/648f1930 Branch: refs/heads/branch-1 Commit: 648f19307cab1b55e44b930ffaf043cc93cd4d46 Parents: edf89a6 Author: Wei Zheng <[email protected]> Authored: Mon Apr 25 11:17:11 2016 -0700 Committer: Wei Zheng <[email protected]> Committed: Mon Apr 25 11:19:35 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 8 ++++++++ .../hadoop/hive/metastore/txn/TxnHandler.java | 18 +++++++++++++++--- .../hadoop/hive/metastore/txn/TestTxnHandler.java | 15 +++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/648f1930/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0d31131..7c93e44 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -173,6 +173,7 @@ public class HiveConf extends Configuration { HiveConf.ConfVars.HIVE_TXN_TIMEOUT, HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE, HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, + HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE, @@ -1492,6 +1493,13 @@ public class HiveConf extends Configuration { "transactions that Hive has to track at any given time, which may negatively affect\n" + "read performance."), + HIVE_TXN_RETRYABLE_SQLEX_REGEX("hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" + + "of regular expression patterns for SQL state, error code, and error message of\n" + + "retryable SQLExceptions, that's suitable for the metastore DB.\n" + + "For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" + + "The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" + + "ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""), + HIVE_COMPACTOR_INITIATOR_ON("hive.compactor.initiator.on", false, "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" + "Set this to true on one instance of the Thrift metastore service as part of turning\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/648f1930/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index ed4a3c2..a64e7c8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -51,6 +51,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; +import java.util.regex.Pattern; /** * A handler to answer transaction related calls that come into the metastore @@ -1559,7 +1560,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { } else { LOG.error("Too many repeated deadlocks in " + caller + ", giving up."); } - } else if (isRetryable(e)) { + } else if (isRetryable(conf, e)) { //in MSSQL this means Communication Link Failure if (retryNum++ < retryLimit) { LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval + @@ -2658,7 +2659,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { /** * Returns true if {@code ex} should be retried */ - private static boolean isRetryable(Exception ex) { + static boolean isRetryable(HiveConf conf, Exception ex) { if(ex instanceof SQLException) { SQLException sqlException = (SQLException)ex; if("08S01".equalsIgnoreCase(sqlException.getSQLState())) { @@ -2669,6 +2670,17 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { sqlException.getMessage().contains("consistent read failure; rollback data not available")) { return true; } + + String regex = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX); + if (regex != null && !regex.isEmpty()) { + String[] patterns = regex.split(",(?=\\S)"); + String message = getMessage((SQLException)ex); + for (String p : patterns) { + if (Pattern.matches(p, message)) { + return true; + } + } + } //see also https://issues.apache.org/jira/browse/HIVE-9938 } return false; @@ -2708,7 +2720,7 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI { return false; } private static String getMessage(SQLException ex) { - return ex.getMessage() + "(SQLState=" + ex.getSQLState() + ",ErrorCode=" + ex.getErrorCode() + ")"; + return ex.getMessage() + " (SQLState=" + ex.getSQLState() + ", ErrorCode=" + ex.getErrorCode() + ")"; } /** * Given a {@code selectStatement}, decorated it with FOR UPDATE or semantically equivalent http://git-wip-us.apache.org/repos/asf/hive/blob/648f1930/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java index 4d3c3e1..0cacef7 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java @@ -1307,6 +1307,21 @@ public class TestTxnHandler { error = e; } } + + @Test + public void testRetryableRegex() throws Exception { + SQLException sqlException = new SQLException("ORA-08177: can't serialize access for this transaction", "72000"); + // Note that we have 3 regex'es below + conf.setVar(HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, "^Deadlock detected, roll back,.*08177.*,.*08178.*"); + boolean result = TxnHandler.isRetryable(conf, sqlException); + Assert.assertTrue("regex should be retryable", result); + + sqlException = new SQLException("This error message, has comma in it"); + conf.setVar(HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, ".*comma.*"); + result = TxnHandler.isRetryable(conf, sqlException); + Assert.assertTrue("regex should be retryable", result); + } + private void updateTxns(Connection conn) throws SQLException { Statement stmt = conn.createStatement(); stmt.executeUpdate("update TXNS set txn_last_heartbeat = txn_last_heartbeat + 1");
