Repository: tajo Updated Branches: refs/heads/master cc6917804 -> cea832aca
TAJO-1830: Fix race condition in HdfsServiceTracker. Closes #748 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/cea832ac Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/cea832ac Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/cea832ac Branch: refs/heads/master Commit: cea832acaf36398ddf32392d7b13493911ba6014 Parents: cc69178 Author: Jinho Kim <[email protected]> Authored: Fri Sep 11 14:36:31 2015 +0900 Committer: Jinho Kim <[email protected]> Committed: Fri Sep 11 14:36:31 2015 +0900 ---------------------------------------------------------------------- CHANGES | 2 + .../apache/tajo/ha/TestHAServiceHDFSImpl.java | 6 +-- .../org/apache/tajo/ha/HdfsServiceTracker.java | 51 +++++++++++--------- 3 files changed, 34 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 771c1bd..04adaa5 100644 --- a/CHANGES +++ b/CHANGES @@ -259,6 +259,8 @@ Release 0.11.0 - unreleased BUG FIXES + TAJO-1830: Fix race condition in HdfsServiceTracker. (jinho) + TAJO-1727: Avoid to create external table using TableSpace. (jaehwa) TAJO-1600: Invalid query planning for distinct group-by. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java index 265f075..f0f01bf 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/ha/TestHAServiceHDFSImpl.java @@ -81,11 +81,11 @@ public class TestHAServiceHDFSImpl { verifyDataBaseAndTable(tracker); - assertEquals(2, fs.listStatus(activePath).length); - assertEquals(0, fs.listStatus(backupPath).length); - assertTrue(fs.exists(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE))); assertTrue(fs.exists(new Path(activePath, backupMaster.getMasterName().replaceAll(":", "_")))); + + assertEquals(2, fs.listStatus(activePath).length); + assertEquals(0, fs.listStatus(backupPath).length); } finally { backupMaster.stop(); } http://git-wip-us.apache.org/repos/asf/tajo/blob/cea832ac/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java index d0eb985..4b97fe6 100644 --- a/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java +++ b/tajo-core/src/main/java/org/apache/tajo/ha/HdfsServiceTracker.java @@ -32,13 +32,11 @@ import org.apache.tajo.master.TajoMaster; import org.apache.tajo.service.HAServiceTracker; import org.apache.tajo.service.ServiceTrackerException; import org.apache.tajo.service.TajoMasterInfo; -import org.apache.tajo.util.*; import org.apache.tajo.util.FileUtil; +import org.apache.tajo.util.TUtil; -import javax.net.SocketFactory; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.Socket; import java.util.ArrayList; import java.util.List; @@ -191,7 +189,10 @@ public class HdfsServiceTracker extends HAServiceTracker { } } } - startPingChecker(); + + if(!isActiveMaster()) { + startPingChecker(); + } } /** @@ -219,7 +220,6 @@ public class HdfsServiceTracker extends HAServiceTracker { Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE); try { lockOutput = fs.create(lockFile, false); - lockOutput.hsync(); lockOutput.close(); fs.deleteOnExit(lockFile); result = true; @@ -241,7 +241,6 @@ public class HdfsServiceTracker extends HAServiceTracker { out = fs.create(path, false); out.writeUTF(sb.toString()); - out.hsync(); out.close(); fs.deleteOnExit(path); @@ -283,7 +282,9 @@ public class HdfsServiceTracker extends HAServiceTracker { } @Override - public void delete() throws IOException { + public synchronized void delete() throws IOException { + stopped = true; + if (ShutdownHookManager.get().isShutdownInProgress()) return; String fileName = masterName.replaceAll(":", "_"); @@ -291,8 +292,6 @@ public class HdfsServiceTracker extends HAServiceTracker { fs.delete(new Path(activePath, fileName), false); fs.delete(new Path(activePath, HAConstants.ACTIVE_LOCK_FILE), false); fs.delete(new Path(backupPath, fileName), false); - - stopped = true; } @Override @@ -366,16 +365,17 @@ public class HdfsServiceTracker extends HAServiceTracker { // If active master is dead, this master should be active master instead of // previous active master. - if (!checkConnection(currentActiveMaster)) { + if (!stopped && !checkConnection(currentActiveMaster)) { Path activeFile = new Path(activePath, currentActiveMaster.replaceAll(":", "_")); fs.delete(activeFile, false); + Path lockFile = new Path(activePath, HAConstants.ACTIVE_LOCK_FILE); fs.delete(lockFile, false); register(); } } } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } } @@ -458,19 +458,34 @@ public class HdfsServiceTracker extends HAServiceTracker { throw new ServiceTrackerException("Active master base path must be a directory."); } - FileStatus[] files = fs.listStatus(activeMasterBaseDir); /* wait for active master from HDFS */ int pause = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_PAUSE_TIME); int maxRetry = conf.getIntVar(ConfVars.TAJO_MASTER_HA_CLIENT_RETRY_MAX_NUM); int retry = 0; - while (files.length < 2 && retry < maxRetry) { + + Path activeMasterEntry = null; + FileStatus[] files = null; + + loop:while (retry < maxRetry) { + files = fs.listStatus(activeMasterBaseDir); + for (FileStatus eachFile : files) { + //check if active file is written + if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE) && eachFile.getLen() > 0) { + activeMasterEntry = eachFile.getPath(); + break loop; + } + } + try { this.wait(pause); } catch (InterruptedException e) { throw new ServiceTrackerException(e); } - files = fs.listStatus(activeMasterBaseDir); + } + + if (files == null || activeMasterEntry == null) { + throw new ServiceTrackerException("Active master entry cannot be found in: " + activeMasterBaseDir); } if (files.length < 1) { @@ -480,14 +495,6 @@ public class HdfsServiceTracker extends HAServiceTracker { throw new ServiceTrackerException("Three or more than active master entries."); } - Path activeMasterEntry = null; - - for (FileStatus eachFile : files) { - if (!eachFile.getPath().getName().equals(HAConstants.ACTIVE_LOCK_FILE)) { - activeMasterEntry = eachFile.getPath(); - } - } - if (!fs.isFile(activeMasterEntry)) { throw new ServiceTrackerException("Active master entry must be a file, but it is a directory."); }
