Author: cutting
Date: Fri Jun 9 10:17:14 2006
New Revision: 413096

URL: http://svn.apache.org/viewvc?rev=413096&view=rev
Log:
HADOOP-289. Improved exception handling in DFS datanode. Contributed by Konstantin.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=413096&r1=413095&r2=413096&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Jun 9 10:17:14 2006
@@ -23,6 +23,9 @@
  6. HADOOP-277. Fix a race condition when creating directories.
     (Sameer Paranjpye via cutting)
 
+ 7. HADOOP-289. Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
 
 Release 0.3.1 - 2006-06-05
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=413096&r1=413095&r2=413096&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Fri Jun 9 10:17:14 2006
@@ -105,17 +105,7 @@
              new File(datadir),
              createSocketAddr(conf.get("fs.default.name", "local")), conf);
         // register datanode
-        while (shouldRun) {
-          try {
-            register();
-            break;
-          } catch (ConnectException ce) {
-            LOG.info("Namenode not available yet, Zzzzz...");
-            try {
-              Thread.sleep(10 * 1000);
-            } catch (InterruptedException ie) {}
-          }
-        }
+        register();
     }
 
     /**
@@ -182,7 +172,19 @@
      * @throws IOException
      */
     private void register() throws IOException {
-      dnRegistration = namenode.register( dnRegistration );
+      while (shouldRun) {
+        try {
+          dnRegistration = namenode.register( dnRegistration );
+          break;
+        } catch( ConnectException se ) { // namenode has not been started
+          LOG.info("Namenode not available yet, Zzzzz...");
+        } catch( SocketTimeoutException te ) { // namenode is busy
+          LOG.info("Namenode " + te.getLocalizedMessage() );
+        }
+        try {
+          Thread.sleep(10 * 1000);
+        } catch (InterruptedException ie) {}
+      }
       if( storage.getStorageID().equals("") ) {
         storage.setStorageID( dnRegistration.getStorageID());
         storage.write();
@@ -203,7 +205,7 @@
     }
 
     void handleDiskError( String errMsgr ) {
-        LOG.warn( "Shuting down DataNode because "+errMsgr );
+        LOG.warn( "DataNode is shutting down.\n" + errMsgr );
         try {
             namenode.errorReport(
                     dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
@@ -332,9 +334,16 @@
         } // synchronized
       } // while (shouldRun)
     } catch(DiskErrorException e) {
-      handleDiskError(e.getMessage());
+      handleDiskError(e.getLocalizedMessage());
+    } catch( RemoteException re ) {
+      String reClass = re.getClassName();
+      if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
+        LOG.warn( "DataNode is shutting down.\n" + re );
+        shutdown();
+        return;
+      }
+      throw re;
     }
-
   } // offerService
 
   /**

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=413096&r1=413095&r2=413096&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Fri Jun 9 10:17:14 2006
@@ -1329,6 +1329,8 @@
      */
     private void proccessOverReplicatedBlock( Block block, short replication ) {
       TreeSet containingNodes = (TreeSet) blocksMap.get(block);
+      if( containingNodes == null )
+        return;
       Vector nonExcess = new Vector();
       for (Iterator it = containingNodes.iterator(); it.hasNext(); ) {
           DatanodeInfo cur = (DatanodeInfo) it.next();