[ https://issues.apache.org/jira/browse/HBASE-8325?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zavakid updated HBASE-8325: --------------------------- Description: I'm using HBase replication in my test environment. When a ReplicationSource opens an empty HLog, an EOFException is thrown. Should we detect the empty file and handle it, the same way we handle the FileNotFoundException? Here's the code: {code:java} /** * Open a reader on the current path * * @param sleepMultiplier by how many times the default sleeping time is augmented * @return true if we should continue with that file, false if we are over with it */ protected boolean openReader(int sleepMultiplier) { try { LOG.debug("Opening log for replication " + this.currentPath.getName() + " at " + this.repLogReader.getPosition()); try { this.reader = repLogReader.openReader(this.currentPath); } catch (FileNotFoundException fnfe) { if (this.queueRecovered) { // We didn't find the log in the archive directory, look if it still // exists in the dead RS folder (there could be a chain of failures // to look at) LOG.info("NB dead servers : " + deadRegionServers.length); for (int i = this.deadRegionServers.length - 1; i >= 0; i--) { Path deadRsDirectory = new Path(manager.getLogDir().getParent(), this.deadRegionServers[i]); Path[] locs = new Path[] { new Path(deadRsDirectory, currentPath.getName()), new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), currentPath.getName()), }; for (Path possibleLogLocation : locs) { LOG.info("Possible location " + possibleLogLocation.toUri().toString()); if (this.manager.getFs().exists(possibleLogLocation)) { // We found the right new location LOG.info("Log " + this.currentPath + " still exists at " + possibleLogLocation); // Breaking here will make us sleep since reader is null return true; } } } // TODO What happens if the log was missing from every single location? 
// Although we need to check a couple of times as the log could have // been moved by the master between the checks // It can also happen if a recovered queue wasn't properly cleaned, // such that the znode pointing to a log exists but the log was // deleted a long time ago. // For the moment, we'll throw the IO and processEndOfFile throw new IOException("File from recovered queue is " + "nowhere to be found", fnfe); } else { // If the log was archived, continue reading from there Path archivedLogLocation = new Path(manager.getOldLogDir(), currentPath.getName()); if (this.manager.getFs().exists(archivedLogLocation)) { currentPath = archivedLogLocation; LOG.info("Log " + this.currentPath + " was moved to " + archivedLogLocation); // Open the log at the new location this.openReader(sleepMultiplier); } // TODO What happens the log is missing in both places? } } } catch (IOException ioe) { LOG.warn(peerClusterZnode + " Got: ", ioe); this.reader = null; // TODO Need a better way to determinate if a file is really gone but // TODO without scanning all logs dir if (sleepMultiplier == this.maxRetriesMultiplier) { LOG.warn("Waited too long for this file, considering dumping"); return !processEndOfFile(); } } return true; } {code} I found this TODO comment: // TODO What happens the log is missing in both places? Maybe we need to handle this case? was: I'm using the replication of Hbase in my test environment. When a replicationSource open a empty HLog, the EOFException throws. Should we detect the empty file and processed it, like we process the FileNotFoundException? 
here's the code: ``` /** * Open a reader on the current path * * @param sleepMultiplier by how many times the default sleeping time is augmented * @return true if we should continue with that file, false if we are over with it */ protected boolean openReader(int sleepMultiplier) { try { LOG.debug("Opening log for replication " + this.currentPath.getName() + " at " + this.repLogReader.getPosition()); try { this.reader = repLogReader.openReader(this.currentPath); } catch (FileNotFoundException fnfe) { if (this.queueRecovered) { // We didn't find the log in the archive directory, look if it still // exists in the dead RS folder (there could be a chain of failures // to look at) LOG.info("NB dead servers : " + deadRegionServers.length); for (int i = this.deadRegionServers.length - 1; i >= 0; i--) { Path deadRsDirectory = new Path(manager.getLogDir().getParent(), this.deadRegionServers[i]); Path[] locs = new Path[] { new Path(deadRsDirectory, currentPath.getName()), new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), currentPath.getName()), }; for (Path possibleLogLocation : locs) { LOG.info("Possible location " + possibleLogLocation.toUri().toString()); if (this.manager.getFs().exists(possibleLogLocation)) { // We found the right new location LOG.info("Log " + this.currentPath + " still exists at " + possibleLogLocation); // Breaking here will make us sleep since reader is null return true; } } } // TODO What happens if the log was missing from every single location? // Although we need to check a couple of times as the log could have // been moved by the master between the checks // It can also happen if a recovered queue wasn't properly cleaned, // such that the znode pointing to a log exists but the log was // deleted a long time ago. 
// For the moment, we'll throw the IO and processEndOfFile throw new IOException("File from recovered queue is " + "nowhere to be found", fnfe); } else { // If the log was archived, continue reading from there Path archivedLogLocation = new Path(manager.getOldLogDir(), currentPath.getName()); if (this.manager.getFs().exists(archivedLogLocation)) { currentPath = archivedLogLocation; LOG.info("Log " + this.currentPath + " was moved to " + archivedLogLocation); // Open the log at the new location this.openReader(sleepMultiplier); } // TODO What happens the log is missing in both places? } } } catch (IOException ioe) { LOG.warn(peerClusterZnode + " Got: ", ioe); this.reader = null; // TODO Need a better way to determinate if a file is really gone but // TODO without scanning all logs dir if (sleepMultiplier == this.maxRetriesMultiplier) { LOG.warn("Waited too long for this file, considering dumping"); return !processEndOfFile(); } } return true; } ``` I find the TODO label : // TODO What happens the log is missing in both places? maybe we need to add this case? > ReplicationSource read a empty HLog throws EOFException > ------------------------------------------------------- > > Key: HBASE-8325 > URL: https://issues.apache.org/jira/browse/HBASE-8325 > Project: HBase > Issue Type: Bug > Components: Replication > Affects Versions: 0.94.5 > Environment: replication enabled > Reporter: zavakid > Priority: Critical > > I'm using the replication of Hbase in my test environment. > When a replicationSource open a empty HLog, the EOFException throws. Should > we detect the empty file and processed it, like we process the > FileNotFoundException? 
> here's the code: > {code:java} > /** > * Open a reader on the current path > * > * @param sleepMultiplier by how many times the default sleeping time is > augmented > * @return true if we should continue with that file, false if we are over > with it > */ > protected boolean openReader(int sleepMultiplier) { > try { > LOG.debug("Opening log for replication " + this.currentPath.getName() + > " at " + this.repLogReader.getPosition()); > try { > this.reader = repLogReader.openReader(this.currentPath); > } catch (FileNotFoundException fnfe) { > if (this.queueRecovered) { > // We didn't find the log in the archive directory, look if it still > // exists in the dead RS folder (there could be a chain of failures > // to look at) > LOG.info("NB dead servers : " + deadRegionServers.length); > for (int i = this.deadRegionServers.length - 1; i >= 0; i--) { > Path deadRsDirectory = > new Path(manager.getLogDir().getParent(), > this.deadRegionServers[i]); > Path[] locs = new Path[] { > new Path(deadRsDirectory, currentPath.getName()), > new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), > currentPath.getName()), > }; > for (Path possibleLogLocation : locs) { > LOG.info("Possible location " + > possibleLogLocation.toUri().toString()); > if (this.manager.getFs().exists(possibleLogLocation)) { > // We found the right new location > LOG.info("Log " + this.currentPath + " still exists at " + > possibleLogLocation); > // Breaking here will make us sleep since reader is null > return true; > } > } > } > // TODO What happens if the log was missing from every single > location? > // Although we need to check a couple of times as the log could have > // been moved by the master between the checks > // It can also happen if a recovered queue wasn't properly cleaned, > // such that the znode pointing to a log exists but the log was > // deleted a long time ago. 
> // For the moment, we'll throw the IO and processEndOfFile > throw new IOException("File from recovered queue is " + > "nowhere to be found", fnfe); > } else { > // If the log was archived, continue reading from there > Path archivedLogLocation = > new Path(manager.getOldLogDir(), currentPath.getName()); > if (this.manager.getFs().exists(archivedLogLocation)) { > currentPath = archivedLogLocation; > LOG.info("Log " + this.currentPath + " was moved to " + > archivedLogLocation); > // Open the log at the new location > this.openReader(sleepMultiplier); > } > // TODO What happens the log is missing in both places? > } > } > } catch (IOException ioe) { > LOG.warn(peerClusterZnode + " Got: ", ioe); > this.reader = null; > // TODO Need a better way to determinate if a file is really gone but > // TODO without scanning all logs dir > if (sleepMultiplier == this.maxRetriesMultiplier) { > LOG.warn("Waited too long for this file, considering dumping"); > return !processEndOfFile(); > } > } > return true; > } > {code} > I find the TODO label : // TODO What happens the log is missing in both > places? > maybe we need to add this case? -- This message is automatically generated by JIRA. If you think it was sent incorrectly, please contact your JIRA administrators For more information on JIRA, see: http://www.atlassian.com/software/jira