Repository: nutch Updated Branches: refs/heads/master 7956daee8 -> beb48a84b
NUTCH-2272 Index checker server to optionally keep client connection open Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/beb48a84 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/beb48a84 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/beb48a84 Branch: refs/heads/master Commit: beb48a84b2be52f92af24956ae59286ad116913c Parents: 7956dae Author: Markus Jelsma <mar...@apache.org> Authored: Fri Jun 3 15:02:12 2016 +0200 Committer: Markus Jelsma <mar...@apache.org> Committed: Fri Jun 3 15:02:12 2016 +0200 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../nutch/indexer/IndexingFiltersChecker.java | 35 ++++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index ffcf5ae..877f23b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -37,6 +37,7 @@ Bug Improvement + [NUTCH-2272] - Index checker server to optionally keep client connection open [NUTCH-1233] - Rely on Tika for outlink extraction [NUTCH-1712] - Use MultipleInputs in Injector to make it a single mapreduce job [NUTCH-2172] - index-more: document format of contenttype-mapping.txt http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java index da4123f..2e1b9c2 100644 --- a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java +++ b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java @@ -69,6 +69,7 @@ public class IndexingFiltersChecker extends Configured implements Tool { protected URLNormalizers normalizers = null; protected boolean dumpText = false; protected boolean followRedirects = false; + protected boolean keepClientCnxOpen = false; // used to simulate the metadata propagated from injection protected HashMap<String, String> metadata = new HashMap<String, String>(); protected int tcpPort = -1; @@ -82,7 +83,7 @@ public class IndexingFiltersChecker extends Configured implements Tool { public int run(String[] args) throws Exception { String url = null; - String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] <url>"; + String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] [-keepClientCnxOpen]"; if (args.length == 0) { System.err.println(usage); @@ -96,6 +97,8 @@ public class IndexingFiltersChecker extends Configured implements Tool { tcpPort = Integer.parseInt(args[++i]); } else if (args[i].equals("-followRedirects")) { followRedirects = true; + } else if (args[i].equals("-keepClientCnxOpen")) { + keepClientCnxOpen = true; } else if (args[i].equals("-dumpText")) { dumpText = true; } else if (args[i].equals("-md")) { @@ -164,7 +167,23 @@ public class IndexingFiltersChecker extends Configured implements Tool { LOG.info(client.toString()); } - public void run(){ + public void run() { + if (keepClientCnxOpen) { + while (true) { // keep connection open until closes + readWrite(); + } + } else { + readWrite(); + + try { // close ourselves + client.close(); + } catch (Exception e){ + LOG.error(e.toString()); + } + } + } + + protected void readWrite() { String line; BufferedReader in = null; PrintWriter out = null; @@ -185,14 +204,6 @@ public class IndexingFiltersChecker extends Configured implements Tool { }catch (Exception e) { LOG.error("Read/Write failed: " + e); } - - try { - client.close(); - } catch (Exception e){ - LOG.error(e.toString()); - } - - return; } } @@ -331,6 +342,8 @@ public class IndexingFiltersChecker extends Configured implements Tool { } } } + + output.append("\n"); // For readability if keepClientCnxOpen if (getConf().getBoolean("doIndex", false) && doc != null) { IndexWriters writers = new IndexWriters(getConf()); @@ -355,4 +368,4 @@ public class IndexingFiltersChecker extends Configured implements Tool { new IndexingFiltersChecker(), args); System.exit(res); } -} +} \ No newline at end of file