Repository: nutch Updated Branches: refs/heads/2.x 700857d16 -> a165e4165
NUTCH-2347 Logger is used instead of printing Throwable. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/8dbf8083 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/8dbf8083 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/8dbf8083 Branch: refs/heads/2.x Commit: 8dbf8083aa63fbd881c18fc8824981b4c84c9c02 Parents: 6e3c34d Author: kamaci <[email protected]> Authored: Fri Jan 20 15:25:49 2017 +0200 Committer: kamaci <[email protected]> Committed: Fri Jan 20 15:25:49 2017 +0200 ---------------------------------------------------------------------- src/java/org/apache/nutch/crawl/WebTableReader.java | 7 ++++--- src/java/org/apache/nutch/host/HostDbReader.java | 15 +++++++++------ .../org/apache/nutch/parse/NutchSitemapParser.java | 13 ++++++++++--- src/java/org/apache/nutch/parse/ParseUtil.java | 10 ++++++---- .../org/apache/nutch/protocol/RobotRulesParser.java | 3 ++- src/java/org/apache/nutch/tools/DmozParser.java | 4 +++- .../nutch/util/GenericWritableConfigurable.java | 10 +++++++++- src/java/org/apache/nutch/util/NutchTool.java | 12 +++++++++--- src/java/org/apache/nutch/util/URLUtil.java | 2 +- 9 files changed, 53 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/crawl/WebTableReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/crawl/WebTableReader.java b/src/java/org/apache/nutch/crawl/WebTableReader.java index 5e58641..81d933f 100644 --- a/src/java/org/apache/nutch/crawl/WebTableReader.java +++ b/src/java/org/apache/nutch/crawl/WebTableReader.java @@ -248,11 +248,12 @@ public class WebTableReader extends NutchTool implements Tool { System.out.println(getPageRepresentation(url, page, dumpContent, dumpHeaders, dumpLinks, dumpText)); } catch (Exception e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } } - if (!found) - System.out.println(key + " not found"); + if (!found) { + LOG.info("{} not found", key); + } result.close(); datastore.close(); } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/host/HostDbReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/host/HostDbReader.java b/src/java/org/apache/nutch/host/HostDbReader.java index 7ea14bc..1c1d462 100644 --- a/src/java/org/apache/nutch/host/HostDbReader.java +++ b/src/java/org/apache/nutch/host/HostDbReader.java @@ -17,9 +17,8 @@ package org.apache.nutch.host; import java.io.IOException; +import java.lang.invoke.MethodHandles; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.gora.query.Query; import org.apache.gora.query.Result; import org.apache.gora.store.DataStore; @@ -31,13 +30,16 @@ import org.apache.nutch.storage.Host; import org.apache.nutch.storage.StorageUtils; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.TableUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Display entries from the hostDB. Allows to verify that the storage is OK. **/ public class HostDbReader extends Configured implements Tool { - public static final Log LOG = LogFactory.getLog(HostDbReader.class); + private static final Logger LOG = LoggerFactory + .getLogger(MethodHandles.lookup().lookupClass()); private void read(String key) throws ClassNotFoundException, IOException, Exception { @@ -54,13 +56,14 @@ public class HostDbReader extends Configured implements Tool { Result<String, Host> result = datastore.execute(query); while (result.next()) { + String hostName = null; try { - String hostName = TableUtil.unreverseUrl(result.getKey()); + hostName = TableUtil.unreverseUrl(result.getKey()); Host host = result.get(); System.out.println(hostName); System.out.println(host); } catch (Exception e) { - e.printStackTrace(); + LOG.error("Failed to get host from hostname {}: {}", hostName, e.getMessage()); } } result.close(); @@ -85,7 +88,7 @@ public class HostDbReader extends Configured implements Tool { read(key); return 0; } catch (Exception e) { - LOG.fatal("HostDBReader: " + StringUtils.stringifyException(e)); + LOG.error("HostDBReader: " + StringUtils.stringifyException(e)); return -1; } } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/parse/NutchSitemapParser.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/parse/NutchSitemapParser.java b/src/java/org/apache/nutch/parse/NutchSitemapParser.java index c1bb178..dc0df58 100644 --- a/src/java/org/apache/nutch/parse/NutchSitemapParser.java +++ b/src/java/org/apache/nutch/parse/NutchSitemapParser.java @@ -17,10 +17,14 @@ package org.apache.nutch.parse; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.net.MalformedURLException; import java.net.URL; import java.util.*; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import crawlercommons.sitemaps.*; import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; @@ -35,6 +39,9 @@ public class NutchSitemapParser { private static Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>(); + private static final Logger LOG = LoggerFactory + .getLogger(MethodHandles.lookup().lookupClass()); + static { FIELDS.add(WebPage.Field.BASE_URL); } @@ -50,9 +57,9 @@ public class NutchSitemapParser { .parseSiteMap(contentType, page.getContent().array(), new URL(url)); } catch (UnknownFormatException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } catch (IOException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } Map<Outlink, Metadata> outlinkMap = null; Iterator i$; @@ -80,7 +87,7 @@ public class NutchSitemapParser { new Outlink(sitemapUrl.getUrl().toString(), "sitemap.outlink"), metadata); } catch (MalformedURLException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } } } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/parse/ParseUtil.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/parse/ParseUtil.java b/src/java/org/apache/nutch/parse/ParseUtil.java index a38fb0a..8de89d2 100644 --- a/src/java/org/apache/nutch/parse/ParseUtil.java +++ b/src/java/org/apache/nutch/parse/ParseUtil.java @@ -42,6 +42,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.net.MalformedURLException; import java.net.URL; import java.nio.ByteBuffer; @@ -69,7 +70,8 @@ public class ParseUtil extends Configured { ALWAYS, HOURLY, DAILY, WEEKLY, MONTHLY, YEARLY, NEVER } /* our log stream */ - public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class); + private static final Logger LOG = LoggerFactory + .getLogger(MethodHandles.lookup().lookupClass()); private static final int DEFAULT_MAX_PARSE_TIME = 30; private static final int DEFAULT_OUTLINKS_MAX_TARGET_LENGTH = 3000; @@ -237,7 +239,7 @@ public class ParseUtil extends Configured { try { reversedUrl = TableUtil.reverseUrl(toUrl); // collect it } catch (MalformedURLException e) { - e.printStackTrace(); + LOG.error("Failed to reverse URL {}: {}", toUrl, e.getMessage()); } WebPage newRow = WebPage.newBuilder().build(); Set<Map.Entry<String, String[]>> metaDatas = outlinkMap.get(outlink) @@ -262,9 +264,9 @@ public class ParseUtil extends Configured { try { context.write(reversedUrl, newRow); } catch (IOException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } catch (InterruptedException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/protocol/RobotRulesParser.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/protocol/RobotRulesParser.java b/src/java/org/apache/nutch/protocol/RobotRulesParser.java index 867b71b..dffeeeb 100644 --- a/src/java/org/apache/nutch/protocol/RobotRulesParser.java +++ b/src/java/org/apache/nutch/protocol/RobotRulesParser.java @@ -28,6 +28,7 @@ import java.util.Hashtable; import java.util.StringTokenizer; // Commons Logging imports +import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -183,7 +184,7 @@ public abstract class RobotRulesParser implements Configurable { } testsIn.close(); } catch (Exception e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } } } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/tools/DmozParser.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/tools/DmozParser.java b/src/java/org/apache/nutch/tools/DmozParser.java index 03d2662..bb6cef6 100644 --- a/src/java/org/apache/nutch/tools/DmozParser.java +++ b/src/java/org/apache/nutch/tools/DmozParser.java @@ -24,6 +24,8 @@ import java.util.*; import java.util.regex.*; import javax.xml.parsers.*; + +import org.apache.hadoop.util.StringUtils; import org.xml.sax.*; import org.xml.sax.helpers.*; import org.apache.xerces.util.XMLChar; @@ -210,7 +212,7 @@ public class DmozParser { } catch (IOException e) { // TODO Auto-generated catch block - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); } } else { System.out.println(curURL); http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/util/GenericWritableConfigurable.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/util/GenericWritableConfigurable.java b/src/java/org/apache/nutch/util/GenericWritableConfigurable.java index 755aad0..87c6f31 100644 --- a/src/java/org/apache/nutch/util/GenericWritableConfigurable.java +++ b/src/java/org/apache/nutch/util/GenericWritableConfigurable.java @@ -18,6 +18,11 @@ package org.apache.nutch.util; import java.io.DataInput; import java.io.IOException; +import java.lang.invoke.MethodHandles; + +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; @@ -33,6 +38,9 @@ public abstract class GenericWritableConfigurable extends GenericWritable private Configuration conf; + private static final Logger LOG = LoggerFactory + .getLogger(MethodHandles.lookup().lookupClass()); + public Configuration getConf() { return conf; } @@ -48,7 +56,7 @@ public abstract class GenericWritableConfigurable extends GenericWritable try { set((Writable) clazz.newInstance()); } catch (Exception e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); throw new IOException("Cannot initialize the class: " + clazz); } Writable w = get(); http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/util/NutchTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/util/NutchTool.java b/src/java/org/apache/nutch/util/NutchTool.java index 443d1da..8fed1e1 100644 --- a/src/java/org/apache/nutch/util/NutchTool.java +++ b/src/java/org/apache/nutch/util/NutchTool.java @@ -17,10 +17,14 @@ package org.apache.nutch.util; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.mapreduce.Job; import org.apache.nutch.metadata.Nutch; @@ -33,6 +37,8 @@ public abstract class NutchTool extends Configured { protected Job currentJob; protected int numJobs; protected int currentJobNum; + private static final Logger LOG = LoggerFactory + .getLogger(MethodHandles.lookup().lookupClass()); /** * Runs the tool, using a map of arguments. May return results, or null. @@ -55,10 +61,10 @@ public abstract class NutchTool extends Configured { try { res = (currentJob.mapProgress() + currentJob.reduceProgress()) / 2.0f; } catch (IOException e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); res = 0; } catch (IllegalStateException ile) { - ile.printStackTrace(); + LOG.error(StringUtils.stringifyException(ile)); res = 0; } } @@ -103,7 +109,7 @@ public abstract class NutchTool extends Configured { currentJob.killJob(); return true; } catch (Exception e) { - e.printStackTrace(); + LOG.error(StringUtils.stringifyException(e)); return false; } } http://git-wip-us.apache.org/repos/asf/nutch/blob/8dbf8083/src/java/org/apache/nutch/util/URLUtil.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/util/URLUtil.java b/src/java/org/apache/nutch/util/URLUtil.java index e1df9e3..95de2a6 100644 --- a/src/java/org/apache/nutch/util/URLUtil.java +++ b/src/java/org/apache/nutch/util/URLUtil.java @@ -473,7 +473,7 @@ public class URLUtil { try { System.out.println(URLUtil.getDomainName(new URL(url))); } catch (MalformedURLException ex) { - ex.printStackTrace(); + System.err.println(ex.getMessage()); } } }
