Code style: 2 spaces instead of tabs

Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/f5adbcc3
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/f5adbcc3
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/f5adbcc3

Branch: refs/heads/master
Commit: f5adbcc3c3bb447110b6733e0851b931e57465c3
Parents: 298cffc
Author: Thamme Gowda <[email protected]>
Authored: Sat Apr 30 17:15:50 2016 -0700
Committer: Thamme Gowda <[email protected]>
Committed: Sat Apr 30 17:15:50 2016 -0700

----------------------------------------------------------------------
 .../nutch/tools/AbstractCommonCrawlFormat.java  | 694 +++++++++----------
 1 file changed, 347 insertions(+), 347 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/f5adbcc3/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java b/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java
index d5a0154..1b425c4 100644
--- a/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java
+++ b/src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java
@@ -43,351 +43,351 @@ import com.ibm.icu.text.SimpleDateFormat;
  *
  */
 public abstract class AbstractCommonCrawlFormat implements CommonCrawlFormat {
-       protected static final Logger LOG = 
LoggerFactory.getLogger(AbstractCommonCrawlFormat.class.getName());
-
-       protected String url;
-
-       protected Content content;
-
-       protected Metadata metadata;
-
-       protected Configuration conf;
-
-       protected String keyPrefix;
-
-       protected boolean simpleDateFormat;
-
-       protected boolean jsonArray;
-
-       protected boolean reverseKey;
-
-       protected String reverseKeyValue;
-
-       protected List<String> inLinks;
-
-       public AbstractCommonCrawlFormat(String url, Content content, Metadata 
metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException 
{
-               this.url = url;
-               this.content = content;
-               this.metadata = metadata;
-               this.conf = nutchConf;
-
-               this.keyPrefix = config.getKeyPrefix();
-               this.simpleDateFormat = config.getSimpleDateFormat();
-               this.jsonArray = config.getJsonArray();
-               this.reverseKey = config.getReverseKey();
-               this.reverseKeyValue = config.getReverseKeyValue();
-       }
-
-       public String getJsonData(String url, Content content, Metadata 
metadata)
-                       throws IOException {
-               this.url = url;
-               this.content = content;
-               this.metadata = metadata;
-
-               return this.getJsonData();
-       }
-
-       public String getJsonData(String url, Content content, Metadata 
metadata,
-                       ParseData parseData) throws IOException {
-
-               // override of this is required in the actual formats
-               throw new NotImplementedException();
-       }
-
-       @Override
-       public String getJsonData() throws IOException {
-               try {
-                       startObject(null);
-
-                       // url
-                       writeKeyValue("url", getUrl());
-
-                       // timestamp
-                       writeKeyValue("timestamp", getTimestamp());
-
-                       // request
-                       startObject("request");
-                       writeKeyValue("method", getMethod());
-                       startObject("client");
-                       writeKeyValue("hostname", getRequestHostName());
-                       writeKeyValue("address", getRequestHostAddress());
-                       writeKeyValue("software", getRequestSoftware());
-                       writeKeyValue("robots", getRequestRobots());
-                       startObject("contact");
-                       writeKeyValue("name", getRequestContactName());
-                       writeKeyValue("email", getRequestContactEmail());
-                       closeObject("contact");
-                       closeObject("client");
-                       // start request headers
-                       startHeaders("headers", false, true);
-                       writeKeyValueWrapper("Accept", getRequestAccept());
-                       writeKeyValueWrapper("Accept-Encoding", 
getRequestAcceptEncoding());
-                       writeKeyValueWrapper("Accept-Language", 
getRequestAcceptLanguage());
-                       writeKeyValueWrapper("User-Agent", 
getRequestUserAgent());
-                       //closeObject("headers");
-                       closeHeaders("headers", false, true);
-                       writeKeyNull("body");
-                       closeObject("request");
-
-                       // response
-                       startObject("response");
-                       writeKeyValue("status", getResponseStatus());
-                       startObject("server");
-                       writeKeyValue("hostname", getResponseHostName());
-                       writeKeyValue("address", getResponseAddress());
-                       closeObject("server");
-                       // start response headers
-                       startHeaders("headers", false, true);
-                       writeKeyValueWrapper("Content-Encoding", 
getResponseContentEncoding());
-                       writeKeyValueWrapper("Content-Type", 
getResponseContentType());
-                       writeKeyValueWrapper("Date", getResponseDate());
-                       writeKeyValueWrapper("Server", getResponseServer());
-                       for (String name : metadata.names()) {
-                               if (name.equalsIgnoreCase("Content-Encoding") 
|| name.equalsIgnoreCase("Content-Type") || name.equalsIgnoreCase("Date") || 
name.equalsIgnoreCase("Server")) {
-                                       continue;
-                               }
-                               writeKeyValueWrapper(name, metadata.get(name));
-                       }
-                       closeHeaders("headers", false, true);
-                       writeKeyValue("body", getResponseContent());
-                       closeObject("response");
-
-                       // key
-                       if (!this.keyPrefix.isEmpty()) {
-                               this.keyPrefix += "-";
-                       }
-                       writeKeyValue("key", this.keyPrefix + getKey());
-
-                       // imported
-                       writeKeyValue("imported", getImported());
-
-                       if (getInLinks() != null){
-                               startArray("inlinks", false, true);
-                               for (String link : getInLinks()) {
-                                       writeArrayValue(link);
-                               }
-                               closeArray("inlinks", false, true);
-                       }
-                       closeObject(null);
-
-                       return generateJson();
-
-               } catch (IOException ioe) {
-                       LOG.warn("Error in processing file " + url + ": " + 
ioe.getMessage());
-                       throw new IOException("Error in generating JSON:" + 
ioe.getMessage());
-               }
-       }
-
-       // abstract methods
-
-       protected abstract void writeKeyValue(String key, String value) throws 
IOException;
-
-       protected abstract void writeKeyNull(String key) throws IOException;
-
-       protected abstract void startArray(String key, boolean nested, boolean 
newline) throws IOException;
-
-       protected abstract void closeArray(String key, boolean nested, boolean 
newline) throws IOException;
-
-       protected abstract void writeArrayValue(String value) throws 
IOException;
-
-       protected abstract void startObject(String key) throws IOException;
-
-       protected abstract void closeObject(String key) throws IOException;
-
-       protected abstract String generateJson() throws IOException;
-
-       // getters
-
-       protected String getUrl() {
-               try {
-                       return URIUtil.encodePath(url);
-               } catch (URIException e) {
-                       LOG.error("Can't encode URL " + url);
-               }
-
-               return url;
-       }
-
-       protected String getTimestamp() {
-               if (this.simpleDateFormat) {
-                       String timestamp = null;
-                       try {
-                               long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get(Metadata.LAST_MODIFIED))).getTime();
-                               timestamp = String.valueOf(epoch);
-                       } catch (ParseException pe) {
-                               LOG.warn(pe.getMessage());
-                       }
-                       return timestamp;
-               } else {
-                       return 
ifNullString(metadata.get(Metadata.LAST_MODIFIED));
-               }
-       }
-
-       protected String getMethod() {
-               return new String("GET");
-       }
-
-       protected String getRequestHostName() {
-               String hostName = "";
-               try {
-                       hostName = InetAddress.getLocalHost().getHostName();
-               } catch (UnknownHostException uhe) {
-
-               }
-               return hostName;
-       }
-
-       protected String getRequestHostAddress() {
-               String hostAddress = "";
-               try {
-                       hostAddress = 
InetAddress.getLocalHost().getHostAddress();
-               } catch (UnknownHostException uhe) {
-
-               }
-               return hostAddress;
-       }
-
-       protected String getRequestSoftware() {
-               return conf.get("http.agent.version", "");
-       }
-
-       protected String getRequestRobots() {
-               return new String("CLASSIC");
-       }
-
-       protected String getRequestContactName() {
-               return conf.get("http.agent.name", "");
-       }
-
-       protected String getRequestContactEmail() {
-               return conf.get("http.agent.email", "");
-       }
-
-       protected String getRequestAccept() {
-               return conf.get("http.accept", "");
-       }
-
-       protected String getRequestAcceptEncoding() {
-               return new String(""); // TODO
-       }
-
-       protected String getRequestAcceptLanguage() {
-               return conf.get("http.accept.language", "");
-       }
-
-       protected String getRequestUserAgent() {
-               return conf.get("http.robots.agents", "");
-       }
-
-       protected String getResponseStatus() {
-               return ifNullString(metadata.get("status"));
-       }
-
-       protected String getResponseHostName() {
-               return URLUtil.getHost(url);
-       }
-
-       protected String getResponseAddress() {
-               return ifNullString(metadata.get("_ip_"));
-       }
-
-       protected String getResponseContentEncoding() {
-               return ifNullString(metadata.get("Content-Encoding"));
-       }
-
-       protected String getResponseContentType() {
-               return ifNullString(metadata.get("Content-Type"));
-       }
-
-       public List<String> getInLinks() {
-               return inLinks;
-       }
-
-       public void setInLinks(List<String> inLinks) {
-               this.inLinks = inLinks;
-       }
-
-       protected String getResponseDate() {
-               if (this.simpleDateFormat) {
-                       String timestamp = null;
-                       try {
-                               long epoch = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
-                               timestamp = String.valueOf(epoch);
-                       } catch (ParseException pe) {
-                               LOG.warn(pe.getMessage());
-                       }
-                       return timestamp;
-               } else {
-                       return ifNullString(metadata.get("Date"));
-               }
-       }
-
-       protected String getResponseServer() {
-               return ifNullString(metadata.get("Server"));
-       }
-
-       protected String getResponseContent() {
-               return new String(content.getContent());
-       }
-
-       protected String getKey() {
-               if (this.reverseKey) {
-                       return this.reverseKeyValue;
-               }
-               else {
-                       return url;
-               }
-       }
-
-       protected String getImported() {
-               if (this.simpleDateFormat) {
-                       String timestamp = null;
-                       try {
-                               long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
-                               timestamp = String.valueOf(epoch);
-                       } catch (ParseException pe) {
-                               LOG.warn(pe.getMessage());
-                       }
-                       return timestamp;
-               } else {
-                       return ifNullString(metadata.get("Date"));
-               }
-       }
-
-       private static String ifNullString(String value) {
-               return (value != null) ? value : "";
-       }
-
-       private void startHeaders(String key, boolean nested, boolean newline) 
throws IOException {
-               if (this.jsonArray) {
-                       startArray(key, nested, newline);
-               }
-               else {
-                       startObject(key);
-               }
-       }
-
-       private void closeHeaders(String key, boolean nested, boolean newline) 
throws IOException {
-               if (this.jsonArray) {
-                       closeArray(key, nested, newline);
-               }
-               else {
-                       closeObject(key);
-               }
-       }
-
-       private void writeKeyValueWrapper(String key, String value) throws 
IOException {
-               if (this.jsonArray) {
-                       startArray(null, true, false);
-                       writeArrayValue(key);
-                       writeArrayValue(value);
-                       closeArray(null, true, false);
-               }
-               else {
-                       writeKeyValue(key, value);
-               }
-       }
-
-       @Override
-       public void close() {}
+  protected static final Logger LOG = 
LoggerFactory.getLogger(AbstractCommonCrawlFormat.class.getName());
+
+  protected String url;
+
+  protected Content content;
+
+  protected Metadata metadata;
+
+  protected Configuration conf;
+
+  protected String keyPrefix;
+
+  protected boolean simpleDateFormat;
+
+  protected boolean jsonArray;
+
+  protected boolean reverseKey;
+
+  protected String reverseKeyValue;
+
+  protected List<String> inLinks;
+
+  public AbstractCommonCrawlFormat(String url, Content content, Metadata 
metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException 
{
+    this.url = url;
+    this.content = content;
+    this.metadata = metadata;
+    this.conf = nutchConf;
+
+    this.keyPrefix = config.getKeyPrefix();
+    this.simpleDateFormat = config.getSimpleDateFormat();
+    this.jsonArray = config.getJsonArray();
+    this.reverseKey = config.getReverseKey();
+    this.reverseKeyValue = config.getReverseKeyValue();
+  }
+
+  public String getJsonData(String url, Content content, Metadata metadata)
+      throws IOException {
+    this.url = url;
+    this.content = content;
+    this.metadata = metadata;
+
+    return this.getJsonData();
+  }
+
+  public String getJsonData(String url, Content content, Metadata metadata,
+      ParseData parseData) throws IOException {
+
+    // override of this is required in the actual formats
+    throw new NotImplementedException();
+  }
+
+  @Override
+  public String getJsonData() throws IOException {
+    try {
+      startObject(null);
+
+      // url
+      writeKeyValue("url", getUrl());
+
+      // timestamp
+      writeKeyValue("timestamp", getTimestamp());
+
+      // request
+      startObject("request");
+      writeKeyValue("method", getMethod());
+      startObject("client");
+      writeKeyValue("hostname", getRequestHostName());
+      writeKeyValue("address", getRequestHostAddress());
+      writeKeyValue("software", getRequestSoftware());
+      writeKeyValue("robots", getRequestRobots());
+      startObject("contact");
+      writeKeyValue("name", getRequestContactName());
+      writeKeyValue("email", getRequestContactEmail());
+      closeObject("contact");
+      closeObject("client");
+      // start request headers
+      startHeaders("headers", false, true);
+      writeKeyValueWrapper("Accept", getRequestAccept());
+      writeKeyValueWrapper("Accept-Encoding", getRequestAcceptEncoding());
+      writeKeyValueWrapper("Accept-Language", getRequestAcceptLanguage());
+      writeKeyValueWrapper("User-Agent", getRequestUserAgent());
+      //closeObject("headers");
+      closeHeaders("headers", false, true);
+      writeKeyNull("body");
+      closeObject("request");
+
+      // response
+      startObject("response");
+      writeKeyValue("status", getResponseStatus());
+      startObject("server");
+      writeKeyValue("hostname", getResponseHostName());
+      writeKeyValue("address", getResponseAddress());
+      closeObject("server");
+      // start response headers
+      startHeaders("headers", false, true);
+      writeKeyValueWrapper("Content-Encoding", getResponseContentEncoding());
+      writeKeyValueWrapper("Content-Type", getResponseContentType());
+      writeKeyValueWrapper("Date", getResponseDate());
+      writeKeyValueWrapper("Server", getResponseServer());
+      for (String name : metadata.names()) {
+        if (name.equalsIgnoreCase("Content-Encoding") || 
name.equalsIgnoreCase("Content-Type") || name.equalsIgnoreCase("Date") || 
name.equalsIgnoreCase("Server")) {
+          continue;
+        }
+        writeKeyValueWrapper(name, metadata.get(name));
+      }
+      closeHeaders("headers", false, true);
+      writeKeyValue("body", getResponseContent());
+      closeObject("response");
+
+      // key
+      if (!this.keyPrefix.isEmpty()) {
+        this.keyPrefix += "-";
+      }
+      writeKeyValue("key", this.keyPrefix + getKey());
+
+      // imported
+      writeKeyValue("imported", getImported());
+
+      if (getInLinks() != null){
+        startArray("inlinks", false, true);
+        for (String link : getInLinks()) {
+          writeArrayValue(link);
+        }
+        closeArray("inlinks", false, true);
+      }
+      closeObject(null);
+
+      return generateJson();
+
+    } catch (IOException ioe) {
+      LOG.warn("Error in processing file " + url + ": " + ioe.getMessage());
+      throw new IOException("Error in generating JSON:" + ioe.getMessage());
+    }
+  }
+
+  // abstract methods
+
+  protected abstract void writeKeyValue(String key, String value) throws 
IOException;
+
+  protected abstract void writeKeyNull(String key) throws IOException;
+
+  protected abstract void startArray(String key, boolean nested, boolean 
newline) throws IOException;
+
+  protected abstract void closeArray(String key, boolean nested, boolean 
newline) throws IOException;
+
+  protected abstract void writeArrayValue(String value) throws IOException;
+
+  protected abstract void startObject(String key) throws IOException;
+
+  protected abstract void closeObject(String key) throws IOException;
+
+  protected abstract String generateJson() throws IOException;
+
+  // getters
+
+  protected String getUrl() {
+    try {
+      return URIUtil.encodePath(url);
+    } catch (URIException e) {
+      LOG.error("Can't encode URL " + url);
+    }
+
+    return url;
+  }
+
+  protected String getTimestamp() {
+    if (this.simpleDateFormat) {
+      String timestamp = null;
+      try {
+        long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get(Metadata.LAST_MODIFIED))).getTime();
+        timestamp = String.valueOf(epoch);
+      } catch (ParseException pe) {
+        LOG.warn(pe.getMessage());
+      }
+      return timestamp;
+    } else {
+      return ifNullString(metadata.get(Metadata.LAST_MODIFIED));
+    }
+  }
+
+  protected String getMethod() {
+    return new String("GET");
+  }
+
+  protected String getRequestHostName() {
+    String hostName = "";
+    try {
+      hostName = InetAddress.getLocalHost().getHostName();
+    } catch (UnknownHostException uhe) {
+
+    }
+    return hostName;
+  }
+
+  protected String getRequestHostAddress() {
+    String hostAddress = "";
+    try {
+      hostAddress = InetAddress.getLocalHost().getHostAddress();
+    } catch (UnknownHostException uhe) {
+
+    }
+    return hostAddress;
+  }
+
+  protected String getRequestSoftware() {
+    return conf.get("http.agent.version", "");
+  }
+
+  protected String getRequestRobots() {
+    return new String("CLASSIC");
+  }
+
+  protected String getRequestContactName() {
+    return conf.get("http.agent.name", "");
+  }
+
+  protected String getRequestContactEmail() {
+    return conf.get("http.agent.email", "");
+  }
+
+  protected String getRequestAccept() {
+    return conf.get("http.accept", "");
+  }
+
+  protected String getRequestAcceptEncoding() {
+    return new String(""); // TODO
+  }
+
+  protected String getRequestAcceptLanguage() {
+    return conf.get("http.accept.language", "");
+  }
+
+  protected String getRequestUserAgent() {
+    return conf.get("http.robots.agents", "");
+  }
+
+  protected String getResponseStatus() {
+    return ifNullString(metadata.get("status"));
+  }
+
+  protected String getResponseHostName() {
+    return URLUtil.getHost(url);
+  }
+
+  protected String getResponseAddress() {
+    return ifNullString(metadata.get("_ip_"));
+  }
+
+  protected String getResponseContentEncoding() {
+    return ifNullString(metadata.get("Content-Encoding"));
+  }
+
+  protected String getResponseContentType() {
+    return ifNullString(metadata.get("Content-Type"));
+  }
+
+  public List<String> getInLinks() {
+    return inLinks;
+  }
+
+  public void setInLinks(List<String> inLinks) {
+    this.inLinks = inLinks;
+  }
+
+  protected String getResponseDate() {
+    if (this.simpleDateFormat) {
+      String timestamp = null;
+      try {
+        long epoch = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
+        timestamp = String.valueOf(epoch);
+      } catch (ParseException pe) {
+        LOG.warn(pe.getMessage());
+      }
+      return timestamp;
+    } else {
+      return ifNullString(metadata.get("Date"));
+    }
+  }
+
+  protected String getResponseServer() {
+    return ifNullString(metadata.get("Server"));
+  }
+
+  protected String getResponseContent() {
+    return new String(content.getContent());
+  }
+
+  protected String getKey() {
+    if (this.reverseKey) {
+      return this.reverseKeyValue;
+    }
+    else {
+      return url;
+    }
+  }
+
+  protected String getImported() {
+    if (this.simpleDateFormat) {
+      String timestamp = null;
+      try {
+        long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
+        timestamp = String.valueOf(epoch);
+      } catch (ParseException pe) {
+        LOG.warn(pe.getMessage());
+      }
+      return timestamp;
+    } else {
+      return ifNullString(metadata.get("Date"));
+    }
+  }
+
+  private static String ifNullString(String value) {
+    return (value != null) ? value : "";
+  }
+
+  private void startHeaders(String key, boolean nested, boolean newline) 
throws IOException {
+    if (this.jsonArray) {
+      startArray(key, nested, newline);
+    }
+    else {
+      startObject(key);
+    }
+  }
+
+  private void closeHeaders(String key, boolean nested, boolean newline) 
throws IOException {
+    if (this.jsonArray) {
+      closeArray(key, nested, newline);
+    }
+    else {
+      closeObject(key);
+    }
+  }
+
+  private void writeKeyValueWrapper(String key, String value) throws 
IOException {
+    if (this.jsonArray) {
+      startArray(null, true, false);
+      writeArrayValue(key);
+      writeArrayValue(value);
+      closeArray(null, true, false);
+    }
+    else {
+      writeKeyValue(key, value);
+    }
+  }
+
+  @Override
+  public void close() {}
 }

Reply via email to