Author: lewismc
Date: Thu Jun 20 20:20:27 2013
New Revision: 1495161
URL: http://svn.apache.org/r1495161
Log:
format CCParseFilter
Modified:
nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
Modified:
nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java?rev=1495161&r1=1495160&r2=1495161&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java (original)
+++ nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java Thu Jun 20 20:20:27 2013
@@ -259,55 +259,57 @@ public class CCParseFilter implements Pa
}
private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
- static {
- FIELDS.add(WebPage.Field.BASE_URL);
- FIELDS.add(WebPage.Field.METADATA);
- }
-
- private static final HashMap<String,String> WORK_TYPE_NAMES = new HashMap<String,String>();
- static {
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive", "interactive");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
- WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
- }
+
+ static {
+ FIELDS.add(WebPage.Field.BASE_URL);
+ FIELDS.add(WebPage.Field.METADATA);
+ }
- private Configuration conf;
+ private static final HashMap<String,String> WORK_TYPE_NAMES = new HashMap<String,String>();
+
+ static {
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive", "interactive");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
+ WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
+ }
- public void setConf(Configuration conf) {
- this.conf = conf;
- }
+ private Configuration conf;
- public Configuration getConf() {
- return this.conf;
- }
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ }
- @Override
- public Collection<Field> getFields() {
- return FIELDS;
- }
+ public Configuration getConf() {
+ return this.conf;
+ }
- /**
- * Adds metadata or otherwise modifies a parse of an HTML document, given
- * the DOM tree of a page.
- */
- @Override
- public Parse filter(String url, WebPage page, Parse parse,
- HTMLMetaTags metaTags, DocumentFragment doc) {
- // construct base url
- URL base;
- try {
- base = new URL(page.getBaseUrl().toString());
- // extract license metadata
- Walker.walk(doc, base, page, getConf());
- } catch (Exception e) {
- LOG.error("Error parsing " + url, e);
- return ParseStatusUtils.getEmptyParse(e, getConf());
- }
+ @Override
+ public Collection<Field> getFields() {
+ return FIELDS;
+ }
- return parse;
+ /**
+ * Adds metadata or otherwise modifies a parse of an HTML document, given
+ * the DOM tree of a page.
+ */
+ @Override
+ public Parse filter(String url, WebPage page, Parse parse,
+ HTMLMetaTags metaTags, DocumentFragment doc) {
+ // construct base url
+ URL base;
+ try {
+ base = new URL(page.getBaseUrl().toString());
+ // extract license metadata
+ Walker.walk(doc, base, page, getConf());
+ } catch (Exception e) {
+ LOG.error("Error parsing " + url, e);
+ return ParseStatusUtils.getEmptyParse(e, getConf());
}
+
+ return parse;
+ }
}