svn commit: r1497447 - in /nutch/branches/2.x: ./ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/ src/java/org/apach
Author: lewismc Date: Thu Jun 27 17:04:42 2013 New Revision: 1497447 URL: http://svn.apache.org/r1497447 Log: NUTCH-1591 Incorrect conversion of ByteBuffer to String Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java nutch/branches/2.x/src/java/org/apache/nutch/crawl/MD5Signature.java nutch/branches/2.x/src/java/org/apache/nutch/crawl/SignatureComparator.java nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexUtil.java nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java nutch/branches/2.x/src/java/org/apache/nutch/storage/Host.java nutch/branches/2.x/src/java/org/apache/nutch/util/Bytes.java nutch/branches/2.x/src/java/org/apache/nutch/util/EncodingDetector.java nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java nutch/branches/2.x/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java nutch/branches/2.x/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java nutch/branches/2.x/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java nutch/branches/2.x/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java nutch/branches/2.x/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java nutch/branches/2.x/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java nutch/branches/2.x/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java nutch/branches/2.x/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java 
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1497447r1=1497446r2=1497447view=diff == --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Thu Jun 27 17:04:42 2013 @@ -2,6 +2,8 @@ Nutch Change Log Current Development +* NUTCH-1591 Incorrect conversion of ByteBuffer to String (Jason Howes via lewismc) + * NUTCH-1571 SolrInputSplit doesn't implement Writable and crawl script doesn't pass crawlId to generate and updatedb tasks (yuanyun.cn via lewismc) * NUTCH-1126 JUnit test for urlfilter-prefix (Talat UYARER via markus) Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java?rev=1497447r1=1497446r2=1497447view=diff == --- nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java Thu Jun 27 17:04:42 2013 @@ -195,7 +195,7 @@ public class DbReader { while (iterator.hasNext()) { EntryUtf8, ByteBuffer entry = iterator.next(); simpleMeta.put(entry.getKey().toString(), - Bytes.toStringBinary(entry.getValue().array())); + Bytes.toStringBinary(entry.getValue())); } } res.put(f, simpleMeta); @@ -207,10 +207,10 @@ public class DbReader { res.put(f, ParseStatusUtils.toString(ps)); } else if (signature.equals(f)) { ByteBuffer bb = page.getSignature(); - res.put(f, StringUtil.toHexString(bb.array())); + res.put(f, StringUtil.toHexString(bb)); } else if (content.equals(f)) { ByteBuffer bb = page.getContent(); - res.put(f, Bytes.toStringBinary(bb.array())); + 
res.put(f, Bytes.toStringBinary(bb)); } else if (markers.equals(f)) { res.put(f, convertMap(page.getMarkers())); } else if (inlinks.equals(f)) { @@ -221,7 +221,7 @@ public class DbReader { if (val instanceof Utf8) { val = val.toString(); } else if (val instanceof ByteBuffer) { -val = Bytes.toStringBinary(((ByteBuffer)val).array()); +val = Bytes.toStringBinary((ByteBuffer)val); } res.put(f, val); } Modified:
svn commit: r1497460 - /nutch/branches/branch-2.2.1/
Author: lewismc Date: Thu Jun 27 17:19:25 2013 New Revision: 1497460 URL: http://svn.apache.org/r1497460 Log: Nutch 2.2.1 branch Added: nutch/branches/branch-2.2.1/ - copied from r1497459, nutch/branches/2.x/
svn commit: r1497469 - /nutch/branches/branch-2.2.1/pom.xml
Author: lewismc Date: Thu Jun 27 17:28:04 2013 New Revision: 1497469 URL: http://svn.apache.org/r1497469 Log: commit new pom.xml Modified: nutch/branches/branch-2.2.1/pom.xml Modified: nutch/branches/branch-2.2.1/pom.xml URL: http://svn.apache.org/viewvc/nutch/branches/branch-2.2.1/pom.xml?rev=1497469&r1=1497468&r2=1497469&view=diff == --- nutch/branches/branch-2.2.1/pom.xml (original) +++ nutch/branches/branch-2.2.1/pom.xml Thu Jun 27 17:28:04 2013 @@ -22,7 +22,7 @@ <groupId>org.apache.nutch</groupId> <artifactId>nutch</artifactId> <packaging>jar</packaging> - <version>2.2</version> + <version>2.2.1</version> <name>Apache Nutch</name> <url>http://nutch.apache.org</url> <licenses> @@ -153,7 +153,7 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> -<version>1.1.1</version> +<version>1.2.0</version> <optional>true</optional> </dependency> <dependency> @@ -165,7 +165,7 @@ <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-core</artifactId> -<version>1.2</version> +<version>1.3</version> <scope>compile</scope> </dependency> <dependency> @@ -231,7 +231,7 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-test</artifactId> -<version>1.1.1</version> +<version>1.2.0</version> <optional>true</optional> </dependency> <dependency>
svn commit: r1497480 - /nutch/branches/2.x/CHANGES.txt
Author: lewismc Date: Thu Jun 27 18:01:56 2013 New Revision: 1497480 URL: http://svn.apache.org/r1497480 Log: update for release report Modified: nutch/branches/2.x/CHANGES.txt Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1497480&r1=1497479&r2=1497480&view=diff == --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Thu Jun 27 18:01:56 2013 @@ -5,6 +5,7 @@ Current Development NUTCH- Release 2.2.1 - 06/27/2013 (mm/dd/yyyy) +Release Report - http://s.apache.org/PGa * NUTCH-1591 Incorrect conversion of ByteBuffer to String (Jason Howes via lewismc)
svn commit: r1497557 - in /nutch/trunk: ./ conf/ src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/ src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/
Author: snagel Date: Thu Jun 27 20:16:22 2013 New Revision: 1497557 URL: http://svn.apache.org/r1497557 Log: NUTCH-1580 index-static returns object instead of value for index.static Modified: nutch/trunk/CHANGES.txt nutch/trunk/conf/nutch-default.xml nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1497557&r1=1497556&r2=1497557&view=diff == --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Jun 27 20:16:22 2013 @@ -2,6 +2,8 @@ Nutch Change Log Nutch Development Trunk +* NUTCH-1580 index-static returns object instead of value for index.static (Antoinette, lewismc, snagel) + * NUTCH-1126 JUnit test for urlfilter-prefix (Talat UYARER via markus) Apache Nutch 1.7 Release - 06/20/2013 (mm/dd/yyyy) Modified: nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1497557&r1=1497556&r2=1497557&view=diff == --- nutch/trunk/conf/nutch-default.xml (original) +++ nutch/trunk/conf/nutch-default.xml Thu Jun 27 20:16:22 2013 @@ -1241,9 +1241,11 @@ <name>index.static</name> <value></value> <description> - A simple plugin called at indexing that adds fields with static data. - You can specify a list of fieldname:fieldcontent per nutch job. - It can be useful when collections can't be created by urlpatterns, + Used by plugin index-static to adds fields with static data at indexing time. + You can specify a comma-separated list of fieldname:fieldcontent per Nutch job. + Each fieldcontent can have multiple values separated by space, e.g., +field1:value1.1 value1.2 value1.3,field2:value2.1 value2.2 ... + It can be useful when collections can't be created by URL patterns, like in subcollection, but on a job-basis. 
/description /property Modified: nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java?rev=1497557r1=1497556r2=1497557view=diff == --- nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java (original) +++ nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java Thu Jun 27 20:16:22 2013 @@ -57,7 +57,9 @@ public class StaticFieldIndexer implemen if (this.addStaticFields == true) { for (EntryString, String[] entry : this.fields.entrySet()) { -doc.add(entry.getKey(), entry.getValue()); +for (String val : entry.getValue()) { + doc.add(entry.getKey(), val); +} } } return doc; Modified: nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java?rev=1497557r1=1497556r2=1497557view=diff == --- nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java (original) +++ nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java Thu Jun 27 20:16:22 2013 @@ -100,11 +100,11 @@ public class TestStaticFieldIndexerTest assertNotNull(doc); assertFalse(test if doc is not empty, doc.getFieldNames().isEmpty()); assertEquals(test if doc has 3 fields, 3, doc.getFieldNames().size()); -assertEquals(test if doc has field1, val1, -((String[]) doc.getField(field1).getValues().get(0))[0]); -assertEquals(test if doc has field2, val2, -((String[]) doc.getField(field2).getValues().get(0))[0]); -assertEquals(test if doc has field4, val4, -((String[]) doc.getField(field4).getValues().get(0))[0]); 
+assertTrue(test if doc has field1, doc.getField(field1).getValues() +.contains(val1)); +assertTrue(test if doc has field2, doc.getField(field2).getValues() +.contains(val2)); +assertTrue(test if doc has field4, doc.getField(field4).getValues() +.contains(val4)); } }