Author: ab Date: Wed May 9 12:15:45 2007 New Revision: 536623 URL: http://svn.apache.org/viewvc?view=rev&rev=536623 Log: Add missing javadoc and license header.
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?view=diff&rev=536623&r1=536622&r2=536623 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Wed May 9 12:15:45 2007 @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nutch.parse; import java.util.HashMap; @@ -12,6 +29,11 @@ /** * A utility class that stores result of a parse. Internally * a ParseResult stores <{@link Text}, {@link Parse}> pairs. + * <p>Parsers may return multiple results, which correspond to parts + * or other associated documents related to the original URL.</p> + * <p>There will be usually one parse result that corresponds directly + * to the original URL, and possibly many (or none) results that correspond + * to derived URLs (or sub-URLs). 
*/ public class ParseResult implements Iterable<Map.Entry<Text, Parse>> { private Map<Text, Parse> parseMap; @@ -19,45 +41,94 @@ public static final Log LOG = LogFactory.getLog(ParseResult.class); + /** + * Create a container for parse results. + * @param originalUrl the original url from which all parse results + * have been obtained. + */ public ParseResult(String originalUrl) { parseMap = new HashMap<Text, Parse>(); this.originalUrl = originalUrl; } + /** + * Convenience method for obtaining {@link ParseResult} from a single + * {@link Parse} output. + * @param url canonical url + * @param parse single parse output + * @return result containing the single parse output + */ public static ParseResult createParseResult(String url, Parse parse) { ParseResult parseResult = new ParseResult(url); parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData()); return parseResult; } + /** + * Checks whether the result is empty. + * @return + */ public boolean isEmpty() { return parseMap.isEmpty(); } + /** + * Return the number of parse outputs (both successful and failed) + */ public int size() { return parseMap.size(); } + /** + * Retrieve a single parse output. + * @param key sub-url under which the parse output is stored. + * @return parse output corresponding to this sub-url, or null. + */ public Parse get(String key) { return get(new Text(key)); } + /** + * Retrieve a single parse output. + * @param key sub-url under which the parse output is stored. + * @return parse output corresponding to this sub-url, or null. + */ public Parse get(Text key) { return parseMap.get(key); } + /** + * Store a result of parsing. + * @param key URL or sub-url of this parse result + * @param text plain text result + * @param data corresponding parse metadata of this result + */ public void put(Text key, ParseText text, ParseData data) { put(key.toString(), text, data); } + /** + * Store a result of parsing. 
+ * @param key URL or sub-url of this parse result + * @param text plain text result + * @param data corresponding parse metadata of this result + */ public void put(String key, ParseText text, ParseData data) { parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl))); } + /** + * Iterate over all entries in the <url, Parse> map. + */ public Iterator<Entry<Text, Parse>> iterator() { return parseMap.entrySet().iterator(); } + /** + * Remove all results where status is not successful (as determined + * by {@link ParseStatus#isSuccess()}). Note that effects of this operation + * cannot be reversed. + */ public void filter() { for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) { Entry<Text, Parse> entry = i.next(); ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs