Author: mikemccand
Date: Tue Aug 25 12:02:17 2009
New Revision: 807574
URL: http://svn.apache.org/viewvc?rev=807574&view=rev
Log:
improvements to CharStream/Reader/Filter & related classes' javadocs
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
URL:
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/BaseCharFilter.java
Tue Aug 25 12:02:17 2009
@@ -21,15 +21,15 @@
import java.util.List;
/**
- * Base utility class for implementing a {@link
- * CharFilter}. You record mappings by calling {@link
- * #addOffCorrectMap}, and then invoke the correct method.
- * This class is not particularly efficient, eg a new class
- * instance is created for every call to {@link
- * #addOffCorrectMap}, which is appended to a private list.
- * When retrieving a mapping, that list is linearly
- * checked.
- * @version $Id$
+ * Base utility class for implementing a {@link CharFilter}.
+ * You subclass this, and then record mappings by calling
+ * {@link #addOffCorrectMap}, and then invoke the
+ * {@link #correct} method to correct an offset.
+ *
+ * <p><b>NOTE</b>: This class is not particularly efficient.
+ * For example, a new class instance is created for every
+ * call to {@link #addOffCorrectMap}, which is then appended
+ * to a private list.
*/
public abstract class BaseCharFilter extends CharFilter {
@@ -41,8 +41,10 @@
}
/** Retrieve the corrected offset. Note that this method
- * is slow if you correct positions far before the most
- * recently added position. */
+ * is slow if you correct positions far before the most
+ * recently added position, as it's a simple linear
+ * search backwards through all offset corrections added
+ * by {@link #addOffCorrectMap}. */
protected int correct(int currentOff) {
if (pcmList == null || pcmList.isEmpty()) {
return currentOff;
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java
URL:
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharReader.java Tue
Aug 25 12:02:17 2009
@@ -21,10 +21,10 @@
import java.io.Reader;
/**
- * CharReader is a Reader wrapper. It reads chars from Reader and outputs
CharStream.
- *
- * @version $Id$
- *
+ * CharReader is a Reader wrapper. It reads chars from
+ * Reader and outputs {@link CharStream}, defining an
+ * identity function {@link #correctOffset} method that
+ * simply returns the provided offset.
*/
public final class CharReader extends CharStream {
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
URL:
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharStream.java Tue
Aug 25 12:02:17 2009
@@ -20,17 +20,23 @@
import java.io.Reader;
/**
- * CharStream adds <a href="#correctOffset(int)">correctOffset</a>
functionality over Reader.
- *
- * @version $Id$
+ * CharStream adds <a
+ * href="#correctOffset(int)">correctOffset</a>
+ * functionality over Reader. All Tokenizers accept a
+ * CharStream as input, which enables arbitrary character
+ * based filtering before tokenization. The {@link
+ * #correctOffset} method fixes offsets to account for
+ * removal or insertion of characters, so that the offsets
+ * reported in the tokens match the character offsets of the
+ * original Reader.
*/
public abstract class CharStream extends Reader {
/**
* Called by CharFilter(s) and Tokenizer to correct token offset.
*
- * @param currentOff current offset
- * @return corrected token offset
+ * @param currentOff offset as seen in the output
+ * @return corrected offset based on the input
*/
public abstract int correctOffset(int currentOff);
}
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
URL:
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
(original)
+++
lucene/java/trunk/src/java/org/apache/lucene/analysis/MappingCharFilter.java
Tue Aug 25 12:02:17 2009
@@ -21,10 +21,10 @@
import java.util.LinkedList;
/**
- * {@link CharFilter} that applies the mappings contained in
- * a {@link NormalizeCharMap} to the character stream.
- *
- * @version $Id$
+ * Simplistic {@link CharFilter} that applies the mappings
+ * contained in a {@link NormalizeCharMap} to the character
+ * stream, and corrects the resulting changes to the
+ * offsets.
*/
public class MappingCharFilter extends BaseCharFilter {
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
URL:
http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java?rev=807574&r1=807573&r2=807574&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
Tue Aug 25 12:02:17 2009
@@ -23,7 +23,6 @@
/**
* Holds a map of String input to String output, to be used
* with {@link MappingCharFilter}.
- * @version $Id$
*/
public class NormalizeCharMap {
@@ -32,6 +31,14 @@
String normStr;
int diff;
+ /** Records a replacement to be applied to the input
+ * stream. Whenever <code>singleMatch</code> occurs in
+ * the input, it will be replaced with
+ * <code>replacement</code>.
+ *
+ * @param singleMatch input String to be replaced
+ * @param replacement output String
+ */
public void add(String singleMatch, String replacement) {
NormalizeCharMap currMap = this;
for(int i = 0; i < singleMatch.length(); i++) {