Lucene.Net.Analysis.Compound refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6dc3ac1f Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6dc3ac1f Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6dc3ac1f Branch: refs/heads/api-work Commit: 6dc3ac1fad1053c2f65856ddd15243dd57e5045b Parents: 487927c Author: Shad Storhaug <[email protected]> Authored: Fri Feb 3 01:12:23 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Fri Feb 3 01:13:43 2017 +0700 ---------------------------------------------------------------------- .../Compound/CompoundWordTokenFilterBase.cs | 45 +++++---- .../DictionaryCompoundWordTokenFilter.cs | 25 +++-- .../DictionaryCompoundWordTokenFilterFactory.cs | 9 +- .../Analysis/Compound/Hyphenation/ByteVector.cs | 2 +- .../Analysis/Compound/Hyphenation/CharVector.cs | 9 +- .../Analysis/Compound/Hyphenation/Hyphen.cs | 2 +- .../Compound/Hyphenation/Hyphenation.cs | 9 +- .../Compound/Hyphenation/HyphenationTree.cs | 66 +++++++------ .../Compound/Hyphenation/PatternConsumer.cs | 49 +++++----- .../Compound/Hyphenation/PatternParser.cs | 97 +++++++++++++++----- .../Compound/Hyphenation/TernaryTree.cs | 62 ++++++------- .../HyphenationCompoundWordTokenFilter.cs | 36 ++++---- ...HyphenationCompoundWordTokenFilterFactory.cs | 39 ++++---- 13 files changed, 251 insertions(+), 199 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs index 705ce55..0d42753 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs @@ -26,18 +26,15 @@ namespace Lucene.Net.Analysis.Compound /// <summary> /// Base class for decomposition token filters. - /// <para> - /// - /// <a name="version"></a> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// CompoundWordTokenFilterBase: - /// <ul> - /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 - /// supplementary characters in strings and char arrays provided as compound word - /// dictionaries. - /// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets. - /// </ul> - /// </para> + /// <para/> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="CompoundWordTokenFilterBase"/>: + /// <list type="bullet"> + /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 + /// supplementary characters in strings and char arrays provided as compound word + /// dictionaries.</item> + /// <item>As of 4.4, <see cref="CompoundWordTokenFilterBase"/> doesn't update offsets.</item> + /// </list> /// </summary> public abstract class CompoundWordTokenFilterBase : TokenFilter { @@ -56,16 +53,16 @@ namespace Lucene.Net.Analysis.Compound /// </summary> public const int DEFAULT_MAX_SUBWORD_SIZE = 15; - protected internal readonly LuceneVersion m_matchVersion; - protected internal readonly CharArraySet m_dictionary; - protected internal readonly LinkedList<CompoundToken> m_tokens; - protected internal readonly int m_minWordSize; - protected internal readonly int m_minSubwordSize; - protected internal readonly int m_maxSubwordSize; - protected internal readonly bool m_onlyLongestMatch; + protected readonly LuceneVersion m_matchVersion; + protected readonly CharArraySet m_dictionary; + protected readonly LinkedList<CompoundToken> m_tokens; + protected readonly int m_minWordSize; + protected readonly int m_minSubwordSize; + protected readonly int m_maxSubwordSize; + protected readonly bool m_onlyLongestMatch; - protected internal readonly ICharTermAttribute m_termAtt; - protected internal readonly IOffsetAttribute m_offsetAtt; + protected readonly ICharTermAttribute m_termAtt; + protected readonly IOffsetAttribute m_offsetAtt; private readonly IPositionIncrementAttribute posIncAtt; private AttributeSource.State current; @@ -144,7 +141,7 @@ namespace Lucene.Net.Analysis.Compound } /// <summary> - /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list. + /// Decomposes the current <see cref="m_termAtt"/> and places <see cref="CompoundToken"/> instances in the <see cref="m_tokens"/> list. /// The original token may not be placed in the list, as it is automatically passed through this filter. /// </summary> protected abstract void Decompose(); @@ -159,7 +156,7 @@ namespace Lucene.Net.Analysis.Compound /// <summary> /// Helper class to hold decompounded token information /// </summary> - protected internal class CompoundToken + protected class CompoundToken { private readonly ICharSequence txt; private readonly int startOffset, endOffset; @@ -180,7 +177,7 @@ namespace Lucene.Net.Analysis.Compound } /// <summary> - /// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary> + /// Construct the compound token based on a slice of the current <see cref="CompoundWordTokenFilterBase.m_termAtt"/>. </summary> public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length) { this.txt = outerInstance.m_termAtt.SubSequence(offset, offset + length); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs index 7221927..12ce070 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs @@ -21,27 +21,26 @@ namespace Lucene.Net.Analysis.Compound */ /// <summary> - /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages. + /// A <see cref="TokenFilter"/> that decomposes compound words found in many Germanic languages. /// <para> /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find /// "Donaudampfschiff" even when you only enter "schiff". /// It uses a brute-force algorithm to achieve this. /// </para> /// <para> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// CompoundWordTokenFilterBase: - /// <ul> - /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 - /// supplementary characters in strings and char arrays provided as compound word - /// dictionaries. - /// </ul> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="CompoundWordTokenFilterBase"/>: + /// <list type="bullet"> + /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 + /// supplementary characters in strings and char arrays provided as compound word + /// dictionaries.</item> + /// </list> /// </para> /// </summary> public class DictionaryCompoundWordTokenFilter : CompoundWordTokenFilterBase { - /// <summary> - /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/> + /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/> /// </summary> /// <param name="matchVersion"> /// Lucene version to enable correct Unicode 4.0 behavior in the @@ -49,7 +48,7 @@ namespace Lucene.Net.Analysis.Compound /// href="CompoundWordTokenFilterBase.html#version" /// >CompoundWordTokenFilterBase</a> for details. </param> /// <param name="input"> - /// the <seealso cref="TokenStream"/> to process </param> + /// the <see cref="TokenStream"/> to process </param> /// <param name="dictionary"> /// the word dictionary to match against. </param> public DictionaryCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary) @@ -62,7 +61,7 @@ namespace Lucene.Net.Analysis.Compound } /// <summary> - /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/> + /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/> /// </summary> /// <param name="matchVersion"> /// Lucene version to enable correct Unicode 4.0 behavior in the @@ -70,7 +69,7 @@ namespace Lucene.Net.Analysis.Compound /// href="CompoundWordTokenFilterBase.html#version" /// >CompoundWordTokenFilterBase</a> for details. </param> /// <param name="input"> - /// the <seealso cref="TokenStream"/> to process </param> + /// the <see cref="TokenStream"/> to process </param> /// <param name="dictionary"> /// the word dictionary to match against. </param> /// <param name="minWordSize"> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs index a44eb19..4dcb266 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs @@ -21,15 +21,16 @@ namespace Lucene.Net.Analysis.Compound */ /// <summary> - /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>. - /// <pre class="prettyprint"> + /// Factory for <see cref="DictionaryCompoundWordTokenFilter"/>. + /// <code> /// <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt" /// minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/> /// </analyzer> - /// </fieldType></pre> + /// </fieldType> + /// </code> /// </summary> public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware { @@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.Compound private readonly bool onlyLongestMatch; /// <summary> - /// Creates a new DictionaryCompoundWordTokenFilterFactory </summary> + /// Creates a new <see cref="DictionaryCompoundWordTokenFilterFactory"/> </summary> public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs index 206a7c4..4df2e3a 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs @@ -27,7 +27,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// </summary> public class ByteVector { - /// <summary> /// Capacity increment size /// </summary> @@ -122,6 +121,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation get { return array.Length; } } + // LUCENENET NOTE: Not needed (replaced with this[]) //public virtual void Put(int index, sbyte val) //{ // array[index] = val; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs index c9f513b..2e67343 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs @@ -99,7 +99,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation public virtual object Clone() { - CharVector cv = new CharVector(array, blockSize); + CharVector cv = new CharVector((char[])array.Clone(), blockSize); cv.n = this.n; return cv; } @@ -117,8 +117,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// LUCENENET indexer for .NET /// </summary> - /// <param name="index"></param> - /// <returns></returns> public virtual char this[int index] { get { return array[index]; } @@ -128,9 +126,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// return number of items in array /// </summary> - public virtual int Length() + public virtual int Length { - return n; + get { return n; } } /// <summary> @@ -141,6 +139,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation get { return array.Length; } } + // LUCENENET NOTE: Not needed (replaced with this[]) //public virtual void Put(int index, char val) //{ // array[index] = val; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs index 08168a4..0fb57d9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs @@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// cases in some languages where words change spelling if they're split across /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes /// from TeX. - /// + /// <para/> /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. /// </summary> public class Hyphen http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs index ec33fd0..1cdb7d4 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs @@ -22,29 +22,28 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// This class represents a hyphenated word. - /// + /// <para/> /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. /// </summary> public class Hyphenation { - private readonly int[] hyphenPoints; /// <summary> - /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances + /// rawWord as made of alternating strings and <see cref="Hyphen"/> instances /// </summary> internal Hyphenation(int[] points) { hyphenPoints = points; } - /// <returns> the number of hyphenation points in the word </returns> + /// <summary> the number of hyphenation points in the word </summary> public virtual int Length { get { return hyphenPoints.Length; } } - /// <returns> the hyphenation points </returns> + /// <summary> the hyphenation points </summary> [WritableArray] [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")] public virtual int[] HyphenationPoints http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs index 7e1420d..c4dfe8b 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs @@ -27,7 +27,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// This tree structure stores the hyphenation patterns in an efficient way for /// fast lookup. It provides the provides the method to hyphenate a word. - /// + /// <para/> /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. /// </summary> public class HyphenationTree : TernaryTree, IPatternConsumer @@ -35,17 +35,17 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// value space: stores the interletter values /// </summary> - protected internal ByteVector m_vspace; + protected ByteVector m_vspace; /// <summary> /// This map stores hyphenation exceptions /// </summary> - protected internal IDictionary<string, IList<object>> m_stoplist; + protected IDictionary<string, IList<object>> m_stoplist; /// <summary> /// This map stores the character classes /// </summary> - protected internal TernaryTree m_classmap; + protected TernaryTree m_classmap; /// <summary> /// Temporary map to store interletter values on pattern loading. @@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <param name="values"> a string of digits from '0' to '9' representing the /// interletter values. </param> /// <returns> the index into the vspace array where the packed values are stored. </returns> - protected internal virtual int PackValues(string values) + protected virtual int PackValues(string values) { int i, n = values.Length; int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1; @@ -94,7 +94,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return offset; } - protected internal virtual string UnpackValues(int k) + protected virtual string UnpackValues(int k) { StringBuilder buf = new StringBuilder(); byte v = m_vspace[k++]; @@ -128,6 +128,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// Read hyphenation patterns from an XML file. /// </summary> /// <param name="f"> the filename </param> + /// <param name="encoding">The character encoding to use</param> /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(string filename, Encoding encoding) { @@ -138,7 +139,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Read hyphenation patterns from an XML file. /// </summary> - /// <param name="f"> the filename </param> + /// <param name="f"> a <see cref="FileInfo"/> object representing the file </param> /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(FileInfo f) { @@ -148,7 +149,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Read hyphenation patterns from an XML file. /// </summary> - /// <param name="f"> the filename </param> + /// <param name="f"> a <see cref="FileInfo"/> object representing the file </param> + /// <param name="encoding">The character encoding to use</param> /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(FileInfo f, Encoding encoding) { @@ -159,7 +161,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Read hyphenation patterns from an XML file. /// </summary> - /// <param name="source"> the InputSource for the file </param> + /// <param name="source"> <see cref="Stream"/> input source for the file </param> /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(Stream source) { @@ -169,7 +171,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Read hyphenation patterns from an XML file. /// </summary> - /// <param name="source"> the InputSource for the file </param> + /// <param name="source"> <see cref="Stream"/> input source for the file </param> + /// <param name="encoding">The character encoding to use</param> /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(Stream source, Encoding encoding) { @@ -190,6 +193,11 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation } } + /// <summary> + /// Read hyphenation patterns from an <see cref="XmlReader"/>. + /// </summary> + /// <param name="source"> <see cref="XmlReader"/> input source for the file </param> + /// <exception cref="IOException"> In case the parsing fails </exception> public virtual void LoadPatterns(XmlReader source) { PatternParser pp = new PatternParser(this); @@ -220,7 +228,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// String compare, returns 0 if equal or t is a substring of s /// </summary> - protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti) + protected virtual int HStrCmp(char[] s, int si, char[] t, int ti) { for (; s[si] == t[ti]; si++, ti++) { @@ -236,7 +244,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return s[si] - t[ti]; } - protected internal virtual byte[] GetValues(int k) + protected virtual byte[] GetValues(int k) { StringBuilder buf = new StringBuilder(); byte v = m_vspace[k++]; @@ -267,9 +275,10 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// interletter values. In other words, it does something like: /// </para> /// <code> - /// for(i=0; i<patterns.length; i++) { - /// if ( word.substring(index).startsWidth(patterns[i]) ) - /// update_interletter_values(patterns[i]); + /// for (i=0; i<patterns.Length; i++) + /// { + /// if (word.Substring(index).StartsWith(patterns[i])) + /// update_interletter_values(patterns[i]); /// } /// </code> /// <para> @@ -286,7 +295,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <param name="word"> null terminated word to match </param> /// <param name="index"> start index from word </param> /// <param name="il"> interletter values array to update </param> - protected internal virtual void SearchPatterns(char[] word, int index, byte[] il) + protected virtual void SearchPatterns(char[] word, int index, byte[] il) { byte[] values; int i = index; @@ -365,14 +374,14 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation } /// <summary> - /// Hyphenate word and return a Hyphenation object. + /// Hyphenate word and return a <see cref="Hyphenation"/> object. /// </summary> /// <param name="word"> the word to be hyphenated </param> /// <param name="remainCharCount"> Minimum number of characters allowed before the /// hyphenation point. </param> /// <param name="pushCharCount"> Minimum number of characters allowed after the /// hyphenation point. </param> - /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the + /// <returns> a <see cref="Hyphenation"/> object representing the /// hyphenated word or null if word is not hyphenated. </returns> public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount) { @@ -380,7 +389,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return Hyphenate(w, 0, w.Length, remainCharCount, pushCharCount); } + + /// <summary> + /// Hyphenate word and return an array of hyphenation points. + /// </summary> + /// <remarks> /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n /// may be absent, the first n is at offset, the first l is at offset + /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied @@ -392,11 +406,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) = /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset + /// iIgnoreAtBeginning - /// </summary> - - /// <summary> - /// Hyphenate word and return an array of hyphenation points. - /// </summary> + /// </remarks> /// <param name="w"> char array that contains the word </param> /// <param name="offset"> Offset to first character in word </param> /// <param name="len"> Length of word </param> @@ -404,7 +414,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// hyphenation point. </param> /// <param name="pushCharCount"> Minimum number of characters allowed after the /// hyphenation point. </param> - /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the + /// <returns> a <see cref="Hyphenation"/> object representing the /// hyphenated word or null if word is not hyphenated. </returns> public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount) { @@ -522,7 +532,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Add a character class to the tree. It is used by - /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes. + /// <see cref="PatternParser"/> as callback to add character classes. /// Character classes define the valid word characters for hyphenation. If a /// word contains a character not defined in any of the classes, it is not /// hyphenated. It also defines a way to normalize the characters in order to @@ -547,12 +557,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Add an exception to the tree. It is used by - /// <seealso cref="PatternParser PatternParser"/> class as callback to store the + /// <see cref="PatternParser"/> class as callback to store the /// hyphenation exceptions. /// </summary> /// <param name="word"> normalized word </param> /// <param name="hyphenatedword"> a vector of alternating strings and - /// <seealso cref="Hyphen hyphen"/> objects. </param> + /// <see cref="Hyphen"/> objects. </param> public virtual void AddException(string word, IList<object> hyphenatedword) { m_stoplist[word] = hyphenatedword; @@ -560,7 +570,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Add a pattern to the tree. Mainly, to be used by - /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to + /// <see cref="PatternParser"/> class as callback to add a pattern to /// the tree. /// </summary> /// <param name="pattern"> the hyphenation pattern </param> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs index 4929892..1d92db5 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs @@ -22,33 +22,32 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// This interface is used to connect the XML pattern file parser to the /// hyphenation tree. - /// - /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. + /// <para/> + /// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. /// </summary> public interface IPatternConsumer - { + { + /// <summary> + /// Add a character class. A character class defines characters that are + /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It + /// usually means to ignore case. + /// </summary> + /// <param name="chargroup"> character group </param> + void AddClass(string chargroup); - /// <summary> - /// Add a character class. A character class defines characters that are - /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It - /// usually means to ignore case. - /// </summary> - /// <param name="chargroup"> character group </param> - void AddClass(string chargroup); + /// <summary> + /// Add a hyphenation exception. An exception replaces the result obtained by + /// the algorithm for cases for which this fails or the user wants to provide + /// his own hyphenation. A hyphenatedword is a vector of alternating String's + /// and <see cref="Hyphen"/> instances + /// </summary> + void AddException(string word, IList<object> hyphenatedword); - /// <summary> - /// Add a hyphenation exception. An exception replaces the result obtained by - /// the algorithm for cases for which this fails or the user wants to provide - /// his own hyphenation. A hyphenatedword is a vector of alternating String's - /// and <seealso cref="Hyphen"/> instances - /// </summary> - void AddException(string word, IList<object> hyphenatedword); - - /// <summary> - /// Add hyphenation patterns. - /// </summary> - /// <param name="pattern"> the pattern </param> - /// <param name="values"> interletter values expressed as a string of digit characters. </param> - void AddPattern(string pattern, string values); - } + /// <summary> + /// Add hyphenation patterns. + /// </summary> + /// <param name="pattern"> the pattern </param> + /// <param name="values"> interletter values expressed as a string of digit characters. </param> + void AddPattern(string pattern, string values); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs index 8849cff..9b9f226 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs @@ -27,7 +27,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// A XMLReader document handler to read and parse hyphenation patterns from a XML /// file. - /// + /// <para/> /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather /// than a SAX parser. /// </summary> @@ -59,7 +59,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation hyphenChar = '-'; // default } - public PatternParser(IPatternConsumer consumer) : this() + public PatternParser(IPatternConsumer consumer) + : this() { this.consumer = consumer; } @@ -79,14 +80,26 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Parses a hyphenation pattern file. /// </summary> - /// <param name="filename"> the filename </param> + /// <param name="path">The complete file path to be read.</param> /// <exception cref="IOException"> In case of an exception while parsing </exception> - public virtual void Parse(string filename) + public virtual void Parse(string path) + { + // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in. + Parse(path, Encoding.UTF8); + } + + /// <summary> + /// Parses a hyphenation pattern file. + /// </summary> + /// <param name="path">The complete file path to be read.</param> + /// <param name="encoding">The character encoding to use</param> + /// <exception cref="IOException"> In case of an exception while parsing </exception> + public virtual void Parse(string path, Encoding encoding) { var xmlReaderSettings = GetXmlReaderSettings(); // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in. - using (var src = XmlReader.Create(filename, xmlReaderSettings)) + using (var src = XmlReader.Create(new StreamReader(path, encoding), xmlReaderSettings)) { Parse(src); } @@ -95,7 +108,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Parses a hyphenation pattern file. /// </summary> - /// <param name="file"> the pattern file </param> + /// <param name="file"> a <see cref="FileInfo"/> object representing the file </param> + /// <exception cref="IOException"> In case of an exception while parsing </exception> public virtual void Parse(FileInfo file) { Parse(file, Encoding.UTF8); @@ -104,7 +118,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Parses a hyphenation pattern file. /// </summary> - /// <param name="file"> the pattern file </param> + /// <param name="file"> a <see cref="FileInfo"/> object representing the file </param> + /// <param name="encoding">The character encoding to use</param> + /// <exception cref="IOException"> In case of an exception while parsing </exception> public virtual void Parse(FileInfo file, Encoding encoding) { var xmlReaderSettings = GetXmlReaderSettings(); @@ -118,7 +134,14 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Parses a hyphenation pattern file. /// </summary> - /// <param name="file"> the pattern file </param> + /// <param name="xmlStream"> + /// The stream containing the XML data. + /// <para/> + /// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark + /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading + /// the stream, and processing continues parsing the input as a stream of (Unicode) characters. + /// </param> + /// <exception cref="IOException"> In case of an exception while parsing </exception> public virtual void Parse(Stream xmlStream) { var xmlReaderSettings = GetXmlReaderSettings(); @@ -132,7 +155,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Parses a hyphenation pattern file. /// </summary> - /// <param name="source"> the InputSource for the file </param> + /// <param name="source"> <see cref="XmlReader"/> input source for the file </param> /// <exception cref="IOException"> In case of an exception while parsing </exception> public virtual void Parse(XmlReader source) { @@ -209,7 +232,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return result; } - protected internal virtual string ReadToken(StringBuilder chars) + protected virtual string ReadToken(StringBuilder chars) { string word; bool space = false; @@ -266,7 +289,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return null; } - protected internal static string GetPattern(string word) + protected static string GetPattern(string word) { StringBuilder pat = new StringBuilder(); int len = word.Length; @@ -280,7 +303,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return pat.ToString(); } - protected internal virtual IList<object> NormalizeException<T1>(IList<T1> ex) + protected virtual IList<object> NormalizeException<T1>(IList<T1> ex) { List<object> res = new List<object>(); for (int i = 0; i < ex.Count; i++) @@ -321,7 +344,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return res; } - protected internal virtual string GetExceptionWord<T1>(IList<T1> ex) + protected virtual string GetExceptionWord<T1>(IList<T1> ex) { StringBuilder res = new StringBuilder(); for (int i = 0; i < ex.Count; i++) @@ -342,7 +365,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation return res.ToString(); } - protected internal static string GetInterletterValues(string pat) + protected static string GetInterletterValues(string pat) { StringBuilder il = new StringBuilder(); string word = pat + "a"; // add dummy letter to serve as sentinel @@ -388,9 +411,19 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation // ContentHandler methods // - /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String, - /// java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso> - public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs) + /// <summary> + /// Receive notification of the beginning of an element. + /// <para/> + /// The Parser will invoke this method at the beginning of every element in the XML document; + /// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event + /// (even when the element is empty). All of the element's content will be reported, + /// in order, before the corresponding endElement event. + /// </summary> + /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param> + /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param> + /// <param name="raw"></param> + /// <param name="attrs"> the attributes attached to the element. If there are no attributes, it shall be an empty Attributes object. The value of this object after startElement returns is undefined</param> + public virtual void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs) { if (local.Equals("hyphen-char")) { @@ -425,9 +458,17 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation token.Length = 0; } - /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String, - /// java.lang.String, java.lang.String) </seealso> - public void EndElement(string uri, string local, string raw) + /// <summary> + /// Receive notification of the end of an element. + /// <para/> + /// The parser will invoke this method at the end of every element in the XML document; + /// there will be a corresponding <see cref="StartElement"/> event for every + /// <see cref="EndElement"/> event (even when the element is empty). + /// </summary> + /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param> + /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param> + /// <param name="raw"></param> + public virtual void EndElement(string uri, string local, string raw) { if (token.Length > 0) { @@ -464,8 +505,20 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation } } - /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso> - public void Characters(char[] ch, int start, int length) + /// <summary> + /// Receive notification of character data. + /// <para/> + /// The Parser will call this method to report each chunk of character data. Parsers may + /// return all contiguous character data in a single chunk, or they may split it into + /// several chunks; however, all of the characters in any single event must come from + /// the same external entity so that the Locator provides useful information. + /// <para/> + /// The application must not attempt to read from the array outside of the specified range. + /// </summary> + /// <param name="ch"></param> + /// <param name="start"></param> + /// <param name="length"></param> + public virtual void Characters(char[] ch, int start, int length) { StringBuilder chars = new StringBuilder(length); chars.Append(ch, start, length); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs index 87e9d19..82feaec 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs @@ -70,65 +70,63 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation : ICloneable #endif { - /// <summary> - /// We use 4 arrays to represent a node. I guess I should have created a proper - /// node class, but somehow Knuth's pascal code made me forget we now have a - /// portable language with virtual memory management and automatic garbage - /// collection! And now is kind of late, furthermore, if it ain't broken, don't - /// fix it. - /// </summary> + // We use 4 arrays to represent a node.I guess I should have created a proper + // node class, but somehow Knuth's pascal code made me forget we now have a + // portable language with virtual memory management and automatic garbage + // collection! And now is kind of late, furthermore, if it ain't broken, don't + // fix it. /// <summary> /// Pointer to low branch and to rest of the key when it is stored directly in /// this node, we don't have unions in java! /// </summary> - protected internal char[] m_lo; + protected char[] m_lo; /// <summary> /// Pointer to high branch. /// </summary> - protected internal char[] m_hi; + protected char[] m_hi; /// <summary> /// Pointer to equal branch and to data when this node is a string terminator. /// </summary> - protected internal char[] m_eq; + protected char[] m_eq; /// <summary> - /// <P> + /// <para> /// The character stored in this node: splitchar. Two special values are /// reserved: - /// </P> - /// <ul> - /// <li>0x0000 as string terminator</li> - /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li> - /// </ul> + /// </para> + /// <list type="bullet"> + /// <item>0x0000 as string terminator</item> + /// <item>0xFFFF to indicate that the branch starting at this node is compressed</item> + /// </list> /// <para> /// This shouldn't be a problem if we give the usual semantics to strings since /// 0xFFFF is guaranteed not to be an Unicode character. /// </para> /// </summary> - protected internal char[] m_sc; + protected char[] m_sc; /// <summary> /// This vector holds the trailing of the keys when the branch is compressed. /// </summary> - protected internal CharVector m_kv; + protected CharVector m_kv; - protected internal char m_root; + protected char m_root; - protected internal char m_freenode; + protected char m_freenode; - protected internal int m_length; // number of items in tree + protected int m_length; // number of items in tree - protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays + protected const int BLOCK_SIZE = 2048; // allocation size for arrays internal TernaryTree() { Init(); } - protected internal virtual void Init() + protected virtual void Init() { m_root = (char)0; m_freenode = (char)1; @@ -408,7 +406,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation get { return m_length; } } - public object Clone() + public virtual object Clone() { TernaryTree t = new TernaryTree(); t.m_lo = (char[])this.m_lo.Clone(); @@ -428,7 +426,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// upper halves, and so on in order to get a balanced tree. The array of keys /// is assumed to be sorted in ascending order. /// </summary> - protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n) + protected virtual void InsertBalanced(string[] k, char[] v, int offset, int n) { int m; if (n < 1) @@ -555,7 +553,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// </summary> private string curkey; - internal class Item + private class Item #if FEATURE_CLONEABLE : ICloneable #endif @@ -585,12 +583,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// Node stack /// </summary> - internal Stack<Item> ns; + private Stack<Item> ns; /// <summary> - /// key stack implemented with a StringBuilder + /// key stack implemented with a <see cref="StringBuilder"/> /// </summary> - internal StringBuilder ks; + private StringBuilder ks; private bool isInitialized = false; @@ -626,7 +624,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// traverse upwards /// </summary> - internal virtual int Up() + private int Up() { Item i = new Item(); int res = 0; @@ -690,7 +688,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation /// <summary> /// traverse the tree to find next key /// </summary> - internal virtual int Run() + private int Run() { if (cur == -1) { @@ -794,7 +792,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation @out.WriteLine("Number of keys = " + Convert.ToString(m_length)); @out.WriteLine("Node count = " + Convert.ToString(m_freenode)); // System.out.println("Array length = " + Integer.toString(eq.length)); - @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length())); + @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length)); /* * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 ) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs index 4608c01..83a1a46 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs @@ -24,20 +24,20 @@ namespace Lucene.Net.Analysis.Compound */ /// <summary> - /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages. + /// A <see cref="TokenFilter"/> that decomposes compound words found in many Germanic languages. /// <para> /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find /// "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation /// grammar and a word dictionary to achieve this. /// </para> /// <para> - /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating - /// CompoundWordTokenFilterBase: - /// <ul> - /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 - /// supplementary characters in strings and char arrays provided as compound word - /// dictionaries. - /// </ul> + /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating + /// <see cref="CompoundWordTokenFilterBase"/>: + /// <list type="bullet"> + /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0 + /// supplementary characters in strings and char arrays provided as compound word + /// dictionaries.</item> + /// </list> /// </para> /// </summary> public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase @@ -45,7 +45,7 @@ namespace Lucene.Net.Analysis.Compound private readonly HyphenationTree hyphenator; /// <summary> - /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance. + /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance. /// </summary> /// <param name="matchVersion"> /// Lucene version to enable correct Unicode 4.0 behavior in the @@ -66,7 +66,7 @@ namespace Lucene.Net.Analysis.Compound } /// <summary> - /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance. + /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance. /// </summary> /// <param name="matchVersion"> /// Lucene version to enable correct Unicode 4.0 behavior in the @@ -93,16 +93,13 @@ namespace Lucene.Net.Analysis.Compound : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch) { - this.hyphenator = hyphenator; } /// <summary> - /// Create a HyphenationCompoundWordTokenFilter with no dictionary. + /// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary. /// <para> - /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean) - /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, - /// null, minWordSize, minSubwordSize, maxSubwordSize } + /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/> /// </para> /// </summary> public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, @@ -114,11 +111,9 @@ namespace Lucene.Net.Analysis.Compound } /// <summary> - /// Create a HyphenationCompoundWordTokenFilter with no dictionary. + /// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary. /// <para> - /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int) - /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator, - /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE } + /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, int, int, int)"/> /// </para> /// </summary> public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, @@ -143,6 +138,7 @@ namespace Lucene.Net.Analysis.Compound /// Create a hyphenator tree /// </summary> /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param> + /// <param name="encoding">The character encoding to use</param> /// <returns> An object representing the hyphenation patterns </returns> /// <exception cref="IOException"> If there is a low-level I/O error. </exception> public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding) @@ -165,6 +161,7 @@ namespace Lucene.Net.Analysis.Compound /// Create a hyphenator tree /// </summary> /// <param name="hyphenationFile"> the file of the XML grammar to load </param> + /// <param name="encoding">The character encoding to use</param> /// <returns> An object representing the hyphenation patterns </returns> /// <exception cref="IOException"> If there is a low-level I/O error. </exception> public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding) @@ -187,6 +184,7 @@ namespace Lucene.Net.Analysis.Compound /// Create a hyphenator tree /// </summary> /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param> + /// <param name="encoding">The character encoding to use</param> /// <returns> An object representing the hyphenation patterns </returns> /// <exception cref="IOException"> If there is a low-level I/O error. </exception> public static HyphenationTree GetHyphenationTree(Stream hyphenationSource, Encoding encoding) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs index 5dfec4a..75a7917 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs @@ -25,34 +25,33 @@ namespace Lucene.Net.Analysis.Compound */ /// <summary> - /// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>. - /// <para> + /// Factory for <see cref="HyphenationCompoundWordTokenFilter"/>. + /// <para/> /// This factory accepts the following parameters: - /// <ul> - /// <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. - /// See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>. - /// <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8. - /// <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary. - /// <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5. - /// <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2. - /// <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15. - /// <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword - /// to the stream. defaults to false. - /// </ul> - /// </para> + /// <list type="bullet"> + /// <item><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. + /// See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.</item> + /// <item><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.</item> + /// <item><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.</item> + /// <item><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.</item> + /// <item><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.</item> + /// <item><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.</item> + /// <item><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword + /// to the stream. defaults to false.</item> + /// </list> /// <para> - /// <pre class="prettyprint"> + /// <code> /// <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"> /// <analyzer> /// <tokenizer class="solr.WhitespaceTokenizerFactory"/> /// <filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8" /// dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/> /// </analyzer> - /// </fieldType></pre> - /// + /// </fieldType> + /// </code> /// </para> /// </summary> - /// <seealso cref= HyphenationCompoundWordTokenFilter </seealso> + /// <seealso cref="HyphenationCompoundWordTokenFilter"/> public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware { private CharArraySet dictionary; @@ -66,13 +65,13 @@ namespace Lucene.Net.Analysis.Compound private readonly bool onlyLongestMatch; /// <summary> - /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary> + /// Creates a new <see cref="HyphenationCompoundWordTokenFilterFactory"/> </summary> public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args) { AssureMatchVersion(); dictFile = Get(args, "dictionary"); encoding = Get(args, "encoding"); - hypFile = Require(args, "hyphenator"); // LUCENENET TODO: Not sure what to do with this + hypFile = Require(args, "hyphenator"); minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE); minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE); maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
