Fixed compile errors; the project now builds
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc52518c Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc52518c Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc52518c Branch: refs/heads/branch_4x Commit: fc52518cef229f535ec4a33e191f810cbbd2b983 Parents: d6717ed Author: James Blair <[email protected]> Authored: Tue Nov 5 10:33:19 2013 -0500 Committer: James Blair <[email protected]> Committed: Tue Nov 5 10:33:19 2013 -0500 ---------------------------------------------------------------------- .../Analyzers/AR/ArabicNormalizationFilter.cs | 7 ++- .../Analyzers/Charfilter/BaseCharFilter.cs | 52 ++++++++++---------- .../Analyzers/Charfilter/HTMLStripCharfilter.cs | 46 +++++++++-------- .../Analyzers/Charfilter/MappingCharFilter.cs | 20 +++----- .../Analyzers/Charfilter/NormalizeCharMap.cs | 9 ++-- .../Analyzers/Miscellaneous/PatternAnalyzer.cs | 12 ++--- src/core/Support/Character.cs | 19 +++++++ 7 files changed, 95 insertions(+), 70 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/AR/ArabicNormalizationFilter.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/AR/ArabicNormalizationFilter.cs b/src/contrib/Analyzers/AR/ArabicNormalizationFilter.cs index 528b4a4..a4a196c 100644 --- a/src/contrib/Analyzers/AR/ArabicNormalizationFilter.cs +++ b/src/contrib/Analyzers/AR/ArabicNormalizationFilter.cs @@ -22,9 +22,12 @@ namespace Lucene.Net.Analysis.AR public class ArabicNormalizationFilter : TokenFilter { private readonly ArabicNormalizer _normalizer = new ArabicNormalizer(); - private readonly CharTermAttribute _termAtt = AddAttribute<CharTermAttribute>(); + private readonly CharTermAttribute _termAtt; - public ArabicNormalizationFilter(TokenStream input) : base(input) { } + public 
ArabicNormalizationFilter(TokenStream input) : base(input) + { + _termAtt = AddAttribute<CharTermAttribute>(); + } public override bool IncrementToken() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/Charfilter/BaseCharFilter.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/Charfilter/BaseCharFilter.cs b/src/contrib/Analyzers/Charfilter/BaseCharFilter.cs index fb21282..24c9550 100644 --- a/src/contrib/Analyzers/Charfilter/BaseCharFilter.cs +++ b/src/contrib/Analyzers/Charfilter/BaseCharFilter.cs @@ -7,9 +7,9 @@ namespace Lucene.Net.Analysis.Charfilter { public abstract class BaseCharFilter : CharFilter { - private int[] _offsets; - private int[] _diffs; - private int _size; + private int[] offsets; + private int[] diffs; + private int size; protected BaseCharFilter(StreamReader input) : base(input) @@ -18,15 +18,15 @@ namespace Lucene.Net.Analysis.Charfilter protected override int Correct(int currentOff) { - if (_offsets == null || currentOff < _offsets[0]) + if (offsets == null || currentOff < offsets[0]) { return currentOff; } - var hi = _size - 1; - if (currentOff >= _offsets[hi]) + var hi = size - 1; + if (currentOff >= offsets[hi]) { - return currentOff + _diffs[hi]; + return currentOff + diffs[hi]; } var lo = 0; @@ -35,50 +35,50 @@ namespace Lucene.Net.Analysis.Charfilter while (hi >= lo) { mid = Number.URShift((lo + hi), 1); - if (currentOff < _offsets[mid]) + if (currentOff < offsets[mid]) hi = mid - 1; - else if (currentOff > _offsets[mid]) + else if (currentOff > offsets[mid]) lo = mid + 1; else - return currentOff + _diffs[mid]; + return currentOff + diffs[mid]; } - if (currentOff < _offsets[mid]) - return mid == 0 ? currentOff : currentOff + _diffs[mid - 1]; + if (currentOff < offsets[mid]) + return mid == 0 ? 
currentOff : currentOff + diffs[mid - 1]; else - return currentOff + _diffs[mid]; + return currentOff + diffs[mid]; } protected int LastCumulativeDiff { - get { return _offsets == null ? 0 : _diffs[_size - 1]; } + get { return offsets == null ? 0 : diffs[size - 1]; } } protected void AddOffCorrectMap(int off, int cumulativeDiff) { - if (_offsets == null) + if (offsets == null) { - _offsets = new int[64]; - _diffs = new int[64]; + offsets = new int[64]; + diffs = new int[64]; } - else if (_size == _offsets.Length) + else if (size == offsets.Length) { - _offsets = ArrayUtil.Grow(_offsets); - _diffs = ArrayUtil.Grow(_diffs); + offsets = ArrayUtil.Grow(offsets); + diffs = ArrayUtil.Grow(diffs); } - Debug.Assert(_size == 0 || off >= _offsets[_size - 1], + Debug.Assert(size == 0 || off >= offsets[size - 1], string.Format("Offset #{0}({1}) is less than the last recorded offset {2}\n{3}\n{4}", - _size, off, _offsets[_size - 1], Arrays.ToString(_offsets), Arrays.ToString(_diffs))); + size, off, offsets[size - 1], Arrays.ToString(offsets), Arrays.ToString(diffs))); - if (_size == 0 || off != _offsets[_size - 1]) + if (size == 0 || off != offsets[size - 1]) { - _offsets[_size] = off; - _diffs[_size++] = cumulativeDiff; + offsets[size] = off; + diffs[size++] = cumulativeDiff; } else { - _diffs[_size - 1] = cumulativeDiff; + diffs[size - 1] = cumulativeDiff; } } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/Charfilter/HTMLStripCharfilter.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/Charfilter/HTMLStripCharfilter.cs b/src/contrib/Analyzers/Charfilter/HTMLStripCharfilter.cs index 225157a..8796e71 100644 --- a/src/contrib/Analyzers/Charfilter/HTMLStripCharfilter.cs +++ b/src/contrib/Analyzers/Charfilter/HTMLStripCharfilter.cs @@ -40,7 +40,7 @@ namespace Lucene.Net.Analysis.Charfilter private const int STYLE = 42; private const int STYLE_COMMENT = 44; - private static readonly 
ResourceManager ResourceManager = + private static readonly ResourceManager Resources = new ResourceManager("HTMLCharSTripFilterResources", Assembly.GetAssembly(typeof (HTMLStripCharFilter))); /// <summary> @@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Charfilter /// <summary> /// Translates characters to character classes. /// </summary> - private static readonly string ZZ_CMAP_PACKED = ResourceManager.GetString("ZZ_CMAP_PACKED"); + private static readonly string ZZ_CMAP_PACKED = Resources.GetString("ZZ_CMAP_PACKED"); /// <summary> @@ -86,7 +86,7 @@ namespace Lucene.Net.Analysis.Charfilter private static readonly int[] ZZ_ACTION = zzUnpackAction(); - private static readonly string ZZ_ACTION_PACKED_0 = ResourceManager.GetString("ZZ_ACTION_PACKED_0"); + private static readonly string ZZ_ACTION_PACKED_0 = Resources.GetString("ZZ_ACTION_PACKED_0"); /// <summary> @@ -95,7 +95,7 @@ namespace Lucene.Net.Analysis.Charfilter private static readonly int[] ZZ_ROWMAP = zzUnpackRowMap(); - private static readonly string ZZ_ROWMAP_PACKED_0 = ResourceManager.GetString("ZZ_ROWMAP_PACKED_0"); + private static readonly string ZZ_ROWMAP_PACKED_0 = Resources.GetString("ZZ_ROWMAP_PACKED_0"); /// <summary> @@ -104,33 +104,33 @@ namespace Lucene.Net.Analysis.Charfilter private static readonly int[] ZZ_TRANS = zzUnpackTrans(); - private static readonly string ZZ_TRANS_PACKED_0 = ResourceManager.GetString("ZZ_TRANS_PACKED_0"); + private static readonly string ZZ_TRANS_PACKED_0 = Resources.GetString("ZZ_TRANS_PACKED_0"); - private static readonly string ZZ_TRANS_PACKED_1 = ResourceManager.GetString("ZZ_TRANS_PACKED_1"); + private static readonly string ZZ_TRANS_PACKED_1 = Resources.GetString("ZZ_TRANS_PACKED_1"); - private static readonly string ZZ_TRANS_PACKED_2 = ResourceManager.GetString("ZZ_TRANS_PACKED_2"); + private static readonly string ZZ_TRANS_PACKED_2 = Resources.GetString("ZZ_TRANS_PACKED_2"); - private static readonly string ZZ_TRANS_PACKED_3 = 
ResourceManager.GetString("ZZ_TRANS_PACKED_3"); + private static readonly string ZZ_TRANS_PACKED_3 = Resources.GetString("ZZ_TRANS_PACKED_3"); - private static readonly string ZZ_TRANS_PACKED_4 = ResourceManager.GetString("ZZ_TRANS_PACKED_4"); + private static readonly string ZZ_TRANS_PACKED_4 = Resources.GetString("ZZ_TRANS_PACKED_4"); - private static readonly string ZZ_TRANS_PACKED_5 = ResourceManager.GetString("ZZ_TRANS_PACKED_5"); + private static readonly string ZZ_TRANS_PACKED_5 = Resources.GetString("ZZ_TRANS_PACKED_5"); - private static readonly string ZZ_TRANS_PACKED_6 = ResourceManager.GetString("ZZ_TRANS_PACKED_6"); + private static readonly string ZZ_TRANS_PACKED_6 = Resources.GetString("ZZ_TRANS_PACKED_6"); - private static readonly string ZZ_TRANS_PACKED_7 = ResourceManager.GetString("ZZ_TRANS_PACKED_7"); + private static readonly string ZZ_TRANS_PACKED_7 = Resources.GetString("ZZ_TRANS_PACKED_7"); - private static readonly string ZZ_TRANS_PACKED_8 = ResourceManager.GetString("ZZ_TRANS_PACKED_8"); + private static readonly string ZZ_TRANS_PACKED_8 = Resources.GetString("ZZ_TRANS_PACKED_8"); - private static readonly string ZZ_TRANS_PACKED_9 = ResourceManager.GetString("ZZ_TRANS_PACKED_9"); + private static readonly string ZZ_TRANS_PACKED_9 = Resources.GetString("ZZ_TRANS_PACKED_9"); - private static readonly string ZZ_TRANS_PACKED_10 = ResourceManager.GetString("ZZ_TRANS_PACKED_10"); + private static readonly string ZZ_TRANS_PACKED_10 = Resources.GetString("ZZ_TRANS_PACKED_10"); - private static readonly string ZZ_TRANS_PACKED_11 = ResourceManager.GetString("ZZ_TRANS_PACKED_11"); + private static readonly string ZZ_TRANS_PACKED_11 = Resources.GetString("ZZ_TRANS_PACKED_11"); - private static readonly string ZZ_TRANS_PACKED_12 = ResourceManager.GetString("ZZ_TRANS_PACKED_12"); + private static readonly string ZZ_TRANS_PACKED_12 = Resources.GetString("ZZ_TRANS_PACKED_12"); - private static readonly string ZZ_TRANS_PACKED_13 = 
ResourceManager.GetString("ZZ_TRANS_PACKED_13"); + private static readonly string ZZ_TRANS_PACKED_13 = Resources.GetString("ZZ_TRANS_PACKED_13"); /* error codes */ private static readonly int ZZ_UNKNOWN_ERROR = 0; @@ -152,7 +152,7 @@ namespace Lucene.Net.Analysis.Charfilter private static readonly int[] ZZ_ATTRIBUTE = zzUnpackAttribute(); - private static readonly string ZZ_ATTRIBUTE_PACKED_0 = ResourceManager.GetString("ZZ_ATTRIBUTE_PACKED_0"); + private static readonly string ZZ_ATTRIBUTE_PACKED_0 = Resources.GetString("ZZ_ATTRIBUTE_PACKED_0"); /* user code: */ private static readonly IDictionary<String, String> upperCaseVariantsAccepted = new HashMap<String, String>(); @@ -179,7 +179,7 @@ namespace Lucene.Net.Analysis.Charfilter private int eofReturnValue; private int inputStart; private int outputCharCount; - private TextSegment outputSegment = inputSegment; + private TextSegment outputSegment; private int previousRestoreState; private int restoreState; private int yychar; @@ -312,6 +312,7 @@ namespace Lucene.Net.Analysis.Charfilter : base(source) { zzReader = source; + outputSegment = inputSegment; } @@ -323,10 +324,13 @@ namespace Lucene.Net.Analysis.Charfilter * will not be filtered out. 
*/ - public HTMLStripCharFilter(StreamReader source, ISet<String> escapedTags) + public HTMLStripCharFilter(StreamReader source, ISet<string> escapedTags) : base(source) { zzReader = source; + + outputSegment = inputSegment; + if (null != escapedTags) { foreach (string tag in escapedTags) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/Charfilter/MappingCharFilter.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/Charfilter/MappingCharFilter.cs b/src/contrib/Analyzers/Charfilter/MappingCharFilter.cs index 6df530f..a870d8c 100644 --- a/src/contrib/Analyzers/Charfilter/MappingCharFilter.cs +++ b/src/contrib/Analyzers/Charfilter/MappingCharFilter.cs @@ -1,12 +1,8 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Diagnostics; using System.IO; -using System.Linq; -using System.Text; using Lucene.Net.Analysis.Support; using Lucene.Net.Analysis.Util; -using Lucene.Net.Support; using Lucene.Net.Util; using Lucene.Net.Util.Fst; @@ -19,13 +15,13 @@ namespace Lucene.Net.Analysis.Charfilter private readonly FST.BytesReader _fstReader; private readonly RollingCharBuffer _buffer = new RollingCharBuffer(); private readonly FST.Arc<CharsRef> _scratchArc = new FST.Arc<CharsRef>(); - private readonly IDictionary<Character, FST.Arc<CharsRef>> _cachedRootArcs; + private readonly IDictionary<char, FST.Arc<CharsRef>> _cachedRootArcs; private CharsRef _replacement; private int _replacementPointer; private int _inputOff; - public MappingCharFilter(NormalizeCharMap normMap, TextReader input) + public MappingCharFilter(NormalizeCharMap normMap, StreamReader input) : base(input) { _buffer.Reset(input); @@ -63,10 +59,10 @@ namespace Lucene.Net.Analysis.Charfilter var lastMatchLen = -1; CharsRef lastMatch = null; - var firstCH = _buffer[_inputOff]; + var firstCH = _buffer.Get(_inputOff); if (firstCH != -1) { - var arc = 
_cachedRootArcs[Character.ValueOf((char) firstCH)]; + var arc = _cachedRootArcs[(char) firstCH]; if (arc != null) { if (!FST<CharsRef>.TargetHasArcs(arc)) @@ -94,7 +90,7 @@ namespace Lucene.Net.Analysis.Charfilter break; } - var ch = _buffer[_inputOff + lookahead]; + var ch = _buffer.Get(_inputOff + lookahead); if (ch == -1) { break; @@ -133,7 +129,7 @@ namespace Lucene.Net.Analysis.Charfilter } else { - var ret = _buffer[_inputOff]; + var ret = _buffer.Get(_inputOff); if (ret != -1) { _inputOff++; @@ -151,7 +147,7 @@ namespace Lucene.Net.Analysis.Charfilter { var c = Read(); if (c == -1) break; - cbuf[i] = (char) c; + buffer[i] = (char) c; numRead++; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/Charfilter/NormalizeCharMap.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/Charfilter/NormalizeCharMap.cs b/src/contrib/Analyzers/Charfilter/NormalizeCharMap.cs index 8529de0..22facfd 100644 --- a/src/contrib/Analyzers/Charfilter/NormalizeCharMap.cs +++ b/src/contrib/Analyzers/Charfilter/NormalizeCharMap.cs @@ -12,7 +12,10 @@ namespace Lucene.Net.Analysis.Charfilter { protected internal readonly FST<CharsRef> map; protected internal readonly IDictionary<char, FST.Arc<CharsRef>> cachedRootArcs = new HashMap<char, FST.Arc<CharsRef>>(); - + + public FST<CharsRef> Map { get { return map; } } + public IDictionary<char, FST.Arc<CharsRef>> CachedRootArcs { get { return cachedRootArcs; } } + private NormalizeCharMap(FST<CharsRef> map) { this.map = map; @@ -30,7 +33,7 @@ namespace Lucene.Net.Analysis.Charfilter while (true) { Debug.Assert(scratchArc.Label != FST<CharsRef>.END_LABEL); - cachedRootArcs.Add((char) scratchArc.Label, + cachedRootArcs.Add((char)scratchArc.Label, new FST.Arc<CharsRef>().CopyFrom(scratchArc)); if (scratchArc.IsLast()) { @@ -50,7 +53,7 @@ namespace Lucene.Net.Analysis.Charfilter public class Builder { private readonly IDictionary<string, string> 
_pendingPairs = new TreeMap<string, string>(); - + public void Add(string match, string replacement) { if (match.Length == 0) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs b/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs index 47c971b..82bd5ec 100644 --- a/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs +++ b/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs @@ -133,7 +133,7 @@ namespace Lucene.Net.Analysis.Miscellaneous private readonly Regex Regex; private readonly bool toLowerCase; - private readonly ISet<string> stopWords; + private readonly CharArraySet stopWords; private readonly Version matchVersion; @@ -156,7 +156,7 @@ namespace Lucene.Net.Analysis.Miscellaneous * or <a href="http://www.unine.ch/info/clef/">other stop words * lists </a>. */ - public PatternAnalyzer(Version matchVersion, Regex Regex, bool toLowerCase, ISet<string> stopWords) + public PatternAnalyzer(Version matchVersion, Regex Regex, bool toLowerCase, CharArraySet stopWords) { if (Regex == null) throw new ArgumentException("Regex must not be null"); @@ -251,8 +251,8 @@ namespace Lucene.Net.Analysis.Miscellaneous return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords)); } - Tokenizer tokenizer = new RegexTokenizer(reader, Regex, toLowerCase); - TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer; + var tokenizer = new RegexTokenizer(reader, Regex, toLowerCase); + var result = (stopWords != null) ? 
(TokenStream) new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer; return new TokenStreamComponents(tokenizer, result); } @@ -523,7 +523,7 @@ namespace Lucene.Net.Analysis.Miscellaneous { return false; } - termAtt.SetTermBuffer(text); + termAtt.SetEmpty().Append(text); offsetAtt.SetOffset(start, i); return true; } @@ -531,7 +531,7 @@ namespace Lucene.Net.Analysis.Miscellaneous public override sealed void End() { // set final offset - int finalOffset = str.Length; + int finalOffset = CorrectOffset(str.Length); this.offsetAtt.SetOffset(finalOffset, finalOffset); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc52518c/src/core/Support/Character.cs ---------------------------------------------------------------------- diff --git a/src/core/Support/Character.cs b/src/core/Support/Character.cs index 2f1277e..3575b7f 100644 --- a/src/core/Support/Character.cs +++ b/src/core/Support/Character.cs @@ -36,6 +36,15 @@ namespace Lucene.Net.Support public const int MAX_CODE_POINT = 0x10FFFF; public const int MIN_CODE_POINT = 0x000000; + public const char MAX_SURROGATE = '\uDFFF'; + public const char MIN_SURROGATE = '\uD800'; + + public const char MIN_LOW_SURROGATE = '\uDC00'; + public const char MAX_LOW_SURROGATE = '\uDFFF'; + + public const char MIN_HIGH_SURROGATE = '\uD800'; + public const char MAX_HIGH_SURROGATE = '\uDBFF'; + /// <summary> /// /// </summary> @@ -82,5 +91,15 @@ namespace Lucene.Net.Support // .NET chars are always length 1 return 1; } + + public static bool IsLowSurrogate(char ch) + { + return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; + } + + public static bool IsHighSurrogate(char ch) + { + return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; + } } }
