Bugfixes for BreakIterator
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fe26d1e9 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fe26d1e9 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fe26d1e9 Branch: refs/heads/branch_4x Commit: fe26d1e91cc75babb0438d3f44e61a6b5d1e0dab Parents: d2d763c Author: Paul Irwin <[email protected]> Authored: Tue Oct 29 11:58:28 2013 -0400 Committer: Paul Irwin <[email protected]> Committed: Tue Oct 29 11:58:28 2013 -0400 ---------------------------------------------------------------------- .../BreakIteratorBoundaryScanner.cs | 50 ++++++++++---------- .../Support/BreakIterators/BreakIteratorBase.cs | 2 +- .../BreakIterators/EnglishWordBreakIterator.cs | 8 +++- 3 files changed, 32 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs index b023452..b2da99b 100644 --- a/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs +++ b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs @@ -1,35 +1,35 @@ -using System; +using Lucene.Net.Support; +using System; using System.Collections.Generic; using System.Linq; using System.Text; namespace Lucene.Net.Search.VectorHighlight { - // .NET: without re-implementing BreakIterator from scratch, we can't use this type. - //public class BreakIteratorBoundaryScanner : IBoundaryScanner - //{ - // readonly BreakIterator bi; + public class BreakIteratorBoundaryScanner : IBoundaryScanner + { + readonly BreakIterator bi; - // public BreakIteratorBoundaryScanner(BreakIterator bi) - // { - // this.bi = bi; - // } + public BreakIteratorBoundaryScanner(BreakIterator bi) + { + this.bi = bi; + } - // public override int FindStartOffset(StringBuilder buffer, int start) - // { - // if (start > buffer.Length || start < 1) - // return start; - // bi.SetText(buffer.ToString().Substring(0, start)); - // bi.Last(); - // return bi.Previous(); - // } + public override int FindStartOffset(StringBuilder buffer, int start) + { + if (start > buffer.Length || start < 1) + return start; + bi.Text = buffer.ToString().Substring(0, start); + bi.Last(); + return bi.Previous(); + } - // public override int FindEndOffset(StringBuilder buffer, int start) - // { - // if (start > buffer.Length || start < 0) - // return start; - // bi.SetText(buffer.ToString().Substring(start)); - // return bi.Next() + start; - // } - //} + public override int FindEndOffset(StringBuilder buffer, int start) + { + if (start > buffer.Length || start < 0) + return start; + bi.Text = buffer.ToString().Substring(start); + return bi.Next() + start; + } + } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/core/Support/BreakIterators/BreakIteratorBase.cs ---------------------------------------------------------------------- diff --git a/src/core/Support/BreakIterators/BreakIteratorBase.cs b/src/core/Support/BreakIterators/BreakIteratorBase.cs index a149907..09a08a3 100644 --- a/src/core/Support/BreakIterators/BreakIteratorBase.cs +++ b/src/core/Support/BreakIterators/BreakIteratorBase.cs @@ -75,7 +75,7 @@ namespace Lucene.Net.Support.BreakIterators if (_position == _text.Length - 1) return DONE; - return Following(++_position); + return Following(_position); } public override int Next(int n) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fe26d1e9/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs ---------------------------------------------------------------------- diff --git a/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs b/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs index 0fbb39f..3b26a19 100644 --- a/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs +++ b/src/core/Support/BreakIterators/EnglishWordBreakIterator.cs @@ -11,11 +11,15 @@ namespace Lucene.Net.Support.BreakIterators public override bool IsBoundary(int offset) { char c = Peek(offset); - char cplus = Peek(offset + 1); - + if (char.IsLetterOrDigit(c)) return false; + if (char.IsWhiteSpace(c)) + return true; + + char cplus = Peek(offset + 1); + if (cplus != ENDINPUT && char.IsLetterOrDigit(cplus)) return false;
