SWEEP: Moved BreakIterator-dependent functionality to a common Lucene.Net.Icu 
library so we can manage the icu.net dependency from one place and not make the 
majority of users deal with it when they don't need to.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b1fdcca3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b1fdcca3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b1fdcca3

Branch: refs/heads/master
Commit: b1fdcca3b3c3f418dfe37aafeda6f4dab75fb6d4
Parents: 63c599e
Author: Shad Storhaug <[email protected]>
Authored: Mon Apr 17 01:38:10 2017 +0700
Committer: Shad Storhaug <[email protected]>
Committed: Mon Apr 17 01:38:10 2017 +0700

----------------------------------------------------------------------
 Lucene.Net.Portable.sln                         |  20 +
 Lucene.Net.sln                                  |  52 +++
 NuGet.config                                    |   1 +
 src/IcuBreakIterator.cs                         | 394 -----------------
 .../Analysis/Th/ThaiAnalyzer.cs                 |   2 +-
 .../Lucene.Net.Analysis.Common.csproj           |   3 -
 src/Lucene.Net.Analysis.Common/project.json     |   6 +-
 .../Lucene.Net.Highlighter.csproj               |   5 +-
 .../DefaultPassageFormatter.cs                  |   4 +-
 .../PostingsHighlight/MultiTermHighlighting.cs  |   4 +-
 .../PostingsHighlight/Passage.cs                |   4 +-
 .../PostingsHighlight/PassageFormatter.cs       |   4 +-
 .../PostingsHighlight/PassageScorer.cs          |   4 +-
 .../Properties/AssemblyInfo.cs                  |   2 +
 src/Lucene.Net.Highlighter/project.json         |   6 +-
 src/Lucene.Net.Icu/Analysis/Th/stopwords.txt    | 119 ++++++
 src/Lucene.Net.Icu/Lucene.Net.Icu.csproj        | 124 ++++++
 src/Lucene.Net.Icu/Lucene.Net.Icu.project.json  |  11 +
 src/Lucene.Net.Icu/Lucene.Net.Icu.xproj         |  19 +
 src/Lucene.Net.Icu/Properties/AssemblyInfo.cs   |  31 ++
 src/Lucene.Net.Icu/Support/BreakIterator.cs     | 231 ++++++++++
 src/Lucene.Net.Icu/Support/CharacterIterator.cs |  50 +++
 src/Lucene.Net.Icu/Support/IcuBreakIterator.cs  | 394 +++++++++++++++++
 .../Support/StringCharacterIterator.cs          | 232 ++++++++++
 src/Lucene.Net.Icu/project.json                 |  63 +++
 .../Lucene.Net.Tests.Highlighter.csproj         |   3 +-
 .../TestBreakIterator.cs                        | 421 -------------------
 src/Lucene.Net.Tests.Highlighter/project.json   |   4 +-
 .../Lucene.Net.Tests.Icu.csproj                 | 121 ++++++
 .../Lucene.Net.Tests.Icu.project.json           |  12 +
 .../Lucene.Net.Tests.Icu.xproj                  |  22 +
 .../Properties/AssemblyInfo.cs                  |  21 +
 .../Search/PostingsHighlight/CambridgeMA.utf8   |   1 +
 .../Support/TestApiConsistency.cs               | 126 ++++++
 .../Support/TestExceptionSerialization.cs       |  54 +++
 .../Support/TestIcuBreakIterator.cs             | 421 +++++++++++++++++++
 src/Lucene.Net.Tests.Icu/project.json           |  67 +++
 src/Lucene.Net/Lucene.Net.csproj                |   3 -
 src/Lucene.Net/Properties/AssemblyInfo.cs       |   2 +
 src/Lucene.Net/Support/BreakIterator.cs         | 231 ----------
 src/Lucene.Net/Support/CharacterIterator.cs     |  50 ---
 .../Support/StringCharacterIterator.cs          | 232 ----------
 42 files changed, 2220 insertions(+), 1356 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index 8044aed..7f4edad 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -79,6 +79,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", 
"build", "{EFA10A77
                build\build.ps1 = build\build.ps1
        EndProjectSection
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Icu", 
"src\Lucene.Net.Icu\Lucene.Net.Icu.xproj", 
"{44A5341B-0F52-429D-977A-C35E10ECCADF}"
+EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Icu", 
"src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.xproj", 
"{32FD3471-E862-4055-B969-79C12A656366}"
+EndProject
 Global
        GlobalSection(SolutionConfigurationPlatforms) = preSolution
                Debug|Any CPU = Debug|Any CPU
@@ -367,6 +371,22 @@ Global
                {C708701D-4318-469F-9822-49A80386CFEA}.Release|Any CPU.Build.0 
= Release|Any CPU
                {C708701D-4318-469F-9822-49A80386CFEA}.Release|x86.ActiveCfg = 
Release|Any CPU
                {C708701D-4318-469F-9822-49A80386CFEA}.Release|x86.Build.0 = 
Release|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {44A5341B-0F52-429D-977A-C35E10ECCADF}.Release|x86.Build.0 = 
Release|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {32FD3471-E862-4055-B969-79C12A656366}.Release|x86.Build.0 = 
Release|Any CPU
        EndGlobalSection
        GlobalSection(SolutionProperties) = preSolution
                HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index b218f0d..66e91a6 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -88,6 +88,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", 
"build", "{9811D53E
                build\build.ps1 = build\build.ps1
        EndProjectSection
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Icu", 
"src\Lucene.Net.Icu\Lucene.Net.Icu.csproj", 
"{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Icu", 
"src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.csproj", 
"{D5AA1A22-1B28-4DF6-BFDA-02519A189839}"
+EndProject
 Global
        GlobalSection(SolutionConfigurationPlatforms) = preSolution
                Debug|Any CPU = Debug|Any CPU
@@ -849,6 +853,54 @@ Global
                {FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
                {FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|x86.ActiveCfg 
= Release|Any CPU
                {FBCD6AFE-0A5C-4399-8044-99C58D2912D1}.Release35|x86.Build.0 = 
Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Any 
CPU.ActiveCfg = Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Any CPU.Build.0 
= Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|x86.ActiveCfg = 
Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Debug35|x86.Build.0 = 
Debug|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release|x86.Build.0 = 
Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Any 
CPU.ActiveCfg = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Any 
CPU.Build.0 = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|x86.ActiveCfg 
= Release|Any CPU
+               {349CB7C9-7534-4E1D-9B0A-5521441AF0AE}.Release35|x86.Build.0 = 
Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|x86.ActiveCfg = 
Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug|x86.Build.0 = 
Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Any 
CPU.ActiveCfg = Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Any CPU.Build.0 
= Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Mixed 
Platforms.ActiveCfg = Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|Mixed 
Platforms.Build.0 = Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|x86.ActiveCfg = 
Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Debug35|x86.Build.0 = 
Debug|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Any CPU.Build.0 
= Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|x86.ActiveCfg = 
Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release|x86.Build.0 = 
Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Any 
CPU.ActiveCfg = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Any 
CPU.Build.0 = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Mixed 
Platforms.ActiveCfg = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|Mixed 
Platforms.Build.0 = Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|x86.ActiveCfg 
= Release|Any CPU
+               {D5AA1A22-1B28-4DF6-BFDA-02519A189839}.Release35|x86.Build.0 = 
Release|Any CPU
        EndGlobalSection
        GlobalSection(SolutionProperties) = preSolution
                HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/NuGet.config
----------------------------------------------------------------------
diff --git a/NuGet.config b/NuGet.config
index 8df6c0f..e0c6211 100644
--- a/NuGet.config
+++ b/NuGet.config
@@ -2,6 +2,7 @@
 <configuration>
   <packageSources>
     <clear />
+       <add key="icunet" value="https://www.myget.org/F/icu-dotnet/api/v2" />
     <add key="dotnet-cat" value="https://www.myget.org/F/dotnetcat/api/v2" />
        <add key="spatial4n" value="https://www.myget.org/F/spatial4n/api/v2" />
     <add key="nugetorg" value="https://www.nuget.org/api/v2" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
deleted file mode 100644
index cc0f7cd..0000000
--- a/src/IcuBreakIterator.cs
+++ /dev/null
@@ -1,394 +0,0 @@
-#if FEATURE_BREAKITERATOR
-using Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net
-{
-    /*
-        * Licensed to the Apache Software Foundation (ASF) under one or more
-        * contributor license agreements.  See the NOTICE file distributed with
-        * this work for additional information regarding copyright ownership.
-        * The ASF licenses this file to You under the Apache License, Version 
2.0
-        * (the "License"); you may not use this file except in compliance with
-        * the License.  You may obtain a copy of the License at
-        *
-        *     http://www.apache.org/licenses/LICENSE-2.0
-        *
-        * Unless required by applicable law or agreed to in writing, software
-        * distributed under the License is distributed on an "AS IS" BASIS,
-        * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
-        * See the License for the specific language governing permissions and
-        * limitations under the License.
-        */
-
-    /// <summary>
-    /// A <see cref="BreakIterator"/> implementation that encapsulates the 
functionality
-    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see 
cref="BreakIterator"/>
-    /// provides methods to move forward, reverse, and randomly through a set 
of text breaks
-    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> 
enumeration.
-    /// </summary>
-    // LUCENENET specific type
-    internal class IcuBreakIterator : BreakIterator
-    {
-        private readonly Icu.Locale locale;
-        private readonly Icu.BreakIterator.UBreakIteratorType type;
-
-        private List<int> boundaries = new List<int>();
-        private int currentBoundaryIndex; // Index (not the value) of the 
current boundary in boundaries
-        private string text;
-
-        /// <summary>
-        /// The start offset for the string, if supplied by a <see 
cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_start;
-
-        /// <summary>
-        /// The end offset for the string, if supplied by a <see 
cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_end;
-
-        private bool enableHacks = false;
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
-            : this(type, CultureInfo.CurrentCulture)
-        {
-        }
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, 
CultureInfo locale)
-        {
-            if (locale == null)
-                throw new ArgumentNullException("locale");
-            this.locale = new Icu.Locale(locale.Name);
-            this.type = type;
-        }
-
-        
-        public virtual bool EnableHacks
-        {
-            get { return enableHacks; }
-            set { enableHacks = value; }
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <returns>The offset of the beginning of the text.</returns>
-        public override int First()
-        {
-            currentBoundaryIndex = 0;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the end of the text.
-        /// </summary>
-        /// <returns>The text's past-the-end offset.</returns>
-        public override int Last()
-        {
-            currentBoundaryIndex = boundaries.Count - 1;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator either forward or backward the specified 
number of steps.
-        /// Negative values move backward, and positive values move forward.  
This is
-        /// equivalent to repeatedly calling <see cref="Next()"/> or <see 
cref="Previous()"/>.
-        /// </summary>
-        /// <param name="n">The number of steps to move.  The sign indicates 
the direction
-        /// (negative is backwards, and positive is forwards).</param>
-        /// <returns>The character offset of the boundary position n 
boundaries away from
-        /// the current one.</returns>
-        public override int Next(int n)
-        {
-            int result = Current;
-            while (n > 0)
-            {
-                result = Next();
-                --n;
-            }
-            while (n < 0)
-            {
-                result = Previous();
-                ++n;
-            }
-            return result;
-        }
-
-        /// <summary>
-        /// Advances the iterator to the next boundary position.
-        /// </summary>
-        /// <returns>The position of the first boundary after this 
one.</returns>
-        public override int Next()
-        {
-            if (currentBoundaryIndex >= boundaries.Count - 1 || 
boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex++;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator backwards, to the last boundary preceding 
this one.
-        /// </summary>
-        /// <returns>The position of the last boundary position preceding this 
one.</returns>
-        public override int Previous()
-        {
-            if (currentBoundaryIndex == 0 || boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex--;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Throw <see cref="ArgumentException"/> unless begin &lt;= offset 
&lt; end.
-        /// </summary>
-        /// <param name="offset"></param>
-        private void CheckOffset(int offset)
-        {
-            if (offset < m_start || offset > m_end)
-            {
-                throw new ArgumentException("offset out of bounds");
-            }
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the first boundary position following
-        /// the specified position.
-        /// </summary>
-        /// <param name="offset">The position from which to begin searching 
for a break position.</param>
-        /// <returns>The position of the first break after the current 
position.</returns>
-        public override int Following(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int following = GetLowestIndexGreaterThan(offset);
-            if (following == -1)
-            {
-                currentBoundaryIndex = boundaries.Count - 1;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = following;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetLowestIndexGreaterThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index;
-            }
-            else if (index + 1 < boundaries.Count)
-            {
-                return index + 1;
-            }
-
-            return -1;
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the last boundary position before the
-        /// specified position.
-        /// </summary>
-        /// <param name="offset">The position to begin searching for a break 
from.</param>
-        /// <returns>The position of the last boundary before the starting 
position.</returns>
-        public override int Preceding(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int preceeding = GetHighestIndexLessThan(offset);
-            if (preceeding == -1)
-            {
-                currentBoundaryIndex = 0;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = preceeding;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetHighestIndexLessThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index - 1;
-            }
-            else
-            {
-                // NOTE: This is intentionally allowed to return -1 in the case
-                // where index == 0. This state indicates we are before the 
first boundary.
-                return index - 1;
-            }
-        }
-
-        /// <summary>
-        /// Returns the current iteration position.
-        /// </summary>
-        public override int Current
-        {
-            get { return ReturnCurrent(); }
-        }
-
-        /// <summary>
-        /// Gets the text being analyzed.
-        /// </summary>
-        public override string Text
-        {
-            get
-            {
-                return text;
-            }
-        }
-
-        /// <summary>
-        /// Set the iterator to analyze a new piece of text.  This function 
resets
-        /// the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <param name="newText">The text to analyze.</param>
-        public override void SetText(string newText)
-        {
-            text = newText;
-            currentBoundaryIndex = 0;
-            m_start = 0;
-            m_end = newText.Length;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        public override void SetText(CharacterIterator newText)
-        {
-            text = newText.GetTextAsString();
-            currentBoundaryIndex = 0;
-            m_start = newText.BeginIndex;
-            m_end = newText.EndIndex;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        private void LoadBoundaries(int start, int end)
-        {
-            IEnumerable<Icu.Boundary> icuBoundaries;
-            string offsetText = text.Substring(start, end - start);
-
-#if !NETSTANDARD
-            try
-            {
-#endif
-                if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
-                {
-                    if (enableHacks)
-                    {
-                        // LUCENENET TODO: HACK - replacing hyphen with "a" so 
hyphenated words aren't broken
-                        offsetText = offsetText.Replace("-", "a");
-                    }
-
-                    icuBoundaries = 
Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
-                }
-                else
-                {
-                    if (enableHacks && type == 
Icu.BreakIterator.UBreakIteratorType.SENTENCE)
-                    {
-                        // LUCENENET TODO: HACK - newline character causes 
incorrect sentence breaking.
-                        offsetText = offsetText.Replace("\n", " ");
-                        // LUCENENET TODO: HACK - the ICU sentence logic 
doesn't work (in English anyway) when sentences don't
-                        // begin with capital letters.
-                        offsetText = CapitalizeFirst(offsetText);
-                    }
-
-                    icuBoundaries = Icu.BreakIterator.GetBoundaries(type, 
locale, offsetText);
-                }
-#if !NETSTANDARD
-            }
-            catch (AccessViolationException ace)
-            {
-                // LUCENENET TODO: Find a reliable way to reproduce and report 
the 
-                // AccessViolationException that happens here to the 
icu-dotnet project team
-                throw new Exception("Hit AccessViolationException: " + 
ace.ToString(), ace);
-            }
-#endif
-
-            boundaries = icuBoundaries
-                .Select(t => new[] { t.Start + start, t.End + start })
-                .SelectMany(b => b)
-                .Distinct()
-                .ToList();
-        }
-
-        /// <summary>
-        /// Returns true if the specified character offset is a text boundary.
-        /// </summary>
-        /// <param name="offset">the character offset to check.</param>
-        /// <returns><c>true</c> if "offset" is a boundary position, 
<c>false</c> otherwise.</returns>
-        public override bool IsBoundary(int offset)
-        {
-            CheckOffset(offset);
-            return boundaries.Contains(offset);
-        }
-
-        private int ReturnCurrent()
-        {
-            if (boundaries.Count > 0)
-            {
-                return currentBoundaryIndex < boundaries.Count && 
currentBoundaryIndex > -1
-                    ? boundaries[currentBoundaryIndex]
-                    : DONE;
-            }
-
-            // If there are no boundaries, we must return the start offset
-            return m_start;
-        }
-
-        /// <summary>
-        /// LUCENENET TODO: This is a temporary workaround for an issue with 
icu-dotnet
-        /// where it doesn't correctly break sentences unless they begin with 
a capital letter.
-        /// If/when ICU is fixed, this method should be deleted and the 
IcuBreakIterator 
-        /// code changed to remove calls to this method.
-        /// </summary>
-        public static string CapitalizeFirst(string s)
-        {
-            bool isNewSentence = true;
-            var result = new StringBuilder(s.Length);
-            for (int i = 0; i < s.Length; i++)
-            {
-                if (isNewSentence && char.IsLetter(s[i]))
-                {
-                    result.Append(char.ToUpper(s[i]));
-                    isNewSentence = false;
-                }
-                else
-                    result.Append(s[i]);
-
-                if (s[i] == '!' || s[i] == '?' || s[i] == '.')
-                {
-                    isNewSentence = true;
-                }
-            }
-
-            return result.ToString();
-        }
-    }
-}
-#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs 
b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
index aa6e1d7..0885069 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
@@ -111,7 +111,7 @@ namespace Lucene.Net.Analysis.Th
         ///         built from a <see cref="StandardTokenizer"/> filtered with
         ///         <see cref="StandardFilter"/>, <see 
cref="LowerCaseFilter"/>, <see cref="ThaiWordFilter"/>, and
         ///         <see cref="StopFilter"/> </returns>
-        protected override TokenStreamComponents CreateComponents(string 
fieldName, TextReader reader)
+        protected internal override TokenStreamComponents 
CreateComponents(string fieldName, TextReader reader)
         {
             if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj 
b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index fb403aa..02545b2 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -41,9 +41,6 @@
     <Reference Include="System.XML" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="..\IcuBreakIterator.cs">
-      <Link>IcuBreakIterator.cs</Link>
-    </Compile>
     <Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilter.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilterFactory.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/project.json 
b/src/Lucene.Net.Analysis.Common/project.json
index ca771a3..556a89e 100644
--- a/src/Lucene.Net.Analysis.Common/project.json
+++ b/src/Lucene.Net.Analysis.Common/project.json
@@ -26,8 +26,7 @@
         "define": [ "NETSTANDARD" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         },
         "embed": {
@@ -52,8 +51,7 @@
         "define": [ "FEATURE_CLONEABLE", "FEATURE_DTD_PROCESSING", 
"FEATURE_SERIALIZABLE" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         },
         "embed": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj 
b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
index 31ac251..9c885d4 100644
--- a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
+++ b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
@@ -44,9 +44,6 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="..\IcuBreakIterator.cs">
-      <Link>IcuBreakIterator.cs</Link>
-    </Compile>
     <Compile Include="Highlight\DefaultEncoder.cs" />
     <Compile Include="Highlight\GradientFormatter.cs" />
     <Compile Include="Highlight\Highlighter.cs" />
@@ -101,7 +98,7 @@
     <Compile Include="VectorHighlight\SingleFragListBuilder.cs" />
     <Compile Include="VectorHighlight\WeightedFieldFragList.cs" />
     <Compile Include="VectorHighlight\WeightedFragListBuilder.cs" />
-       <Compile Include="..\CommonAssemblyInfo.cs">
+    <Compile Include="..\CommonAssemblyInfo.cs">
       <Link>Properties\CommonAssemblyInfo.cs</Link>
     </Compile>
   </ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs 
b/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
index 4538d46..6a38bec 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs
@@ -1,4 +1,5 @@
-using System;
+#if FEATURE_BREAKITERATOR
+using System;
 using System.Text;
 
 namespace Lucene.Net.Search.PostingsHighlight
@@ -161,3 +162,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs 
b/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
index e5a5bcd..bd79c80 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis;
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Index;
 using Lucene.Net.Search.Spans;
@@ -344,3 +345,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs 
b/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
index 54a2446..b9a664f 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/Passage.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Util;
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Util;
 using System.Collections.Generic;
 using System.Diagnostics;
 
@@ -183,3 +184,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs 
b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
index ce367a6..770a6fa 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Search.PostingsHighlight
+#if FEATURE_BREAKITERATOR
+namespace Lucene.Net.Search.PostingsHighlight
 {
     /*
         * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -42,3 +43,4 @@
         public abstract object Format(Passage[] passages, string content); // 
LUCENENET TODO: Make return type generic?
     }
 }
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs 
b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
index af398da..de0fd45 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs
@@ -1,4 +1,5 @@
-using System;
+#if FEATURE_BREAKITERATOR
+using System;
 
 namespace Lucene.Net.Search.PostingsHighlight
 {
@@ -110,3 +111,4 @@ namespace Lucene.Net.Search.PostingsHighlight
         }
     }
 }
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs 
b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
index 6d2eedf..8969ff6 100644
--- a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
+++ b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs
@@ -24,7 +24,9 @@ using System.Runtime.InteropServices;
 // The following GUID is for the ID of the typelib if this project is exposed 
to COM
 [assembly: Guid("e9e769ea-8504-44bc-8dc9-ccf958765f8f")]
 
+[assembly: InternalsVisibleTo("Lucene.Net.Icu")]
 // for testing
 [assembly: InternalsVisibleTo("Lucene.Net.Tests.Highlighter")]
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
 
 // NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/project.json 
b/src/Lucene.Net.Highlighter/project.json
index 5016f93..ce4b726 100644
--- a/src/Lucene.Net.Highlighter/project.json
+++ b/src/Lucene.Net.Highlighter/project.json
@@ -25,8 +25,7 @@
         "define": [ "NETSTANDARD" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         }
       },
@@ -40,8 +39,7 @@
         "define": [ "FEATURE_SERIALIZABLE" ],
         "compile": {
           "includeFiles": [
-            "../CommonAssemblyInfo.cs",
-            "../IcuBreakIterator.cs"
+            "../CommonAssemblyInfo.cs"
           ]
         }
       }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt 
b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
new file mode 100644
index 0000000..07f0fab
--- /dev/null
+++ b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ทั้ง
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj 
b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
new file mode 100644
index 0000000..267132e
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" 
xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import 
Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props"
 
Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')"
 />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net</RootNamespace>
+    <AssemblyName>Lucene.Net.Icu</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' 
">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 
'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    
<DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs">
+      <Link>Analysis\Th\ThaiAnalyzer.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs">
+      <Link>Analysis\Th\ThaiTokenizer.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs">
+      <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs">
+      <Link>Analysis\Th\ThaiWordFilter.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs">
+      <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs">
+      <Link>Analysis\Util\CharArrayIterator.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs">
+      <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs">
+      <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs">
+      <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link>
+    </Compile>
+    <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs">
+      <Link>Search\PostingsHighlight\Passage.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs">
+      <Link>Search\PostingsHighlight\PassageFormatter.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs">
+      <Link>Search\PostingsHighlight\PassageScorer.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs">
+      <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs">
+      <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link>
+    </Compile>
+    <Compile 
Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs">
+      <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link>
+    </Compile>
+    <Compile Include="Support\BreakIterator.cs" />
+    <Compile Include="Support\CharacterIterator.cs" />
+    <Compile Include="Support\IcuBreakIterator.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+    <Compile Include="Support\StringCharacterIterator.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference 
Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference 
Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj">
+      <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project>
+      <Name>Lucene.Net.Highlighter</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.Icu.project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Analysis\Th\stopwords.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets 
below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json 
b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
new file mode 100644
index 0000000..af28fc8
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json
@@ -0,0 +1,11 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "dependencies": {
+    "icu.net": "54.1.1-alpha"
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj 
b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
new file mode 100644
index 0000000..dd48901
--- /dev/null
+++ b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" 
xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == 
''">14.0</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == 
''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" 
Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid>
+    <RootNamespace>Lucene.Net.Search</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' 
">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" 
Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs 
b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..3cdd9b2
--- /dev/null
+++ b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs
@@ -0,0 +1,31 @@
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Icu")]
+[assembly: AssemblyDescription(
+    "International Components for Unicode-based features including Thai 
analyzer support, " +
+    "an international postings highlighter, and BreakIterator support for the 
vector highlighter in Lucene.Net.Highlighter " +
+    "for the Lucene.Net full-text search engine library from The Apache 
Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.Icu")]
+[assembly: AssemblyCulture("")]
+
+[assembly: CLSCompliant(true)]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed 
to COM
+[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/BreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/BreakIterator.cs 
b/src/Lucene.Net.Icu/Support/BreakIterator.cs
new file mode 100644
index 0000000..ded1c9c
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/BreakIterator.cs
@@ -0,0 +1,231 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// The <code>BreakIterator</code> class implements methods for finding
+    /// the location of boundaries in text. Instances of 
<code>BreakIterator</code>
+    /// maintain a current position and scan over text
+    /// returning the index of characters where boundaries occur.
+    /// </summary>
+    public abstract class BreakIterator
+#if FEATURE_CLONEABLE
+        : ICloneable
+#endif
+    {
+        /// <summary>
+        /// Constructor. BreakIterator is stateless and has no default 
behavior.
+        /// </summary>
+        protected BreakIterator()
+        {
+        }
+
+        /// <summary>
+        /// Create a copy of this iterator
+        /// </summary>
+        /// <returns>A member-wise copy of this</returns>
+        public object Clone()
+        {
+            return MemberwiseClone();
+        }
+
+        /// <summary>
+        /// DONE is returned by Previous(), Next(), Next(int), Preceding(int)
+        /// and Following(int) when either the first or last text boundary has 
been
+        /// reached.
+        /// </summary>
+        public static readonly int DONE = -1;
+
+        /// <summary>
+        /// Returns the first boundary. The iterator's current position is set
+        /// to the first text boundary.
+        /// </summary>
+        /// <returns>The character index of the first text boundary</returns>
+        public abstract int First();
+
+        /// <summary>
+        /// Returns the last boundary. The iterator's current position is set
+        /// to the last text boundary.
+        /// </summary>
+        /// <returns>The character index of the last text boundary.</returns>
+        public abstract int Last();
+
+        /// <summary>
+        /// Returns the nth boundary from the current boundary. If either
+        /// the first or last text boundary has been reached, it returns
+        /// <see cref="BreakIterator.DONE"/> and the current position is set 
to either
+        /// the first or last text boundary depending on which one is reached. 
Otherwise,
+        /// the iterator's current position is set to the new boundary.
+        /// For example, if the iterator's current position is the mth text 
boundary
+        /// and three more boundaries exist from the current boundary to the 
last text
+        /// boundary, the Next(2) call will return m + 2. The new text 
position is set
+        /// to the (m + 2)th text boundary. A Next(4) call would return
+        /// <see cref="BreakIterator.DONE"/> and the last text boundary would 
become the
+        /// new text position.
+        /// </summary>
+        /// <param name="n">
+        /// which boundary to return.  A value of 0
+        /// does nothing.  Negative values move to previous boundaries
+        /// and positive values move to later boundaries.
+        /// </param>
+        /// <returns>
+        /// The character index of the nth boundary from the current position
+        /// or <see cref="BreakIterator.DONE"/> if either first or last text 
boundary
+        /// has been reached.
+        /// </returns>
+        public abstract int Next(int n);
+
+        /// <summary>
+        /// Returns the boundary following the current boundary. If the 
current boundary
+        /// is the last text boundary, it returns <c>BreakIterator.DONE</c> and
+        /// the iterator's current position is unchanged. Otherwise, the 
iterator's
+        /// current position is set to the boundary following the current 
boundary.
+        /// </summary>
+        /// <returns>
+        /// The character index of the next text boundary or
+        /// <see cref="BreakIterator.DONE"/> if the current boundary is the 
last text
+        /// boundary.
+        /// Equivalent to Next(1).
+        /// </returns>
+        /// <seealso cref="Next(int)"/>
+        public abstract int Next();
+
+        /// <summary>
+        /// Returns the boundary preceding the current boundary. If the 
current boundary
+        /// is the first text boundary, it returns 
<code>BreakIterator.DONE</code> and
+        /// the iterator's current position is unchanged. Otherwise, the 
iterator's
+        /// current position is set to the boundary preceding the current 
boundary.
+        /// </summary>
+        /// <returns>
+        /// The character index of the previous text boundary or
+        /// <see cref="BreakIterator.DONE"/> if the current boundary is the 
first text
+        /// boundary.
+        /// </returns>
+        public abstract int Previous();
+
+        /// <summary>
+        /// Returns the first boundary following the specified character 
offset. If the
+        /// specified offset is equal to the last text boundary, it returns
+        /// <see cref="BreakIterator.DONE"/> and the iterator's current 
position is unchanged.
+        /// Otherwise, the iterator's current position is set to the returned 
boundary.
+        /// The value returned is always greater than the offset or the value
+        /// <see cref="BreakIterator.DONE"/>.
+        /// </summary>
+        /// <param name="offset">the character offset to begin 
scanning.</param>
+        /// <returns>
+        /// The first boundary after the specified offset or
+        /// <see cref="BreakIterator.DONE"/> if the last text boundary is 
passed in
+        /// as the offset.
+        /// </returns>
+        /// <exception cref="ArgumentException">
+        /// if the specified offset is less than
+        /// the first text boundary or greater than the last text boundary.
+        /// </exception>
+        public abstract int Following(int offset);
+
+        /// <summary>
+        /// Returns the last boundary preceding the specified character 
offset. If the
+        /// specified offset is equal to the first text boundary, it returns
+        /// <see cref="BreakIterator.DONE"/> and the iterator's current 
position is unchanged.
+        /// Otherwise, the iterator's current position is set to the returned 
boundary.
+        /// The value returned is always less than the offset or the value
+        /// <see cref="BreakIterator.DONE"/>.
+        /// </summary>
+        /// <param name="offset">the character offset to begin 
scanning.</param>
+        /// <returns>
+        /// The last boundary before the specified offset or
+        /// <see cref="BreakIterator.DONE"/> if the first text boundary is 
passed in
+        /// as the offset.
+        /// </returns>
+        public abstract int Preceding(int offset);
+        //{
+        //    // NOTE:  This implementation is here solely because we can't 
add new
+        //    // abstract methods to an existing class.  There is almost 
ALWAYS a
+        //    // better, faster way to do this.
+        //    int pos = Following(offset);
+        //    while (pos >= offset && pos != DONE)
+        //    {
+        //        pos = Previous();
+        //    }
+        //    return pos;
+        //}
+
+        /// <summary>
+        /// Returns true if the specified character offset is a text boundary.
+        /// </summary>
+        /// <param name="offset">the character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, 
<c>false</c> otherwise.</returns>
+        /// <exception cref="ArgumentException">
+        /// if the specified offset is less than
+        /// the first text boundary or greater than the last text boundary.
+        /// </exception>
+        public abstract bool IsBoundary(int offset);
+        //{
+        //    // NOTE: This implementation probably is wrong for most 
situations
+        //    // because it fails to take into account the possibility that a
+        //    // CharacterIterator passed to setText() may not have a begin 
offset
+        //    // of 0.  But since the abstract BreakIterator doesn't have that
+        //    // knowledge, it assumes the begin offset is 0.  If you subclass
+        //    // BreakIterator, copy the SimpleTextBoundary implementation of 
this
+        //    // function into your subclass.  [This should have been abstract 
at
+        //    // this level, but it's too late to fix that now.]
+        //    if (offset == 0)
+        //    {
+        //        return true;
+        //    }
+        //    int boundary = Following(offset - 1);
+        //    if (boundary == DONE)
+        //    {
+        //        throw new ArgumentException();
+        //    }
+        //    return boundary == offset;
+        //}
+
+        /// <summary>
+        /// Returns character index of the text boundary that was most
+        /// recently returned by Next(), Next(int), Previous(), First(), 
Last(),
+        /// Following(int) or Preceding(int). If any of these methods returns
+        /// <see cref="BreakIterator.DONE"/> because either first or last text 
boundary
+        /// has been reached, it returns the first or last text boundary 
depending on
+        /// which one is reached.
+        /// </summary>
+        /// <returns>
+        /// The text boundary returned from the above methods, first or last
+        /// text boundary.
+        /// </returns>
+        /// <seealso cref="Next()"/>
+        /// <seealso cref="Next(int)"/>
+        /// <seealso cref="Previous()"/>
+        /// <seealso cref="First()"/>
+        /// <seealso cref="Last()"/>
+        /// <seealso cref="Following(int)"/>
+        /// <seealso cref="Preceding(int)"/>
+        public abstract int Current { get; }
+
+        /// <summary>
+        /// Get the text being scanned
+        /// </summary>
+        /// <returns>the text being scanned</returns>
+        //public abstract CharacterIterator GetText();
+        public abstract string Text { get; }
+
+        /// <summary>
+        /// Set a new text string to be scanned.  The current scan
+        /// position is reset to First().
+        /// </summary>
+        /// <param name="newText">new text to scan.</param>
+        public virtual void SetText(string newText)
+        {
+            SetText(new StringCharacterIterator(newText));
+        }
+
+        /// <summary>
+        /// Set a new text to be scanned.  The current scan
+        /// position is reset to First().
+        /// </summary>
+        /// <param name="newText">new text to scan.</param>
+        public abstract void SetText(CharacterIterator newText);
+    }
+}
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/CharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/CharacterIterator.cs 
b/src/Lucene.Net.Icu/Support/CharacterIterator.cs
new file mode 100644
index 0000000..0c81629
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/CharacterIterator.cs
@@ -0,0 +1,50 @@
+#if FEATURE_BREAKITERATOR
+using System;
+
+namespace Lucene.Net.Support
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public abstract class CharacterIterator
+    {
+        public static readonly char DONE = '\uFFFF';
+
+        public abstract char Current { get; }
+
+        public abstract char First();
+
+        public abstract char Last();
+
+        public abstract char Next();
+
+        public abstract char Previous();
+
+        public abstract char SetIndex(int position);
+
+        public abstract int BeginIndex { get; }
+
+        public abstract int EndIndex { get; }
+
+        public abstract int Index { get; }
+
+        public abstract object Clone();
+
+        public abstract string GetTextAsString();
+    }
+}
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs 
b/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
new file mode 100644
index 0000000..79819ed
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/IcuBreakIterator.cs
@@ -0,0 +1,394 @@
+#if FEATURE_BREAKITERATOR
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+    /*
+        * Licensed to the Apache Software Foundation (ASF) under one or more
+        * contributor license agreements.  See the NOTICE file distributed with
+        * this work for additional information regarding copyright ownership.
+        * The ASF licenses this file to You under the Apache License, Version 2.0
+        * (the "License"); you may not use this file except in compliance with
+        * the License.  You may obtain a copy of the License at
+        *
+        *     http://www.apache.org/licenses/LICENSE-2.0
+        *
+        * Unless required by applicable law or agreed to in writing, software
+        * distributed under the License is distributed on an "AS IS" BASIS,
+        * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        * See the License for the specific language governing permissions and
+        * limitations under the License.
+        */
+
+    /// <summary>
+    /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+    /// provides methods to move forward, reverse, and randomly through a set of text breaks
+    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+    /// </summary>
+    // LUCENENET specific type
+    public class IcuBreakIterator : BreakIterator
+    {
+        private readonly Icu.Locale locale;
+        private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+        private List<int> boundaries = new List<int>();
+        private int currentBoundaryIndex; // Index (not the value) of the 
current boundary in boundaries
+        private string text;
+
+        /// <summary>
+        /// The start offset for the string, if supplied by a <see 
cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_start;
+
+        /// <summary>
+        /// The end offset for the string, if supplied by a <see 
cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_end;
+
+        private bool enableHacks = false;
+
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+            : this(type, CultureInfo.CurrentCulture)
+        {
+        }
+
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, 
CultureInfo locale)
+        {
+            if (locale == null)
+                throw new ArgumentNullException("locale");
+            this.locale = new Icu.Locale(locale.Name);
+            this.type = type;
+        }
+
+        
+        public virtual bool EnableHacks
+        {
+            get { return enableHacks; }
+            set { enableHacks = value; }
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <returns>The offset of the beginning of the text.</returns>
+        public override int First()
+        {
+            currentBoundaryIndex = 0;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the end of the text.
+        /// </summary>
+        /// <returns>The text's past-the-end offset.</returns>
+        public override int Last()
+        {
+            currentBoundaryIndex = boundaries.Count - 1;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Moves the iterator <paramref name="n"/> boundaries away from the current one by
+        /// calling <see cref="Next()"/> once per step when <paramref name="n"/> is positive,
+        /// or <see cref="Previous()"/> once per step when it is negative.
+        /// </summary>
+        /// <param name="n">The number of steps to move. The sign indicates the direction
+        /// (negative is backwards, and positive is forwards).</param>
+        /// <returns>The value returned by the last step taken, or the current position
+        /// when <paramref name="n"/> is zero.</returns>
+        public override int Next(int n)
+        {
+            int position = Current;
+            for (; n > 0; n--)
+            {
+                position = Next();
+            }
+            for (; n < 0; n++)
+            {
+                position = Previous();
+            }
+            return position;
+        }
+
+        /// <summary>
+        /// Advances the iterator to the next boundary position.
+        /// </summary>
+        /// <returns>The position of the first boundary after this one, or DONE when
+        /// there are no boundaries or the iterator is already on the last one.</returns>
+        public override int Next()
+        {
+            int lastIndex = boundaries.Count - 1;
+            bool atEnd = currentBoundaryIndex >= lastIndex;
+            if (atEnd || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            ++currentBoundaryIndex;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Moves the iterator backwards, to the last boundary preceding this one.
+        /// </summary>
+        /// <returns>The position of the last boundary preceding this one, or DONE when
+        /// there are no boundaries or the iterator is already on the first one.</returns>
+        public override int Previous()
+        {
+            bool atStart = currentBoundaryIndex == 0;
+            if (atStart || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            --currentBoundaryIndex;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Throws <see cref="ArgumentException"/> unless start &lt;= offset &lt;= end,
+        /// where start and end are <see cref="m_start"/> and <see cref="m_end"/>.
+        /// Note that an offset equal to the end offset is accepted.
+        /// </summary>
+        /// <param name="offset">The character offset to validate.</param>
+        /// <exception cref="ArgumentException">The offset lies outside the range described above.</exception>
+        private void CheckOffset(int offset)
+        {
+            if (offset < m_start || offset > m_end)
+            {
+                throw new ArgumentException("offset out of bounds");
+            }
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the first boundary position following
+        /// the specified position.
+        /// </summary>
+        /// <param name="offset">The position from which to begin searching for a break position.</param>
+        /// <returns>The position of the first break after <paramref name="offset"/>, or DONE
+        /// when there are no boundaries or the search reports that none follows it.</returns>
+        public override int Following(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int nextIndex = GetLowestIndexGreaterThan(offset);
+            if (nextIndex != -1)
+            {
+                currentBoundaryIndex = nextIndex;
+                return ReturnCurrent();
+            }
+
+            // Nothing follows the offset: leave the iterator on the last boundary.
+            currentBoundaryIndex = boundaries.Count - 1;
+            return DONE;
+        }
+
+        /// <summary>
+        /// Finds the index (within <c>boundaries</c>) of the lowest boundary whose value is
+        /// greater than <paramref name="offset"/>. Relies on <c>boundaries</c> being sorted
+        /// in ascending order, as required by <see cref="List{T}.BinarySearch(T)"/>.
+        /// </summary>
+        /// <param name="offset">The character offset to search from.</param>
+        /// <returns>The index of the next boundary, or -1 if no boundary is greater than
+        /// <paramref name="offset"/>.</returns>
+        private int GetLowestIndexGreaterThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+
+            // Exact match: the candidate is the element after it. No match: BinarySearch
+            // returns the bitwise complement of the index of the first larger element,
+            // which is boundaries.Count when every boundary is smaller than the offset.
+            int candidate = index >= 0 ? index + 1 : ~index;
+
+            // Report "none" uniformly with -1 so the caller never receives an
+            // out-of-range index.
+            return candidate < boundaries.Count ? candidate : -1;
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the last boundary position before the
+        /// specified position.
+        /// </summary>
+        /// <param name="offset">The position to begin searching for a break from.</param>
+        /// <returns>The position of the last boundary before <paramref name="offset"/>, or DONE
+        /// when there are no boundaries or none precedes it.</returns>
+        public override int Preceding(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int previousIndex = GetHighestIndexLessThan(offset);
+            if (previousIndex != -1)
+            {
+                currentBoundaryIndex = previousIndex;
+                return ReturnCurrent();
+            }
+
+            // Nothing precedes the offset: leave the iterator on the first boundary.
+            currentBoundaryIndex = 0;
+            return DONE;
+        }
+
+        /// <summary>
+        /// Finds the index (within <c>boundaries</c>) of the highest boundary whose value is
+        /// less than <paramref name="offset"/>.
+        /// </summary>
+        /// <param name="offset">The character offset to search from.</param>
+        /// <returns>The index of the preceding boundary, or -1 when the offset is at or
+        /// before the first boundary.</returns>
+        private int GetHighestIndexLessThan(int offset)
+        {
+            int found = boundaries.BinarySearch(offset);
+
+            // A negative result is the complement of the insertion point; a non-negative
+            // result is the index of the exact match. In both cases the element just
+            // before it is the answer.
+            // NOTE: This is intentionally allowed to return -1, which indicates we are
+            // before the first boundary.
+            int pivot = found < 0 ? ~found : found;
+            return pivot - 1;
+        }
+
+        /// <summary>
+        /// Returns the current iteration position.
+        /// </summary>
+        public override int Current
+        {
+            get { return ReturnCurrent(); }
+        }
+
+        /// <summary>
+        /// Gets the text being analyzed.
+        /// </summary>
+        public override string Text
+        {
+            get
+            {
+                return text;
+            }
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze a new piece of text.  This function resets
+        /// the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <param name="newText">The text to analyze.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(string newText)
+        {
+            // Validate before touching any state so a failed call leaves the
+            // iterator exactly as it was.
+            if (newText == null)
+                throw new ArgumentNullException("newText");
+
+            text = newText;
+            currentBoundaryIndex = 0;
+            m_start = 0;
+            m_end = newText.Length;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze the text supplied by a <see cref="CharacterIterator"/>.
+        /// The range analyzed is taken from the iterator's <see cref="CharacterIterator.BeginIndex"/>
+        /// and <see cref="CharacterIterator.EndIndex"/>, and the current iteration position
+        /// is reset to the first boundary.
+        /// </summary>
+        /// <param name="newText">The iterator providing the text and range to analyze.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="newText"/> is <c>null</c>.</exception>
+        public override void SetText(CharacterIterator newText)
+        {
+            // Validate before touching any state so a failed call leaves the
+            // iterator exactly as it was.
+            if (newText == null)
+                throw new ArgumentNullException("newText");
+
+            text = newText.GetTextAsString();
+            currentBoundaryIndex = 0;
+            m_start = newText.BeginIndex;
+            m_end = newText.EndIndex;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        /// <summary>
+        /// Computes the break positions for the sub-range [<paramref name="start"/>,
+        /// <paramref name="end"/>) of the current text using icu.net and stores them,
+        /// translated back into offsets of the full text, in <c>boundaries</c>.
+        /// </summary>
+        /// <param name="start">The start offset of the range to analyze.</param>
+        /// <param name="end">The end offset of the range to analyze.</param>
+        private void LoadBoundaries(int start, int end)
+        {
+            IEnumerable<Icu.Boundary> icuBoundaries;
+            string offsetText = text.Substring(start, end - start);
+
+#if !NETSTANDARD
+            try
+            {
+#endif
+                if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+                {
+                    if (enableHacks)
+                    {
+                        // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                        offsetText = offsetText.Replace("-", "a");
+                    }
+
+                    icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
+                }
+                else
+                {
+                    if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                    {
+                        // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+                        offsetText = offsetText.Replace("\n", " ");
+                        // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+                        // begin with capital letters.
+                        offsetText = CapitalizeFirst(offsetText);
+                    }
+
+                    icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+                }
+#if !NETSTANDARD
+            }
+            catch (AccessViolationException ace)
+            {
+                // LUCENENET TODO: Find a reliable way to reproduce and report the
+                // AccessViolationException that happens here to the icu-dotnet project team
+                throw new Exception("Hit AccessViolationException: " + ace.ToString(), ace);
+            }
+#endif
+
+            // Flatten each boundary into its start and end offsets (shifted back into
+            // full-text coordinates) and drop duplicates. The list is searched with
+            // BinarySearch elsewhere in this class, so it must be in ascending order;
+            // Distinct() does not promise any ordering, hence the explicit sort.
+            boundaries = icuBoundaries
+                .SelectMany(t => new[] { t.Start + start, t.End + start })
+                .Distinct()
+                .OrderBy(b => b)
+                .ToList();
+        }
+
+        /// <summary>
+        /// Determines whether the specified character offset is one of the loaded text boundaries.
+        /// </summary>
+        /// <param name="offset">The character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+        public override bool IsBoundary(int offset)
+        {
+            // Rejects offsets outside the analyzed range before looking anything up.
+            CheckOffset(offset);
+
+            bool isBoundary = boundaries.Contains(offset);
+            return isBoundary;
+        }
+
+        /// <summary>
+        /// Returns the boundary value at <c>currentBoundaryIndex</c>.
+        /// </summary>
+        /// <returns>The start offset when no boundaries are loaded; DONE when the
+        /// current index is outside the boundary list; otherwise the boundary value.</returns>
+        private int ReturnCurrent()
+        {
+            // If there are no boundaries, we must return the start offset
+            if (boundaries.Count <= 0)
+            {
+                return m_start;
+            }
+
+            bool inRange = currentBoundaryIndex > -1 && currentBoundaryIndex < boundaries.Count;
+            if (inRange)
+            {
+                return boundaries[currentBoundaryIndex];
+            }
+            return DONE;
+        }
+
+        /// <summary>
+        /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
+        /// where it doesn't correctly break sentences unless they begin with a capital letter.
+        /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator
+        /// code changed to remove calls to this method.
+        /// <para/>
+        /// Upper-cases the first letter of the string and the first letter found after
+        /// each '!', '?' or '.' character; all other characters are copied unchanged.
+        /// </summary>
+        /// <param name="s">The text to process.</param>
+        /// <returns>A new string of the same length with sentence-initial letters upper-cased.</returns>
+        public static string CapitalizeFirst(string s)
+        {
+            var buffer = new StringBuilder(s.Length);
+            bool atSentenceStart = true;
+
+            foreach (char c in s)
+            {
+                bool capitalize = atSentenceStart && char.IsLetter(c);
+                buffer.Append(capitalize ? char.ToUpper(c) : c);
+                if (capitalize)
+                {
+                    atSentenceStart = false;
+                }
+
+                // A sentence terminator arms capitalization for the next letter seen.
+                switch (c)
+                {
+                    case '!':
+                    case '?':
+                    case '.':
+                        atSentenceStart = true;
+                        break;
+                }
+            }
+
+            return buffer.ToString();
+        }
+    }
+}
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs 
b/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
new file mode 100644
index 0000000..a91e49a
--- /dev/null
+++ b/src/Lucene.Net.Icu/Support/StringCharacterIterator.cs
@@ -0,0 +1,232 @@
+#if FEATURE_BREAKITERATOR
+/*
+ * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
+ *
+ * The original version of this source code and documentation
+ * is copyrighted and owned by Taligent, Inc., a wholly-owned
+ * subsidiary of IBM. These materials are provided under terms
+ * of a License Agreement between Taligent and Sun. This technology
+ * is protected by multiple US and International patents.
+ *
+ * This notice and attribution to Taligent may not be removed.
+ * Taligent is a registered trademark of Taligent, Inc.
+ *
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// <see cref="StringCharacterIterator"/> implements the
+    /// <see cref="CharacterIterator"/> protocol for a <see cref="string"/>.
+    /// The iterator covers the range [begin, end) of the string (the entire
+    /// string unless a sub-range is passed to the constructor) and keeps a
+    /// current position that always satisfies begin &lt;= pos &lt;= end.
+    /// </summary>
+    /// <seealso cref="CharacterIterator"/>
+    public class StringCharacterIterator : CharacterIterator
+    {
+        private string text;
+        private int begin;
+        private int end;
+        // invariant: begin <= pos <= end
+        private int pos;
+
+        /// <summary>
+        /// Creates an iterator over the entire <paramref name="text"/>, positioned at index 0.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public StringCharacterIterator(string text)
+            : this(text, 0)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over the entire <paramref name="text"/>, positioned at <paramref name="pos"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="pos"/> is outside the text.</exception>
+        public StringCharacterIterator(string text, int pos)
+            // Avoid dereferencing a null text here so the chained constructor can
+            // report it with ArgumentNullException instead of NullReferenceException.
+            : this(text, 0, text == null ? 0 : text.Length, pos)
+        {
+        }
+
+        /// <summary>
+        /// Creates an iterator over the range [<paramref name="begin"/>, <paramref name="end"/>)
+        /// of <paramref name="text"/>, positioned at <paramref name="pos"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The range is not within the text, or
+        /// <paramref name="pos"/> is not within the range.</exception>
+        public StringCharacterIterator(string text, int begin, int end, int pos)
+        {
+            if (text == null)
+                throw new ArgumentNullException("text");
+            this.text = text;
+
+            if (begin < 0 || begin > end || end > text.Length)
+                throw new ArgumentException("Invalid substring range");
+
+            if (pos < begin || pos > end)
+                throw new ArgumentException("Invalid position");
+
+            this.begin = begin;
+            this.end = end;
+            this.pos = pos;
+        }
+
+        /// <summary>
+        /// Resets this iterator to cover the entire <paramref name="text"/>, positioned at index 0.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        public void SetText(string text)
+        {
+            if (text == null)
+                throw new ArgumentNullException("text");
+            this.text = text;
+            this.begin = 0;
+            this.end = text.Length;
+            this.pos = 0;
+        }
+
+        /// <summary>
+        /// Moves to the beginning of the range and returns the character there
+        /// (<see cref="CharacterIterator.DONE"/> if the range is empty).
+        /// </summary>
+        public override char First()
+        {
+            pos = begin;
+            return Current;
+        }
+
+        /// <summary>
+        /// Moves to the last character of the range and returns it
+        /// (<see cref="CharacterIterator.DONE"/> if the range is empty).
+        /// </summary>
+        public override char Last()
+        {
+            // For an empty range there is no last character; stay at end.
+            pos = (end != begin) ? end - 1 : end;
+            return Current;
+        }
+
+        /// <summary>
+        /// Moves to <paramref name="position"/> and returns the character there.
+        /// </summary>
+        /// <exception cref="ArgumentException"><paramref name="position"/> is outside [begin, end].</exception>
+        public override char SetIndex(int position)
+        {
+            if (position < begin || position > end)
+                throw new ArgumentException("Invalid index");
+            pos = position;
+            return Current;
+        }
+
+        /// <summary>
+        /// Gets the character at the current position, or <see cref="CharacterIterator.DONE"/>
+        /// when the position is not inside [begin, end).
+        /// </summary>
+        public override char Current
+        {
+            get
+            {
+                bool inRange = pos >= begin && pos < end;
+                return inRange ? text[pos] : DONE;
+            }
+        }
+
+        /// <summary>
+        /// Advances one position and returns the character there. When there is no
+        /// further character the position is set to end and
+        /// <see cref="CharacterIterator.DONE"/> is returned.
+        /// </summary>
+        public override char Next()
+        {
+            if (pos < end - 1)
+            {
+                pos++;
+                return text[pos];
+            }
+
+            pos = end;
+            return DONE;
+        }
+
+        /// <summary>
+        /// Steps back one position and returns the character there, or returns
+        /// <see cref="CharacterIterator.DONE"/> (without moving) when already at begin.
+        /// </summary>
+        public override char Previous()
+        {
+            if (pos > begin)
+            {
+                pos--;
+                return text[pos];
+            }
+
+            return DONE;
+        }
+
+        /// <summary>Gets the start index of the iterated range.</summary>
+        public override int BeginIndex
+        {
+            get { return begin; }
+        }
+
+        /// <summary>Gets the end index (exclusive) of the iterated range.</summary>
+        public override int EndIndex
+        {
+            get { return end; }
+        }
+
+        /// <summary>Gets the current position.</summary>
+        public override int Index
+        {
+            get { return pos; }
+        }
+
+        /// <summary>Returns the full underlying string (not just the iterated range).</summary>
+        public override string GetTextAsString()
+        {
+            return text;
+        }
+
+        /// <summary>
+        /// Two iterators are equal when they iterate the same text (ordinal comparison)
+        /// over the same range and are at the same position.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (ReferenceEquals(this, obj))
+                return true;
+
+            StringCharacterIterator that = obj as StringCharacterIterator;
+            if (ReferenceEquals(that, null))
+                return false;
+
+            // Compare the cheap integer state first, then the text itself.
+            return pos == that.pos
+                && begin == that.begin
+                && end == that.end
+                && text.Equals(that.text, StringComparison.Ordinal);
+        }
+
+        /// <summary>
+        /// Computes a hash code from the same state that <see cref="Equals(object)"/>
+        /// compares, so that equal iterators produce equal hash codes.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            // Must not use base.GetHashCode(): that is identity-based and would differ
+            // between two distinct instances that compare equal.
+            return text.GetHashCode() ^ pos ^ begin ^ end;
+        }
+
+        /// <summary>Creates a shallow copy of this iterator with the same text, range and position.</summary>
+        public override object Clone()
+        {
+            return MemberwiseClone();
+        }
+    }
+}
+#endif

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Icu/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Icu/project.json b/src/Lucene.Net.Icu/project.json
new file mode 100644
index 0000000..2e8f212
--- /dev/null
+++ b/src/Lucene.Net.Icu/project.json
@@ -0,0 +1,63 @@
+{
+  "version": "4.8.0",
+  "dependencies": {
+    "icu.net": "54.1.1-alpha",
+    "Lucene.Net": "4.8.0",
+    "Lucene.Net.Analysis.Common": "4.8.0",
+    "Lucene.Net.Highlighter": "4.8.0"
+  },
+  "buildOptions": {
+    "debugType": "portable",
+    "compile": {
+      "includeFiles": [
+        "../CommonAssemblyInfo.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs",
+        "../Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs",
+        
"../Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs",
+        
"../Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/Passage.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs",
+        "../Lucene.Net.Highlighter/PostingsHighlight/WholeBreakIterator.cs",
+        
"../Lucene.Net.Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs"
+      ]
+    },
+    "embed": {
+      "includeFiles": [ "Analysis/Th/stopwords.txt" ]
+    }
+  },
+  "packOptions": {
+    "summary": "<Added from AssemblyDescriptionAttribute by the build script - do not remove this>",
+    "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+    "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+    "owners": [
+      "The Apache Software Foundation"
+    ],
+    "repository": {
+      "url": "https://github.com/apache/lucenenet"
+    },
+    "tags": [ "lucene.net", "core", "text", "search", "information", 
"retrieval", "lucene", "apache", "analysis", "index", "query" ]
+  },
+  "frameworks": {
+    "netstandard1.5": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "define": [ "NETSTANDARD", "FEATURE_BREAKITERATOR" ]
+      },
+      "dependencies": {
+        "NETStandard.Library": "1.6.0"
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "define": [ "FEATURE_BREAKITERATOR", "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b1fdcca3/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj 
b/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
index 3ed7239..d87e43d 100644
--- a/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
+++ b/src/Lucene.Net.Tests.Highlighter/Lucene.Net.Tests.Highlighter.csproj
@@ -56,7 +56,6 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Support\TestExceptionSerialization.cs" />
     <Compile Include="Support\TestApiConsistency.cs" />
-    <Compile Include="TestBreakIterator.cs" />
     <Compile Include="VectorHighlight\AbstractTestCase.cs" />
     <Compile Include="VectorHighlight\BreakIteratorBoundaryScannerTest.cs" />
     <Compile Include="VectorHighlight\FastVectorHighlighterTest.cs" />
@@ -70,7 +69,7 @@
     <Compile Include="VectorHighlight\SimpleFragmentsBuilderTest.cs" />
     <Compile Include="VectorHighlight\SingleFragListBuilderTest.cs" />
     <Compile Include="VectorHighlight\WeightedFragListBuilderTest.cs" />
-       <Compile Include="..\CommonAssemblyInfo.cs">
+    <Compile Include="..\CommonAssemblyInfo.cs">
       <Link>Properties\CommonAssemblyInfo.cs</Link>
     </Compile>
   </ItemGroup>

Reply via email to