[1/3] lucenenet git commit: Lucene.Net.Analysis.ICU: Renamed ICU directory Icu to match namespace conventions

nightowl888 Sun, 10 Sep 2017 15:02:56 -0700

Repository: lucenenet
Updated Branches:
  refs/heads/master 60e812525 -> 84fdac04c



http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilter.cs
new file mode 100644
index 0000000..4254298
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilter.cs
@@ -0,0 +1,235 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Lucene.Net.Analysis.CharFilters;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    /// <summary>
+//    /// Normalize token text with ICU's <see cref="Normalizer2"/>.
+//    /// </summary>
+//    public sealed class ICUNormalizer2CharFilter : BaseCharFilter
+//    {
+//        private static readonly int IO_BUFFER_SIZE = 128;
+
+//        private readonly Normalizer2 normalizer;
+//        private readonly StringBuilder inputBuffer = new StringBuilder();
+//        private readonly StringBuilder resultBuffer = new StringBuilder();
+
+//        private bool inputFinished;
+//        private bool afterQuickCheckYes;
+//        private int checkedInputBoundary;
+//        private int charCount;
+
+
+//        /**
+//         * Create a new Normalizer2CharFilter that combines NFKC normalization, Case
+//         * Folding, and removes Default Ignorables (NFKC_Casefold)
+//         */
+//        public ICUNormalizer2CharFilter(TextReader input)
+//            : this(input, new Normalizer2(Icu.Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
+//        {
+//        }
+
+//        /**
+//         * Create a new Normalizer2CharFilter with the specified Normalizer2
+//         * @param in text
+//         * @param normalizer normalizer to use
+//         */
+//        public ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer)
+//            : base(input)
+//        {
+//            if (normalizer == null)
+//            {
+//                throw new ArgumentNullException("normalizer");
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        public override int Read(char[] cbuf, int off, int len)
+//        {
+//            if (off < 0) throw new ArgumentException("off < 0");
+//            if (off >= cbuf.Length) throw new ArgumentException("off >= cbuf.length");
+//            if (len <= 0) throw new ArgumentException("len <= 0");
+
+//            while (!inputFinished || inputBuffer.Length > 0 || resultBuffer.Length > 0)
+//            {
+//                int retLen;
+
+//                if (resultBuffer.Length > 0)
+//                {
+//                    retLen = OutputFromResultBuffer(cbuf, off, len);
+//                    if (retLen > 0)
+//                    {
+//                        return retLen;
+//                    }
+//                }
+
+//                int resLen = ReadAndNormalizeFromInput();
+//                if (resLen > 0)
+//                {
+//                    retLen = OutputFromResultBuffer(cbuf, off, len);
+//                    if (retLen > 0)
+//                    {
+//                        return retLen;
+//                    }
+//                }
+
+//                ReadInputToBuffer();
+//            }
+
+//            return -1;
+//        }
+
+//        private readonly char[] tmpBuffer = new char[IO_BUFFER_SIZE];
+
+//        private int ReadInputToBuffer()
+//        {
+//            int len = m_input.Read(tmpBuffer, 0, tmpBuffer.Length);
+//            if (len == -1)
+//            {
+//                inputFinished = true;
+//                return 0;
+//            }
+//            inputBuffer.Append(tmpBuffer, 0, len);
+
+//            // if checkedInputBoundary was at the end of a buffer, we need to check that char again
+//            checkedInputBoundary = Math.Max(checkedInputBoundary - 1, 0);
+//            // this loop depends on 'isInert' (changes under normalization) but looks only at characters.
+//            // so we treat all surrogates as non-inert for simplicity
+//            if (normalizer.IsInert(tmpBuffer[len - 1]) && !char.IsSurrogate(tmpBuffer[len - 1]))
+//            {
+//                return len;
+//            }
+//            else return len + ReadInputToBuffer();
+//        }
+
+//        private int ReadAndNormalizeFromInput()
+//        {
+//            if (inputBuffer.Length <= 0)
+//            {
+//                afterQuickCheckYes = false;
+//                return 0;
+//            }
+//            if (!afterQuickCheckYes)
+//            {
+//                int resLen2 = ReadFromInputWhileSpanQuickCheckYes();
+//                afterQuickCheckYes = true;
+//                if (resLen2 > 0) return resLen2;
+//            }
+//            int resLen = ReadFromIoNormalizeUptoBoundary();
+//            if (resLen > 0)
+//            {
+//                afterQuickCheckYes = false;
+//            }
+//            return resLen;
+//        }
+
+//        private int ReadFromInputWhileSpanQuickCheckYes()
+//        {
+//            int end = normalizer.spanQuickCheckYes(inputBuffer);
+//            if (end > 0)
+//            {
+//                //resultBuffer.Append(inputBuffer.subSequence(0, end));
+//                resultBuffer.Append(inputBuffer.ToString(0, end));
+//                //inputBuffer.delete(0, end);
+//                inputBuffer.Remove(0, end);
+//                checkedInputBoundary = Math.Max(checkedInputBoundary - end, 0);
+//                charCount += end;
+//            }
+//            return end;
+//        }
+
+//        private int ReadFromIoNormalizeUptoBoundary()
+//        {
+//            // if there's no buffer to normalize, return 0
+//            if (inputBuffer.Length <= 0)
+//            {
+//                return 0;
+//            }
+
+//            bool foundBoundary = false;
+//            int bufLen = inputBuffer.Length;
+
+//            while (checkedInputBoundary <= bufLen - 1)
+//            {
+//                int charLen = Character.CharCount(inputBuffer.CodePointAt(checkedInputBoundary));
+//                checkedInputBoundary += charLen;
+//                if (checkedInputBoundary < bufLen && normalizer.HasBoundaryBefore(inputBuffer
+//                  .CodePointAt(checkedInputBoundary)))
+//                {
+//                    foundBoundary = true;
+//                    break;
+//                }
+//            }
+//            if (!foundBoundary && checkedInputBoundary >= bufLen && inputFinished)
+//            {
+//                foundBoundary = true;
+//                checkedInputBoundary = bufLen;
+//            }
+
+//            if (!foundBoundary)
+//            {
+//                return 0;
+//            }
+
+//            return NormalizeInputUpto(checkedInputBoundary);
+//        }
+
+//        private int NormalizeInputUpto(int length)
+//        {
+//            int destOrigLen = resultBuffer.Length;
+//            normalizer.NormalizeSecondAndAppend(resultBuffer, inputBuffer.ToString(0, length));
+//              //inputBuffer.SubSequence(0, length));
+
+//            //inputBuffer.Delete(0, length);
+//            inputBuffer.Remove(0, length);
+//            checkedInputBoundary = Math.Max(checkedInputBoundary - length, 0);
+//            int resultLength = resultBuffer.Length - destOrigLen;
+//            RecordOffsetDiff(length, resultLength);
+//            return resultLength;
+//        }
+
+//        private void RecordOffsetDiff(int inputLength, int outputLength)
+//        {
+//            if (inputLength == outputLength)
+//            {
+//                charCount += outputLength;
+//                return;
+//            }
+//            int diff = inputLength - outputLength;
+//            int cumuDiff = LastCumulativeDiff;
+//            if (diff < 0)
+//            {
+//                for (int i = 1; i <= -diff; ++i)
+//                {
+//                    AddOffCorrectMap(charCount + i, cumuDiff - i);
+//                }
+//            }
+//            else
+//            {
+//                AddOffCorrectMap(charCount + outputLength, cumuDiff + diff);
+//            }
+//            charCount += outputLength;
+//        }
+
+//        private int OutputFromResultBuffer(char[] cbuf, int begin, int len)
+//        {
+//            len = Math.Min(resultBuffer.Length, len);
+//            //resultBuffer.GetChars(0, len, cbuf, begin);
+//            resultBuffer.CopyTo(0, cbuf, begin, len);
+//            if (len > 0)
+//            {
+//                //resultBuffer.delete(0, len);
+//                resultBuffer.Remove(0, len);
+//            }
+//            return len;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilterFactory.cs
new file mode 100644
index 0000000..bd4cbe5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2CharFilterFactory.cs
@@ -0,0 +1,60 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2CharFilterFactory : CharFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>Creates a new ICUNormalizer2CharFilterFactory</summary>
+//        public ICUNormalizer2CharFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string name = Get(args, "name", "NFKC");
+//            //string name = Get(args, "name", "nfkc_cf");
+//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
+//            //Normalizer2 normalizer = Normalizer2.getInstance
+//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
+
+//            var mode = (Icu.Normalizer.UNormalizationMode)Enum.Parse(typeof(Icu.Normalizer.UNormalizationMode), "UNORM_" + name);
+//            Normalizer2 normalizer = new Normalizer2(mode);
+
+//            string filter = Get(args, "filter");
+//            if (filter != null)
+//            {
+//                //UnicodeSet set = new UnicodeSet(filter);
+//                var set = UnicodeSet.ToCharacters(filter);
+//                if (set.Any())
+//                {
+//                    //set.freeze();
+//                    normalizer = new FilteredNormalizer2(normalizer, set);
+//                }
+//            }
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        public override TextReader Create(TextReader input)
+//        {
+//            return new ICUNormalizer2CharFilter(input, normalizer);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs
new file mode 100644
index 0000000..bca3d24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs
@@ -0,0 +1,60 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.TokenAttributes;
+//using Lucene.Net.Support;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2Filter : TokenFilter
+//    {
+//        private readonly ICharTermAttribute termAtt;
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>
+//        /// Create a new <see cref="Normalizer2Filter"/> that combines NFKC normalization, Case
+//        /// Folding, and removes Default Ignorables (NFKC_Casefold)
+//        /// </summary>
+//        /// <param name="input"></param>
+//        public ICUNormalizer2Filter(TokenStream input)
+//            : this(input, new Normalizer2(Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
+//        {
+//        }
+
+//        /// <summary>
+//        /// Create a new <see cref="Normalizer2Filter"/> with the specified <see cref="Normalizer2"/>
+//        /// </summary>
+//        /// <param name="input">stream</param>
+//        /// <param name="normalizer">normalizer to use</param>
+//        public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer)
+//            : base(input)
+//        {
+//            this.normalizer = normalizer;
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+//        }
+
+//        public override sealed bool IncrementToken()
+//        {
+//            if (m_input.IncrementToken())
+//            {
+//                var term = termAtt.ToString();
+//                try
+//                {
+//                    if (!normalizer.IsNormalized(term))
+//                    {
+//                        termAtt.SetEmpty().Append(normalizer.Normalize(term));
+//                    }
+//                }
+//                catch (System.Exception ex)
+//                {
+
+//                }
+//                return true;
+//            }
+//            else
+//            {
+//                return false;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2FilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2FilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2FilterFactory.cs
new file mode 100644
index 0000000..c17fb7f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2FilterFactory.cs
@@ -0,0 +1,59 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUNormalizer2FilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Normalizer2 normalizer;
+
+//        /// <summary>Creates a new ICUNormalizer2FilterFactory</summary>
+//        public ICUNormalizer2FilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string name = Get(args, "name", "NFKC");
+//            //string name = Get(args, "name", "nfkc_cf");
+//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
+
+//            var mode = (Normalizer.UNormalizationMode)Enum.Parse(typeof(Normalizer.UNormalizationMode), "UNORM_" + name);
+//            Normalizer2 normalizer = new Normalizer2(mode);
+
+//            //Normalizer2 normalizer = Normalizer2.getInstance
+//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
+
+//            string filter = Get(args, "filter");
+//            if (filter != null)
+//            {
+//                //UnicodeSet set = new UnicodeSet(filter);
+//                var set = UnicodeSet.ToCharacters(filter);
+//                if (set.Any())
+//                {
+//                    //set.freeze();
+//                    normalizer = new FilteredNormalizer2(normalizer, set);
+//                }
+//            }
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//            this.normalizer = normalizer;
+//        }
+
+//        // TODO: support custom normalization
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUNormalizer2Filter(input, normalizer);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs
new file mode 100644
index 0000000..7f22c3d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilter.cs
@@ -0,0 +1,152 @@
+// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
+
+//using Lucene.Net.Analysis.TokenAttributes;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public sealed class ICUTransformFilter : TokenFilter
+//    {
+//        // Transliterator to transform the text
+//        private readonly Transliterator transform;
+
+//        // Reusable position object
+//        private readonly Transliterator.Position position = new Transliterator.Position();
+
+//        // term attribute, will be updated with transformed text.
+//        private readonly ICharTermAttribute termAtt;
+
+//        // Wraps a termAttribute around the replaceable interface.
+//        private readonly ReplaceableTermAttribute replaceableAttribute = new ReplaceableTermAttribute();
+
+//        /// <summary>
+//        /// Create a new ICUTransformFilter that transforms text on the given stream.
+//        /// </summary>
+//        /// <param name="input"><see cref="TokenStream"/> to filter.</param>
+//        /// <param name="transform">Transliterator to transform the text.</param>
+//        public ICUTransformFilter(TokenStream input, Transliterator transform)
+//            : base(input)
+//        {
+//            this.transform = transform;
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+
+//            /* 
+//             * This is cheating, but speeds things up a lot.
+//             * If we wanted to use pkg-private APIs we could probably do better.
+//             */
+//            if (transform.getFilter() == null && transform is com.ibm.icu.text.RuleBasedTransliterator)
+//            {
+//                UnicodeSet sourceSet = transform.getSourceSet();
+//                if (sourceSet != null && !sourceSet.isEmpty())
+//                    transform.setFilter(sourceSet);
+//            }
+//        }
+
+//        public override bool IncrementToken()
+//        {
+//            /*
+//             * Wrap around replaceable. clear the positions, and transliterate.
+//             */
+//            if (m_input.IncrementToken())
+//            {
+//                replaceableAttribute.SetText(termAtt);
+
+//                int length = termAtt.Length;
+//                position.start = 0;
+//                position.limit = length;
+//                position.contextStart = 0;
+//                position.contextLimit = length;
+
+//                transform.FilteredTransliterate(replaceableAttribute, position, false);
+//                return true;
+//            }
+//            else
+//            {
+//                return false;
+//            }
+//        }
+
+//        /// <summary>
+//        /// Wrap a <see cref="ICharTermAttribute"/> with the Replaceable API.
+//        /// </summary>
+//        private sealed class ReplaceableTermAttribute //: IReplaceable
+//        {
+//            private char[] buffer;
+//            private int length;
+//            private ICharTermAttribute token;
+
+//            public void SetText(ICharTermAttribute token)
+//            {
+//                this.token = token;
+//                this.buffer = token.Buffer;
+//                this.length = token.Length;
+//            }
+
+//            public int Char32At(int pos)
+//            {
+//                return UTF16.charAt(buffer, 0, length, pos);
+//            }
+
+//            public char CharAt(int pos)
+//            {
+//                return buffer[pos];
+//            }
+
+//            public void Copy(int start, int limit, int dest)
+//            {
+//                char[] text = new char[limit - start];
+//                GetChars(start, limit, text, 0);
+//                Replace(dest, dest, text, 0, limit - start);
+//            }
+
+//            public void GetChars(int srcStart, int srcLimit, char[] dst, int dstStart)
+//            {
+//                System.Array.Copy(buffer, srcStart, dst, dstStart, srcLimit - srcStart);
+//            }
+
+//            public bool HasMetaData
+//            {
+//                get { return false; }
+//            }
+
+//            public int Length
+//            {
+//                get { return length; }
+//            }
+
+//            public void Replace(int start, int limit, string text)
+//            {
+//                int charsLen = text.Length;
+//                int newLength = ShiftForReplace(start, limit, charsLen);
+//                // insert the replacement text
+//                //text.getChars(0, charsLen, buffer, start);
+//                text.CopyTo(0, buffer, start, charsLen);
+//                token.Length = (length = newLength);
+//            }
+
+//            public void Replace(int start, int limit, char[] text, int charsStart,
+//                int charsLen)
+//            {
+//                // shift text if necessary for the replacement
+//                int newLength = ShiftForReplace(start, limit, charsLen);
+//                // insert the replacement text
+//                System.Array.Copy(text, charsStart, buffer, start, charsLen);
+//                token.Length = (length = newLength);
+//            }
+
+//            /// <summary>shift text (if necessary) for a replacement operation</summary>
+//            private int ShiftForReplace(int start, int limit, int charsLen)
+//            {
+//                int replacementLength = limit - start;
+//                int newLength = length - replacementLength + charsLen;
+//                // resize if necessary
+//                if (newLength > length)
+//                    buffer = token.ResizeBuffer(newLength);
+//                // if the substring being replaced is longer or shorter than the
+//                // replacement, need to shift things around
+//                if (replacementLength != charsLen && limit < length)
+//                    System.Array.Copy(buffer, limit, buffer, start + charsLen, length - limit);
+//                return newLength;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilterFactory.cs
new file mode 100644
index 0000000..081ebf5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUTransformFilterFactory.cs
@@ -0,0 +1,38 @@
+// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
+
+//using Lucene.Net.Analysis.Util;
+//using System;
+//using System.Collections.Generic;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUTransformFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        private readonly Transliterator transliterator;
+
+//        // TODO: add support for custom rules
+//        /// <summary>Creates a new ICUTransformFilterFactory</summary>
+//        public ICUTransformFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            string id = Require(args, "id");
+//            string direction = Get(args, "direction", new string[] { "forward", "reverse" }, "forward", false);
+//            int dir = "forward".Equals(direction) ? Transliterator.FORWARD : Transliterator.REVERSE;
+//            transliterator = Transliterator.getInstance(id, dir);
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUTransformFilter(input, transliterator);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs
new file mode 100644
index 0000000..c124a88
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/BreakIteratorWrapper.cs
@@ -0,0 +1,166 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Contain all the issues surrounding BreakIterators in ICU in one place.
+//    /// Basically this boils down to the fact that they aren't very friendly to any
+//    /// sort of OO design.
+//    /// <para/>
+//    /// http://bugs.icu-project.org/trac/ticket/5901: RBBI.getRuleStatus(), hoist to
+//    /// BreakIterator from RuleBasedBreakIterator
+//    /// <para/>
+//    /// DictionaryBasedBreakIterator is a subclass of RuleBasedBreakIterator, but
+//    /// doesn't actually behave as a subclass: it always returns 0 for
+//    /// getRuleStatus(): 
+//    /// http://bugs.icu-project.org/trac/ticket/4730: Thai RBBI, no boundary type
+//    /// tags
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    internal abstract class BreakIteratorWrapper
+//    {
+//        protected readonly CharArrayIterator textIterator = new CharArrayIterator();
+//        protected char[] text;
+//        protected int start;
+//        protected int length;
+
+//        public abstract int Next();
+//        public abstract int Current { get; }
+//        public abstract int GetRuleStatus();
+//        public abstract void SetText(CharacterIterator text);
+
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            this.start = start;
+//            this.length = length;
+//            textIterator.SetText(text, start, length);
+//            SetText(textIterator);
+//        }
+
+//        /**
+//         * If its a RuleBasedBreakIterator, the rule status can be used for token type. If its
+//         * any other BreakIterator, the rulestatus method is not available, so treat
+//         * it like a generic BreakIterator.
+//         */
+//        public static BreakIteratorWrapper Wrap(Icu.BreakIterator breakIterator)
+//        {
+//            if (breakIterator is Icu.RuleBasedBreakIterator)
+//                return new RBBIWrapper((Icu.RuleBasedBreakIterator)breakIterator);
+//            else
+//                return new BIWrapper(breakIterator);
+//        }
+
+//        /**
+//         * RuleBasedBreakIterator wrapper: RuleBasedBreakIterator (as long as its not
+//         * a DictionaryBasedBreakIterator) behaves correctly.
+//         */
+//        private sealed class RBBIWrapper : BreakIteratorWrapper
+//        {
+//            private readonly Icu.RuleBasedBreakIterator rbbi;
+
+//            internal RBBIWrapper(Icu.RuleBasedBreakIterator rbbi)
+//            {
+//                this.rbbi = rbbi;
+//            }
+
+//            public override int Current
+//            {
+//                get { return rbbi.Current; }
+//            }
+
+//            public override int GetRuleStatus()
+//            {
+//                return rbbi.GetRuleStatus();
+//            }
+
+//            public override int Next()
+//            {
+//                return rbbi.Next();
+//            }
+
+//            public override void SetText(CharacterIterator text)
+//            {
+//                rbbi.SetText(text);
+//            }
+//        }
+
+//        /**
+//         * Generic BreakIterator wrapper: Either the rulestatus method is not
+//         * available or always returns 0. Calculate a rulestatus here so it behaves
+//         * like RuleBasedBreakIterator.
+//         * 
+//         * Note: This is slower than RuleBasedBreakIterator.
+//         */
+//        private sealed class BIWrapper : BreakIteratorWrapper
+//        {
+//            private readonly Support.BreakIterator bi;
+//            private int status;
+
+//            internal BIWrapper(Support.BreakIterator bi)
+//            {
+//                this.bi = bi;
+//            }
+
+//            public override int Current
+//            {
+//                get { return bi.Current; }
+//            }
+
+//            public override int GetRuleStatus()
+//            {
+//                return status;
+//            }
+
+//            public override int Next()
+//            {
+//                int current = bi.Current;
+//                int next = bi.Next();
+//                status = CalcStatus(current, next);
+//                return next;
+//            }
+
+//            private int CalcStatus(int current, int next)
+//            {
+//                if (current == Support.BreakIterator.DONE || next == Support.BreakIterator.DONE)
+//                    return RuleBasedBreakIterator.WORD_NONE;
+
+//                int begin = start + current;
+//                int end = start + next;
+
+//                int codepoint;
+//                for (int i = begin; i < end; i += UTF16.getCharCount(codepoint))
+//                {
+//                    codepoint = UTF16.charAt(text, 0, end, begin);
+
+//                    if (UCharacter.isDigit(codepoint))
+//                        return RuleBasedBreakIterator.WORD_NUMBER;
+//                    else if (UCharacter.isLetter(codepoint))
+//                    {
+//                        // TODO: try to separately specify ideographic, kana? 
+//                        // [currently all bundled as letter for this case]
+//                        return RuleBasedBreakIterator.WORD_LETTER;
+//                    }
+//                }
+
+//                return RuleBasedBreakIterator.WORD_NONE;
+//            }
+
+//            public override void SetText(CharacterIterator text)
+//            {
+//                bi.SetText(text);
+//                status = RuleBasedBreakIterator.WORD_NONE;
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CharArrayIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CharArrayIterator.cs
new file mode 100644
index 0000000..209d583
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CharArrayIterator.cs
@@ -0,0 +1,134 @@
+using Lucene.Net.Support;
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Lucene.Net.Analysis.Icu.Segmentation
+{
+    /// <summary>
+    /// Wraps a region of a char[] as a <see cref="CharacterIterator"/> for processing with a BreakIterator.
+    /// <para/>
+    /// The region is defined by <see cref="SetText(char[], int, int)"/>. Positions exposed through
+    /// <see cref="BeginIndex"/>, <see cref="EndIndex"/>, <see cref="Index"/> and
+    /// <see cref="SetIndex(int)"/> are relative to the start of that region, not to the
+    /// start of the underlying array.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    internal sealed class CharArrayIterator : CharacterIterator
+    {
+        private char[] array;
+        private int start;  // offset of the region within array
+        private int index;  // absolute position within array (start <= index <= limit)
+        private int length; // number of chars in the region
+        private int limit;  // absolute end of the region (start + length)
+
+        /// <summary>
+        /// The underlying buffer passed to <see cref="SetText(char[], int, int)"/> (not a copy).
+        /// </summary>
+        [WritableArray]
+        [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
+        public char[] Text
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// Offset of the examined region within <see cref="Text"/>.
+        /// </summary>
+        public int Start
+        {
+            get { return start; }
+        }
+
+        /// <summary>
+        /// Number of chars in the examined region.
+        /// </summary>
+        public int Length
+        {
+            get { return length; }
+        }
+
+        /// <summary>
+        /// Set a new region of text to be examined by this iterator
+        /// and position the iterator at the beginning of that region.
+        /// </summary>
+        /// <param name="array">text buffer to examine</param>
+        /// <param name="start">offset into buffer</param>
+        /// <param name="length"> maximum length to examine</param>
+        public void SetText(char[] array, int start, int length)
+        {
+            this.array = array;
+            this.start = start;
+            this.index = start;
+            this.length = length;
+            this.limit = start + length;
+        }
+
+        /// <summary>
+        /// The char at the current position, or DONE when positioned at the end of the region.
+        /// </summary>
+        public override char Current
+        {
+            get { return (index == limit) ? DONE : array[index]; }
+        }
+
+        /// <summary>
+        /// Moves to the beginning of the region and returns the char there
+        /// (DONE if the region is empty).
+        /// </summary>
+        public override char First()
+        {
+            index = start;
+            return Current;
+        }
+
+        /// <summary>
+        /// Always 0: positions are reported relative to the start of the region.
+        /// </summary>
+        public override int BeginIndex
+        {
+            get { return 0; }
+        }
+
+        /// <summary>
+        /// The region-relative end position, equal to <see cref="Length"/>.
+        /// </summary>
+        public override int EndIndex
+        {
+            get { return length; }
+        }
+
+        /// <summary>
+        /// The current position, relative to the start of the region.
+        /// </summary>
+        public override int Index
+        {
+            get { return index - start; }
+        }
+
+        /// <summary>
+        /// Moves to the last char of the region and returns it
+        /// (DONE if the region is empty).
+        /// </summary>
+        public override char Last()
+        {
+            // For an empty region there is no last char; stay at the limit so Current yields DONE.
+            index = (limit == start) ? limit : limit - 1;
+            return Current;
+        }
+
+        /// <summary>
+        /// Advances by one char and returns the char at the new position. When the end of the
+        /// region is reached, the position is clamped to the end and DONE is returned.
+        /// </summary>
+        public override char Next()
+        {
+            if (++index >= limit)
+            {
+                index = limit;
+                return DONE;
+            }
+            else
+            {
+                return Current;
+            }
+        }
+
+        /// <summary>
+        /// Steps back by one char and returns the char at the new position. When already at the
+        /// beginning of the region, the position is left there and DONE is returned.
+        /// </summary>
+        public override char Previous()
+        {
+            if (--index < start)
+            {
+                index = start;
+                return DONE;
+            }
+            else
+            {
+                return Current;
+            }
+        }
+
+        /// <summary>
+        /// Moves to the given region-relative position and returns the char there.
+        /// </summary>
+        /// <param name="position">position between <see cref="BeginIndex"/> and <see cref="EndIndex"/> (inclusive)</param>
+        /// <exception cref="ArgumentException">if <paramref name="position"/> is outside that range</exception>
+        public override char SetIndex(int position)
+        {
+            if (position < BeginIndex || position > EndIndex)
+                throw new ArgumentException("Illegal Position: " + position);
+            index = start + position;
+            return Current;
+        }
+
+        /// <summary>
+        /// Returns the text of the region being examined, i.e. the <see cref="Length"/> chars
+        /// beginning at <see cref="Start"/>, so that positions in the returned string line up
+        /// with <see cref="BeginIndex"/>, <see cref="EndIndex"/> and <see cref="Index"/>.
+        /// Returns an empty string if no buffer has been set.
+        /// </summary>
+        public override string GetTextAsString()
+        {
+            // No buffer yet (SetText not called or called with null): keep returning an empty string.
+            if (array == null)
+            {
+                return string.Empty;
+            }
+
+            // Only the examined region is part of this iterator's text; chars outside
+            // [start, start + length) may be stale buffer content.
+            return new string(array, start, length);
+        }
+
+        /// <summary>
+        /// Creates a new iterator over the same buffer (shared, not copied) and region,
+        /// positioned at the same index as this one.
+        /// </summary>
+        public override object Clone()
+        {
+            CharArrayIterator clone = new CharArrayIterator();
+            clone.SetText(array, start, length);
+            clone.index = index;
+            return clone;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs
 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs
new file mode 100644
index 0000000..a004193
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/CompositeBreakIterator.cs
@@ -0,0 +1,132 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// An internal BreakIterator for multilingual text, following 
recommendations
+//    /// from: UAX #29: Unicode Text Segmentation. 
(http://unicode.org/reports/tr29/)
+//    /// <para/>
+//    /// See http://unicode.org/reports/tr29/#Tailoring for the motivation of 
this
+//    /// design.
+//    /// <para/>
+//    /// Text is first divided into script boundaries. The processing is then
+//    /// delegated to the appropriate break iterator for that specific script.
+//    /// <para/>
+//    /// This break iterator also allows you to retrieve the ISO 15924 script 
code
+//    /// associated with a piece of text.
+//    /// <para/>
+//    /// See also UAX #29, UTR #24
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    internal sealed class CompositeBreakIterator
+//    {
+//        private readonly ICUTokenizerConfig config;
+//        private readonly BreakIteratorWrapper[] wordBreakers = new 
BreakIteratorWrapper[UScript.CODE_LIMIT];
+
+//        private BreakIteratorWrapper rbbi;
+//        private readonly ScriptIterator scriptIterator;
+
+//        private char[] text;
+
+//        public CompositeBreakIterator(ICUTokenizerConfig config)
+//        {
+//            this.config = config;
+//            this.scriptIterator = new ScriptIterator(config.CombineCJ);
+//        }
+
+//        /**
+//         * Retrieve the next break position. If the RBBI range is exhausted 
within the
+//         * script boundary, examine the next script boundary.
+//         * 
+//         * @return the next break position or BreakIterator.DONE
+//         */
+//        public int Next()
+//        {
+//            int next = rbbi.Next();
+//            while (next == Support.BreakIterator.DONE && 
scriptIterator.Next())
+//            {
+//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
+//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
+//                    scriptIterator.GetScriptLimit() - 
scriptIterator.GetScriptStart());
+//                next = rbbi.Next();
+//            }
+//            return (next == Support.BreakIterator.DONE) ? 
Support.BreakIterator.DONE : next
+//                + scriptIterator.GetScriptStart();
+//        }
+
+//        /**
+//         * Retrieve the current break position.
+//         * 
+//         * @return the current break position or BreakIterator.DONE
+//         */
+//        public int Current
+//        {
+//            get
+//            {
+//                int current = rbbi.Current;
+//                return (current == Support.BreakIterator.DONE) ? 
Support.BreakIterator.DONE : current
+//                    + scriptIterator.GetScriptStart();
+//            }
+//        }
+
+//        /**
+//         * Retrieve the rule status code (token type) from the underlying 
break
+//         * iterator
+//         * 
+//         * @return rule status code (see RuleBasedBreakIterator constants)
+//         */
+//        public int GetRuleStatus()
+//        {
+//            return rbbi.GetRuleStatus();
+//        }
+
+//        /**
+//         * Retrieve the UScript script code for the current token. This code 
can be
+//         * decoded with UScript into a name or ISO 15924 code.
+//         * 
+//         * @return UScript script code for the current token.
+//         */
+//        public int GetScriptCode()
+//        {
+//            return scriptIterator.GetScriptCode();
+//        }
+
+//        /**
+//         * Set a new region of text to be examined by this iterator
+//         * 
+//         * @param text buffer of text
+//         * @param start offset into buffer
+//         * @param length maximum length to examine
+//         */
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            scriptIterator.SetText(text, start, length);
+//            if (scriptIterator.Next())
+//            {
+//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
+//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
+//                    scriptIterator.GetScriptLimit() - 
scriptIterator.GetScriptStart());
+//            }
+//            else
+//            {
+//                rbbi = GetBreakIterator(UScript.COMMON);
+//                rbbi.SetText(text, 0, 0);
+//            }
+//        }
+
+//        private BreakIteratorWrapper GetBreakIterator(int scriptCode)
+//        {
+//            if (wordBreakers[scriptCode] == null)
+//                wordBreakers[scriptCode] = 
BreakIteratorWrapper.Wrap(config.GetBreakIterator(scriptCode));
+//            return wordBreakers[scriptCode];
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs
 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs
new file mode 100644
index 0000000..fc2a989
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/DefaultICUTokenizerConfig.cs
@@ -0,0 +1,127 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Standard;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Globalization;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Default <see cref="ICUTokenizerConfig"/> that is generally applicable
+//    /// to many languages.
+//    /// </summary>
+//    /// <remarks>
+//    /// Generally tokenizes Unicode text according to UAX#29 
+//    /// ({@link BreakIterator#getWordInstance(ULocale) 
BreakIterator.getWordInstance(ULocale.ROOT)}), 
+//    /// but with the following tailorings:
+//    /// <list type="bullet">
+//    ///     <item><description>Thai, Lao, and CJK text is broken into words 
with a dictionary.</description></item>
+//    ///     <item><description>Myanmar, and Khmer text is broken into 
syllables based on custom BreakIterator rules.</description></item>
+//    /// </list>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </remarks>
+//    public class DefaultICUTokenizerConfig : ICUTokenizerConfig
+//    {
+//        /** Token type for words containing ideographic characters */
+//        public static readonly string WORD_IDEO = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
+//        /** Token type for words containing Japanese hiragana */
+//        public static readonly string WORD_HIRAGANA = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
+//        /** Token type for words containing Japanese katakana */
+//        public static readonly string WORD_KATAKANA = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
+//        /** Token type for words containing Korean hangul  */
+//        public static readonly string WORD_HANGUL = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
+//        /** Token type for words that contain letters */
+//        public static readonly string WORD_LETTER = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
+//        /** Token type for words that appear to be numbers */
+//        public static readonly string WORD_NUMBER = 
StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
+
+//        /*
+//         * the default breakiterators in use. these can be expensive to
+//         * instantiate, cheap to clone.
+//         */
+//        // we keep the cjk breaking separate, that's because it cannot be 
customized (because dictionary
+//        // is only triggered when kind = WORD, but kind = LINE by default 
and we have no non-evil way to change it)
+//        private static readonly Icu.BreakIterator cjkBreakIterator = new 
Icu.RuleBasedBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new 
Locale()); //BreakIterator.getWordInstance(ULocale.ROOT);
+//                                                                             
                                                                                
                   // the same as ROOT, except no dictionary segmentation for 
cjk
+//        private static readonly Icu.BreakIterator defaultBreakIterator =
+//            ReadBreakIterator("Default.brk");
+//        private static readonly Icu.BreakIterator khmerBreakIterator =
+//            ReadBreakIterator("Khmer.brk");
+//        private static readonly Icu.BreakIterator myanmarBreakIterator =
+//            ReadBreakIterator("Myanmar.brk");
+
+//        // TODO: deprecate this boolean? you only care if you are doing 
super-expert stuff...
+//        private readonly bool cjkAsWords;
+
+//        /** 
+//         * Creates a new config. This object is lightweight, but the first
+//         * time the class is referenced, breakiterators will be initialized.
+//         * @param cjkAsWords true if cjk text should undergo 
dictionary-based segmentation, 
+//         *                   otherwise text will be segmented according to 
UAX#29 defaults.
+//         *                   If this is true, all Han+Hiragana+Katakana 
words will be tagged as
+//         *                   IDEOGRAPHIC.
+//         */
+//        public DefaultICUTokenizerConfig(bool cjkAsWords)
+//        {
+//            this.cjkAsWords = cjkAsWords;
+//        }
+
+//        public override bool CombineCJ
+//        {
+//            get { return cjkAsWords; }
+//        }
+
+//        public override Icu.BreakIterator GetBreakIterator(int script)
+//        {
+//            switch (script)
+//            {
+//                case UScript.KHMER: return 
(Icu.BreakIterator)khmerBreakIterator.Clone();
+//                case UScript.MYANMAR: return 
(Icu.BreakIterator)myanmarBreakIterator.Clone();
+//                case UScript.JAPANESE: return 
(Icu.BreakIterator)cjkBreakIterator.Clone();
+//                default: return 
(Icu.BreakIterator)defaultBreakIterator.Clone();
+//            }
+//        }
+
+//        public override string GetType(int script, int ruleStatus)
+//        {
+//            switch (ruleStatus)
+//            {
+//                case RuleBasedBreakIterator.WORD_IDEO:
+//                    return WORD_IDEO;
+//                case RuleBasedBreakIterator.WORD_KANA:
+//                    return script == UScript.HIRAGANA ? WORD_HIRAGANA : 
WORD_KATAKANA;
+//                case RuleBasedBreakIterator.WORD_LETTER:
+//                    return script == UScript.HANGUL ? WORD_HANGUL : 
WORD_LETTER;
+//                case RuleBasedBreakIterator.WORD_NUMBER:
+//                    return WORD_NUMBER;
+//                default: /* some other custom code */
+//                    return "<OTHER>";
+//            }
+//        }
+
+//        private static RuleBasedBreakIterator ReadBreakIterator(string 
filename)
+//        {
+//            Stream @is =
+//              
typeof(DefaultICUTokenizerConfig).Assembly.FindAndGetManifestResourceStream(typeof(DefaultICUTokenizerConfig),
 filename);
+//            try
+//            {
+//                RuleBasedBreakIterator bi =
+//                    RuleBasedBreakIterator.GetInstanceFromCompiledRules(@is);
+//                @is.Dispose();
+//                return bi;
+//            }
+//            catch (IOException e)
+//            {
+//                throw new Exception(e.ToString(), e);
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizer.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizer.cs 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizer.cs
new file mode 100644
index 0000000..7677c0c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizer.cs
@@ -0,0 +1,229 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Analysis.ICU.TokenAttributes;
+//using Lucene.Net.Analysis.TokenAttributes;
+//using System;
+//using System.Collections.Generic;
+//using System.Diagnostics;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Breaks text into words according to UAX #29: Unicode Text 
Segmentation
+//    /// (http://www.unicode.org/reports/tr29/)
+//    /// <para/>
+//    /// Words are broken across script boundaries, then segmented according 
to
+//    /// the BreakIterator and typing provided by the <see 
cref="ICUTokenizerConfig"/>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    /// <seealso cref="ICUTokenizerConfig"/>
+//    public sealed class ICUTokenizer : Tokenizer
+//    {
+//        private static readonly int IOBUFFER = 4096;
+//        private readonly char[] buffer = new char[IOBUFFER];
+//        /** true length of text in the buffer */
+//        private int length = 0;
+//        /** length in buffer that can be evaluated safely, up to a safe end 
point */
+//        private int usableLength = 0;
+//        /** accumulated offset of previous buffers for this reader, for 
offsetAtt */
+//        private int offset = 0;
+
+//        private readonly CompositeBreakIterator breaker; /* tokenizes a 
char[] of text */
+//        private readonly ICUTokenizerConfig config;
+//        private readonly IOffsetAttribute offsetAtt;
+//        private readonly ICharTermAttribute termAtt;
+//        private readonly ITypeAttribute typeAtt;
+//        private readonly IScriptAttribute scriptAtt;
+
+//        /**
+//        * Construct a new ICUTokenizer that breaks text into words from the 
given
+//        * Reader.
+//        * <p>
+//        * The default script-specific handling is used.
+//        * <p>
+//        * The default attribute factory is used.
+//        * 
+//        * @param input Reader containing text to tokenize.
+//        * @see DefaultICUTokenizerConfig
+//        */
+//        public ICUTokenizer(TextReader input)
+//            : this(input, new DefaultICUTokenizerConfig(true))
+//        {
+//        }
+
+//        /**
+//         * Construct a new ICUTokenizer that breaks text into words from the 
given
+//         * Reader, using a tailored BreakIterator configuration.
+//         * <p>
+//         * The default attribute factory is used.
+//         *
+//         * @param input Reader containing text to tokenize.
+//         * @param config Tailored BreakIterator configuration 
+//         */
+//        public ICUTokenizer(TextReader input, ICUTokenizerConfig config)
+//            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config)
+//        {
+//        }
+
+//        /**
+//         * Construct a new ICUTokenizer that breaks text into words from the 
given
+//         * Reader, using a tailored BreakIterator configuration.
+//         *
+//         * @param factory AttributeFactory to use
+//         * @param input Reader containing text to tokenize.
+//         * @param config Tailored BreakIterator configuration 
+//         */
+//        public ICUTokenizer(AttributeFactory factory, TextReader input, 
ICUTokenizerConfig config)
+//            : base(factory, input)
+//        {
+//            this.config = config;
+//            breaker = new CompositeBreakIterator(config);
+
+//            this.offsetAtt = AddAttribute<IOffsetAttribute>();
+//            this.termAtt = AddAttribute<ICharTermAttribute>();
+//            this.typeAtt = AddAttribute<ITypeAttribute>();
+//            this.scriptAtt = AddAttribute<IScriptAttribute>();
+//        }
+
+
+//        public override bool IncrementToken()
+//        {
+//            ClearAttributes();
+//            if (length == 0)
+//                Refill();
+//            while (!IncrementTokenBuffer())
+//            {
+//                Refill();
+//                if (length <= 0) // no more bytes to read;
+//                    return false;
+//            }
+//            return true;
+//        }
+
+
+//        public override void Reset()
+//        {
+//            base.Reset();
+//            breaker.SetText(buffer, 0, 0);
+//            length = usableLength = offset = 0;
+//        }
+
+//        public override void End()
+//        {
+//            base.End();
+//            int finalOffset = (length < 0) ? offset : offset + length;
+//            offsetAtt.SetOffset(CorrectOffset(finalOffset), 
CorrectOffset(finalOffset));
+//        }
+
+//        /*
+//         * This tokenizes text based upon the longest matching rule, and 
because of 
+//         * this, isn't friendly to a Reader.
+//         * 
+//         * Text is read from the input stream in 4kB chunks. Within a 4kB 
chunk of
+//         * text, the last unambiguous break point is found (in this 
implementation:
+//         * white space character) Any remaining characters represent 
possible partial
+//         * words, so are appended to the front of the next chunk.
+//         * 
+//         * There is the possibility that there are no unambiguous break 
points within
+//         * an entire 4kB chunk of text (binary data). So there is a maximum 
word limit
+//         * of 4kB since it will not try to grow the buffer in this case.
+//         */
+
+//        /**
+//         * Returns the last unambiguous break position in the text.
+//         * 
+//         * @return position of character, or -1 if one does not exist
+//         */
+//        private int FindSafeEnd()
+//        {
+//            for (int i = length - 1; i >= 0; i--)
+//                if (char.IsWhiteSpace(buffer[i]))
+//                    return i + 1;
+//            return -1;
+//        }
+
+//        /**
+//         * Refill the buffer, accumulating the offset and setting 
usableLength to the
+//         * last unambiguous break position
+//         * 
+//         * @throws IOException If there is a low-level I/O error.
+//         */
+//        private void Refill()
+//        {
+//            offset += usableLength;
+//            int leftover = length - usableLength;
+//            System.Array.Copy(buffer, usableLength, buffer, 0, leftover);
+//            int requested = buffer.Length - leftover;
+//            int returned = Read(m_input, buffer, leftover, requested);
+//            length = returned + leftover;
+//            if (returned < requested) /* reader has been emptied, process 
the rest */
+//                usableLength = length;
+//            else
+//            { /* still more data to be read, find a safe-stopping place */
+//                usableLength = FindSafeEnd();
+//                if (usableLength < 0)
+//                    usableLength = length; /*
+//                                * more than IOBUFFER of text without space,
+//                                * gonna possibly truncate tokens
+//                                */
+//            }
+
+//            breaker.SetText(buffer, 0, Math.Max(0, usableLength));
+//        }
+
+//        // TODO: refactor to a shared readFully somewhere
+//        // (NGramTokenizer does this too):
+//        /** commons-io's readFully, but without bugs if offset != 0 */
+//        private static int Read(TextReader input, char[] buffer, int offset, 
int length)
+//        {
+//            Debug.Assert(length >= 0, "length must not be negative: " + 
length);
+
+//            int remaining = length;
+//            while (remaining > 0)
+//            {
+//                int location = length - remaining;
+//                int count = input.Read(buffer, offset + location, remaining);
+//                if (-1 == count)
+//                { // EOF
+//                    break;
+//                }
+//                remaining -= count;
+//            }
+//            return length - remaining;
+//        }
+
+//        /*
+//         * return true if there is a token from the buffer, or false if it is
+//         * exhausted.
+//         */
+//        private bool IncrementTokenBuffer()
+//        {
+//            int start = breaker.Current;
+//            if (start == Support.BreakIterator.DONE)
+//                return false; // BreakIterator exhausted
+
+//            // find the next set of boundaries, skipping over non-tokens 
(rule status 0)
+//            int end = breaker.Next();
+//            while (start != Support.BreakIterator.DONE && 
breaker.GetRuleStatus() == 0)
+//            {
+//                start = end;
+//                end = breaker.Next();
+//            }
+
+//            if (start == Support.BreakIterator.DONE)
+//                return false; // BreakIterator exhausted
+
+//            termAtt.CopyBuffer(buffer, start, end - start);
+//            offsetAtt.SetOffset(CorrectOffset(offset + start), 
CorrectOffset(offset + end));
+//            typeAtt.Type = config.GetType(breaker.GetScriptCode(), 
breaker.GetRuleStatus());
+//            scriptAtt.Code = breaker.GetScriptCode();
+
+//            return true;
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs
new file mode 100644
index 0000000..0c13316
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerConfig.cs
@@ -0,0 +1,33 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Support;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// Class that allows for tailored Unicode Text Segmentation on
+//    /// a per-writing system basis.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public abstract class ICUTokenizerConfig
+//    {
+//        /// <summary>
+//        /// Sole constructor. (For invocation by subclass 
+//        /// constructors, typically implicit.)
+//        /// </summary>
+//        public ICUTokenizerConfig() { }
+//        /// <summary>
+//        /// Return a breakiterator capable of processing a given script.
+//        /// </summary>
+//        public abstract Icu.BreakIterator GetBreakIterator(int script);
+//        /// <summary>
+//        /// Return a token type value for a given script and BreakIterator 
rule status.
+//        /// </summary>
+//        public abstract string GetType(int script, int ruleStatus);
+//        /// <summary>
+//        /// true if Han, Hiragana, and Katakana scripts should all be 
returned as Japanese
+//        /// </summary>
+//        public abstract bool CombineCJ { get; }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerFactory.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerFactory.cs 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerFactory.cs
new file mode 100644
index 0000000..14aa9c0
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ICUTokenizerFactory.cs
@@ -0,0 +1,139 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Icu;
+//using Lucene.Net.Analysis.Util;
+//using Lucene.Net.Support;
+//using Lucene.Net.Util;
+//using System;
+//using System.Collections.Generic;
+//using System.Diagnostics;
+//using System.IO;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+
+//    public class ICUTokenizerFactory : TokenizerFactory, IResourceLoaderAware
+//    {
+//        internal static readonly string RULEFILES = "rulefiles";
+//        private readonly IDictionary<int, string> tailored;
+//        private ICUTokenizerConfig config;
+//        private readonly bool cjkAsWords;
+
+//        /// <summary>Creates a new ICUTokenizerFactory</summary>
+//        public ICUTokenizerFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            tailored = new Dictionary<int, string>();
+//            string rulefilesArg = Get(args, RULEFILES);
+//            if (rulefilesArg != null)
+//            {
+//                IList<string> scriptAndResourcePaths = 
SplitFileNames(rulefilesArg);
+//                foreach (string scriptAndResourcePath in 
scriptAndResourcePaths)
+//                {
+//                    int colonPos = scriptAndResourcePath.IndexOf(":");
+//                    string scriptCode = scriptAndResourcePath.Substring(0, 
colonPos - 0).Trim();
+//                    string resourcePath = 
scriptAndResourcePath.Substring(colonPos + 1).Trim();
+//                    
tailored[UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode)] = 
resourcePath;
+//                }
+//            }
+//            cjkAsWords = GetBoolean(args, "cjkAsWords", true);
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public virtual void Inform(IResourceLoader loader)
+//        {
+//            Debug.Assert(tailored != null, "init must be called first!");
+//            if (tailored.Count == 0)
+//            {
+//                config = new DefaultICUTokenizerConfig(cjkAsWords);
+//            }
+//            else
+//            {
+//                config = new 
DefaultICUTokenizerConfigAnonymousHelper(cjkAsWords, tailored, loader);
+
+//                //BreakIterator[] breakers = new 
BreakIterator[UScript.CODE_LIMIT];
+//                //foreach (var entry in tailored)
+//                //{
+//                //    int code = entry.Key;
+//                //    string resourcePath = entry.Value;
+//                //    breakers[code] = ParseRules(resourcePath, loader);
+//                //}
+//                //            config = new 
DefaultICUTokenizerConfig(cjkAsWords)
+//                //            {
+
+//                //    public override BreakIterator GetBreakIterator(int 
script)
+//                //    {
+//                //        if (breakers[script] != null)
+//                //        {
+//                //            return (BreakIterator)breakers[script].clone();
+//                //        }
+//                //        else
+//                //        {
+//                //            return base.GetBreakIterator(script);
+//                //        }
+//                //    }
+//                //    // TODO: we could also allow codes->types mapping
+//                //};
+//            }
+//        }
+
+//        private class DefaultICUTokenizerConfigAnonymousHelper : 
DefaultICUTokenizerConfig
+//        {
+//            private readonly Icu.BreakIterator[] breakers;
+//            public DefaultICUTokenizerConfigAnonymousHelper(bool cjkAsWords, 
IDictionary<int, string> tailored, IResourceLoader loader)
+//                : base(cjkAsWords)
+//            {
+//                breakers = new Icu.BreakIterator[UScript.CODE_LIMIT];
+//                foreach (var entry in tailored)
+//                {
+//                    int code = entry.Key;
+//                    string resourcePath = entry.Value;
+//                    breakers[code] = ParseRules(resourcePath, loader);
+//                }
+//            }
+
+//            public override Icu.BreakIterator GetBreakIterator(int script)
+//            {
+//                if (breakers[script] != null)
+//                {
+//                    return (Icu.BreakIterator)breakers[script].Clone();
+//                }
+//                else
+//                {
+//                    return base.GetBreakIterator(script);
+//                }
+//            }
+
+//            private Icu.BreakIterator ParseRules(string filename, 
IResourceLoader loader)
+//            {
+//                StringBuilder rules = new StringBuilder();
+//                Stream rulesStream = loader.OpenResource(filename);
+//                using (TextReader reader = 
IOUtils.GetDecodingReader(rulesStream, Encoding.UTF8))
+//                {
+//                    string line = null;
+//                    while ((line = reader.ReadLine()) != null)
+//                    {
+//                        if (!line.StartsWith("#", StringComparison.Ordinal))
+//                        {
+//                            rules.Append(line);
+//                        }
+//                        rules.Append('\n');
+//                    }
+//                }
+//                return new RuleBasedBreakIterator(rules.ToString());
+//            }
+//        }
+
+//        public override Tokenizer Create(AttributeSource.AttributeFactory 
factory, TextReader input)
+//        {
+//            Debug.Assert(config != null, "inform must be called first!");
+//            return new ICUTokenizer(factory, input, config);
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ScriptIterator.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ScriptIterator.cs 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ScriptIterator.cs
new file mode 100644
index 0000000..f328851
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/Segmentation/ScriptIterator.cs
@@ -0,0 +1,206 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Text.RegularExpressions;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.Segmentation
+//{
+//    /// <summary>
+//    /// An iterator that locates ISO 15924 script boundaries in text. 
+//    /// </summary>
+//    /// <remarks>
+//    /// This is not the same as simply looking at the Unicode block, or even 
the 
+//    /// Script property. Some characters are 'common' across multiple 
scripts, and
+//    /// some 'inherit' the script value of text surrounding them.
+//    /// <para/>
+//    /// This is similar to ICU (internal-only) UScriptRun, with the following
+//    /// differences:
+//    /// <list type="bullet">
+//    ///     <item><description>
+//    ///         Doesn't attempt to match paired punctuation. For 
tokenization purposes, this
+//    ///         is not necessary. Its also quite expensive. 
+//    ///     </description></item>
+//    ///     <item><description>
+//    ///         Non-spacing marks inherit the script of their base 
character, following 
+//    ///         recommendations from UTR #24.
+//    ///     </description></item>
+//    /// </list>
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </remarks>
+//    internal sealed class ScriptIterator
+//    {
+//        private char[] text;
+//        private int start;
+//        private int limit;
+//        private int index;
+
+//        private int scriptStart;
+//        private int scriptLimit;
+//        private int scriptCode;
+
+//        private readonly bool combineCJ;
+
+//        /**
+//         * @param combineCJ if true: Han,Hiragana,Katakana will all return 
as {@link UScript#JAPANESE}
+//         */
+//        internal ScriptIterator(bool combineCJ)
+//        {
+//            this.combineCJ = combineCJ;
+//        }
+
+//        /**
+//         * Get the start of this script run
+//         * 
+//         * @return start position of script run
+//         */
+//        public int ScriptStart
+//        {
+//            get { return scriptStart; }
+//        }
+
+//        /**
+//         * Get the index of the first character after the end of this script 
run
+//         * 
+//         * @return position of the first character after this script run
+//         */
+//        public int ScriptLimit
+//        {
+//            get { return scriptLimit; }
+//        }
+
+//        /**
+//         * Get the UScript script code for this script run
+//         * 
+//         * @return code for the script of the current run
+//         */
+//        public int ScriptCode
+//        {
+//            get { return scriptCode; }
+//        }
+
+//        /**
+//         * Iterates to the next script run, returning true if one exists.
+//         * 
+//         * @return true if there is another script run, false otherwise.
+//         */
+//        public bool Next()
+//        {
+//            if (scriptLimit >= limit)
+//                return false;
+
+//            scriptCode = UScript.COMMON;
+//            scriptStart = scriptLimit;
+
+//            while (index < limit)
+//            {
+//                //int ch = UTF16.charAt(text, start, limit, index - start);
+//                int ch = Encoding.Unicode.(text, start, limit);
+//                int sc = GetScript(ch);
+
+//                /*
+//                 * From UTR #24: Implementations that determine the 
boundaries between
+//                 * characters of given scripts should never break between a 
non-spacing
+//                 * mark and its base character. Thus for boundary 
determinations and
+//                 * similar sorts of processing, a non-spacing mark — 
whatever its script
+//                 * value — should inherit the script value of its base 
character.
+//                 */
+//                if (isSameScript(scriptCode, sc)
+//                    || UCharacter.getType(ch) == 
ECharacterCategory.NON_SPACING_MARK)
+//                {
+//                    //index += UTF16.getCharCount(ch);
+//                    index += Encoding.Unicode.GetCharCount()
+
+//                    /*
+//                     * Inherited or Common becomes the script code of the 
surrounding text.
+//                     */
+//                    if (scriptCode <= UScript.INHERITED && sc > 
UScript.INHERITED)
+//                    {
+//                        scriptCode = sc;
+//                    }
+
+//                }
+//                else
+//                {
+//                    break;
+//                }
+//            }
+
+//            scriptLimit = index;
+//            return true;
+//        }
+
+//        /** Determine if two scripts are compatible. */
+//        private static bool IsSameScript(int scriptOne, int scriptTwo)
+//        {
+//            return scriptOne <= UScript.INHERITED || scriptTwo <= 
UScript.INHERITED
+//                || scriptOne == scriptTwo;
+//        }
+
+//        /**
+//         * Set a new region of text to be examined by this iterator
+//         * 
+//         * @param text text buffer to examine
+//         * @param start offset into buffer
+//         * @param length maximum length to examine
+//         */
+//        public void SetText(char[] text, int start, int length)
+//        {
+//            this.text = text;
+//            this.start = start;
+//            this.index = start;
+//            this.limit = start + length;
+//            this.scriptStart = start;
+//            this.scriptLimit = start;
+//            this.scriptCode = UScript.INVALID_CODE;
+//        }
+
+//        /** linear fast-path for basic latin case */
+//        private static readonly int[] basicLatin = new int[128];
+
+//        static ScriptIterator()
+//        {
+//            for (int i = 0; i < basicLatin.Length; i++)
+//                basicLatin[i] = UScript.GetScript(i);
+//        }
+
+//        /** fast version of UScript.getScript(). Basic Latin is an array 
lookup */
+//        private int GetScript(int codepoint)
+//        {
+//            if (0 <= codepoint && codepoint < basicLatin.Length)
+//            {
+//                return basicLatin[codepoint];
+//            }
+//            else
+//            {
+//                //int script = UScript.GetScript(codepoint);
+//                if (combineCJ)
+//                {
+//                    if (Regex.IsMatch(new 
string(Support.Character.ToChars(codepoint)), 
@"\p{IsHangulCompatibilityJamo}+|\p{IsHiragana}+|\p{IsKatakana}+"))
+//                    //if (script == UScript.HAN || script == 
UScript.HIRAGANA || script == UScript.KATAKANA)
+//                    {
+//                        return UScript.JAPANESE;
+//                    }
+//                    else if (codepoint >= 0xFF10 && codepoint <= 0xFF19)
+//                    {
+//                        // when using CJK dictionary breaking, don't let 
full width numbers go to it, otherwise
+//                        // they are treated as punctuation. we currently 
have no cleaner way to fix this!
+//                        return UScript.LATIN;
+//                    }
+//                    else
+//                    {
+//                        return script;
+//                    }
+//                }
+//                else
+//                {
+//                    return script;
+//                }
+//            }
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttribute.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttribute.cs 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttribute.cs
new file mode 100644
index 0000000..abc1ae2
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttribute.cs
@@ -0,0 +1,42 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Util;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.TokenAttributes
+//{
+//    /// <summary>
+//    /// This attribute stores the UTR #24 script value for a token of text.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public interface IScriptAttribute : IAttribute
+//    {
+//        /**
+//   * Get the numeric code for this script value.
+//   * This is the constant value from {@link UScript}.
+//   * @return numeric code
+//   */
+//        int Code { get; set; }
+//        ///**
+//        // * Set the numeric code for this script value.
+//        // * This is the constant value from {@link UScript}.
+//        // * @param code numeric code
+//        // */
+//        //public void setCode(int code);
+//        /**
+//         * Get the full name.
+//         * @return UTR #24 full name.
+//         */
+//        string GetName();
+//        /**
+//         * Get the abbreviated name.
+//         * @return UTR #24 abbreviated name.
+//         */
+//        string GetShortName();
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttributeImpl.cs
----------------------------------------------------------------------
diff --git 
a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttributeImpl.cs
 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttributeImpl.cs
new file mode 100644
index 0000000..f97ccf1
--- /dev/null
+++ 
b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/TokenAttributes/ScriptAttributeImpl.cs
@@ -0,0 +1,80 @@
+// LUCENENET TODO: Port issues - missing dependencies
+
+//using Lucene.Net.Util;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU.TokenAttributes
+//{
+//    /// <summary>
+//    /// Implementation of <see cref="IScriptAttribute"/> that stores the 
script
+//    /// as an integer.
+//    /// <para/>
+//    /// @lucene.experimental
+//    /// </summary>
+//    public class ScriptAttribute : Attribute, IScriptAttribute, 
System.ICloneable
+//    {
+//        private int code = UScript.COMMON;
+
+//        /** Initializes this attribute with <code>UScript.COMMON</code> */
+//        public ScriptAttribute() { }
+
+//        public virtual int Code
+//        {
+//            get { return code; }
+//            set { code = value; }
+//        }
+
+//        public virtual string GetName()
+//        {
+//            return UScript.GetName(code);
+//        }
+
+//        public virtual string GetShortName()
+//        {
+//            return UScript.GetShortName(code);
+//        }
+
+//        public override void Clear()
+//        {
+//            code = UScript.COMMON;
+//        }
+
+//        public override void CopyTo(IAttribute target)
+//        {
+//            ScriptAttribute t = (ScriptAttribute)target;
+//            t.Code = code;
+//        }
+
+//        public override bool Equals(object other)
+//        {
+//            if (this == other)
+//            {
+//                return true;
+//            }
+
+//            if (other is ScriptAttribute)
+//            {
+//                return ((ScriptAttribute)other).code == code;
+//            }
+
+//            return false;
+//        }
+
+//        public override int GetHashCode()
+//        {
+//            return code;
+//        }
+
+//        public override void ReflectWith(IAttributeReflector reflector)
+//        {
+//            // when wordbreaking CJK, we use the 15924 code Japanese 
(Han+Hiragana+Katakana) to 
+//            // mark runs of Chinese/Japanese. our use is correct (as for 
chinese Han is a subset), 
+//            // but this is just to help prevent confusion.
+//            string name = code == UScript.JAPANESE ? "Chinese/Japanese" : 
GetName();
+//            reflector.Reflect<IScriptAttribute>("script", name);
+//        }
+//    }
+//}

[1/3] lucenenet git commit: Lucene.Net.Analysis.ICU: Renamed ICU directory Icu to match namespace conventions

Reply via email to