[lucenenet] 04/07: Lucene.Net.Benchmark: Factored out StreamTokenizer from support in favor of the implementation in J2N

nightowl888 Sun, 15 Dec 2019 13:19:14 -0800

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


commit 4e93ba502f140e23783a5fc9f368020215f52031
Author: Shad Storhaug <[email protected]>
AuthorDate: Mon Dec 16 01:04:40 2019 +0700

    Lucene.Net.Benchmark: Factored out StreamTokenizer from support in favor of the implementation in J2N
---
 .../ByTask/Tasks/AnalyzerFactoryTask.cs            |  32 +-
 .../ByTask/Tasks/NewAnalyzerTask.cs                |  10 +-
 src/Lucene.Net.Benchmark/ByTask/Utils/Algorithm.cs |  30 +-
 .../Lucene.Net.Benchmark.csproj                    |   1 +
 .../Support/IO/TestStreamTokenizer.cs              | 514 --------------
 src/Lucene.Net/Support/IO/StreamTokenizer.cs       | 738 ---------------------
 6 files changed, 37 insertions(+), 1288 deletions(-)

diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs 
b/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs
index b428a52..dc1174a 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs
@@ -1,6 +1,6 @@
-using Lucene.Net.Analysis.Util;
+using J2N.IO;
+using Lucene.Net.Analysis.Util;
 using Lucene.Net.Benchmarks.ByTask.Utils;
-using Lucene.Net.Support.IO;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
@@ -114,14 +114,14 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
             stok.CommentChar('#');
             stok.QuoteChar('"');
             stok.QuoteChar('\'');
-            stok.IsEOLSignificant = false;
+            stok.EndOfLineIsSignificant = false;
             stok.OrdinaryChar('(');
             stok.OrdinaryChar(')');
             stok.OrdinaryChar(':');
             stok.OrdinaryChar(',');
             try
             {
-                while (stok.NextToken() != StreamTokenizer.TT_EOF)
+                while (stok.NextToken() != 
StreamTokenizer.TokenType_EndOfStream)
                 {
                     switch (stok.TokenType)
                     {
@@ -130,7 +130,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                 // Do nothing
                                 break;
                             }
-                        case StreamTokenizer.TT_WORD:
+                        case StreamTokenizer.TokenType_Word:
                             {
                                 if 
(expectedArgType.Equals(ArgType.ANALYZER_ARG))
                                 {
@@ -153,7 +153,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                     string argValue = stok.StringValue;
                                     switch (stok.TokenType)
                                     {
-                                        case StreamTokenizer.TT_NUMBER:
+                                        case StreamTokenizer.TokenType_Number:
                                             {
                                                 argValue = 
stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                                 // Drop the ".0" from numbers, 
for integer arguments
@@ -190,7 +190,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                             }
                                         case '"':
                                         case '\'':
-                                        case StreamTokenizer.TT_WORD:
+                                        case StreamTokenizer.TokenType_Word:
                                             {
                                                 if (argName.Equals("name", 
StringComparison.OrdinalIgnoreCase))
                                                 {
@@ -220,7 +220,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                                 }
                                                 break;
                                             }
-                                        case StreamTokenizer.TT_EOF:
+                                        case 
StreamTokenizer.TokenType_EndOfStream:
                                             {
                                                 throw new 
Exception("Unexpected EOF: " + stok.ToString());
                                             }
@@ -250,7 +250,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                         {
                                             case '"':
                                             case '\'':
-                                            case StreamTokenizer.TT_WORD:
+                                            case 
StreamTokenizer.TokenType_Word:
                                                 {
                                                     intArgValue = 0;
                                                     try
@@ -274,7 +274,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                                     }
                                                     break;
                                                 }
-                                            case StreamTokenizer.TT_NUMBER:
+                                            case 
StreamTokenizer.TokenType_Number:
                                                 {
                                                     if 
(argName.Equals("positionIncrementGap", StringComparison.OrdinalIgnoreCase))
                                                     {
@@ -286,7 +286,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                                     }
                                                     break;
                                                 }
-                                            case StreamTokenizer.TT_EOF:
+                                            case 
StreamTokenizer.TokenType_EndOfStream:
                                                 {
                                                     throw new 
Exception("Unexpected EOF: " + stok.ToString());
                                                 }
@@ -377,7 +377,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
             bool parenthetical = false;
             try
             {
-                while (stok.NextToken() != StreamTokenizer.TT_EOF)
+                while (stok.NextToken() != 
StreamTokenizer.TokenType_EndOfStream)
                 {
                     switch (stok.TokenType)
                     {
@@ -417,7 +417,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                 }
                                 break;
                             }
-                        case StreamTokenizer.TT_WORD:
+                        case StreamTokenizer.TokenType_Word:
                             {
                                 if (!parenthetical)
                                 {
@@ -434,7 +434,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                 string argValue = stok.StringValue;
                                 switch (stok.TokenType)
                                 {
-                                    case StreamTokenizer.TT_NUMBER:
+                                    case StreamTokenizer.TokenType_Number:
                                         {
                                             argValue = 
stok.NumberValue.ToString(CultureInfo.InvariantCulture);
                                             // Drop the ".0" from numbers, for 
integer arguments
@@ -445,12 +445,12 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                                         }
                                     case '"':
                                     case '\'':
-                                    case StreamTokenizer.TT_WORD:
+                                    case StreamTokenizer.TokenType_Word:
                                         {
                                             argMap[argName] = argValue;
                                             break;
                                         }
-                                    case StreamTokenizer.TT_EOF:
+                                    case StreamTokenizer.TokenType_EndOfStream:
                                         {
                                             throw new Exception("Unexpected 
EOF: " + stok.ToString());
                                         }
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs 
b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
index 0549872..6395bfd 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
@@ -1,6 +1,6 @@
-using Lucene.Net.Analysis;
+using J2N.IO;
+using Lucene.Net.Analysis;
 using Lucene.Net.Benchmarks.ByTask.Utils;
-using Lucene.Net.Support.IO;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
@@ -139,11 +139,11 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
             StreamTokenizer stok = new StreamTokenizer(new 
StringReader(@params));
             stok.QuoteChar('"');
             stok.QuoteChar('\'');
-            stok.IsEOLSignificant = false;
+            stok.EndOfLineIsSignificant = false;
             stok.OrdinaryChar(',');
             try
             {
-                while (stok.NextToken() != StreamTokenizer.TT_EOF)
+                while (stok.NextToken() != 
StreamTokenizer.TokenType_EndOfStream)
                 {
                     switch (stok.TokenType)
                     {
@@ -154,7 +154,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
                             }
                         case '\'':
                         case '\"':
-                        case StreamTokenizer.TT_WORD:
+                        case StreamTokenizer.TokenType_Word:
                             {
                                 analyzerNames.Add(stok.StringValue);
                                 break;
diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/Algorithm.cs 
b/src/Lucene.Net.Benchmark/ByTask/Utils/Algorithm.cs
index 9aea396..9e311ea 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Utils/Algorithm.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Utils/Algorithm.cs
@@ -1,6 +1,6 @@
-using Lucene.Net.Benchmarks.ByTask.Tasks;
+using J2N.IO;
+using Lucene.Net.Benchmarks.ByTask.Tasks;
 using Lucene.Net.Support;
-using Lucene.Net.Support.IO;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
@@ -61,7 +61,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
             PerfTask prevTask = null;
             StreamTokenizer stok = new StreamTokenizer(new 
StringReader(algTxt));
             stok.CommentChar('#');
-            stok.IsEOLSignificant = false;
+            stok.EndOfLineIsSignificant = false;
             stok.QuoteChar('"');
             stok.QuoteChar('\'');
             stok.OrdinaryChar('/');
@@ -71,12 +71,12 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
             bool isDisableCountNextTask = false; // only for primitive tasks
             currSequence.Depth = 0;
 
-            while (stok.NextToken() != StreamTokenizer.TT_EOF)
+            while (stok.NextToken() != StreamTokenizer.TokenType_EndOfStream)
             {
                 switch (stok.TokenType)
                 {
 
-                    case StreamTokenizer.TT_WORD:
+                    case StreamTokenizer.TokenType_Word:
                         string s = stok.StringValue;
                         PerfTask task = 
(PerfTask)Activator.CreateInstance(TaskClass(config, s), runData);
                         task.AlgLineNum = stok.LineNumber;
@@ -102,7 +102,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                         else
                         {
                             // get params, for tasks that supports them - 
allow recursive parenthetical expressions
-                            stok.IsEOLSignificant = true;  // Allow params 
tokenizer to keep track of line number
+                            stok.EndOfLineIsSignificant = true;  // Allow 
params tokenizer to keep track of line number
                             StringBuilder @params = new StringBuilder();
                             stok.NextToken();
                             if (stok.TokenType != ')')
@@ -112,17 +112,17 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                 {
                                     switch (stok.TokenType)
                                     {
-                                        case StreamTokenizer.TT_NUMBER:
+                                        case StreamTokenizer.TokenType_Number:
                                             {
                                                 
@params.Append(stok.NumberValue.ToString(CultureInfo.InvariantCulture));
                                                 break;
                                             }
-                                        case StreamTokenizer.TT_WORD:
+                                        case StreamTokenizer.TokenType_Word:
                                             {
                                                 
@params.Append(stok.StringValue);
                                                 break;
                                             }
-                                        case StreamTokenizer.TT_EOF:
+                                        case 
StreamTokenizer.TokenType_EndOfStream:
                                             {
                                                 throw new 
Exception("Unexpexted EOF: - " + stok.ToString());
                                             }
@@ -163,7 +163,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                 }
                                 BALANCED_PARENS_BREAK: { }
                             }
-                            stok.IsEOLSignificant = false;
+                            stok.EndOfLineIsSignificant = false;
                             string prm = @params.ToString().Trim();
                             if (prm.Length > 0)
                             {
@@ -192,7 +192,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                 }
                                 else
                                 {
-                                    if (stok.TokenType != 
StreamTokenizer.TT_NUMBER)
+                                    if (stok.TokenType != 
StreamTokenizer.TokenType_Number)
                                     {
                                         throw new Exception("expected 
repetitions number or XXXs: - " + stok.ToString());
                                     }
@@ -200,7 +200,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                     {
                                         double num = stok.NumberValue;
                                         stok.NextToken();
-                                        if (stok.TokenType == 
StreamTokenizer.TT_WORD && stok.StringValue.Equals("s", 
StringComparison.Ordinal))
+                                        if (stok.TokenType == 
StreamTokenizer.TokenType_Word && stok.StringValue.Equals("s", 
StringComparison.Ordinal))
                                         {
                                             
((TaskSequence)prevTask).SetRunTime(num);
                                         }
@@ -221,7 +221,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                 {
                                     // get rate number
                                     stok.NextToken();
-                                    if (stok.TokenType != 
StreamTokenizer.TT_NUMBER) throw new Exception("expected rate number: - " + 
stok.ToString());
+                                    if (stok.TokenType != 
StreamTokenizer.TokenType_Number) throw new Exception("expected rate number: - 
" + stok.ToString());
                                     // check for unit - min or sec, sec is 
default
                                     stok.NextToken();
                                     if (stok.TokenType != '/')
@@ -232,7 +232,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                     else
                                     {
                                         stok.NextToken();
-                                        if (stok.TokenType != 
StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 
'sec' - " + stok.ToString());
+                                        if (stok.TokenType != 
StreamTokenizer.TokenType_Word) throw new Exception("expected rate unit: 'min' 
or 'sec' - " + stok.ToString());
                                         string unit = 
stok.StringValue.ToLowerInvariant();
                                         if ("min".Equals(unit, 
StringComparison.Ordinal))
                                         {
@@ -283,7 +283,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
                                 }
                                 stok.NextToken();
                                 int deltaPri;
-                                if (stok.TokenType != 
StreamTokenizer.TT_NUMBER)
+                                if (stok.TokenType != 
StreamTokenizer.TokenType_Number)
                                 {
                                     stok.PushBack();
                                     deltaPri = 0;
diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj 
b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
index b5b68b5..44336b3 100644
--- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
+++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
@@ -51,6 +51,7 @@
   </ItemGroup>
 
   <ItemGroup>
+    <PackageReference Include="J2N" Version="$(J2NPackageVersion)" />
     <PackageReference Include="ICU4N.Collation" 
Version="$(ICU4NCollationPackageVersion)" />
     <PackageReference Include="Spatial4n.Core" 
Version="$(Spatial4nCorePackageVersion)" />
   </ItemGroup>
diff --git a/src/Lucene.Net.Tests/Support/IO/TestStreamTokenizer.cs 
b/src/Lucene.Net.Tests/Support/IO/TestStreamTokenizer.cs
deleted file mode 100644
index a3e1454..0000000
--- a/src/Lucene.Net.Tests/Support/IO/TestStreamTokenizer.cs
+++ /dev/null
@@ -1,514 +0,0 @@
-// This class was sourced from the Apache Harmony project
-// https://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/
-
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System;
-using System.IO;
-using System.Text;
-
-namespace Lucene.Net.Support.IO
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    [TestFixture]
-    public class TestStreamTokenizer : LuceneTestCase
-    {
-        StringReader r;
-
-        StreamTokenizer st;
-
-        String testString;
-
-        /**
-         * @tests java.io.StreamTokenizer#StreamTokenizer(java.io.InputStream)
-         */
-        [Test]
-        public void Test_ConstructorLSystem_IO_InputStream()
-        {
-#pragma warning disable 612, 618
-            st = new StreamTokenizer(new MemoryStream(
-#pragma warning restore 612, 618
-                Encoding.UTF8.GetBytes("/comments\n d 8 'h'")));
-
-
-            assertEquals("the next token returned should be the letter d",
-                     StreamTokenizer.TT_WORD, st.NextToken());
-
-            assertEquals("the next token returned should be the letter d",
-                     "d", st.StringValue);
-
-
-            assertEquals("the next token returned should be the digit 8",
-                     StreamTokenizer.TT_NUMBER, st.NextToken());
-
-            assertEquals("the next token returned should be the digit 8",
-                     8.0, st.NumberValue);
-
-
-            assertEquals("the next token returned should be the quote 
character",
-                     39, st.NextToken());
-
-            assertEquals("the next token returned should be the quote 
character",
-                     "h", st.StringValue);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
-         */
-        [Test]
-        public void Test_ConstructorLSystem_IO_TextReader()
-        {
-            setTest("/testing\n d 8 'h' ");
-            assertEquals("the next token returned should be the letter d 
skipping the comments",
-                     StreamTokenizer.TT_WORD, st.NextToken());
-            assertEquals("the next token returned should be the letter d",
-                     "d", st.StringValue);
-
-            assertEquals("the next token returned should be the digit 8",
-                     StreamTokenizer.TT_NUMBER, st.NextToken());
-            assertEquals("the next token returned should be the digit 8",
-                     8.0, st.NumberValue);
-
-            assertEquals("the next token returned should be the quote 
character",
-                     39, st.NextToken());
-            assertEquals("the next token returned should be the quote 
character",
-                     "h", st.StringValue);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#commentChar(int)
-         */
-        [Test]
-        public void Test_commentCharI()
-        {
-            setTest("*comment \n / 8 'h' ");
-            st.OrdinaryChar('/');
-            st.CommentChar('*');
-            assertEquals("nextToken() did not return the character / skiping 
the comments starting with *",
-                     47, st.NextToken());
-            assertTrue("the next token returned should be the digit 8", st
-                   .NextToken() == StreamTokenizer.TT_NUMBER
-                   && st.NumberValue == 8.0);
-            assertTrue("the next token returned should be the quote character",
-                   st.NextToken() == 39 && st.StringValue.Equals("h", 
StringComparison.Ordinal));
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#eolIsSignificant(boolean)
-         */
-        [Test]
-        public void Test_eolIsSignificantZ()
-        {
-            setTest("d 8\n");
-            // by default end of line characters are not significant
-            assertTrue("nextToken did not return d",
-                   st.NextToken() == StreamTokenizer.TT_WORD
-                   && st.StringValue.Equals("d", StringComparison.Ordinal));
-            assertTrue("nextToken did not return 8",
-                   st.NextToken() == StreamTokenizer.TT_NUMBER
-                   && st.NumberValue == 8.0);
-            assertTrue("nextToken should be the end of file",
-                   st.NextToken() == StreamTokenizer.TT_EOF);
-            setTest("d\n");
-            st.IsEOLSignificant = (true);
-            // end of line characters are significant
-            assertTrue("nextToken did not return d",
-                   st.NextToken() == StreamTokenizer.TT_WORD
-                   && st.StringValue.Equals("d", StringComparison.Ordinal));
-            assertTrue("nextToken is the end of line",
-                   st.NextToken() == StreamTokenizer.TT_EOL);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#lineno()
-         */
-        [Test]
-        public void Test_lineno()
-        {
-            setTest("d\n 8\n");
-            assertEquals("the lineno should be 1", 1, st.LineNumber);
-            st.NextToken();
-            st.NextToken();
-            assertEquals("the lineno should be 2", 2, st.LineNumber);
-            st.NextToken();
-            assertEquals("the next line no should be 3", 3, st.LineNumber);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#lowerCaseMode(boolean)
-         */
-        [Test]
-        public void Test_lowerCaseModeZ()
-        {
-            // SM.
-            setTest("HELLOWORLD");
-            st.LowerCaseMode = (true);
-
-            st.NextToken();
-            assertEquals("sval not converted to lowercase.", "helloworld", 
st.StringValue
-                     );
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#nextToken()
-         */
-        [Test]
-        public void Test_nextToken()
-        {
-            // SM.
-            // LUCENENET NOTE: The original test had \257 (which is octal)
-            // that is not supported in a .NET string, so we convert to 
decimal 175 here.
-            // This also changes the semantics of the test, because for 
whatever
-            // reason in Java it was expecting the octal number to register as 
a TT_WORD.
-            // So, we changed to expect a TT_NUMBER as a result of the above 
change.
-            // Also, we don't need to escape single quotes in .NET.
-            setTest("\r\n/* fje fje 43.4 f \r\n f g */  456.459 \r\n"
-                    + "Hello  /        \r\n \r\n \n \r 175 Hi 'Hello World'");
-            st.OrdinaryChar('/');
-            st.SlashStarComments = true;
-            st.NextToken();
-            assertTrue("Wrong Token type1: " + (char)st.TokenType,
-                   st.TokenType == StreamTokenizer.TT_NUMBER);
-            st.NextToken();
-            assertTrue("Wrong Token type2: " + st.TokenType,
-                   st.TokenType == StreamTokenizer.TT_WORD);
-            st.NextToken();
-            assertTrue("Wrong Token type3: " + st.TokenType, st.TokenType == 
'/');
-            st.NextToken();
-            assertTrue("Wrong Token type4: " + st.TokenType,
-                   st.TokenType == StreamTokenizer.TT_NUMBER);
-            st.NextToken();
-            assertTrue("Wrong Token type5: " + st.TokenType,
-                   st.TokenType == StreamTokenizer.TT_WORD);
-            st.NextToken();
-            assertTrue("Wrong Token type6: " + st.TokenType, st.TokenType == 
'\'');
-            assertTrue("Wrong Token type7: " + st.TokenType, st.StringValue
-                   .Equals("Hello World", StringComparison.Ordinal));
-            st.NextToken();
-            assertTrue("Wrong Token type8: " + st.TokenType, st.TokenType == 
-1);
-
-            using (var pin = new 
MemoryStream(Encoding.UTF8.GetBytes("hello\n\r\r")))
-            {
-#pragma warning disable 612, 618
-                StreamTokenizer s = new StreamTokenizer(pin);
-#pragma warning restore 612, 618
-                s.IsEOLSignificant = (true);
-
-                assertTrue("Wrong token 1,1",
-                       s.NextToken() == StreamTokenizer.TT_WORD
-                       && s.StringValue.Equals("hello", 
StringComparison.Ordinal));
-
-                assertTrue("Wrong token 1,2", s.NextToken() == '\n');
-
-                assertTrue("Wrong token 1,3", s.NextToken() == '\n');
-
-                assertTrue("Wrong token 1,4", s.NextToken() == '\n');
-
-
-                assertTrue("Wrong token 1,5",
-                       s.NextToken() == StreamTokenizer.TT_EOF);
-            }
-            StreamTokenizer tokenizer = new StreamTokenizer(
-                                    new StringReader("\n \r\n#"));
-            tokenizer.OrdinaryChar('\n'); // make \n ordinary
-            tokenizer.IsEOLSignificant = (true);
-
-            assertTrue("Wrong token 2,1", tokenizer.NextToken() == '\n');
-
-            assertTrue("Wrong token 2,2", tokenizer.NextToken() == '\n');
-
-            assertEquals("Wrong token 2,3", '#', tokenizer.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#ordinaryChar(int)
-         */
-        [Test]
-        public void Test_ordinaryCharI()
-        {
-            // SM.
-            setTest("Ffjein 893");
-            st.OrdinaryChar('F');
-            st.NextToken();
-            assertTrue("OrdinaryChar failed." + (char)st.TokenType,
-                       st.TokenType == 'F');
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#ordinaryChars(int, int)
-         */
-        [Test]
-        public void Test_ordinaryCharsII()
-        {
-            // SM.
-            setTest("azbc iof z 893");
-            st.OrdinaryChars('a', 'z');
-            assertEquals("OrdinaryChars failed.", 'a', st.NextToken());
-            assertEquals("OrdinaryChars failed.", 'z', st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#parseNumbers()
-         */
-        [Test]
-        public void Test_parseNumbers()
-        {
-            // SM
-            setTest("9.9 678");
-            assertTrue("Base behavior failed.",
-                       st.NextToken() == StreamTokenizer.TT_NUMBER);
-            st.OrdinaryChars('0', '9');
-            assertEquals("setOrdinary failed.", '6', st.NextToken());
-            st.ParseNumbers();
-            assertTrue("parseNumbers failed.",
-                       st.NextToken() == StreamTokenizer.TT_NUMBER);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#pushBack()
-         */
-        [Test]
-        public void Test_pushBack()
-        {
-            // SM.
-            setTest("Hello 897");
-            st.NextToken();
-            st.PushBack();
-            assertTrue("PushBack failed.",
-                       st.NextToken() == StreamTokenizer.TT_WORD);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#quoteChar(int)
-         */
-        [Test]
-        public void Test_quoteCharI()
-        {
-            // SM
-            setTest("<Hello World<    HelloWorldH");
-            st.QuoteChar('<');
-            assertEquals("QuoteChar failed.", '<', st.NextToken());
-            assertEquals("QuoteChar failed.", "Hello World", st.StringValue);
-            st.QuoteChar('H');
-            st.NextToken();
-            assertEquals("QuoteChar failed for word.", "elloWorld", 
st.StringValue
-                         );
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#resetSyntax()
-         */
-        [Test]
-        public void Test_resetSyntax()
-        {
-            // SM
-            setTest("H 9\' ello World");
-            st.ResetSyntax();
-            assertTrue("resetSyntax failed1." + (char)st.TokenType,
-                       st.NextToken() == 'H');
-            assertTrue("resetSyntax failed1." + (char)st.TokenType,
-                       st.NextToken() == ' ');
-            assertTrue("resetSyntax failed2." + (char)st.TokenType,
-                       st.NextToken() == '9');
-            assertTrue("resetSyntax failed3." + (char)st.TokenType,
-                       st.NextToken() == '\'');
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
-         */
-        [Test]
-        public void Test_slashSlashCommentsZ()
-        {
-            // SM.
-            setTest("// foo \r\n /fiji \r\n -456");
-            st.OrdinaryChar('/');
-            st.SlashSlashComments = (true);
-            assertEquals("Test failed.", '/', st.NextToken());
-            assertTrue("Test failed.",
-                       st.NextToken() == StreamTokenizer.TT_WORD);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
-         */
-        [Test]
-        public void Test_slashSlashComments_withSSOpen()
-        {
-            TextReader reader = new StringReader("t // t t t");
-
-            StreamTokenizer st = new StreamTokenizer(reader);
-            st.SlashSlashComments = (true);
-
-            assertEquals(StreamTokenizer.TT_WORD, st.NextToken());
-            assertEquals(StreamTokenizer.TT_EOF, st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
-         */
-        [Test]
-        public void Test_slashSlashComments_withSSOpen_NoComment()
-        {
-            TextReader reader = new StringReader("// t");
-
-            StreamTokenizer st = new StreamTokenizer(reader);
-            st.SlashSlashComments = (true);
-            st.OrdinaryChar('/');
-
-            assertEquals(StreamTokenizer.TT_EOF, st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashSlashComments(boolean)
-         */
-        [Test]
-        public void Test_slashSlashComments_withSSClosed()
-        {
-            TextReader reader = new StringReader("// t");
-
-            StreamTokenizer st = new StreamTokenizer(reader);
-            st.SlashSlashComments = (false);
-            st.OrdinaryChar('/');
-
-            assertEquals('/', st.NextToken());
-            assertEquals('/', st.NextToken());
-            assertEquals(StreamTokenizer.TT_WORD, st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashStarComments(boolean)
-         */
-        [Test]
-        public void Test_slashStarCommentsZ()
-        {
-            setTest("/* foo \r\n /fiji \r\n*/ -456");
-            st.OrdinaryChar('/');
-            st.SlashStarComments = (true);
-            assertTrue("Test failed.",
-                       st.NextToken() == StreamTokenizer.TT_NUMBER);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashStarComments(boolean)
-         */
-        [Test]
-        public void Test_slashStarComments_withSTOpen()
-        {
-            TextReader reader = new StringReader("t /* t */ t");
-
-            StreamTokenizer st = new StreamTokenizer(reader);
-            st.SlashStarComments = (true);
-
-            assertEquals(StreamTokenizer.TT_WORD, st.NextToken());
-            assertEquals(StreamTokenizer.TT_WORD, st.NextToken());
-            assertEquals(StreamTokenizer.TT_EOF, st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#slashStarComments(boolean)
-         */
-        [Test]
-        public void Test_slashStarComments_withSTClosed()
-        {
-            TextReader reader = new StringReader("t /* t */ t");
-
-            StreamTokenizer st = new StreamTokenizer(reader);
-            st.SlashStarComments = (false);
-
-            assertEquals(StreamTokenizer.TT_WORD, st.NextToken());
-            assertEquals(StreamTokenizer.TT_EOF, st.NextToken());
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#toString()
-         */
-        [Test]
-        public void Test_toString()
-        {
-            setTest("ABC Hello World");
-            st.NextToken();
-            assertTrue("toString failed." + st.toString(),
-                       st.toString().Equals(
-                                "Token[ABC], line 1", 
StringComparison.Ordinal));
-
-            // Regression test for HARMONY-4070
-            byte[] data = new byte[] { (byte)'-' };
-#pragma warning disable 612, 618
-            StreamTokenizer tokenizer = new StreamTokenizer(
-                    new MemoryStream(data));
-#pragma warning restore 612, 618
-            tokenizer.NextToken();
-            String result = tokenizer.toString();
-            assertEquals("Token['-'], line 1", result);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#whitespaceChars(int, int)
-         */
-        [Test]
-        public void Test_whitespaceCharsII()
-        {
-            setTest("azbc iof z 893");
-            st.WhitespaceChars('a', 'z');
-            assertTrue("OrdinaryChar failed.",
-                       st.NextToken() == StreamTokenizer.TT_NUMBER);
-        }
-
-        /**
-         * @tests java.io.StreamTokenizer#wordChars(int, int)
-         */
-        [Test]
-        public void Test_wordCharsII()
-        {
-            setTest("A893 -9B87");
-            st.WordChars('0', '9');
-            assertTrue("WordChar failed1.",
-                       st.NextToken() == StreamTokenizer.TT_WORD);
-            assertEquals("WordChar failed2.", "A893", st.StringValue);
-            assertTrue("WordChar failed3.",
-                       st.NextToken() == StreamTokenizer.TT_NUMBER);
-            st.NextToken();
-            assertEquals("WordChar failed4.", "B87", st.StringValue);
-
-            setTest("    Hello World");
-            st.WordChars(' ', ' ');
-            st.NextToken();
-            assertEquals("WordChars failed for whitespace.", "Hello World", 
st.StringValue
-                         );
-
-            setTest("    Hello World\r\n  \'Hello World\' Hello\' World");
-            st.WordChars(' ', ' ');
-            st.WordChars('\'', '\'');
-            st.NextToken();
-            assertTrue("WordChars failed for whitespace: " + st.StringValue, 
st.StringValue
-                       .Equals("Hello World", StringComparison.Ordinal));
-            st.NextToken();
-            assertTrue("WordChars failed for quote1: " + st.StringValue, 
st.StringValue
-                       .Equals("\'Hello World\' Hello\' World", 
StringComparison.Ordinal));
-        }
-
-        private void setTest(string s)
-        {
-            testString = s;
-            r = new StringReader(testString);
-            st = new StreamTokenizer(r);
-        }
-    }
-}
diff --git a/src/Lucene.Net/Support/IO/StreamTokenizer.cs 
b/src/Lucene.Net/Support/IO/StreamTokenizer.cs
deleted file mode 100644
index 1ba9033..0000000
--- a/src/Lucene.Net/Support/IO/StreamTokenizer.cs
+++ /dev/null
@@ -1,738 +0,0 @@
-// This class was sourced from the Apache Harmony project
-// https://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/
-
-using System;
-using System.Globalization;
-using System.IO;
-using System.Text;
-
-namespace Lucene.Net.Support.IO
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    /// <summary>
-    /// Parses a stream into a set of defined tokens, one at a time. The 
different
-    /// types of tokens that can be found are numbers, identifiers, quoted 
strings,
-    /// and different comment styles. The class can be used for limited 
processing
-    /// of source code of programming languages like Java, although it is 
nowhere
-    /// near a full parser.
-    /// </summary>
-    public class StreamTokenizer
-    {
-        /// <summary>
-        /// Contains a number if the current token is a number 
-        /// (<see cref="TokenType"/> == <see cref="TT_NUMBER"/>).
-        /// </summary>
-        public double NumberValue { get; set; }
-
-        /// <summary>
-        /// Contains a string if the current token is a word 
-        /// (<see cref="TokenType"/> == <see cref="TT_WORD"/>).
-        /// </summary>
-        public string StringValue { get; set; }
-
-        /// <summary>
-        /// The constant representing the end of the stream.
-        /// </summary>
-        public const int TT_EOF = -1;
-
-        /// <summary>
-        /// The constant representing the end of the line.
-        /// </summary>
-        public const int TT_EOL = '\n';
-
-        /// <summary>
-        /// The constant representing a number token.
-        /// </summary>
-        public const int TT_NUMBER = -2;
-
-        /// <summary>
-        /// The constant representing a word token.
-        /// </summary>
-        public const int TT_WORD = -3;
-
-        /// <summary>
-        /// Internal representation of unknown state.
-        /// </summary>
-        private const int TT_UNKNOWN = -4;
-
-        /// <summary>
-        /// After calling {@code nextToken()}, {@code ttype} contains the type 
of
-        /// token that has been read. When a single character is read, its 
value
-        /// converted to an integer is stored in {@code ttype}. For a quoted 
string,
-        /// the value is the quoted character. Otherwise, its value is one of 
the
-        /// following:
-        /// <list type="bullet">
-        ///     <item><description><see cref="TT_WORD"/> - the token is a 
word.</description></item>
-        ///     <item><description><see cref="TT_NUMBER"/> - the token is a 
number.</description></item>
-        ///     <item><description><see cref="TT_EOL"/> - the end of line has 
been reached. Depends on
-        ///     whether <see cref="IsEOLSignificant"/> is 
<c>true</c>.</description></item>
-        ///     <item><description><see cref="TT_EOF"/> - the end of the 
stream has been reached.</description></item>
-        /// </list>
-        /// </summary>
-        public int TokenType { get; private set; } = TT_UNKNOWN;
-
-        /// <summary>
-        /// Internal character meanings, 0 implies TOKEN_ORDINARY
-        /// </summary>
-        private byte[] tokenTypes = new byte[256];
-
-        private static readonly byte TOKEN_COMMENT = 1;
-        private static readonly byte TOKEN_QUOTE = 2;
-        private static readonly byte TOKEN_WHITE = 4;
-        private static readonly byte TOKEN_WORD = 8;
-        private static readonly byte TOKEN_DIGIT = 16;
-
-        private int lineNumber = 1;
-        private bool forceLowercase;
-        private bool isEOLSignificant;
-        private bool slashStarComments;
-        private bool slashSlashComments;
-        private bool pushBackToken;
-        private bool lastCr;
-
-        /// <summary>One of these will have the stream</summary>
-        private Stream inStream;
-        private TextReader inReader;
-        private int peekChar = -2;
-
-        /// <summary>
-        /// Private constructor to initialize the default values according to 
the
-        /// specification.
-        /// </summary>
-        private StreamTokenizer()
-        {
-            //
-            // Initialize the default state per specification. All byte values 
'A'
-            // through 'Z', 'a' through 'z', and '\u00A0' through '\u00FF' are
-            // considered to be alphabetic.
-            //
-            WordChars('A', 'Z');
-            WordChars('a', 'z');
-            WordChars(160, 255);
-            //
-            // All byte values '\u0000' through '\u0020' are considered to be 
white
-            // space.
-            //
-            WhitespaceChars(0, 32);
-            //
-            // '/' is a comment character. Single quote '\'' and double quote 
'"'
-            // are string quote characters.
-            //
-            CommentChar('/');
-            QuoteChar('"');
-            QuoteChar('\'');
-            //
-            // Numbers are parsed.
-            //
-            ParseNumbers();
-            //
-            // Ends of lines are treated as white space, not as separate 
tokens.
-            // C-style and C++-style comments are not recognized. These are the
-            // defaults and are not needed in constructor.
-            //
-        }
-
-        /// <summary>
-        /// Constructs a new <see cref="StreamTokenizer"/> with <paramref 
name="input"/> as source input
-        /// stream. This constructor is deprecated; instead, the constructor 
that
-        /// takes a <see cref="TextReader"/> as an arugment should be used.
-        /// </summary>
-        /// <param name="input">the source stream from which to parse 
tokens.</param>
-        /// <exception cref="ArgumentNullException">If <paramref 
name="input"/> is <c>null</c>.</exception>
-        [Obsolete("Use StreamTokenizer(TextReader)")]
-        public StreamTokenizer(Stream input)
-            : this() // Calls private constructor
-        {
-            if (input == null)
-            {
-                throw new ArgumentNullException("input");
-            }
-            inStream = input;
-        }
-
-        /// <summary>
-        /// Constructs a new {@code StreamTokenizer} with {@code r} as source 
reader.
-        /// The tokenizer's initial state is as follows:
-        /// <list type="bullet">
-        ///     <item><description>All byte values 'A' through 'Z', 'a' 
through 'z', and '&#92;u00A0' through '&#92;u00FF' are considered to be 
alphabetic.</description></item>
-        ///     <item><description>All byte values '&#92;u0000' through 
'&#92;u0020' are considered to be white space. '/' is a comment 
character.</description></item>
-        ///     <item><description>Single quote '\'' and double quote '"' are 
string quote characters.</description></item>
-        ///     <item><description>Numbers are parsed.</description></item>
-        ///     <item><description>End of lines are considered to be white 
space rather than separate tokens.</description></item>
-        ///     <item><description>C-style and C++-style comments are not 
recognized.</description></item>
-        /// </list>
-        /// </summary>
-        /// <param name="reader">The source text reader from which to parse 
tokens.</param>
-        public StreamTokenizer(TextReader reader)
-            : this() // Calls private constructor
-        {
-            if (reader == null)
-            {
-                throw new ArgumentNullException("reader");
-            }
-            inReader = reader;
-        }
-
-        /// <summary>
-        /// Specifies that the character <paramref name="ch"/> shall be 
treated as a comment
-        /// character.
-        /// </summary>
-        /// <param name="ch">The character to be considered a comment 
character.</param>
-        public virtual void CommentChar(int ch)
-        {
-            if (0 <= ch && ch < tokenTypes.Length)
-            {
-                tokenTypes[ch] = TOKEN_COMMENT;
-            }
-        }
-
-        /// <summary>
-        /// Specifies whether the end of a line is significant and should be 
returned
-        /// as <see cref="TT_EOF"/> in <see cref="TokenType"/> by this 
tokenizer.
-        /// <c>true</c> if EOL is significant, <c>false</c> otherwise.
-        /// </summary>
-        public virtual bool IsEOLSignificant
-        {
-            get { return isEOLSignificant; }
-            set { isEOLSignificant = value; }
-        }
-
-        /// <summary>
-        /// Gets the current line number.
-        /// </summary>
-        public int LineNumber
-        {
-            get { return lineNumber; }
-        }
-
-        /// <summary>
-        /// Specifies whether word tokens should be converted to lower case 
when they
-        /// are stored in <see cref="StringValue"/>. <c>true</c> if <see 
cref="StringValue"/>
-        /// should be converted to lower case, <c>false</c> otherwise.
-        /// </summary>
-        public bool LowerCaseMode
-        {
-            get { return forceLowercase; }
-            set { forceLowercase = value; }
-        }
-
-        /// <summary>
-        /// Parses the next token from this tokenizer's source stream or 
reader. The
-        /// type of the token is stored in the <see cref="TokenType"/> field, 
additional
-        /// information may be stored in the <see cref="NumberValue"/> or <see 
cref="StringValue"/> fields.
-        /// </summary>
-        /// <returns>The value of <see cref="TokenType"/>.</returns>
-        /// <exception cref="IOException">If an I/O error occurs while parsing 
the next token.</exception>
-        public int NextToken()
-        {
-            if (pushBackToken)
-            {
-                pushBackToken = false;
-                if (TokenType != TT_UNKNOWN)
-                {
-                    return TokenType;
-                }
-            }
-            StringValue = null; // Always reset sval to null
-            int currentChar = peekChar == -2 ? Read() : peekChar;
-
-            if (lastCr && currentChar == '\n')
-            {
-                lastCr = false;
-                currentChar = Read();
-            }
-            if (currentChar == -1)
-            {
-                return (TokenType = TT_EOF);
-            }
-
-            byte currentType = currentChar > 255 ? TOKEN_WORD
-                    : tokenTypes[currentChar];
-            while ((currentType & TOKEN_WHITE) != 0)
-            {
-                //
-                // Skip over white space until we hit a new line or a real 
token
-                //
-                if (currentChar == '\r')
-                {
-                    lineNumber++;
-                    if (isEOLSignificant)
-                    {
-                        lastCr = true;
-                        peekChar = -2;
-                        return (TokenType = TT_EOL);
-                    }
-                    if ((currentChar = Read()) == '\n')
-                    {
-                        currentChar = Read();
-                    }
-                }
-                else if (currentChar == '\n')
-                {
-                    lineNumber++;
-                    if (isEOLSignificant)
-                    {
-                        peekChar = -2;
-                        return (TokenType = TT_EOL);
-                    }
-                    currentChar = Read();
-                }
-                else
-                {
-                    // Advance over this white space character and try again.
-                    currentChar = Read();
-                }
-                if (currentChar == -1)
-                {
-                    return (TokenType = TT_EOF);
-                }
-                currentType = currentChar > 255 ? TOKEN_WORD
-                        : tokenTypes[currentChar];
-            }
-
-            //
-            // Check for digits before checking for words since digits can be
-            // contained within words.
-            //
-            if ((currentType & TOKEN_DIGIT) != 0)
-            {
-                StringBuilder digits = new StringBuilder(20);
-                bool haveDecimal = false, checkJustNegative = currentChar == 
'-';
-                while (true)
-                {
-                    if (currentChar == '.')
-                    {
-                        haveDecimal = true;
-                    }
-                    digits.Append((char)currentChar);
-                    currentChar = Read();
-                    if ((currentChar < '0' || currentChar > '9')
-                            && (haveDecimal || currentChar != '.'))
-                    {
-                        break;
-                    }
-                }
-                peekChar = currentChar;
-                if (checkJustNegative && digits.Length == 1)
-                {
-                    // Didn't get any other digits other than '-'
-                    return (TokenType = '-');
-                }
-                try
-                {
-                    NumberValue = double.Parse(digits.ToString(), 
CultureInfo.InvariantCulture);
-                }
-#pragma warning disable 168
-                catch (FormatException e)
-#pragma warning disable 168
-                {
-                    // Unsure what to do, will write test.
-                    NumberValue = 0;
-                }
-                return (TokenType = TT_NUMBER);
-            }
-            // Check for words
-            if ((currentType & TOKEN_WORD) != 0)
-            {
-                StringBuilder word = new StringBuilder(20);
-                while (true)
-                {
-                    word.Append((char)currentChar);
-                    currentChar = Read();
-                    if (currentChar == -1
-                            || (currentChar < 256 && (tokenTypes[currentChar] 
& (TOKEN_WORD | TOKEN_DIGIT)) == 0))
-                    {
-                        break;
-                    }
-                }
-                peekChar = currentChar;
-                StringValue = forceLowercase ? 
word.ToString().ToLowerInvariant() : word
-                       .ToString();
-                return (TokenType = TT_WORD);
-            }
-            // Check for quoted character
-            if (currentType == TOKEN_QUOTE)
-            {
-                int matchQuote = currentChar;
-                StringBuilder quoteString = new StringBuilder();
-                int peekOne = Read();
-                while (peekOne >= 0 && peekOne != matchQuote && peekOne != '\r'
-                        && peekOne != '\n')
-                {
-                    bool readPeek = true;
-                    if (peekOne == '\\')
-                    {
-                        int c1 = Read();
-                        // Check for quoted octal IE: \377
-                        if (c1 <= '7' && c1 >= '0')
-                        {
-                            int digitValue = c1 - '0';
-                            c1 = Read();
-                            if (c1 > '7' || c1 < '0')
-                            {
-                                readPeek = false;
-                            }
-                            else
-                            {
-                                digitValue = digitValue * 8 + (c1 - '0');
-                                c1 = Read();
-                                // limit the digit value to a byte
-                                if (digitValue > 037 || c1 > '7' || c1 < '0')
-                                {
-                                    readPeek = false;
-                                }
-                                else
-                                {
-                                    digitValue = digitValue * 8 + (c1 - '0');
-                                }
-                            }
-                            if (!readPeek)
-                            {
-                                // We've consumed one to many
-                                quoteString.Append((char)digitValue);
-                                peekOne = c1;
-                            }
-                            else
-                            {
-                                peekOne = digitValue;
-                            }
-                        }
-                        else
-                        {
-                            switch (c1)
-                            {
-                                case 'a':
-                                    peekOne = 0x7;
-                                    break;
-                                case 'b':
-                                    peekOne = 0x8;
-                                    break;
-                                case 'f':
-                                    peekOne = 0xc;
-                                    break;
-                                case 'n':
-                                    peekOne = 0xA;
-                                    break;
-                                case 'r':
-                                    peekOne = 0xD;
-                                    break;
-                                case 't':
-                                    peekOne = 0x9;
-                                    break;
-                                case 'v':
-                                    peekOne = 0xB;
-                                    break;
-                                default:
-                                    peekOne = c1;
-                                    break;
-                            }
-                        }
-                    }
-                    if (readPeek)
-                    {
-                        quoteString.Append((char)peekOne);
-                        peekOne = Read();
-                    }
-                }
-                if (peekOne == matchQuote)
-                {
-                    peekOne = Read();
-                }
-                peekChar = peekOne;
-                TokenType = matchQuote;
-                StringValue = quoteString.ToString();
-                return TokenType;
-            }
-            // Do comments, both "//" and "/*stuff*/"
-            if (currentChar == '/' && (slashSlashComments || 
slashStarComments))
-            {
-                if ((currentChar = Read()) == '*' && slashStarComments)
-                {
-                    int peekOne = Read();
-                    while (true)
-                    {
-                        currentChar = peekOne;
-                        peekOne = Read();
-                        if (currentChar == -1)
-                        {
-                            peekChar = -1;
-                            return (TokenType = TT_EOF);
-                        }
-                        if (currentChar == '\r')
-                        {
-                            if (peekOne == '\n')
-                            {
-                                peekOne = Read();
-                            }
-                            lineNumber++;
-                        }
-                        else if (currentChar == '\n')
-                        {
-                            lineNumber++;
-                        }
-                        else if (currentChar == '*' && peekOne == '/')
-                        {
-                            peekChar = Read();
-                            return NextToken();
-                        }
-                    }
-                }
-                else if (currentChar == '/' && slashSlashComments)
-                {
-                    // Skip to EOF or new line then return the next token
-                    while ((currentChar = Read()) >= 0 && currentChar != '\r'
-                            && currentChar != '\n')
-                    {
-                        // Intentionally empty
-                    }
-                    peekChar = currentChar;
-                    return NextToken();
-                }
-                else if (currentType != TOKEN_COMMENT)
-                {
-                    // Was just a slash by itself
-                    peekChar = currentChar;
-                    return (TokenType = '/');
-                }
-            }
-            // Check for comment character
-            if (currentType == TOKEN_COMMENT)
-            {
-                // Skip to EOF or new line then return the next token
-                while ((currentChar = Read()) >= 0 && currentChar != '\r'
-                        && currentChar != '\n')
-                {
-                    // Intentionally empty
-                }
-                peekChar = currentChar;
-                return NextToken();
-            }
-
-            peekChar = Read();
-            return (TokenType = currentChar);
-        }
-
-        /// <summary>
-        /// Specifies that the character <paramref name="ch"/> shall be 
treated as an ordinary
-        /// character by this tokenizer. That is, it has no special meaning as 
a
-        /// comment character, word component, white space, string delimiter or
-        /// number.
-        /// </summary>
-        /// <param name="ch">The character to be considered an ordinary 
character.</param>
-        public void OrdinaryChar(int ch)
-        {
-            if (0 <= ch && ch < tokenTypes.Length)
-            {
-                tokenTypes[ch] = 0;
-            }
-        }
-
-        /// <summary>
-        /// Specifies that the characters in the range from <paramref 
name="low"/> to <paramref name="hi"/>
-        /// shall be treated as an ordinary character by this tokenizer. That 
is,
-        /// they have no special meaning as a comment character, word 
component,
-        /// white space, string delimiter or number.
-        /// </summary>
-        /// <param name="low">The first character in the range of ordinary 
characters.</param>
-        /// <param name="hi">The last character in the range of ordinary 
characters.</param>
-        public void OrdinaryChars(int low, int hi)
-        {
-            if (low < 0)
-            {
-                low = 0;
-            }
-            if (hi > tokenTypes.Length)
-            {
-                hi = tokenTypes.Length - 1;
-            }
-            for (int i = low; i <= hi; i++)
-            {
-                tokenTypes[i] = 0;
-            }
-        }
-
-        /// <summary>
-        /// Specifies that this tokenizer shall parse numbers.
-        /// </summary>
-        public void ParseNumbers()
-        {
-            for (int i = '0'; i <= '9'; i++)
-            {
-                tokenTypes[i] |= TOKEN_DIGIT;
-            }
-            tokenTypes['.'] |= TOKEN_DIGIT;
-            tokenTypes['-'] |= TOKEN_DIGIT;
-        }
-
-        /// <summary>
-        /// Indicates that the current token should be pushed back and 
returned again
-        /// the next time <see cref="NextToken()"/> is called.
-        /// </summary>
-        public void PushBack()
-        {
-            pushBackToken = true;
-        }
-
-        /// <summary>
-        /// Specifies that the character <paramref name="ch"/> shall be 
treated as a quote
-        /// character.
-        /// </summary>
-        /// <param name="ch">The character to be considered a quote 
character.</param>
-        public void QuoteChar(int ch)
-        {
-            if (0 <= ch && ch < tokenTypes.Length)
-            {
-                tokenTypes[ch] = TOKEN_QUOTE;
-            }
-        }
-
-        private int Read()
-        {
-            // Call the read for the appropriate stream
-            if (inStream == null)
-            {
-                return inReader.Read();
-            }
-            return inStream.ReadByte();
-        }
-
-        /// <summary>
-        /// Specifies that all characters shall be treated as ordinary 
characters.
-        /// </summary>
-        public void ResetSyntax()
-        {
-            for (int i = 0; i < 256; i++)
-            {
-                tokenTypes[i] = 0;
-            }
-        }
-
-        /// <summary>
-        /// Specifies whether "slash-slash" (C++-style) comments shall be 
recognized.
-        /// This kind of comment ends at the end of the line.
-        /// <c>true</c> if <c>//</c> should be recognized as the start
-        /// of a comment, <c>false</c> otherwise.
-        /// </summary>
-        public bool SlashSlashComments
-        {
-            get { return slashSlashComments; }
-            set { slashSlashComments = value; }
-        }
-
-        /// <summary>
-        /// Specifies whether "slash-star" (C-style) comments shall be 
recognized.
-        /// Slash-star comments cannot be nested and end when a star-slash
-        /// combination is found.
-        /// <c>true</c> if <c>/*</c> should be recognized as the start
-        /// of a comment, <c>false</c> otherwise.
-        /// </summary>
-        public bool SlashStarComments
-        {
-            get { return slashStarComments; }
-            set { slashStarComments = value; }
-        }
-
-        /// <summary>
-        /// Returns the state of this tokenizer in a readable format.
-        /// </summary>
-        /// <remarks>
-        /// The result has the form <c>Token[...], line N</c>. The bracketed part is
-        /// <c>EOF</c>, <c>EOL</c>, <c>n=</c> followed by the number value, the string
-        /// value, or the token character in single quotes, depending on the current
-        /// token type. The number value is formatted with the invariant culture so
-        /// the text does not vary with the current thread culture.
-        /// </remarks>
-        /// <returns>The current state of this tokenizer.</returns>
-        public override string ToString()
-        {
-            // Values determined through experimentation
-            StringBuilder result = new StringBuilder();
-            result.Append("Token["); //$NON-NLS-1$
-            switch (TokenType)
-            {
-                case TT_EOF:
-                    result.Append("EOF"); //$NON-NLS-1$
-                    break;
-                case TT_EOL:
-                    result.Append("EOL"); //$NON-NLS-1$
-                    break;
-                case TT_NUMBER:
-                    result.Append("n="); //$NON-NLS-1$
-                    // Append(NumberValue) would format with the current culture (for
-                    // example "1,5" instead of "1.5"); format explicitly instead.
-                    // NOTE(review): assumes NumberValue is a double - confirm.
-                    result.Append(NumberValue.ToString(System.Globalization.CultureInfo.InvariantCulture));
-                    break;
-                case TT_WORD:
-                    result.Append(StringValue);
-                    break;
-                default:
-                    // Unknown tokens and quoted strings print their string value; any
-                    // other token type is printed as a single quoted character.
-                    if (TokenType == TT_UNKNOWN || tokenTypes[TokenType] == TOKEN_QUOTE)
-                    {
-                        result.Append(StringValue);
-                    }
-                    else
-                    {
-                        result.Append('\'');
-                        result.Append((char)TokenType);
-                        result.Append('\'');
-                    }
-                    break;
-            }
-            result.Append("], line "); //$NON-NLS-1$
-            result.Append(lineNumber);
-            return result.ToString();
-        }
-
-        /// <summary>
-        /// Specifies that the characters in the range from <paramref name="low"/> to
-        /// <paramref name="hi"/> shall be treated as whitespace characters by this
-        /// tokenizer. The range is clamped to the bounds of the syntax table.
-        /// </summary>
-        /// <param name="low">The first character in the range of whitespace characters.</param>
-        /// <param name="hi">The last character in the range of whitespace characters.</param>
-        public void WhitespaceChars(int low, int hi)
-        {
-            if (low < 0)
-            {
-                low = 0;
-            }
-            // Clamp with >=: hi == tokenTypes.Length is already one past the last
-            // valid index, and the inclusive loop below would index out of range.
-            if (hi >= tokenTypes.Length)
-            {
-                hi = tokenTypes.Length - 1;
-            }
-            for (int i = low; i <= hi; i++)
-            {
-                tokenTypes[i] = TOKEN_WHITE;
-            }
-        }
-
-        /// <summary>
-        /// Specifies that the characters in the range from <paramref name="low"/> to
-        /// <paramref name="hi"/> shall be treated as word characters by this tokenizer.
-        /// A word consists of a word character followed by zero or more word or
-        /// number characters. The range is clamped to the bounds of the syntax table.
-        /// </summary>
-        /// <param name="low">The first character in the range of word characters.</param>
-        /// <param name="hi">The last character in the range of word characters.</param>
-        public void WordChars(int low, int hi)
-        {
-            if (low < 0)
-            {
-                low = 0;
-            }
-            // Clamp with >=: hi == tokenTypes.Length is already one past the last
-            // valid index, and the inclusive loop below would index out of range.
-            if (hi >= tokenTypes.Length)
-            {
-                hi = tokenTypes.Length - 1;
-            }
-            for (int i = low; i <= hi; i++)
-            {
-                // OR the flag in so any attributes already set for the character are kept.
-                tokenTypes[i] |= TOKEN_WORD;
-            }
-        }
-    }
-}

[lucenenet] 04/07: Lucene.Net.Benchmark: Factored out StreamTokenizer from support in favor of the implementation in J2N

Reply via email to