Renamed Lucene.Net.Icu > Lucene.Net.ICU, Lucene.Net.Tests.Icu > Lucene.Net.Tests.ICU
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3c077fb1 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3c077fb1 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3c077fb1 Branch: refs/heads/master Commit: 3c077fb1b44503c778d9385f3419dae7c02a99f2 Parents: b1a701c Author: Shad Storhaug <[email protected]> Authored: Sat May 6 03:12:29 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Sat May 6 03:12:29 2017 +0700 ---------------------------------------------------------------------- CONTRIBUTING.md | 2 +- Lucene.Net.Portable.sln | 4 +- Lucene.Net.sln | 22 +- .../Properties/AssemblyInfo.cs | 4 +- src/Lucene.Net.ICU/Analysis/Th/stopwords.txt | 119 ++++++ src/Lucene.Net.ICU/Lucene.Net.ICU.csproj | 144 +++++++ src/Lucene.Net.ICU/Lucene.Net.ICU.project.json | 11 + src/Lucene.Net.ICU/Lucene.Net.ICU.xproj | 39 ++ src/Lucene.Net.ICU/Properties/AssemblyInfo.cs | 52 +++ src/Lucene.Net.ICU/Support/BreakIterator.cs | 248 +++++++++++ src/Lucene.Net.ICU/Support/CharacterIterator.cs | 50 +++ src/Lucene.Net.ICU/Support/IcuBreakIterator.cs | 394 +++++++++++++++++ .../Support/StringCharacterIterator.cs | 204 +++++++++ src/Lucene.Net.ICU/project.json | 64 +++ src/Lucene.Net.Icu/Analysis/Th/stopwords.txt | 119 ------ src/Lucene.Net.Icu/Lucene.Net.Icu.csproj | 145 ------- src/Lucene.Net.Icu/Lucene.Net.Icu.project.json | 11 - src/Lucene.Net.Icu/Lucene.Net.Icu.xproj | 39 -- src/Lucene.Net.Icu/Properties/AssemblyInfo.cs | 52 --- src/Lucene.Net.Icu/Support/BreakIterator.cs | 248 ----------- src/Lucene.Net.Icu/Support/CharacterIterator.cs | 50 --- src/Lucene.Net.Icu/Support/IcuBreakIterator.cs | 394 ----------------- .../Support/StringCharacterIterator.cs | 204 --------- src/Lucene.Net.Icu/project.json | 64 --- .../Lucene.Net.Tests.ICU.csproj | 141 +++++++ .../Lucene.Net.Tests.ICU.project.json | 12 + .../Lucene.Net.Tests.ICU.xproj | 42 ++ .../Properties/AssemblyInfo.cs | 42 
++ .../Search/PostingsHighlight/CambridgeMA.utf8 | 1 + .../Support/TestApiConsistency.cs | 147 +++++++ .../Support/TestExceptionSerialization.cs | 54 +++ .../Support/TestIcuBreakIterator.cs | 420 +++++++++++++++++++ src/Lucene.Net.Tests.ICU/project.json | 83 ++++ .../Lucene.Net.Tests.Icu.csproj | 142 ------- .../Lucene.Net.Tests.Icu.project.json | 12 - .../Lucene.Net.Tests.Icu.xproj | 42 -- .../Properties/AssemblyInfo.cs | 42 -- .../Search/PostingsHighlight/CambridgeMA.utf8 | 1 - .../Support/TestApiConsistency.cs | 147 ------- .../Support/TestExceptionSerialization.cs | 54 --- .../Support/TestIcuBreakIterator.cs | 420 ------------------- src/Lucene.Net.Tests.Icu/project.json | 83 ---- src/Lucene.Net/Properties/AssemblyInfo.cs | 4 +- 43 files changed, 2294 insertions(+), 2278 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/CONTRIBUTING.md ---------------------------------------------------------------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be00288..4c3522b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,7 +66,7 @@ See [Documenting Lucene.Net](https://cwiki.apache.org/confluence/display/LUCENEN * [Lucene.Net.Demo](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/demo) (might be a good learning experience) * [Lucene.Net.Replicator](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/replicator) -* [Lucene.Net.Analysis.Icu](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu) +* [Lucene.Net.Analysis.ICU](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu) (note that we will be putting this functionality into the Lucene.Net.ICU package) * [Lucene.Net.Analysis.Kuromoji](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/kuromoji) * 
[Lucene.Net.Analysis.SmartCn](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/smartcn) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/Lucene.Net.Portable.sln ---------------------------------------------------------------------- diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln index d6affa8..7e71238 100644 --- a/Lucene.Net.Portable.sln +++ b/Lucene.Net.Portable.sln @@ -98,9 +98,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{EFA10A77 Version.proj = Version.proj EndProjectSection EndProject -Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.xproj", "{44A5341B-0F52-429D-977A-C35E10ECCADF}" +Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.ICU", "src\Lucene.Net.ICU\Lucene.Net.ICU.xproj", "{44A5341B-0F52-429D-977A-C35E10ECCADF}" EndProject -Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.xproj", "{32FD3471-E862-4055-B969-79C12A656366}" +Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.ICU", "src\Lucene.Net.Tests.ICU\Lucene.Net.Tests.ICU.xproj", "{32FD3471-E862-4055-B969-79C12A656366}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/Lucene.Net.sln ---------------------------------------------------------------------- diff --git a/Lucene.Net.sln b/Lucene.Net.sln index e6940c9..b1d2752 100644 --- a/Lucene.Net.sln +++ b/Lucene.Net.sln @@ -1,6 +1,24 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# VisualStudioVersion = 14.0.25420.1 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "src\Lucene.Net\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}" @@ -89,9 +107,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{9811D53E Version.proj = Version.proj EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Icu", "src\Lucene.Net.Icu\Lucene.Net.Icu.csproj", "{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.ICU", "src\Lucene.Net.ICU\Lucene.Net.ICU.csproj", "{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Icu", "src\Lucene.Net.Tests.Icu\Lucene.Net.Tests.Icu.csproj", "{D5AA1A22-1B28-4DF6-BFDA-02519A189839}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.ICU", "src\Lucene.Net.Tests.ICU\Lucene.Net.Tests.ICU.csproj", "{D5AA1A22-1B28-4DF6-BFDA-02519A189839}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs 
b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs index 4da6116..8b18901 100644 --- a/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs +++ b/src/Lucene.Net.Highlighter/Properties/AssemblyInfo.cs @@ -45,9 +45,9 @@ using System.Runtime.InteropServices; // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("e9e769ea-8504-44bc-8dc9-ccf958765f8f")] -[assembly: InternalsVisibleTo("Lucene.Net.Icu")] +[assembly: InternalsVisibleTo("Lucene.Net.ICU")] // for testing [assembly: InternalsVisibleTo("Lucene.Net.Tests.Highlighter")] -[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")] +[assembly: InternalsVisibleTo("Lucene.Net.Tests.ICU")] // NOTE: Version information is in CommonAssemblyInfo.cs http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt b/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt new file mode 100644 index 0000000..07f0fab --- /dev/null +++ b/src/Lucene.Net.ICU/Analysis/Th/stopwords.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +à¹à¸§à¹ +à¹à¸¡à¹ +à¹à¸ +à¹à¸à¹ +à¹à¸«à¹ +à¹à¸ +à¹à¸à¸¢ +à¹à¸«à¹à¸ +à¹à¸¥à¹à¸§ +à¹à¸¥à¸° +à¹à¸£à¸ +à¹à¸à¸ +à¹à¸à¹ +à¹à¸à¸ +à¹à¸«à¹à¸ +à¹à¸¥à¸¢ +à¹à¸£à¸´à¹à¸¡ +à¹à¸£à¸² +à¹à¸¡à¸·à¹à¸ +à¹à¸à¸·à¹à¸ +à¹à¸à¸£à¸²à¸° +à¹à¸à¹à¸à¸à¸²à¸£ +à¹à¸à¹à¸ +à¹à¸à¸´à¸à¹à¸à¸¢ +à¹à¸à¸´à¸ +à¹à¸à¸·à¹à¸à¸à¸à¸²à¸ +à¹à¸à¸µà¸¢à¸§à¸à¸±à¸ +à¹à¸à¸µà¸¢à¸§ +à¹à¸à¹à¸ +à¹à¸à¸à¸²à¸° +à¹à¸à¸¢ +à¹à¸à¹à¸² +à¹à¸à¸² +à¸à¸µà¸ +à¸à¸²à¸ +à¸à¸°à¹à¸£ +à¸à¸à¸ +à¸à¸¢à¹à¸²à¸ +à¸à¸¢à¸¹à¹ +à¸à¸¢à¸²à¸ +หาภ+หลาย +หลัà¸à¸à¸²à¸ +หลัภ+หรืภ+หà¸à¸¶à¹à¸ +สà¹à¸§à¸ +สà¹à¸ +สุภ+สà¹à¸²à¸«à¸£à¸±à¸ +วà¹à¸² +วัภ+ลภ+รà¹à¸§à¸¡ +ราย +รัภ+ระหวà¹à¸²à¸ +รวม +ยัภ+มี +มาภ+มา +à¸à¸£à¹à¸à¸¡ +à¸à¸ +à¸à¹à¸²à¸ +à¸à¸¥ +à¸à¸²à¸ 
+à¸à¹à¸² +à¸à¸µà¹ +à¸à¹à¸² +à¸à¸±à¹à¸ +à¸à¸±à¸ +à¸à¸à¸à¸à¸²à¸ +à¸à¸¸à¸ +à¸à¸µà¹à¸ªà¸¸à¸ +à¸à¸µà¹ +à¸à¹à¸²à¹à¸«à¹ +à¸à¹à¸² +à¸à¸²à¸ +à¸à¸±à¹à¸à¸à¸µà¹ +à¸à¸±à¹à¸ +à¸à¹à¸² +à¸à¸¹à¸ +à¸à¸¶à¸ +à¸à¹à¸à¸ +à¸à¹à¸²à¸à¹ +à¸à¹à¸²à¸ +à¸à¹à¸ +à¸à¸²à¸¡ +à¸à¸±à¹à¸à¹à¸à¹ +à¸à¸±à¹à¸ +à¸à¹à¸²à¸ +à¸à¹à¸§à¸¢ +à¸à¸±à¸ +à¸à¸¶à¹à¸ +à¸à¹à¸§à¸ +à¸à¸¶à¸ +à¸à¸²à¸ +à¸à¸±à¸ +à¸à¸° +à¸à¸·à¸ +à¸à¸§à¸²à¸¡ +à¸à¸£à¸±à¹à¸ +à¸à¸ +à¸à¸¶à¹à¸ +à¸à¸à¸ +à¸à¸ +à¸à¸à¸° +à¸à¹à¸à¸ +à¸à¹ +à¸à¸²à¸£ +à¸à¸±à¸ +à¸à¸±à¸ +à¸à¸§à¹à¸² +à¸à¸¥à¹à¸²à¸§ http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj new file mode 100644 index 0000000..b1510b9 --- /dev/null +++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.csproj @@ -0,0 +1,144 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> +<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid> + <OutputType>Library</OutputType> + <AppDesignerFolder>Properties</AppDesignerFolder> + <RootNamespace>Lucene.Net</RootNamespace> + <AssemblyName>Lucene.Net.ICU</AssemblyName> + <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion> + <FileAlignment>512</FileAlignment> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> + <DebugSymbols>true</DebugSymbols> + <DebugType>full</DebugType> + <Optimize>false</Optimize> + <OutputPath>bin\Debug\</OutputPath> + <DefineConstants>DEBUG;TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> + <DebugType>pdbonly</DebugType> + <Optimize>true</Optimize> + <OutputPath>bin\Release\</OutputPath> + <DefineConstants>TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + </PropertyGroup> + <PropertyGroup> + <DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants> + </PropertyGroup> + <ItemGroup> + <Reference Include="System" /> + <Reference Include="System.Core" /> + <Reference Include="Microsoft.CSharp" /> + <Reference Include="System.Data" /> + </ItemGroup> + <ItemGroup> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs"> + <Link>Analysis\Th\ThaiAnalyzer.cs</Link> + </Compile> + <Compile 
Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs"> + <Link>Analysis\Th\ThaiTokenizer.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs"> + <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs"> + <Link>Analysis\Th\ThaiWordFilter.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs"> + <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs"> + <Link>Analysis\Util\CharArrayIterator.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs"> + <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs"> + <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs"> + <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs"> + <Link>Search\PostingsHighlight\Passage.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs"> + <Link>Search\PostingsHighlight\PassageFormatter.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs"> + <Link>Search\PostingsHighlight\PassageScorer.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs"> + <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link> + </Compile> + <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs"> + <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link> + 
</Compile> + <Compile Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs"> + <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link> + </Compile> + <Compile Include="Support\BreakIterator.cs" /> + <Compile Include="Support\CharacterIterator.cs" /> + <Compile Include="Support\IcuBreakIterator.cs" /> + <Compile Include="Properties\AssemblyInfo.cs" /> + <Compile Include="..\CommonAssemblyInfo.cs"> + <Link>Properties\CommonAssemblyInfo.cs</Link> + </Compile> + <Compile Include="Support\StringCharacterIterator.cs" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj"> + <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project> + <Name>Lucene.Net.Analysis.Common</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj"> + <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project> + <Name>Lucene.Net.Highlighter</Name> + </ProjectReference> + <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj"> + <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project> + <Name>Lucene.Net</Name> + </ProjectReference> + </ItemGroup> + <ItemGroup> + <None Include="Lucene.Net.ICU.project.json" /> + </ItemGroup> + <ItemGroup> + <EmbeddedResource Include="Analysis\Th\stopwords.txt" /> + </ItemGroup> + <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> + <!-- To modify your build process, add your task inside one of the targets below and uncomment it. + Other similar extension points exist, see Microsoft.Common.targets. 
+ <Target Name="BeforeBuild"> + </Target> + <Target Name="AfterBuild"> + </Target> + --> +</Project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json b/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json new file mode 100644 index 0000000..af28fc8 --- /dev/null +++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.project.json @@ -0,0 +1,11 @@ +{ + "runtimes": { + "win": {} + }, + "dependencies": { + "icu.net": "54.1.1-alpha" + }, + "frameworks": { + "net451": {} + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj b/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj new file mode 100644 index 0000000..dbc1701 --- /dev/null +++ b/src/Lucene.Net.ICU/Lucene.Net.ICU.xproj @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> +<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <PropertyGroup> + <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion> + <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath> + </PropertyGroup> + <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" /> + <PropertyGroup Label="Globals"> + <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid> + <RootNamespace>Lucene.Net</RootNamespace> + <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath> + <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath> + <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion> + </PropertyGroup> + <PropertyGroup> + <SchemaVersion>2.0</SchemaVersion> + </PropertyGroup> + <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" /> +</Project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs b/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..4d88887 --- /dev/null +++ b/src/Lucene.Net.ICU/Properties/AssemblyInfo.cs @@ -0,0 +1,52 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("Lucene.Net.ICU")] +[assembly: AssemblyDescription( + "International Components for Unicode-based features including Thai analyzer support, " + + "an international postings highlighter, and BreakIterator support for the vector highlighter in Lucene.Net.Highlighter " + + "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyDefaultAlias("Lucene.Net.ICU")] +[assembly: AssemblyCulture("")] + +[assembly: CLSCompliant(true)] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")] + +// for testing +[assembly: InternalsVisibleTo("Lucene.Net.Tests.ICU")] + +// NOTE: Version information is in CommonAssemblyInfo.cs http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/BreakIterator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Support/BreakIterator.cs b/src/Lucene.Net.ICU/Support/BreakIterator.cs new file mode 100644 index 0000000..df4a945 --- /dev/null +++ b/src/Lucene.Net.ICU/Support/BreakIterator.cs @@ -0,0 +1,248 @@ +#if FEATURE_BREAKITERATOR +using System; + +namespace Lucene.Net.Support +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// The <code>BreakIterator</code> class implements methods for finding + /// the location of boundaries in text. Instances of <code>BreakIterator</code> + /// maintain a current position and scan over text + /// returning the index of characters where boundaries occur. 
+ /// </summary> + public abstract class BreakIterator +#if FEATURE_CLONEABLE + : ICloneable +#endif + { + /// <summary> + /// Constructor. BreakIterator is stateless and has no default behavior. + /// </summary> + protected BreakIterator() + { + } + + /// <summary> + /// Create a copy of this iterator + /// </summary> + /// <returns>A member-wise copy of this</returns> + public object Clone() + { + return MemberwiseClone(); + } + + /// <summary> + /// DONE is returned by Previous(), Next(), Next(int), Preceding(int) + /// and Following(int) when either the first or last text boundary has been + /// reached. + /// </summary> + public static readonly int DONE = -1; + + /// <summary> + /// Returns the first boundary. The iterator's current position is set + /// to the first text boundary. + /// </summary> + /// <returns>The character index of the first text boundary</returns> + public abstract int First(); + + /// <summary> + /// Returns the last boundary. The iterator's current position is set + /// to the last text boundary. + /// </summary> + /// <returns>The character index of the last text boundary.</returns> + public abstract int Last(); + + /// <summary> + /// Returns the nth boundary from the current boundary. If either + /// the first or last text boundary has been reached, it returns + /// <see cref="BreakIterator.DONE"/> and the current position is set to either + /// the first or last text boundary depending on which one is reached. Otherwise, + /// the iterator's current position is set to the new boundary. + /// For example, if the iterator's current position is the mth text boundary + /// and three more boundaries exist from the current boundary to the last text + /// boundary, the Next(2) call will return m + 2. The new text position is set + /// to the (m + 2)th text boundary. A Next(4) call would return + /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the + /// new text position. 
+ /// </summary> + /// <param name="n"> + /// which boundary to return. A value of 0 + /// does nothing. Negative values move to previous boundaries + /// and positive values move to later boundaries. + /// </param> + /// <returns> + /// The character index of the nth boundary from the current position + /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary + /// has been reached. + /// </returns> + public abstract int Next(int n); + + /// <summary> + /// Returns the boundary following the current boundary. If the current boundary + /// is the last text boundary, it returns <c>BreakIterator.DONE</c> and + /// the iterator's current position is unchanged. Otherwise, the iterator's + /// current position is set to the boundary following the current boundary. + /// </summary> + /// <returns> + /// The character index of the next text boundary or + /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text + /// boundary. + /// Equivalent to Next(1). + /// </returns> + /// <seealso cref="Next(int)"/> + public abstract int Next(); + + /// <summary> + /// Returns the boundary preceding the current boundary. If the current boundary + /// is the first text boundary, it returns <code>BreakIterator.DONE</code> and + /// the iterator's current position is unchanged. Otherwise, the iterator's + /// current position is set to the boundary preceding the current boundary. + /// </summary> + /// <returns> + /// The character index of the previous text boundary or + /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text + /// boundary. + /// </returns> + public abstract int Previous(); + + /// <summary> + /// Returns the first boundary following the specified character offset. If the + /// specified offset equals to the last text boundary, it returns + /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged. 
+ /// Otherwise, the iterator's current position is set to the returned boundary. + /// The value returned is always greater than the offset or the value + /// <see cref="BreakIterator.DONE"/>. + /// </summary> + /// <param name="offset">the character offset to begin scanning.</param> + /// <returns> + /// The first boundary after the specified offset or + /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in + /// as the offset. + /// </returns> + /// <exception cref="ArgumentException"> + /// if the specified offset is less than + /// the first text boundary or greater than the last text boundary. + /// </exception> + public abstract int Following(int offset); + + /// <summary> + /// Returns the last boundary preceding the specified character offset. If the + /// specified offset equals to the first text boundary, it returns + /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged. + /// Otherwise, the iterator's current position is set to the returned boundary. + /// The value returned is always less than the offset or the value + /// <see cref="BreakIterator.DONE"/>. + /// </summary> + /// <param name="offset">the character offset to begin scanning.</param> + /// <returns> + /// The last boundary before the specified offset or + /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in + /// as the offset. + /// </returns> + public abstract int Preceding(int offset); + //{ + // // NOTE: This implementation is here solely because we can't add new + // // abstract methods to an existing class. There is almost ALWAYS a + // // better, faster way to do this. + // int pos = Following(offset); + // while (pos >= offset && pos != DONE) + // { + // pos = Previous(); + // } + // return pos; + //} + + /// <summary> + /// Returns true if the specified character offset is a text boundary. 
+ /// </summary> + /// <param name="offset">the character offset to check.</param> + /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns> + /// <exception cref="ArgumentException"> + /// if the specified offset is less than + /// the first text boundary or greater than the last text boundary. + /// </exception> + public abstract bool IsBoundary(int offset); + //{ + // // NOTE: This implementation probably is wrong for most situations + // // because it fails to take into account the possibility that a + // // CharacterIterator passed to setText() may not have a begin offset + // // of 0. But since the abstract BreakIterator doesn't have that + // // knowledge, it assumes the begin offset is 0. If you subclass + // // BreakIterator, copy the SimpleTextBoundary implementation of this + // // function into your subclass. [This should have been abstract at + // // this level, but it's too late to fix that now.] + // if (offset == 0) + // { + // return true; + // } + // int boundary = Following(offset - 1); + // if (boundary == DONE) + // { + // throw new ArgumentException(); + // } + // return boundary == offset; + //} + + /// <summary> + /// Returns character index of the text boundary that was most + /// recently returned by Next(), Next(int), Previous(), First(), Last(), + /// Following(int) or Preceding(int). If any of these methods returns + /// <see cref="BreakIterator.DONE"/> because either first or last text boundary + /// has been reached, it returns the first or last text boundary depending on + /// which one is reached. + /// </summary> + /// <returns> + /// The text boundary returned from the above methods, first or last + /// text boundary. 
+ /// </returns> + /// <seealso cref="Next()"/> + /// <seealso cref="Next(int)"/> + /// <seealso cref="Previous()"/> + /// <seealso cref="First()"/> + /// <seealso cref="Last()"/> + /// <seealso cref="Following(int)"/> + /// <seealso cref="Preceding(int)"/> + public abstract int Current { get; } + + /// <summary> + /// Get the text being scanned + /// </summary> + /// <returns>the text being scanned</returns> + //public abstract CharacterIterator GetText(); + public abstract string Text { get; } + + /// <summary> + /// Set a new text string to be scanned. The current scan + /// position is reset to First(). + /// </summary> + /// <param name="newText">new text to scan.</param> + public virtual void SetText(string newText) + { + SetText(new StringCharacterIterator(newText)); + } + + /// <summary> + /// Set a new text string to be scanned. The current scan + /// position is reset to First(). + /// </summary> + /// <param name="newText">new text to scan.</param> + public abstract void SetText(CharacterIterator newText); + } +} +#endif http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/CharacterIterator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Support/CharacterIterator.cs b/src/Lucene.Net.ICU/Support/CharacterIterator.cs new file mode 100644 index 0000000..0c81629 --- /dev/null +++ b/src/Lucene.Net.ICU/Support/CharacterIterator.cs @@ -0,0 +1,50 @@ +#if FEATURE_BREAKITERATOR +using System; + +namespace Lucene.Net.Support +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + public abstract class CharacterIterator + { + public static readonly char DONE = '\uFFFF'; + + public abstract char Current { get; } + + public abstract char First(); + + public abstract char Last(); + + public abstract char Next(); + + public abstract char Previous(); + + public abstract char SetIndex(int position); + + public abstract int BeginIndex { get; } + + public abstract int EndIndex { get; } + + public abstract int Index { get; } + + public abstract object Clone(); + + public abstract string GetTextAsString(); + } +} +#endif \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs b/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs new file mode 100644 index 0000000..79819ed --- /dev/null +++ b/src/Lucene.Net.ICU/Support/IcuBreakIterator.cs @@ -0,0 +1,394 @@ +#if FEATURE_BREAKITERATOR +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; + +namespace Lucene.Net +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality + /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/> + /// provides methods to move forward, reverse, and randomly through a set of text breaks + /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration. + /// </summary> + // LUCENENET specific type + public class IcuBreakIterator : BreakIterator + { + private readonly Icu.Locale locale; + private readonly Icu.BreakIterator.UBreakIteratorType type; + + private List<int> boundaries = new List<int>(); + private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries + private string text; + + /// <summary> + /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/> + /// </summary> + protected int m_start; + + /// <summary> + /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/> + /// </summary> + protected int m_end; + + private bool enableHacks = false; + + public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type) + : this(type, CultureInfo.CurrentCulture) + { + } + + public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale) + { + if (locale == null) + throw new ArgumentNullException("locale"); + this.locale = new Icu.Locale(locale.Name); 
+ this.type = type; + } + + + public virtual bool EnableHacks + { + get { return enableHacks; } + set { enableHacks = value; } + } + + /// <summary> + /// Sets the current iteration position to the beginning of the text. + /// </summary> + /// <returns>The offset of the beginning of the text.</returns> + public override int First() + { + currentBoundaryIndex = 0; + return ReturnCurrent(); + } + + /// <summary> + /// Sets the current iteration position to the end of the text. + /// </summary> + /// <returns>The text's past-the-end offset.</returns> + public override int Last() + { + currentBoundaryIndex = boundaries.Count - 1; + return ReturnCurrent(); + } + + /// <summary> + /// Advances the iterator either forward or backward the specified number of steps. + /// Negative values move backward, and positive values move forward. This is + /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>. + /// </summary> + /// <param name="n">The number of steps to move. The sign indicates the direction + /// (negative is backwards, and positive is forwards).</param> + /// <returns>The character offset of the boundary position n boundaries away from + /// the current one.</returns> + public override int Next(int n) + { + int result = Current; + while (n > 0) + { + result = Next(); + --n; + } + while (n < 0) + { + result = Previous(); + ++n; + } + return result; + } + + /// <summary> + /// Advances the iterator to the next boundary position. + /// </summary> + /// <returns>The position of the first boundary after this one.</returns> + public override int Next() + { + if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0) + { + return DONE; + } + currentBoundaryIndex++; + return ReturnCurrent(); + } + + /// <summary> + /// Advances the iterator backwards, to the last boundary preceding this one. 
+ /// </summary> + /// <returns>The position of the last boundary position preceding this one.</returns> + public override int Previous() + { + if (currentBoundaryIndex == 0 || boundaries.Count == 0) + { + return DONE; + } + currentBoundaryIndex--; + return ReturnCurrent(); + } + + /// <summary> + /// Throw <see cref="ArgumentException"/> unless begin <= offset < end. + /// </summary> + /// <param name="offset"></param> + private void CheckOffset(int offset) + { + if (offset < m_start || offset > m_end) + { + throw new ArgumentException("offset out of bounds"); + } + } + + /// <summary> + /// Sets the iterator to refer to the first boundary position following + /// the specified position. + /// </summary> + /// <param name="offset">The position from which to begin searching for a break position.</param> + /// <returns>The position of the first break after the current position.</returns> + public override int Following(int offset) + { + CheckOffset(offset); + + if (boundaries.Count == 0) + { + return DONE; + } + + int following = GetLowestIndexGreaterThan(offset); + if (following == -1) + { + currentBoundaryIndex = boundaries.Count - 1; + return DONE; + } + else + { + currentBoundaryIndex = following; + } + return ReturnCurrent(); + } + + private int GetLowestIndexGreaterThan(int offset) + { + int index = boundaries.BinarySearch(offset); + if (index < 0) + { + return ~index; + } + else if (index + 1 < boundaries.Count) + { + return index + 1; + } + + return -1; + } + + /// <summary> + /// Sets the iterator to refer to the last boundary position before the + /// specified position. 
+ /// </summary> + /// <param name="offset">The position to begin searching for a break from.</param> + /// <returns>The position of the last boundary before the starting position.</returns> + public override int Preceding(int offset) + { + CheckOffset(offset); + + if (boundaries.Count == 0) + { + return DONE; + } + + int preceeding = GetHighestIndexLessThan(offset); + if (preceeding == -1) + { + currentBoundaryIndex = 0; + return DONE; + } + else + { + currentBoundaryIndex = preceeding; + } + return ReturnCurrent(); + } + + private int GetHighestIndexLessThan(int offset) + { + int index = boundaries.BinarySearch(offset); + if (index < 0) + { + return ~index - 1; + } + else + { + // NOTE: This is intentionally allowed to return -1 in the case + // where index == 0. This state indicates we are before the first boundary. + return index - 1; + } + } + + /// <summary> + /// Returns the current iteration position. + /// </summary> + public override int Current + { + get { return ReturnCurrent(); } + } + + /// <summary> + /// Gets the text being analyzed. + /// </summary> + public override string Text + { + get + { + return text; + } + } + + /// <summary> + /// Set the iterator to analyze a new piece of text. This function resets + /// the current iteration position to the beginning of the text. 
+ /// </summary> + /// <param name="newText">The text to analyze.</param> + public override void SetText(string newText) + { + text = newText; + currentBoundaryIndex = 0; + m_start = 0; + m_end = newText.Length; + + LoadBoundaries(m_start, m_end); + } + + public override void SetText(CharacterIterator newText) + { + text = newText.GetTextAsString(); + currentBoundaryIndex = 0; + m_start = newText.BeginIndex; + m_end = newText.EndIndex; + + LoadBoundaries(m_start, m_end); + } + + private void LoadBoundaries(int start, int end) + { + IEnumerable<Icu.Boundary> icuBoundaries; + string offsetText = text.Substring(start, end - start); + +#if !NETSTANDARD + try + { +#endif + if (type == Icu.BreakIterator.UBreakIteratorType.WORD) + { + if (enableHacks) + { + // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken + offsetText = offsetText.Replace("-", "a"); + } + + icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true); + } + else + { + if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE) + { + // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking. + offsetText = offsetText.Replace("\n", " "); + // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't + // begin with capital letters. 
+ offsetText = CapitalizeFirst(offsetText); + } + + icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText); + } +#if !NETSTANDARD + } + catch (AccessViolationException ace) + { + // LUCENENET TODO: Find a reliable way to reproduce and report the + // AccessViolationException that happens here to the icu-dotnet project team + throw new Exception("Hit AccessViolationException: " + ace.ToString(), ace); + } +#endif + + boundaries = icuBoundaries + .Select(t => new[] { t.Start + start, t.End + start }) + .SelectMany(b => b) + .Distinct() + .ToList(); + } + + /// <summary> + /// Returns true if the specified character offset is a text boundary. + /// </summary> + /// <param name="offset">the character offset to check.</param> + /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns> + public override bool IsBoundary(int offset) + { + CheckOffset(offset); + return boundaries.Contains(offset); + } + + private int ReturnCurrent() + { + if (boundaries.Count > 0) + { + return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1 + ? boundaries[currentBoundaryIndex] + : DONE; + } + + // If there are no boundaries, we must return the start offset + return m_start; + } + + /// <summary> + /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet + /// where it doesn't correctly break sentences unless they begin with a capital letter. + /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator + /// code changed to remove calls to this method. + /// </summary> + public static string CapitalizeFirst(string s) + { + bool isNewSentence = true; + var result = new StringBuilder(s.Length); + for (int i = 0; i < s.Length; i++) + { + if (isNewSentence && char.IsLetter(s[i])) + { + result.Append(char.ToUpper(s[i])); + isNewSentence = false; + } + else + result.Append(s[i]); + + if (s[i] == '!' || s[i] == '?' 
|| s[i] == '.') + { + isNewSentence = true; + } + } + + return result.ToString(); + } + } +} +#endif http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs b/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs new file mode 100644 index 0000000..156f81e --- /dev/null +++ b/src/Lucene.Net.ICU/Support/StringCharacterIterator.cs @@ -0,0 +1,204 @@ +#if FEATURE_BREAKITERATOR +using System; + +namespace Lucene.Net.Support +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// <see cref="StringCharacterIterator"/> implements the + /// <see cref="CharacterIterator"/> protocol for a <see cref="string"/>. + /// The <see cref="StringCharacterIterator"/> class iterates over the + /// entire <see cref="string"/>. 
+ /// </summary> + /// <seealso cref="CharacterIterator"/> + public class StringCharacterIterator : CharacterIterator + { + private string text; + private int begin; + private int end; + // invariant: begin <= pos <= end + private int pos; + + + public StringCharacterIterator(string text) + : this(text, 0) + { + } + + public StringCharacterIterator(string text, int pos) + : this(text, 0, text.Length, pos) + { + } + + public StringCharacterIterator(string text, int begin, int end, int pos) + { + if (text == null) + throw new ArgumentNullException("text"); + this.text = text; + + if (begin < 0 || begin > end || end > text.Length) + throw new ArgumentException("Invalid substring range"); + + if (pos < begin || pos > end) + throw new ArgumentException("Invalid position"); + + this.begin = begin; + this.end = end; + this.pos = pos; + } + + public void SetText(string text) + { + if (text == null) + throw new ArgumentNullException("text"); + this.text = text; + this.begin = 0; + this.end = text.Length; + this.pos = 0; + } + + public override char First() + { + pos = begin; + return Current; + } + + public override char Last() + { + if (end != begin) + { + pos = end - 1; + } + else + { + pos = end; + } + return Current; + } + + public override char SetIndex(int position) + { + if (position < begin || position > end) + throw new ArgumentException("Invalid index"); + pos = position; + return Current; + } + + public override char Current + { + get + { + if (pos >= begin && pos < end) + { + return text[pos]; + } + else + { + return DONE; + } + } + } + + public override char Next() + { + if (pos < end - 1) + { + pos++; + return text[pos]; + } + else + { + pos = end; + return DONE; + } + } + + public override char Previous() + { + if (pos > begin) + { + pos--; + return text[pos]; + } + else + { + return DONE; + } + } + + + public override int BeginIndex + { + get + { + return begin; + } + } + + public override int EndIndex + { + get + { + return end; + } + } + + public override 
int Index + { + get + { + return pos; + } + } + + public override string GetTextAsString() + { + return text; + } + + public override bool Equals(object obj) + { + if (this == obj) + return true; + if (!(obj is StringCharacterIterator)) + return false; + + StringCharacterIterator that = (StringCharacterIterator)obj; + + if (GetHashCode() != that.GetHashCode()) + return false; + if (!text.Equals(that.text, StringComparison.Ordinal)) + return false; + if (pos != that.pos || begin != that.begin || end != that.end) + return false; + return true; + } + + public override int GetHashCode() + { + return base.GetHashCode() ^ pos ^ begin ^ end; + } + + public override object Clone() + { + return MemberwiseClone(); + } + } +} +#endif http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.ICU/project.json ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.ICU/project.json b/src/Lucene.Net.ICU/project.json new file mode 100644 index 0000000..f0889b8 --- /dev/null +++ b/src/Lucene.Net.ICU/project.json @@ -0,0 +1,64 @@ +{ + "version": "4.8.0", + "title": "Lucene.Net.ICU", + "description": "International Components for Unicode-based features including Thai analyzer support, an international postings highlighter, and BreakIterator support for the vector highlighter for the Lucene.Net full-text search engine library from The Apache Software Foundation.", + "authors": [ "The Apache Software Foundation" ], + "packOptions": { + "projectUrl": "http://lucenenet.apache.org/", + "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt", + "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true", + "owners": [ "The Apache Software Foundation" ], + "repository": { "url": "https://github.com/apache/lucenenet" }, + "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query" ] + }, + 
"buildOptions": { + "define": [ "FEATURE_BREAKITERATOR" ], + "compile": { + "includeFiles": [ + "../CommonAssemblyInfo.cs", + "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs", + "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs", + "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs", + "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs", + "../Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs", + "../Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs", + "../Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/DefaultPassageFormatter.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/MultiTermHighlighting.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/Passage.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/PassageFormatter.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/PassageScorer.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs", + "../Lucene.Net.Highlighter/PostingsHighlight/WholeBreakIterator.cs", + "../Lucene.Net.Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs" + ] + }, + "embed": { + "includeFiles": [ "Analysis/Th/stopwords.txt" ] + } + }, + "dependencies": { + "icu.net": "54.1.1-alpha", + "Lucene.Net": "4.8.0", + "Lucene.Net.Analysis.Common": "4.8.0", + "Lucene.Net.Highlighter": "4.8.0" + }, + "frameworks": { + "netstandard1.5": { + "imports": "dnxcore50", + "buildOptions": { + "debugType": "portable", + "define": [ "NETSTANDARD" ] + }, + "dependencies": { + "NETStandard.Library": "1.6.0" + } + }, + "net451": { + "buildOptions": { + "debugType": "full", + "define": [ "FEATURE_SERIALIZABLE" ] + } + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt 
b/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt deleted file mode 100644 index 07f0fab..0000000 --- a/src/Lucene.Net.Icu/Analysis/Th/stopwords.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -à¹à¸§à¹ -à¹à¸¡à¹ -à¹à¸ -à¹à¸à¹ -à¹à¸«à¹ -à¹à¸ -à¹à¸à¸¢ -à¹à¸«à¹à¸ -à¹à¸¥à¹à¸§ -à¹à¸¥à¸° -à¹à¸£à¸ -à¹à¸à¸ -à¹à¸à¹ -à¹à¸à¸ -à¹à¸«à¹à¸ -à¹à¸¥à¸¢ -à¹à¸£à¸´à¹à¸¡ -à¹à¸£à¸² -à¹à¸¡à¸·à¹à¸ -à¹à¸à¸·à¹à¸ -à¹à¸à¸£à¸²à¸° -à¹à¸à¹à¸à¸à¸²à¸£ -à¹à¸à¹à¸ -à¹à¸à¸´à¸à¹à¸à¸¢ -à¹à¸à¸´à¸ -à¹à¸à¸·à¹à¸à¸à¸à¸²à¸ -à¹à¸à¸µà¸¢à¸§à¸à¸±à¸ -à¹à¸à¸µà¸¢à¸§ -à¹à¸à¹à¸ -à¹à¸à¸à¸²à¸° -à¹à¸à¸¢ -à¹à¸à¹à¸² -à¹à¸à¸² -à¸à¸µà¸ -à¸à¸²à¸ -à¸à¸°à¹à¸£ -à¸à¸à¸ -à¸à¸¢à¹à¸²à¸ -à¸à¸¢à¸¹à¹ -à¸à¸¢à¸²à¸ -หาภ-หลาย -หลัà¸à¸à¸²à¸ -หลัภ-หรืภ-หà¸à¸¶à¹à¸ -สà¹à¸§à¸ -สà¹à¸ -สุภ-สà¹à¸²à¸«à¸£à¸±à¸ -วà¹à¸² -วัภ-ลภ-รà¹à¸§à¸¡ -ราย -รัภ-ระหวà¹à¸²à¸ -รวม -ยัภ-มี -มาภ-มา -à¸à¸£à¹à¸à¸¡ -à¸à¸ -à¸à¹à¸²à¸ -à¸à¸¥ -à¸à¸²à¸ -à¸à¹à¸² -à¸à¸µà¹ -à¸à¹à¸² -à¸à¸±à¹à¸ -à¸à¸±à¸ -à¸à¸à¸à¸à¸²à¸ -à¸à¸¸à¸ -à¸à¸µà¹à¸ªà¸¸à¸ -à¸à¸µà¹ -à¸à¹à¸²à¹à¸«à¹ -à¸à¹à¸² -à¸à¸²à¸ -à¸à¸±à¹à¸à¸à¸µà¹ -à¸à¸±à¹à¸ -à¸à¹à¸² -à¸à¸¹à¸ -à¸à¸¶à¸ -à¸à¹à¸à¸ -à¸à¹à¸²à¸à¹ -à¸à¹à¸²à¸ -à¸à¹à¸ -à¸à¸²à¸¡ -à¸à¸±à¹à¸à¹à¸à¹ -à¸à¸±à¹à¸ -à¸à¹à¸²à¸ -à¸à¹à¸§à¸¢ -à¸à¸±à¸ -à¸à¸¶à¹à¸ -à¸à¹à¸§à¸ -à¸à¸¶à¸ -à¸à¸²à¸ -à¸à¸±à¸ -à¸à¸° -à¸à¸·à¸ -à¸à¸§à¸²à¸¡ -à¸à¸£à¸±à¹à¸ -à¸à¸ -à¸à¸¶à¹à¸ -à¸à¸à¸ -à¸à¸ -à¸à¸à¸° -à¸à¹à¸à¸ -à¸à¹ -à¸à¸²à¸£ -à¸à¸±à¸ -à¸à¸±à¸ -à¸à¸§à¹à¸² -à¸à¸¥à¹à¸²à¸§ http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj deleted file mode 100644 index e2ccbc0..0000000 --- a/src/Lucene.Net.Icu/Lucene.Net.Icu.csproj +++ /dev/null @@ -1,145 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!-- - - Licensed to the Apache Software 
Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - ---> - -<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> - <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> - <PropertyGroup> - <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> - <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> - <ProjectGuid>{349CB7C9-7534-4E1D-9B0A-5521441AF0AE}</ProjectGuid> - <OutputType>Library</OutputType> - <AppDesignerFolder>Properties</AppDesignerFolder> - <RootNamespace>Lucene.Net</RootNamespace> - <AssemblyName>Lucene.Net.Icu</AssemblyName> - <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion> - <FileAlignment>512</FileAlignment> - </PropertyGroup> - <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> - <DebugSymbols>true</DebugSymbols> - <DebugType>full</DebugType> - <Optimize>false</Optimize> - <OutputPath>bin\Debug\</OutputPath> - <DefineConstants>DEBUG;TRACE</DefineConstants> - <ErrorReport>prompt</ErrorReport> - <WarningLevel>4</WarningLevel> - </PropertyGroup> - <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 
'Release|AnyCPU' "> - <DebugType>pdbonly</DebugType> - <Optimize>true</Optimize> - <OutputPath>bin\Release\</OutputPath> - <DefineConstants>TRACE</DefineConstants> - <ErrorReport>prompt</ErrorReport> - <WarningLevel>4</WarningLevel> - </PropertyGroup> - <PropertyGroup> - <DefineConstants>$(DefineConstants);FEATURE_BREAKITERATOR;FEATURE_SERIALIZABLE</DefineConstants> - </PropertyGroup> - <ItemGroup> - <Reference Include="System" /> - <Reference Include="System.Core" /> - <Reference Include="Microsoft.CSharp" /> - <Reference Include="System.Data" /> - </ItemGroup> - <ItemGroup> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiAnalyzer.cs"> - <Link>Analysis\Th\ThaiAnalyzer.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizer.cs"> - <Link>Analysis\Th\ThaiTokenizer.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiTokenizerFactory.cs"> - <Link>Analysis\Th\ThaiTokenizerFactory.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilter.cs"> - <Link>Analysis\Th\ThaiWordFilter.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Th\ThaiWordFilterFactory.cs"> - <Link>Analysis\Th\ThaiWordFilterFactory.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\CharArrayIterator.cs"> - <Link>Analysis\Util\CharArrayIterator.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Analysis.Common\Analysis\Util\SegmentingTokenizerBase.cs"> - <Link>Analysis\Util\SegmentingTokenizerBase.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\DefaultPassageFormatter.cs"> - <Link>Search\PostingsHighlight\DefaultPassageFormatter.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\MultiTermHighlighting.cs"> - <Link>Search\PostingsHighlight\MultiTermHighlighting.cs</Link> - </Compile> - <Compile 
Include="..\Lucene.Net.Highlighter\PostingsHighlight\Passage.cs"> - <Link>Search\PostingsHighlight\Passage.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageFormatter.cs"> - <Link>Search\PostingsHighlight\PassageFormatter.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PassageScorer.cs"> - <Link>Search\PostingsHighlight\PassageScorer.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\PostingsHighlighter.cs"> - <Link>Search\PostingsHighlight\PostingsHighlighter.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\PostingsHighlight\WholeBreakIterator.cs"> - <Link>Search\PostingsHighlight\WholeBreakIterator.cs</Link> - </Compile> - <Compile Include="..\Lucene.Net.Highlighter\VectorHighlight\BreakIteratorBoundaryScanner.cs"> - <Link>Search\VectorHighlight\BreakIteratorBoundaryScanner.cs</Link> - </Compile> - <Compile Include="Support\BreakIterator.cs" /> - <Compile Include="Support\CharacterIterator.cs" /> - <Compile Include="Support\IcuBreakIterator.cs" /> - <Compile Include="Properties\AssemblyInfo.cs" /> - <Compile Include="..\CommonAssemblyInfo.cs"> - <Link>Properties\CommonAssemblyInfo.cs</Link> - </Compile> - <Compile Include="Support\StringCharacterIterator.cs" /> - </ItemGroup> - <ItemGroup> - <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj"> - <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project> - <Name>Lucene.Net.Analysis.Common</Name> - </ProjectReference> - <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj"> - <Project>{e9e769ea-8504-44bc-8dc9-ccf958765f8f}</Project> - <Name>Lucene.Net.Highlighter</Name> - </ProjectReference> - <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj"> - <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project> - <Name>Lucene.Net</Name> - </ProjectReference> - </ItemGroup> - <ItemGroup> - <None 
Include="Lucene.Net.Icu.project.json" /> - </ItemGroup> - <ItemGroup> - <EmbeddedResource Include="Analysis\Th\stopwords.txt" /> - </ItemGroup> - <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> - <!-- To modify your build process, add your task inside one of the targets below and uncomment it. - Other similar extension points exist, see Microsoft.Common.targets. - <Target Name="BeforeBuild"> - </Target> - <Target Name="AfterBuild"> - </Target> - --> -</Project> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json b/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json deleted file mode 100644 index af28fc8..0000000 --- a/src/Lucene.Net.Icu/Lucene.Net.Icu.project.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "runtimes": { - "win": {} - }, - "dependencies": { - "icu.net": "54.1.1-alpha" - }, - "frameworks": { - "net451": {} - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj b/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj deleted file mode 100644 index 4b09c43..0000000 --- a/src/Lucene.Net.Icu/Lucene.Net.Icu.xproj +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!-- - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - ---> -<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> - <PropertyGroup> - <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0</VisualStudioVersion> - <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath> - </PropertyGroup> - <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" /> - <PropertyGroup Label="Globals"> - <ProjectGuid>44a5341b-0f52-429d-977a-c35e10eccadf</ProjectGuid> - <RootNamespace>Lucene.Net.Search</RootNamespace> - <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath> - <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath> - <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion> - </PropertyGroup> - <PropertyGroup> - <SchemaVersion>2.0</SchemaVersion> - </PropertyGroup> - <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" /> -</Project> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs b/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs deleted file mode 100644 index cfe269c..0000000 --- a/src/Lucene.Net.Icu/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more 
contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * -*/ - -using System; -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("Lucene.Net.Icu")] -[assembly: AssemblyDescription( - "International Components for Unicode-based features including Thai analyzer support, " + - "an international postings highlighter, and BreakIterator support for the vector highlighter in Lucene.Net.Highlighter " + - "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyDefaultAlias("Lucene.Net.Icu")] -[assembly: AssemblyCulture("")] - -[assembly: CLSCompliant(true)] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("349cb7c9-7534-4e1d-9b0a-5521441af0ae")] - -// for testing -[assembly: InternalsVisibleTo("Lucene.Net.Tests.Icu")] - -// NOTE: Version information is in CommonAssemblyInfo.cs http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3c077fb1/src/Lucene.Net.Icu/Support/BreakIterator.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Icu/Support/BreakIterator.cs b/src/Lucene.Net.Icu/Support/BreakIterator.cs deleted file mode 100644 index df4a945..0000000 --- a/src/Lucene.Net.Icu/Support/BreakIterator.cs +++ /dev/null @@ -1,248 +0,0 @@ -#if FEATURE_BREAKITERATOR -using System; - -namespace Lucene.Net.Support -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - /// <summary> - /// The <code>BreakIterator</code> class implements methods for finding - /// the location of boundaries in text. Instances of <code>BreakIterator</code> - /// maintain a current position and scan over text - /// returning the index of characters where boundaries occur. 
- /// </summary> - public abstract class BreakIterator -#if FEATURE_CLONEABLE - : ICloneable -#endif - { - /// <summary> - /// Constructor. BreakIterator is stateless and has no default behavior. - /// </summary> - protected BreakIterator() - { - } - - /// <summary> - /// Create a copy of this iterator - /// </summary> - /// <returns>A member-wise copy of this</returns> - public object Clone() - { - return MemberwiseClone(); - } - - /// <summary> - /// DONE is returned by Previous(), Next(), Next(int), Preceding(int) - /// and Following(int) when either the first or last text boundary has been - /// reached. - /// </summary> - public static readonly int DONE = -1; - - /// <summary> - /// Returns the first boundary. The iterator's current position is set - /// to the first text boundary. - /// </summary> - /// <returns>The character index of the first text boundary</returns> - public abstract int First(); - - /// <summary> - /// Returns the last boundary. The iterator's current position is set - /// to the last text boundary. - /// </summary> - /// <returns>The character index of the last text boundary.</returns> - public abstract int Last(); - - /// <summary> - /// Returns the nth boundary from the current boundary. If either - /// the first or last text boundary has been reached, it returns - /// <see cref="BreakIterator.DONE"/> and the current position is set to either - /// the first or last text boundary depending on which one is reached. Otherwise, - /// the iterator's current position is set to the new boundary. - /// For example, if the iterator's current position is the mth text boundary - /// and three more boundaries exist from the current boundary to the last text - /// boundary, the Next(2) call will return m + 2. The new text position is set - /// to the (m + 2)th text boundary. A Next(4) call would return - /// <see cref="BreakIterator.DONE"/> and the last text boundary would become the - /// new text position. 
- /// </summary> - /// <param name="n"> - /// which boundary to return. A value of 0 - /// does nothing. Negative values move to previous boundaries - /// and positive values move to later boundaries. - /// </param> - /// <returns> - /// The character index of the nth boundary from the current position - /// or <see cref="BreakIterator.DONE"/> if either first or last text boundary - /// has been reached. - /// </returns> - public abstract int Next(int n); - - /// <summary> - /// Returns the boundary following the current boundary. If the current boundary - /// is the last text boundary, it returns <c>BreakIterator.DONE</c> and - /// the iterator's current position is unchanged. Otherwise, the iterator's - /// current position is set to the boundary following the current boundary. - /// </summary> - /// <returns> - /// The character index of the next text boundary or - /// <see cref="BreakIterator.DONE"/> if the current boundary is the last text - /// boundary. - /// Equivalent to Next(1). - /// </returns> - /// <seealso cref="Next(int)"/> - public abstract int Next(); - - /// <summary> - /// Returns the boundary preceding the current boundary. If the current boundary - /// is the first text boundary, it returns <code>BreakIterator.DONE</code> and - /// the iterator's current position is unchanged. Otherwise, the iterator's - /// current position is set to the boundary preceding the current boundary. - /// </summary> - /// <returns> - /// The character index of the previous text boundary or - /// <see cref="BreakIterator.DONE"/> if the current boundary is the first text - /// boundary. - /// </returns> - public abstract int Previous(); - - /// <summary> - /// Returns the first boundary following the specified character offset. If the - /// specified offset equals to the last text boundary, it returns - /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged. 
- /// Otherwise, the iterator's current position is set to the returned boundary. - /// The value returned is always greater than the offset or the value - /// <see cref="BreakIterator.DONE"/>. - /// </summary> - /// <param name="offset">the character offset to begin scanning.</param> - /// <returns> - /// The first boundary after the specified offset or - /// <see cref="BreakIterator.DONE"/> if the last text boundary is passed in - /// as the offset. - /// </returns> - /// <exception cref="ArgumentException"> - /// if the specified offset is less than - /// the first text boundary or greater than the last text boundary. - /// </exception> - public abstract int Following(int offset); - - /// <summary> - /// Returns the last boundary preceding the specified character offset. If the - /// specified offset equals to the first text boundary, it returns - /// <see cref="BreakIterator.DONE"/> and the iterator's current position is unchanged. - /// Otherwise, the iterator's current position is set to the returned boundary. - /// The value returned is always less than the offset or the value - /// <see cref="BreakIterator.DONE"/>. - /// </summary> - /// <param name="offset">the character offset to begin scanning.</param> - /// <returns> - /// The last boundary before the specified offset or - /// <see cref="BreakIterator.DONE"/> if the first text boundary is passed in - /// as the offset. - /// </returns> - public abstract int Preceding(int offset); - //{ - // // NOTE: This implementation is here solely because we can't add new - // // abstract methods to an existing class. There is almost ALWAYS a - // // better, faster way to do this. - // int pos = Following(offset); - // while (pos >= offset && pos != DONE) - // { - // pos = Previous(); - // } - // return pos; - //} - - /// <summary> - /// Returns true if the specified character offset is a text boundary. 
- /// </summary> - /// <param name="offset">the character offset to check.</param> - /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns> - /// <exception cref="ArgumentException"> - /// if the specified offset is less than - /// the first text boundary or greater than the last text boundary. - /// </exception> - public abstract bool IsBoundary(int offset); - //{ - // // NOTE: This implementation probably is wrong for most situations - // // because it fails to take into account the possibility that a - // // CharacterIterator passed to setText() may not have a begin offset - // // of 0. But since the abstract BreakIterator doesn't have that - // // knowledge, it assumes the begin offset is 0. If you subclass - // // BreakIterator, copy the SimpleTextBoundary implementation of this - // // function into your subclass. [This should have been abstract at - // // this level, but it's too late to fix that now.] - // if (offset == 0) - // { - // return true; - // } - // int boundary = Following(offset - 1); - // if (boundary == DONE) - // { - // throw new ArgumentException(); - // } - // return boundary == offset; - //} - - /// <summary> - /// Returns character index of the text boundary that was most - /// recently returned by Next(), Next(int), Previous(), First(), Last(), - /// Following(int) or Preceding(int). If any of these methods returns - /// <see cref="BreakIterator.DONE"/> because either first or last text boundary - /// has been reached, it returns the first or last text boundary depending on - /// which one is reached. - /// </summary> - /// <returns> - /// The text boundary returned from the above methods, first or last - /// text boundary. 
- /// </returns> - /// <seealso cref="Next()"/> - /// <seealso cref="Next(int)"/> - /// <seealso cref="Previous()"/> - /// <seealso cref="First()"/> - /// <seealso cref="Last()"/> - /// <seealso cref="Following(int)"/> - /// <seealso cref="Preceding(int)"/> - public abstract int Current { get; } - - /// <summary> - /// Get the text being scanned - /// </summary> - /// <returns>the text being scanned</returns> - //public abstract CharacterIterator GetText(); - public abstract string Text { get; } - - /// <summary> - /// Set a new text string to be scanned. The current scan - /// position is reset to First(). - /// </summary> - /// <param name="newText">new text to scan.</param> - public virtual void SetText(string newText) - { - SetText(new StringCharacterIterator(newText)); - } - - /// <summary> - /// Set a new text string to be scanned. The current scan - /// position is reset to First(). - /// </summary> - /// <param name="newText">new text to scan.</param> - public abstract void SetText(CharacterIterator newText); - } -} -#endif
