Move Contrib.Regex into Contrib.Sandbox to match upstream
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/65d8a533 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/65d8a533 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/65d8a533 Branch: refs/heads/branch_4x Commit: 65d8a53351a93f5ad156e1659330d30a7434f380 Parents: c53f7d6 Author: Paul Irwin <[email protected]> Authored: Wed Nov 6 16:41:19 2013 -0500 Committer: Paul Irwin <[email protected]> Committed: Wed Nov 6 16:41:19 2013 -0500 ---------------------------------------------------------------------- build/vs2012/Lucene.Net.All/Lucene.Net.All.sln | 11 -- src/contrib/Sandbox/Contrib.Sandbox.csproj | 5 + .../Queries/Regex/CSharpRegexCapabilities.cs | 93 ++++++++++++++ .../Sandbox/Queries/Regex/IRegexCapabilities.cs | 49 ++++++++ .../Sandbox/Queries/Regex/IRegexQueryCapable.cs | 28 +++++ src/contrib/Sandbox/Queries/Regex/RegexQuery.cs | 125 +++++++++++++++++++ .../Sandbox/Queries/Regex/RegexTermsEnum.cs | 64 ++++++++++ 7 files changed, 364 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln ---------------------------------------------------------------------- diff --git a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln index 833fb8d..bffb200 100644 --- a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln +++ b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln @@ -17,8 +17,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Memory", "..\..\..\ EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Queries", "..\..\..\src\contrib\Queries\Contrib.Queries.csproj", "{481CF6E3-52AF-4621-9DEB-022122079AF6}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Regex", "..\..\..\src\contrib\Regex\Contrib.Regex.csproj", "{A26BD3B7-DF90-43B4-99E2-6A617CDE1579}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SimpleFacetedSearch", "..\..\..\src\contrib\SimpleFacetedSearch\SimpleFacetedSearch.csproj", "{66772190-FB3F-48F5-8E05-0B302BACEA73}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Snowball", "..\..\..\src\contrib\Snowball\Contrib.Snowball.csproj", "{8F9D7A92-F122-413E-9D8D-027E4ECD327C}" @@ -97,14 +95,6 @@ Global {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release|Any CPU.Build.0 = Release|Any CPU {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release35|Any CPU.ActiveCfg = Release35|Any CPU {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release35|Any CPU.Build.0 = Release35|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug|Any CPU.Build.0 = Debug|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug35|Any CPU.ActiveCfg = Debug35|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug35|Any CPU.Build.0 = Debug35|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release|Any CPU.Build.0 = Release|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release35|Any CPU.ActiveCfg = Release35|Any CPU - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release35|Any CPU.Build.0 = Release35|Any CPU {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug|Any CPU.Build.0 = Debug|Any CPU {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug35|Any CPU.ActiveCfg = Debug35|Any CPU @@ -204,7 +194,6 @@ Global {901D5415-383C-4AA6-A256-879558841BEA} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} {112B9A7C-29CC-4539-8F5A-45669C07CD4D} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} {481CF6E3-52AF-4621-9DEB-022122079AF6} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} - {A26BD3B7-DF90-43B4-99E2-6A617CDE1579} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} {66772190-FB3F-48F5-8E05-0B302BACEA73} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} {8F9D7A92-F122-413E-9D8D-027E4ECD327C} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} {35C347F4-24B2-4BE5-8117-A0E3001551CE} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Contrib.Sandbox.csproj ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Contrib.Sandbox.csproj b/src/contrib/Sandbox/Contrib.Sandbox.csproj index 643f561..f10090e 100644 --- a/src/contrib/Sandbox/Contrib.Sandbox.csproj +++ b/src/contrib/Sandbox/Contrib.Sandbox.csproj @@ -43,6 +43,11 @@ <Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Queries\DuplicateFilter.cs" /> <Compile Include="Queries\FuzzyLikeThisQuery.cs" /> + <Compile Include="Queries\Regex\CSharpRegexCapabilities.cs" /> + <Compile Include="Queries\Regex\IRegexCapabilities.cs" /> + <Compile Include="Queries\Regex\IRegexQueryCapable.cs" /> + <Compile Include="Queries\Regex\RegexQuery.cs" /> + <Compile Include="Queries\Regex\RegexTermsEnum.cs" /> <Compile Include="Queries\SlowCollatedStringComparer.cs" /> <Compile Include="Queries\SlowCollatedTermRangeFilter.cs" /> <Compile Include="Queries\SlowCollatedTermRangeQuery.cs" /> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs b/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs new file mode 100644 index 0000000..41cbc91 --- /dev/null +++ b/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Util; +using System; + +namespace Lucene.Net.Sandbox.Queries.Regex +{ + /// <summary> + /// C# Regex based implementation of <see cref="IRegexCapabilities"/>. + /// </summary> + /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java.htm</remarks> + public class CSharpRegexCapabilities : IRegexCapabilities, IEquatable<CSharpRegexCapabilities> + { + private System.Text.RegularExpressions.Regex _rPattern; + + /// <summary> + /// Called by the constructor of <see cref="RegexTermEnum"/> allowing implementations to cache + /// a compiled version of the regular expression pattern. + /// </summary> + /// <param name="pattern">regular expression pattern</param> + public void Compile(string pattern) + { + _rPattern = new System.Text.RegularExpressions.Regex(pattern, + System.Text.RegularExpressions.RegexOptions.Compiled); + } + + /// <summary> + /// True on match. + /// </summary> + /// <param name="s">text to match</param> + /// <returns>true on match</returns> + public bool Match(BytesRef s) + { + string str = s.Utf8ToString(); + return _rPattern.IsMatch(str); + } + + /// <summary> + /// A wise prefix implementation can reduce the term enumeration (and thus performance) + /// of RegexQuery dramatically. + /// </summary> + /// <returns>static non-regex prefix of the pattern last passed to <see cref="IRegexCapabilities.Compile"/>. + /// May return null</returns> + public string Prefix() + { + return null; + } + + /// <summary> + /// Indicates whether the current object is equal to another object of the same type. + /// </summary> + /// <returns> + /// true if the current object is equal to the <paramref name="other"/> parameter; otherwise, false. + /// </returns> + /// <param name="other">An object to compare with this object</param> + public bool Equals(CSharpRegexCapabilities other) + { + if (other == null) return false; + if (this == other) return true; + + if (_rPattern != null ? !_rPattern.Equals(other._rPattern) : other._rPattern != null) + return false; + + return true; + } + + public override bool Equals(object obj) + { + if (obj as CSharpRegexCapabilities == null) return false; + return Equals((CSharpRegexCapabilities) obj); + } + + public override int GetHashCode() + { + return (_rPattern != null ? _rPattern.GetHashCode() : 0); + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs b/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs new file mode 100644 index 0000000..528a8fa --- /dev/null +++ b/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Util; +namespace Lucene.Net.Sandbox.Queries.Regex +{ + /// <summary> + /// Defines basic operations needed by <see cref="RegexQuery"/> for a regular expression implementation. + /// </summary> + /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexCapabilities.java.htm</remarks> + public interface IRegexCapabilities + { + /// <summary> + /// Called by the constructor of <see cref="RegexTermEnum"/> allowing implementations to cache + /// a compiled version of the regular expression pattern. + /// </summary> + /// <param name="pattern">regular expression pattern</param> + void Compile(string pattern); + + /// <summary> + /// True on match. + /// </summary> + /// <param name="s">text to match</param> + /// <returns>true on match</returns> + bool Match(BytesRef s); + + /// <summary> + /// A wise prefix implementation can reduce the term enumeration (and thus performance) + /// of RegexQuery dramatically. + /// </summary> + /// <returns>static non-regex prefix of the pattern last passed to <see cref="Compile"/>. + /// May return null</returns> + string Prefix(); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs b/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs new file mode 100644 index 0000000..c53a119 --- /dev/null +++ b/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Sandbox.Queries.Regex +{ + /// <summary> + /// Defines methods for regular expression supporting queries to use. + /// </summary> + /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQueryCapable.java.htm</remarks> + public interface IRegexQueryCapable + { + IRegexCapabilities RegexImplementation { set; get; } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs b/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs new file mode 100644 index 0000000..43fabd2 --- /dev/null +++ b/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Text; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Util; + +namespace Lucene.Net.Sandbox.Queries.Regex +{ + /// <summary> + /// Regular expression based query. + /// </summary> + /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQuery.java.htm</remarks> + public class RegexQuery : MultiTermQuery, IRegexQueryCapable + { + private IRegexCapabilities regexImpl = new CSharpRegexCapabilities(); + private Term term; + + public RegexQuery(Term term) + : base(term.Field) + { + this.term = term; + } + + public Term Term + { + get { return this.term; } + } + + public IRegexCapabilities RegexImplementation + { + set { regexImpl = value; } + get { return regexImpl; } + } + + protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts) + { + return new RegexTermsEnum(terms.Iterator(null), term, regexImpl); + } + + public override String ToString(String field) + { + StringBuilder buffer = new StringBuilder(); + if (!term.Field.Equals(field)) + { + buffer.Append(term.Field); + buffer.Append(":"); + } + buffer.Append(term.Text); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override int GetHashCode() + { + const int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((regexImpl == null) ? 0 : regexImpl.GetHashCode()); + result = prime * result + ((term == null) ? 0 : term.GetHashCode()); + return result; + } + + public override bool Equals(object obj) + { + if (this == obj) + { + return true; + } + + if (!base.Equals(obj)) + { + return false; + } + + if (GetType() != obj.GetType()) + { + return false; + } + + RegexQuery other = (RegexQuery)obj; + if (regexImpl == null) + { + if (other.regexImpl != null) + { + return false; + } + } + else if (!regexImpl.Equals(other.regexImpl)) + { + return false; + } + + if (term == null) + { + if (other.term != null) + { + return false; + } + } + else if (!term.Equals(other.term)) + { + return false; + } + + return true; + } + + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs b/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs new file mode 100644 index 0000000..71f9025 --- /dev/null +++ b/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Util; + +namespace Lucene.Net.Sandbox.Queries.Regex +{ + /// <summary> + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// specified regular expression term using the specified regular expression + /// implementation. + /// <para>Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it.</para> + /// </summary> + /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexTermEnum.java.htm</remarks> + public class RegexTermsEnum : FilteredTermsEnum + { + private IRegexCapabilities regexImpl; + private readonly BytesRef prefixRef; + + public RegexTermsEnum(TermsEnum tenum, Term term, IRegexCapabilities regexCap) + : base(tenum) + { + string text = term.Text; + this.regexImpl = regexCap; + regexCap.Compile(text); + string pre = regexImpl.Prefix(); + if (pre == null) + { + pre = ""; + } + + InitialSeekTerm = prefixRef = new BytesRef(pre); + } + + protected override AcceptStatus Accept(BytesRef term) + { + if (StringHelper.StartsWith(term, prefixRef)) + { + return regexImpl.Match(term) ? AcceptStatus.YES : AcceptStatus.NO; + } + else + { + return AcceptStatus.NO; + } + } + } +}
