Move Contrib.Regex into Contrib.Sandbox to match upstream

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/65d8a533
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/65d8a533
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/65d8a533

Branch: refs/heads/branch_4x
Commit: 65d8a53351a93f5ad156e1659330d30a7434f380
Parents: c53f7d6
Author: Paul Irwin <[email protected]>
Authored: Wed Nov 6 16:41:19 2013 -0500
Committer: Paul Irwin <[email protected]>
Committed: Wed Nov 6 16:41:19 2013 -0500

----------------------------------------------------------------------
 build/vs2012/Lucene.Net.All/Lucene.Net.All.sln  |  11 --
 src/contrib/Sandbox/Contrib.Sandbox.csproj      |   5 +
 .../Queries/Regex/CSharpRegexCapabilities.cs    |  93 ++++++++++++++
 .../Sandbox/Queries/Regex/IRegexCapabilities.cs |  49 ++++++++
 .../Sandbox/Queries/Regex/IRegexQueryCapable.cs |  28 +++++
 src/contrib/Sandbox/Queries/Regex/RegexQuery.cs | 125 +++++++++++++++++++
 .../Sandbox/Queries/Regex/RegexTermsEnum.cs     |  64 ++++++++++
 7 files changed, 364 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
----------------------------------------------------------------------
diff --git a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln 
b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
index 833fb8d..bffb200 100644
--- a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
+++ b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
@@ -17,8 +17,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Contrib.Memory", "..\..\..\
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Queries", 
"..\..\..\src\contrib\Queries\Contrib.Queries.csproj", 
"{481CF6E3-52AF-4621-9DEB-022122079AF6}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Regex", 
"..\..\..\src\contrib\Regex\Contrib.Regex.csproj", 
"{A26BD3B7-DF90-43B4-99E2-6A617CDE1579}"
-EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SimpleFacetedSearch", 
"..\..\..\src\contrib\SimpleFacetedSearch\SimpleFacetedSearch.csproj", 
"{66772190-FB3F-48F5-8E05-0B302BACEA73}"
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Snowball", 
"..\..\..\src\contrib\Snowball\Contrib.Snowball.csproj", 
"{8F9D7A92-F122-413E-9D8D-027E4ECD327C}"
@@ -97,14 +95,6 @@ Global
                {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release|Any CPU.Build.0 
= Release|Any CPU
                {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release35|Any 
CPU.ActiveCfg = Release35|Any CPU
                {481CF6E3-52AF-4621-9DEB-022122079AF6}.Release35|Any 
CPU.Build.0 = Release35|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug35|Any 
CPU.ActiveCfg = Debug35|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Debug35|Any CPU.Build.0 
= Debug35|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release|Any 
CPU.ActiveCfg = Release|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release|Any CPU.Build.0 
= Release|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release35|Any 
CPU.ActiveCfg = Release35|Any CPU
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579}.Release35|Any 
CPU.Build.0 = Release35|Any CPU
                {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU
                {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug|Any CPU.Build.0 = 
Debug|Any CPU
                {66772190-FB3F-48F5-8E05-0B302BACEA73}.Debug35|Any 
CPU.ActiveCfg = Debug35|Any CPU
@@ -204,7 +194,6 @@ Global
                {901D5415-383C-4AA6-A256-879558841BEA} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
                {112B9A7C-29CC-4539-8F5A-45669C07CD4D} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
                {481CF6E3-52AF-4621-9DEB-022122079AF6} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
-               {A26BD3B7-DF90-43B4-99E2-6A617CDE1579} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
                {66772190-FB3F-48F5-8E05-0B302BACEA73} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
                {8F9D7A92-F122-413E-9D8D-027E4ECD327C} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
                {35C347F4-24B2-4BE5-8117-A0E3001551CE} = 
{7E19085A-545B-4DE8-BBF5-B1DBC370FD37}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Contrib.Sandbox.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Contrib.Sandbox.csproj 
b/src/contrib/Sandbox/Contrib.Sandbox.csproj
index 643f561..f10090e 100644
--- a/src/contrib/Sandbox/Contrib.Sandbox.csproj
+++ b/src/contrib/Sandbox/Contrib.Sandbox.csproj
@@ -43,6 +43,11 @@
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Queries\DuplicateFilter.cs" />
     <Compile Include="Queries\FuzzyLikeThisQuery.cs" />
+    <Compile Include="Queries\Regex\CSharpRegexCapabilities.cs" />
+    <Compile Include="Queries\Regex\IRegexCapabilities.cs" />
+    <Compile Include="Queries\Regex\IRegexQueryCapable.cs" />
+    <Compile Include="Queries\Regex\RegexQuery.cs" />
+    <Compile Include="Queries\Regex\RegexTermsEnum.cs" />
     <Compile Include="Queries\SlowCollatedStringComparer.cs" />
     <Compile Include="Queries\SlowCollatedTermRangeFilter.cs" />
     <Compile Include="Queries\SlowCollatedTermRangeQuery.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs 
b/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs
new file mode 100644
index 0000000..41cbc91
--- /dev/null
+++ b/src/contrib/Sandbox/Queries/Regex/CSharpRegexCapabilities.cs
@@ -0,0 +1,93 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Util;
+using System;
+
+namespace Lucene.Net.Sandbox.Queries.Regex
+{
+       /// <summary>
+       /// <see cref="IRegexCapabilities"/> implementation backed by
+       /// <see cref="System.Text.RegularExpressions.Regex"/>.
+       /// </summary>
+       /// <remarks>
+       /// Equality and hashing are based on the text and options of the compiled pattern, not on
+       /// the identity of the underlying <c>Regex</c> object: <c>Regex</c> does not override
+       /// <c>Equals</c>/<c>GetHashCode</c>, so comparing the objects directly would make two
+       /// instances compiled from the same pattern unequal. An instance that has not been
+       /// compiled yet is only equal to another instance that has not been compiled yet.
+       /// Ported from http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java.htm
+       /// </remarks>
+       public class CSharpRegexCapabilities : IRegexCapabilities, IEquatable<CSharpRegexCapabilities>
+       {
+               // Pattern produced by the most recent Compile call; null until Compile has been called.
+               private System.Text.RegularExpressions.Regex _rPattern;
+
+               /// <summary>
+               /// Compiles <paramref name="pattern"/> and caches the result for subsequent
+               /// <see cref="Match"/> calls. Called by the constructor of <see cref="RegexTermsEnum"/>.
+               /// </summary>
+               /// <param name="pattern">regular expression pattern</param>
+               public void Compile(string pattern)
+               {
+                       _rPattern = new System.Text.RegularExpressions.Regex(pattern,
+                               System.Text.RegularExpressions.RegexOptions.Compiled);
+               }
+
+               /// <summary>
+               /// Decodes <paramref name="s"/> as UTF-8 and tests it against the compiled pattern.
+               /// </summary>
+               /// <param name="s">text to match</param>
+               /// <returns>true on match</returns>
+               /// <exception cref="InvalidOperationException">
+               /// <see cref="Compile"/> has not been called yet.
+               /// </exception>
+               public bool Match(BytesRef s)
+               {
+                       if (_rPattern == null)
+                       {
+                               // Fail with a descriptive error instead of a bare NullReferenceException.
+                               throw new InvalidOperationException("Compile must be called before Match.");
+                       }
+
+                       return _rPattern.IsMatch(s.Utf8ToString());
+               }
+
+               /// <summary>
+               /// Static non-regex prefix of the pattern last passed to <see cref="Compile"/>.
+               /// </summary>
+               /// <returns>always null: this implementation does not extract a prefix</returns>
+               public string Prefix()
+               {
+                       return null;
+               }
+
+               /// <summary>
+               /// Indicates whether the current object is equal to another object of the same type.
+               /// </summary>
+               /// <param name="other">An object to compare with this object</param>
+               /// <returns>
+               /// true if both instances are uncompiled, or both hold a pattern with the same text
+               /// and the same options; otherwise, false.
+               /// </returns>
+               public bool Equals(CSharpRegexCapabilities other)
+               {
+                       if (ReferenceEquals(other, null))
+                       {
+                               return false;
+                       }
+
+                       if (ReferenceEquals(this, other))
+                       {
+                               return true;
+                       }
+
+                       if (_rPattern == null || other._rPattern == null)
+                       {
+                               // At least one side is uncompiled: equal only when both are.
+                               return _rPattern == null && other._rPattern == null;
+                       }
+
+                       // Regex.ToString() yields the pattern text passed to the constructor.
+                       return _rPattern.Options == other._rPattern.Options
+                               && string.Equals(_rPattern.ToString(), other._rPattern.ToString(), StringComparison.Ordinal);
+               }
+
+               /// <summary>
+               /// Type-checked overload that defers to <see cref="Equals(CSharpRegexCapabilities)"/>.
+               /// </summary>
+               /// <param name="obj">object to compare with</param>
+               /// <returns>false when <paramref name="obj"/> is null or of another type</returns>
+               public override bool Equals(object obj)
+               {
+                       return Equals(obj as CSharpRegexCapabilities);
+               }
+
+               /// <summary>
+               /// Hash code consistent with <see cref="Equals(CSharpRegexCapabilities)"/>.
+               /// </summary>
+               /// <returns>0 when uncompiled; otherwise a hash of the pattern text and options</returns>
+               public override int GetHashCode()
+               {
+                       if (_rPattern == null)
+                       {
+                               return 0;
+                       }
+
+                       unchecked
+                       {
+                               return (_rPattern.ToString().GetHashCode() * 397) ^ (int) _rPattern.Options;
+                       }
+               }
+       }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs 
b/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs
new file mode 100644
index 0000000..528a8fa
--- /dev/null
+++ b/src/contrib/Sandbox/Queries/Regex/IRegexCapabilities.cs
@@ -0,0 +1,49 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Util;
+namespace Lucene.Net.Sandbox.Queries.Regex
+{
+       /// <summary>
+       /// Defines the basic operations <see cref="RegexQuery"/> needs from a regular expression implementation.
+       /// </summary>
+       /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexCapabilities.java.htm</remarks>
+       public interface IRegexCapabilities
+       {
+               /// <summary>
+               /// Called by the constructor of <see cref="RegexTermsEnum"/>, allowing implementations to cache
+               /// a compiled version of the regular expression pattern.
+               /// </summary>
+               /// <param name="pattern">regular expression pattern</param>
+               void Compile(string pattern);
+
+               /// <summary>
+               /// Tests a term against the pattern last passed to <see cref="Compile"/>.
+               /// </summary>
+               /// <param name="s">text to match</param>
+               /// <returns>true on match</returns>
+               bool Match(BytesRef s);
+
+               /// <summary>
+               /// A good prefix implementation can dramatically reduce the number of terms
+               /// <see cref="RegexQuery"/> has to test (and thus improve performance):
+               /// <see cref="RegexTermsEnum"/> uses the prefix as its initial seek term and rejects
+               /// terms that do not start with it.
+               /// </summary>
+               /// <returns>static non-regex prefix of the pattern last passed to <see cref="Compile"/>.
+               ///   May return null, which <see cref="RegexTermsEnum"/> treats as an empty prefix</returns>
+               string Prefix();
+       }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs 
b/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs
new file mode 100644
index 0000000..c53a119
--- /dev/null
+++ b/src/contrib/Sandbox/Queries/Regex/IRegexQueryCapable.cs
@@ -0,0 +1,28 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Sandbox.Queries.Regex
+{
+       /// <summary>
+       /// Implemented by queries that evaluate regular expressions through a pluggable
+       /// <see cref="IRegexCapabilities"/> implementation.
+       /// </summary>
+       /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQueryCapable.java.htm</remarks>
+       public interface IRegexQueryCapable
+       {
+           /// <summary>
+           /// Gets or sets the regular expression implementation the query uses.
+           /// </summary>
+           IRegexCapabilities RegexImplementation { set; get; }
+       }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs 
b/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs
new file mode 100644
index 0000000..43fabd2
--- /dev/null
+++ b/src/contrib/Sandbox/Queries/Regex/RegexQuery.cs
@@ -0,0 +1,125 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Text;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Sandbox.Queries.Regex
+{
+       /// <summary>
+       /// Regular expression based query: the text of <see cref="Term"/> is the pattern and its
+       /// field is the field whose terms are tested.
+       /// </summary>
+       /// <remarks>
+       /// Matching is delegated to <see cref="RegexImplementation"/>, which defaults to a new
+       /// <see cref="CSharpRegexCapabilities"/>.
+       /// Ported from http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQuery.java.htm
+       /// </remarks>
+       public class RegexQuery : MultiTermQuery, IRegexQueryCapable
+       {
+               private IRegexCapabilities regexImpl = new CSharpRegexCapabilities();
+
+               // Never null: the constructor rejects a null term.
+               private readonly Term term;
+
+               /// <summary>
+               /// Creates a query for the field and pattern carried by <paramref name="term"/>.
+               /// </summary>
+               /// <param name="term">field to search and regular expression to apply</param>
+               /// <exception cref="ArgumentNullException"><paramref name="term"/> is null.</exception>
+               public RegexQuery(Term term)
+                       : base(RequireTerm(term).Field)
+               {
+                       this.term = term;
+               }
+
+               /// <summary>
+               /// Gets the term supplied to the constructor.
+               /// </summary>
+               public Term Term
+               {
+                       get { return this.term; }
+               }
+
+               /// <summary>
+               /// Gets or sets the regular expression implementation handed to
+               /// <see cref="RegexTermsEnum"/> when terms are enumerated.
+               /// </summary>
+               public IRegexCapabilities RegexImplementation
+               {
+                       set { regexImpl = value; }
+                       get { return regexImpl; }
+               }
+
+               /// <summary>
+               /// Builds a <see cref="RegexTermsEnum"/> over <paramref name="terms"/> using this
+               /// query's term and regular expression implementation.
+               /// </summary>
+               /// <param name="terms">terms of the field being searched</param>
+               /// <param name="atts">not used by this implementation</param>
+               /// <returns>the filtering terms enumerator</returns>
+               protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
+               {
+                       return new RegexTermsEnum(terms.Iterator(null), term, regexImpl);
+               }
+
+               /// <summary>
+               /// Formats the query as <c>field:pattern</c> followed by the boost text; the field
+               /// part is omitted when it equals <paramref name="field"/>.
+               /// </summary>
+               /// <param name="field">field name to leave out of the output</param>
+               /// <returns>string form of the query</returns>
+               public override String ToString(String field)
+               {
+                       StringBuilder buffer = new StringBuilder();
+                       if (!term.Field.Equals(field))
+                       {
+                               buffer.Append(term.Field);
+                               buffer.Append(":");
+                       }
+                       buffer.Append(term.Text);
+                       buffer.Append(ToStringUtils.Boost(Boost));
+                       return buffer.ToString();
+               }
+
+               /// <summary>
+               /// Combines the base hash with the hashes of the regular expression implementation
+               /// and the term.
+               /// </summary>
+               /// <returns>hash code consistent with <see cref="Equals(object)"/></returns>
+               public override int GetHashCode()
+               {
+                       const int prime = 31;
+
+                       // Wrap-around is intended; unchecked keeps this safe under /checked builds.
+                       unchecked
+                       {
+                               int result = base.GetHashCode();
+                               result = prime * result + ((regexImpl == null) ? 0 : regexImpl.GetHashCode());
+                               result = prime * result + term.GetHashCode();
+                               return result;
+                       }
+               }
+
+               /// <summary>
+               /// Two queries are equal when the base class considers them equal, they have the same
+               /// runtime type, and their regular expression implementations and terms are equal.
+               /// </summary>
+               /// <param name="obj">object to compare with</param>
+               /// <returns>true when <paramref name="obj"/> is an equal query</returns>
+               public override bool Equals(object obj)
+               {
+                       if (ReferenceEquals(this, obj))
+                       {
+                               return true;
+                       }
+
+                       if (!base.Equals(obj) || GetType() != obj.GetType())
+                       {
+                               return false;
+                       }
+
+                       RegexQuery other = (RegexQuery)obj;
+
+                       // object.Equals copes with a null implementation on either side.
+                       return object.Equals(regexImpl, other.regexImpl) && term.Equals(other.term);
+               }
+
+               // Validates the constructor argument before the base initializer dereferences it.
+               private static Term RequireTerm(Term term)
+               {
+                       if (ReferenceEquals(term, null))
+                       {
+                               throw new ArgumentNullException("term");
+                       }
+
+                       return term;
+               }
+       }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65d8a533/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs 
b/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs
new file mode 100644
index 0000000..71f9025
--- /dev/null
+++ b/src/contrib/Sandbox/Queries/Regex/RegexTermsEnum.cs
@@ -0,0 +1,64 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Sandbox.Queries.Regex
+{
+    /// <summary>
+    /// <see cref="FilteredTermsEnum"/> that accepts only the terms matched by a regular
+    /// expression, evaluated with the supplied <see cref="IRegexCapabilities"/> implementation.
+    /// </summary>
+    /// <remarks>http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexTermEnum.java.htm</remarks>
+    public class RegexTermsEnum : FilteredTermsEnum
+    {
+        private IRegexCapabilities regexImpl;
+        private readonly BytesRef prefixRef;
+
+        /// <summary>
+        /// Compiles the text of <paramref name="term"/> with <paramref name="regexCap"/> and
+        /// sets the initial seek term to the implementation's prefix.
+        /// </summary>
+        /// <param name="tenum">terms enumerator to filter</param>
+        /// <param name="term">term whose text is the regular expression pattern</param>
+        /// <param name="regexCap">regular expression implementation used for matching</param>
+        public RegexTermsEnum(TermsEnum tenum, Term term, IRegexCapabilities regexCap)
+            : base(tenum)
+        {
+            string pattern = term.Text;
+            regexImpl = regexCap;
+            regexCap.Compile(pattern);
+
+            // A null prefix is treated as the empty prefix.
+            string literalPrefix = regexImpl.Prefix() ?? "";
+
+            prefixRef = new BytesRef(literalPrefix);
+            InitialSeekTerm = prefixRef;
+        }
+
+        /// <summary>
+        /// Accepts <paramref name="term"/> when it starts with the prefix and matches the
+        /// compiled regular expression.
+        /// </summary>
+        /// <param name="term">candidate term</param>
+        /// <returns><c>AcceptStatus.YES</c> on a match; otherwise <c>AcceptStatus.NO</c></returns>
+        protected override AcceptStatus Accept(BytesRef term)
+        {
+            // Skip the regex entirely for terms outside the prefix.
+            if (!StringHelper.StartsWith(term, prefixRef))
+            {
+                return AcceptStatus.NO;
+            }
+
+            bool matched = regexImpl.Match(term);
+            return matched ? AcceptStatus.YES : AcceptStatus.NO;
+        }
+    }
+}

Reply via email to