Started porting QueryParsers.Flexible.Standard
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f66837d2 Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f66837d2 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f66837d2 Branch: refs/heads/branch_4x Commit: f66837d249e43a23258cc630442c85507801e84c Parents: 539a6de Author: Paul Irwin <[email protected]> Authored: Fri Oct 4 14:05:50 2013 -0400 Committer: Paul Irwin <[email protected]> Committed: Sat Oct 5 16:37:29 2013 -0400 ---------------------------------------------------------------------- .../QueryParsers/Contrib.QueryParsers.csproj | 4 + .../Standard/Builders/AnyQueryNodeBuilder.cs | 58 +++- .../Builders/BooleanQueryNodeBuilder.cs | 81 +++++ .../Standard/Builders/IStandardQueryBuilder.cs | 2 +- .../Standard/Parser/EscapeQuerySyntaxImpl.cs | 297 +++++++++++++++++++ .../Flexible/Standard/Parser/ICharStream.cs | 37 +++ .../QueryParsers/Support/StringExtensions.cs | 16 + 7 files changed, 492 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Contrib.QueryParsers.csproj ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Contrib.QueryParsers.csproj b/src/contrib/QueryParsers/Contrib.QueryParsers.csproj index 59815b4..b69ba58 100644 --- a/src/contrib/QueryParsers/Contrib.QueryParsers.csproj +++ b/src/contrib/QueryParsers/Contrib.QueryParsers.csproj @@ -110,9 +110,13 @@ <Compile Include="Flexible\Messages\Message.cs" /> <Compile Include="Flexible\Messages\NLS.cs" /> <Compile Include="Flexible\Standard\Builders\AnyQueryNodeBuilder.cs" /> + <Compile Include="Flexible\Standard\Builders\BooleanQueryNodeBuilder.cs" /> <Compile Include="Flexible\Standard\Builders\IStandardQueryBuilder.cs" /> <Compile Include="Flexible\Standard\ICommonQueryParserConfiguration.cs" /> + <Compile 
Include="Flexible\Standard\Parser\EscapeQuerySyntaxImpl.cs" /> + <Compile Include="Flexible\Standard\Parser\ICharStream.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> + <Compile Include="Support\StringExtensions.cs" /> </ItemGroup> <ItemGroup> <ProjectReference Include="..\..\core\Lucene.Net.csproj"> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs index f3f0789..30e60a4 100644 --- a/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs +++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs @@ -1,4 +1,10 @@ -using System; +using Lucene.Net.QueryParsers.Flexible.Core; +using Lucene.Net.QueryParsers.Flexible.Core.Builders; +using Lucene.Net.QueryParsers.Flexible.Core.Messages; +using Lucene.Net.QueryParsers.Flexible.Core.Nodes; +using Lucene.Net.QueryParsers.Flexible.Messages; +using Lucene.Net.Search; +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -6,7 +12,55 @@ using System.Threading.Tasks; namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders { - class AnyQueryNodeBuilder + public class AnyQueryNodeBuilder : IStandardQueryBuilder /* Builds a BooleanQuery for an AnyQueryNode: the object tagged on each child is added as a SHOULD clause. */ { + public AnyQueryNodeBuilder() + { + // empty constructor + } + + public Query Build(IQueryNode queryNode) /* queryNode is cast directly to AnyQueryNode; returns the assembled BooleanQuery. */ + { + AnyQueryNode andNode = (AnyQueryNode)queryNode; + + BooleanQuery bQuery = new BooleanQuery(); + IList<IQueryNode> children = andNode.Children; + + if (children != null) + { + foreach (IQueryNode child in children) + { + Object obj = child.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID); /* Object stored on the child under the tree-builder tag; children without one are skipped. */ + + if (obj != null) + { + Query query = (Query)obj; + + try + { + bQuery.Add(query, Occur.SHOULD); + } + catch 
(BooleanQuery.TooManyClauses ex) + { + throw new QueryNodeException(new Message( + /* + * IQQQ.Q0028E_TOO_MANY_BOOLEAN_CLAUSES, + * BooleanQuery.getMaxClauseCount() + */ + QueryParserMessages.EMPTY_MESSAGE), ex); /* NOTE(review): the specific too-many-clauses message is commented out above and EMPTY_MESSAGE is passed instead - confirm whether this is a placeholder. */ + } + } + } + } + + bQuery.MinimumNumberShouldMatch = andNode.MinimumMatchingElements; /* Carries the node's minimum-match count over to the query. */ + + return bQuery; + } + + object IQueryBuilder.Build(IQueryNode queryNode) /* Explicit IQueryBuilder implementation; forwards to the Query-typed Build. */ + { + return Build(queryNode); + } } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs new file mode 100644 index 0000000..97b580f --- /dev/null +++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs @@ -0,0 +1,81 @@ +using Lucene.Net.QueryParsers.Flexible.Core; +using Lucene.Net.QueryParsers.Flexible.Core.Builders; +using Lucene.Net.QueryParsers.Flexible.Core.Messages; +using Lucene.Net.QueryParsers.Flexible.Core.Nodes; +using Lucene.Net.QueryParsers.Flexible.Messages; +using Lucene.Net.QueryParsers.Flexible.Standard.Parser; +using Lucene.Net.Search; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders +{ + public class BooleanQueryNodeBuilder : IStandardQueryBuilder /* Builds a BooleanQuery for a BooleanQueryNode: the object tagged on each child is added with the Occur derived from that child's modifier. */ + { + public BooleanQueryNodeBuilder() + { + // empty constructor + } + + public Query Build(IQueryNode queryNode) /* queryNode is cast directly to BooleanQueryNode; returns the assembled BooleanQuery. */ + { + BooleanQueryNode booleanNode = (BooleanQueryNode)queryNode; + + BooleanQuery bQuery = new BooleanQuery(); + IList<IQueryNode> children = booleanNode.Children; + + if (children != null) + { + foreach (IQueryNode child in children) + { + Object obj = child.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID); /* Object stored on the child under the tree-builder tag; children without one are skipped. */ + + if (obj != 
null) + { + Query query = (Query)obj; + + try + { + bQuery.Add(query, GetModifierValue(child)); + } + catch (BooleanQuery.TooManyClauses ex) /* Rewrapped as QueryNodeException; the message receives the max clause count and the node rendered as a query string. */ + { + throw new QueryNodeException(new Message( + QueryParserMessages.TOO_MANY_BOOLEAN_CLAUSES, BooleanQuery.MaxClauseCount, queryNode.ToQueryString(new EscapeQuerySyntaxImpl())), ex); + + } + } + } + } + + return bQuery; + } + + object IQueryBuilder.Build(IQueryNode queryNode) /* Explicit IQueryBuilder implementation; forwards to the Query-typed Build. */ + { + return Build(queryNode); + } + + private static Occur GetModifierValue(IQueryNode node) /* Maps a node's modifier to an Occur: MOD_REQ to MUST, MOD_NOT to MUST_NOT, MOD_NONE to SHOULD; non-modifier nodes and any other value yield SHOULD. */ + { + if (node is ModifierQueryNode) + { + ModifierQueryNode mNode = ((ModifierQueryNode)node); + switch (mNode.ModifierValue) + { + case ModifierQueryNode.Modifier.MOD_REQ: + return Occur.MUST; + case ModifierQueryNode.Modifier.MOD_NOT: + return Occur.MUST_NOT; + case ModifierQueryNode.Modifier.MOD_NONE: + return Occur.SHOULD; + } + } + + return Occur.SHOULD; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs index 366568f..cca4a1a 100644 --- a/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs +++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs @@ -11,6 +11,6 @@ namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders { public interface IStandardQueryBuilder : IQueryBuilder { - Query Build(IQueryNode queryNode); + new Query Build(IQueryNode queryNode); /* 'new' hides the inherited IQueryBuilder.Build so implementers expose a Query-typed result. */ } } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs 
b/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs new file mode 100644 index 0000000..6c524a0 --- /dev/null +++ b/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs @@ -0,0 +1,297 @@ +using Lucene.Net.QueryParsers.Classic; +using Lucene.Net.QueryParsers.Flexible.Core.Messages; +using Lucene.Net.QueryParsers.Flexible.Core.Parser; +using Lucene.Net.QueryParsers.Flexible.Core.Util; +using Lucene.Net.QueryParsers.Flexible.Messages; +using Lucene.Net.QueryParsers.Support; +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Lucene.Net.QueryParsers.Flexible.Standard.Parser +{ + public class EscapeQuerySyntaxImpl : IEscapeQuerySyntax /* Escapes text for the standard query syntax (Escape) and strips escapes again (DiscardEscapeChar). */ + { + private static readonly char[] wildcardChars = { '*', '?' }; + + private static readonly string[] escapableTermExtraFirstChars = { "+", "-", "@" }; + + private static readonly string[] escapableTermChars = { "\"", "<", ">", "=", + "!", "(", ")", "^", "[", "{", ":", "]", "}", "~", "/" }; + + // TODO: check what to do with these "*", "?", "\\" + private static readonly string[] escapableQuotedChars = { "\"" }; + private static readonly string[] escapableWhiteChars = { " ", "\t", "\n", "\r", + "\f", "\b", "\u3000" }; + private static readonly string[] escapableWordTokens = { "AND", "OR", "NOT", + "TO", "WITHIN", "SENTENCE", "PARAGRAPH", "INORDER" }; + + private static ICharSequence EscapeChar(ICharSequence str, CultureInfo locale) /* Backslash-escapes every escapableTermChars occurrence, then a leading '+', '-' or '@'; null or empty input is returned as is. */ + { + if (str == null || str.Length == 0) + return str; + + ICharSequence buffer = str; + + // regular escapable Char for terms + for (int i = 0; i < escapableTermChars.Length; i++) + { + buffer = ReplaceIgnoreCase(buffer, escapableTermChars[i].ToLower(locale), + "\\", locale); + } + + // First Character of a term as more escaping chars + for (int i = 0; i < escapableTermExtraFirstChars.Length; i++) + { + if (buffer.CharAt(0) == 
escapableTermExtraFirstChars[i][0]) + { + buffer = new StringCharSequenceWrapper("\\" + buffer.CharAt(0) + + buffer.SubSequence(1, buffer.Length)); + break; + } + } + + return buffer; + } + + private ICharSequence EscapeQuoted(ICharSequence str, CultureInfo locale) /* Backslash-escapes the characters listed in escapableQuotedChars; null or empty input is returned as is. */ + { + if (str == null || str.Length == 0) + return str; + + ICharSequence buffer = str; + + for (int i = 0; i < escapableQuotedChars.Length; i++) + { + buffer = ReplaceIgnoreCase(buffer, escapableQuotedChars[i].ToLower(locale), "\\", locale); /* Index the array that bounds the loop; this used to read escapableTermChars[i], which only worked because both arrays start with the double quote. */ + } + + return buffer; + } + + private static ICharSequence EscapeTerm(ICharSequence term, CultureInfo locale) /* Escapes special and whitespace characters, then prefixes a backslash when the whole term equals a parser keyword (case-insensitive). */ + { + if (term == null) + return term; + + // Escape single Chars + term = EscapeChar(term, locale); + term = EscapeWhiteChar(term, locale); + + // Escape Parser Words + for (int i = 0; i < escapableWordTokens.Length; i++) + { + if (escapableWordTokens[i].EqualsIgnoreCase(term.ToString())) + return new StringCharSequenceWrapper("\\" + term); + } + return term; + } + + private static ICharSequence ReplaceIgnoreCase(ICharSequence str, string sequence1, string escapeChar, CultureInfo locale) /* Returns str with escapeChar inserted before each occurrence of sequence1 (matched against the lower-cased text); returns str itself when nothing matches. Throws ArgumentNullException when str, sequence1 or escapeChar is null. */ + { + if (escapeChar == null || sequence1 == null || str == null) + throw new ArgumentNullException(str == null ? "str" : (sequence1 == null ? "sequence1" : "escapeChar")); /* Argument validation must not raise NullReferenceException directly; name the offending parameter instead. */ + + // empty string case + int count = str.Length; + int sequence1Length = sequence1.Length; + if (sequence1Length == 0) + { + StringBuilder result = new StringBuilder((count + 1) + * escapeChar.Length); + result.Append(escapeChar); + for (int i = 0; i < count; i++) + { + result.Append(str.CharAt(i)); + result.Append(escapeChar); + } + return new StringCharSequenceWrapper(result.ToString()); + } + + // normal case + StringBuilder result2 = new StringBuilder(); + char first = sequence1[0]; + int start = 0, copyStart = 0, firstIndex; + while (start < count) + { + if ((firstIndex = str.ToString().ToLower(locale).IndexOf(first, start)) == -1) + break; + bool found = true; + if (sequence1.Length > 1) + { + if (firstIndex + sequence1Length > count) + break; + for (int i = 1; i < 
sequence1Length; i++) + { + if (str.ToString().ToLower(locale)[firstIndex + i] != sequence1[i]) + { + found = false; + break; + } + } + } + if (found) + { + result2.Append(str.ToString().Substring(copyStart, firstIndex - copyStart)); /* String.Substring takes (startIndex, length), not Java's (begin, end): copy only the unmatched text since the last match. */ + result2.Append(escapeChar); + result2.Append(str.ToString().Substring(firstIndex, + sequence1Length)); /* Length of the match itself, so the original casing is copied without running past it. */ + copyStart = start = firstIndex + sequence1Length; + } + else + { + start = firstIndex + 1; + } + } + if (result2.Length == 0 && copyStart == 0) + return str; + result2.Append(str.ToString().Substring(copyStart)); + return new StringCharSequenceWrapper(result2.ToString()); + } + + private static ICharSequence EscapeWhiteChar(ICharSequence str, CultureInfo locale) /* Backslash-escapes every character listed in escapableWhiteChars; null or empty input is returned as is. */ + { + if (str == null || str.Length == 0) + return str; + + ICharSequence buffer = str; + + for (int i = 0; i < escapableWhiteChars.Length; i++) + { + buffer = ReplaceIgnoreCase(buffer, escapableWhiteChars[i].ToLower(locale), "\\", locale); + } + return buffer; + } + + public ICharSequence Escape(ICharSequence text, CultureInfo locale, EscapeQuerySyntax.Type type) /* Runs ToStringEscaped(wildcardChars) on the text first, then applies quoted-string escaping for Type.STRING and term escaping for any other type; null or empty text is returned as is. */ + { + if (text == null || text.Length == 0) + return text; + + // escape wildcards and the escape char (this has to be perform before + // anything else) + // since we need to preserve the UnescapedCharSequence and escape the + // original escape chars + if (text is UnescapedCharSequence) + { + text = new StringCharSequenceWrapper(((UnescapedCharSequence)text).ToStringEscaped(wildcardChars)); + } + else + { + text = new StringCharSequenceWrapper(new UnescapedCharSequence(text).ToStringEscaped(wildcardChars)); + } + + if (type == EscapeQuerySyntax.Type.STRING) + { + return EscapeQuoted(text, locale); + } + else + { + return EscapeTerm(text, locale); + } + } + + public static UnescapedCharSequence DiscardEscapeChar(ICharSequence input) /* Removes backslash escapes (including backslash-u plus four hex digits) and records which output chars were escaped; throws ParseException on a trailing backslash, a truncated unicode escape or a non-hex digit. */ + { + // Create char array to hold unescaped char sequence + char[] output = new char[input.Length]; + bool[] wasEscaped = new bool[input.Length]; + + // The length of the output 
can be less than the input + // due to discarded escape chars. This variable holds + // the actual length of the output + int length = 0; + + // We remember whether the last processed character was + // an escape character + bool lastCharWasEscapeChar = false; + + // The multiplier the current unicode digit must be multiplied with. + // E. g. the first digit must be multiplied with 16^3, the second with + // 16^2... + int codePointMultiplier = 0; + + // Used to calculate the codepoint of the escaped unicode character + int codePoint = 0; + + for (int i = 0; i < input.Length; i++) + { + char curChar = input.CharAt(i); + if (codePointMultiplier > 0) + { + codePoint += HexToInt(curChar) * codePointMultiplier; + codePointMultiplier = Number.URShift(codePointMultiplier, 4); + if (codePointMultiplier == 0) + { + output[length++] = (char)codePoint; + codePoint = 0; + } + } + else if (lastCharWasEscapeChar) + { + if (curChar == 'u') + { + // found an escaped unicode character + codePointMultiplier = 16 * 16 * 16; + } + else + { + // this character was escaped + output[length] = curChar; + wasEscaped[length] = true; + length++; + } + lastCharWasEscapeChar = false; + } + else + { + if (curChar == '\\') + { + lastCharWasEscapeChar = true; + } + else + { + output[length] = curChar; + length++; + } + } + } + + if (codePointMultiplier > 0) + { + throw new ParseException(new Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION).ToString()); + } + + if (lastCharWasEscapeChar) + { + throw new ParseException(new Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_CHARACTER).ToString()); + } + + return new UnescapedCharSequence(output, wasEscaped, 0, length); + } + + private static int HexToInt(char c) /* Returns the value 0-15 of a hexadecimal digit (either case); throws ParseException for any other character. */ + { + if ('0' <= c && c <= '9') + { + return c - '0'; + } + else if ('a' <= c && c <= 'f') + { + return c - 'a' + 10; + } + else if ('A' <= c && c <= 'F') + { + return c - 'A' + 10; + } + else + { + throw new ParseException(new 
Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_NONE_HEX_UNICODE, c).ToString()); + } + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs b/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs new file mode 100644 index 0000000..3a43414 --- /dev/null +++ b/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Lucene.Net.QueryParsers.Flexible.Standard.Parser +{ + public interface ICharStream /* Character-stream contract: sequential reads plus begin/end line and column positions for the current token. NOTE(review): presumably consumed by the generated parser - confirm. */ + { + char ReadChar(); + + [Obsolete] + int Column { get; } + + [Obsolete] + int Line { get; } + + int EndColumn { get; } + + int EndLine { get; } + + int BeginColumn { get; } + + int BeginLine { get; } + + void Backup(int amount); + + char BeginToken(); + + string Image { get; } + + char[] GetSuffix(int len); + + void Done(); + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Support/StringExtensions.cs ---------------------------------------------------------------------- diff --git a/src/contrib/QueryParsers/Support/StringExtensions.cs b/src/contrib/QueryParsers/Support/StringExtensions.cs new file mode 100644 index 0000000..c688e67 --- /dev/null +++ b/src/contrib/QueryParsers/Support/StringExtensions.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Lucene.Net.QueryParsers.Support +{ + public static class StringExtensions + { + public static bool EqualsIgnoreCase(this string value, string other) /* Ordinal, case-insensitive equality; null-safe because it delegates to the static string.Equals. */ + { + return string.Equals(value, other, StringComparison.OrdinalIgnoreCase); + } + } +}
