kelvint 2004/01/10 05:28:33 Modified: contributions/javascript/queryValidator luceneQueryValidator.js Log: An almost complete rewrite of all RE patterns. Supports a more complete set of possible characters and also the special characters mentioned in http://jakarta.apache.org/lucene/docs/queryparsersyntax.html. Supports full query parser syntax except for AND, NOT and OR, but warns the user of possible errors for these queries. Added alertUser option to disable window.alert boxes. Revision Changes Path 1.5 +100 -14 jakarta-lucene-sandbox/contributions/javascript/queryValidator/luceneQueryValidator.js Index: luceneQueryValidator.js =================================================================== RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/javascript/queryValidator/luceneQueryValidator.js,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- luceneQueryValidator.js 16 Dec 2003 02:22:02 -0000 1.4 +++ luceneQueryValidator.js 10 Jan 2004 13:28:33 -0000 1.5 @@ -1,6 +1,10 @@ // Author: Kelvin Tan ([EMAIL PROTECTED]) // JavaScript Lucene Query Validator // Version: $Id$ +// Tested: IE 6.0.2800 and Mozilla Firebird 0.7 + +// Special characters are + - && || ! ( ) { } [ ] ^ " ~ * ? : \ +// Special words are (case-sensitive) AND NOT OR // Makes wildcard queries case-insensitive if true. // Refer to http://www.mail-archive.com/[EMAIL PROTECTED]/msg00646.html @@ -13,6 +17,13 @@ wildcardCaseInsensitive = bool; } +var alertUser = true; + +function setAlertUser(bool) +{ + alertUser = bool; +} + // validates a lucene query. 
// @param Form field that contains the query function doCheckLuceneQuery(queryField) @@ -20,48 +31,123 @@ var query = queryField.value; if(query != null && query.length > 0) { - var matches = query.match(/^(\*)|([^a-zA-Z0-9_]\*)/); - + // check for allowed characters + var matches = query.match(/[^a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED] ]/); + if(matches != null && matches.length > 0) + { + if(alertUser) alert("Invalid search query! The allowed characters are a-z A-Z 0-9. _ + - : () \" & * ? | ! {} [ ] ^ ~ \\ @ #. Please try again.") + return false; + } + // check wildcards are used properly + matches = query.match(/^[\*]*$|([\s]\*)/); if(matches != null && matches.length > 0) { - alert("Invalid search query! The wildcard (*) character must be preceded by at least one alphabet or number. Please try again.") + if(alertUser) alert("Invalid search query! The wildcard (*) character must be preceded by at least one alphabet or number. Please try again.") return false; } - + + // check for && usage + // NB: doesn't handle term1 && term2 && term3 in Firebird 0.7 + matches = query.match(/[&]{2}/); + if(matches != null && matches.length > 0) + { + matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED]( && )?[a-zA-Z0-9_+\-:.()\"*?|[EMAIL PROTECTED] ]*)+$/); // note missing & in pattern + if(matches == null) + { + if(alertUser) alert("Invalid search query! Queries containing the special characters && must be in the form: term1 && term2. Please try again.") + return false; + } + } + + // check ^ is used properly + matches = query.match(/^[^\^]*$|^([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED](\^[\d]+)?[ ]*)+$/); // note missing ^ in pattern + if(matches == null) + { + if(alertUser) alert("Invalid search query! The caret (^) character must be preceded by alphanumeric characters and followed by numbers. 
Please try again.") + return false; + } + + // check ~ is used properly + matches = query.match(/^[^~]*$|^([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED](~[\d.]+)?[ ]*)+$/); // note missing ~in pattern + if(matches == null) + { + if(alertUser) alert("Invalid search query! The tilde (~) character must be preceded by alphanumeric characters and followed by numbers. Please try again.") + return false; + } + + // check ! is used properly + // NB: doesn't handle term1 ! term2 ! term3 + matches = query.match(/^[^!]*$|^([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED]( ! )?[a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED] ]*)+$/); + if(matches == null || matches.length == 0) + { + if(alertUser) alert("Invalid search query! Queries containing the special character ! must be in the form: term1 ! term2. Please try again.") + return false; + } + + // check question marks are used properly + matches = query.match(/^(\?)|([^a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED])/); + if(matches != null && matches.length > 0) + { + if(alertUser) alert("Invalid search query! The question mark (?) character must be preceded by at least one alphabet or number. Please try again.") + return false; + } + // check parentheses are used properly - matches = query.match(/^([^\n()]*|(\(([a-zA-Z0-9_+\-:()\" ]|\*)+\)))*$/); + matches = query.match(/^[^()]*$|^(\([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED]))*$/); if(matches == null || matches.length == 0) { - alert("Invalid search query! Parentheses must contain at least one alphabet or number. Please try again.") + if(alertUser) alert("Invalid search query! Parentheses must be closed and contain at least one alphabet or number. Please try again.") return false; } // check '+' and '-' are used properly - matches = query.match(/^(([^\n+-]*|[+-]([a-zA-Z0-9_:()]|\*)+))*$/); + matches = query.match(/^[^\n+\-]*$|^([+-]?[a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED] ]?)+$/); if(matches == null || matches.length == 0) { - alert("Invalid search query! 
'+' and '-' modifiers must be followed by at least one alphabet or number. Please try again.") + if(alertUser) alert("Invalid search query! '+' and '-' modifiers must be followed by at least one alphabet or number. Please try again.") return false; - } + } + + // check AND, OR and NOT are used properly + matches = query.match(/AND|OR|NOT/); + if(matches != null && matches.length > 0) + { + // I've no idea why the code below doesn't work since it's identical to the exclamation mark and && RE + //matches = query.match(/^([a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED](?: AND )?(?: OR )?(?: NOT )?[a-zA-Z0-9_+\-:.()\"*?&|[EMAIL PROTECTED] ]*)+$/); + + // we'll notify the user that this query is not validated and an error may result + if(alertUser) + { + return confirm("Validation for queries containing AND/OR/NOT is currently not supported. If your query is not well-formed, an error may result."); + } + /* + if(matches == null || matches.length == 0) + { + if(alertUser) alert("Invalid search query! Queries containing AND/OR/NOT must be in the form: term1 AND|OR|NOT term2 Please try again.") + return false; + } + */ + } + // check that quote marks are closed matches = query.match(/\"/g); - if(matches != null) + if(matches != null && matches.length > 0) { var number = matches.length; if((number % 2) > 0) { - alert("Invalid search query! Please close all quote (\") marks."); + if(alertUser) alert("Invalid search query! Please close all quote (\") marks."); return false; } } // check ':' is used properly - matches = query.match(/^(([^\n:]*|([a-zA-Z0-9_]|\*)+[:]([a-zA-Z0-9_()"]|\*)+))*$/); + matches = query.match(/^[^:]*$|^([a-zA-Z0-9_+\-.()\"*?&|[EMAIL PROTECTED](:[a-zA-Z0-9_+\-.()\"*?&|[EMAIL PROTECTED])?[ ]*)+$/); // note missing : in pattern if(matches == null || matches.length == 0) { - alert("Invalid search query! Field declarations (:) must be preceded by at least one alphabet or number and followed by at least one alphabet or number. 
Please try again.") + if(alertUser) alert("Invalid search query! Field declarations (:) must be preceded by at least one alphabet or number and followed by at least one alphabet or number. Please try again.") return false; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]