Updated Branches: refs/heads/develop 430b45a16 -> 288b90439
regexp optimizations Project: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/commit/288b9043 Tree: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/tree/288b9043 Diff: http://git-wip-us.apache.org/repos/asf/incubator-marmotta/diff/288b9043 Branch: refs/heads/develop Commit: 288b904395359759f5e429bfdad123a67344d56f Parents: 430b45a Author: Sebastian Schaffert <[email protected]> Authored: Mon Apr 29 19:06:03 2013 +0200 Committer: Sebastian Schaffert <[email protected]> Committed: Mon Apr 29 19:06:03 2013 +0200 ---------------------------------------------------------------------- .../sparql/persistence/KiWiSparqlConnection.java | 63 ++++++++++++++- 1 files changed, 62 insertions(+), 1 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/288b9043/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java index 1bd1227..e2f405c 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java @@ -44,6 +44,7 @@ import java.sql.SQLException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.*; +import java.util.regex.Pattern; /** * Provide improved SPARQL support by evaluating certain common compley SPARQL constructs directly on the @@ -354,7 +355,8 @@ public class KiWiSparqlConnection { // TODO: simplify the trivial cases to LIKE - return parent.getDialect().getRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING)); + //return parent.getDialect().getRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING)); + return optimizeRegexp(evaluateExpression(re.getArg(),queryVariables, optype), evaluateExpression(re.getPatternArg(), queryVariables, OPTypes.STRING)); } else if(expr instanceof LangMatches) { LangMatches lm = (LangMatches)expr; String value = evaluateExpression(lm.getLeftArg(), queryVariables, optype); @@ -582,6 +584,65 @@ public class KiWiSparqlConnection { } } + + /** + * Test if the regular expression given in the pattern can be simplified to a LIKE SQL statement; these are + * considerably more efficient to evaluate in most databases, so in case we can simplify, we return a LIKE. + * + * @param value + * @param pattern + * @return + */ + private String optimizeRegexp(String value, String pattern) { + String simplified = pattern; + + // apply simplifications + + // remove SQL quotes at beginning and end + simplified = simplified.replaceFirst("^'",""); + simplified = simplified.replaceFirst("'$",""); + + + // remove .* at beginning and end, they are the default anyways + simplified = simplified.replaceFirst("^\\.\\*",""); + simplified = simplified.replaceFirst("\\.\\*$",""); + + // replace all occurrences of % with \% and _ with \_, as they are special characters in SQL + simplified = simplified.replaceAll("%","\\%"); + simplified = simplified.replaceAll("_","\\_"); + + // if pattern now does not start with a ^, we put a "%" in front + if(!simplified.startsWith("^")) { + simplified = "%" + simplified; + } else { + simplified = simplified.substring(1); + } + + // if pattern does not end with a "$", we put a "%" at the end + if(!simplified.endsWith("$")) { + simplified = simplified + "%"; + } else { + simplified = simplified.substring(0,simplified.length()-2); + } + + // replace all non-escaped occurrences of .* with % + simplified = simplified.replaceAll("(?<!\\\\)\\.\\*","%"); + + // replace all non-escaped occurrences of .+ with _% + simplified = simplified.replaceAll("(?<!\\\\)\\.\\+","_%"); + + // the pattern is not simplifiable if the simplification still contains unescaped regular expression constructs + Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]"); + + if(notSimplifiable.matcher(simplified).find()) { + return parent.getDialect().getRegexp(value, pattern); + } else { + return value + " LIKE '"+simplified+"'"; + } + + } + + private static enum OPTypes { STRING, DOUBLE, INT, DATE, ANY }
