Repository: marmotta Updated Branches: refs/heads/develop 66be7f541 -> 7d18b0303
SPARQL: - move expression evaluation to a more efficient visitor Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/d90e1429 Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/d90e1429 Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/d90e1429 Branch: refs/heads/develop Commit: d90e1429b1f76c2d3142b2c88de80b727445b9f9 Parents: 8c62e91 Author: Sebastian Schaffert <[email protected]> Authored: Thu Nov 6 18:47:35 2014 +0100 Committer: Sebastian Schaffert <[email protected]> Committed: Thu Nov 6 18:47:35 2014 +0100 ---------------------------------------------------------------------- .../kiwi/sparql/builder/ProjectionType.java | 2 +- .../kiwi/sparql/builder/SQLBuilder.java | 486 +----------- .../builder/eval/ExpressionEvaluator.java | 735 +++++++++++++++++++ .../persistence/KiWiSparqlConnection.java | 6 + 4 files changed, 766 insertions(+), 463 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/marmotta/blob/d90e1429/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ProjectionType.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ProjectionType.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ProjectionType.java index eff7798..5b903c0 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ProjectionType.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ProjectionType.java @@ -23,5 +23,5 @@ package org.apache.marmotta.kiwi.sparql.builder; * @author Sebastian Schaffert ([email protected]) */ public enum ProjectionType { - NODE, URI, BNODE, STRING, INT, DOUBLE, DATE, NONE + NODE, URI, BNODE, STRING, INT, DOUBLE, DATE, BOOL, NONE } http://git-wip-us.apache.org/repos/asf/marmotta/blob/d90e1429/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java index 34997c9..6915aaf 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java @@ -18,34 +18,29 @@ package org.apache.marmotta.kiwi.sparql.builder; import com.google.common.base.Preconditions; -import org.apache.commons.lang3.StringUtils; import org.apache.marmotta.commons.collections.CollectionUtils; -import org.apache.marmotta.commons.util.DateUtils; import org.apache.marmotta.kiwi.model.rdf.KiWiNode; import org.apache.marmotta.kiwi.persistence.KiWiDialect; import org.apache.marmotta.kiwi.sail.KiWiValueFactory; import org.apache.marmotta.kiwi.sparql.builder.collect.*; +import org.apache.marmotta.kiwi.sparql.builder.eval.ExpressionEvaluator; import org.apache.marmotta.kiwi.sparql.builder.model.SQLAbstractSubquery; import org.apache.marmotta.kiwi.sparql.builder.model.SQLFragment; import org.apache.marmotta.kiwi.sparql.builder.model.SQLPattern; import org.apache.marmotta.kiwi.sparql.builder.model.SQLVariable; import org.apache.marmotta.kiwi.sparql.exception.UnsatisfiableQueryException; -import org.apache.marmotta.kiwi.sparql.function.NativeFunction; import org.apache.marmotta.kiwi.sparql.function.NativeFunctionRegistry; -import org.openrdf.model.*; -import org.openrdf.model.vocabulary.FN; +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; import org.openrdf.model.vocabulary.SESAME; -import org.openrdf.model.vocabulary.XMLSchema; import org.openrdf.query.BindingSet; import org.openrdf.query.Dataset; import org.openrdf.query.algebra.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.text.DateFormat; -import java.text.SimpleDateFormat; import java.util.*; -import java.util.regex.Pattern; /** * A builder for translating SPARQL queries into SQL. @@ -61,11 +56,6 @@ public class SQLBuilder { */ private static final String[] positions = new String[] {"subject","predicate","object","context"}; - /** - * Date format used for SQL timestamps. - */ - private static final DateFormat sqlDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S"); - /** * Reference to the registry of natively supported functions with parameter and return types as well as SQL translation @@ -125,9 +115,6 @@ public class SQLBuilder { // a prefix for naming table aliases (needed in case this is a subquery) private String prefix; - // used by BNodeGenerator - private static Random anonIdGenerator = new Random(); - /** * Create a new SQLBuilder for the given query, initial bindings, dataset, and * @param query @@ -172,6 +159,21 @@ public class SQLBuilder { return variables; } + public ValueConverter getConverter() { + return converter; + } + + public KiWiDialect getDialect() { + return dialect; + } + + public Dataset getDataset() { + return dataset; + } + + public BindingSet getBindings() { + return bindings; + } public Set<String> getProjectedVars() { return projectedVars; @@ -770,452 +772,8 @@ public class SQLBuilder { } - private String evaluateExpression(ValueExpr expr, final OPTypes optype) { - if(expr instanceof And) { - return "(" + evaluateExpression(((And) expr).getLeftArg(), optype) + " AND " + evaluateExpression(((And) expr).getRightArg(), optype) + ")"; - } else if(expr instanceof Or) { - return "(" + evaluateExpression(((Or) expr).getLeftArg(), optype) + " OR " + evaluateExpression(((Or) expr).getRightArg(), optype) + ")"; - } else if(expr instanceof Not) { - return "NOT (" + evaluateExpression(((Not) expr).getArg(), optype) + ")"; - } else if(expr instanceof Exists) { - - // TODO: need to make sure that variables of the parent are visible in the subquery - // - pattern names need to be unique even in subqueries - // - variable lookup for expressions in the subquery need to refer to the parent - SQLBuilder sq_builder = new SQLBuilder(((Exists) expr).getSubQuery(), bindings, dataset, converter, dialect, "_", Collections.EMPTY_SET, copyVariables(variables)); - - return "EXISTS (" + sq_builder.build() + ")"; - } else if(expr instanceof Str) { - Str str = (Str)expr; - - // get value of argument and express it as string - return evaluateExpression(str.getArg(), OPTypes.STRING); - } else if(expr instanceof Label) { - Label str = (Label)expr; - - // get value of argument and express it as string - return evaluateExpression(str.getArg(), OPTypes.STRING); - } else if(expr instanceof BNodeGenerator) { - BNodeGenerator str = (BNodeGenerator)expr; - - if(str.getNodeIdExpr() != null) { - // get value of argument and express it as string - return evaluateExpression(str.getNodeIdExpr(), OPTypes.STRING); - } else { - return "'" + Long.toHexString(System.currentTimeMillis())+Integer.toHexString(anonIdGenerator.nextInt(1000)) + "'"; - } - } else if(expr instanceof IRIFunction) { - IRIFunction str = (IRIFunction)expr; - - if(str.getBaseURI() != null) { - String ex = evaluateExpression(str.getArg(), OPTypes.STRING); - - return "CASE WHEN position(':' IN " + ex +") > 0 THEN " + ex + " ELSE " + - NativeFunctionRegistry.getInstance().get(FN.CONCAT.stringValue()).getNative(dialect,"'"+str.getBaseURI()+"'", ex) + - " END "; - } else { - // get value of argument and express it as string - return evaluateExpression(str.getArg(), OPTypes.STRING); - } - } else if(expr instanceof Lang) { - Lang lang = (Lang)expr; - - if(lang.getArg() instanceof Var) { - return variables.get(((Var) lang.getArg()).getName()).getAlias() + ".lang"; - } - } else if(expr instanceof SameTerm) { - SameTerm cmp = (SameTerm)expr; - - // covered by value binding in variables - return evaluateExpression(cmp.getLeftArg(), OPTypes.TERM) + " = " + evaluateExpression(cmp.getRightArg(), OPTypes.TERM); - } else if(expr instanceof Compare) { - Compare cmp = (Compare)expr; - - OPTypes ot = new OPTypeFinder(cmp).coerce(); - - return evaluateExpression(cmp.getLeftArg(), ot) + getSQLOperator(cmp.getOperator()) + evaluateExpression(cmp.getRightArg(), ot); - } else if(expr instanceof MathExpr) { - MathExpr cmp = (MathExpr)expr; - - OPTypes ot = new OPTypeFinder(cmp).coerce(); - - if(ot == OPTypes.STRING) { - if(cmp.getOperator() == MathExpr.MathOp.PLUS) { - return NativeFunctionRegistry.getInstance().get(FN.CONCAT.stringValue()).getNative(dialect,evaluateExpression(cmp.getLeftArg(), ot), evaluateExpression(cmp.getRightArg(), ot)); - } else { - throw new IllegalArgumentException("operation "+cmp.getOperator()+" is not supported on strings"); - } - } else { - if(ot == OPTypes.ANY || ot == OPTypes.TERM) { - ot = OPTypes.DOUBLE; - } - - return evaluateExpression(cmp.getLeftArg(), ot) + getSQLOperator(cmp.getOperator()) + evaluateExpression(cmp.getRightArg(), ot); - } - } else if(expr instanceof Regex) { - Regex re = (Regex)expr; - - return optimizeRegexp(evaluateExpression(re.getArg(), OPTypes.STRING), evaluateExpression(re.getPatternArg(), OPTypes.STRING), re.getFlagsArg()); - } else if(expr instanceof LangMatches) { - LangMatches lm = (LangMatches)expr; - String value = evaluateExpression(lm.getLeftArg(), optype); - ValueConstant pattern = (ValueConstant) lm.getRightArg(); - - if(pattern.getValue().stringValue().equals("*")) { - return value + " LIKE '%'"; - } else if(pattern.getValue().stringValue().equals("")) { - return value + " IS NULL"; - } else { - return "(" + value + " = '"+pattern.getValue().stringValue().toLowerCase()+"' OR " + dialect.getILike(value, "'" + pattern.getValue().stringValue().toLowerCase() + "-%' )"); - } - } else if(expr instanceof Bound) { - ValueExpr arg = ((Bound)expr).getArg(); - - if(arg instanceof ValueConstant) { - return Boolean.toString(true); - } else if(arg instanceof Var) { - - return "(" + evaluateExpression(arg, optype) + " IS NOT NULL)"; - } - - } else if(expr instanceof IsResource) { - ValueExpr arg = ((UnaryValueOperator)expr).getArg(); - - // operator must be a variable or a constant - if(arg instanceof ValueConstant) { - return Boolean.toString(((ValueConstant) arg).getValue() instanceof URI || ((ValueConstant) arg).getValue() instanceof BNode); - } else if(arg instanceof Var) { - String var = variables.get(((Var) arg).getName()).getAlias(); - - return "(" + var + ".ntype = 'uri' OR " + var + ".ntype = 'bnode')"; - } - } else if(expr instanceof IsURI) { - ValueExpr arg = ((UnaryValueOperator)expr).getArg(); - - // operator must be a variable or a constant - if(arg instanceof ValueConstant) { - return Boolean.toString(((ValueConstant) arg).getValue() instanceof URI); - } else if(arg instanceof Var) { - String var = variables.get(((Var) arg).getName()).getAlias(); - - return var + ".ntype = 'uri'"; - } - } else if(expr instanceof IsBNode) { - ValueExpr arg = ((UnaryValueOperator)expr).getArg(); - - // operator must be a variable or a constant - if(arg instanceof ValueConstant) { - return Boolean.toString(((ValueConstant) arg).getValue() instanceof BNode); - } else if(arg instanceof Var) { - String var = variables.get(((Var) arg).getName()).getAlias(); - - return var + ".ntype = 'bnode'"; - } - } else if(expr instanceof IsLiteral) { - ValueExpr arg = ((UnaryValueOperator)expr).getArg(); - - // operator must be a variable or a constant - if (arg instanceof ValueConstant) { - return Boolean.toString(((ValueConstant) arg).getValue() instanceof Literal); - } else if(arg instanceof Var) { - String var = variables.get(((Var) arg).getName()).getAlias(); - - return "(" + var + ".ntype = 'string' OR " + var + ".ntype = 'int' OR " + var + ".ntype = 'double' OR " + var + ".ntype = 'date' OR " + var + ".ntype = 'boolean')"; - } - } else if(expr instanceof Var) { - // distinguish between the case where the variable is plain and the variable is bound - SQLVariable sv = variables.get(((Var) expr).getName()); - - if(sv.getBindings().size() > 0) { - // in case the variable is actually an alias for an expression, we evaluate that expression instead, effectively replacing the - // variable occurrence with its value - return evaluateExpression(sv.getBindings().get(0),optype); - } else { - String var = sv.getAlias(); - - if(sv.getProjectionType() != ProjectionType.NODE && sv.getProjectionType() != ProjectionType.NONE) { - // in case the variable represents a constructed or bound value instead of a node, we need to - // use the SQL expression as value; SQL should take care of proper casting... - // TODO: explicit casting needed? - return sv.getExpressions().get(0); - } else { - // in case the variable represents an entry from the NODES table (i.e. has been bound to a node - // in the database, we take the NODES alias and resolve to the correct column according to the - // operator type - if (optype == null) { - return var + ".svalue"; - } else { - switch (optype) { - case STRING: - return var + ".svalue"; - case INT: - return var + ".ivalue"; - case DOUBLE: - return var + ".dvalue"; - case DATE: - return var + ".tvalue"; - case VALUE: - return var + ".svalue"; - case URI: - return var + ".svalue"; - case TERM: - if(var == null) { - // might happen in case the variable has been bound to a constant value and not - // joined in a pattern query - return sv.getExpressions().get(0); - } else { - return var + ".id"; - } - case ANY: - return var + ".id"; - } - } - } - } - } else if(expr instanceof ValueConstant) { - String val = ((ValueConstant) expr).getValue().stringValue(); - - if(optype == null) { - return "'" + val + "'"; - } else { - switch (optype) { - case STRING: return "'" + val + "'"; - case VALUE: return "'" + val + "'"; - case URI: return "'" + val + "'"; - case INT: return "" + Integer.parseInt(val); - case DOUBLE: return "" + Double.parseDouble(val); - case DATE: return "'" + sqlDateFormat.format(DateUtils.parseDate(val)) + "'"; - - // in this case we should return a node ID and also need to make sure it actually exists - case TERM: - case ANY: - KiWiNode n = converter.convert(((ValueConstant) expr).getValue()); - return "" + n.getId(); - - default: throw new IllegalArgumentException("unsupported value type: " + optype); - } - } - } else if(expr instanceof Coalesce) { - return "COALESCE(" + CollectionUtils.fold(((Coalesce) expr).getArguments(), new CollectionUtils.StringSerializer<ValueExpr>() { - @Override - public String serialize(ValueExpr valueExpr) { - return evaluateExpression(valueExpr, optype); - } - },", ") + ")"; - } else if(expr instanceof Count) { - StringBuilder countExp = new StringBuilder(); - countExp.append("COUNT("); - - if(((Count) expr).isDistinct()) { - countExp.append("DISTINCT "); - } - - if(((Count) expr).getArg() == null) { - // this is a weird special case where we need to expand to all variables selected in the query wrapped - // by the group; we cannot simply use "*" because the concept of variables is a different one in SQL, - // so instead we construct an ARRAY of the bindings of all variables - - List<String> countVariables = new ArrayList<>(); - for(SQLVariable v : variables.values()) { - if(v.getProjectionType() == ProjectionType.NONE) { - //countVariables.add(v.getExpressions().get(0)); - countVariables.add(v.getAlias()); - } - } - countExp.append("ARRAY["); - countExp.append(CollectionUtils.fold(countVariables,",")); - countExp.append("]"); - - } else { - countExp.append(evaluateExpression(((Count) expr).getArg(), OPTypes.ANY)); - } - countExp.append(")"); - - return countExp.toString(); - } else if(expr instanceof Avg) { - return "AVG(" + evaluateExpression(((Avg) expr).getArg(), OPTypes.DOUBLE) + ")"; - } else if(expr instanceof Min) { - return "MIN(" + evaluateExpression(((Min) expr).getArg(), OPTypes.DOUBLE) + ")"; - } else if(expr instanceof Max) { - return "MAX(" + evaluateExpression(((Max) expr).getArg(), OPTypes.DOUBLE) + ")"; - } else if(expr instanceof Sum) { - return "SUM(" + evaluateExpression(((Sum) expr).getArg(), OPTypes.DOUBLE) + ")"; - } else if(expr instanceof FunctionCall) { - FunctionCall fc = (FunctionCall)expr; - - // special optimizations for frequent cases with variables - if((XMLSchema.DOUBLE.toString().equals(fc.getURI()) || XMLSchema.FLOAT.toString().equals(fc.getURI()) ) && fc.getArgs().size() == 1) { - return evaluateExpression(fc.getArgs().get(0), OPTypes.DOUBLE); - } else if((XMLSchema.INTEGER.toString().equals(fc.getURI()) || XMLSchema.INT.toString().equals(fc.getURI())) && fc.getArgs().size() == 1) { - return evaluateExpression(fc.getArgs().get(0), OPTypes.INT); - } else if(XMLSchema.BOOLEAN.toString().equals(fc.getURI()) && fc.getArgs().size() == 1) { - return evaluateExpression(fc.getArgs().get(0), OPTypes.BOOL); - } else if(XMLSchema.DATE.toString().equals(fc.getURI()) && fc.getArgs().size() == 1) { - return evaluateExpression(fc.getArgs().get(0), OPTypes.DATE); - } - - String fnUri = fc.getURI(); - - String[] args = new String[fc.getArgs().size()]; - - NativeFunction nf = functionRegistry.get(fnUri); - - if(nf != null && nf.isSupported(dialect)) { - - for (int i = 0; i < args.length; i++) { - args[i] = evaluateExpression(fc.getArgs().get(i), nf.getArgumentType(i)); - } - - if (optype != null && optype != nf.getReturnType()) { - return castExpression(nf.getNative(dialect, args), optype); - } else { - return nf.getNative(dialect, args); - } - } else { - throw new IllegalArgumentException("the function "+fnUri+" is not supported by the SQL translation"); - } - } - - - throw new IllegalArgumentException("unsupported value expression: "+expr); - } - - /** - * Copy variables from the set to a new set suitable for a subquery; this allows passing over variable expressions - * from parent queries to subqueries without the subquery adding expressions that are then not visible outside - * @param variables - * @return - */ - private Map<String, SQLVariable> copyVariables(Map<String, SQLVariable> variables) { - Map<String,SQLVariable> copy = new HashMap<>(); - try { - for(Map.Entry<String,SQLVariable> entry : variables.entrySet()) { - copy.put(entry.getKey(), (SQLVariable) entry.getValue().clone()); - } - } catch (CloneNotSupportedException e) { - log.error("could not clone SQL variable:",e); - } - - return copy; - } - - private String castExpression(String arg, OPTypes type) { - if(type == null) { - return arg; - } - - switch (type) { - case DOUBLE: - return functionRegistry.get(XMLSchema.DOUBLE).getNative(dialect, arg); - case INT: - return functionRegistry.get(XMLSchema.INTEGER).getNative(dialect, arg); - case BOOL: - return functionRegistry.get(XMLSchema.BOOLEAN).getNative(dialect, arg); - case DATE: - return functionRegistry.get(XMLSchema.DATETIME).getNative(dialect, arg); - case VALUE: - return arg; - case ANY: - return arg; - default: - return arg; - } - } - - private String getSQLOperator(Compare.CompareOp op) { - switch (op) { - case EQ: return " = "; - case GE: return " >= "; - case GT: return " > "; - case LE: return " <= "; - case LT: return " < "; - case NE: return " <> "; - } - throw new IllegalArgumentException("unsupported operator type for comparison: "+op); - } - - - private String getSQLOperator(MathExpr.MathOp op) { - switch (op) { - case PLUS: return " + "; - case MINUS: return " - "; - case DIVIDE: return " / "; - case MULTIPLY: return " / "; - } - throw new IllegalArgumentException("unsupported operator type for math expression: "+op); - } - - - /** - * Test if the regular expression given in the pattern can be simplified to a LIKE SQL statement; these are - * considerably more efficient to evaluate in most databases, so in case we can simplify, we return a LIKE. - * - * @param value - * @param pattern - * @return - */ - private String optimizeRegexp(String value, String pattern, ValueExpr flags) { - String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null; - - String simplified = pattern; - - // apply simplifications - - // remove SQL quotes at beginning and end - simplified = simplified.replaceFirst("^'",""); - simplified = simplified.replaceFirst("'$",""); - - - // remove .* at beginning and end, they are the default anyways - simplified = simplified.replaceFirst("^\\.\\*",""); - simplified = simplified.replaceFirst("\\.\\*$",""); - - // replace all occurrences of % with \% and _ with \_, as they are special characters in SQL - simplified = simplified.replaceAll("%","\\%"); - simplified = simplified.replaceAll("_","\\_"); - - // if pattern now does not start with a ^, we put a "%" in front - if(!simplified.startsWith("^")) { - simplified = "%" + simplified; - } else { - simplified = simplified.substring(1); - } - - // if pattern does not end with a "$", we put a "%" at the end - if(!simplified.endsWith("$")) { - simplified = simplified + "%"; - } else { - simplified = simplified.substring(0,simplified.length()-1); - } - - // replace all non-escaped occurrences of .* with % - simplified = simplified.replaceAll("(?<!\\\\)\\.\\*","%"); - - // replace all non-escaped occurrences of .+ with _% - simplified = simplified.replaceAll("(?<!\\\\)\\.\\+","_%"); - - // the pattern is not simplifiable if the simplification still contains unescaped regular expression constructs - Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]"); - - if(notSimplifiable.matcher(simplified).find()) { - return dialect.getRegexp(value, pattern, _flags); - } else { - if(!simplified.startsWith("%") && !simplified.endsWith("%")) { - if(StringUtils.containsIgnoreCase(_flags, "i")) { - return String.format("lower(%s) = lower('%s')", value, simplified); - } else { - return String.format("%s = '%s'", value, simplified); - } - } else { - if(StringUtils.containsIgnoreCase(_flags,"i")) { - return dialect.getILike(value, "'" + simplified + "'"); - } else { - return value + " LIKE '"+simplified+"'"; - } - } - } - + return new ExpressionEvaluator(expr, this, optype).build(); } @@ -1259,6 +817,8 @@ public class SQLBuilder { return ProjectionType.DOUBLE; } else if(expr instanceof Avg) { return ProjectionType.DOUBLE; + } else if(expr instanceof Compare) { + return ProjectionType.BOOL; } else { return ProjectionType.STRING; } @@ -1279,6 +839,8 @@ public class SQLBuilder { return ProjectionType.DATE; case STRING: return ProjectionType.STRING; + case BOOL: + return ProjectionType.BOOL; default: log.warn("optype {} cannot be projected!",t); return ProjectionType.STRING; http://git-wip-us.apache.org/repos/asf/marmotta/blob/d90e1429/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/eval/ExpressionEvaluator.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/eval/ExpressionEvaluator.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/eval/ExpressionEvaluator.java new file mode 100644 index 0000000..8eb2e46 --- /dev/null +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/eval/ExpressionEvaluator.java @@ -0,0 +1,735 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.marmotta.kiwi.sparql.builder.eval; + +import org.apache.commons.lang3.StringUtils; +import org.apache.marmotta.commons.collections.CollectionUtils; +import org.apache.marmotta.commons.util.DateUtils; +import org.apache.marmotta.kiwi.model.rdf.KiWiNode; +import org.apache.marmotta.kiwi.sparql.builder.OPTypes; +import org.apache.marmotta.kiwi.sparql.builder.ProjectionType; +import org.apache.marmotta.kiwi.sparql.builder.SQLBuilder; +import org.apache.marmotta.kiwi.sparql.builder.collect.OPTypeFinder; +import org.apache.marmotta.kiwi.sparql.builder.model.SQLVariable; +import org.apache.marmotta.kiwi.sparql.function.NativeFunction; +import org.apache.marmotta.kiwi.sparql.function.NativeFunctionRegistry; +import org.openrdf.model.BNode; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.vocabulary.FN; +import org.openrdf.model.vocabulary.XMLSchema; +import org.openrdf.query.algebra.*; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.regex.Pattern; + +/** + * Evaluate a SPARQL ValueExpr by translating it into a SQL expression. + * + * @author Sebastian Schaffert ([email protected]) + */ +public class ExpressionEvaluator extends QueryModelVisitorBase<RuntimeException> { + + private static Logger log = LoggerFactory.getLogger(ExpressionEvaluator.class); + + /** + * Date format used for SQL timestamps. + */ + private static final DateFormat sqlDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S"); + + /** + * Reference to the registry of natively supported functions with parameter and return types as well as SQL translation + */ + private static NativeFunctionRegistry functionRegistry = NativeFunctionRegistry.getInstance(); + + // used by BNodeGenerator + private static Random anonIdGenerator = new Random(); + + + private StringBuilder builder = new StringBuilder(); + + private Deque<OPTypes> optypes = new ArrayDeque<>(); + + private SQLBuilder parent; + + public ExpressionEvaluator(ValueExpr expr, SQLBuilder parent) { + this(expr,parent, OPTypes.ANY); + } + + public ExpressionEvaluator(ValueExpr expr, SQLBuilder parent, OPTypes optype) { + this.parent = parent; + + optypes.push(optype); + + if(log.isTraceEnabled()) { + long start = System.currentTimeMillis(); + expr.visit(this); + log.trace("expression evaluated in {} ms", (System.currentTimeMillis()-start)); + } else { + expr.visit(this); + } + } + + + /** + * Create the actual SQL string generated by this evaluator. + * + * @return + */ + public String build() { + return builder.toString(); + } + + @Override + public void meet(And node) throws RuntimeException { + builder.append("("); + node.getLeftArg().visit(this); + builder.append(" AND "); + node.getRightArg().visit(this); + builder.append(")"); + } + + @Override + public void meet(Or node) throws RuntimeException { + builder.append("("); + node.getLeftArg().visit(this); + builder.append(" OR "); + node.getRightArg().visit(this); + builder.append(")"); + } + + @Override + public void meet(Not node) throws RuntimeException { + builder.append("NOT ("); + node.getArg().visit(this); + builder.append(")"); + } + + @Override + public void meet(Exists node) throws RuntimeException { + // TODO: need to make sure that variables of the parent are visible in the subquery + // - pattern names need to be unique even in subqueries + // - variable lookup for expressions in the subquery need to refer to the parent + SQLBuilder sq_builder = new SQLBuilder(node.getSubQuery(), parent.getBindings(), parent.getDataset(), parent.getConverter(), parent.getDialect(), "_", Collections.EMPTY_SET, copyVariables(parent.getVariables())); + + builder.append("EXISTS (").append(sq_builder.build()).append(")"); + } + + @Override + public void meet(FunctionCall fc) throws RuntimeException { + // special optimizations for frequent cases with variables + if((XMLSchema.DOUBLE.toString().equals(fc.getURI()) || XMLSchema.FLOAT.toString().equals(fc.getURI()) ) && fc.getArgs().size() == 1) { + optypes.push(OPTypes.DOUBLE); + fc.getArgs().get(0).visit(this); + optypes.pop(); + } else if((XMLSchema.INTEGER.toString().equals(fc.getURI()) || XMLSchema.INT.toString().equals(fc.getURI())) && fc.getArgs().size() == 1) { + optypes.push(OPTypes.INT); + fc.getArgs().get(0).visit(this); + optypes.pop(); + } else if(XMLSchema.BOOLEAN.toString().equals(fc.getURI()) && fc.getArgs().size() == 1) { + optypes.push(OPTypes.BOOL); + fc.getArgs().get(0).visit(this); + optypes.pop(); + } else if(XMLSchema.DATE.toString().equals(fc.getURI()) && fc.getArgs().size() == 1) { + optypes.push(OPTypes.DATE); + fc.getArgs().get(0).visit(this); + optypes.pop(); + } else { + + String fnUri = fc.getURI(); + + String[] args = new String[fc.getArgs().size()]; + + NativeFunction nf = functionRegistry.get(fnUri); + + if (nf != null && nf.isSupported(parent.getDialect())) { + + for (int i = 0; i < args.length; i++) { + args[i] = new ExpressionEvaluator(fc.getArgs().get(i), parent, nf.getArgumentType(i)).build(); + } + + if (optypes.peek() != nf.getReturnType()) { + builder.append(castExpression(nf.getNative(parent.getDialect(), args), optypes.peek())); + } else { + builder.append(nf.getNative(parent.getDialect(), args)); + } + } else { + throw new IllegalArgumentException("the function " + fnUri + " is not supported by the SQL translation"); + } + } + + } + + @Override + public void meet(Avg node) throws RuntimeException { + builder.append("AVG("); + optypes.push(OPTypes.DOUBLE); + node.getArg().visit(this); + optypes.pop(); + builder.append(")"); + } + + @Override + public void meet(BNodeGenerator gen) throws RuntimeException { + if(gen.getNodeIdExpr() != null) { + // get value of argument and express it as string + optypes.push(OPTypes.STRING); + gen.getNodeIdExpr().visit(this); + optypes.pop(); + } else { + builder.append("'").append(Long.toHexString(System.currentTimeMillis())+Integer.toHexString(anonIdGenerator.nextInt(1000))).append("'"); + } + } + + @Override + public void meet(Bound node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + if(arg instanceof ValueConstant) { + builder.append(Boolean.toString(true)); + } else if(arg instanceof Var) { + builder.append("("); + arg.visit(this); + builder.append(" IS NOT NULL)"); + } + } + + @Override + public void meet(Coalesce node) throws RuntimeException { + builder.append("COALESCE("); + for(Iterator<ValueExpr> it = node.getArguments().iterator(); it.hasNext(); ) { + it.next().visit(this); + if(it.hasNext()) { + builder.append(", "); + } + } + builder.append(")"); + } + + @Override + public void meet(Compare cmp) throws RuntimeException { + optypes.push(new OPTypeFinder(cmp).coerce()); + cmp.getLeftArg().visit(this); + builder.append(getSQLOperator(cmp.getOperator())); + cmp.getRightArg().visit(this); + optypes.pop(); + } + + @Override + public void meet(Count node) throws RuntimeException { + builder.append("COUNT("); + + if(node.isDistinct()) { + builder.append("DISTINCT "); + } + + if(node.getArg() == null) { + // this is a weird special case where we need to expand to all variables selected in the query wrapped + // by the group; we cannot simply use "*" because the concept of variables is a different one in SQL, + // so instead we construct an ARRAY of the bindings of all variables + + List<String> countVariables = new ArrayList<>(); + for(SQLVariable v : parent.getVariables().values()) { + if(v.getProjectionType() == ProjectionType.NONE) { + //countVariables.add(v.getExpressions().get(0)); + countVariables.add(v.getAlias()); + } + } + builder.append("ARRAY["); + builder.append(CollectionUtils.fold(countVariables,",")); + builder.append("]"); + + } else { + optypes.push(OPTypes.ANY); + node.getArg().visit(this); + optypes.pop(); + } + builder.append(")"); + } + + @Override + public void meet(IsBNode node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + // operator must be a variable or a constant + if(arg instanceof ValueConstant) { + builder.append(Boolean.toString(((ValueConstant) arg).getValue() instanceof BNode)); + } else if(arg instanceof Var) { + String var = getVariableAlias((Var) arg); + + builder.append(var).append(".ntype = 'bnode'"); + } + } + + @Override + public void meet(IsLiteral node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + // operator must be a variable or a constant + if (arg instanceof ValueConstant) { + builder.append(Boolean.toString(((ValueConstant) arg).getValue() instanceof Literal)); + } else if(arg instanceof Var) { + String var = getVariableAlias((Var) arg); + + builder.append("(") + .append(var) + .append(".ntype = 'string' OR ") + .append(var) + .append(".ntype = 'int' OR ") + .append(var) + .append(".ntype = 'double' OR ") + .append(var) + .append(".ntype = 'date' OR ") + .append(var) + .append(".ntype = 'boolean')"); + } + } + + @Override + public void meet(IsNumeric node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + // operator must be a variable or a constant + if (arg instanceof ValueConstant) { + try { + Double.parseDouble(((ValueConstant) arg).getValue().stringValue()); + builder.append(Boolean.toString(true)); + } catch (NumberFormatException ex) { + builder.append(Boolean.toString(false)); + } + } else if(arg instanceof Var) { + String var = getVariableAlias((Var) arg); + + builder.append("(") + .append(var) + .append(".ntype = 'int' OR ") + .append(var) + .append(".ntype = 'double')"); + } + } + + @Override + public void meet(IsResource node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + // operator must be a variable or a constant + if(arg instanceof ValueConstant) { + builder.append(Boolean.toString(((ValueConstant) arg).getValue() instanceof URI || ((ValueConstant) arg).getValue() instanceof BNode)); + } else if(arg instanceof Var) { + String var = getVariableAlias((Var) arg); + + builder .append("(") + .append(var) + .append(".ntype = 'uri' OR ") + .append(var) + .append(".ntype = 'bnode')"); + } + } + + @Override + public void meet(IsURI node) throws RuntimeException { + ValueExpr arg = node.getArg(); + + // operator must be a variable or a constant + if(arg instanceof ValueConstant) { + builder.append(Boolean.toString(((ValueConstant) arg).getValue() instanceof URI)); + } else if(arg instanceof Var) { + String var = getVariableAlias((Var) arg); + + builder.append(var).append(".ntype = 'uri'"); + } + } + + @Override + public void meet(IRIFunction fun) throws RuntimeException { + if(fun.getBaseURI() != null) { + + String ex = new ExpressionEvaluator(fun.getArg(), parent, OPTypes.STRING).build(); + + builder + .append("CASE WHEN position(':' IN ").append(ex).append(") > 0 THEN ").append(ex) + .append(" ELSE ").append(functionRegistry.get(FN.CONCAT.stringValue()).getNative(parent.getDialect(), "'" + fun.getBaseURI() + "'", ex)) + .append(" END "); + } else { + // get value of argument and express it as string + optypes.push(OPTypes.STRING); + fun.getArg().visit(this); + optypes.pop(); + } + } + + @Override + public void meet(Label node) throws RuntimeException { + optypes.push(OPTypes.STRING); + node.getArg().visit(this); + optypes.pop(); + } + + @Override + public void meet(Lang lang) throws RuntimeException { + if(lang.getArg() instanceof Var) { + builder.append(getVariableAlias((Var) lang.getArg())); + builder.append(".lang"); + } + } + + @Override + public void meet(LangMatches lm) throws RuntimeException { + ValueConstant pattern = (ValueConstant) lm.getRightArg(); + + if(pattern.getValue().stringValue().equals("*")) { + lm.getLeftArg().visit(this); + builder.append(" LIKE '%'"); + } else if(pattern.getValue().stringValue().equals("")) { + lm.getLeftArg().visit(this); + builder.append(" IS NULL"); + } else { + builder.append("(lower("); + lm.getLeftArg().visit(this); + builder.append(") = lower('"); + builder.append(pattern.getValue().stringValue().toLowerCase()); + builder.append("') OR lower("); + lm.getLeftArg().visit(this); + builder.append(") LIKE lower('"); + builder.append(pattern.getValue().stringValue().toLowerCase()); + builder.append("-%') )"); + } + } + + @Override + public void meet(LocalName node) throws RuntimeException { + super.meet(node); + } + + @Override + public void meet(MathExpr expr) throws RuntimeException { + OPTypes ot = new OPTypeFinder(expr).coerce(); + + if(ot == OPTypes.STRING) { + if(expr.getOperator() == MathExpr.MathOp.PLUS) { + builder.append(functionRegistry.get(FN.CONCAT.stringValue()).getNative(parent.getDialect(),new ExpressionEvaluator(expr.getLeftArg(), parent, ot).build(), new ExpressionEvaluator(expr.getRightArg(), parent, ot).build())); + } else { + throw new IllegalArgumentException("operation "+expr.getOperator()+" is not supported on strings"); + } + } else { + if(ot == OPTypes.ANY || ot == OPTypes.TERM) { + ot = OPTypes.DOUBLE; + } + + optypes.push(ot); + expr.getLeftArg().visit(this); + builder.append(getSQLOperator(expr.getOperator())); + expr.getRightArg().visit(this); + optypes.pop(); + } + } + + @Override + public void meet(Max node) throws RuntimeException { + builder.append("MAX("); + optypes.push(OPTypes.DOUBLE); + node.getArg().visit(this); + optypes.pop(); + builder.append(")"); + } + + @Override + public void meet(Min node) throws RuntimeException { + builder.append("MIN("); + optypes.push(OPTypes.DOUBLE); + node.getArg().visit(this); + optypes.pop(); + builder.append(")"); + } + + @Override + public void meet(Regex re) throws RuntimeException { + builder.append(optimizeRegexp( + new ExpressionEvaluator(re.getArg(), parent, OPTypes.STRING).build(), + new ExpressionEvaluator(re.getPatternArg(), parent, OPTypes.STRING).build(), + re.getFlagsArg() + )); + } + + @Override + public void meet(SameTerm cmp) throws RuntimeException { + // covered by value binding in variables + optypes.push(OPTypes.TERM); + cmp.getLeftArg().visit(this); + builder.append(" = "); + cmp.getRightArg().visit(this); + optypes.pop(); + } + + @Override + public void meet(Str node) throws RuntimeException { + optypes.push(OPTypes.STRING); + node.getArg().visit(this); + optypes.pop(); + } + + @Override + public void meet(Sum node) throws RuntimeException { + builder.append("SUM("); + optypes.push(OPTypes.DOUBLE); + node.getArg().visit(this); + optypes.pop(); + builder.append(")"); + } + + @Override + public void meet(Var node) throws RuntimeException { + // distinguish between the case where the variable is plain and the variable is bound + SQLVariable sv = parent.getVariables().get(node.getName()); + + if(sv == null) { + builder.append("NULL"); + } else if(sv.getBindings().size() > 0) { + // in case the variable is actually an alias for an expression, we evaluate that expression instead, effectively replacing the + // variable occurrence with its value + sv.getBindings().get(0).visit(this); + } else { + String var = sv.getAlias(); + + if(sv.getProjectionType() != ProjectionType.NODE && sv.getProjectionType() != ProjectionType.NONE) { + // in case the variable represents a constructed or bound value instead of a node, we need to + // use the SQL expression as value; SQL should take care of proper casting... + // TODO: explicit casting needed? + builder.append(sv.getExpressions().get(0)); + } else { + // in case the variable represents an entry from the NODES table (i.e. has been bound to a node + // in the database, we take the NODES alias and resolve to the correct column according to the + // operator type + switch (optypes.peek()) { + case STRING: + builder.append(var).append(".svalue"); + break; + case INT: + builder.append(var).append(".ivalue"); + break; + case DOUBLE: + builder.append(var).append(".dvalue"); + break; + case DATE: + builder.append(var).append(".tvalue"); + break; + case VALUE: + builder.append(var).append(".svalue"); + break; + case URI: + builder.append(var).append(".svalue"); + break; + case TERM: + if(var == null) { + // might happen in case the variable has been bound to a constant value and not + // joined in a pattern query + builder.append(sv.getExpressions().get(0)); + } else { + builder.append(var).append(".id"); + } + break; + case ANY: + builder.append(var).append(".id"); + break; + } + } + } + } + + @Override + public void meet(ValueConstant node) throws RuntimeException { + String val = node.getValue().stringValue(); + + switch (optypes.peek()) { + case STRING: + case VALUE: + case URI: + builder.append("'").append(val).append("'"); + break; + case INT: + builder.append(Integer.parseInt(val)); + break; + case DOUBLE: + builder.append(Double.parseDouble(val)); + break; + case DATE: + builder.append("'").append(sqlDateFormat.format(DateUtils.parseDate(val))).append("'"); + break; + + // in this case we should return a node ID and also need to make sure it actually exists + case TERM: + case ANY: + KiWiNode n = parent.getConverter().convert(node.getValue()); + builder.append(n.getId()); + break; + + default: throw new IllegalArgumentException("unsupported value type: " + optypes.peek()); + } + } + + private String getVariableAlias(Var var) { + return parent.getVariables().get(var.getName()).getAlias(); + } + + + private String getVariableAlias(String varName) { + return parent.getVariables().get(varName).getAlias(); + } + + /** + * Copy variables from the set to a new set suitable for a subquery; this allows passing over variable expressions + * from parent queries to subqueries without the subquery adding expressions that are then not visible outside + * @param variables + * @return + */ + private static Map<String, SQLVariable> copyVariables(Map<String, SQLVariable> variables) { + Map<String,SQLVariable> copy = new HashMap<>(); + try { + for(Map.Entry<String,SQLVariable> entry : variables.entrySet()) { + copy.put(entry.getKey(), (SQLVariable) entry.getValue().clone()); + } + } catch (CloneNotSupportedException e) { + log.error("could not clone SQL variable:",e); + } + + return copy; + } + + private String castExpression(String arg, OPTypes type) { + if(type == null) { + return arg; + } + + switch (type) { + case DOUBLE: + return functionRegistry.get(XMLSchema.DOUBLE).getNative(parent.getDialect(), arg); + case INT: + return functionRegistry.get(XMLSchema.INTEGER).getNative(parent.getDialect(), arg); + case BOOL: + return functionRegistry.get(XMLSchema.BOOLEAN).getNative(parent.getDialect(), arg); + case DATE: + return functionRegistry.get(XMLSchema.DATETIME).getNative(parent.getDialect(), arg); + case VALUE: + return arg; + case ANY: + return arg; + default: + return arg; + } + } + + private static String getSQLOperator(Compare.CompareOp op) { + switch (op) { + case EQ: return " = "; + case GE: return " >= "; + case GT: return " > "; + case LE: return " <= "; + case LT: return " < "; + case NE: return " <> "; + } + throw new IllegalArgumentException("unsupported operator type for comparison: "+op); + } + + + private static String getSQLOperator(MathExpr.MathOp op) { + switch (op) { + case PLUS: return " + "; + case MINUS: return " - "; + case DIVIDE: return " / "; + case MULTIPLY: return " / "; + } + throw new IllegalArgumentException("unsupported operator type for math expression: "+op); + } + + /** + * Test if the regular expression given in the pattern can be simplified to a LIKE SQL statement; these are + * considerably more efficient to evaluate in most databases, so in case we can simplify, we return a LIKE. + * + * @param value + * @param pattern + * @return + */ + private String optimizeRegexp(String value, String pattern, ValueExpr flags) { + String _flags = flags != null && flags instanceof ValueConstant ? ((ValueConstant)flags).getValue().stringValue() : null; + + String simplified = pattern; + + // apply simplifications + + // remove SQL quotes at beginning and end + simplified = simplified.replaceFirst("^'",""); + simplified = simplified.replaceFirst("'$",""); + + + // remove .* at beginning and end, they are the default anyways + simplified = simplified.replaceFirst("^\\.\\*",""); + simplified = simplified.replaceFirst("\\.\\*$",""); + + // replace all occurrences of % with \% and _ with \_, as they are special characters in SQL + simplified = simplified.replaceAll("%","\\%"); + simplified = simplified.replaceAll("_","\\_"); + + // if pattern now does not start with a ^, we put a "%" in front + if(!simplified.startsWith("^")) { + simplified = "%" + simplified; + } else { + simplified = simplified.substring(1); + } + + // if pattern does not end with a "$", we put a "%" at the end + if(!simplified.endsWith("$")) { + simplified = simplified + "%"; + } else { + simplified = simplified.substring(0,simplified.length()-1); + } + + // replace all non-escaped occurrences of .* with % + simplified = simplified.replaceAll("(?<!\\\\)\\.\\*","%"); + + // replace all non-escaped occurrences of .+ with _% + simplified = simplified.replaceAll("(?<!\\\\)\\.\\+","_%"); + + // the pattern is not simplifiable if the simplification still contains unescaped regular expression constructs + Pattern notSimplifiable = Pattern.compile("(?<!\\\\)[\\.\\*\\+\\{\\}\\[\\]\\|]"); + + if(notSimplifiable.matcher(simplified).find()) { + return parent.getDialect().getRegexp(value, pattern, _flags); + } else { + if(!simplified.startsWith("%") && !simplified.endsWith("%")) { + if(StringUtils.containsIgnoreCase(_flags, "i")) { + return String.format("lower(%s) = lower('%s')", value, simplified); + } else { + return String.format("%s = '%s'", value, simplified); + } + } else { + if(StringUtils.containsIgnoreCase(_flags,"i")) { + return parent.getDialect().getILike(value, "'" + simplified + "'"); + } else { + return value + " LIKE '"+simplified+"'"; + } + } + } + + } + +} http://git-wip-us.apache.org/repos/asf/marmotta/blob/d90e1429/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java index f035d56..65d3bb3 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/persistence/KiWiSparqlConnection.java @@ -163,6 +163,12 @@ public class KiWiSparqlConnection { resultRow.addBinding(sv.getSparqlName(), new LiteralImpl(svalue, XSD.Double)); } break; + case BOOL: + if(row.getObject(sv.getName()) != null) { + svalue = Boolean.toString(row.getBoolean(sv.getName())); + resultRow.addBinding(sv.getSparqlName(), new LiteralImpl(svalue.toLowerCase(), XSD.Boolean)); + } + break; case STRING: default: svalue = row.getString(sv.getName());
