SQL translation

sschaffert Wed, 17 Sep 2014 04:54:07 -0700

Repository: marmotta
Updated Branches:
  refs/heads/develop 9ead232e1 -> 25568a16d



cleaning up the SPARQL->SQL translation


Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/6e306bb4
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/6e306bb4
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/6e306bb4

Branch: refs/heads/develop
Commit: 6e306bb4e302e2a2881168f79a11eeaf505cbed6
Parents: 9ead232
Author: Sebastian Schaffert <[email protected]>
Authored: Wed Sep 17 12:26:43 2014 +0200
Committer: Sebastian Schaffert <[email protected]>
Committed: Wed Sep 17 12:26:43 2014 +0200

----------------------------------------------------------------------
 .../kiwi/sparql/builder/SQLBuilder.java         | 120 +++++-------------
 .../kiwi/sparql/builder/SQLFragment.java        | 121 +++++++++++++++++++
 .../kiwi/sparql/builder/SQLPattern.java         | 117 +++++++++++++++++-
 .../kiwi/sparql/test/KiWiSparqlJoinTest.java    |   1 -
 4 files changed, 263 insertions(+), 96 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/marmotta/blob/6e306bb4/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java
----------------------------------------------------------------------
diff --git 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java
 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java
index 20cfcc5..35b4d8f 100644
--- 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java
+++ 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java
@@ -537,6 +537,27 @@ public class SQLBuilder {
             }
         }
 
+        // for each pattern, update the joinFields by checking if subject, 
predicate, object or context are involved
+        // in any filters or functions; in this case, the corresponding field 
needs to be joined with the NODES table
+        // and we need to mark the pattern accordingly.
+        boolean first = true;
+        for(SQLFragment f : fragments) {
+            if(first) {
+                // the conditions of the first fragment need to be placed in 
the WHERE part of the query, because
+                // there is not necessarily a JOIN ... ON where we can put it
+                f.setConditionPosition(SQLFragment.ConditionPosition.WHERE);
+                first = false;
+            }
+
+            for (SQLPattern p : f.getPatterns()) {
+                for(Map.Entry<SQLPattern.TripleColumns, Var> fieldEntry : 
p.getTripleFields().entrySet()) {
+                    if(fieldEntry.getValue() != null && 
!fieldEntry.getValue().hasValue() && 
hasNodeCondition(fieldEntry.getValue(),query)) {
+                        p.setJoinField(fieldEntry.getKey(), 
variableNames.get(fieldEntry.getValue()));
+                    }
+                }
+            }
+        }
+
     }
 
 
@@ -576,95 +597,7 @@ public class SQLBuilder {
         //    - context, there will be a "inner join P.context as P_C_V" or 
"left outer join p.context as P_C_V"
         StringBuilder fromClause = new StringBuilder();
         for(Iterator<SQLFragment> fit = fragments.iterator(); fit.hasNext(); ) 
{
-            SQLFragment frag = fit.next();
-
-            for (Iterator<SQLPattern> it = frag.getPatterns().iterator(); 
it.hasNext(); ) {
-                boolean firstFragment = fromClause.length() == 0;
-
-                SQLPattern p = it.next();
-                String pName = p.getName();
-
-
-                // the joinClause consists of a reference to the triples table 
and possibly inner joins with the
-                // nodes table in case we need to verify node values, e.g. in 
a filter
-                StringBuilder joinClause = new StringBuilder();
-
-                // indicate if we need parentheses because of node joins
-                boolean hasNodeJoins = false;
-
-
-                // the onClause consists of the filter conditions from the 
statement for joining/left joining with
-                // previous statements
-                StringBuilder onClause = new StringBuilder();
-
-
-                joinClause.append("triples " + pName);
-
-                Var[] fields = p.getFields();
-                for (int i = 0; i < fields.length; i++) {
-
-                    if (fields[i] != null && !fields[i].hasValue() && 
hasNodeCondition(fields[i], query)) {
-
-                        String vName = variableNames.get(fields[i]);
-                        joinClause.append("\n    INNER JOIN nodes AS ");
-                        joinClause.append(pName + "_" + positions[i] + "_" + 
vName);
-
-                        joinClause.append(" ON " + pName + "." + positions[i] 
+ " = " + pName + "_" + positions[i] + "_" + vName + ".id ");
-
-                        hasNodeJoins = true;
-                    }
-                }
-
-
-                // in case this is not the first fragment, we add all 
statement filter conditions to the ON clause of the query;
-                // in case this IS the first fragment, they will be added to 
the WHERE clause.
-                if(!firstFragment) {
-                    for(Iterator<String> cit = p.getConditions().iterator(); 
cit.hasNext(); ) {
-                        if(onClause.length() > 0) {
-                            onClause.append("\n      AND ");
-                        }
-                        onClause.append(cit.next());
-                    }
-                }
-
-                // in case the pattern is the last of the fragment, also add 
the filter conditions of the fragment (TODO: verify this does indeed the right 
thing)
-                if(!firstFragment && !it.hasNext()) {
-                    // if this is the last pattern of the fragment, add the 
filter conditions
-                    for(Iterator<String> cit = 
frag.getConditions().iterator(); cit.hasNext(); ) {
-                        if(onClause.length() > 0) {
-                            onClause.append("\n       AND ");
-                        }
-                        onClause.append(cit.next());
-                    }
-                }
-
-
-
-                // in case we add join conditions, open a parentheses to cover 
the subquery
-                if(hasNodeJoins && onClause.length() > 0) {
-                    fromClause.append("(");
-                }
-
-                fromClause.append(joinClause);
-
-                if(hasNodeJoins && onClause.length() > 0) {
-                    fromClause.append(")");
-                }
-
-
-                // close the "subquery" and add any join conditions
-                if(onClause.length() > 0) {
-                    fromClause.append(" ON (");
-                    fromClause.append(onClause);
-                    fromClause.append(")");
-                }
-
-
-                if (it.hasNext()) {
-                    fromClause.append("\n JOIN \n  ");
-                }
-            }
-
+            fromClause.append(fit.next().buildFromClause());
             if(fit.hasNext()) {
                 fromClause.append("\n LEFT JOIN \n  ");
             }
@@ -686,11 +619,12 @@ public class SQLBuilder {
         List<String> whereConditions = new LinkedList<String>();
 
         // 1. for the first pattern of the first fragment, we add the 
conditions to the WHERE clause
-        if(fragments.size() > 0 && fragments.get(0).getPatterns().size() > 0) {
-            whereConditions.addAll(fragments.get(0).getConditions());
-            
whereConditions.addAll(fragments.get(0).getPatterns().get(0).getConditions());
-        }
 
+        for(SQLFragment fragment : fragments) {
+            if(fragment.getConditionPosition() == 
SQLFragment.ConditionPosition.WHERE) {
+                whereConditions.add(fragment.buildConditionClause());
+            }
+        }
 
         // 3. for each variable in the initialBindings, add a condition to the 
where clause setting it
         //    to the node given as binding

http://git-wip-us.apache.org/repos/asf/marmotta/blob/6e306bb4/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLFragment.java
----------------------------------------------------------------------
diff --git 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLFragment.java
 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLFragment.java
index b9f40b4..55920c4 100644
--- 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLFragment.java
+++ 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLFragment.java
@@ -20,6 +20,7 @@ package org.apache.marmotta.kiwi.sparql.builder;
 import org.openrdf.query.algebra.ValueExpr;
 
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 /**
@@ -31,6 +32,16 @@ import java.util.List;
 public class SQLFragment {
 
     /**
+     * Indicate where the fragment's conditions should be placed (ON part of 
the JOIN clause or WHERE part of the query).
+     * This distinction is necessary when OPTIONAL constructs are used, i.e. 
the created SQL uses LEFT JOINs. We cannot
+     * always place it in JOIN conditions, because the first pattern will not 
have a JOIN.
+     */
+    public static enum ConditionPosition {
+        JOIN, WHERE
+    };
+
+
+    /**
      * The patterns contained in this fragment. All patterns are joined using 
an INNER JOIN.
      */
     private List<SQLPattern> patterns;
@@ -39,6 +50,7 @@ public class SQLFragment {
 
     private List<ValueExpr> filters;
 
+    private ConditionPosition conditionPosition = ConditionPosition.JOIN;
 
     public SQLFragment() {
         this.patterns   = new ArrayList<>();
@@ -57,4 +69,113 @@ public class SQLFragment {
     public List<ValueExpr> getFilters() {
         return filters;
     }
+
+
+    /**
+     * Indicate where the fragment's conditions should be placed (ON part of 
the JOIN clause or WHERE part of the query).
+     * For the first fragment in a query this will always be WHERE, while for 
all other fragments it should be JOIN. Note
+     * that JOIN is strictly necessary for all fragments that are OPTIONAL.
+     */
+    public void setConditionPosition(ConditionPosition conditionPosition) {
+        this.conditionPosition = conditionPosition;
+    }
+
+    public ConditionPosition getConditionPosition() {
+        return conditionPosition;
+    }
+
+    /**
+     * Build the FROM clause by joining together all patterns appropriately 
and adding the filter conditions
+     * @return
+     */
+    public String buildFromClause() {
+        StringBuilder fromClause = new StringBuilder();
+
+        for (Iterator<SQLPattern> it = patterns.iterator(); it.hasNext(); ) {
+
+            SQLPattern p = it.next();
+
+
+            StringBuilder conditionClause = new StringBuilder();
+
+            // in case we add the condition to the JOIN, build first the 
conditions for the pattern; otherwise, the
+            // conditions for the pattern will be added to the WHERE clause
+            if(conditionPosition == ConditionPosition.JOIN) {
+                conditionClause.append(p.buildConditionClause());
+            }
+
+
+            // in case the pattern is the last of the fragment, also add the 
filter conditions of the fragment (TODO: verify this does indeed the right 
thing)
+            if(conditionPosition == ConditionPosition.JOIN && !it.hasNext()) {
+                // if this is the last pattern of the fragment, add the filter 
conditions
+                for(Iterator<String> cit = getConditions().iterator(); 
cit.hasNext(); ) {
+                    if(conditionClause.length() > 0) {
+                        conditionClause.append("\n       AND ");
+                    }
+                    conditionClause.append(cit.next());
+                }
+            }
+
+
+            // when the pattern builds a join with the nodes table and we have 
fragment-wide conditions, we need to
+            // wrap the pattern's from clause in parentheses
+            if(conditionClause.length() > 0) {
+                if(p.hasJoinFields())
+                    fromClause.append("(");
+                fromClause.append(p.buildFromClause());
+                if(p.hasJoinFields())
+                    fromClause.append(")");
+                fromClause.append(" ON (");
+                fromClause.append(conditionClause);
+                fromClause.append(")");
+
+            } else {
+                fromClause.append(p.buildFromClause());
+            }
+
+
+            if (it.hasNext()) {
+                if(conditionPosition == ConditionPosition.JOIN) {
+                    fromClause.append("\n JOIN \n  ");
+                } else {
+                    fromClause.append("\n CROSS JOIN \n  ");
+                }
+            }
+        }
+
+        return fromClause.toString();
+    }
+
+    /**
+     * Build the combined condition clause for this fragment. This will be the 
empty string when the conditionPosition is JOIN.
+     * @return
+     */
+    public String buildConditionClause() {
+        StringBuilder conditionClause = new StringBuilder();
+
+        if(conditionPosition == ConditionPosition.WHERE) {
+            for (Iterator<SQLPattern> it = patterns.iterator(); it.hasNext(); 
) {
+                SQLPattern p = it.next();
+
+                // in case we add the condition to the JOIN, build first the 
conditions for the pattern; otherwise, the
+                // conditions for the pattern will be added to the WHERE clause
+                if(conditionClause.length() > 0) {
+                    conditionClause.append("\n       AND ");
+                }
+                conditionClause.append(p.buildConditionClause());
+
+            }
+
+            // in case the pattern is the last of the fragment, also add the 
filter conditions of the fragment
+            // if this is the last pattern of the fragment, add the filter 
conditions
+            for(Iterator<String> cit = getConditions().iterator(); 
cit.hasNext(); ) {
+                if(conditionClause.length() > 0) {
+                    conditionClause.append("\n       AND ");
+                }
+                conditionClause.append(cit.next());
+            }
+        }
+
+        return conditionClause.toString();
+    }
 }

http://git-wip-us.apache.org/repos/asf/marmotta/blob/6e306bb4/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLPattern.java
----------------------------------------------------------------------
diff --git 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLPattern.java
 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLPattern.java
index 61525eb..3508e46 100644
--- 
a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLPattern.java
+++ 
b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLPattern.java
@@ -21,8 +21,7 @@ import org.openrdf.model.Resource;
 import org.openrdf.query.algebra.StatementPattern;
 import org.openrdf.query.algebra.Var;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
 
 /**
  * A statement pattern translated to SQL consists of a named reference to the 
triple table, an indicator giving the
@@ -34,6 +33,39 @@ public class SQLPattern {
 
 
     /**
+     * Describe the different columns of the triple table that we might need 
to join with
+     */
+    public static enum TripleColumns {
+        SUBJECT  ("subject"),
+        PREDICATE("predicate"),
+        OBJECT   ("object"),
+        CONTEXT  ("context");
+
+        TripleColumns(String fieldName) {
+            this.fieldName = fieldName;
+        }
+
+        private final String fieldName;
+
+        public String getFieldName() {
+            return fieldName;
+        }
+    };
+
+    /**
+     * This map contains mappings from column to variable names. If for a 
given column an entry is contained in the
+     * map, the statement requires to join the TRIPLE table on this field with 
the NODES table. This is typically
+     * the case when there is a condition or function referring to the actual 
value of the node and not only the ID.
+     * The joined NODES table will be aliased with the variable name contained 
as value for the field.
+     */
+    private EnumMap<TripleColumns, String> joinFields = new 
EnumMap<>(TripleColumns.class);
+
+    /**
+     * A map containing references to the variables used in the triple fields.
+     */
+    private EnumMap<TripleColumns, Var> tripleFields = new 
EnumMap<>(TripleColumns.class);
+
+    /**
      * SQL conditions defined on this pattern; may only refer to previous or 
the current statement.
      */
     private List<String> conditions;
@@ -59,8 +91,30 @@ public class SQLPattern {
         this.conditions = new ArrayList<>();
         this.conditions.add(name + ".deleted = false");
         this.sparqlPattern = sparqlPattern;
+
+        tripleFields.put(TripleColumns.SUBJECT,   
sparqlPattern.getSubjectVar());
+        tripleFields.put(TripleColumns.PREDICATE, 
sparqlPattern.getPredicateVar());
+        tripleFields.put(TripleColumns.OBJECT,    
sparqlPattern.getObjectVar());
+        tripleFields.put(TripleColumns.CONTEXT,   
sparqlPattern.getContextVar());
+    }
+
+    /**
+     * Set the variable name (alias for the NODES table) for the given column 
to "varName".
+     * @param col
+     * @param varName
+     */
+    public void setJoinField(TripleColumns col, String varName) {
+        joinFields.put(col,varName);
     }
 
+    /**
+     * Return true when the pattern involves JOINs with the NODES table; in 
this case we need to enclose the
+     * FROM clause with parentheses before joining with previous clauses.
+     * @return
+     */
+    public boolean hasJoinFields() {
+        return joinFields.size() > 0;
+    }
 
     public Var[] getFields() {
         return new Var[] {
@@ -71,6 +125,65 @@ public class SQLPattern {
         };
     }
 
+    public EnumMap<TripleColumns, Var> getTripleFields() {
+        return tripleFields;
+    }
+
+    /**
+     * Create the clause to be used in the FROM part to represent this pattern 
and return it. The name and join fields
+     * need to be set before so this produces the correct output.
+     *
+     * This method works as follows:
+     * - for the statement pattern, it adds a reference to the TRIPLES table 
and aliases it with the pattern name
+     * - for each field of the pattern whose value is used in conditions or 
functions, an INNER JOIN with the NODES table
+     *   is added to retrieve the actual node with its values; the NODES table 
is aliased using the variable name set in
+     *   setJoinField()
+     *
+     * @return
+     */
+    public String buildFromClause() {
+        // the joinClause consists of a reference to the triples table and 
possibly inner joins with the
+        // nodes table in case we need to verify node values, e.g. in a filter
+        StringBuilder fromClause = new StringBuilder();
+
+
+        fromClause.append("triples " + name);
+
+
+        for(Map.Entry<TripleColumns,String> colEntry : joinFields.entrySet()) {
+            TripleColumns col = colEntry.getKey();
+            String        var = colEntry.getValue();
+
+            fromClause.append("\n    INNER JOIN nodes AS ");
+            fromClause.append(name + "_" + col.getFieldName() + "_" + var);
+
+            fromClause.append(" ON " + name + "." + col.getFieldName() + " = " 
+ name + "_" + col.getFieldName() + "_" + var + ".id ");
+
+        }
+
+
+        return fromClause.toString();
+    }
+
+    /**
+     * Build the condition clause for this statement to be used in the WHERE 
part or the ON part of a JOIN.
+     * @return
+     */
+    public String buildConditionClause() {
+        // the onClause consists of the filter conditions from the statement 
for joining/left joining with
+        // previous statements
+        StringBuilder onClause = new StringBuilder();
+
+        for(Iterator<String> cit = conditions.iterator(); cit.hasNext(); ) {
+            if(onClause.length() > 0) {
+                onClause.append("\n      AND ");
+            }
+            onClause.append(cit.next());
+        }
+
+        return onClause.toString();
+    }
+
 
     public List<String> getConditions() {
         return conditions;

http://git-wip-us.apache.org/repos/asf/marmotta/blob/6e306bb4/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java
----------------------------------------------------------------------
diff --git 
a/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java
 
b/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java
index 17fa358..26291b8 100644
--- 
a/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java
+++ 
b/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java
@@ -382,7 +382,6 @@ public class KiWiSparqlJoinTest {
             Assert.assertTrue(result1.hasNext());
 
 
-
             compareResults(result1,result2);
 
         } catch(RepositoryException ex) {

[1/3] git commit: cleaning up the SPARQL->SQL translation

Reply via email to