Hello Gregory,

The attached patch "properly" fixes the issue (without blocking the
query optimization), so it will probably go into the next server release.
Meanwhile, the patch can be applied to any recent release or snapshot.

Best Regards,

Ivan Mikhailov
OpenLink Software
http://virtuoso.openlinksw.com

On Thu, 2010-09-30 at 16:14 -0400, Gregory Williams wrote:
> I'm running into what seems like a bug in virtuoso's SPARQL optimizer. I'm 
> trying to execute a query using the OPTIONAL/!BOUND trick for negation and 
> getting this error message:
> 
> Internal error: sparp_gp_detach_all_filters(): optimization tries to break 
> the semantics of LEFT OUTER JOIN for OPTIONAL clause
> 
> MacTed on #swig told me to send along a copy of the EXPLAIN for the query to 
> the list, but EXPLAIN gives me the same error. Strangely, this seems to 
> happen only when the optional/filter are within a GRAPH graph pattern. The 
> query I'm trying to execute is:
> 
> 
> PREFIX dcterms: <http://purl.org/dc/terms/>
> PREFIX void: <http://rdfs.org/ns/void#>
> SELECT DISTINCT ?dataset ?modified
> WHERE {
>       GRAPH <http://kasei.us/temp/data-gov-times.rdf> {
>               ?dataset a void:Dataset;
>                       dcterms:modified ?modified .
>               OPTIONAL {
>                       ?dataset dcterms:modified ?m1 .
>                       FILTER(?m1 > ?modified)
>               }
>               FILTER(!BOUND(?m1))
>       }
> }
> ORDER BY ?dataset
> 
> I'm using virtuoso 06.01.3127. Any thoughts on how to get around this problem 
> and when this might be fixed (perhaps it is in 6.2?) would be greatly 
> appreciated.
> 
> thanks,
> .greg

Index: sparql2sql.c
===================================================================
RCS file: /home/staff/us-cvs/virtuoso/libsrc/Wi/sparql2sql.c,v
retrieving revision 1.26.2.69
diff -u -U 10 -r1.26.2.69 sparql2sql.c
--- sparql2sql.c	17 Sep 2010 13:32:13 -0000	1.26.2.69
+++ sparql2sql.c	1 Oct 2010 12:46:26 -0000
@@ -3461,25 +3461,77 @@
             }
           END_SPARP_FOREACH_GP_EQUIV;
           if (-1 == first_conflicting_predecessor_idx)
             goto just_remove_braces; /* see below */
 #if 0
 /* If there are things between first conflicting predecessor and the  */
           if ((memb_ctr-1) == first_conflicting_predecessor_idx)
             continue;
 /*!!! TBD: moving members from first_conflicting_predecessor_idx+1 to memb_ctr-1 inclusive into left part of memb if appropriate */
 #endif
-          continue;
         }
       continue;
 
 just_remove_braces:
+      if (0 != memb->_.gp.glued_filters_count)
+        {
+          int glued_last_idx = BOX_ELEMENTS (memb->_.gp.filters);
+          int glued_first_idx = glued_last_idx - memb->_.gp.glued_filters_count;
+          sparp_equiv_t *suspicious_filt_eq = NULL;
+          int glued_idx, memb_equiv_inx;
+          if (parent_gp->_.gp.glued_filters_count)
+            continue; /* Don't know how to safely mix two lists of glued filters, one already in parent and one from member, hence the sabotage */
+/* Consider a glued filter in memb that refers to ?x . ?x may be present in memb or not; it may also be present in parent_gp or not.
+?x in memb	| ?x in parent	| Can filter be moved?
+Yes & bound	| Yes & bound	| These two are equal due to join so filter can be moved
+Yes & bound	| Yes & !bound	| Empty join, filter does not matter, so it can be moved
+Yes & bound	| No		| Safe to move, the only occurrence will define the value as it was
+Yes & !bound	| Yes & bound	| Empty join, filter does not matter, so it can be moved
+Yes & !bound	| Yes & !bound	| Empty join, filter does not matter, so it can be moved
+Yes & !bound	| No		| Safe to move, the only occurrence will define the value as it was
+No		| Yes & bound	| !!! Can't move, not bound may become bound
+No		| Yes & !bound	| Safe to move, unbound anyway
+No		| No		| Safe to move, unbound anyway
+So the only unsafe case is a fixed filter on a variable that is missing where the filter resides but present at the parent.
+*/
+          SPARP_FOREACH_GP_EQUIV (sparp, memb, memb_equiv_inx, memb_eq)
+            {
+              int parent_conn_ctr;
+              if (SPART_VARR_NOT_NULL & memb_eq->e_rvr.rvrRestrictions)
+                continue;
+              DO_BOX_FAST (ptrlong, parent_equiv_idx, parent_conn_ctr, memb_eq->e_receiver_idxs)
+                {
+                  sparp_equiv_t *parent_equiv = SPARP_EQUIV (sparp, parent_equiv_idx);
+                  int glued_idx;
+                  for (glued_idx = glued_first_idx; glued_idx < glued_last_idx; glued_idx++)
+                    {
+                      SPART *glued_filt = memb->_.gp.filters[glued_idx];
+                      if (sparp_tree_uses_var_of_eq (sparp, glued_filt, parent_equiv))
+                        {
+                          suspicious_filt_eq = memb_eq;
+                          goto suspicious_filt_eq_found; /* see below */
+                        }
+                    }
+                }
+              END_DO_BOX_FAST;
+            }
+          END_SPARP_FOREACH_GP_EQUIV;
+suspicious_filt_eq_found:
+          if (NULL != suspicious_filt_eq)
+            continue;
+          for (glued_idx = glued_first_idx; glued_idx < glued_last_idx; glued_idx++)
+            {
+              SPART *filt = sparp_gp_detach_filter (sparp, memb, glued_first_idx, NULL);
+              sparp_gp_attach_filter (sparp, parent_gp, filt, BOX_ELEMENTS (parent_gp->_.gp.filters), NULL);
+            }
+          parent_gp->_.gp.glued_filters_count += (glued_last_idx - glued_first_idx);
+        }
       memb_filters = sparp_gp_detach_all_filters (sparp, memb, NULL);
       memb_filters_count = BOX_ELEMENTS_0 (memb_filters);
       for (sub_ctr = sub_count; sub_ctr--; /* no step */)
         {
           SPART *sub_memb = sparp_gp_detach_member (sparp, memb, sub_ctr, NULL);
           sparp_gp_attach_member (sparp, parent_gp, sub_memb, memb_ctr, NULL);
         }
       if (0 != memb_filters_count)
         sparp_gp_attach_many_filters (sparp, parent_gp, memb_filters /*!!! should it be sparp_treelist_full_copy (sparp, memb_filters, NULL) ? */, 0, NULL);
       memb_ctr += sub_count;

Reply via email to