[jira] [Updated] (JENA-1895) Incorrect results on join of two subqueries with HAVING

Andy Seaborne (Jira) Fri, 15 May 2020 01:13:24 -0700


     [ 
https://issues.apache.org/jira/browse/JENA-1895?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Andy Seaborne updated JENA-1895:
--------------------------------
    Description: 
This query
PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>

SELECT?v_16?v_17?v_3?v_4?v_15
WHERE {
{
SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
WHERE {
SELECT?v_5?v_6
WHERE {
{
SELECT?v_7
WHERE {
?v_9 a p0:tbl-tickets .
?v_9 p0:col-tickets-status ?v_8 .
?v_9 p0:col-tickets-created ?v_7 .
FILTER (?v_8!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
}
}
GROUP BY ?v_5 ?v_6
}
{
SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
WHERE {
SELECT?v_10?v_11
WHERE {
{
SELECT?v_12
WHERE {
?v_14 a p0:tbl-tickets .
?v_14 p0:col-tickets-status ?v_13 .
OPTIONAL {
?v_14 p0:col-tickets-solved ?v_12 .
}
FILTER (?v_13!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
}
}
GROUP BY ?v_10 ?v_11
}
FILTER (?v_3 > 2017||?v_3=2017&&?v_4 > 7)
BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
<http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS?v_15)
}
Results, correctly, in 34 rows when executed against our graph.
 
This similar query, with the final FILTER replaced by equivalent HAVINGs on 
subqueries
 
PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>

SELECT?v_16?v_17?v_3?v_4?v_15
WHERE {
{
SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
WHERE {
SELECT?v_5?v_6
WHERE {
{
SELECT?v_7
WHERE {
?v_9 a p0:tbl-tickets .
?v_9 p0:col-tickets-status ?v_8 .
?v_9 p0:col-tickets-created ?v_7 .
FILTER (?v_8!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
}
}
GROUP BY ?v_5 ?v_6
HAVING (?v_5 > 2017||?v_5=2017&&?v_6 > 7)
}
{
SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
WHERE {
SELECT?v_10?v_11
WHERE {
{
SELECT?v_12
WHERE {
?v_14 a p0:tbl-tickets .
?v_14 p0:col-tickets-status ?v_13 .
OPTIONAL {
?v_14 p0:col-tickets-solved ?v_12 .
}
FILTER (?v_13!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
}
}
GROUP BY ?v_10 ?v_11
HAVING (?v_10 > 2017||?v_10=2017&&?v_11 > 7)
}
BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
<http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS ?v_15)
}
 
results in 1156 rows when executed against our graph, in ways that suggest that 
a cartesian cross is being done instead of a join between the two subqueries.  
 
Additionally, our logs show many instances of errors like 
 
merge: Mismatch : "2018"^^http://www.w3.org/2001/XMLSchema#integer != 
"2019"^^http://www.w3.org/2001/XMLSchema#integer level=WARN 
logger=org.apache.jena.sparql.engine.iterator.QueryIterProjectMerge 

  was:
This query
PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>

SELECT?v_16?v_17?v_3?v_4?v_15
WHERE {
{
SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
WHERE {
SELECT?v_5?v_6
WHERE {
{
SELECT?v_7
WHERE {
?v_9 a p0:tbl-tickets .
?v_9 p0:col-tickets-status ?v_8 .
?v_9 p0:col-tickets-created ?v_7 .
FILTER (?v_8!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
}
}
GROUP BY ?v_5 ?v_6
}
{
SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
WHERE {
SELECT?v_10?v_11
WHERE {
{
SELECT?v_12
WHERE {
?v_14 a p0:tbl-tickets .
?v_14 p0:col-tickets-status ?v_13 .
OPTIONAL {
?v_14 p0:col-tickets-solved ?v_12 .
}
FILTER (?v_13!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
}
}
GROUP BY ?v_10 ?v_11
}
FILTER (?v_3 > 2017||?v_3=2017&&?v_4 > 7)
BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
<http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS?v_15)
}
Results, correctly, in 34 rows when executed against our graph.
 
This similar query, with the final FILTER replaced by equivalent HAVINGs on 
subqueries
 
PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>

SELECT?v_16?v_17?v_3?v_4?v_15
WHERE {
{
SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
WHERE {
SELECT?v_5?v_6
WHERE {
{
SELECT?v_7
WHERE {
?v_9 a p0:tbl-tickets .
?v_9 p0:col-tickets-status ?v_8 .
?v_9 p0:col-tickets-created ?v_7 .
FILTER (?v_8!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
}
}
GROUP BY ?v_5 ?v_6
HAVING (?v_5 > 2017||?v_5=2017&&?v_6 > 7)
}
{
SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
WHERE {
SELECT?v_10?v_11
WHERE {
{
SELECT?v_12
WHERE {
?v_14 a p0:tbl-tickets .
?v_14 p0:col-tickets-status ?v_13 .
OPTIONAL {
?v_14 p0:col-tickets-solved ?v_12 .
}
FILTER (?v_13!="deleted")
}
}
BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
}
}
GROUP BY ?v_10 ?v_11
HAVING (?v_10 > 2017||?v_10=2017&&?v_11 > 7)
}
BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
<http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS ?v_15)
}
 
results in 1156 rows when executed against our graph, in ways that suggest that 
a cartesian cross is being done instead of a join between the two subqueries.  
 
Additionally, our logs show many instances of errors like 
 
merge: Mismatch : "2018"^^http://www.w3.org/2001/XMLSchema#integer != 
"2019"^^http://www.w3.org/2001/XMLSchema#integer level=WARN 
logger=org.apache.jena.sparql.engine.iterator.QueryIterProjectMerge 


> Incorrect results on join of two subqueries with HAVING
> -------------------------------------------------------
>
>                 Key: JENA-1895
>                 URL: https://issues.apache.org/jira/browse/JENA-1895
>             Project: Apache Jena
>          Issue Type: Bug
>            Reporter: Dave Griffith
>            Priority: Major
>
> This query
> PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>
> SELECT?v_16?v_17?v_3?v_4?v_15
> WHERE {
> {
> SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
> WHERE {
> SELECT?v_5?v_6
> WHERE {
> {
> SELECT?v_7
> WHERE {
> ?v_9 a p0:tbl-tickets .
> ?v_9 p0:col-tickets-status ?v_8 .
> ?v_9 p0:col-tickets-created ?v_7 .
> FILTER (?v_8!="deleted")
> }
> }
> BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
> BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
> }
> }
> GROUP BY ?v_5 ?v_6
> }
> {
> SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
> WHERE {
> SELECT?v_10?v_11
> WHERE {
> {
> SELECT?v_12
> WHERE {
> ?v_14 a p0:tbl-tickets .
> ?v_14 p0:col-tickets-status ?v_13 .
> OPTIONAL {
> ?v_14 p0:col-tickets-solved ?v_12 .
> }
> FILTER (?v_13!="deleted")
> }
> }
> BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
> BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
> }
> }
> GROUP BY ?v_10 ?v_11
> }
> FILTER (?v_3 > 2017||?v_3=2017&&?v_4 > 7)
> BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
> <http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS?v_15)
> }
> Results, correctly, in 34 rows when executed against our graph.
>  
> This similar query, with the final FILTER replaced by equivalent HAVINGs on 
> subqueries
>  
> PREFIX p0: <https://ddwmetrics.linked.data.world/d/zendesk-data/>
> SELECT?v_16?v_17?v_3?v_4?v_15
> WHERE {
> {
> SELECT (?v_5 AS ?v_3) (?v_6 AS ?v_4) (COUNT(*) AS ?v_16)
> WHERE {
> SELECT?v_5?v_6
> WHERE {
> {
> SELECT?v_7
> WHERE {
> ?v_9 a p0:tbl-tickets .
> ?v_9 p0:col-tickets-status ?v_8 .
> ?v_9 p0:col-tickets-created ?v_7 .
> FILTER (?v_8!="deleted")
> }
> }
> BIND (<http://data.world/function/functions#datePart>("year", ?v_7) AS?v_5)
> BIND (<http://data.world/function/functions#datePart>("month", ?v_7) AS?v_6)
> }
> }
> GROUP BY ?v_5 ?v_6
> HAVING (?v_5 > 2017||?v_5=2017&&?v_6 > 7)
> }
> {
> SELECT (?v_10 AS ?v_3) (?v_11 AS ?v_4) (COUNT(*) AS ?v_17)
> WHERE {
> SELECT?v_10?v_11
> WHERE {
> {
> SELECT?v_12
> WHERE {
> ?v_14 a p0:tbl-tickets .
> ?v_14 p0:col-tickets-status ?v_13 .
> OPTIONAL {
> ?v_14 p0:col-tickets-solved ?v_12 .
> }
> FILTER (?v_13!="deleted")
> }
> }
> BIND (<http://data.world/function/functions#datePart>("year", ?v_12) AS?v_10)
> BIND (<http://data.world/function/functions#datePart>("month", ?v_12) AS?v_11)
> }
> }
> GROUP BY ?v_10 ?v_11
> HAVING (?v_10 > 2017||?v_10=2017&&?v_11 > 7)
> }
> BIND (CONCAT(<http://www.w3.org/2001/XMLSchema#string>(?v_3), "-", 
> <http://www.w3.org/2001/XMLSchema#string>(?v_4)) AS ?v_15)
> }
>  
> results in 1156 rows when executed against our graph, in ways that suggest 
> that a cartesian cross is being done instead of a join between the two 
> subqueries.  
>  
> Additionally, our logs show many instances of errors like 
>  
> merge: Mismatch : "2018"^^http://www.w3.org/2001/XMLSchema#integer != 
> "2019"^^http://www.w3.org/2001/XMLSchema#integer level=WARN 
> logger=org.apache.jena.sparql.engine.iterator.QueryIterProjectMerge 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

[jira] [Updated] (JENA-1895) Incorrect results on join of two subqueries with HAVING

Reply via email to