[ 
https://issues.apache.org/jira/browse/ASTERIXDB-3329?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Wail Y. Alkowaileet updated ASTERIXDB-3329:
-------------------------------------------
    Description: 
DDL:
{noformat}
DROP DATAVERSE tpch IF EXISTS;
CREATE DATAVERSE tpch;
 
USE tpch;

CREATE TYPE CustomerType AS CLOSED {
  c_custkey : bigint,
  c_name : string,
  c_address : string,
  c_nationkey : bigint,
  c_phone : string,
  c_acctbal : double,
  c_mktsegment : string,
  c_comment : string
};

CREATE TYPE OrderType AS CLOSED {
  o_orderkey : bigint,
  o_custkey : bigint,
  o_orderstatus : string,
  o_totalprice : double,
  o_orderdate : string,
  o_orderpriority : string,
  o_clerk : string,
  o_shippriority : bigint,
  o_comment : string
};

CREATE DATASET Customer(CustomerType)
PRIMARY KEY c_custkey;

CREATE DATASET Orders(OrderType) 
PRIMARY KEY o_orderkey;


CREATE INDEX customer_fk_nation ON Customer (c_nationkey);
CREATE INDEX orders_fk_customer ON Orders (o_custkey);
CREATE INDEX orders_orderdateIx ON Orders (o_orderdate);
{noformat}
Queries:
{noformat}
USE tpch;

-- This query produces correct result
SELECT COUNT(*) 
FROM            Customer c 
LEFT OUTER JOIN Orders o 
ON  c.c_custkey = o.o_custkey 
AND o.o_comment NOT LIKE '%special%requests%'

USE tpch;

-- This query produces incorrect result
SELECT COUNT(*) 
FROM            Customer c 
LEFT OUTER JOIN Orders o 
ON  c.c_custkey /*+indexnl*/ = o.o_custkey 
AND o.o_comment NOT LIKE '%special%requests%'
{noformat}
using the index to perform this query fails to produce a correct result as an 
artifact of breaking the join condition into multiple selects as depicted in 
the plan below:
{noformat}
...
  select (eq($$56, $$o.getField(1))) retain-untrue ($$57 <- missing) 
  -- STREAM_SELECT  |PARTITIONED|
    select (not(like($$o.getField(8), \"%special%requests%\"))) 
    -- STREAM_SELECT  |PARTITIONED|
      project ([$$56, $$57, $$o]) 
      -- STREAM_PROJECT  |PARTITIONED|
        exchange 
        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
          left-outer-unnest-map [$$57, $$o] <- index-search(\"Orders\", 0, 
\"Default\", \"tpch\", \"Orders\", true, false, 1, $$66, 1, $$66, true, true, 
true) 
            ...
{noformat}
The two consecutive SELECT operators should be merged with retain MISSING 
condition.

  was:
DDL:
{noformat}
DROP DATAVERSE tpch IF EXISTS;
CREATE DATAVERSE tpch;
 
USE tpch;

CREATE TYPE CustomerType AS CLOSED {
  c_custkey : bigint,
  c_name : string,
  c_address : string,
  c_nationkey : bigint,
  c_phone : string,
  c_acctbal : double,
  c_mktsegment : string,
  c_comment : string
};

CREATE TYPE OrderType AS CLOSED {
  o_orderkey : bigint,
  o_custkey : bigint,
  o_orderstatus : string,
  o_totalprice : double,
  o_orderdate : string,
  o_orderpriority : string,
  o_clerk : string,
  o_shippriority : bigint,
  o_comment : string
};

CREATE DATASET Customer(CustomerType)
PRIMARY KEY c_custkey;

CREATE DATASET Orders(OrderType) 
PRIMARY KEY o_orderkey;


CREATE INDEX customer_fk_nation ON Customer (c_nationkey);
CREATE INDEX orders_fk_customer ON Orders (o_custkey);
CREATE INDEX orders_orderdateIx ON Orders (o_orderdate);
{noformat}

Queries:
{noformat}
USE tpch;

-- This query produces correct result
SELECT COUNT(*) 
FROM            Customer c 
LEFT OUTER JOIN Orders o 
ON  c.c_custkey = o.o_custkey 
AND o.o_comment NOT LIKE '%special%requests%'

USE tpch;

-- This query produces incorrect result
SELECT COUNT(*) 
FROM            Customer c 
LEFT OUTER JOIN Orders o 
ON  c.c_custkey /*+indexnl*/ = o.o_custkey 
AND o.o_comment NOT LIKE '%special%requests%'
{noformat}

using the index to perform this query fails to produce a correct result as an 
artifact of breaking the select condition as depicted in the plan below:
{noformat}
...
  select (eq($$56, $$o.getField(1))) retain-untrue ($$57 <- missing) 
  -- STREAM_SELECT  |PARTITIONED|
    select (not(like($$o.getField(8), \"%special%requests%\"))) 
    -- STREAM_SELECT  |PARTITIONED|
      project ([$$56, $$57, $$o]) 
      -- STREAM_PROJECT  |PARTITIONED|
        exchange 
        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
          left-outer-unnest-map [$$57, $$o] <- index-search(\"Orders\", 0, 
\"Default\", \"tpch\", \"Orders\", true, false, 1, $$66, 1, $$66, true, true, 
true) 
            ...
{noformat}

The two consecutive SELECT operators should be merged with retain MISSING 
condition.



> Consolidate SELECT operators on index nested-loop outer join
> ------------------------------------------------------------
>
>                 Key: ASTERIXDB-3329
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-3329
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: COMP - Compiler
>    Affects Versions: 0.9.9
>            Reporter: Wail Y. Alkowaileet
>            Assignee: Wail Y. Alkowaileet
>            Priority: Major
>             Fix For: 0.9.9
>
>
> DDL:
> {noformat}
> DROP DATAVERSE tpch IF EXISTS;
> CREATE DATAVERSE tpch;
>  
> USE tpch;
> CREATE TYPE CustomerType AS CLOSED {
>   c_custkey : bigint,
>   c_name : string,
>   c_address : string,
>   c_nationkey : bigint,
>   c_phone : string,
>   c_acctbal : double,
>   c_mktsegment : string,
>   c_comment : string
> };
> CREATE TYPE OrderType AS CLOSED {
>   o_orderkey : bigint,
>   o_custkey : bigint,
>   o_orderstatus : string,
>   o_totalprice : double,
>   o_orderdate : string,
>   o_orderpriority : string,
>   o_clerk : string,
>   o_shippriority : bigint,
>   o_comment : string
> };
> CREATE DATASET Customer(CustomerType)
> PRIMARY KEY c_custkey;
> CREATE DATASET Orders(OrderType) 
> PRIMARY KEY o_orderkey;
> CREATE INDEX customer_fk_nation ON Customer (c_nationkey);
> CREATE INDEX orders_fk_customer ON Orders (o_custkey);
> CREATE INDEX orders_orderdateIx ON Orders (o_orderdate);
> {noformat}
> Queries:
> {noformat}
> USE tpch;
> -- This query produces correct result
> SELECT COUNT(*) 
> FROM            Customer c 
> LEFT OUTER JOIN Orders o 
> ON  c.c_custkey = o.o_custkey 
> AND o.o_comment NOT LIKE '%special%requests%'
> USE tpch;
> -- This query produces incorrect result
> SELECT COUNT(*) 
> FROM            Customer c 
> LEFT OUTER JOIN Orders o 
> ON  c.c_custkey /*+indexnl*/ = o.o_custkey 
> AND o.o_comment NOT LIKE '%special%requests%'
> {noformat}
> using the index to perform this query fails to produce a correct result as an 
> artifact of breaking the join condition into multiple selects as depicted in 
> the plan below:
> {noformat}
> ...
>   select (eq($$56, $$o.getField(1))) retain-untrue ($$57 <- missing) 
>   -- STREAM_SELECT  |PARTITIONED|
>     select (not(like($$o.getField(8), \"%special%requests%\"))) 
>     -- STREAM_SELECT  |PARTITIONED|
>       project ([$$56, $$57, $$o]) 
>       -- STREAM_PROJECT  |PARTITIONED|
>         exchange 
>         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>           left-outer-unnest-map [$$57, $$o] <- index-search(\"Orders\", 0, 
> \"Default\", \"tpch\", \"Orders\", true, false, 1, $$66, 1, $$66, true, true, 
> true) 
>             ...
> {noformat}
> The two consecutive SELECT operators should be merged with retain MISSING 
> condition.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to