Bruce Robbins created SPARK-47633:
-------------------------------------

             Summary: Cache miss for queries using JOIN LATERAL with join condition
                 Key: SPARK-47633
                 URL: https://issues.apache.org/jira/browse/SPARK-47633
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 4.0.0
            Reporter: Bruce Robbins


For example, the view below is cached, but a subsequent query against it does not use the cached data:
{noformat}
CREATE or REPLACE TEMP VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2);
CREATE or REPLACE TEMP VIEW t2(c1, c2) AS VALUES (0, 1), (1, 2);

create or replace temp view v1 as
select *
from t1
join lateral (
  select c1 as a, c2 as b
  from t2)
on c1 = a;

cache table v1;

explain select * from v1;
== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- BroadcastHashJoin [c1#180], [a#173], Inner, BuildRight, false
   :- LocalTableScan [c1#180, c2#181]
   +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, 
false] as bigint)),false), [plan_id=113]
      +- LocalTableScan [a#173, b#174]
{noformat}
Note that there is no {{InMemoryRelation}} in the physical plan, i.e., the query misses the cache and recomputes the join.

However, if you move the join condition into the lateral subquery (as a WHERE clause), the cached plan is used:
{noformat}
CREATE or REPLACE TEMP VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2);
CREATE or REPLACE TEMP VIEW t2(c1, c2) AS VALUES (0, 1), (1, 2);

create or replace temp view v2 as
select *
from t1
join lateral (
  select c1 as a, c2 as b
  from t2
  where t1.c1 = t2.c1);

cache table v2;

explain select * from v2;
== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Scan In-memory table v2 [c1#176, c2#177, a#178, b#179]
      +- InMemoryRelation [c1#176, c2#177, a#178, b#179], StorageLevel(disk, 
memory, deserialized, 1 replicas)
            +- AdaptiveSparkPlan isFinalPlan=true
               +- == Final Plan ==
                  *(1) Project [c1#26, c2#27, a#19, b#20]
                  +- *(1) BroadcastHashJoin [c1#26], [c1#30], Inner, BuildLeft, 
false
                     :- BroadcastQueryStage 0
                     :  +- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), 
[plan_id=37]
                     :     +- LocalTableScan [c1#26, c2#27]
                     +- *(1) LocalTableScan [a#19, b#20, c1#30]
               +- == Initial Plan ==
                  Project [c1#26, c2#27, a#19, b#20]
                  +- BroadcastHashJoin [c1#26], [c1#30], Inner, BuildLeft, false
                     :- BroadcastExchange 
HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), 
[plan_id=37]
                     :  +- LocalTableScan [c1#26, c2#27]
                     +- LocalTableScan [a#19, b#20, c1#30]
{noformat}




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to