[
https://issues.apache.org/jira/browse/DRILL-5375?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15938228#comment-15938228
]
Arina Ielchiieva commented on DRILL-5375:
-----------------------------------------
Nested loop join code generation example for a BETWEEN condition:
Query: {noformat}select t1.c1, t2.c1, t2.c2 from t1 inner join t2 on t1.c1
between t2.c1 and t2.c2{noformat}
Raw join condition:{noformat} AND(>=($0, $1), <=($0, $2)){noformat}
Logical expression:
{noformat}
FunctionCall [func=booleanAnd,
args=[FunctionCall [func=greater_than_or_equal_to, args=[`i1`,
`i10`]],
FunctionCall [func=less_than_or_equal_to, args=[`i1`, `i2`]]]
{noformat}
Generated class:
{noformat}
package org.apache.drill.exec.test.generated;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.holders.BitHolder;
import org.apache.drill.exec.expr.holders.IntHolder;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.vector.IntVector;
import org.apache.drill.exec.vector.NullableIntVector;
public class NestedLoopJoinGen20 {
IntVector vv1;
IntVector[] vv5;
IntVector[] vv10;
IntVector vv15;
IntVector vv18;
IntVector[] vv21;
NullableIntVector vv24;
IntVector[] vv27;
NullableIntVector vv30;
public boolean doEval(int leftIndex, int rightBatchIndex, int
rightRecordIndexWithinBatch)
throws SchemaChangeException
{
{
BitHolder out0 = new BitHolder();
AndOP0:
{
IntHolder out4 = new IntHolder();
{
out4 .value = vv1 .getAccessor().get((leftIndex));
}
IntHolder out8 = new IntHolder();
{
out8 .value = vv5 [((rightBatchIndex)>>>
16)].getAccessor().get(((rightRecordIndexWithinBatch)& 65535));
}
//---- start of eval portion of less_than_or_equal_to function. ----//
BitHolder out9 = new BitHolder();
{
final BitHolder out = new BitHolder();
IntHolder left = out4;
IntHolder right = out8;
GCompareIntVsInt$LessThanEqIntVsInt_eval: {
out.value = left.value <= right.value ? 1 : 0;
}
out9 = out;
}
//---- end of eval portion of less_than_or_equal_to function. ----//
if (out9 .value!= 1) {
out0 .value = 0;
break AndOP0;
}
IntHolder out13 = new IntHolder();
{
out13 .value = vv10 [((rightBatchIndex)>>>
16)].getAccessor().get(((rightRecordIndexWithinBatch)& 65535));
}
//---- start of eval portion of greater_than_or_equal_to function. ----//
BitHolder out14 = new BitHolder();
{
final BitHolder out = new BitHolder();
IntHolder left = out4;
IntHolder right = out13;
GCompareIntVsInt$GreaterThanEqIntVsInt_eval: {
out.value = left.value >= right.value ? 1 : 0;
}
out14 = out;
}
//---- end of eval portion of greater_than_or_equal_to function. ----//
if (out14 .value!= 1) {
out0 .value = 0;
break AndOP0;
}
out0 .value = 1;
}
return (out0 .value == 1);
}
}
public void doSetup(FragmentContext context, VectorContainer
rightContainer, RecordBatch leftBatch, RecordBatch outgoing)
throws SchemaChangeException
{
{
int[] fieldIds2 = new int[ 1 ] ;
fieldIds2 [ 0 ] = 0;
Object tmp3 = (leftBatch).getValueAccessorById(IntVector.class,
fieldIds2).getValueVector();
if (tmp3 == null) {
throw new SchemaChangeException("Failure while loading vector
vv1 with id: TypedFieldId [fieldIds=[0], remainder=null].");
}
vv1 = ((IntVector) tmp3);
int[] fieldIds6 = new int[ 1 ] ;
fieldIds6 [ 0 ] = 1;
Object tmp7 =
(rightContainer).getValueAccessorById(IntVector.class,
fieldIds6).getValueVectors();
if (tmp7 == null) {
throw new SchemaChangeException("Failure while loading vector
vv5 with id: TypedFieldId [fieldIds=[1], remainder=null].");
}
vv5 = ((IntVector[]) tmp7);
/** start SETUP for function less_than_or_equal_to **/
{
{}
}
/** end SETUP for function less_than_or_equal_to **/
int[] fieldIds11 = new int[ 1 ] ;
fieldIds11 [ 0 ] = 0;
Object tmp12 =
(rightContainer).getValueAccessorById(IntVector.class,
fieldIds11).getValueVectors();
if (tmp12 == null) {
throw new SchemaChangeException("Failure while loading vector
vv10 with id: TypedFieldId [fieldIds=[0], remainder=null].");
}
vv10 = ((IntVector[]) tmp12);
/** start SETUP for function greater_than_or_equal_to **/
{
{}
}
/** end SETUP for function greater_than_or_equal_to **/
int[] fieldIds16 = new int[ 1 ] ;
fieldIds16 [ 0 ] = 0;
Object tmp17 = (leftBatch).getValueAccessorById(IntVector.class,
fieldIds16).getValueVector();
if (tmp17 == null) {
throw new SchemaChangeException("Failure while loading vector
vv15 with id: TypedFieldId [fieldIds=[0], remainder=null].");
}
vv15 = ((IntVector) tmp17);
int[] fieldIds19 = new int[ 1 ] ;
fieldIds19 [ 0 ] = 0;
Object tmp20 = (outgoing).getValueAccessorById(IntVector.class,
fieldIds19).getValueVector();
if (tmp20 == null) {
throw new SchemaChangeException("Failure while loading vector
vv18 with id: TypedFieldId [fieldIds=[0], remainder=null].");
}
vv18 = ((IntVector) tmp20);
}
{
int[] fieldIds22 = new int[ 1 ] ;
fieldIds22 [ 0 ] = 0;
Object tmp23 =
(rightContainer).getValueAccessorById(IntVector.class,
fieldIds22).getValueVectors();
if (tmp23 == null) {
throw new SchemaChangeException("Failure while loading vector
vv21 with id: TypedFieldId [fieldIds=[0], remainder=null].");
}
vv21 = ((IntVector[]) tmp23);
int[] fieldIds25 = new int[ 1 ] ;
fieldIds25 [ 0 ] = 1;
Object tmp26 =
(outgoing).getValueAccessorById(NullableIntVector.class,
fieldIds25).getValueVector();
if (tmp26 == null) {
throw new SchemaChangeException("Failure while loading vector
vv24 with id: TypedFieldId [fieldIds=[1], remainder=null].");
}
vv24 = ((NullableIntVector) tmp26);
}
{
int[] fieldIds28 = new int[ 1 ] ;
fieldIds28 [ 0 ] = 1;
Object tmp29 =
(rightContainer).getValueAccessorById(IntVector.class,
fieldIds28).getValueVectors();
if (tmp29 == null) {
throw new SchemaChangeException("Failure while loading vector
vv27 with id: TypedFieldId [fieldIds=[1], remainder=null].");
}
vv27 = ((IntVector[]) tmp29);
int[] fieldIds31 = new int[ 1 ] ;
fieldIds31 [ 0 ] = 2;
Object tmp32 =
(outgoing).getValueAccessorById(NullableIntVector.class,
fieldIds31).getValueVector();
if (tmp32 == null) {
throw new SchemaChangeException("Failure while loading vector
vv30 with id: TypedFieldId [fieldIds=[2], remainder=null].");
}
vv30 = ((NullableIntVector) tmp32);
}
}
public void emitLeft(int leftIndex, int outIndex)
throws SchemaChangeException
{
{
vv18 .copyFromSafe((leftIndex), (outIndex), vv15);
}
}
public void emitRight(int batchIndex, int recordIndexWithinBatch, int
outIndex)
throws SchemaChangeException
{
{
vv24 .copyFromSafe((recordIndexWithinBatch), (outIndex), vv21
[(batchIndex)]);
}
{
vv30 .copyFromSafe((recordIndexWithinBatch), (outIndex), vv27
[(batchIndex)]);
}
}
public void __DRILL_INIT__()
throws SchemaChangeException
{
}
}
{noformat}
> Nested loop join: return correct result for left join
> -----------------------------------------------------
>
> Key: DRILL-5375
> URL: https://issues.apache.org/jira/browse/DRILL-5375
> Project: Apache Drill
> Issue Type: Bug
> Affects Versions: 1.8.0
> Reporter: Arina Ielchiieva
> Assignee: Arina Ielchiieva
> Labels: doc-impacting
>
> Mini repro:
> 1. Create 2 Hive tables with data
> {code}
> CREATE TABLE t1 (
> FYQ varchar(999),
> dts varchar(999),
> dte varchar(999)
> )
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
> 2016-Q1,2016-06-01,2016-09-30
> 2016-Q2,2016-09-01,2016-12-31
> 2016-Q3,2017-01-01,2017-03-31
> 2016-Q4,2017-04-01,2017-06-30
> CREATE TABLE t2 (
> who varchar(999),
> event varchar(999),
> dt varchar(999)
> )
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
> aperson,did somthing,2017-01-06
> aperson,did somthing else,2017-01-12
> aperson,had chrsitmas,2016-12-26
> aperson,went wild,2016-01-01
> {code}
> 2. Impala Query shows correct result
> {code}
> select t2.dt, t1.fyq, t2.who, t2.event
> from t2
> left join t1 on t2.dt between t1.dts and t1.dte
> order by t2.dt;
> +------------+---------+---------+-------------------+
> | dt | fyq | who | event |
> +------------+---------+---------+-------------------+
> | 2016-01-01 | NULL | aperson | went wild |
> | 2016-12-26 | 2016-Q2 | aperson | had chrsitmas |
> | 2017-01-06 | 2016-Q3 | aperson | did somthing |
> | 2017-01-12 | 2016-Q3 | aperson | did somthing else |
> +------------+---------+---------+-------------------+
> {code}
> 3. Drill query shows wrong results:
> {code}
> alter session set planner.enable_nljoin_for_scalar_only=false;
> use hive;
> select t2.dt, t1.fyq, t2.who, t2.event
> from t2
> left join t1 on t2.dt between t1.dts and t1.dte
> order by t2.dt;
> +-------------+----------+----------+--------------------+
> | dt | fyq | who | event |
> +-------------+----------+----------+--------------------+
> | 2016-12-26 | 2016-Q2 | aperson | had chrsitmas |
> | 2017-01-06 | 2016-Q3 | aperson | did somthing |
> | 2017-01-12 | 2016-Q3 | aperson | did somthing else |
> +-------------+----------+----------+--------------------+
> 3 rows selected (2.523 seconds)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)