[ 
https://issues.apache.org/jira/browse/DRILL-4196?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16497339#comment-16497339
 ] 

ASF GitHub Bot commented on DRILL-4196:
---------------------------------------

ilooner closed pull request #338: DRILL-4196 Fix to stop returning no more data 
when output batch is fu…
URL: https://github.com/apache/drill/pull/338
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinStatus.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinStatus.java
index 9e31763398..8e48515150 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinStatus.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinStatus.java
@@ -42,6 +42,7 @@
   private boolean allowMarking;
 
   public boolean ok = true;
+  public boolean hasMoreData = false;
 
   public JoinStatus(RecordIterator left, RecordIterator right, MergeJoinBatch 
output) {
     this.left = left;
@@ -120,6 +121,14 @@ public boolean shouldMark() {
     return allowMarking;
   }
 
+  public boolean isHasMoreData() {
+    return hasMoreData;
+  }
+
+  public void setHasMoreData(boolean hasMoreData) {
+    this.hasMoreData = hasMoreData;
+  }
+
   /**
    * Return state of join based on status of left and right iterator.
    * @return
@@ -132,6 +141,9 @@ public JoinOutcome getOutcome() {
     if (!ok) {
       return JoinOutcome.FAILURE;
     }
+    if (hasMoreData) {
+      return JoinOutcome.BATCH_RETURNED;
+    }
     if (bothMatches(IterOutcome.NONE) ||
       (joinType == JoinRelType.INNER && eitherMatches(IterOutcome.NONE)) ||
       (joinType == JoinRelType.LEFT && getLeftStatus() == IterOutcome.NONE) ||
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinTemplate.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinTemplate.java
index 43cbf71d70..37cf0ed559 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinTemplate.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinTemplate.java
@@ -42,12 +42,13 @@ public void setupJoin(FragmentContext context, JoinStatus 
status, VectorContaine
    */
   public final boolean doJoin(final JoinStatus status) {
     final boolean isLeftJoin = 
(((MergeJoinPOP)status.outputBatch.getPopConfig()).getJoinType() == 
JoinRelType.LEFT);
-
+    status.setHasMoreData(false);
     while (!status.isOutgoingBatchFull()) {
       if (status.right.finished()) {
         if (isLeftJoin) {
           while (!status.left.finished()) {
             if (status.isOutgoingBatchFull()) {
+              status.setHasMoreData(true);
               return true;
             }
             doCopyLeft(status.left.getCurrentPosition(), 
status.getOutPosition());
@@ -86,6 +87,7 @@ public final boolean doJoin(final JoinStatus status) {
           if (status.isOutgoingBatchFull()) {
             // Leave iterators at their current positions and markers.
             // Don't mark on all subsequent doJoin iterations.
+            status.setHasMoreData(true);
             status.disableMarking();
             return true;
           }
@@ -97,6 +99,7 @@ public final boolean doJoin(final JoinStatus status) {
               doCopyRight(status.right.getCurrentPosition(), 
status.getOutPosition());
               status.incOutputPos();
               if (status.isOutgoingBatchFull()) {
+                status.setHasMoreData(true);
                 status.disableMarking();
                 return true;
               }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinAdvanced.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinAdvanced.java
index ac6ac89369..3e0deb206a 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinAdvanced.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinAdvanced.java
@@ -213,4 +213,38 @@ public void testDrill4165() throws Exception {
       .baselineValues(202452l)
       .go();
   }
+
+  @Test
+  public void testDrill4196() throws Exception {
+    final String leftSide = BaseTestQuery.getTempDir("merge-join-left.json");
+    final String rightSide = BaseTestQuery.getTempDir("merge-join-right.json");
+    final BufferedWriter leftWriter = new BufferedWriter(new FileWriter(new 
File(leftSide)));
+    final BufferedWriter rightWriter = new BufferedWriter(new FileWriter(new 
File(rightSide)));
+
+    // output batch is 32k, create 60k left batch
+    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 9999, 9999));
+    for (int i=0; i < 6000; ++i) {
+      leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10000, 
10000));
+    }
+    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10001, 
10001));
+    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10002, 
10002));
+
+    // Keep all values same. Jon will consume entire right side.
+    for (int i=0; i < 800; ++i) {
+      rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10000, 
10000));
+    }
+
+    leftWriter.close();
+    rightWriter.close();
+
+    final String query1 = String.format("select count(*) c1 from dfs_test.`%s` 
L %s join dfs_test.`%s` R on L.k=R.k1",
+      leftSide, "inner", rightSide);
+    testBuilder()
+      .sqlQuery(query1)
+      .optionSettingQueriesForTestQuery("alter session set 
`planner.enable_hashjoin` = false")
+      .unOrdered()
+      .baselineColumns("c1")
+      .baselineValues(6000*800L)
+      .go();
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> some TPCDS queries return wrong result when hash join is disabled
> -----------------------------------------------------------------
>
>                 Key: DRILL-4196
>                 URL: https://issues.apache.org/jira/browse/DRILL-4196
>             Project: Apache Drill
>          Issue Type: Bug
>            Reporter: Victoria Markman
>            Assignee: amit hadke
>            Priority: Major
>             Fix For: 1.5.0
>
>         Attachments: 1.5.0-amit-branch_tpcds_sf1.txt, query40.tar, query52.tar
>
>
> With hash join disabled, query52.sql and query40.sql returned incorrect results 
> with 1.4.0:
> {noformat}
> +-----------------+-------------------------------------------+---------------------------------------------------------------------+----------------------------+--------------+----------------------------+
> |     version     |                 commit_id                 |               
>             commit_message                            |        commit_time    
>      | build_email  |         build_time         |
> +-----------------+-------------------------------------------+---------------------------------------------------------------------+----------------------------+--------------+----------------------------+
> | 1.4.0-SNAPSHOT  | b9068117177c3b47025f52c00f67938e0c3e4732  | DRILL-4165 
> Add a precondition for size of merge join record batch.  | 08.12.2015 @ 
> 01:25:34 UTC  | Unknown      | 08.12.2015 @ 03:36:25 UTC  |
> +-----------------+-------------------------------------------+---------------------------------------------------------------------+----------------------------+--------------+----------------------------+
> 1 row selected (2.13 seconds)
> {noformat}
> Setup and options are the same as in DRILL-4190.
> See the attached queries (.sql), expected results (.e_tsv), and actual output (.out).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to