[
https://issues.apache.org/jira/browse/DRILL-6318?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16464864#comment-16464864
]
ASF GitHub Bot commented on DRILL-6318:
---------------------------------------
asfgit closed pull request #1204: DRILL-6318
URL: https://github.com/apache/drill/pull/1204
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushLimitToScanRule.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushLimitToScanRule.java
index 2d33d3842a..79ba9b0ab6 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushLimitToScanRule.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushLimitToScanRule.java
@@ -67,9 +67,11 @@ public boolean matches(RelOptRuleCall call) {
// mess up the schema since Convert_FromJson() is different from other
regular functions in that it only knows
// the output schema after evaluation is performed. When input has 0
row, Drill essentially does not have a way
// to know the output type.
+ // Cannot pushdown limit and offset in to flatten as long as we don't
know data distribution in flattened field
if (!limitRel.isPushDown() && (limitRel.getFetch() != null)
&& (!DrillRelOptUtil.isLimit0(limitRel.getFetch())
- || !DrillRelOptUtil.isProjectOutputSchemaUnknown(projectRel))) {
+ || !DrillRelOptUtil.isProjectOutputSchemaUnknown(projectRel))
+ && !DrillRelOptUtil.isProjectOutputRowcountUnknown(projectRel)) {
return true;
}
return false;
@@ -82,14 +84,7 @@ public void onMatch(RelOptRuleCall call) {
RelNode child = projectRel.getInput();
final RelNode limitUnderProject = limitRel.copy(limitRel.getTraitSet(),
ImmutableList.of(child));
final RelNode newProject = projectRel.copy(projectRel.getTraitSet(),
ImmutableList.of(limitUnderProject));
- if (DrillRelOptUtil.isProjectOutputRowcountUnknown(projectRel)) {
- //Preserve limit above the project since Flatten can produce more
rows. Also mark it so we do not fire the rule again.
- final RelNode limitAboveProject = new
DrillLimitRel(limitRel.getCluster(), limitRel.getTraitSet(), newProject,
- limitRel.getOffset(), limitRel.getFetch(), true);
- call.transformTo(limitAboveProject);
- } else {
- call.transformTo(newProject);
- }
+ call.transformTo(newProject);
}
};
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestBugFixes.java
b/exec/java-exec/src/test/java/org/apache/drill/TestBugFixes.java
index 100d194cff..f22db7b371 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestBugFixes.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestBugFixes.java
@@ -22,6 +22,7 @@
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.test.BaseTestQuery;
+import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
@@ -300,4 +301,16 @@ public void testDRILL5269() throws Exception {
test("ALTER SESSION RESET `planner.slice_target`");
}
}
+
+ @Test
+ public void testDRILL6318() throws Exception {
+ int rows = testSql("SELECT FLATTEN(data) AS d FROM
cp.`jsoninput/bug6318.json`");
+ Assert.assertEquals(11, rows);
+
+ rows = testSql("SELECT FLATTEN(data) AS d FROM cp.`jsoninput/bug6318.json`
LIMIT 3");
+ Assert.assertEquals(3, rows);
+
+ rows = testSql("SELECT FLATTEN(data) AS d FROM cp.`jsoninput/bug6318.json`
LIMIT 3 OFFSET 5");
+ Assert.assertEquals(3, rows);
+ }
}
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlattenPlanning.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlattenPlanning.java
index 0e2d92c5c2..9731aa2591 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlattenPlanning.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlattenPlanning.java
@@ -66,8 +66,9 @@ public void testPushFilterPastProjectWithFlattenNeg() throws
Exception {
@Test // DRILL-6099 : push limit past flatten(project)
public void testLimitPushdownPastFlatten() throws Exception {
final String query = "select rownum, flatten(complex) comp from
cp.`store/json/test_flatten_mappify2.json` limit 1";
- final String[] expectedPatterns =
{".*Limit\\(fetch=\\[1\\]\\).*",".*Flatten.*",".*Limit\\(fetch=\\[1\\]\\).*"};
- final String[] excludedPatterns = null;
+ //DRILL-6318 : limit should not push past flatten(project)
+ final String[] expectedPatterns = {"(?s).*Limit.*Flatten.*Project.*"};
+ final String[] excludedPatterns = {"(?s).*Limit.*Flatten.*Limit.*"};
PlanTestBase.testPlanMatchingPatterns(query, expectedPatterns,
excludedPatterns);
}
diff --git a/exec/java-exec/src/test/resources/jsoninput/bug6318.json
b/exec/java-exec/src/test/resources/jsoninput/bug6318.json
new file mode 100644
index 0000000000..1fdef8e824
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/bug6318.json
@@ -0,0 +1,12 @@
+[
+ { "name": "Helpless Templer", "data": [] },
+ { "name": "Humble Grandma", "data": ["Honored Boy Scout", "Yawning
Wolf"] },
+ { "name": "Slow Stinger", "data": [] },
+ { "name": "Slow Salesman", "data": ["Closed Queen", "Innocent
Volunteer", "Junior Wing", "Lame Mantis", "Old Master", "Numb Pawn"] },
+ { "name": "Mellow Tinkerbell", "data": [] },
+ { "name": "Digital Mercury", "data": ["Hollow Guardian", "Twin
Hurricane"] },
+ { "name": "Last Beehive", "data": [] },
+ { "name": "Infamous Balboa", "data": ["Helpless Avalange"] },
+ { "name": "Cold Nurse", "data": [] },
+ { "name": "Major Pawn", "data": [] }
+]
\ No newline at end of file
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Push down limit past flatten is incorrect
> -----------------------------------------
>
> Key: DRILL-6318
> URL: https://issues.apache.org/jira/browse/DRILL-6318
> Project: Apache Drill
> Issue Type: Bug
> Affects Versions: 1.13.0
> Environment: Ubuntu Mate 18.04
> Apache Drill 1.14.0-SNAPSHOT
> Reporter: Oleg Zinoviev
> Assignee: Oleg Zinoviev
> Priority: Major
> Labels: ready-to-commit
> Fix For: 1.14.0
>
> Attachments: sample.json
>
>
> Sample data: [^sample.json]
> Result of
> {code:sql}
> select flatten(data) as d from dfs.root.`sample.json`
> {code}
> is
> ||d||
> |Honored Boy Scout|
> |Yawning Wolf|
> |Closed Queen|
> |Innocent Volunteer|
> |Junior Wing|
> |Lame Mantis|
> |Old Master|
> |Numb Pawn|
> |Hollow Guardian|
> |Twin Hurricane|
> |Helpless Avalange|
> Let's try to get the first 3 rows:
> {code:sql}
> select flatten(data) as d from dfs.root.`sample.json` limit 3
> {code}
> Result has only 2 rows:
> ||d||
> |Honored Boy Scout|
> |Yawning Wolf|
> *Reason:* The limit was pushed down below flatten, so only the top 3 rows of
> the JSON were selected. Of these 3 rows, only the 2nd has items in the "data" field.
> Let's try to get 3 rows from the middle:
> {code:sql}
> select flatten(data) as d from dfs.root.`sample.json` limit 3 offset 5
> {code}
> Result is empty.
> *Reason:* The limit and offset were pushed down below flatten, so only rows 6, 7
> and 8 of the JSON were selected. These 3 rows contain only 3 items in the "data"
> field. After flatten, the limit and offset are applied a second time and reject
> all of the selected items.
> The error is in org/apache/drill/exec/planner/logical/DrillPushLimitToScanRule.java
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)