Repository: tajo Updated Branches: refs/heads/branch-0.11.2 2735c37fe -> a8f62b918
TAJO-2077: Join condition causes incorrect result, when a table has an empty row file. Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/a8f62b91 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/a8f62b91 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/a8f62b91 Branch: refs/heads/branch-0.11.2 Commit: a8f62b918f93997b32110400b350e64ad0459caf Parents: 2735c37 Author: Jinho Kim <[email protected]> Authored: Wed Feb 17 14:25:49 2016 +0900 Committer: Jinho Kim <[email protected]> Committed: Wed Feb 17 14:25:49 2016 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ .../tajo/engine/query/TestInnerJoinQuery.java | 14 ++++++++++ .../TestJoinQuery/customer_parquet/customer | Bin 0 -> 506 bytes .../TestJoinQuery/customer_parquet/customer1 | Bin 0 -> 506 bytes .../TestJoinQuery/customer_parquet/customer2 | Bin 0 -> 506 bytes .../TestJoinQuery/customer_parquet/customer3 | Bin 0 -> 506 bytes .../customer_parquet/customer_5rows | Bin 0 -> 2311 bytes .../dataset/TestJoinQuery/nation_parquet/nation | Bin 0 -> 314 bytes .../TestJoinQuery/nation_parquet/nation1 | Bin 0 -> 314 bytes .../TestJoinQuery/nation_parquet/nation2 | Bin 0 -> 314 bytes .../TestJoinQuery/nation_parquet/nation3 | Bin 0 -> 314 bytes .../TestJoinQuery/nation_parquet/nation_24rows | Bin 0 -> 3266 bytes .../testBroadcastJoinWithEmptyRows.sql | 13 +++++++++ .../create_customer_parquet_ddl.sql | 4 +++ .../TestJoinQuery/create_nation_parquet_ddl.sql | 6 ++++ .../testBroadcastJoinWithEmptyRows.result | 7 +++++ .../engine/planner/physical/MergeJoinExec.java | 4 +++ .../org/apache/tajo/storage/MergeScanner.java | 28 +++++++++---------- 18 files changed, 65 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 8db093b..55beeaa 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,9 @@ Release 0.11.2 - unreleased BUG FIXES + TAJO-2077: Join condition causes incorrect result, when a table has an empty + row file. (jinho) + TAJO-2072: The constructor of RegionSizeCalculator changes for HBase 1.0.0 compatibility. (Byunghwa Yun via jinho) http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java index 42d8b48..2b92d80 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java @@ -266,4 +266,18 @@ public class TestInnerJoinQuery extends TestJoinQuery { public final void testBroadcastTwoPartJoin() throws Exception { runSimpleTests(); } + + @Test + @Option(withExplain = false, withExplainGlobal = false, parameterized = true) + @SimpleTest() + public void testBroadcastJoinWithEmptyRows() throws Exception { + executeDDL("create_customer_parquet_ddl.sql", "customer_parquet"); + executeDDL("create_nation_parquet_ddl.sql", "nation_parquet"); + try { + runSimpleTests(); + } finally { + executeString("DROP TABLE customer_parquet"); + executeString("DROP TABLE nation_parquet"); + } + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer new file mode 100644 index 0000000..2ed9752 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 new file mode 100644 index 0000000..2ed9752 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 new file mode 100644 index 0000000..2ed9752 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 new file mode 100644 index 0000000..2ed9752 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows new file mode 100644 index 0000000..3f6fac9 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation new file mode 100644 index 0000000..401ecf0 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 new file mode 100644 index 0000000..401ecf0 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 new file mode 100644 index 0000000..401ecf0 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 new file mode 100644 index 0000000..401ecf0 Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows new file mode 100644 index 0000000..d6a27aa Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows differ http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql new file mode 100644 index 0000000..318758f --- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql @@ -0,0 +1,13 @@ +select + c_custkey, + c_name, + n_name +from + customer_parquet, + nation_parquet +where + c_nationkey = n_nationkey +order by + c_custkey, + c_name, + n_name \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql new file mode 100644 index 0000000..1c43a40 --- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql @@ -0,0 +1,4 @@ +create external table if not exists customer_parquet ( + c_custkey INT4, c_name TEXT, c_address TEXT, c_nationkey INT4, + c_phone TEXT, c_acctbal FLOAT8, c_mktsegment TEXT, c_comment TEXT) +using parquet location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql new file mode 100644 index 0000000..65bd56f --- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql @@ -0,0 +1,6 @@ +create external table if not exists nation_parquet ( + n_nationkey int, + n_name text, + n_regionkey int, + n_comment text) + using parquet location ${table.path}; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result new file mode 100644 index 0000000..7932c52 --- /dev/null +++ b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result @@ -0,0 +1,7 @@ +c_custkey,c_name,n_name +------------------------------- +1,Customer#000000001,MOROCCO +2,Customer#000000002,JORDAN +3,Customer#000000003,ARGENTINA +4,Customer#000000004,EGYPT +5,Customer#000000005,CANADA http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java index ab831b5..80c10f6 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java @@ -92,6 +92,10 @@ public class MergeJoinExec extends CommonJoinExec { outerTupleSlots.clear(); innerTupleSlots.clear(); + if (innerTuple == null || outerTuple == null) { + return null; + } + int cmp; while ((cmp = joincomparator.compare(outerTuple, innerTuple)) != 0) { if (cmp > 0) { http://git-wip-us.apache.org/repos/asf/tajo/blob/a8f62b91/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java index b3b1edd..fdc6416 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java @@ -37,14 +37,12 @@ import java.util.Iterator; import java.util.List; public class MergeScanner implements Scanner { - private Configuration conf; private TableMeta meta; private Schema schema; private List<Fragment> fragments; private Iterator<Fragment> iterator; private Fragment currentFragment; private Scanner currentScanner; - private Tuple tuple; private boolean projectable = false; private boolean selectable = false; private Schema target; @@ -59,7 +57,6 @@ public class MergeScanner implements Scanner { public MergeScanner(Configuration conf, Schema schema, TableMeta meta, List<Fragment> rawFragmentList, Schema target) throws IOException { - this.conf = conf; this.schema = schema; this.meta = meta; this.target = target; @@ -101,12 +98,15 @@ public class MergeScanner implements Scanner { @Override public Tuple next() throws IOException { - if (currentScanner != null) + Tuple tuple; + while (currentScanner != null) { tuple = currentScanner.next(); - if (tuple != null) { - return tuple; - } else { + if (tuple != null) { + return tuple; + } + + // since read tuple is null, close the current scanner. if (currentScanner != null) { currentScanner.close(); TableStats scannerTableStsts = currentScanner.getInputStats(); @@ -114,13 +114,13 @@ public class MergeScanner implements Scanner { tableStats.setReadBytes(tableStats.getReadBytes() + scannerTableStsts.getReadBytes()); tableStats.setNumRows(tableStats.getNumRows() + scannerTableStsts.getNumRows()); } + currentScanner = null; } + currentScanner = getNextScanner(); - if (currentScanner != null) { - tuple = currentScanner.next(); - } } - return tuple; + + return null; } @Override @@ -201,10 +201,10 @@ public class MergeScanner implements Scanner { currentScannerReadBytes = scannerTableStsts.getReadBytes(); } - return (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes(); - } else { - return progress; + progress = (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes(); } + + return progress; } @Override
