Repository: tajo Updated Branches: refs/heads/master 189cf3ffe -> fcc5da03a
TAJO-994: 'count(distinct x)' function counts first null value. Closes #106 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fcc5da03 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fcc5da03 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fcc5da03 Branch: refs/heads/master Commit: fcc5da03ad1e9a692efad20aaf90a4005f9084d0 Parents: 189cf3f Author: Hyunsik Choi <[email protected]> Authored: Mon Aug 11 11:22:59 2014 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Mon Aug 11 11:22:59 2014 +0900 ---------------------------------------------------------------------- CHANGES | 2 + .../function/builtin/CountValueDistinct.java | 4 +- .../tajo/engine/query/TestGroupByQuery.java | 46 ++++++++++++++------ .../testDistinctAggregationCaseByCase3.sql | 8 ++++ .../testDistinctAggregationCaseByCase4.sql | 7 +++ .../testDistinctAggregationCaseByCase3.result | 3 ++ .../testDistinctAggregationCaseByCase4.result | 3 ++ 7 files changed, 57 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 7487983..864eaef 100644 --- a/CHANGES +++ b/CHANGES @@ -106,6 +106,8 @@ Release 0.9.0 - unreleased BUG FIXES + TAJO-994: 'count(distinct x)' function counts first null value. (hyunsik) + TAJO-996: Sometimes, scheduleFetchesByEvenDistributedVolumes loses some FetchImpls. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java index c0f5ec3..7a7f3d8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java @@ -24,7 +24,6 @@ import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.Int8Datum; -import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.function.FunctionContext; import org.apache.tajo.engine.function.annotation.Description; import org.apache.tajo.engine.function.annotation.ParamTypes; @@ -57,7 +56,8 @@ public final class CountValueDistinct extends CountRows { public void merge(FunctionContext context, Tuple part) { CountDistinctValueContext distinctContext = (CountDistinctValueContext) context; Datum value = part.get(0); - if ((distinctContext.latest == null || (!distinctContext.latest.equals(value)) && !(value instanceof NullDatum))) { + + if (!value.isNull() && (distinctContext.latest == null || (!distinctContext.latest.equals(value)))) { distinctContext.latest = value; distinctContext.count++; } http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java index 79efd92..fe9f990 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java @@ -342,7 +342,7 @@ public class TestGroupByQuery extends QueryTestCaseBase { } @Test - public final void testDistinctAggregationCasebyCase2() throws Exception { + public final void testDistinctAggregationCaseByCase3() throws Exception { // first distinct is smaller than second distinct. KeyValueSet tableOptions = new KeyValueSet(); tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); @@ -364,22 +364,40 @@ public class TestGroupByQuery extends QueryTestCaseBase { TajoTestingCluster.createTable("table10", schema, tableOptions, data); - ResultSet res = executeString( - "select col1 \n" + - ",count(distinct col2) as cnt1\n" + - ",count(distinct case when col3 is not null then col2 else null end) as cnt2\n" + - "from table10 \n" + - "group by col1" - ); - String result = resultSetToString(res); + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); - String expected = "col1,cnt1,cnt2\n" + - "-------------------------------\n" + - "a,3,1\n"; + executeString("DROP TABLE table10 PURGE").close(); + } - assertEquals(expected, result); + @Test + public final void testDistinctAggregationCaseByCase4() throws Exception { + // Reproduction case for TAJO-994 + KeyValueSet tableOptions = new KeyValueSet(); + tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER); + tableOptions.put(StorageConstants.CSVFILE_NULL, "\\\\N"); - executeString("DROP TABLE table10 PURGE").close(); + Schema schema = new Schema(); + schema.addColumn("col1", Type.TEXT); + schema.addColumn("col2", Type.TEXT); + + String[] data = new String[]{ + "a|\\N", + "a|\\N|", + "a|\\N|", + "a|\\N|", + "a|\\N|", + "a|\\N|" + }; + + TajoTestingCluster.createTable("table11", schema, tableOptions, data); + + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + + executeString("DROP TABLE table11 PURGE").close(); } @Test http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql new file mode 100644 index 0000000..925cadd --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql @@ -0,0 +1,8 @@ +select + col1, + count(distinct col2) as cnt1, + count(distinct case when col3 is not null then col2 else null end) as cnt2 +from + table10 +group by + col1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql new file mode 100644 index 0000000..352f21e --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql @@ -0,0 +1,7 @@ +select + col1, + count(distinct col2) as cnt +from + table11 +group by + col1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result new file mode 100644 index 0000000..320ab29 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result @@ -0,0 +1,3 @@ +col1,cnt1,cnt2 +------------------------------- +a,3,0 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result new file mode 100644 index 0000000..a65e71f --- /dev/null +++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result @@ -0,0 +1,3 @@ +col1,cnt +------------------------------- +a,0 \ No newline at end of file
