Keuntae Park created TAJO-541:
---------------------------------
Summary: Parsing Group by clause differently from Hive
Key: TAJO-541
URL: https://issues.apache.org/jira/browse/TAJO-541
Project: Tajo
Issue Type: Bug
Reporter: Keuntae Park
Priority: Minor
testGroupby1() of TestHiveConverter.java failed if I changed the group by_1.sql
as follows.
{noformat}
select col0, col1, col2, col3, sum(col4) as total, avg(col5) from base group by
col0, cube (col1, col2), col3 having total > 100
{noformat}
Following is error log (look at "groups").
{noformat}
2014-01-22 11:11:41,702 INFO parser.TestHiveConverter
(TestHiveConverter.java:compareJsonResult(63)) - ### Tajo Parse Result ###
{
"all": false,
"distinct": false,
"targets": [
{
"child": {
"name": "col0",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col1",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col2",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col3",
"opType": "Column"
},
"opType": "Target"
},
{
"alias": "total",
"child": {
"distinct": false,
"signature": "sum",
"params": [
{
"name": "col4",
"opType": "Column"
}
],
"opType": "GeneralSetFunction"
},
"opType": "Target"
},
{
"child": {
"distinct": false,
"signature": "avg",
"params": [
{
"name": "col5",
"opType": "Column"
}
],
"opType": "GeneralSetFunction"
},
"opType": "Target"
}
],
"child": {
"qual": {
"left": {
"name": "total",
"opType": "Column"
},
"right": {
"valueType": "Unsigned_Integer",
"value": "100",
"opType": "Literal"
},
"opType": "GreaterThan"
},
"child": {
"groups": [
{
"group_type": "OrdinaryGroup",
"grouping_sets": [
{
"name": "col0",
"opType": "Column"
}
]
},
{
"group_type": "Cube",
"grouping_sets": [
{
"name": "col1",
"opType": "Column"
},
{
"name": "col2",
"opType": "Column"
}
]
},
{
"group_type": "OrdinaryGroup",
"grouping_sets": [
{
"name": "col3",
"opType": "Column"
}
]
}
],
"child": {
"relations": [
{
"tableName": "base",
"opType": "Relation"
}
],
"opType": "RelationList"
},
"opType": "Aggregation"
},
"opType": "Having"
},
"opType": "Projection"
}
2014-01-22 11:11:41,710 INFO parser.TestHiveConverter
(TestHiveConverter.java:compareJsonResult(64)) - ### Hive Parse Result ###
{
"all": false,
"distinct": false,
"targets": [
{
"child": {
"name": "col0",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col1",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col2",
"opType": "Column"
},
"opType": "Target"
},
{
"child": {
"name": "col3",
"opType": "Column"
},
"opType": "Target"
},
{
"alias": "total",
"child": {
"distinct": false,
"signature": "sum",
"params": [
{
"name": "col4",
"opType": "Column"
}
],
"opType": "GeneralSetFunction"
},
"opType": "Target"
},
{
"child": {
"distinct": false,
"signature": "avg",
"params": [
{
"name": "col5",
"opType": "Column"
}
],
"opType": "GeneralSetFunction"
},
"opType": "Target"
}
],
"child": {
"qual": {
"left": {
"name": "total",
"opType": "Column"
},
"right": {
"valueType": "Unsigned_Integer",
"value": "100",
"opType": "Literal"
},
"opType": "GreaterThan"
},
"child": {
"groups": [
{
"group_type": "OrdinaryGroup",
"grouping_sets": [
{
"name": "col0",
"opType": "Column"
},
{
"name": "col3",
"opType": "Column"
}
]
},
{
"group_type": "Cube",
"grouping_sets": [
{
"name": "col1",
"opType": "Column"
},
{
"name": "col2",
"opType": "Column"
}
]
}
],
"child": {
"relations": [
{
"tableName": "base",
"opType": "Relation"
}
],
"opType": "RelationList"
},
"opType": "Aggregation"
},
"opType": "Having"
},
"opType": "Projection"
}
{noformat}
It seems that Hive always merges the same group_type as one but Tajo does only
when the same group_type appears sequentially,
as following query passes testGroupby1() without problem.
{noformat}
select col0, col1, col2, col3, sum(col4) as total, avg(col5) from base group by
col0, col3, cube (col1, col2) having total > 100
{noformat}
--
This message was sent by Atlassian JIRA
(v6.1.5#6160)