大家好:
目前在使用中遇到一个kylin查询慢的场景:
Cube 设计如下:
{
"uuid": "dfb77a08-f51d-4088-b559-1da67c28a068",
"last_modified": 1486997875953,
"version": "1.6.0",
"name": "insu_t",
"model_name": "insu_jdmall_model_test",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "BRAND",
"table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
"column": "BRAND_CD",
"derived": null
},
{
"name": "DT",
"table": "DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D",
"column": "DT",
"derived": null
},
{
"name": "DIM.DIM_DAY_DERIVED",
"table": "DIM.DIM_DAY",
"column": null,
"derived": [
"DIM_DAY_NAME",
"DIM_WEEK_NAME",
"DIM_MONTH_NAME"
]
},
{
"name": "FIRST",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_FIRST_CATE_NAME",
"derived": null
},
{
"name": "SECOND",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_SECOND_CATE_NAME",
"derived": null
},
{
"name": "THIRD",
"table": "DIM.DIM_ITEM_GEN_THIRD_CATE_D",
"column": "ITEM_THIRD_CATE_NAME",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1",
"next_parameter": null
},
"returntype": "bigint"
},
"dependent_measure_ref": null
},
{
"name": "QTTY",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "SALE_QTTY",
"next_parameter": null
},
"returntype": "bigint"
},
"dependent_measure_ref": null
},
{
"name": "BEFORE",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "BEFORE_PREFR_AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "USER",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USER_ACTUAL_PAY_AMOUNT",
"next_parameter": null
},
"returntype": "decimal(25,4)"
},
"dependent_measure_ref": null
},
{
"name": "SALE",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "SALE_ORD_ID",
"next_parameter": null
},
"returntype": "bitmap"
},
"dependent_measure_ref": null
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "BRAND_CD",
"encoding": "dict",
"isShardBy": false
},
{
"column": "DT",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_FIRST_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_SECOND_CATE_NAME",
"encoding": "dict",
"isShardBy": false
},
{
"column": "ITEM_THIRD_CATE_NAME",
"encoding": "dict",
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"QTTY",
"BEFORE",
"USER"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"SALE"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"BRAND_CD",
"DT",
"ITEM_FIRST_CATE_NAME",
"ITEM_SECOND_CATE_NAME",
"ITEM_THIRD_CATE_NAME"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [],
"joint_dims": []
}
}
],
"signature": "Kl5sPTVN78bEYTGKoUOsWg==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 1483747200000,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"retention_range": 0,
"engine_type": 2,
"storage_type": 2,
"override_kylin_properties": {
"kylin.hbase.region.cut": "1"
}
}
数据量是14天的数据,sale_ord_id的基数是1.5亿
Select dt,item_second_cate_name,count(distinct sale_ord_id),sum(sale_qtty)
from DMT.DMT_KYLIN_JDMALL_ORDR_DTL_I_D a
left join dim.dim_day b on a.dt = b.dim_day_txdate
left join DIM.DIM_ITEM_GEN_THIRD_CATE_D c on a.item_third_cate_cd =
c.item_third_cate_id
group by dt,item_second_cate_name;
这条语句执行时间是37秒,去掉count(distinct sale_ord_id)后查询时0.07秒
dt,item_second_cate_name 都是normal维度,从结果看是count_distinct度量导致的慢查询,请问这个有什么优化的建议吗?
谢谢!