The embeded "group by" column of TopN shouldn't be declared as a dimension
in the cube definition; otherwise you wouldn't benefit from this measure;

In v1.5.1 there is no such check when saving the cube; From 1.5.2, kylin
will report error message on such case.

Please check this blog if you haven't:
https://kylin.apache.org/blog/2016/03/19/approximate-topn-measure/



2016-06-03 17:32 GMT+08:00 yu feng <[email protected]>:

> I create a cube with kylin sample data, cube defination is below :
>
> {
>   "uuid": "e6cf2ccc-edca-41c6-b637-b3bc50894b5e",
>   "version": "1.5.1",
>   "name": "kylin_sales_cube_desc_2_clone",
>   "description": null,
>   "dimensions": [
>     {
>       "name": "CAL_DT",
>       "table": "DEFAULT.KYLIN_CAL_DT",
>       "column": "{FK}",
>       "derived": [
>         "WEEK_BEG_DT"
>       ]
>     },
>     {
>       "name": "CATEGORY",
>       "table": "DEFAULT.KYLIN_CATEGORY_GROUPINGS",
>       "column": "{FK}",
>       "derived": [
>         "USER_DEFINED_FIELD1",
>         "USER_DEFINED_FIELD3",
>         "UPD_DATE",
>         "UPD_USER"
>       ]
>     },
>     {
>       "name": "CATEGORY_HIERARCHY",
>       "table": "DEFAULT.KYLIN_CATEGORY_GROUPINGS",
>       "column": "META_CATEG_NAME",
>       "derived": null
>     },
>     {
>       "name": "CATEGORY_HIERARCHY",
>       "table": "DEFAULT.KYLIN_CATEGORY_GROUPINGS",
>       "column": "CATEG_LVL2_NAME",
>       "derived": null
>     },
>     {
>       "name": "CATEGORY_HIERARCHY",
>       "table": "DEFAULT.KYLIN_CATEGORY_GROUPINGS",
>       "column": "CATEG_LVL3_NAME",
>       "derived": null
>     },
>     {
>       "name": "LSTG_FORMAT_NAME",
>       "table": "DEFAULT.KYLIN_SALES",
>       "column": "LSTG_FORMAT_NAME",
>       "derived": null
>     }
>   ],
>   "measures": [
>     {
>       "name": "TRANS_CNT",
>       "function": {
>         "expression": "COUNT",
>         "parameter": {
>           "type": "constant",
>           "value": "1",
>           "next_parameter": null
>         },
>         "returntype": "bigint"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "SELLER_CNT_HLL",
>       "function": {
>         "expression": "COUNT_DISTINCT",
>         "parameter": {
>           "type": "column",
>           "value": "SELLER_ID",
>           "next_parameter": null
>         },
>         "returntype": "hllc16"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "SELLER_FORMAT_CNT",
>       "function": {
>         "expression": "COUNT_DISTINCT",
>         "parameter": {
>           "type": "column",
>           "value": "LSTG_FORMAT_NAME",
>           "next_parameter": null
>         },
>         "returntype": "hllc12"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "ITEM_COUNT_DISTINCT_COUNT",
>       "function": {
>         "expression": "COUNT_DISTINCT",
>         "parameter": {
>           "type": "column",
>           "value": "ITEM_COUNT",
>           "next_parameter": null
>         },
>         "returntype": "bitmap"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "TOP",
>       "function": {
>         "expression": "TOP_N",
>         "parameter": {
>           "type": "column",
>           "value": "PRICE",
>           "next_parameter": {
>             "type": "column",
>             "value": "META_CATEG_NAME",
>             "next_parameter": null
>           }
>         },
>         "returntype": "topn(100)"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "SOURCE",
>       "function": {
>         "expression": "RAW",
>         "parameter": {
>           "type": "column",
>           "value": "PRICE",
>           "next_parameter": null
>         },
>         "returntype": "raw"
>       },
>       "dependent_measure_ref": null
>     },
>     {
>       "name": "TOPP",
>       "function": {
>         "expression": "TOP_N",
>         "parameter": {
>           "type": "column",
>           "value": "PRICE",
>           "next_parameter": {
>             "type": "column",
>             "value": "ITEM_COUNT",
>             "next_parameter": null
>           }
>         },
>         "returntype": "topn(100)"
>       },
>       "dependent_measure_ref": null
>     }
>   ],
>   "rowkey": {
>     "rowkey_columns": [
>       {
>         "column": "PART_DT",
>         "encoding": "dict",
>         "isShardBy": false
>       },
>       {
>         "column": "LEAF_CATEG_ID",
>         "encoding": "dict",
>         "isShardBy": false
>       },
>       {
>         "column": "META_CATEG_NAME",
>         "encoding": "dict",
>         "isShardBy": false
>       },
>       {
>         "column": "CATEG_LVL2_NAME",
>         "encoding": "dict",
>         "isShardBy": false
>       },
>       {
>         "column": "CATEG_LVL3_NAME",
>         "encoding": "dict",
>         "isShardBy": false
>       },
>       {
>         "column": "LSTG_FORMAT_NAME",
>         "encoding": "fixed_length:16",
>         "isShardBy": false
>       },
>       {
>         "column": "LSTG_SITE_ID",
>         "encoding": "dict",
>         "isShardBy": false
>       }
>     ]
>   },
>   "signature": "RU4IejPOo8asXrxnelDHSw==",
>   "last_modified": 1464771861329,
>   "model_name": "kylin_sales_model",
>   "null_string": null,
>   "hbase_mapping": {
>     "column_family": [
>       {
>         "name": "F1",
>         "columns": [
>           {
>             "qualifier": "M",
>             "measure_refs": [
>               "TRANS_CNT",
>               "TOP",
>               "SOURCE",
>               "TOPP"
>             ]
>           }
>         ]
>       },
>       {
>         "name": "F2",
>         "columns": [
>           {
>             "qualifier": "M",
>             "measure_refs": [
>               "SELLER_CNT_HLL",
>               "SELLER_FORMAT_CNT",
>               "ITEM_COUNT_DISTINCT_COUNT"
>             ]
>           }
>         ]
>       }
>     ]
>   },
>   "aggregation_groups": [
>     {
>       "includes": [
>         "CATEG_LVL2_NAME",
>         "CATEG_LVL3_NAME",
>         "LEAF_CATEG_ID",
>         "LSTG_FORMAT_NAME",
>         "LSTG_SITE_ID",
>         "META_CATEG_NAME",
>         "PART_DT"
>       ],
>       "select_rule": {
>         "hierarchy_dims": [
>           [
>             "META_CATEG_NAME",
>             "CATEG_LVL2_NAME",
>             "CATEG_LVL3_NAME"
>           ]
>         ],
>         "mandatory_dims": [
>           "PART_DT"
>         ],
>         "joint_dims": [
>           [
>             "LEAF_CATEG_ID",
>             "LSTG_SITE_ID"
>           ]
>         ]
>       }
>     }
>   ],
>   "notify_list": null,
>   "status_need_notify": [],
>   "partition_date_start": 1325376000000,
>   "partition_date_end": 3153600000000,
>   "auto_merge_time_ranges": [],
>   "retention_range": 0,
>   "engine_type": 2,
>   "storage_type": 2,
>   "override_kylin_properties": {}
> }
>
>
> Threr are two TOPN deminsions on group by META_CATEG_NAME and one group by
> ITEM_COUNT(just for test),I run query : select  cal_dt, META_CATEG_NAME,
> sum(price) from KYLIN_SALES as f inner join KYLIN_CAL_DT as d on d.cal_dt =
> f.part_dt inner join kylin_category_groupings as g on f.LEAF_CATEG_ID =
>  g.LEAF_CATEG_ID and f.LSTG_SITE_ID = g.SITE_ID group by cal_dt ,
> META_CATEG_NAME order by  sum(price) desc limit 10;
> it return :Can't find any realization. Please confirm with providers.
>
> And I run query : select  cal_dt, ITEM_COUNT, sum(price) from KYLIN_SALES
> as f inner join KYLIN_CAL_DT as d on d.cal_dt = f.part_dt inner join
> kylin_category_groupings as g on f.LEAF_CATEG_ID =  g.LEAF_CATEG_ID and
> f.LSTG_SITE_ID = g.SITE_ID group by cal_dt , ITEM_COUNT order by
>  sum(price) desc limit 10;
> It return success !
>
> I check code and find kylin will remove defined dimension from dimensions
> that used in the query, and try to influenceCapabilityCheck with
> unmatchedDimensions and unmatchedAggregations, as the dimension(like
> META_CATEG_NAME) has been remove and can not find in
> TopNMeasureType.influenceCapabilityCheck, Am I right? or sth I do not get.
>
> Thanks a lot.
>



-- 
Best regards,

Shaofeng Shi

Reply via email to