[
https://issues.apache.org/jira/browse/KYLIN-4818?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17243217#comment-17243217
]
ASF GitHub Bot commented on KYLIN-4818:
---------------------------------------
hit-lacus edited a comment on pull request #1485:
URL: https://github.com/apache/kylin/pull/1485#issuecomment-737050880
## Intro
- Cube planner phase two is not supported.
- By default, cuboid statistics are calculated only for the **FIRST** segment.
- The HLLCounter used for cuboid statistics uses precision 14.
- Cuboid statistics are calculated using 100% of the input flat table data.
(Sampling may be used in the future.)
## Prepare test
```json
{
"uuid": "295b326f-f1ab-0aa7-a0ce-3f99fba2a53c",
"last_modified": 1607003531014,
"version": "4.0.0.0",
"name": "UserActionStreamCube_3",
"is_draft": false,
"model_name": "UserActionModel",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "UID",
"table": "USERACTIONSTREAM",
"column": "UID",
"derived": null
},
{
"name": "ACT_TYPE",
"table": "USERACTIONSTREAM",
"column": "ACT_TYPE",
"derived": null
},
{
"name": "PAGE_ID",
"table": "USERACTIONSTREAM",
"column": "PAGE_ID",
"derived": null
},
{
"name": "AREA_ID",
"table": "USERACTIONSTREAM",
"column": "AREA_ID",
"derived": null
},
{
"name": "LAST_PAGE_ID",
"table": "USERACTIONSTREAM",
"column": "LAST_PAGE_ID",
"derived": null
},
{
"name": "DEVICE_BRAND",
"table": "USERACTIONSTREAM",
"column": "DEVICE_BRAND",
"derived": null
},
{
"name": "NETWORK_TYPE",
"table": "USERACTIONSTREAM",
"column": "NETWORK_TYPE",
"derived": null
},
{
"name": "DEVICE_TYPE",
"table": "USERACTIONSTREAM",
"column": "DEVICE_TYPE",
"derived": null
},
{
"name": "OS_VERSION",
"table": "USERACTIONSTREAM",
"column": "OS_VERSION",
"derived": null
},
{
"name": "APP_VERSION",
"table": "USERACTIONSTREAM",
"column": "APP_VERSION",
"derived": null
},
{
"name": "ITEM_TYPE_ID",
"table": "USERACTIONSTREAM",
"column": "ITEM_TYPE_ID",
"derived": null
},
{
"name": "REGISTER_DATE",
"table": "USERACTIONSTREAM",
"column": "REGISTER_DATE",
"derived": null
},
{
"name": "LAST_LOGIN_DATE",
"table": "USERACTIONSTREAM",
"column": "LAST_LOGIN_DATE",
"derived": null
},
{
"name": "LOG_TIME",
"table": "USERACTIONSTREAM",
"column": "LOG_TIME",
"derived": null
},
{
"name": "STRATEGY_ID",
"table": "USERACTIONSTREAM",
"column": "STRATEGY_ID",
"derived": null
},
{
"name": "ACTIVITY_ID",
"table": "USERACTIONSTREAM",
"column": "ACTIVITY_ID",
"derived": null
},
{
"name": "ACTIVITY_TYPE",
"table": "USERACTIONSTREAM",
"column": "ACTIVITY_TYPE",
"derived": null
},
{
"name": "ADVERTISEMENT_FLAG",
"table": "USERACTIONSTREAM",
"column": "ADVERTISEMENT_FLAG",
"derived": null
},
{
"name": "IP_ADDRESS",
"table": "USERACTIONSTREAM",
"column": "IP_ADDRESS",
"derived": null
},
{
"name": "COUNTRY",
"table": "USERACTIONSTREAM",
"column": "COUNTRY",
"derived": null
},
{
"name": "PROVINCE",
"table": "USERACTIONSTREAM",
"column": "PROVINCE",
"derived": null
},
{
"name": "CITY",
"table": "USERACTIONSTREAM",
"column": "CITY",
"derived": null
},
{
"name": "PART_DT",
"table": "USERACTIONSTREAM",
"column": "PART_DT",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1"
},
"returntype": "bigint"
}
},
{
"name": "SUM_ITEM_ID",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USERACTIONSTREAM.ITEM_ID"
},
"returntype": "bigint"
}
},
{
"name": "SUM_ACTIVE_MINUTES",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USERACTIONSTREAM.ACTIVE_MINUTES"
},
"returntype": "decimal(11,5)"
}
},
{
"name": "SUM_PLAY_DURATION",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USERACTIONSTREAM.PLAY_DURATION"
},
"returntype": "decimal(11,5)"
}
},
{
"name": "SUM_PLAY_TIMES",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "USERACTIONSTREAM.PLAY_TIMES"
},
"returntype": "bigint"
}
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "USERACTIONSTREAM.UID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.ACT_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.PAGE_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.AREA_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.LAST_PAGE_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.DEVICE_BRAND",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.NETWORK_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.DEVICE_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.OS_VERSION",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.APP_VERSION",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.ITEM_TYPE_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.REGISTER_DATE",
"encoding": "date",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.LAST_LOGIN_DATE",
"encoding": "date",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.LOG_TIME",
"encoding": "time",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.STRATEGY_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.ACTIVITY_ID",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.ACTIVITY_TYPE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.ADVERTISEMENT_FLAG",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.IP_ADDRESS",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.COUNTRY",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.PROVINCE",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.CITY",
"encoding": "dict",
"isShardBy": false
},
{
"column": "USERACTIONSTREAM.PART_DT",
"encoding": "dict",
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"SUM_ITEM_ID",
"SUM_ACTIVE_MINUTES",
"SUM_PLAY_DURATION",
"SUM_PLAY_TIMES"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"USERACTIONSTREAM.UID",
"USERACTIONSTREAM.ACT_TYPE",
"USERACTIONSTREAM.PAGE_ID",
"USERACTIONSTREAM.AREA_ID",
"USERACTIONSTREAM.LAST_PAGE_ID",
"USERACTIONSTREAM.DEVICE_BRAND",
"USERACTIONSTREAM.NETWORK_TYPE",
"USERACTIONSTREAM.DEVICE_TYPE",
"USERACTIONSTREAM.OS_VERSION",
"USERACTIONSTREAM.APP_VERSION",
"USERACTIONSTREAM.ITEM_TYPE_ID",
"USERACTIONSTREAM.REGISTER_DATE",
"USERACTIONSTREAM.LAST_LOGIN_DATE",
"USERACTIONSTREAM.LOG_TIME",
"USERACTIONSTREAM.STRATEGY_ID",
"USERACTIONSTREAM.ACTIVITY_ID",
"USERACTIONSTREAM.ACTIVITY_TYPE",
"USERACTIONSTREAM.ADVERTISEMENT_FLAG",
"USERACTIONSTREAM.IP_ADDRESS",
"USERACTIONSTREAM.COUNTRY",
"USERACTIONSTREAM.PROVINCE",
"USERACTIONSTREAM.CITY",
"USERACTIONSTREAM.PART_DT"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [
"USERACTIONSTREAM.PART_DT",
"USERACTIONSTREAM.PAGE_ID"
],
"joint_dims": [
[
"USERACTIONSTREAM.PROVINCE",
"USERACTIONSTREAM.COUNTRY",
"USERACTIONSTREAM.CITY"
],
[
"USERACTIONSTREAM.LOG_TIME",
"USERACTIONSTREAM.REGISTER_DATE",
"USERACTIONSTREAM.LAST_LOGIN_DATE"
]
],
"dim_cap": 6
}
}
],
"signature": "6ndV6Dgh5E70aKUQhp85aw==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 0,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"volatile_range": 0,
"retention_range": 0,
"engine_type": 6,
"storage_type": 2,
"override_kylin_properties": {
"kylin.engine.spark-conf.spark.executor.memory": "5G",
"kylin.engine.spark-conf.spark.executor.instances": "10",
"kylin.engine.spark-conf.spark.executor.cores": "2",
"kylin.spark-conf.auto.prior": "false"
},
"cuboid_black_list": [],
"parent_forward": 3,
"mandatory_dimension_set_list": [],
"snapshot_table_desc_list": []
}
```
## Test result
<img width="1071" alt="image"
src="https://user-images.githubusercontent.com/14030549/101027090-a5f9e280-35b2-11eb-834f-586e0b361d93.png">
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Calculate cuboid statistics in Kylin 4
> --------------------------------------
>
> Key: KYLIN-4818
> URL: https://issues.apache.org/jira/browse/KYLIN-4818
> Project: Kylin
> Issue Type: Sub-task
> Components: Spark Engine
> Reporter: Xiaoxiang Yu
> Assignee: Xiaoxiang Yu
> Priority: Major
> Fix For: v4.0.0-beta
>
>
> Referring to SparkFactDistinct.java in Kylin 3, I will try to use Spark to
> calculate (estimate) the rowcount/size for each cuboid candidate. The rowcount/size
> of a cuboid is the input for cube planner phase one and phase two.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)