[ 
https://issues.apache.org/jira/browse/PHOENIX-889?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

yang ming updated PHOENIX-889:
------------------------------

    Description: 
h3.Table DDL
Table1:
create table if not exists yk.video_summary
(
videoid integer not null,
date date not null,
platform varchar not null,
device varchar not null,
systemgroup varchar not null,
system varchar not null,
vv bigint
constraint pk primary key (videoid, date,platform, device, systemgroup,system)
)salt_buckets = 30,versions=1,compression='snappy';

Table2:
create table if not exists yk.video_meta(
     videoid integer  not null,
     showid integer  not null,
     title varchar  not null,
     showvideotype varchar  not null,
     publishtime date not null
     constraint pk primary key (videoid,showid,showvideotype,publishtime)
) salt_buckets = 10,versions=1,compression='snappy';

h3.Queries
Query1:
select videoid from YK.VIDEO_META where showid=99299;
Result:
+------------+
|  VIDEOID   |
+------------+
| 137102991  |
| 151113895  |
| 171559204  |
| 171559439  |
| 171573932  |
| 171574082  |
| 171574164  |
| 171643206  |
| 171677219  |
| 171764188  |
| 171794335  |
| 171874661  |
+------------+

Query2:
select date,sum(vv) as sv from YK.VIDEO_SUMMARY where videoid in 
(137102991,151113895,171559204,171559439,171573932,171574082,171574164,171643206,171677219,171764188,171794335,171874661)
 and date>=to_date('2014-03-17','yyyy-MM-dd') and 
date<=to_date('2014-03-23','yyyy-MM-dd') group by date order by date desc;
Result(cost 3s):
+---------------------+------------+
|        DATE         |     SV     |
+---------------------+------------+
| 2014-03-23          | 2795341    |
| 2014-03-22          | 3111076    |
| 2014-03-21          | 3588108    |
| 2014-03-20          | 5972243    |
| 2014-03-19          | 5192865    |
| 2014-03-18          | 2848761    |
| 2014-03-17          | 8922       |
+---------------------+------------+

Query3:
select a.date,sum(a.vv) as sv from YK.VIDEO_SUMMARY as a inner join 
YK.VIDEO_META as b on (b.showid=99299 and a.videoid=b.videoid) where 
a.date>=to_date('2014-03-17','yyyy-MM-dd') and 
a.date<=to_date('2014-03-23','yyyy-MM-dd') group by a.date order by a.date desc;
Result:
{color:red}Not return results,the server load is high.I kill this query.{color}

h3.The execution plan:
Query2:
+------------+
|    PLAN    |
+------------+
| CLIENT PARALLEL 90-WAY SKIP SCAN ON 360 RANGES OVER YK.VIDEO_SUMMARY 
[0,137102991,'2014-03-17 00:00:00.000'] - [29,171874661,'2014-03-23 
00:00:00.000'] |
|     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
| CLIENT MERGE SORT |
| CLIENT SORTED BY [DATE DESC] |
+------------+

Query3:
+------------+
|    PLAN    |
+------------+
| CLIENT PARALLEL 240-WAY FULL SCAN OVER YK.VIDEO_SUMMARY |
|     SERVER FILTER BY (DATE >= '2014-03-17 00:00:00.000' AND DATE <= 
'2014-03-23 00:00:00.000') |
|     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
| CLIENT MERGE SORT |
| CLIENT SORTED BY [DATE DESC] |
|     PARALLEL EQUI-JOIN 1 HASH TABLES: |
|     BUILD HASH TABLE 0 (SKIP MERGE) |
|         CLIENT PARALLEL 60-WAY FULL SCAN OVER YK.VIDEO_META |
|             SERVER FILTER BY FIRST KEY ONLY AND SHOWID = 99299 |
|         CLIENT MERGE SORT |
+------------+

{color:blue}Table YK.VIDEO_META is small. Query 3 with join is a full scan,it's 
performance with is very poor!Is there any other suggestion?{color}

  was:
h3.Table DDL
Table1:
create table if not exists yk.video_summary
(
videoid integer not null,
date date not null,
platform varchar not null,
device varchar not null,
systemgroup varchar not null,
system varchar not null,
vv bigint
constraint pk primary key (videoid, date,platform, device, systemgroup,system)
)salt_buckets = 30,versions=1,compression='snappy';

Table2:
create table if not exists yk.video_meta(
     videoid integer  not null,
     showid integer  not null,
     title varchar  not null,
     showvideotype varchar  not null,
     publishtime date not null
     constraint pk primary key (videoid,showid,showvideotype,publishtime)
) salt_buckets = 10,versions=1,compression='snappy';

h3.Queries
Query1:
select videoid from YK.VIDEO_META where showid=99299;
Result:
+------------+
|  VIDEOID   |
+------------+
| 137102991  |
| 151113895  |
| 171559204  |
| 171559439  |
| 171573932  |
| 171574082  |
| 171574164  |
| 171643206  |
| 171677219  |
| 171764188  |
| 171794335  |
| 171874661  |
+------------+

Query2:
select date,sum(vv) as sv from YK.VIDEO_SUMMARY where videoid in 
(137102991,151113895,171559204,171559439,171573932,171574082,171574164,171643206,171677219,171764188,171794335,171874661)
 and date>=to_date('2014-03-17','yyyy-MM-dd') and 
date<=to_date('2014-03-23','yyyy-MM-dd') group by date order by date desc;
Result(cost 3s):
+---------------------+------------+
|        DATE         |     SV     |
+---------------------+------------+
| 2014-03-23          | 2795341    |
| 2014-03-22          | 3111076    |
| 2014-03-21          | 3588108    |
| 2014-03-20          | 5972243    |
| 2014-03-19          | 5192865    |
| 2014-03-18          | 2848761    |
| 2014-03-17          | 8922       |
+---------------------+------------+

Query3:
select a.date,sum(a.vv) as sv from YK.VIDEO_SUMMARY as a inner join 
YK.VIDEO_META as b on (b.showid=99299 and a.videoid=b.videoid) where 
a.date>=to_date('2014-03-17','yyyy-MM-dd') and 
a.date<=to_date('2014-03-23','yyyy-MM-dd') group by a.date order by a.date desc;
Result:
{color:red}Not return results,the server load is high.I kill this query.{color}

h3.The execution plan:
Query2:
+------------+
|    PLAN    |
+------------+
| CLIENT PARALLEL 90-WAY SKIP SCAN ON 360 RANGES OVER YK.VIDEO_SUMMARY 
[0,137102991,'2014-03-17 00:00:00.000'] - [29,171874661,'2014-03-23 
00:00:00.000'] |
|     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
| CLIENT MERGE SORT |
| CLIENT SORTED BY [DATE DESC] |
+------------+

Query3:
+------------+
|    PLAN    |
+------------+
| CLIENT PARALLEL 240-WAY FULL SCAN OVER YK.VIDEO_SUMMARY |
|     SERVER FILTER BY (DATE >= '2014-03-17 00:00:00.000' AND DATE <= 
'2014-03-23 00:00:00.000') |
|     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
| CLIENT MERGE SORT |
| CLIENT SORTED BY [DATE DESC] |
|     PARALLEL EQUI-JOIN 1 HASH TABLES: |
|     BUILD HASH TABLE 0 (SKIP MERGE) |
|         CLIENT PARALLEL 60-WAY FULL SCAN OVER YK.VIDEO_META |
|             SERVER FILTER BY FIRST KEY ONLY AND SHOWID = 99299 |
|         CLIENT MERGE SORT |
+------------+

Table YK.VIDEO_META is small. Query 3 with join is a full scan,it's performance 
with is very poor!
Is there any other suggestion?


> Query performance with Join feature is poor
> -------------------------------------------
>
>                 Key: PHOENIX-889
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-889
>             Project: Phoenix
>          Issue Type: Improvement
>    Affects Versions: 3.0.0
>            Reporter: yang ming
>
> h3.Table DDL
> Table1:
> create table if not exists yk.video_summary
> (
> videoid integer not null,
> date date not null,
> platform varchar not null,
> device varchar not null,
> systemgroup varchar not null,
> system varchar not null,
> vv bigint
> constraint pk primary key (videoid, date,platform, device, systemgroup,system)
> )salt_buckets = 30,versions=1,compression='snappy';
> Table2:
> create table if not exists yk.video_meta(
>      videoid integer  not null,
>      showid integer  not null,
>      title varchar  not null,
>      showvideotype varchar  not null,
>      publishtime date not null
>      constraint pk primary key (videoid,showid,showvideotype,publishtime)
> ) salt_buckets = 10,versions=1,compression='snappy';
> h3.Queries
> Query1:
> select videoid from YK.VIDEO_META where showid=99299;
> Result:
> +------------+
> |  VIDEOID   |
> +------------+
> | 137102991  |
> | 151113895  |
> | 171559204  |
> | 171559439  |
> | 171573932  |
> | 171574082  |
> | 171574164  |
> | 171643206  |
> | 171677219  |
> | 171764188  |
> | 171794335  |
> | 171874661  |
> +------------+
> Query2:
> select date,sum(vv) as sv from YK.VIDEO_SUMMARY where videoid in 
> (137102991,151113895,171559204,171559439,171573932,171574082,171574164,171643206,171677219,171764188,171794335,171874661)
>  and date>=to_date('2014-03-17','yyyy-MM-dd') and 
> date<=to_date('2014-03-23','yyyy-MM-dd') group by date order by date desc;
> Result(cost 3s):
> +---------------------+------------+
> |        DATE         |     SV     |
> +---------------------+------------+
> | 2014-03-23          | 2795341    |
> | 2014-03-22          | 3111076    |
> | 2014-03-21          | 3588108    |
> | 2014-03-20          | 5972243    |
> | 2014-03-19          | 5192865    |
> | 2014-03-18          | 2848761    |
> | 2014-03-17          | 8922       |
> +---------------------+------------+
> Query3:
> select a.date,sum(a.vv) as sv from YK.VIDEO_SUMMARY as a inner join 
> YK.VIDEO_META as b on (b.showid=99299 and a.videoid=b.videoid) where 
> a.date>=to_date('2014-03-17','yyyy-MM-dd') and 
> a.date<=to_date('2014-03-23','yyyy-MM-dd') group by a.date order by a.date 
> desc;
> Result:
> {color:red}Not return results,the server load is high.I kill this 
> query.{color}
> h3.The execution plan:
> Query2:
> +------------+
> |    PLAN    |
> +------------+
> | CLIENT PARALLEL 90-WAY SKIP SCAN ON 360 RANGES OVER YK.VIDEO_SUMMARY 
> [0,137102991,'2014-03-17 00:00:00.000'] - [29,171874661,'2014-03-23 
> 00:00:00.000'] |
> |     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
> | CLIENT MERGE SORT |
> | CLIENT SORTED BY [DATE DESC] |
> +------------+
> Query3:
> +------------+
> |    PLAN    |
> +------------+
> | CLIENT PARALLEL 240-WAY FULL SCAN OVER YK.VIDEO_SUMMARY |
> |     SERVER FILTER BY (DATE >= '2014-03-17 00:00:00.000' AND DATE <= 
> '2014-03-23 00:00:00.000') |
> |     SERVER AGGREGATE INTO DISTINCT ROWS BY [DATE] |
> | CLIENT MERGE SORT |
> | CLIENT SORTED BY [DATE DESC] |
> |     PARALLEL EQUI-JOIN 1 HASH TABLES: |
> |     BUILD HASH TABLE 0 (SKIP MERGE) |
> |         CLIENT PARALLEL 60-WAY FULL SCAN OVER YK.VIDEO_META |
> |             SERVER FILTER BY FIRST KEY ONLY AND SHOWID = 99299 |
> |         CLIENT MERGE SORT |
> +------------+
> {color:blue}Table YK.VIDEO_META is small. Query 3 with join is a full 
> scan,it's performance with is very poor!Is there any other suggestion?{color}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to