[
https://issues.apache.org/jira/browse/PHOENIX-2894?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Maryann Xue resolved PHOENIX-2894.
----------------------------------
Resolution: Fixed
> Sort-merge join works incorrectly with DESC columns
> ---------------------------------------------------
>
> Key: PHOENIX-2894
> URL: https://issues.apache.org/jira/browse/PHOENIX-2894
> Project: Phoenix
> Issue Type: Bug
> Affects Versions: 4.7.0
> Reporter: Pierre Lacave
> Assignee: Maryann Xue
> Fix For: 4.8.0
>
> Attachments: PHOENIX-2894.patch, PHOENIX-2894_2.patch,
> PHOENIX-2894_alt.patch
>
>
> Hi
> I am seeing weird result with joins where the output seems to be incomplete
> I tried to summarise the problem with the queries bellow.
> in query 1, I do a join over a period for which I would have expected to
> return a dozen of rows, but only one is returned for a time T1,
> in query 2, I do the same join but filtering for one of the missing row at
> time T2 which is now returned ?!
> I re-ran query 1 to make sure it was not a timing issue, but had the same
> wrong partial result.
> Thanks
> The two tables are using a salt of 2.
> Using Phoenix 4.7, Hbase 1.1
> Query #1
> {noformat}
> 0: jdbc:phoenix:localhost:2181> SELECT C.BUCKET, C.TIMESTAMP FROM (
> . . . . . . . . . . . . . . . > SELECT E.BUCKET as BUCKET, L.BUCKET as
> LBUCKET, E.TIMESTAMP as TIMESTAMP, L.TIMESTAMP as LTIMESTAMP FROM
> . . . . . . . . . . . . . . . > (SELECT BUCKET, TIMESTAMP FROM
> EVENT_COUNT
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND
> LOCATION = 'Tr/Bal'
> . . . . . . . . . . . . . . . > AND TIMESTAMP <=
> 1462993520000000000 AND TIMESTAMP > 1462993420000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP,
> LOCATION
> . . . . . . . . . . . . . . . > ) E
> . . . . . . . . . . . . . . . > JOIN
> . . . . . . . . . . . . . . . > (SELECT BUCKET, TIMESTAMP FROM
> EVENT_LATENCY
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND
> SRC_LOCATION = 'Tr/Bal' AND SRC_LOCATION = DST_LOCATION
> . . . . . . . . . . . . . . . > AND TIMESTAMP <=
> 1462993520000000000 AND TIMESTAMP > 1462993420000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP,
> SRC_LOCATION, DST_LOCATION
> . . . . . . . . . . . . . . . > ) L
> . . . . . . . . . . . . . . . > ON L.BUCKET = E.BUCKET AND L.TIMESTAMP =
> E.TIMESTAMP
> . . . . . . . . . . . . . . . > ) C
> . . . . . . . . . . . . . . . > GROUP BY C.BUCKET, C.TIMESTAMP;
> +-----------+----------------------+
> | E.BUCKET | E.TIMESTAMP |
> +-----------+----------------------+
> | 5SEC | 1462993430000000000 |
> +-----------+----------------------+
> 1 row selected (0.169 seconds)
> {noformat}
> Query #2
> {noformat}
> 0: jdbc:phoenix:localhost:2181> SELECT C.BUCKET, C.TIMESTAMP FROM (
> . . . . . . . . . . . . . . . > SELECT E.BUCKET as BUCKET, L.BUCKET as
> LBUCKET, E.TIMESTAMP as TIMESTAMP, L.TIMESTAMP as LTIMESTAMP FROM
> . . . . . . . . . . . . . . . > (SELECT BUCKET, TIMESTAMP FROM
> EVENT_COUNT
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND
> LOCATION = 'Tr/Bal'
> . . . . . . . . . . . . . . . > AND TIMESTAMP <=
> 1462993520000000000 AND TIMESTAMP > 1462993420000000000 AND TIMESTAMP =
> 1462993520000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP,
> LOCATION
> . . . . . . . . . . . . . . . > ) E
> . . . . . . . . . . . . . . . > JOIN
> . . . . . . . . . . . . . . . > (SELECT BUCKET, TIMESTAMP FROM
> EVENT_LATENCY
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND
> SRC_LOCATION = 'Tr/Bal' AND SRC_LOCATION = DST_LOCATION
> . . . . . . . . . . . . . . . > AND TIMESTAMP <=
> 1462993520000000000 AND TIMESTAMP > 1462993420000000000 AND TIMESTAMP =
> 1462993520000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP,
> SRC_LOCATION, DST_LOCATION
> . . . . . . . . . . . . . . . > ) L
> . . . . . . . . . . . . . . . > ON L.BUCKET = E.BUCKET AND L.TIMESTAMP =
> E.TIMESTAMP
> . . . . . . . . . . . . . . . > ) C
> . . . . . . . . . . . . . . . > GROUP BY C.BUCKET, C.TIMESTAMP;
> +-----------+----------------------+
> | E.BUCKET | E.TIMESTAMP |
> +-----------+----------------------+
> | 5SEC | 1462993520000000000 |
> +-----------+----------------------+
> 1 row selected (0.081 seconds)
> {noformat}
> For reference the content of each table and the plan of each query bellow
> {noformat}
> 0: jdbc:phoenix:localhost:2181> SELECT BUCKET, TIMESTAMP as T FROM
> EVENT_LATENCY
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND SRC_LOCATION =
> 'Tr/Bal' AND SRC_LOCATION = DST_LOCATION
> . . . . . . . . . . . . . . . > AND TIMESTAMP <= 1462993520000000000
> AND TIMESTAMP > 1462993420000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP, SRC_LOCATION,
> DST_LOCATION ORDER BY T DESC;
> +---------+----------------------+
> | BUCKET | T |
> +---------+----------------------+
> | 5SEC | 1462993520000000000 |
> | 5SEC | 1462993515000000000 |
> | 5SEC | 1462993510000000000 |
> | 5SEC | 1462993505000000000 |
> | 5SEC | 1462993490000000000 |
> | 5SEC | 1462993485000000000 |
> | 5SEC | 1462993480000000000 |
> | 5SEC | 1462993475000000000 |
> | 5SEC | 1462993470000000000 |
> | 5SEC | 1462993430000000000 |
> +---------+----------------------+
> {noformat}
> {noformat}
> 0: jdbc:phoenix:localhost:2181> SELECT BUCKET, TIMESTAMP as T FROM
> EVENT_COUNT
> . . . . . . . . . . . . . . . > WHERE BUCKET = '5SEC' AND LOCATION =
> 'Tr/Bal'
> . . . . . . . . . . . . . . . > AND TIMESTAMP <= 1462993520000000000
> AND TIMESTAMP > 1462993420000000000
> . . . . . . . . . . . . . . . > GROUP BY BUCKET, TIMESTAMP, LOCATION
> order BY T DESC;
> +---------+----------------------+
> | BUCKET | T |
> +---------+----------------------+
> | 5SEC | 1462993520000000000 |
> | 5SEC | 1462993515000000000 |
> | 5SEC | 1462993510000000000 |
> | 5SEC | 1462993505000000000 |
> | 5SEC | 1462993500000000000 |
> | 5SEC | 1462993495000000000 |
> | 5SEC | 1462993490000000000 |
> | 5SEC | 1462993485000000000 |
> | 5SEC | 1462993480000000000 |
> | 5SEC | 1462993475000000000 |
> | 5SEC | 1462993470000000000 |
> | 5SEC | 1462993465000000000 |
> | 5SEC | 1462993460000000000 |
> | 5SEC | 1462993455000000000 |
> | 5SEC | 1462993450000000000 |
> | 5SEC | 1462993445000000000 |
> | 5SEC | 1462993440000000000 |
> | 5SEC | 1462993430000000000 |
> +---------+----------------------+
> {noformat}
> Query #1 plan
> {noformat}
> +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
> |
> PLAN
> |
> +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
> | SORT-MERGE-JOIN (INNER) TABLES
>
> |
> | CLIENT 2-CHUNK 963058947 ROWS 100663313890 BYTES PARALLEL 2-WAY SKIP
> SCAN ON 2 RANGES OVER EVENT_COUNT [0,'5SEC',~1462993520000000000,'Tr/Bal'] -
> [1,'5SEC',~1462993420000000000,'Tr/Bal'] |
> | SERVER FILTER BY FIRST KEY ONLY
>
> |
> | SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [BUCKET, TIMESTAMP,
> LOCATION]
> |
> | CLIENT MERGE SORT
>
> |
> | CLIENT SORTED BY [BUCKET, TIMESTAMP]
>
> |
> | AND (SKIP MERGE)
>
> |
> | CLIENT 2-CHUNK 1387870519 ROWS 167352757221 BYTES PARALLEL 2-WAY SKIP
> SCAN ON 2 RANGES OVER EVENT_LATENCY [0,'5SEC',~1462993520000000000,'Tr/Bal']
> - [1,'5SEC',~1462993420000000000,'Tr/Bal'] |
> | SERVER FILTER BY FIRST KEY ONLY AND SRC_LOCATION = DST_LOCATION
>
> |
> | SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [BUCKET, TIMESTAMP,
> SRC_LOCATION, DST_LOCATION]
> |
> | CLIENT MERGE SORT
>
> |
> | CLIENT SORTED BY [BUCKET, TIMESTAMP]
>
> |
> | CLIENT SORTED BY [E.BUCKET, E.TIMESTAMP]
>
> |
> | CLIENT AGGREGATE INTO DISTINCT ROWS BY [E.BUCKET, E.TIMESTAMP]
>
> |
> +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
> 14 rows selected (0.089 seconds)
> {noformat}
> Query #2 plan
> {noformat}
> +----------------------------------------------------------------------------------------------------------------------------------------------+
> | PLAN
> |
> +----------------------------------------------------------------------------------------------------------------------------------------------+
> | SORT-MERGE-JOIN (INNER) TABLES
> |
> | CLIENT 2-CHUNK 963058947 ROWS 100663313890 BYTES PARALLEL 2-WAY RANGE
> SCAN OVER EVENT_COUNT [0,'5SEC',~1462993520000000000,'Tr/Bal'] |
> | SERVER FILTER BY FIRST KEY ONLY
> |
> | SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [BUCKET, TIMESTAMP,
> LOCATION] |
> | CLIENT MERGE SORT
> |
> | CLIENT SORTED BY [BUCKET, TIMESTAMP]
> |
> | AND (SKIP MERGE)
> |
> | CLIENT 2-CHUNK 1387870519 ROWS 167352757221 BYTES PARALLEL 2-WAY RANGE
> SCAN OVER EVENT_LATENCY [0,'5SEC',~1462993520000000000,'Tr/Bal'] |
> | SERVER FILTER BY FIRST KEY ONLY AND SRC_LOCATION = DST_LOCATION
> |
> | SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [BUCKET, TIMESTAMP,
> SRC_LOCATION, DST_LOCATION] |
> | CLIENT MERGE SORT
> |
> | CLIENT SORTED BY [BUCKET, TIMESTAMP]
> |
> | CLIENT SORTED BY [E.BUCKET, E.TIMESTAMP]
> |
> | CLIENT AGGREGATE INTO DISTINCT ROWS BY [E.BUCKET, E.TIMESTAMP]
> |
> +----------------------------------------------------------------------------------------------------------------------------------------------+
> 14 rows selected (0.079 seconds)
> {noformat}
> possibly relevant hbase settings
> {noformat}
> <property>
> <name>phoenix.query.rowKeyOrderSaltedTable</name>
> <value>true</value>
> <description>Whether or not a non aggregate query returns rows in row key
> or
> der for salted tables. If this option is turned on, split points may not be
> spec
> ified at table create time, but instead the default splits on each salt
> bucket m
> ust be used. Default is true.</description>
> </property>
> {noformat}
> here the DDLs (replaced the irrelevant name of column by letters)
> {noformat}
> CREATE TABLE IF NOT EXISTS EVENT_COUNT (
> BUCKET VARCHAR,
> TIMESTAMP_DATE TIMESTAMP,
> TIMESTAMP UNSIGNED_LONG NOT NULL,
> LOCATION VARCHAR,
> A VARCHAR,
> B VARCHAR,
> C VARCHAR,
> D UNSIGNED_LONG,
> E FLOAT
> CONSTRAINT pk PRIMARY KEY (BUCKET, TIMESTAMP DESC, LOCATION, A, B, C)
> ) SALT_BUCKETS=2, COMPRESSION='GZ', TTL=31622400;
> CREATE TABLE IF NOT EXISTS EVENT_LATENCY (
> BUCKET VARCHAR,
> TIMESTAMP_DATE TIMESTAMP,
> TIMESTAMP UNSIGNED_LONG NOT NULL,
> SRC_LOCATION VARCHAR,
> DST_LOCATION VARCHAR,
> B VARCHAR,
> C VARCHAR,
> F UNSIGNED_LONG,
> G UNSIGNED_LONG,
> H UNSIGNED_LONG,
> I UNSIGNED_LONG
> CONSTRAINT pk PRIMARY KEY (BUCKET, TIMESTAMP DESC, SRC_LOCATION,
> DST_LOCATION, B, C)
> ) SALT_BUCKETS=2, COMPRESSION='GZ', TTL=31622400;
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)