[
https://issues.apache.org/jira/browse/HIVE-20683?focusedWorklogId=308712&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-308712
]
ASF GitHub Bot logged work on HIVE-20683:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 09/Sep/19 09:50
Start Date: 09/Sep/19 09:50
Worklog Time Spent: 10m
Work Description: b-slim commented on pull request #723: [HIVE-20683] Add
the Ability to push Dynamic Between and Bloom filters to Druid
URL: https://github.com/apache/hive/pull/723#discussion_r322155098
##########
File path:
ql/src/test/queries/clientpositive/druidmini_semijoin_reduction_all_types.q
##########
@@ -0,0 +1,144 @@
+--! qt:dataset:srcpart
+--! qt:dataset:druid_table_alltypesorc
+--! qt:dataset:alltypesorc
+
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+set hive.disable.unsafe.external.table.operations=false;
+set hive.tez.dynamic.semijoin.reduction.for.mapjoin=true;
+
+DROP TABLE IF EXISTS alltypesorc_small;
+CREATE TABLE alltypesorc_small(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ cboolean1 BOOLEAN,
+ cboolean2 BOOLEAN)
+ STORED AS ORC;
+Insert into table alltypesorc_small
+Select ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cstring1,
cstring2, cast(`__time` as timestamp), cboolean1, cboolean2 from
druid_table_alltypesorc where cstring2 like '%a%' and cstring1 like '%a%';
+Select count(*) from alltypesorc_small;
+Select count(*) from druid_table_alltypesorc;
+
+DESCRIBE druid_table_alltypesorc;
+DESCRIBE alltypesorc_small;
+
+-- Test Joins on all column types one by one
+-- String
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cstring1 = druid_table_alltypesorc.cstring1);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cstring1 = druid_table_alltypesorc.cstring1);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cstring1 = druid_table_alltypesorc.cstring1);
+
+-- tinyint
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctinyint = druid_table_alltypesorc.ctinyint);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctinyint = druid_table_alltypesorc.ctinyint);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctinyint = druid_table_alltypesorc.ctinyint);
+
+-- smallint
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.csmallint = druid_table_alltypesorc.csmallint);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.csmallint = druid_table_alltypesorc.csmallint);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.csmallint = druid_table_alltypesorc.csmallint);
+
+-- int
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cint = druid_table_alltypesorc.cint);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cint = druid_table_alltypesorc.cint);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cint = druid_table_alltypesorc.cint);
+
+-- bigint
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cbigint = druid_table_alltypesorc.cbigint);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cbigint = druid_table_alltypesorc.cbigint);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cbigint = druid_table_alltypesorc.cbigint);
+
+-- float
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cfloat = druid_table_alltypesorc.cfloat);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cfloat = druid_table_alltypesorc.cfloat);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cfloat = druid_table_alltypesorc.cfloat);
+
+-- double
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cdouble = druid_table_alltypesorc.cdouble);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cdouble = druid_table_alltypesorc.cdouble);
+set hive.disable.unsafe.external.table.operations=true;
+
+-- timestamp
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctimestamp1 = druid_table_alltypesorc.`__time`);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctimestamp1 = cast(druid_table_alltypesorc.`__time` as
timestamp));
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.ctimestamp1 = cast(druid_table_alltypesorc.`__time` as
timestamp));
+
+-- boolean
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cboolean1 = druid_table_alltypesorc.cboolean1);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cboolean1 = druid_table_alltypesorc.cboolean1);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(alltypesorc_small.cboolean1 = druid_table_alltypesorc.cboolean1);
+
+
+-- Test Casts
+
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cint as string) = druid_table_alltypesorc.cintstring);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cint as string) = druid_table_alltypesorc.cintstring);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cint as string) = druid_table_alltypesorc.cintstring);
+
+
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cdouble as string) =
druid_table_alltypesorc.cdoublestring);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cdouble as string) =
druid_table_alltypesorc.cdoublestring);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cdouble as string) =
druid_table_alltypesorc.cdoublestring);
+
+
+set hive.disable.unsafe.external.table.operations=false;
+EXPLAIN select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cfloat as string) =
druid_table_alltypesorc.cfloatstring);
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cfloat as string) =
druid_table_alltypesorc.cfloatstring);
+set hive.disable.unsafe.external.table.operations=true;
+select count(*) from alltypesorc_small join druid_table_alltypesorc on
(cast(alltypesorc_small.cfloat as string) =
druid_table_alltypesorc.cfloatstring);
+
+
+
+
+
+
+
+
Review comment:
please remove all those blank lines
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 308712)
Time Spent: 3h 40m (was: 3.5h)
> Add the Ability to push Dynamic Between and Bloom filters to Druid
> ------------------------------------------------------------------
>
> Key: HIVE-20683
> URL: https://issues.apache.org/jira/browse/HIVE-20683
> Project: Hive
> Issue Type: New Feature
> Components: Druid integration
> Reporter: Nishant Bangarwa
> Assignee: Nishant Bangarwa
> Priority: Major
> Labels: pull-request-available
> Attachments: HIVE-20683.1.patch, HIVE-20683.2.patch,
> HIVE-20683.3.patch, HIVE-20683.4.patch, HIVE-20683.5.patch,
> HIVE-20683.6.patch, HIVE-20683.8.patch, HIVE-20683.patch
>
> Time Spent: 3h 40m
> Remaining Estimate: 0h
>
> For optimizing joins, Hive generates a BETWEEN filter with min-max and a BLOOM
> filter for filtering one side of a semi-join.
> Druid 0.13.0 will have support for Bloom filters (Added via
> https://github.com/apache/incubator-druid/pull/6222)
> Implementation details -
> # Hive generates and passes the filters as part of 'filterExpr' in TableScan.
> # DruidQueryBasedRecordReader gets this filter passed as part of the conf.
> # During the execution phase, before sending the query to Druid in
> DruidQueryBasedRecordReader, we will deserialize this filter, translate it
> into a DruidDimFilter, and add it to the existing DruidQuery. The Tez executor
> already ensures that when we start reading results from the record reader,
> all the dynamic values are initialized.
> # Explaining a Druid query also prints the query sent to Druid as
> {{druid.json.query}}. We also need to make sure to update the Druid query
> with the filters. During explain we do not have the actual values for the
> dynamic expressions, so instead of values we will print the dynamic expression
> itself as part of the Druid query.
> Note: This work requires Druid to be updated to version 0.13.0.
--
This message was sent by Atlassian Jira
(v8.3.2#803003)