[
https://issues.apache.org/jira/browse/DRILL-3891?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Khurram Faraaz updated DRILL-3891:
----------------------------------
Fix Version/s: (was: 1.3.0)
Future
> ROW_KEY filter IN(integer values) does not get pushed in to Scan
> ----------------------------------------------------------------
>
> Key: DRILL-3891
> URL: https://issues.apache.org/jira/browse/DRILL-3891
> Project: Apache Drill
> Issue Type: Bug
> Components: Execution - Flow
> Affects Versions: 1.2.0
> Environment: 4 node cluster CentOS
> Reporter: Khurram Faraaz
> Assignee: Smidth Panchamia
> Fix For: Future
>
>
> ROW_KEY filter does not get pushed into Scan when filter involved IN (integer
> values). Data inserted into HBase table is byte ordered and encoded as Int32.
> case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select
> convert_from(row_key,'INT_OB') as rk,
> convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where
> convert_from(row_key,'INT_OB') NOT IN
> (8388607,2147483647,67108863,-536870912,-2147483648);
> +------+------+
> | text | json |
> +------+------+
> | 00-00 Screen
> 00-01 Project(rk=[CONVERT_FROMINT_OB($0)],
> val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02 SelectionVectorRemover
> 00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
> 8388607), =(CONVERT_FROM($0, 'INT_OB'), 2147483647), =(CONVERT_FROM($0,
> 'INT_OB'), 67108863), =(CONVERT_FROM($0, 'INT_OB'), -536870912),
> =(CONVERT_FROM($0, 'INT_OB'), -2147483648)))])
> 00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
> [tableName=int_Tbl, startRow=null, stopRow=null, filter=null],
> columns=[`*`]]])
> {code}
> case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select
> convert_from(row_key,'INT_OB') as rk,
> convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where
> convert_from(row_key,'INT_OB') NOT IN
> ('8388607','2147483647','67108863','-536870912','-2147483648');
> +------+------+
> | text | json |
> +------+------+
> | 00-00 Screen
> 00-01 Project(rk=[CONVERT_FROMINT_OB($0)],
> val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02 SelectionVectorRemover
> 00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
> '8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0,
> 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'),
> =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
> 00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
> [tableName=int_Tbl, startRow=null, stopRow=null, filter=null],
> columns=[`*`]]])
> {code}
> case 3) NOT IN (cast('8388607' as int),cast('2147483647' as
> int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as
> int));
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select
> convert_from(row_key,'INT_OB') as rk,
> convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where
> convert_from(row_key,'INT_OB') NOT IN (cast('8388607' as
> int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as
> int),cast('-2147483648' as int));
> +------+------+
> | text | json |
> +------+------+
> | 00-00 Screen
> 00-01 Project(rk=[CONVERT_FROMINT_OB($0)],
> val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02 SelectionVectorRemover
> 00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
> CAST('8388607'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
> CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
> CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
> CAST('-536870912'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
> CAST('-2147483648'):INTEGER NOT NULL)))])
> 00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
> [tableName=int_Tbl, startRow=null, stopRow=null, filter=null],
> columns=[`*`]]])
> {code}
> Data inserted into HBase table
> {code}
> int[] arr =
> {Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};
> for (int i = 0; i < arr.length; i++) {
> byte[] bytes = new byte[5];
> org.apache.hadoop.hbase.util.PositionedByteRange br =
> new
> org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
> org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
> org.apache.hadoop.hbase.util.Order.ASCENDING);
> Put p = new Put(bytes);
>
> p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value
> %d", i).getBytes());
> table.put(p);
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)