Khurram Faraaz created DRILL-3891:
-------------------------------------
Summary: ROW_KEY filter IN(integer values) does not get pushed in
to Scan
Key: DRILL-3891
URL: https://issues.apache.org/jira/browse/DRILL-3891
Project: Apache Drill
Issue Type: Bug
Components: Execution - Flow
Affects Versions: 1.2.0
Environment: 4 node cluster CentOS
Reporter: Khurram Faraaz
Assignee: Smidth Panchamia
ROW_KEY filter does not get pushed into Scan when filter involved IN (integer
values). Data inserted into HBase table is byte ordered and encoded as Int32.
case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select
convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8')
val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN
(8388607,2147483647,67108863,-536870912,-2147483648);
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1,
'qual1'))])
00-02 SelectionVectorRemover
00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), 8388607),
=(CONVERT_FROM($0, 'INT_OB'), 2147483647), =(CONVERT_FROM($0, 'INT_OB'),
67108863), =(CONVERT_FROM($0, 'INT_OB'), -536870912), =(CONVERT_FROM($0,
'INT_OB'), -2147483648)))])
00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
[tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}
case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select
convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8')
val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN
('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1,
'qual1'))])
00-02 SelectionVectorRemover
00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
'8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0,
'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'),
=(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
[tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}
case 3) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select
convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8')
val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN
('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1,
'qual1'))])
00-02 SelectionVectorRemover
00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
'8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0,
'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'),
=(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
[tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}
case 4) NOT IN (cast('8388607' as int),cast('2147483647' as
int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as
int));
{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select
convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8')
val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (cast('8388607'
as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as
int),cast('-2147483648' as int));
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1,
'qual1'))])
00-02 SelectionVectorRemover
00-03 Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'),
CAST('8388607'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
CAST('-536870912'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'),
CAST('-2147483648'):INTEGER NOT NULL)))])
00-04 Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec
[tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}
Data inserted into HBase table
{code}
int[] arr =
{Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};
for (int i = 0; i < arr.length; i++) {
byte[] bytes = new byte[5];
org.apache.hadoop.hbase.util.PositionedByteRange br =
new
org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
org.apache.hadoop.hbase.util.Order.ASCENDING);
Put p = new Put(bytes);
p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value %d",
i).getBytes());
table.put(p);
}
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)