Namit Jain created HIVE-4096:
--------------------------------

             Summary: problem in hive.map.groupby.sorted with distincts
                 Key: HIVE-4096
                 URL: https://issues.apache.org/jira/browse/HIVE-4096
             Project: Hive
          Issue Type: Bug
          Components: Query Processor
            Reporter: Namit Jain


-- Reproduction script for HIVE-4096: count(distinct key) over a bucketed,
-- sorted, partitioned table while hive.map.groupby.sorted is enabled.

-- Enforce the table's declared bucketing and sorting when inserting into it,
-- and cap the number of reducers.
set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;
set hive.exec.reducers.max = 10;
-- The setting under test in this report.
set hive.map.groupby.sorted=true;

-- Source table: partitioned by ds, bucketed and sorted on key into 2 buckets.
CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='1');

-- perform an insert to make sure there are 2 files
INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1';

-- Destination table for the INSERT variant of the distinct count.
CREATE TABLE outputTbl1(cnt INT);

-- The plan should be converted to a map-side group by, since the
-- sorting columns and grouping columns match, and all the bucketing columns
-- are part of sorting columns
EXPLAIN
select count(distinct key) from T1;

select count(distinct key) from T1;

-- Same distinct count, this time written into outputTbl1 and read back.
explain
INSERT OVERWRITE TABLE outputTbl1
select count(distinct key) from T1;

INSERT OVERWRITE TABLE outputTbl1
select count(distinct key) from T1;

SELECT * FROM outputTbl1;

-- Clean up both tables created by this script so it can be rerun.
DROP TABLE T1;
DROP TABLE outputTbl1;


The `count(distinct key)` queries above return wrong results.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to