Hi,

I am having trouble saving the union of two tables as compressed files. My
Hive query is as follows:

# begin of query

-- Request 40 reduce tasks for the jobs launched by the statements that follow.
-- NOTE(review): nothing in this script enables output compression
-- (e.g. hive.exec.compress.output) or enforced bucketing
-- (e.g. hive.enforce.bucketing); presumably they are expected to come from
-- the session or site configuration -- confirm.
set mapred.reduce.tasks=40;

-- Recreate the final output table from scratch.
DROP TABLE f_table;

-- Tab-delimited text table at an explicit HDFS location, declared as 16
-- buckets clustered and sorted on col1. The bucketing column list must be
-- parenthesised: CLUSTERED BY (col1), not CLUSTERED BY col1.
CREATE TABLE f_table
(
  col1 string,
  col2 string,
  col3 string
)
CLUSTERED BY (col1) SORTED BY (col1) INTO 16 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/root/data/f_table';

-- Staging table 1: recreated with the same definition as f_table.
DROP TABLE f_table1;

CREATE TABLE f_table1 LIKE f_table;

-- Row count per (col1, col2) pair from some_table.
-- col2 is listed in GROUP BY because Hive rejects a non-aggregated SELECT
-- column that is not a grouping key.
-- NOTE(review): this assumes per-(col1, col2) counts are what was intended;
-- confirm, or aggregate col2 instead.
INSERT OVERWRITE TABLE f_table1
SELECT
  col1,
  col2,
  count(1) AS col3
FROM
  some_table
GROUP BY
  col1,
  col2;

-- Staging table 2: recreated with the same definition as f_table.
DROP TABLE f_table2;

CREATE TABLE f_table2 LIKE f_table;

-- Row count per col1 from some_table, with the literal 'CONSTANT' in the
-- col2 position. The comma after col1 is required; without it the SELECT
-- list does not parse.
INSERT OVERWRITE TABLE f_table2
SELECT
  col1,
  'CONSTANT' AS col2,
  count(1) AS col3
FROM
  some_table
GROUP BY
  col1;

-- Overwrite f_table with all rows of f_table1 plus all rows of f_table2
-- (UNION ALL keeps duplicates).
-- NOTE(review): this statement has no aggregation or ordering, so it is
-- presumably planned without a reduce stage, in which case
-- mapred.reduce.tasks would not influence the number of output files --
-- confirm with EXPLAIN.
INSERT OVERWRITE TABLE f_table
SELECT
  merged.col1,
  merged.col2,
  merged.col3
FROM (
  SELECT col1, col2, col3 FROM f_table1
  UNION ALL
  SELECT col1, col2, col3 FROM f_table2
) merged;

# end of query

Only ONE uncompressed file is generated and stored under the HDFS directory
'/user/root/data/f_table', but multiple compressed files are supposed to be
generated.

Can anyone help? Any help is highly appreciated.

Best regards,

Ping

Reply via email to