Hi Experts,

           I need one solution for hive in my project. Actually I have to
create a pivot table type of concept in hive. I don't know how to do the
same. But I have studied and implemented in hive for the same. But by
looking into it, I am not satisfied with the solution and I am not knowing
about the performance overhead also.  You people please inform me whether
any other way better than this to implement pivot table in hive.

I have referenced the following site for understanding the basic concept of
pivot table.

http://blogs.msdn.com/b/spike/archive/2009/03/03/pivot-tables-in-sql-server-a-simple-sample.aspx


I am attaching the input file and hive implementation which I have done for
the same.

Attachments:

1. testfile_pivot.txt : This file contains the input data for creating table
for both sql server and hive. You please remove the heading and the under
line and load data into hive
2. pivottable.txt : contains sql server command for creating table,
populating table and pivot table creation and hive command for table
creation. loading data into hive and .
                          pivot  table creation statements etc


                         Please reply me your suggestions.



-- 
Thanks and Regards
Reju. R
VendorId   IncomeDay  IncomeAmount
---------- ---------- ------------
SPIKE       FRI                 100
SPIKE           MON                     300
FREDS           SUN                     400
SPIKE           WED                     500
SPIKE           TUE                     200
JOHNS           WED                     900
SPIKE           FRI                     100
JOHNS           MON                     300
SPIKE           SUN                     400
JOHNS           FRI                     300
FREDS           TUE                     200
SPIKE           MON                     900
FREDS           FRI                     900
FREDS           MON                     500
JOHNS           SUN                     600
SPIKE           FRI                     300
SPIKE           FRI                     300
SPIKE           WED                     500
SPIKE           FRI                     300
JOHNS           THU                     800
JOHNS           SAT                     800
SPIKE           TUE                     100
SPIKE           THU                     300
FREDS           WED                     500
SPIKE           SAT                     100
FREDS           SAT                     500
FREDS           THU                     800
JOHNS           TUE                     600
-- PIVOT TABLE CONCEPT IN SQL SERVER

create table DailyIncome(VendorId nvarchar(10), IncomeDay nvarchar(10), 
IncomeAmount int);

--populating the sql server database
insert into DailyIncome values ('SPIKE', 'FRI', 100);

insert into DailyIncome values ('SPIKE', 'MON', 300);

insert into DailyIncome values ('FREDS', 'SUN', 400);

insert into DailyIncome values ('SPIKE', 'WED', 500);

insert into DailyIncome values ('SPIKE', 'TUE', 200);

insert into DailyIncome values ('JOHNS', 'WED', 900);

insert into DailyIncome values ('SPIKE', 'FRI', 100);

insert into DailyIncome values ('JOHNS', 'MON', 300);

insert into DailyIncome values ('SPIKE', 'SUN', 400);

insert into DailyIncome values ('JOHNS', 'FRI', 300);

insert into DailyIncome values ('FREDS', 'TUE', 500);

insert into DailyIncome values ('FREDS', 'TUE', 200);

insert into DailyIncome values ('SPIKE', 'MON', 900);

insert into DailyIncome values ('FREDS', 'FRI', 900);

insert into DailyIncome values ('FREDS', 'MON', 500);

insert into DailyIncome values ('JOHNS', 'SUN', 600);

insert into DailyIncome values ('SPIKE', 'FRI', 300);

insert into DailyIncome values ('SPIKE', 'WED', 500);

insert into DailyIncome values ('SPIKE', 'FRI', 300);

insert into DailyIncome values ('JOHNS', 'THU', 800);

insert into DailyIncome values ('JOHNS', 'SAT', 800);

insert into DailyIncome values ('SPIKE', 'TUE', 100);

insert into DailyIncome values ('SPIKE', 'THU', 300);

insert into DailyIncome values ('FREDS', 'WED', 500);

insert into DailyIncome values ('SPIKE', 'SAT', 100);

insert into DailyIncome values ('FREDS', 'SAT', 500);

insert into DailyIncome values ('FREDS', 'THU', 800);

insert into DailyIncome values ('JOHNS', 'TUE', 600);


select * from DailyIncome
pivot (avg (IncomeAmount) for IncomeDay in 
([SUN],[MON],[TUE],[WED],[THU],[FRI],[SAT])) as AvgIncomePerDay
 
 
 --PIVOT TABLE CONCEPT IN HIVE
 
CREATE TABLE pivot_source (vendorid string, incomeday string, incomeamount int) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' STORED 
AS textfile;

LOAD DATA INPATH '/userdata/rejur/test/testfile_pivot.txt' INTO TABLE 
pivot_source;

CREATE TABLE pivottest (vendorid string,SUN string, MON string, TUE string, WED 
string,THU string, FRI string, SAT string) ROW FORMAT DELIMITED FIELDS 
TERMINATED BY '\t' LINES TERMINATED BY '\n' STORED AS textfile; 


     
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT vendorid, '1', '2', '3', 
'4','5', '6', '7' FROM pivot_source;


INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, 
source.avgincome AS SUN, dest.MON, dest.TUE, dest.WED, dest.THU, dest.FRI, 
dest.SAT FROM pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS 
avgincome FROM pivot_source WHERE incomeday='SUN' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
source.avgincome AS MON, dest.TUE, dest.WED, dest.THU, dest.FRI, dest.SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='MON' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
dest.MON, source.avgincome AS TUE, dest.WED, dest.THU, dest.FRI, dest.SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='TUE' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
dest.MON, dest.TUE, source.avgincome AS WED, dest.THU, dest.FRI, dest.SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='WED' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
dest.MON, dest.TUE, dest.WED, source.avgincome AS THU, dest.FRI, dest.SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='THU' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
dest.MON, dest.TUE, dest.WED, dest.THU, source.avgincome AS FRI, dest.SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='FRI' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   
INSERT OVERWRITE TABLE pivottest SELECT DISTINCT dest.vendorid, dest.SUN, 
dest.MON, dest.TUE, dest.WED, dest.THU, dest.FRI, source.avgincome AS SAT FROM 
pivottest dest JOIN (SELECT vendorid, avg(incomeamount) AS avgincome FROM 
pivot_source WHERE incomeday='SAT' group by vendorid) source ON 
(dest.vendorid=source.vendorid);   

--PIVOT TABLE CONCEPT IN HIVE

Reply via email to