On 06/11/2011 06:32 AM, hiba houimli wrote:
Hi,
When I translate a query sql on a script pig like this exple:
exple 1:
query sql:
select
sum(l_extendedprice*l_discount) as revenue
from
lineitem
where
l_shipdate >= date '[DATE]'
and l_shipdate < date '[DATE]' + interval '1' year
and l_discount between [DISCOUNT] - 0.01 and [DISCOUNT] + 0.01
and l_quantity < [QUANTITY];
script pig :
lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using
PigStorage('|') AS (l_orderkey:int, l_partkey:int, l_suppkey:int,
l_linenumber:int, l_quantity:int, l_extendedprice:double,
l_discount:double, l_tax:double, l_returnflag:chararray,
l_linestatus:chararray, l_shipdate:chararray, l_commitdate:chararray,
l_receiptdate:chararray, l_shipinstruct:chararray,
l_shipmode:chararray, l_comment:chararray);
filtred_line = filter lineitem by l_shipdate >= '1994-01-01' and
l_shipdate < '1995-01-01' and l_discount > 0.05 and l_discount < 0.07
and l_quantity < 24;
project_lineitem = foreach filtred_line generate SUM(l_extendedprice *
l_discount) as revenue:double;
store project_lineitem into 'OUTPUT_PATH/tpch_query6';
I have this problem: see the FJ: erreur_query_Q6.Png
lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using PigStorage('|') AS
(l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int,
l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:double,
l_returnflag:chararray, l_linestatus:chararray, l_shipdate:chararray,
l_commitdate:chararray, l_receiptdate:chararray,
l_shipinstruct:chararray, l_shipmode:chararray, l_comment:chararray);
filtred_line = filter lineitem by l_shipdate >= '1994-01-01' and
l_shipdate < '1995-01-01' and l_discount > 0.05 and l_discount < 0.07
and l_quantity < 24;
enhanced = foreach filtred_line generate
(double)l_extendedprice*(double)l_discount as l_realprice;
grouped = group enhanced all;
project_lineitem = foreach grouped generate SUM(enhanced.l_realprice) as
revenue:double;
store project_lineitem into 'OUTPUT_PATH/tpch_query6';
and exple 2:
query sql:
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
part,
supplier,
partsupp,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = [SIZE]
and p_type like '%[TYPE]'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = '[REGION]'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp, supplier,
nation, region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = '[REGION]'
)
order by
s_acctbal desc,
n_name,
s_name,
p_partkey;
script pig:
partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
as (ps_partkey:int, ps_suppkey:int, ps_availqty:int,
ps_supplycost:double, ps_comment:chararray);
supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
as (s_suppkey:int, s_name:chararray, s_address:chararray,
s_nationkey:int, s_phone:chararray, s_acctbal:double,
s_comment:chararray);
nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
as (n_nationkey:int, n_name:chararray, n_regionkey:int,
n_comment:chararray);
region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
as (r_regionkey:int, r_name:chararray, r_comment:chararray);
part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
as (p_partkey:int, p_name:chararray, p_mfgr:chararray,
p_brand:chararray, p_type:chararray, p_size:int,
p_container:chararray, p_retailprice:double, p_comment:chararray);
filter_region = filter region by r_name matches 'EUROPE.*';
joined_part_partsuppr = join partsupp by ps_partkey, part by p_partkey;
joined_supp_ps = join joined_part_partsuppr by ps_partkey, supplier by
s_suppkey;
joined_nation_supp = join nation by n_nationkey, joined_supp_ps by
s_nationkey;
join_nation_region = join filter_region by r_regionkey,
joined_nation_supp by n_regionkey;
project_join = foreach join_nation_region generate MIN(ps_supplycost)
as min_ps:double;
filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
filter_r = filter region by r_name matches 'EUROPE.*';
join_p_ps = join partsupp by ps_partkey, filter_part by p_partkey;
join_all = join join_p_ps by ps_partkey, supplier by s_suppkey;
join_s_n = join nation by n_nationkey, join_all by s_nationkey;
join_r_n = join filter_r by r_regionkey,join_s_n by n_regionkey;
grouped = cogroup join_r_n by ps_supplycost, project_join by min_ps;
filtered = filter grouped by COUNT(project_join) == COUNT(join_r_n);
project = foreach filtered generate flatten(join_r_n.s_acctbal),
flatten(join_r_n.s_name), flatten(join_r_n.n_name),
flatten(join_r_n.p_partkey), (join_r_n.p_mfgr),
flatten(join_r_n.s_address), flatten(join_r_n.s_phone),
flatten(join_r_n.s_comment);
store project into 'OUTPUT_PATH/tpch_query2';
I have this problem: see the FJ: Erreur_Query_Q2.Png
partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
as (ps_partkey:int, ps_suppkey:int, ps_availqty:int,
ps_supplycost:double, ps_comment:chararray);
supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
as (s_suppkey:int, s_name:chararray, s_address:chararray,
s_nationkey:int, s_phone:chararray, s_acctbal:double, s_comment:chararray);
nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
as (n_nationkey:int, n_name:chararray, n_regionkey:int,
n_comment:chararray);
region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
as (r_regionkey:int, r_name:chararray, r_comment:chararray);
part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
as (p_partkey:int, p_name:chararray, p_mfgr:chararray,
p_brand:chararray, p_type:chararray, p_size:int, p_container:chararray,
p_retailprice:double, p_comment:chararray);
filter_region = filter region by r_name matches 'EUROPE.*';
joined_part_partsuppr = join partsupp by ps_partkey, part by p_partkey;
joined_supp_ps = join joined_part_partsuppr by ps_partkey, supplier by
s_suppkey;
joined_nation_supp = join nation by n_nationkey, joined_supp_ps by
s_nationkey;
join_nation_region = join filter_region by r_regionkey,
joined_nation_supp by n_regionkey;
grouped = group join_nation_region all;
stats = foreach grouped generate MIN(join_nation_region.ps_supplycost)
as min_ps;
filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
filter_r = filter region by r_name matches 'EUROPE.*';
join_p_ps = join partsupp by ps_partkey, filter_part by p_partkey;
join_all = join join_p_ps by ps_partkey, supplier by s_suppkey;
join_s_n = join nation by n_nationkey, join_all by s_nationkey;
join_r_n = join filter_r by r_regionkey,join_s_n by n_regionkey;
filter_min = filter join_r_n by ps_supplycost==stats.min_ps;
ordered = order filter_min by s_acctbal desc, n_name, s_name, p_partkey;
store ordered into 'OUTPUT_PATH/tpch_query2';
I don't Know what I do,
thak you for your help ...