On 06/11/2011 06:32 AM, hiba houimli wrote:
Hi,

When I translate an SQL query into a Pig script, as in these examples:

Example 1:

SQL query:

-- Total of l_extendedprice * l_discount over the line items shipped in the
-- one-year window starting at [DATE], whose discount lies within 0.01 of
-- [DISCOUNT] (both ends included) and whose quantity is below [QUANTITY].
-- The bracketed names are template placeholders to fill in before running.
SELECT
    SUM(l_extendedprice * l_discount) AS revenue
FROM
    lineitem
WHERE
    l_shipdate >= DATE '[DATE]'
    AND l_shipdate < DATE '[DATE]' + INTERVAL '1' YEAR
    AND l_discount BETWEEN [DISCOUNT] - 0.01 AND [DISCOUNT] + 0.01
    AND l_quantity < [QUANTITY];


Pig script:

-- Version reported as failing (kept exactly as posted).
-- It loads the '|'-delimited lineitem file with an explicit schema, filters on
-- ship date, discount and quantity, then calls SUM directly inside a foreach
-- over filtred_line, with no GROUP step before it. The variant further down
-- runs `group ... all` first and sums the resulting bag.
-- NOTE(review): the discount test uses strict > / <, while the SQL uses
-- BETWEEN, which includes both bounds -- confirm which is intended.
lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using PigStorage('|') AS (l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:double, l_returnflag:chararray, l_linestatus:chararray, l_shipdate:chararray, l_commitdate:chararray, l_receiptdate:chararray, l_shipinstruct:chararray, l_shipmode:chararray, l_comment:chararray); filtred_line = filter lineitem by l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01' and l_discount > 0.05 and l_discount < 0.07 and l_quantity < 24; project_lineitem = foreach filtred_line generate SUM(l_extendedprice * l_discount) as revenue:double;
-- Write the single-column result under OUTPUT_PATH/tpch_query6.
store project_lineitem into 'OUTPUT_PATH/tpch_query6';


I get this error: see the attachment erreur_query_Q6.Png
-- Revenue query (output: tpch_query6), translated from the SQL above with
-- [DATE] = 1994-01-01, [DISCOUNT] = 0.06 and [QUANTITY] = 24.
lineitem = LOAD '/home/hiba/tpch/lineitem.tbl' using PigStorage('|') AS (
    l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int,
    l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:double,
    l_returnflag:chararray, l_linestatus:chararray,
    l_shipdate:chararray, l_commitdate:chararray, l_receiptdate:chararray,
    l_shipinstruct:chararray, l_shipmode:chararray, l_comment:chararray);

-- l_shipdate is loaded as chararray, so the date range is a string comparison;
-- this assumes the file stores dates as yyyy-MM-dd, which sorts in date order.
-- SQL BETWEEN includes both bounds, hence >= and <= on l_discount.
filtred_line = filter lineitem by
    l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01'
    and l_discount >= 0.05 and l_discount <= 0.07
    and l_quantity < 24;

-- Both operands are already declared double in the schema; no cast is needed.
enhanced = foreach filtred_line generate l_extendedprice * l_discount as l_realprice;

-- SUM operates on a bag, so gather every remaining row into one group first.
grouped = group enhanced all;
project_lineitem = foreach grouped generate SUM(enhanced.l_realprice) as revenue:double;
store project_lineitem into 'OUTPUT_PATH/tpch_query6';



And example 2:

SQL query:

-- For every part of size [SIZE] whose type ends in [TYPE], list the suppliers
-- in region [REGION] that offer the part at the lowest ps_supplycost found
-- among that region's suppliers for the same part.
-- The bracketed names are template placeholders to fill in before running.
SELECT
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
FROM
    part,
    supplier,
    partsupp,
    nation,
    region
WHERE
    p_partkey = ps_partkey
    AND s_suppkey = ps_suppkey
    AND p_size = [SIZE]
    AND p_type LIKE '%[TYPE]'
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = '[REGION]'
    AND ps_supplycost = (
        -- part is not in this FROM list, so p_partkey refers to the outer
        -- row: the minimum is computed separately for each part.
        SELECT
            MIN(ps_supplycost)
        FROM
            partsupp, supplier,
            nation, region
        WHERE
            p_partkey = ps_partkey
            AND s_suppkey = ps_suppkey
            AND s_nationkey = n_nationkey
            AND n_regionkey = r_regionkey
            AND r_name = '[REGION]'
    )
ORDER BY
    s_acctbal DESC,
    n_name,
    s_name,
    p_partkey;

Pig script:

-- Version reported as failing (kept exactly as posted).
-- Load the five '|'-delimited input tables with explicit schemas.
partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
as (ps_partkey:int, ps_suppkey:int, ps_availqty:int, ps_supplycost:double, ps_comment:chararray);
supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
as (s_suppkey:int, s_name:chararray, s_address:chararray, s_nationkey:int, s_phone:chararray, s_acctbal:double, s_comment:chararray);
nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
as (n_nationkey:int, n_name:chararray, n_regionkey:int, n_comment:chararray);
region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
            as (r_regionkey:int, r_name:chararray, r_comment:chararray);
part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
as (p_partkey:int, p_name:chararray, p_mfgr:chararray, p_brand:chararray, p_type:chararray, p_size:int, p_container:chararray, p_retailprice:double, p_comment:chararray);
-- First pipeline: join all five tables restricted to regions whose name starts
-- with EUROPE, then ask for MIN(ps_supplycost).
filter_region = filter region by r_name matches 'EUROPE.*';
joined_part_partsuppr = join partsupp by ps_partkey, part by p_partkey;
-- MIN is called directly in a foreach over the join result, with no GROUP step
-- before it; the variant further down groups first.
-- NOTE(review): supplier is joined on ps_partkey = s_suppkey, whereas the SQL
-- condition is s_suppkey = ps_suppkey -- confirm the intended key.
joined_supp_ps = join joined_part_partsuppr by ps_partkey, supplier by s_suppkey; joined_nation_supp = join nation by n_nationkey, joined_supp_ps by s_nationkey; join_nation_region = join filter_region by r_regionkey, joined_nation_supp by n_regionkey; project_join = foreach join_nation_region generate MIN(ps_supplycost) as min_ps:double;
-- Second pipeline: the same five-way join, this time starting from parts of
-- size 15 whose type ends in BRASS.
filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
filter_r = filter region by r_name matches 'EUROPE.*';
join_p_ps = join partsupp by ps_partkey, filter_part by p_partkey;
-- NOTE(review): same ps_partkey = s_suppkey join key as above -- confirm.
join_all = join join_p_ps by ps_partkey, supplier by s_suppkey;
join_s_n = join nation by n_nationkey, join_all by s_nationkey;
join_r_n = join filter_r by r_regionkey,join_s_n by n_regionkey;
-- Pair the joined rows with the minimum by cogrouping on the cost value, and
-- keep only the groups where both sides contribute the same number of tuples.
grouped = cogroup join_r_n by ps_supplycost, project_join by min_ps;
filtered = filter grouped by COUNT(project_join) == COUNT(join_r_n);
-- Emit the eight output columns; p_mfgr is the only one not wrapped in flatten.
project = foreach filtered generate flatten(join_r_n.s_acctbal), flatten(join_r_n.s_name), flatten(join_r_n.n_name), flatten(join_r_n.p_partkey), (join_r_n.p_mfgr), flatten(join_r_n.s_address), flatten(join_r_n.s_phone), flatten(join_r_n.s_comment);
store project into 'OUTPUT_PATH/tpch_query2';

I get this error: see the attachment Erreur_Query_Q2.Png

-- Minimum-cost supplier query (output: tpch_query2), translated from the SQL
-- above with [SIZE] = 15, [TYPE] = BRASS and [REGION] = EUROPE.
partsupp = load '/home/hiba/tpch/partsupp.tbl' using PigStorage('|')
as (ps_partkey:int, ps_suppkey:int, ps_availqty:int, ps_supplycost:double, ps_comment:chararray);
supplier = load '/home/hiba/tpch/supplier.tbl' using PigStorage('|')
as (s_suppkey:int, s_name:chararray, s_address:chararray, s_nationkey:int, s_phone:chararray, s_acctbal:double, s_comment:chararray);
nation = load '/home/hiba/tpch/nation.tbl' using PigStorage('|')
as (n_nationkey:int, n_name:chararray, n_regionkey:int, n_comment:chararray);
region = load '/home/hiba/tpch/region.tbl' using PigStorage('|')
            as (r_regionkey:int, r_name:chararray, r_comment:chararray);
part = load '/home/hiba/tpch/part.tbl' using PigStorage('|')
as (p_partkey:int, p_name:chararray, p_mfgr:chararray, p_brand:chararray, p_type:chararray, p_size:int, p_container:chararray, p_retailprice:double, p_comment:chararray);

-- Suppliers located in the selected region (region -> nation -> supplier).
filter_r = filter region by r_name matches 'EUROPE.*';
join_n_r = join nation by n_regionkey, filter_r by r_regionkey;
join_s_n = join supplier by s_nationkey, join_n_r by n_nationkey;

-- Every (part, supplier) offer made by those suppliers. partsupp references
-- supplier through ps_suppkey (SQL: s_suppkey = ps_suppkey), not ps_partkey.
join_ps_s = join partsupp by ps_suppkey, join_s_n by s_suppkey;

-- The SQL sub-select is correlated on p_partkey, so the minimum cost is
-- computed per part rather than once over the whole data set.
grouped = group join_ps_s by ps_partkey;
stats = foreach grouped generate group as min_partkey, MIN(join_ps_s.ps_supplycost) as min_ps;

-- Parts of the requested size and type, together with their regional offers.
filter_part = filter part by p_size == 15 and p_type matches '.*BRASS';
join_p_ps = join filter_part by p_partkey, join_ps_s by ps_partkey;

-- Attach each part's minimum and keep only the offers priced at that minimum.
join_min = join join_p_ps by ps_partkey, stats by min_partkey;
filter_min = filter join_min by ps_supplycost == min_ps;

-- Keep the eight columns of the SQL select list, in the same order.
project = foreach filter_min generate s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment;

ordered = order project by s_acctbal desc, n_name, s_name, p_partkey;

store ordered into 'OUTPUT_PATH/tpch_query2';

I don't know what to do.

Thank you for your help.






Reply via email to