Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The "PigMix" page has been changed by daijy.
http://wiki.apache.org/pig/PigMix?action=diff&rev1=15&rev2=16

--------------------------------------------------

  {{{
  A = load 'page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp, 
estimated_revenue, page_info, page_links);
- B = order A by user parallel $mappers;
+ B = order A by user $parallelfactor;
  store B into 'page_views_sorted' using PigStorage('\u0001');
  
  alpha = load 'users' using PigStorage('\u0001') as (name, phone, address, 
city, state, zip);
- a1 = order alpha by name parallel $mappers;
+ a1 = order alpha by name $parallelfactor;
  store a1 into 'users_sorted' using PigStorage('\u0001');
  
  a = load 'power_users' using PigStorage('\u0001') as (name, phone, address, 
city, state, zip);
@@ -287, +287 @@

  This script tests reading from a map, flattening a bag of maps, and using 
bincond (features 2, 3, and 4).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, (int)action as action, (map[])page_info as 
page_info,
@@ -304, +304 @@

  This script tests a join whose input is small enough to be run as a 
fragment-replicate join (feature 7). 
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, estimated_revenue;
@@ -321, +321 @@

  something that pig could potentially optimize by not regrouping.
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, (double)estimated_revenue;
@@ -340, +340 @@

  This script covers foreach generate with a nested distinct (feature 10).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, action;
@@ -359, +359 @@

  This script does an anti-join.  This is useful because it exercises cogroup 
in a way that is not a regular join (feature 9).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user;
@@ -377, +377 @@

  This script covers the case where the group by key is a significant 
percentage of the row (feature 12).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, action, (int)timespent as timespent, query_term, 
ip_addr, timestamp;
@@ -392, +392 @@

  This script covers having a nested plan with splits (feature 11).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader() as (user, action, 
timespent, query_term,
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader() as (user, action, 
timespent, query_term,
              ip_addr, timestamp, estimated_revenue, page_info, page_links);
  B = foreach A generate user, timestamp;
  C = group B by user $parallelfactor;
@@ -409, +409 @@

  This script covers group all (feature 13).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user, (int)timespent as timespent, 
(double)estimated_revenue as estimated_revenue;
@@ -423, +423 @@

  This script covers order by of a single value (feature 15).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = order A by query_term $parallelfactor;
@@ -435, +435 @@

  This script covers order by of multiple values (feature 15).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent:int, query_term, ip_addr, timestamp,
          estimated_revenue:double, page_info, page_links);
  B = order A by query_term, estimated_revenue desc, timespent $parallelfactor;
@@ -448, +448 @@

  This script covers distinct and union and reading from a wide row but using 
only one field (features: 1, 14).
  {{{
  register pigperf.jar;
- A = load '$page_views' using 
org.apache.pig.test.utils.datagen.PigPerformanceLoader()
+ A = load '$page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, page_info, page_links);
  B = foreach A generate user;
@@ -520, +520 @@

  A = load 'page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp, 
estimated_revenue, page_info, page_links);
  B = foreach A generate user, action, estimated_revenue, timespent;
- C = group B by user parallel 40;
+ C = group B by user $parallelfactor;
  D = foreach C {
      beth = distinct B.action;
      rev = distinct B.estimated_revenue;
@@ -538, +538 @@

  A = load 'page_views' using 
org.apache.pig.test.udf.storefunc.PigPerformanceLoader()
      as (user, action, timespent, query_term, ip_addr, timestamp, 
estimated_revenue, page_info, page_links);
  B = foreach A generate user, estimated_revenue;
- C = group B by user parallel 40;
+ C = group B by user $parallelfactor;
  D = foreach C {
      E = order B by estimated_revenue;
      F = E.estimated_revenue;
@@ -560, +560 @@

  B = group A by (user, action, timespent, query_term, ip_addr, timestamp,
          estimated_revenue, user_1, action_1, timespent_1, query_term_1, 
ip_addr_1, timestamp_1,
          estimated_revenue_1, user_2, action_2, timespent_2, query_term_2, 
ip_addr_2, timestamp_2,
-         estimated_revenue_2) parallel 40;
+         estimated_revenue_2) $parallelfactor;
  C = foreach B generate SUM(A.timespent), SUM(A.timespent_1), 
SUM(A.timespent_2), AVG(A.estimated_revenue), AVG(A.estimated_revenue_1), 
AVG(A.estimated_revenue_2);
  store C into '$out';
  }}}

Reply via email to