Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Hadoop Wiki" for change 
notification.

The following page has been changed by ZhengShao:
http://wiki.apache.org/hadoop/Hive/HiveQL/Transform

------------------------------------------------------------------------------
  Hive runs the reduce script in the reduce task (instead of the map task) 
because of the ''clusterBy''/''distributeBy''/''sortBy'' clause in the inner 
query.
  
  {{{
- clusterBy: CLUSTER BY colName (, colName)*
+ clusterBy: CLUSTER BY colName (',' colName)*
- distributeBy: DISTRIBUTE BY colName (, colName)*
+ distributeBy: DISTRIBUTE BY colName (',' colName)*
- sortBy: SORT BY colName (, colName)*
+ sortBy: SORT BY colName (',' colName)*
  
  query:
    FROM (
      FROM src
-     MAP expression (, expression)*
+     MAP '(' expression (',' expression)* ')'
      USING 'my_map_script'
-     ( AS colName (, colName)* )?
+     ( AS colName (',' colName)* )?
      ( clusterBy? | distributeBy? sortBy? ) src_alias
    )
-   REDUCE expression (, expression)*
+   REDUCE '(' expression (',' expression)* ')'
      USING 'my_reduce_script'
-     ( AS colName (, colName)* )?
+     ( AS colName (',' colName)* )?
  }}}
  
  Example:
  {{{
    FROM (
      FROM pv_users
-     MAP pv_users.userid, pv_users.date
+     MAP ( pv_users.userid, pv_users.date )
      USING 'map_script'
      AS dt, uid
      CLUSTER BY dt) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE map_output.dt, map_output.uid
+     REDUCE ( map_output.dt, map_output.uid )
      USING 'reduce_script'
      AS date, count;
  }}}
@@ -51, +51 @@

  {{{
    FROM (
      FROM pv_users
-     MAP pv_users.userid, pv_users.date
+     MAP ( pv_users.userid, pv_users.date )
      USING 'map_script'
      CLUSTER BY key) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE map_output.key, map_output.value
+     REDUCE ( map_output.key, map_output.value )
      USING 'reduce_script'
      AS date, count;
  }}}
@@ -73, +73 @@

  {{{
    FROM (
      FROM pv_users
-     MAP pv_users.userid, pv_users.date
+     MAP ( pv_users.userid, pv_users.date )
      USING 'map_script'
      AS c1, c2, c3
      DISTRIBUTE BY c2
      SORT BY c2, c1) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE map_output.c1, map_output.c2, map_output.c3
+     REDUCE ( map_output.c1, map_output.c2, map_output.c3 )
      USING 'reduce_script'
      AS date, count;
  }}}

Reply via email to