Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Hadoop Wiki" for change 
notification.

The following page has been changed by ZhengShao:
http://wiki.apache.org/hadoop/Hive/LanguageManual/Transform

------------------------------------------------------------------------------
  
  Note that columns will be transformed to ''STRING'' and delimited by TAB 
before being fed to the user script, and the standard output of the user script 
will be treated as TAB-separated ''STRING'' columns. User scripts can output 
debug information to standard error, which will be shown on the task detail page 
on Hadoop.
  
- In the syntax, both ''MAP'' and ''REDUCE'' can be also written as ''SELECT 
TRANSFORM''.  There are actually no difference between these three.
+ In the syntax, both ''MAP ...'' and ''REDUCE ...'' can also be written as 
''SELECT TRANSFORM ( ... )''.  There is actually no difference between these 
three forms.
  Hive runs the reduce script in the reduce task (instead of the map task) 
because of the ''clusterBy''/''distributeBy''/''sortBy'' clause in the inner 
query.
  
  Please also see [wiki:Self:Hive/LanguageManual/SortBy Sort By / Cluster By / 
Distribute By].
@@ -23, +23 @@

  query:
    FROM (
      FROM src
-     MAP '(' expression (',' expression)* ')'
+     MAP expression (',' expression)*
      USING 'my_map_script'
      ( AS colName (',' colName)* )?
      ( clusterBy? | distributeBy? sortBy? ) src_alias
    )
-   REDUCE '(' expression (, expression)* ')'
+   REDUCE expression (',' expression)*
+     USING 'my_reduce_script'
+     ( AS colName (',' colName)* )?
+ 
+   FROM (
+     FROM src
+     SELECT TRANSFORM '(' expression (',' expression)* ')'
+     USING 'my_map_script'
+     ( AS colName (',' colName)* )?
+     ( clusterBy? | distributeBy? sortBy? ) src_alias
+   )
+   SELECT TRANSFORM '(' expression (',' expression)* ')'
      USING 'my_reduce_script'
      ( AS colName (',' colName)* )?
  }}}
@@ -37, +48 @@

  {{{
    FROM (
      FROM pv_users
-     MAP ( pv_users.userid, pv_users.date )
+     MAP pv_users.userid, pv_users.date
      USING 'map_script'
      AS dt, uid
      CLUSTER BY dt) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE ( map_output.dt, map_output.uid )
+     REDUCE map_output.dt, map_output.uid
+     USING 'reduce_script'
+     AS date, count;
+   FROM (
+     FROM pv_users
+     SELECT TRANSFORM(pv_users.userid, pv_users.date)
+     USING 'map_script'
+     AS dt, uid
+     CLUSTER BY dt) map_output
+   INSERT OVERWRITE TABLE pv_users_reduced
+     SELECT TRANSFORM(map_output.dt, map_output.uid)
      USING 'reduce_script'
      AS date, count;
  }}}
@@ -54, +75 @@

  {{{
    FROM (
      FROM pv_users
-     MAP ( pv_users.userid, pv_users.date )
+     MAP pv_users.userid, pv_users.date
      USING 'map_script'
      CLUSTER BY key) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE ( map_output.key, map_output.value )
+     REDUCE map_output.key, map_output.value
      USING 'reduce_script'
      AS date, count;
  }}}

Reply via email to