Updated Branches: refs/heads/master 25f328044 -> 1d020be25
Document how the Hadoop Reducer implementation impacts the DoFn#process() semantics Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/1d020be2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/1d020be2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/1d020be2 Branch: refs/heads/master Commit: 1d020be259b6b23b1a5ebd0613637f50bf291dc2 Parents: 25f3280 Author: Christian Tzolov <[email protected]> Authored: Thu Jun 28 16:47:14 2012 +0200 Committer: Christian Tzolov <[email protected]> Committed: Thu Jun 28 16:47:14 2012 +0200 ---------------------------------------------------------------------- src/main/java/com/cloudera/crunch/DoFn.java | 14 ++++++++++++-- 1 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/1d020be2/src/main/java/com/cloudera/crunch/DoFn.java ---------------------------------------------------------------------- diff --git a/src/main/java/com/cloudera/crunch/DoFn.java b/src/main/java/com/cloudera/crunch/DoFn.java index b45f6aa..b1bbb73 100644 --- a/src/main/java/com/cloudera/crunch/DoFn.java +++ b/src/main/java/com/cloudera/crunch/DoFn.java @@ -52,8 +52,18 @@ public abstract class DoFn<S, T> implements Serializable { /** * Processes the records from a {@link PCollection}. * - * @param input The input record - * @param emitter The emitter to send the output to + * <br/> + * <br/> + * <b>Note:</b> Crunch can reuse a single input record object whose content + * changes on each {@link #process(Object, Emitter)} method call. 
This + * functionality is imposed by Hadoop's <a href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/Reducer.html">Reducer</a> implementation: + * <i>The framework will reuse the key and value objects that are passed into the reduce, therefore the application + * should clone the objects they want to keep a copy of.</i> + * + * @param input + * The input record. + * @param emitter + * The emitter to send the output to */ public abstract void process(S input, Emitter<T> emitter);
