Author: buildbot
Date: Wed Oct 29 03:38:42 2014
New Revision: 927238

Log:
Staging update by buildbot for crunch

Modified:
    websites/staging/crunch/trunk/content/   (props changed)
    websites/staging/crunch/trunk/content/user-guide.html

Propchange: websites/staging/crunch/trunk/content/
------------------------------------------------------------------------------
--- cms:source-revision (original)
+++ cms:source-revision Wed Oct 29 03:38:42 2014
@@ -1 +1 @@
-1635032
+1635033

Modified: websites/staging/crunch/trunk/content/user-guide.html
==============================================================================
--- websites/staging/crunch/trunk/content/user-guide.html (original)
+++ websites/staging/crunch/trunk/content/user-guide.html Wed Oct 29 03:38:42 
2014
@@ -579,104 +579,100 @@ can be used to kick off a shuffle on the
   }
 </pre>
 
-<p>If you find yourself in a situation where you have a 
PCollection&lt;Pair&lt;K, V&gt;&gt; and you need a PTable&lt;K, V&gt;, the
+<p>If you find yourself in a situation where you have a 
<code>PCollection&lt;Pair&lt;K, V&gt;&gt;</code> and you need a 
<code>PTable&lt;K, V&gt;</code>, the
 <a href="apidocs/0.10.0/org/apache/crunch/lib/PTables.html">PTables</a> 
library class has methods that will do the conversion for you.</p>
 <p>Let's look at some more example PTypes created using the common primitive 
and collection types. For most of your pipelines,
 you will use one type family exclusively, and so you can cut down on some of 
the boilerplate in your classes by importing
 all of the methods from the <code>Writables</code> or <code>Avros</code> 
classes into your class:</p>
-<pre>
-// Import all of the PType factory methods from Avros
-import static org.apache.crunch.types.avro.Avros.*;
+<div class="codehilite"><pre><span class="c1">// Import all of the PType 
factory methods from Avros</span>
+<span class="kn">import</span> <span class="nn">static</span> <span 
class="n">org</span><span class="p">.</span><span class="n">apache</span><span 
class="p">.</span><span class="n">crunch</span><span class="p">.</span><span 
class="n">types</span><span class="p">.</span><span class="n">avro</span><span 
class="p">.</span><span class="n">Avros</span><span class="p">.</span><span 
class="o">*</span><span class="p">;</span>
+
+<span class="kn">import</span> <span class="nn">org</span><span 
class="p">.</span><span class="n">apache</span><span class="p">.</span><span 
class="n">crunch</span><span class="p">.</span><span class="n">Pair</span><span 
class="p">;</span>
+<span class="kn">import</span> <span class="nn">org</span><span 
class="p">.</span><span class="n">apache</span><span class="p">.</span><span 
class="n">crunch</span><span class="p">.</span><span 
class="n">Tuple3</span><span class="p">;</span>
+<span class="kn">import</span> <span class="nn">org</span><span 
class="p">.</span><span class="n">apache</span><span class="p">.</span><span 
class="n">crunch</span><span class="p">.</span><span 
class="n">TupleN</span><span class="p">;</span>
+
+<span class="kn">import</span> <span class="nn">java</span><span 
class="p">.</span><span class="n">nio</span><span class="p">.</span><span 
class="n">ByteBuffer</span><span class="p">;</span>
+<span class="kn">import</span> <span class="nn">java</span><span 
class="p">.</span><span class="n">util</span><span class="p">.</span><span 
class="n">Collection</span><span class="p">;</span>
+<span class="kn">import</span> <span class="nn">java</span><span 
class="p">.</span><span class="n">util</span><span class="p">.</span><span 
class="n">Map</span><span class="p">;</span>
+
+<span class="n">public</span> <span class="k">class</span> <span 
class="n">MyPipeline</span> <span class="p">{</span>
+
+  <span class="c1">// Common primitive types</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">intType</span> <span class="o">=</span> <span 
class="n">ints</span><span class="p">();</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="o">&gt;</span> <span 
class="n">longType</span> <span class="o">=</span> <span 
class="n">longs</span><span class="p">();</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Double</span><span class="o">&gt;</span> <span 
class="n">doubleType</span> <span class="o">=</span> <span 
class="n">doubles</span><span class="p">();</span>
+  <span class="c1">// Bytes are represented by java.nio.ByteBuffer</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">ByteBuffer</span><span class="o">&gt;</span> <span 
class="n">bytesType</span> <span class="o">=</span> <span 
class="n">bytes</span><span class="p">();</span>
+
+  <span class="c1">// A PTableType: using tableOf will return a PTable instead 
of a</span>
+  <span class="c1">// PCollection from a parallelDo call.</span>
+  <span class="n">PTableType</span><span class="o">&lt;</span><span 
class="n">String</span><span class="p">,</span> <span 
class="n">Boolean</span><span class="o">&gt;</span> <span 
class="n">tableType</span> <span class="o">=</span> <span 
class="n">tableOf</span><span class="p">(</span><span 
class="n">strings</span><span class="p">(),</span> <span 
class="n">booleans</span><span class="p">());</span>
+
+  <span class="c1">// Pair types: </span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Pair</span><span class="o">&lt;</span><span 
class="n">String</span><span class="p">,</span> <span 
class="n">Boolean</span><span class="o">&gt;&gt;</span> <span 
class="n">pairType</span> <span class="o">=</span> <span 
class="n">pairs</span><span class="p">(</span><span 
class="n">strings</span><span class="p">(),</span> <span 
class="n">booleans</span><span class="p">());</span> 
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Pair</span><span class="o">&lt;</span><span 
class="n">String</span><span class="p">,</span> <span 
class="n">Pair</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="p">,</span> <span class="n">Long</span><span 
class="o">&gt;&gt;&gt;</span> <span class="n">nestedPairType</span> <span 
class="o">=</span> <span class="n">pairs</span><span class="p">(</span><span 
class="n">strings</span><span class="p">(),</span> <span 
class="n">pairs</span><span class="p">(</span><span class="n">longs</span><span 
class="p">(),</span> <span class="n">longs</span><span class="p">()));</span>
+
+  <span class="c1">// A triple</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Tuple3</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="p">,</span> <span class="n">Float</span><span 
class="p">,</span> <span class="n">Float</span><span class="o">&gt;&gt;</span> 
<span class="n">tripType</span> <span class="o">=</span> <span 
class="n">trips</span><span class="p">(</span><span class="n">longs</span><span 
class="p">(),</span> <span class="n">floats</span><span class="p">(),</span> 
<span class="n">floats</span><span class="p">());</span>
+  <span class="c1">// An arbitrary length tuple-- note that we lose the 
generic type information</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">TupleN</span><span class="o">&gt;</span> <span 
class="n">tupleType</span> <span class="o">=</span> <span 
class="n">tupleN</span><span class="p">(</span><span class="n">ints</span><span 
class="p">(),</span> <span class="n">ints</span><span class="p">(),</span> 
<span class="n">floats</span><span class="p">(),</span> <span 
class="n">strings</span><span class="p">(),</span> <span 
class="n">strings</span><span class="p">(),</span> <span 
class="n">ints</span><span class="p">());</span>
+
+  <span class="c1">// A Collection type</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Collection</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="o">&gt;&gt;</span> <span 
class="n">longsType</span> <span class="o">=</span> <span 
class="n">collections</span><span class="p">(</span><span 
class="n">longs</span><span class="p">());</span>
+  <span class="c1">// A Map Type-- note that the keys are always strings, we 
only specify the value.</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Map</span><span class="o">&lt;</span><span 
class="n">String</span><span class="p">,</span> <span 
class="n">Boolean</span><span class="o">&gt;&gt;</span> <span 
class="n">mapType</span> <span class="o">=</span> <span 
class="n">maps</span><span class="p">(</span><span 
class="n">booleans</span><span class="p">());</span>
+
+  <span class="c1">// A Pair of collections</span>
+  <span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Pair</span><span class="o">&lt;</span><span 
class="n">Collection</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">&gt;</span><span class="p">,</span> 
<span class="n">Collection</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="o">&gt;&gt;&gt;</span> <span 
class="n">pairColType</span> <span class="o">=</span> <span 
class="n">pairs</span><span class="p">(</span>
+      <span class="n">collections</span><span class="p">(</span><span 
class="n">strings</span><span class="p">()),</span>
+      <span class="n">collections</span><span class="p">(</span><span 
class="n">longs</span><span class="p">()));</span>
+<span class="p">}</span>
+</pre></div>
 
-import org.apache.crunch.Pair;
-import org.apache.crunch.Tuple3;
-import org.apache.crunch.TupleN;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Map;
-
-public class MyPipeline {
-
-  // Common primitive types
-  PType&lt;Integer&gt; intType = ints();
-  PType&lt;Long&gt; longType = longs();
-  PType&lt;Double&gt; doubleType = doubles();
-  // Bytes are represented by java.nio.ByteBuffer
-  PType&lt;ByteBuffer&gt; bytesType = bytes();
-
-  // A PTableType: using tableOf will return a PTable instead of a
-  // PCollection from a parallelDo call.
-  PTableType&lt;String, Boolean&gt; tableType = tableOf(strings(), booleans());
-
-  // Pair types: 
-  PType&lt;Pair&lt;String, Boolean&gt;&gt; pairType = pairs(strings(), 
booleans()); 
-  PType&lt;Pair&lt;String, Pair&lt;Long, Long&gt;&gt; nestedPairType = 
pairs(strings(), pairs(longs(), longs()));
-
-  // A triple
-  PType&lt;Tuple3&lt;Long, Float, Float&gt;&gt; tripType = trips(longs(), 
floats(), floats());
-  // An arbitrary length tuple-- note that we lose the generic type information
-  PType&lt;TupleN&gt; tupleType = tupleN(ints(), ints(), floats(), strings(), 
strings(), ints());
-
-  // A Collection type
-  PType&lt;Collection&lt;Long&gt;&gt; longsType = collections(longs());
-  // A Map Type-- note that the keys are always strings, we only specify the 
value.
-  PType&lt;Map&lt;String, Boolean&gt;&gt; mapType = maps(booleans());
-
-  // A Pair of collections
-  PType&lt;Pair&lt;Collection&lt;String&gt;, Collection&lt;Long&gt;&gt;&gt; 
pairColType = pairs(
-      collections(strings()),
-      collections(longs()));
-}
-</pre>
 
 <p>Both type families also have a method named <code>PType&lt;T&gt; 
records(Class&lt;T&gt; clazz)</code> that can be used to create PTypes that 
support the common
 record format for each type family. For the WritableTypeFamily, the records 
method supports PTypes for implementations of the <code>Writable</code>
 interface, and for the AvroTypeFamily, the records method supports PTypes for 
implementations of Avro's <code>IndexedRecord</code> interface, which
 includes both Avro generic and specific records:</p>
-<pre>
-  PType&lt;FooWritable&gt; fwType1 = Writables.records(FooWritable.class);
-  // The more obvious "writables" method also works.
-  PType&lt;FooWritable&gt; fwType = Writables.writables(FooWritable.class);
-
-  // For a generated Avro class, this works:
-  PType&lt;Person&gt; personType1 = Avros.records(Person.class);
-  // As does this:
-  PType&lt;Person&gt; personType2 = Avros.containers(Person.class); 
-  // If you only have a schema, you can create a generic type, like this:
-  org.apache.avro.Schema schema = ...;
-  PType&lt;Record&gt; avroGenericType = Avros.generics(schema);
-</pre>
+<div class="codehilite"><pre><span class="n">PType</span><span 
class="o">&lt;</span><span class="n">FooWritable</span><span 
class="o">&gt;</span> <span class="n">fwType1</span> <span class="p">=</span> 
<span class="n">Writables</span><span class="p">.</span><span 
class="n">records</span><span class="p">(</span><span 
class="n">FooWritable</span><span class="p">.</span><span 
class="n">class</span><span class="p">);</span>
+<span class="o">//</span> <span class="n">The</span> <span 
class="n">more</span> <span class="n">obvious</span> &quot;<span 
class="n">writables</span>&quot; <span class="n">method</span> <span 
class="n">also</span> <span class="n">works</span><span class="p">.</span>
+<span class="n">PType</span><span class="o">&lt;</span><span 
class="n">FooWritable</span><span class="o">&gt;</span> <span 
class="n">fwType</span> <span class="p">=</span> <span 
class="n">Writables</span><span class="p">.</span><span 
class="n">writables</span><span class="p">(</span><span 
class="n">FooWritable</span><span class="p">.</span><span 
class="n">class</span><span class="p">);</span>
+
+<span class="o">//</span> <span class="n">For</span> <span class="n">a</span> 
<span class="n">generated</span> <span class="n">Avro</span> <span 
class="n">class</span><span class="p">,</span> <span class="n">this</span> 
<span class="n">works</span><span class="p">:</span>
+<span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Person</span><span class="o">&gt;</span> <span 
class="n">personType1</span> <span class="p">=</span> <span 
class="n">Avros</span><span class="p">.</span><span 
class="n">records</span><span class="p">(</span><span 
class="n">Person</span><span class="p">.</span><span 
class="n">class</span><span class="p">);</span>
+<span class="o">//</span> <span class="n">As</span> <span 
class="n">does</span> <span class="n">this</span><span class="p">:</span>
+<span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Person</span><span class="o">&gt;</span> <span 
class="n">personType2</span> <span class="p">=</span> <span 
class="n">Avros</span><span class="p">.</span><span 
class="n">containers</span><span class="p">(</span><span 
class="n">Person</span><span class="p">.</span><span 
class="n">class</span><span class="p">);</span> 
+<span class="o">//</span> <span class="n">If</span> <span class="n">you</span> 
<span class="n">only</span> <span class="n">have</span> <span 
class="n">a</span> <span class="n">schema</span><span class="p">,</span> <span 
class="n">you</span> <span class="n">can</span> <span class="n">create</span> 
<span class="n">a</span> <span class="n">generic</span> <span 
class="n">type</span><span class="p">,</span> <span class="n">like</span> <span 
class="n">this</span><span class="p">:</span>
+<span class="n">org</span><span class="p">.</span><span 
class="n">apache</span><span class="p">.</span><span class="n">avro</span><span 
class="p">.</span><span class="n">Schema</span> <span class="n">schema</span> 
<span class="p">=</span> <span class="p">...;</span>
+<span class="n">PType</span><span class="o">&lt;</span><span 
class="n">Record</span><span class="o">&gt;</span> <span 
class="n">avroGenericType</span> <span class="p">=</span> <span 
class="n">Avros</span><span class="p">.</span><span 
class="n">generics</span><span class="p">(</span><span 
class="n">schema</span><span class="p">);</span>
+</pre></div>
+
 
 <p>The <a 
href="apidocs/0.10.0/org/apache/crunch/types/avro/Avros.html">Avros</a> class 
also has a <code>reflects</code> method for creating PTypes
 for POJOs using Avro's reflection-based serialization mechanism. There are a 
couple of restrictions on the structure of
 the POJO:</p>
 <ol>
 <li>It must have a default, no-arg constructor.</li>
-<li>All of its fields must be Avro primitive types or collection types that 
have Avro equivalents, like <code>ArrayList</code> and
-<code>HashMap&lt;String, T&gt;</code>. You may also have arrays of Avro 
primitive types.</li>
-</ol>
-<pre>
-  // Declare an inline data type and use it for Crunch serialization
-  public static class UrlData {
-    // The fields don't have to be public, just doing this for the example.
-    double curPageRank;
-    String[] outboundUrls;
-
-    // Remember: you must have a no-arg constructor. 
-    public UrlData() { this(0.0, new String[0]); }
-
-    // The regular constructor
-    public UrlData(double pageRank, String[] outboundUrls) {
-      this.curPageRank = pageRank;
-      this.outboundUrls = outboundUrls;
-    }
+<li>
+<p>All of its fields must be Avro primitive types or collection types that 
have Avro equivalents, like <code>ArrayList</code> and
+<code>HashMap&lt;String, T&gt;</code>. You may also have arrays of Avro 
primitive types.</p>
+<p>// Declare an inline data type and use it for Crunch serialization
+public static class UrlData {
+  // The fields don't have to be public, just doing this for the example.
+  double curPageRank;
+  String[] outboundUrls;</p>
+<p>// Remember: you must have a no-arg constructor. 
+  public UrlData() { this(0.0, new String[0]); }</p>
+<p>// The regular constructor
+  public UrlData(double pageRank, String[] outboundUrls) {
+    this.curPageRank = pageRank;
+    this.outboundUrls = outboundUrls;
   }
-
-  PType&lt;UrlData&gt; urlDataType = Avros.reflects(UrlData.class);
-  PTableType&lt;String, UrlData&gt; pageRankType = 
Avros.tableOf(Avros.strings(), urlDataType);
-</pre>
-
+}</p>
+<p>PType&lt;UrlData&gt; urlDataType = Avros.reflects(UrlData.class);
+PTableType&lt;String, UrlData&gt; pageRankType = Avros.tableOf(Avros.strings(), 
urlDataType);</p>
+</li>
+</ol>
 <p>Avro reflection is a great way to define intermediate types for your Crunch 
pipelines; not only is your logic clear
 and easy to test, but the fact that the data is written out as Avro records 
means that you can use tools like Hive and Pig
 to query intermediate results to aid in debugging pipeline failures.</p>


Reply via email to