Regenerate website

Project: http://git-wip-us.apache.org/repos/asf/beam-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam-site/commit/4acb6411
Tree: http://git-wip-us.apache.org/repos/asf/beam-site/tree/4acb6411
Diff: http://git-wip-us.apache.org/repos/asf/beam-site/diff/4acb6411

Branch: refs/heads/asf-site
Commit: 4acb6411a230a543930e2672f1181ea64ad49094
Parents: be9e207
Author: Davor Bonaci <da...@google.com>
Authored: Thu Mar 16 16:21:09 2017 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Mar 16 16:21:09 2017 -0700

----------------------------------------------------------------------
 content/blog/2017/03/16/python-sdk-release.html | 255 +++++++++++++++++++
 content/blog/index.html                         |  16 ++
 content/feed.xml                                | 166 ++++++------
 content/index.html                              |   4 +-
 4 files changed, 347 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam-site/blob/4acb6411/content/blog/2017/03/16/python-sdk-release.html
----------------------------------------------------------------------
diff --git a/content/blog/2017/03/16/python-sdk-release.html 
b/content/blog/2017/03/16/python-sdk-release.html
new file mode 100644
index 0000000..cb1320c
--- /dev/null
+++ b/content/blog/2017/03/16/python-sdk-release.html
@@ -0,0 +1,255 @@
+<!DOCTYPE html>
+<html lang="en">
+
+  <head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Python SDK released in Apache Beam 0.6.0</title>
+  <meta name="description" content="Apache Beam’s latest release, version 
0.6.0, introduces a new SDK – this time, for the Python programming language. 
The Python SDK joins the Java SDK as the ...">
+
+  <link rel="stylesheet" href="/styles/site.css">
+  <link rel="stylesheet" href="/css/theme.css">
+  <script 
src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <script src="/js/language-switch.js"></script>
+  <link rel="canonical" 
href="https://beam.apache.org/blog/2017/03/16/python-sdk-release.html" 
data-proofer-ignore>
+  <link rel="alternate" type="application/rss+xml" title="Apache Beam" 
href="https://beam.apache.org/feed.xml">
+  <script>
+    
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+    
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+    
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+    ga('create', 'UA-73650088-1', 'auto');
+    ga('send', 'pageview');
+
+  </script>
+  <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
+</head>
+
+
+  <body role="document">
+
+    <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <a href="/" class="navbar-brand" >
+        <img alt="Brand" style="height: 25px" 
src="/images/beam_logo_navbar.png">
+      </a>
+      <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#navbar" aria-expanded="false" 
aria-controls="navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+    </div>
+    <div id="navbar" class="navbar-collapse collapse">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+                 <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Get Started <span 
class="caret"></span></a>
+                 <ul class="dropdown-menu">
+                         <li><a href="/get-started/beam-overview/">Beam 
Overview</a></li>
+        <li><a href="/get-started/quickstart-java/">Quickstart - Java</a></li>
+        <li><a href="/get-started/quickstart-py/">Quickstart - Python</a></li>
+                         <li role="separator" class="divider"></li>
+                         <li class="dropdown-header">Example Walkthroughs</li>
+                         <li><a 
href="/get-started/wordcount-example/">WordCount</a></li>
+                         <li><a 
href="/get-started/mobile-gaming-example/">Mobile Gaming</a></li>
+              <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Resources</li>
+              <li><a href="/get-started/downloads">Downloads</a></li>
+              <li><a href="/get-started/support">Support</a></li>
+                 </ul>
+           </li>
+        <li class="dropdown">
+                 <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Documentation <span 
class="caret"></span></a>
+                 <ul class="dropdown-menu">
+                         <li><a href="/documentation">Using the 
Documentation</a></li>
+                         <li role="separator" class="divider"></li>
+                         <li class="dropdown-header">Beam Concepts</li>
+                         <li><a 
href="/documentation/programming-guide/">Programming Guide</a></li>
+                         <li><a href="/documentation/resources/">Additional 
Resources</a></li>
+                         <li role="separator" class="divider"></li>
+              <li class="dropdown-header">Pipeline Fundamentals</li>
+              <li><a 
href="/documentation/pipelines/design-your-pipeline/">Design Your 
Pipeline</a></li>
+              <li><a 
href="/documentation/pipelines/create-your-pipeline/">Create Your 
Pipeline</a></li>
+              <li><a href="/documentation/pipelines/test-your-pipeline/">Test 
Your Pipeline</a></li>
+              <li role="separator" class="divider"></li>
+                         <li class="dropdown-header">SDKs</li>
+                         <li><a href="/documentation/sdks/java/">Java 
SDK</a></li>
+                         <li><a href="/documentation/sdks/javadoc/0.6.0/" 
target="_blank">Java SDK API Reference <img src="/images/external-link-icon.png"
+                 width="14" height="14"
+                 alt="External link."></a>
+        </li>
+        <li><a href="/documentation/sdks/python/">Python SDK</a></li>
+        <li><a href="/documentation/sdks/pydoc/0.6.0/" target="_blank">Python 
SDK API Reference <img src="/images/external-link-icon.png"
+                 width="14" height="14"
+                 alt="External link."></a>
+        </li>
+                         <li role="separator" class="divider"></li>
+                         <li class="dropdown-header">Runners</li>
+                         <li><a 
href="/documentation/runners/capability-matrix/">Capability Matrix</a></li>
+                         <li><a href="/documentation/runners/direct/">Direct 
Runner</a></li>
+                         <li><a href="/documentation/runners/apex/">Apache 
Apex Runner</a></li>
+                         <li><a href="/documentation/runners/flink/">Apache 
Flink Runner</a></li>
+                         <li><a href="/documentation/runners/spark/">Apache 
Spark Runner</a></li>
+                         <li><a href="/documentation/runners/dataflow/">Cloud 
Dataflow Runner</a></li>
+                 </ul>
+           </li>
+        <li class="dropdown">
+                 <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Contribute <span 
class="caret"></span></a>
+                 <ul class="dropdown-menu">
+                         <li><a href="/contribute">Get Started 
Contributing</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Guides</li>
+                         <li><a 
href="/contribute/contribution-guide/">Contribution Guide</a></li>
+        <li><a href="/contribute/testing/">Testing Guide</a></li>
+        <li><a href="/contribute/release-guide/">Release Guide</a></li>
+        <li><a href="/contribute/ptransform-style-guide/">PTransform Style 
Guide</a></li>
+        <li role="separator" class="divider"></li>
+        <li class="dropdown-header">Technical References</li>
+        <li><a href="/contribute/design-principles/">Design Principles</a></li>
+                         <li><a href="/contribute/work-in-progress/">Ongoing 
Projects</a></li>
+        <li><a href="/contribute/source-repository/">Source Repository</a></li>
+        <li role="separator" class="divider"></li>
+                         <li class="dropdown-header">Promotion</li>
+        <li><a href="/contribute/presentation-materials/">Presentation 
Materials</a></li>
+        <li><a href="/contribute/logos/">Logos and Design</a></li>
+        <li role="separator" class="divider"></li>
+        <li><a href="/contribute/maturity-model/">Maturity Model</a></li>
+        <li><a href="/contribute/team/">Team</a></li>
+                 </ul>
+           </li>
+
+        <li><a href="/blog">Blog</a></li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false"><img 
src="https://www.apache.org/foundation/press/kit/feather_small.png" alt="Apache 
Logo" style="height:24px;">Apache Software Foundation<span 
class="caret"></span></a>
+          <ul class="dropdown-menu dropdown-menu-right">
+            <li><a href="http://www.apache.org/">ASF Homepage</a></li>
+            <li><a href="http://www.apache.org/licenses/">License</a></li>
+            <li><a href="http://www.apache.org/security/">Security</a></li>
+            <li><a 
href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+            <li><a 
href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+            <li><a 
href="https://www.apache.org/foundation/policies/conduct">Code of 
Conduct</a></li>
+          </ul>
+        </li>
+      </ul>
+    </div><!--/.nav-collapse -->
+  </div>
+</nav>
+
+
+<link rel="stylesheet" href="">
+
+
+    <div class="container" role="main">
+
+      <div class="row">
+        
+
+<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
+
+  <header class="post-header">
+    <h1 class="post-title" itemprop="name headline">Python SDK released in 
Apache Beam 0.6.0</h1>
+    <p class="post-meta"><time datetime="2017-03-16T01:00:01-07:00" 
itemprop="datePublished">Mar 16, 2017</time> •  Ahmet Altay 
+</p>
+  </header>
+
+  <div class="post-content" itemprop="articleBody">
+    <p>Apache Beam’s latest release, version <a 
href="/get-started/downloads/">0.6.0</a>, introduces a new SDK – this time, 
for the Python programming language. The Python SDK joins the Java SDK as the 
second implementation of the Beam programming model.</p>
+
+<!--more-->
+
+<p>The Python SDK incorporates all of the main concepts of the Beam model, 
including ParDo, GroupByKey, Windowing, and others. It features extensible IO 
APIs for writing bounded sources and sinks, and provides built-in 
implementation for reading and writing Text, Avro, and TensorFlow record files, 
as well as connectors to Google BigQuery and Google Cloud Datastore.</p>
+
+<p>There are two runners capable of executing pipelines written with the 
Python SDK today: <a href="/documentation/runners/direct/">Direct Runner</a> 
and <a href="/documentation/runners/dataflow/">Dataflow Runner</a>, both of 
which are currently limited to batch execution only. Upcoming features will 
shortly bring the benefits of the Python SDK to additional runners.</p>
+
+<h4 id="try-the-apache-beam-python-sdk">Try the Apache Beam Python SDK</h4>
+
+<p>If you would like to try out the Python SDK, a good place to start is the 
<a href="/get-started/quickstart-py/">Quickstart</a>. After that, you can take 
a look at additional <a 
href="https://github.com/apache/beam/tree/v0.6.0/sdks/python/apache_beam/examples">examples</a>,
 and deep dive into the <a href="/documentation/sdks/pydoc/">API 
reference</a>.</p>
+
+<p>Let’s take a look at a quick example together. First, install the <code 
class="highlighter-rouge">apache-beam</code> package from PyPI and start your 
Python interpreter.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>$ pip install 
apache-beam
+$ python
+</code></pre>
+</div>
+
+<p>We will harness the power of Apache Beam to estimate Pi in honor of the 
recently passed Pi Day.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>import random
+import apache_beam as beam
+
+def run_trials(count):
+  """Throw darts into unit square and count how many fall into unit circle."""
+  inside = 0
+  for _ in xrange(count):
+    x, y = random.uniform(0, 1), random.uniform(0, 1)
+    inside += 1 if x*x + y*y &lt;= 1.0 else 0
+  return count, inside
+
+def combine_results(results):
+  """Given all the trial results, estimate pi."""
+  total, inside = sum(r[0] for r in results), sum(r[1] for r in results)
+  return total, inside, 4 * float(inside) / total if total &gt; 0 else 0
+
+p = beam.Pipeline()
+(p | beam.Create([500] * 10)  # Create 10 experiments with 500 samples each.
+   | beam.Map(run_trials)     # Run experiments in parallel.
+   | beam.CombineGlobally(combine_results)      # Combine the results.
+   | beam.io.WriteToText('./pi_estimate.txt'))  # Write PI estimate to a file.
+
+p.run()
+</code></pre>
+</div>
+
+<p>This example estimates Pi by throwing random darts into the unit square and 
keeping track of the fraction of those darts that fell into the unit circle 
(see the full <a 
href="https://github.com/apache/beam/blob/v0.6.0/sdks/python/apache_beam/examples/complete/estimate_pi.py">example</a>
 for details). If you are curious, you can check the result of our estimation 
by looking at the output file.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>$ cat 
pi_estimate.txt*
+</code></pre>
+</div>
+
+<h4 id="roadmap">Roadmap</h4>
+
+<p>The first thing on the Python SDK’s roadmap is to address two of its 
limitations. First, the existing runners are currently limited to bounded 
PCollections, and we are looking forward to extending the SDK to support 
unbounded PCollections (“streaming”). Additionally, we are working on 
extending support to more Apache Beam runners, and the upcoming Fn API will do 
the heavy lifting.</p>
+
+<p>Both of these improvements will enable the Python SDK to fulfill the 
mission of Apache Beam: a unified programming model for batch and streaming 
data processing that can run on any execution engine.</p>
+
+<h4 id="join-us">Join us!</h4>
+
+<p>Please consider joining us, whether as a user or a contributor, as we work 
towards our first release with API stability. If you’d like to try out Apache 
Beam today, check out the latest <a href="/get-started/downloads/">0.6.0</a> 
release. We welcome contributions and participation from anyone through our 
mailing lists, issue tracker, pull requests, and events.</p>
+
+  </div>
+
+</article>
+
+      </div>
+
+
+    <hr>
+  <div class="row">
+      <div class="col-xs-12">
+          <footer>
+              <p class="text-center">
+                &copy; Copyright
+                <a href="http://www.apache.org">The Apache Software 
Foundation</a>,
+                2017. All Rights Reserved.
+              </p>
+              <p class="text-center">
+                <a href="/privacy_policy">Privacy Policy</a> |
+                <a href="/feed.xml">RSS Feed</a>
+              </p>
+          </footer>
+      </div>
+  </div>
+  <!-- container div end -->
+</div>
+
+
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/4acb6411/content/blog/index.html
----------------------------------------------------------------------
diff --git a/content/blog/index.html b/content/blog/index.html
index 73c3874..ca0732f 100644
--- a/content/blog/index.html
+++ b/content/blog/index.html
@@ -155,6 +155,22 @@
 <p>This is the blog for the Apache Beam project. This blog contains news and 
updates
 for the project.</p>
 
+<h3 
id="a-classpost-link-hrefblog20170316python-sdk-releasehtmlpython-sdk-released-in-apache-beam-060a"><a
 class="post-link" href="/blog/2017/03/16/python-sdk-release.html">Python SDK 
released in Apache Beam 0.6.0</a></h3>
+<p><i>Mar 16, 2017 •  Ahmet Altay 
+</i></p>
+
+<p>Apache Beam’s latest release, version <a 
href="/get-started/downloads/">0.6.0</a>, introduces a new SDK – this time, 
for the Python programming language. The Python SDK joins the Java SDK as the 
second implementation of the Beam programming model.</p>
+
+<!-- Render a "read more" button if the post is longer than the excerpt -->
+
+<p>
+<a class="btn btn-default btn-sm" 
href="/blog/2017/03/16/python-sdk-release.html" role="button">
+Read more&nbsp;<span class="glyphicon glyphicon-menu-right" 
aria-hidden="true"></span>
+</a>
+</p>
+
+<hr />
+
 <h3 
id="a-classpost-link-hrefblog20170213stateful-processinghtmlstateful-processing-with-apache-beama"><a
 class="post-link" href="/blog/2017/02/13/stateful-processing.html">Stateful 
processing with Apache Beam</a></h3>
 <p><i>Feb 13, 2017 •  Kenneth Knowles [<a 
href="https://twitter.com/KennKnowles">@KennKnowles</a>]
 </i></p>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/4acb6411/content/feed.xml
----------------------------------------------------------------------
diff --git a/content/feed.xml b/content/feed.xml
index d0641ad..e27ee59 100644
--- a/content/feed.xml
+++ b/content/feed.xml
@@ -9,6 +9,80 @@
     <generator>Jekyll v3.2.0</generator>
     
       <item>
+        <title>Python SDK released in Apache Beam 0.6.0</title>
+        <description>&lt;p&gt;Apache Beam’s latest release, version &lt;a 
href=&quot;/get-started/downloads/&quot;&gt;0.6.0&lt;/a&gt;, introduces a new 
SDK – this time, for the Python programming language. The Python SDK joins 
the Java SDK as the second implementation of the Beam programming 
model.&lt;/p&gt;
+
+&lt;!--more--&gt;
+
+&lt;p&gt;The Python SDK incorporates all of the main concepts of the Beam 
model, including ParDo, GroupByKey, Windowing, and others. It features 
extensible IO APIs for writing bounded sources and sinks, and provides built-in 
implementation for reading and writing Text, Avro, and TensorFlow record files, 
as well as connectors to Google BigQuery and Google Cloud Datastore.&lt;/p&gt;
+
+&lt;p&gt;There are two runners capable of executing pipelines written with the 
Python SDK today: &lt;a 
href=&quot;/documentation/runners/direct/&quot;&gt;Direct Runner&lt;/a&gt; and 
&lt;a href=&quot;/documentation/runners/dataflow/&quot;&gt;Dataflow 
Runner&lt;/a&gt;, both of which are currently limited to batch execution only. 
Upcoming features will shortly bring the benefits of the Python SDK to 
additional runners.&lt;/p&gt;
+
+&lt;h4 id=&quot;try-the-apache-beam-python-sdk&quot;&gt;Try the Apache Beam 
Python SDK&lt;/h4&gt;
+
+&lt;p&gt;If you would like to try out the Python SDK, a good place to start is 
the &lt;a href=&quot;/get-started/quickstart-py/&quot;&gt;Quickstart&lt;/a&gt;. 
After that, you can take a look at additional &lt;a 
href=&quot;https://github.com/apache/beam/tree/v0.6.0/sdks/python/apache_beam/examples&quot;&gt;examples&lt;/a&gt;,
 and deep dive into the &lt;a 
href=&quot;/documentation/sdks/pydoc/&quot;&gt;API 
reference&lt;/a&gt;.&lt;/p&gt;
+
+&lt;p&gt;Let’s take a look at a quick example together. First, install the 
&lt;code class=&quot;highlighter-rouge&quot;&gt;apache-beam&lt;/code&gt; 
package from PyPI and start your Python interpreter.&lt;/p&gt;
+
+&lt;div class=&quot;highlighter-rouge&quot;&gt;&lt;pre 
class=&quot;highlight&quot;&gt;&lt;code&gt;$ pip install apache-beam
+$ python
+&lt;/code&gt;&lt;/pre&gt;
+&lt;/div&gt;
+
+&lt;p&gt;We will harness the power of Apache Beam to estimate Pi in honor of 
the recently passed Pi Day.&lt;/p&gt;
+
+&lt;div class=&quot;highlighter-rouge&quot;&gt;&lt;pre 
class=&quot;highlight&quot;&gt;&lt;code&gt;import random
+import apache_beam as beam
+
+def run_trials(count):
+  &quot;&quot;&quot;Throw darts into unit square and count how many fall into 
unit circle.&quot;&quot;&quot;
+  inside = 0
+  for _ in xrange(count):
+    x, y = random.uniform(0, 1), random.uniform(0, 1)
+    inside += 1 if x*x + y*y &amp;lt;= 1.0 else 0
+  return count, inside
+
+def combine_results(results):
+  &quot;&quot;&quot;Given all the trial results, estimate pi.&quot;&quot;&quot;
+  total, inside = sum(r[0] for r in results), sum(r[1] for r in results)
+  return total, inside, 4 * float(inside) / total if total &amp;gt; 0 else 0
+
+p = beam.Pipeline()
+(p | beam.Create([500] * 10)  # Create 10 experiments with 500 samples each.
+   | beam.Map(run_trials)     # Run experiments in parallel.
+   | beam.CombineGlobally(combine_results)      # Combine the results.
+   | beam.io.WriteToText('./pi_estimate.txt'))  # Write PI estimate to a file.
+
+p.run()
+&lt;/code&gt;&lt;/pre&gt;
+&lt;/div&gt;
+
+&lt;p&gt;This example estimates Pi by throwing random darts into the unit 
square and keeping track of the fraction of those darts that fell into the unit 
circle (see the full &lt;a 
href=&quot;https://github.com/apache/beam/blob/v0.6.0/sdks/python/apache_beam/examples/complete/estimate_pi.py&quot;&gt;example&lt;/a&gt;
 for details). If you are curious, you can check the result of our estimation 
by looking at the output file.&lt;/p&gt;
+
+&lt;div class=&quot;highlighter-rouge&quot;&gt;&lt;pre 
class=&quot;highlight&quot;&gt;&lt;code&gt;$ cat pi_estimate.txt*
+&lt;/code&gt;&lt;/pre&gt;
+&lt;/div&gt;
+
+&lt;h4 id=&quot;roadmap&quot;&gt;Roadmap&lt;/h4&gt;
+
+&lt;p&gt;The first thing on the Python SDK’s roadmap is to address two of 
its limitations. First, the existing runners are currently limited to bounded 
PCollections, and we are looking forward to extending the SDK to support 
unbounded PCollections (“streaming”). Additionally, we are working on 
extending support to more Apache Beam runners, and the upcoming Fn API will do 
the heavy lifting.&lt;/p&gt;
+
+&lt;p&gt;Both of these improvements will enable the Python SDK to fulfill the 
mission of Apache Beam: a unified programming model for batch and streaming 
data processing that can run on any execution engine.&lt;/p&gt;
+
+&lt;h4 id=&quot;join-us&quot;&gt;Join us!&lt;/h4&gt;
+
+&lt;p&gt;Please consider joining us, whether as a user or a contributor, as we 
work towards our first release with API stability. If you’d like to try out 
Apache Beam today, check out the latest &lt;a 
href=&quot;/get-started/downloads/&quot;&gt;0.6.0&lt;/a&gt; release. We welcome 
contributions and participation from anyone through our mailing lists, issue 
tracker, pull requests, and events.&lt;/p&gt;
+</description>
+        <pubDate>Thu, 16 Mar 2017 01:00:01 -0700</pubDate>
+        
<link>https://beam.apache.org/blog/2017/03/16/python-sdk-release.html</link>
+        <guid 
isPermaLink="true">https://beam.apache.org/blog/2017/03/16/python-sdk-release.html</guid>
+        
+        
+        <category>blog</category>
+        
+      </item>
+    
+      <item>
         <title>Stateful processing with Apache Beam</title>
         <description>&lt;p&gt;Beam lets you process unbounded, out-of-order, 
global-scale data with portable
 high-level pipelines. Stateful processing is a new feature of the Beam model
@@ -1233,97 +1307,5 @@ to us via &lt;a 
href=&quot;/use/mailing-lists/&quot;&gt;user’s mailing list&lt
         
       </item>
     
-      <item>
-        <title>Where's my PCollection.map()?</title>
-        <description>&lt;p&gt;Have you ever wondered why Beam has PTransforms 
for everything instead of having methods on PCollection? Take a look at the 
history that led to this (and other) design decisions.&lt;/p&gt;
-
-&lt;!--more--&gt;
-
-&lt;p&gt;Though Beam is relatively new, its design draws heavily on many years 
of experience with real-world pipelines. One of the primary inspirations is 
&lt;a 
href=&quot;http://research.google.com/pubs/pub35650.html&quot;&gt;FlumeJava&lt;/a&gt;,
 which is Google’s internal successor to MapReduce first introduced in 
2009.&lt;/p&gt;
-
-&lt;p&gt;The original FlumeJava API has methods like &lt;code 
class=&quot;highlighter-rouge&quot;&gt;count&lt;/code&gt; and &lt;code 
class=&quot;highlighter-rouge&quot;&gt;parallelDo&lt;/code&gt; on the 
PCollections. Though slightly more succinct, this approach has many 
disadvantages to extensibility. Every new user to FlumeJava wanted to add 
transforms, and adding them as methods to PCollection simply doesn’t scale 
well. In contrast, a PCollection in Beam has a single &lt;code 
class=&quot;highlighter-rouge&quot;&gt;apply&lt;/code&gt; method which takes 
any PTransform as an argument.&lt;/p&gt;
-
-&lt;table class=&quot;table&quot;&gt;
-  &lt;tr&gt;
-    &lt;th&gt;FlumeJava&lt;/th&gt;
-    &lt;th&gt;Beam&lt;/th&gt;
-  &lt;/tr&gt;
-  &lt;tr&gt;
-    &lt;td&gt;&lt;pre&gt;
-PCollection&amp;lt;T&amp;gt; input = …
-PCollection&amp;lt;O&amp;gt; output = input.count()
-                             .parallelDo(...);
-    &lt;/pre&gt;&lt;/td&gt;
-    &lt;td&gt;&lt;pre&gt;
-PCollection&amp;lt;T&amp;gt; input = …
-PCollection&amp;lt;O&amp;gt; output = input.apply(Count.perElement())
-                             .apply(ParDo.of(...));
-    &lt;/pre&gt;&lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-
-&lt;p&gt;This is a more scalable approach for several reasons.&lt;/p&gt;
-
-&lt;h2 id=&quot;where-to-draw-the-line&quot;&gt;Where to draw the 
line?&lt;/h2&gt;
-&lt;p&gt;Adding methods to PCollection forces a line to be drawn between 
operations that are “useful” enough to merit this special treatment and 
those that are not. It is easy to make the case for flat map, group by key, and 
combine per key. But what about filter? Count? Approximate count? Approximate 
quantiles? Most frequent? WriteToMyFavoriteSource? Going too far down this path 
leads to a single enormous class that contains nearly everything one could want 
to do. (FlumeJava’s PCollection class is over 5000 lines long with around 70 
distinct operations, and it could have been &lt;em&gt;much&lt;/em&gt; larger 
had we accepted every proposal.) Furthermore, since Java doesn’t allow adding 
methods to a class, there is a sharp syntactic divide between those operations 
that are added to PCollection and those that aren’t. A traditional way to 
share code is with a library of functions, but functions (in traditional 
languages like Java at least) are written prefix-style, which
doesn’t mix well with the fluent builder style (e.g. &lt;code 
class=&quot;highlighter-rouge&quot;&gt;input.operation1().operation2().operation3()&lt;/code&gt;
 vs. &lt;code 
class=&quot;highlighter-rouge&quot;&gt;operation3(operation1(input).operation2())&lt;/code&gt;).&lt;/p&gt;
-
-&lt;p&gt;Instead in Beam we’ve chosen a style that places all 
transforms–whether they be primitive operations, composite operations bundled 
in the SDK, or part of an external library–on equal footing. This also 
facilitates alternative implementations (which may even take different options) 
that are easily interchangeable.&lt;/p&gt;
-
-&lt;table class=&quot;table&quot;&gt;
-  &lt;tr&gt;
-    &lt;th&gt;FlumeJava&lt;/th&gt;
-    &lt;th&gt;Beam&lt;/th&gt;
-  &lt;/tr&gt;
-  &lt;tr&gt;
-    &lt;td&gt;&lt;pre&gt;
-PCollection&amp;lt;O&amp;gt; output =
-    ExternalLibrary.doStuff(
-        MyLibrary.transform(input, myArgs)
-            .parallelDo(...),
-        externalLibArgs);
-    &lt;/pre&gt;&lt;/td&gt;
-    &lt;td&gt;&lt;pre&gt;
-PCollection&amp;lt;O&amp;gt; output = input
-    .apply(MyLibrary.transform(myArgs))
-    .apply(ParDo.of(...))
-    .apply(ExternalLibrary.doStuff(externalLibArgs));
-    &amp;nbsp;
-    &lt;/pre&gt;&lt;/td&gt;
-  &lt;/tr&gt;
-&lt;/table&gt;
-
-&lt;h2 id=&quot;configurability&quot;&gt;Configurability&lt;/h2&gt;
-&lt;p&gt;It makes for a fluent style to let values (PCollections) be the 
objects passed around and manipulated (i.e. the handles to the deferred 
execution graph), but it is the operations themselves that need to be 
composable, configurable, and extendable. Using PCollection methods for the 
operations doesn’t scale well here, especially in a language without default 
or keyword arguments. For example, a ParDo operation can have any number of 
side inputs and side outputs, or a write operation may have configurations 
dealing with encoding and compression. One option is to separate these out into 
multiple overloads or even methods, but that exacerbates the problems above. 
(FlumeJava evolved over a dozen overloads of the &lt;code 
class=&quot;highlighter-rouge&quot;&gt;parallelDo&lt;/code&gt; method!) Another 
option is to pass each method a configuration object that can be built up using 
more fluent idioms like the builder pattern, but at that point one might as 
well make the configuration
object the operation itself, which is what Beam does.&lt;/p&gt;
-
-&lt;h2 id=&quot;type-safety&quot;&gt;Type Safety&lt;/h2&gt;
-&lt;p&gt;Many operations can only be applied to collections whose elements are 
of a specific type. For example, the GroupByKey operation should only be 
applied to &lt;code 
class=&quot;highlighter-rouge&quot;&gt;PCollection&amp;lt;KV&amp;lt;K, 
V&amp;gt;&amp;gt;&lt;/code&gt;s. In Java at least, it’s not possible to 
restrict methods based on the element type parameter alone. In FlumeJava, this 
led us to add a &lt;code 
class=&quot;highlighter-rouge&quot;&gt;PTable&amp;lt;K, V&amp;gt;&lt;/code&gt; 
subclassing &lt;code 
class=&quot;highlighter-rouge&quot;&gt;PCollection&amp;lt;KV&amp;lt;K, 
V&amp;gt;&amp;gt;&lt;/code&gt; to contain all the operations specific to 
PCollections of key-value pairs. This leads to the same question of which 
element types are special enough to merit being captured by PCollection 
subclasses. It is not very extensible for third parties and often requires 
manual downcasts/conversions (which can’t be safely chained in Java) and 
special operations that produce these
PCollection specializations.&lt;/p&gt;
-
-&lt;p&gt;This is particularly inconvenient for transforms that produce outputs 
whose element types are the same as (or related to) their input’s element 
types, requiring extra support to generate the right subclasses (e.g. a filter 
on a PTable should produce another PTable rather than just a raw PCollection of 
key-value pairs).&lt;/p&gt;
-
-&lt;p&gt;Using PTransforms allows us to sidestep this entire issue. We can 
place arbitrary constraints on the context in which a transform may be used 
based on the type of its inputs; for instance GroupByKey is statically typed to 
only apply to a &lt;code 
class=&quot;highlighter-rouge&quot;&gt;PCollection&amp;lt;KV&amp;lt;K, 
V&amp;gt;&amp;gt;&lt;/code&gt;. The way this happens is generalizable to 
arbitrary shapes, without needing to introduce specialized types like 
PTable.&lt;/p&gt;
-
-&lt;h2 id=&quot;reusability-and-structure&quot;&gt;Reusability and 
Structure&lt;/h2&gt;
-&lt;p&gt;Though PTransforms are generally constructed at the site at which 
they’re used, by pulling them out as separate objects one is able to store 
them and pass them around.&lt;/p&gt;
-
-&lt;p&gt;As pipelines grow and evolve, it is useful to structure your pipeline 
into modular, often reusable components, and PTransforms allow one to do this 
nicely in a data-processing pipeline. In addition, modular PTransforms also 
expose the logical structure of your code to the system (e.g. for monitoring). 
Of the three different representations of the WordCount pipeline below, only 
the structured view captures the high-level intent of the pipeline. Letting 
even the simple operations be PTransforms means there’s less of an abrupt 
edge to packaging things up into composite operations.&lt;/p&gt;
-
-&lt;p&gt;&lt;img class=&quot;center-block&quot; 
src=&quot;/images/blog/simple-wordcount-pipeline.png&quot; alt=&quot;Three 
different visualizations of a simple WordCount pipeline&quot; 
width=&quot;500&quot; /&gt;&lt;/p&gt;
-
-&lt;div class=&quot;text-center&quot;&gt;
-&lt;i&gt;Three different visualizations of a simple WordCount pipeline which 
computes the number of occurrences of every word in a set of text files. The 
flat view gives the full DAG of all operations performed. The execution view 
groups operations according to how they're executed, e.g. after performing 
runner-specific optimizations like function composition. The structured view 
nests operations according to their grouping in PTransforms.&lt;/i&gt;
-&lt;/div&gt;
-
-&lt;h2 id=&quot;summary&quot;&gt;Summary&lt;/h2&gt;
-&lt;p&gt;Although it’s tempting to add methods to PCollections, such an 
approach is not scalable, extensible, or sufficiently expressive. Putting a 
single apply method on PCollection and all the logic into the operation itself 
lets us have the best of both worlds, and avoids hard cliffs of complexity by 
having a single consistent style across simple and complex pipelines, and 
between predefined and user-defined operations.&lt;/p&gt;
-</description>
-        <pubDate>Fri, 27 May 2016 09:00:00 -0700</pubDate>
-        
<link>https://beam.apache.org/blog/2016/05/27/where-is-my-pcollection-dot-map.html</link>
-        <guid 
isPermaLink="true">https://beam.apache.org/blog/2016/05/27/where-is-my-pcollection-dot-map.html</guid>
-        
-        
-        <category>blog</category>
-        
-      </item>
-    
   </channel>
 </rss>

http://git-wip-us.apache.org/repos/asf/beam-site/blob/4acb6411/content/index.html
----------------------------------------------------------------------
diff --git a/content/index.html b/content/index.html
index d160252..dcb2f23 100644
--- a/content/index.html
+++ b/content/index.html
@@ -176,6 +176,8 @@
     <h2>Blog</h2>
     <div class="list-group">
     
+    <a class="list-group-item" 
href="/blog/2017/03/16/python-sdk-release.html">Mar 16, 2017 - Python SDK 
released in Apache Beam 0.6.0</a>
+    
     <a class="list-group-item" 
href="/blog/2017/02/13/stateful-processing.html">Feb 13, 2017 - Stateful 
processing with Apache Beam</a>
     
     <a class="list-group-item" 
href="/blog/2017/02/01/graduation-media-recap.html">Feb 1, 2017 - Media recap 
of the Apache Beam graduation</a>
@@ -188,8 +190,6 @@
     
     <a class="list-group-item" 
href="/beam/update/2016/10/11/strata-hadoop-world-and-beam.html">Oct 11, 2016 - 
Strata+Hadoop World and Beam</a>
     
-    <a class="list-group-item" href="/blog/2016/08/03/six-months.html">Aug 3, 
2016 - Apache Beam: Six Months in Incubation</a>
-    
     </div>
   </div>
   <div class="col-md-6">

Reply via email to