http://git-wip-us.apache.org/repos/asf/arrow-site/blob/3cd84682/build/blog/2017/08/16/0.6.0-release/index.html
----------------------------------------------------------------------
diff --git a/build/blog/2017/08/16/0.6.0-release/index.html 
b/build/blog/2017/08/16/0.6.0-release/index.html
new file mode 100644
index 0000000..7c7d1f9
--- /dev/null
+++ b/build/blog/2017/08/16/0.6.0-release/index.html
@@ -0,0 +1,235 @@
+<!DOCTYPE html>
+<html lang="en-US">
+  <head>
+    <meta charset="UTF-8">
+    <title>Apache Arrow Homepage</title>
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="generator" content="Jekyll v3.4.3">
+    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
+    <link rel="icon" type="image/x-icon" href="/favicon.ico">
+
+    <link rel="stylesheet" 
href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+
+    <link href="/css/main.css" rel="stylesheet">
+    <link href="/css/syntax.css" rel="stylesheet">
+    <script src="https://code.jquery.com/jquery-3.2.1.min.js"
+            integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4="
+            crossorigin="anonymous"></script>
+    <script src="/assets/javascripts/bootstrap.min.js"></script>
+    
+    <!-- Global Site Tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments)};
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+    
+  </head>
+
+
+
+<body class="wrap">
+  <div class="container">
+    <nav class="navbar navbar-default">
+  <div class="container-fluid">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#arrow-navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a class="navbar-brand" href="/">Apache 
Arrow&#8482;&nbsp;&nbsp;&nbsp;</a>
+    </div>
+
+    <!-- Collect the nav links, forms, and other content for toggling -->
+    <div class="collapse navbar-collapse" id="arrow-navbar">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Project Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/install/">Install</a></li>
+            <li><a href="/blog/">Blog</a></li>
+            <li><a href="/release/">Releases</a></li>
+            <li><a href="https://issues.apache.org/jira/browse/ARROW">Issue 
Tracker</a></li>
+            <li><a href="https://github.com/apache/arrow">Source Code</a></li>
+            <li><a 
href="http://mail-archives.apache.org/mod_mbox/arrow-dev/">Mailing List</a></li>
+            <li><a href="https://apachearrowslackin.herokuapp.com">Slack 
Channel</a></li>
+            <li><a href="/committers/">Committers</a></li>
+            <li><a href="/powered_by/">Powered By</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Specification<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/memory_layout.html">Memory Layout</a></li>
+            <li><a href="/docs/metadata.html">Metadata</a></li>
+            <li><a href="/docs/ipc.html">Messaging / IPC</a></li>
+          </ul>
+        </li>
+
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Documentation<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/python">Python</a></li>
+            <li><a href="/docs/cpp">C++ API</a></li>
+            <li><a href="/docs/java">Java API</a></li>
+            <li><a href="/docs/c_glib">C GLib API</a></li>
+            <li><a href="/docs/js">Javascript API</a></li>
+          </ul>
+        </li>
+        <!-- <li><a href="/blog">Blog</a></li> -->
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">ASF Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="http://www.apache.org/">ASF Website</a></li>
+            <li><a href="http://www.apache.org/licenses/">License</a></li>
+            <li><a 
href="http://www.apache.org/foundation/sponsorship.html">Donate</a></li>
+            <li><a 
href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+            <li><a href="http://www.apache.org/security/">Security</a></li>
+          </ul>
+        </li>
+      </ul>
+      <a href="http://www.apache.org/">
+        <img style="float:right;" src="/img/asf_logo.svg" width="120px"/>
+      </a>
+      </div><!-- /.navbar-collapse -->
+    </div>
+  </nav>
+
+
+    <h2>
+      Apache Arrow 0.6.0 Release
+      <a href="/blog/2017/08/16/0.6.0-release/" class="permalink" 
title="Permalink">∞</a>
+    </h2>
+
+    
+
+    <div class="panel">
+      <div class="panel-body">
+        <div>
+          <span class="label label-default">Published</span>
+          <span class="published">
+            <i class="fa fa-calendar"></i>
+            16 Aug 2017
+          </span>
+        </div>
+        <div>
+          <span class="label label-default">By</span>
+          <a href="http://wesmckinney.com"><i class="fa fa-user"></i> Wes 
McKinney (wesm)</a>
+        </div>
+      </div>
+    </div>
+
+    <!--
+
+-->
+
+<p>The Apache Arrow team is pleased to announce the 0.6.0 release. It includes
+<a 
href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20(Resolved%2C%20Closed)%20AND%20fixVersion%20%3D%200.6.0"><strong>90
 resolved JIRAs</strong></a> with the new Plasma shared memory object store, and
+improvements and bug fixes to the various language implementations. The Arrow
+memory format remains stable since the 0.3.x release.</p>
+
+<p>See the <a href="http://arrow.apache.org/install">Install Page</a> to learn 
how to get the libraries for your
+platform. The <a href="http://arrow.apache.org/release/0.6.0.html">complete 
changelog</a> is also available.</p>
+
+<h2 id="plasma-shared-memory-object-store">Plasma Shared Memory Object 
Store</h2>
+
+<p>This release includes the <a 
href="http://arrow.apache.org/blog/2017/08/08/plasma-in-memory-object-store/">Plasma
 Store</a>, which you can read more about in
+the linked blog post. This system was originally developed as part of the <a 
href="https://ray-project.github.io/ray/">Ray
+Project</a> at the <a href="https://rise.cs.berkeley.edu/">UC Berkeley 
RISELab</a>. We recognized that Plasma would be
+highly valuable to the Arrow community as a tool for shared memory management
+and zero-copy deserialization. Additionally, we believe we will be able to
+develop a stronger software stack through sharing of IO and buffer management
+code.</p>
+
+<p>The Plasma store is a server application which runs as a separate process. A
+reference C++ client, with Python bindings, is made available in this
+release. Clients can be developed in Java or other languages in the future to
+enable simple sharing of complex datasets through shared memory.</p>
+
+<h2 id="arrow-format-addition-map-type">Arrow Format Addition: Map type</h2>
+
+<p>We added a Map logical type to represent ordered and unordered maps
+in-memory. This corresponds to the <code class="highlighter-rouge">MAP</code> 
logical type annotation in the Parquet
+format (where maps are represented as repeated structs).</p>
+
+<p>Map is represented as a list of structs. It is the first example of a 
logical
+type whose physical representation is a nested type. We have not yet created
+implementations of Map containers in any of the implementations, but this can
+be done in a future release.</p>
+
+<p>As an example, the Python data:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>data = [{'a': 1, 
'bb': 2, 'cc': 3}, {'dddd': 4}]
+</code></pre>
+</div>
+
+<p>Could be represented in an Arrow <code 
class="highlighter-rouge">Map&lt;String, Int32&gt;</code> as:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>Map&lt;String, 
Int32&gt; = List&lt;Struct&lt;keys: String, values: Int32&gt;&gt;
+  is_valid: [true, true]
+  offsets: [0, 3, 4]
+  values: Struct&lt;keys: String, values: Int32&gt;
+    children:
+      - keys: String
+          is_valid: [true, true, true, true]
+          offsets: [0, 1, 3, 5, 9]
+          data: abbccdddd
+      - values: Int32
+          is_valid: [true, true, true, true]
+          data: [1, 2, 3, 4]
+</code></pre>
+</div>
+<h2 id="python-changes">Python Changes</h2>
+
+<p>Some highlights of Python development outside of bug fixes and general API
+improvements include:</p>
+
+<ul>
+  <li>New <code class="highlighter-rouge">strings_to_categorical=True</code> 
option when calling <code class="highlighter-rouge">Table.to_pandas</code> will
+yield pandas <code class="highlighter-rouge">Categorical</code> types from 
Arrow binary and string columns</li>
+  <li>Expanded Hadoop Filesystem (HDFS) functionality to improve compatibility 
with
+Dask and other HDFS-aware Python libraries.</li>
+  <li>s3fs and other Dask-oriented filesystems can now be used with
+<code class="highlighter-rouge">pyarrow.parquet.ParquetDataset</code></li>
+  <li>More graceful handling of pandas’s nanosecond timestamps when writing 
to
+Parquet format. You can now pass <code 
class="highlighter-rouge">coerce_timestamps='ms'</code> to cast to
+milliseconds, or <code class="highlighter-rouge">'us'</code> for 
microseconds.</li>
+</ul>
+
+<h2 id="toward-arrow-100-and-beyond">Toward Arrow 1.0.0 and Beyond</h2>
+
+<p>We are still discussing the roadmap to the 1.0.0 release on the <a 
href="http://mail-archives.apache.org/mod_mbox/arrow-dev/">developer mailing
+list</a>. The focus of the 1.0.0 release will likely be memory format stability
+and hardening integration tests across the remaining data types implemented in
+Java and C++. Please join the discussion there.</p>
+
+
+
+    <hr/>
+<footer class="footer">
+  <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache 
Arrow project logo are either registered trademarks or trademarks of The Apache 
Software Foundation in the United States and other countries.</p>
+  <p>&copy; 2017 Apache Software Foundation</p>
+</footer>
+
+  </div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/arrow-site/blob/3cd84682/build/blog/2017/09/18/0.7.0-release/index.html
----------------------------------------------------------------------
diff --git a/build/blog/2017/09/18/0.7.0-release/index.html 
b/build/blog/2017/09/18/0.7.0-release/index.html
new file mode 100644
index 0000000..6504954
--- /dev/null
+++ b/build/blog/2017/09/18/0.7.0-release/index.html
@@ -0,0 +1,311 @@
+<!DOCTYPE html>
+<html lang="en-US">
+  <head>
+    <meta charset="UTF-8">
+    <title>Apache Arrow Homepage</title>
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="generator" content="Jekyll v3.4.3">
+    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
+    <link rel="icon" type="image/x-icon" href="/favicon.ico">
+
+    <link rel="stylesheet" 
href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+
+    <link href="/css/main.css" rel="stylesheet">
+    <link href="/css/syntax.css" rel="stylesheet">
+    <script src="https://code.jquery.com/jquery-3.2.1.min.js"
+            integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4="
+            crossorigin="anonymous"></script>
+    <script src="/assets/javascripts/bootstrap.min.js"></script>
+    
+    <!-- Global Site Tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments)};
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+    
+  </head>
+
+
+
+<body class="wrap">
+  <div class="container">
+    <nav class="navbar navbar-default">
+  <div class="container-fluid">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#arrow-navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a class="navbar-brand" href="/">Apache 
Arrow&#8482;&nbsp;&nbsp;&nbsp;</a>
+    </div>
+
+    <!-- Collect the nav links, forms, and other content for toggling -->
+    <div class="collapse navbar-collapse" id="arrow-navbar">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Project Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/install/">Install</a></li>
+            <li><a href="/blog/">Blog</a></li>
+            <li><a href="/release/">Releases</a></li>
+            <li><a href="https://issues.apache.org/jira/browse/ARROW">Issue 
Tracker</a></li>
+            <li><a href="https://github.com/apache/arrow">Source Code</a></li>
+            <li><a 
href="http://mail-archives.apache.org/mod_mbox/arrow-dev/">Mailing List</a></li>
+            <li><a href="https://apachearrowslackin.herokuapp.com">Slack 
Channel</a></li>
+            <li><a href="/committers/">Committers</a></li>
+            <li><a href="/powered_by/">Powered By</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Specification<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/memory_layout.html">Memory Layout</a></li>
+            <li><a href="/docs/metadata.html">Metadata</a></li>
+            <li><a href="/docs/ipc.html">Messaging / IPC</a></li>
+          </ul>
+        </li>
+
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Documentation<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/python">Python</a></li>
+            <li><a href="/docs/cpp">C++ API</a></li>
+            <li><a href="/docs/java">Java API</a></li>
+            <li><a href="/docs/c_glib">C GLib API</a></li>
+          </ul>
+        </li>
+        <!-- <li><a href="/blog">Blog</a></li> -->
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">ASF Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="http://www.apache.org/">ASF Website</a></li>
+            <li><a href="http://www.apache.org/licenses/">License</a></li>
+            <li><a 
href="http://www.apache.org/foundation/sponsorship.html">Donate</a></li>
+            <li><a 
href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+            <li><a href="http://www.apache.org/security/">Security</a></li>
+          </ul>
+        </li>
+      </ul>
+      <a href="http://www.apache.org/">
+        <img style="float:right;" src="/img/asf_logo.svg" width="120px"/>
+      </a>
+      </div><!-- /.navbar-collapse -->
+    </div>
+  </nav>
+
+
+    <h2>
+      Apache Arrow 0.7.0 Release
+      <a href="/blog/2017/09/18/0.7.0-release/" class="permalink" 
title="Permalink">∞</a>
+    </h2>
+
+    
+
+    <div class="panel">
+      <div class="panel-body">
+        <div>
+          <span class="label label-default">Published</span>
+          <span class="published">
+            <i class="fa fa-calendar"></i>
+            18 Sep 2017
+          </span>
+        </div>
+        <div>
+          <span class="label label-default">By</span>
+          <a href="http://wesmckinney.com"><i class="fa fa-user"></i> Wes 
McKinney (wesm)</a>
+        </div>
+      </div>
+    </div>
+
+    <!--
+
+-->
+
+<p>The Apache Arrow team is pleased to announce the 0.7.0 release. It includes
+<a 
href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20(Resolved%2C%20Closed)%20AND%20fixVersion%20%3D%200.7.0"><strong>133
 resolved JIRAs</strong></a> with many new features and bug fixes to the various
+language implementations. The Arrow memory format remains stable since the
+0.3.x release.</p>
+
+<p>See the <a href="http://arrow.apache.org/install">Install Page</a> to learn 
how to get the libraries for your
+platform. The <a href="http://arrow.apache.org/release/0.7.0.html">complete 
changelog</a> is also available.</p>
+
+<p>We include some highlights from the release in this post.</p>
+
+<h2 id="new-pmc-member-kouhei-sutou">New PMC Member: Kouhei Sutou</h2>
+
+<p>Since the last release we have added <a 
href="https://github.com/kou">Kou</a> to the Arrow Project Management
+Committee. He is also a PMC member of Apache Subversion, and a major contributor to
+many other open source projects.</p>
+
+<p>As an active member of the Ruby community in Japan, Kou has been developing 
the
+GLib-based C bindings for Arrow with associated Ruby wrappers, to enable Ruby
+users to benefit from the work that’s happening in Apache Arrow.</p>
+
+<p>We are excited to be collaborating with the Ruby community on shared
+infrastructure for in-memory analytics and data science.</p>
+
+<h2 id="expanded-javascript-typescript-implementation">Expanded JavaScript 
(TypeScript) Implementation</h2>
+
+<p><a href="https://github.com/trxcllnt">Paul Taylor</a> from the <a 
href="https://github.com/netflix/falcor">Falcor</a> and <a 
href="http://reactivex.io">ReactiveX</a> projects has worked to
+expand the JavaScript implementation (which is written in TypeScript), using
+the latest in modern JavaScript build and packaging technology. We are looking
+forward to building out the JS implementation and bringing it up to full
+functionality with the C++ and Java implementations.</p>
+
+<p>We are looking for more JavaScript developers to join the project and work
+together to make Arrow for JS work well with many kinds of front end use cases,
+like real time data visualization.</p>
+
+<h2 id="type-casting-for-c-and-python">Type casting for C++ and Python</h2>
+
+<p>As part of longer-term efforts to build an Arrow-native in-memory analytics
+library, we implemented a variety of type conversion functions. These functions
+are essential in ETL tasks when conforming one table schema to another. These
+are similar to the <code class="highlighter-rouge">astype</code> function in 
NumPy.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">In</span> <span class="p">[</span><span 
class="mi">17</span><span class="p">]:</span> <span class="kn">import</span> 
<span class="nn">pyarrow</span> <span class="kn">as</span> <span 
class="nn">pa</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">18</span><span class="p">]:</span> <span class="n">arr</span> <span 
class="o">=</span> <span class="n">pa</span><span class="o">.</span><span 
class="n">array</span><span class="p">([</span><span 
class="bp">True</span><span class="p">,</span> <span 
class="bp">False</span><span class="p">,</span> <span 
class="bp">None</span><span class="p">,</span> <span 
class="bp">True</span><span class="p">])</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">19</span><span class="p">]:</span> <span class="n">arr</span>
+<span class="n">Out</span><span class="p">[</span><span 
class="mi">19</span><span class="p">]:</span>
+<span class="o">&lt;</span><span class="n">pyarrow</span><span 
class="o">.</span><span class="n">lib</span><span class="o">.</span><span 
class="n">BooleanArray</span> <span class="nb">object</span> <span 
class="n">at</span> <span class="mh">0x7ff6fb069b88</span><span 
class="o">&gt;</span>
+<span class="p">[</span>
+  <span class="bp">True</span><span class="p">,</span>
+  <span class="bp">False</span><span class="p">,</span>
+  <span class="n">NA</span><span class="p">,</span>
+  <span class="bp">True</span>
+<span class="p">]</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">20</span><span class="p">]:</span> <span class="n">arr</span><span 
class="o">.</span><span class="n">cast</span><span class="p">(</span><span 
class="n">pa</span><span class="o">.</span><span class="n">int32</span><span 
class="p">())</span>
+<span class="n">Out</span><span class="p">[</span><span 
class="mi">20</span><span class="p">]:</span>
+<span class="o">&lt;</span><span class="n">pyarrow</span><span 
class="o">.</span><span class="n">lib</span><span class="o">.</span><span 
class="n">Int32Array</span> <span class="nb">object</span> <span 
class="n">at</span> <span class="mh">0x7ff6fb0383b8</span><span 
class="o">&gt;</span>
+<span class="p">[</span>
+  <span class="mi">1</span><span class="p">,</span>
+  <span class="mi">0</span><span class="p">,</span>
+  <span class="n">NA</span><span class="p">,</span>
+  <span class="mi">1</span>
+<span class="p">]</span>
+</code></pre>
+</div>
+
+<p>Over time these will expand to support as many input-and-output type
+combinations as possible with optimized conversions.</p>
+
+<h2 id="new-arrow-gpu-cuda-extension-library-for-c">New Arrow GPU (CUDA) 
Extension Library for C++</h2>
+
+<p>To help with GPU-related projects using Arrow, like the <a 
href="http://gpuopenanalytics.com/">GPU Open Analytics
+Initiative</a>, we have started a C++ add-on library to simplify Arrow memory
+management on CUDA-enabled graphics cards. We would like to expand this to
+include a library of reusable CUDA kernel functions for GPU analytics on Arrow
+columnar memory.</p>
+
+<p>For example, we could write a record batch from CPU memory to GPU device 
memory
+like so (some error checking omitted):</p>
+
+<div class="language-c++ highlighter-rouge"><pre class="highlight"><code><span 
class="cp">#include &lt;arrow/api.h&gt;
+#include &lt;arrow/gpu/cuda_api.h&gt;
+</span>
+<span class="k">using</span> <span class="k">namespace</span> <span 
class="n">arrow</span><span class="p">;</span>
+
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">CudaDeviceManager</span><span class="o">*</span> <span 
class="n">manager</span><span class="p">;</span>
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">gpu</span><span class="o">::</span><span 
class="n">CudaContext</span><span class="o">&gt;</span> <span 
class="n">context</span><span class="p">;</span>
+
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">CudaDeviceManager</span><span class="o">::</span><span 
class="n">GetInstance</span><span class="p">(</span><span 
class="o">&amp;</span><span class="n">manager</span><span class="p">);</span>
+<span class="n">manager</span><span class="o">-&gt;</span><span 
class="n">GetContext</span><span class="p">(</span><span 
class="n">kGpuNumber</span><span class="p">,</span> <span 
class="o">&amp;</span><span class="n">context</span><span class="p">);</span>
+
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">RecordBatch</span><span class="o">&gt;</span> <span 
class="n">batch</span> <span class="o">=</span> <span 
class="n">GetCpuData</span><span class="p">();</span>
+
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">gpu</span><span class="o">::</span><span 
class="n">CudaBuffer</span><span class="o">&gt;</span> <span 
class="n">device_serialized</span><span class="p">;</span>
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">SerializeRecordBatch</span><span class="p">(</span><span 
class="o">*</span><span class="n">batch</span><span class="p">,</span> <span 
class="n">context</span><span class="p">.</span><span 
class="n">get</span><span class="p">(),</span> <span 
class="o">&amp;</span><span class="n">device_serialized</span><span 
class="p">);</span>
+</code></pre>
+</div>
+
+<p>We can then “read” the GPU record batch, but the returned <code 
class="highlighter-rouge">arrow::RecordBatch</code>
+internally will contain GPU device pointers that you can use for CUDA kernel
+calls:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>std::shared_ptr&lt;RecordBatch&gt; device_batch;
+gpu::ReadRecordBatch(batch-&gt;schema(), device_serialized,
+                     default_memory_pool(), &amp;device_batch);
+
+// Now run some CUDA kernels on device_batch
+</code></pre>
+</div>
+
+<h2 id="decimal-integration-tests">Decimal Integration Tests</h2>
+
+<p><a href="http://github.com/cpcloud">Phillip Cloud</a> has been working on 
decimal support in C++ to enable Parquet
+read/write support in C++ and Python, and also end-to-end testing against the
+Arrow Java libraries.</p>
+
+<p>In the upcoming releases, we hope to complete the remaining data types that
+need end-to-end testing between Java and C++:</p>
+
+<ul>
+  <li>Fixed size lists (variable-size lists already implemented)</li>
+  <li>Fixed size binary</li>
+  <li>Unions</li>
+  <li>Maps</li>
+  <li>Time intervals</li>
+</ul>
+
+<h2 id="other-notable-python-changes">Other Notable Python Changes</h2>
+
+<p>Some highlights of Python development outside of bug fixes and general API
+improvements include:</p>
+
+<ul>
+  <li>Simplified <code class="highlighter-rouge">put</code> and <code 
class="highlighter-rouge">get</code> of arbitrary Python objects in Plasma 
objects</li>
+  <li><a href="http://arrow.apache.org/docs/python/ipc.html">High-speed, 
memory efficient object serialization</a>. This is important
+enough that we will likely write a dedicated blog post about it.</li>
+  <li>New <code class="highlighter-rouge">flavor='spark'</code> option to 
<code class="highlighter-rouge">pyarrow.parquet.write_table</code> to enable 
easy
+writing of Parquet files maximized for Spark compatibility</li>
+  <li><code class="highlighter-rouge">parquet.write_to_dataset</code> function 
with support for partitioned writes</li>
+  <li>Improved support for Dask filesystems</li>
+  <li>Improved Python usability for IPC: read and write schemas and record 
batches
+more easily. See the <a 
href="http://arrow.apache.org/docs/python/api.html">API docs</a> for more about 
these.</li>
+</ul>
+
+<h2 id="the-road-ahead">The Road Ahead</h2>
+
+<p>Upcoming Arrow releases will continue to expand the project to cover more 
use
+cases. In addition to completing end-to-end testing for all the major data
+types, some of us will be shifting attention to building Arrow-native in-memory
+analytics libraries.</p>
+
+<p>We are looking for more JavaScript, R, and other programming language
+developers to join the project and expand the available implementations and
+bindings to more languages.</p>
+
+
+
+    <hr/>
+<footer class="footer">
+  <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache 
Arrow project logo are either registered trademarks or trademarks of The Apache 
Software Foundation in the United States and other countries.</p>
+  <p>&copy; 2017 Apache Software Foundation</p>
+</footer>
+
+  </div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/arrow-site/blob/3cd84682/build/blog/2017/09/19/0.7.0-release/index.html
----------------------------------------------------------------------
diff --git a/build/blog/2017/09/19/0.7.0-release/index.html 
b/build/blog/2017/09/19/0.7.0-release/index.html
new file mode 100644
index 0000000..809a31a
--- /dev/null
+++ b/build/blog/2017/09/19/0.7.0-release/index.html
@@ -0,0 +1,312 @@
+<!DOCTYPE html>
+<html lang="en-US">
+  <head>
+    <meta charset="UTF-8">
+    <title>Apache Arrow Homepage</title>
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="generator" content="Jekyll v3.4.3">
+    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
+    <link rel="icon" type="image/x-icon" href="/favicon.ico">
+
+    <link rel="stylesheet" 
href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+
+    <link href="/css/main.css" rel="stylesheet">
+    <link href="/css/syntax.css" rel="stylesheet">
+    <script src="https://code.jquery.com/jquery-3.2.1.min.js";
+            integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4="
+            crossorigin="anonymous"></script>
+    <script src="/assets/javascripts/bootstrap.min.js"></script>
+    
+    <!-- Global Site Tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1";></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments)};
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+    
+  </head>
+
+
+
+<body class="wrap">
+  <div class="container">
+    <nav class="navbar navbar-default">
+  <div class="container-fluid">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#arrow-navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a class="navbar-brand" href="/">Apache 
Arrow&#8482;&nbsp;&nbsp;&nbsp;</a>
+    </div>
+
+    <!-- Collect the nav links, forms, and other content for toggling -->
+    <div class="collapse navbar-collapse" id="arrow-navbar">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Project Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/install/">Install</a></li>
+            <li><a href="/blog/">Blog</a></li>
+            <li><a href="/release/">Releases</a></li>
+            <li><a href="https://issues.apache.org/jira/browse/ARROW";>Issue 
Tracker</a></li>
+            <li><a href="https://github.com/apache/arrow";>Source Code</a></li>
+            <li><a 
href="http://mail-archives.apache.org/mod_mbox/arrow-dev/";>Mailing List</a></li>
+            <li><a href="https://apachearrowslackin.herokuapp.com";>Slack 
Channel</a></li>
+            <li><a href="/committers/">Committers</a></li>
+            <li><a href="/powered_by/">Powered By</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Specification<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/memory_layout.html">Memory Layout</a></li>
+            <li><a href="/docs/metadata.html">Metadata</a></li>
+            <li><a href="/docs/ipc.html">Messaging / IPC</a></li>
+          </ul>
+        </li>
+
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Documentation<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/python">Python</a></li>
+            <li><a href="/docs/cpp">C++ API</a></li>
+            <li><a href="/docs/java">Java API</a></li>
+            <li><a href="/docs/c_glib">C GLib API</a></li>
+            <li><a href="/docs/js">Javascript API</a></li>
+          </ul>
+        </li>
+        <!-- <li><a href="/blog">Blog</a></li> -->
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">ASF Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="http://www.apache.org/";>ASF Website</a></li>
+            <li><a href="http://www.apache.org/licenses/";>License</a></li>
+            <li><a 
href="http://www.apache.org/foundation/sponsorship.html";>Donate</a></li>
+            <li><a 
href="http://www.apache.org/foundation/thanks.html";>Thanks</a></li>
+            <li><a href="http://www.apache.org/security/";>Security</a></li>
+          </ul>
+        </li>
+      </ul>
+      <a href="http://www.apache.org/";>
+        <img style="float:right;" src="/img/asf_logo.svg" alt="Apache Software Foundation" width="120"/>
+      </a>
+      </div><!-- /.navbar-collapse -->
+    </div>
+  </nav>
+
+
+    <h2>
+      Apache Arrow 0.7.0 Release
+      <a href="/blog/2017/09/19/0.7.0-release/" class="permalink" 
title="Permalink">∞</a>
+    </h2>
+
+    
+
+    <div class="panel">
+      <div class="panel-body">
+        <div>
+          <span class="label label-default">Published</span>
+          <span class="published">
+            <i class="fa fa-calendar"></i>
+            19 Sep 2017
+          </span>
+        </div>
+        <div>
+          <span class="label label-default">By</span>
+          <a href="http://wesmckinney.com";><i class="fa fa-user"></i> Wes 
McKinney (wesm)</a>
+        </div>
+      </div>
+    </div>
+
+    <!--
+
+-->
+
+<p>The Apache Arrow team is pleased to announce the 0.7.0 release. It includes
+<a 
href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20(Resolved%2C%20Closed)%20AND%20fixVersion%20%3D%200.7.0"><strong>133
 resolved JIRAs</strong></a> and many new features and bug fixes to the various
+language implementations. The Arrow memory format remains stable since the
+0.3.x release.</p>
+
+<p>See the <a href="http://arrow.apache.org/install";>Install Page</a> to learn 
how to get the libraries for your
+platform. The <a href="http://arrow.apache.org/release/0.7.0.html";>complete 
changelog</a> is also available.</p>
+
+<p>We include some highlights from the release in this post.</p>
+
+<h2 id="new-pmc-member-kouhei-sutou">New PMC Member: Kouhei Sutou</h2>
+
+<p>Since the last release we have added <a 
href="https://github.com/kou";>Kou</a> to the Arrow Project Management
+Committee. He is also a PMC member for Apache Subversion, and a major contributor to
+many other open source projects.</p>
+
+<p>As an active member of the Ruby community in Japan, Kou has been developing 
the
+GLib-based C bindings for Arrow with associated Ruby wrappers, to enable Ruby
+users to benefit from the work that’s happening in Apache Arrow.</p>
+
+<p>We are excited to be collaborating with the Ruby community on shared
+infrastructure for in-memory analytics and data science.</p>
+
+<h2 id="expanded-javascript-typescript-implementation">Expanded JavaScript 
(TypeScript) Implementation</h2>
+
+<p><a href="https://github.com/trxcllnt";>Paul Taylor</a> from the <a 
href="https://github.com/netflix/falcor";>Falcor</a> and <a 
href="http://reactivex.io";>ReactiveX</a> projects has worked to
+expand the JavaScript implementation (which is written in TypeScript), using
+the latest in modern JavaScript build and packaging technology. We are looking
+forward to building out the JS implementation and bringing it up to full
+functionality with the C++ and Java implementations.</p>
+
+<p>We are looking for more JavaScript developers to join the project and work
+together to make Arrow for JS work well with many kinds of front end use cases,
+like real time data visualization.</p>
+
+<h2 id="type-casting-for-c-and-python">Type casting for C++ and Python</h2>
+
+<p>As part of longer-term efforts to build an Arrow-native in-memory analytics
+library, we implemented a variety of type conversion functions. These functions
+are essential in ETL tasks when conforming one table schema to another. These
+are similar to the <code class="highlighter-rouge">astype</code> function in 
NumPy.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">In</span> <span class="p">[</span><span 
class="mi">17</span><span class="p">]:</span> <span class="kn">import</span> 
<span class="nn">pyarrow</span> <span class="kn">as</span> <span 
class="nn">pa</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">18</span><span class="p">]:</span> <span class="n">arr</span> <span 
class="o">=</span> <span class="n">pa</span><span class="o">.</span><span 
class="n">array</span><span class="p">([</span><span 
class="bp">True</span><span class="p">,</span> <span 
class="bp">False</span><span class="p">,</span> <span 
class="bp">None</span><span class="p">,</span> <span 
class="bp">True</span><span class="p">])</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">19</span><span class="p">]:</span> <span class="n">arr</span>
+<span class="n">Out</span><span class="p">[</span><span 
class="mi">19</span><span class="p">]:</span>
+<span class="o">&lt;</span><span class="n">pyarrow</span><span 
class="o">.</span><span class="n">lib</span><span class="o">.</span><span 
class="n">BooleanArray</span> <span class="nb">object</span> <span 
class="n">at</span> <span class="mh">0x7ff6fb069b88</span><span 
class="o">&gt;</span>
+<span class="p">[</span>
+  <span class="bp">True</span><span class="p">,</span>
+  <span class="bp">False</span><span class="p">,</span>
+  <span class="n">NA</span><span class="p">,</span>
+  <span class="bp">True</span>
+<span class="p">]</span>
+
+<span class="n">In</span> <span class="p">[</span><span 
class="mi">20</span><span class="p">]:</span> <span class="n">arr</span><span 
class="o">.</span><span class="n">cast</span><span class="p">(</span><span 
class="n">pa</span><span class="o">.</span><span class="n">int32</span><span 
class="p">())</span>
+<span class="n">Out</span><span class="p">[</span><span 
class="mi">20</span><span class="p">]:</span>
+<span class="o">&lt;</span><span class="n">pyarrow</span><span 
class="o">.</span><span class="n">lib</span><span class="o">.</span><span 
class="n">Int32Array</span> <span class="nb">object</span> <span 
class="n">at</span> <span class="mh">0x7ff6fb0383b8</span><span 
class="o">&gt;</span>
+<span class="p">[</span>
+  <span class="mi">1</span><span class="p">,</span>
+  <span class="mi">0</span><span class="p">,</span>
+  <span class="n">NA</span><span class="p">,</span>
+  <span class="mi">1</span>
+<span class="p">]</span>
+</code></pre>
+</div>
+
+<p>Over time these will expand to support as many input-and-output type
+combinations as possible with optimized conversions.</p>
+
+<h2 id="new-arrow-gpu-cuda-extension-library-for-c">New Arrow GPU (CUDA) 
Extension Library for C++</h2>
+
+<p>To help with GPU-related projects using Arrow, like the <a 
href="http://gpuopenanalytics.com/";>GPU Open Analytics
+Initiative</a>, we have started a C++ add-on library to simplify Arrow memory
+management on CUDA-enabled graphics cards. We would like to expand this to
+include a library of reusable CUDA kernel functions for GPU analytics on Arrow
+columnar memory.</p>
+
+<p>For example, we could write a record batch from CPU memory to GPU device 
memory
+like so (some error checking omitted):</p>
+
+<div class="language-c++ highlighter-rouge"><pre class="highlight"><code><span 
class="cp">#include &lt;arrow/api.h&gt;
+#include &lt;arrow/gpu/cuda_api.h&gt;
+</span>
+<span class="k">using</span> <span class="k">namespace</span> <span 
class="n">arrow</span><span class="p">;</span>
+
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">CudaDeviceManager</span><span class="o">*</span> <span 
class="n">manager</span><span class="p">;</span>
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">gpu</span><span class="o">::</span><span 
class="n">CudaContext</span><span class="o">&gt;</span> <span 
class="n">context</span><span class="p">;</span>
+
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">CudaDeviceManager</span><span class="o">::</span><span 
class="n">GetInstance</span><span class="p">(</span><span 
class="o">&amp;</span><span class="n">manager</span><span class="p">);</span>
+<span class="n">manager</span><span class="o">-&gt;</span><span 
class="n">GetContext</span><span class="p">(</span><span 
class="n">kGpuNumber</span><span class="p">,</span> <span 
class="o">&amp;</span><span class="n">context</span><span class="p">);</span>
+
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">RecordBatch</span><span class="o">&gt;</span> <span 
class="n">batch</span> <span class="o">=</span> <span 
class="n">GetCpuData</span><span class="p">();</span>
+
+<span class="n">std</span><span class="o">::</span><span 
class="n">shared_ptr</span><span class="o">&lt;</span><span 
class="n">gpu</span><span class="o">::</span><span 
class="n">CudaBuffer</span><span class="o">&gt;</span> <span 
class="n">device_serialized</span><span class="p">;</span>
+<span class="n">gpu</span><span class="o">::</span><span 
class="n">SerializeRecordBatch</span><span class="p">(</span><span 
class="o">*</span><span class="n">batch</span><span class="p">,</span> <span 
class="n">context</span><span class="p">.</span><span 
class="n">get</span><span class="p">(),</span> <span 
class="o">&amp;</span><span class="n">device_serialized</span><span 
class="p">);</span>
+</code></pre>
+</div>
+
+<p>We can then “read” the GPU record batch, but the returned <code 
class="highlighter-rouge">arrow::RecordBatch</code>
+internally will contain GPU device pointers that you can use for CUDA kernel
+calls:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>std::shared_ptr&lt;RecordBatch&gt; device_batch;
+gpu::ReadRecordBatch(batch-&gt;schema(), device_serialized,
+                     default_memory_pool(), &amp;device_batch);
+
+// Now run some CUDA kernels on device_batch
+</code></pre>
+</div>
+
+<h2 id="decimal-integration-tests">Decimal Integration Tests</h2>
+
+<p><a href="http://github.com/cpcloud";>Phillip Cloud</a> has been working on 
decimal support in C++ to enable Parquet
+read/write support in C++ and Python, and also end-to-end testing against the
+Arrow Java libraries.</p>
+
+<p>In the upcoming releases, we hope to complete the remaining data types that
+need end-to-end testing between Java and C++:</p>
+
+<ul>
+  <li>Fixed size lists (variable-size lists already implemented)</li>
+  <li>Fixed size binary</li>
+  <li>Unions</li>
+  <li>Maps</li>
+  <li>Time intervals</li>
+</ul>
+
+<h2 id="other-notable-python-changes">Other Notable Python Changes</h2>
+
+<p>Some highlights of Python development outside of bug fixes and general API
+improvements include:</p>
+
+<ul>
+  <li>Simplified <code class="highlighter-rouge">put</code> and <code 
class="highlighter-rouge">get</code> arbitrary Python objects in Plasma 
objects</li>
+  <li><a href="http://arrow.apache.org/docs/python/ipc.html";>High-speed, 
memory efficient object serialization</a>. This is important
+enough that we will likely write a dedicated blog post about it.</li>
+  <li>New <code class="highlighter-rouge">flavor='spark'</code> option to 
<code class="highlighter-rouge">pyarrow.parquet.write_table</code> to enable 
easy
+writing of Parquet files maximized for Spark compatibility</li>
+  <li><code class="highlighter-rouge">parquet.write_to_dataset</code> function 
with support for partitioned writes</li>
+  <li>Improved support for Dask filesystems</li>
+  <li>Improved Python usability for IPC: read and write schemas and record 
batches
+more easily. See the <a 
href="http://arrow.apache.org/docs/python/api.html";>API docs</a> for more about 
these.</li>
+</ul>
+
+<h2 id="the-road-ahead">The Road Ahead</h2>
+
+<p>Upcoming Arrow releases will continue to expand the project to cover more 
use
+cases. In addition to completing end-to-end testing for all the major data
+types, some of us will be shifting attention to building Arrow-native in-memory
+analytics libraries.</p>
+
+<p>We are looking for more JavaScript, R, and other programming language
+developers to join the project and expand the available implementations and
+bindings to more languages.</p>
+
+
+
+    <hr/>
+<footer class="footer">
+  <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache 
Arrow project logo are either registered trademarks or trademarks of The Apache 
Software Foundation in the United States and other countries.</p>
+  <p>&copy; 2017 Apache Software Foundation</p>
+</footer>
+
+  </div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/arrow-site/blob/3cd84682/build/blog/2017/10/15/fast-python-serialization-with-ray-and-arrow/index.html
----------------------------------------------------------------------
diff --git 
a/build/blog/2017/10/15/fast-python-serialization-with-ray-and-arrow/index.html 
b/build/blog/2017/10/15/fast-python-serialization-with-ray-and-arrow/index.html
new file mode 100644
index 0000000..f35d7f3
--- /dev/null
+++ 
b/build/blog/2017/10/15/fast-python-serialization-with-ray-and-arrow/index.html
@@ -0,0 +1,430 @@
+<!DOCTYPE html>
+<html lang="en-US">
+  <head>
+    <meta charset="UTF-8">
+    <title>Apache Arrow Homepage</title>
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta name="generator" content="Jekyll v3.4.3">
+    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
+    <link rel="icon" type="image/x-icon" href="/favicon.ico">
+
+    <link rel="stylesheet" 
href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+
+    <link href="/css/main.css" rel="stylesheet">
+    <link href="/css/syntax.css" rel="stylesheet">
+    <script src="https://code.jquery.com/jquery-3.2.1.min.js";
+            integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4="
+            crossorigin="anonymous"></script>
+    <script src="/assets/javascripts/bootstrap.min.js"></script>
+    
+    <!-- Global Site Tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1";></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments)};
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+    
+  </head>
+
+
+
+<body class="wrap">
+  <div class="container">
+    <nav class="navbar navbar-default">
+  <div class="container-fluid">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#arrow-navbar">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a class="navbar-brand" href="/">Apache 
Arrow&#8482;&nbsp;&nbsp;&nbsp;</a>
+    </div>
+
+    <!-- Collect the nav links, forms, and other content for toggling -->
+    <div class="collapse navbar-collapse" id="arrow-navbar">
+      <ul class="nav navbar-nav">
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Project Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/install/">Install</a></li>
+            <li><a href="/blog/">Blog</a></li>
+            <li><a href="/release/">Releases</a></li>
+            <li><a href="https://issues.apache.org/jira/browse/ARROW";>Issue 
Tracker</a></li>
+            <li><a href="https://github.com/apache/arrow";>Source Code</a></li>
+            <li><a 
href="http://mail-archives.apache.org/mod_mbox/arrow-dev/";>Mailing List</a></li>
+            <li><a href="https://apachearrowslackin.herokuapp.com";>Slack 
Channel</a></li>
+            <li><a href="/committers/">Committers</a></li>
+            <li><a href="/powered_by/">Powered By</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Specification<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/memory_layout.html">Memory Layout</a></li>
+            <li><a href="/docs/metadata.html">Metadata</a></li>
+            <li><a href="/docs/ipc.html">Messaging / IPC</a></li>
+          </ul>
+        </li>
+
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">Documentation<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/docs/python">Python</a></li>
+            <li><a href="/docs/cpp">C++ API</a></li>
+            <li><a href="/docs/java">Java API</a></li>
+            <li><a href="/docs/c_glib">C GLib API</a></li>
+            <li><a href="/docs/js">Javascript API</a></li>
+          </ul>
+        </li>
+        <!-- <li><a href="/blog">Blog</a></li> -->
+        <li class="dropdown">
+          <a href="#" class="dropdown-toggle" data-toggle="dropdown"
+             role="button" aria-haspopup="true"
+             aria-expanded="false">ASF Links<span class="caret"></span>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="http://www.apache.org/";>ASF Website</a></li>
+            <li><a href="http://www.apache.org/licenses/";>License</a></li>
+            <li><a 
href="http://www.apache.org/foundation/sponsorship.html";>Donate</a></li>
+            <li><a 
href="http://www.apache.org/foundation/thanks.html";>Thanks</a></li>
+            <li><a href="http://www.apache.org/security/";>Security</a></li>
+          </ul>
+        </li>
+      </ul>
+      <a href="http://www.apache.org/";>
+        <img style="float:right;" src="/img/asf_logo.svg" alt="Apache Software Foundation" width="120"/>
+      </a>
+      </div><!-- /.navbar-collapse -->
+    </div>
+  </nav>
+
+
+    <h2>
+      Fast Python Serialization with Ray and Apache Arrow
+      <a href="/blog/2017/10/15/fast-python-serialization-with-ray-and-arrow/" 
class="permalink" title="Permalink">∞</a>
+    </h2>
+
+    
+
+    <div class="panel">
+      <div class="panel-body">
+        <div>
+          <span class="label label-default">Published</span>
+          <span class="published">
+            <i class="fa fa-calendar"></i>
+            15 Oct 2017
+          </span>
+        </div>
+        <div>
+          <span class="label label-default">By</span>
+          <span><i class="fa fa-user"></i> Philipp Moritz, Robert Nishihara</span>
+        </div>
+      </div>
+    </div>
+
+    <!--
+
+-->
+
+<p><em>This was originally posted on the <a 
href="https://ray-project.github.io/";>Ray blog</a>. <a 
href="https://people.eecs.berkeley.edu/~pcmoritz/";>Philipp Moritz</a> and <a 
href="http://www.robertnishihara.com";>Robert Nishihara</a> are graduate 
students at UC Berkeley.</em></p>
+
+<p>This post elaborates on the integration between <a 
href="http://ray.readthedocs.io/en/latest/index.html";>Ray</a> and <a 
href="https://arrow.apache.org/";>Apache Arrow</a>.
+The main problem this addresses is <a 
href="https://en.wikipedia.org/wiki/Serialization";>data serialization</a>.</p>
+
+<p>From <a href="https://en.wikipedia.org/wiki/Serialization";>Wikipedia</a>, 
<strong>serialization</strong> is</p>
+
+<blockquote>
+  <p>… the process of translating data structures or object state into a 
format
+that can be stored … or transmitted … and reconstructed later (possibly
+in a different computer environment).</p>
+</blockquote>
+
+<p>Why is any translation necessary? Well, when you create a Python object, it 
may
+have pointers to other Python objects, and these objects are all allocated in
+different regions of memory, and all of this has to make sense when unpacked by
+another process on another machine.</p>
+
+<p>Serialization and deserialization are <strong>bottlenecks in parallel and 
distributed
+computing</strong>, especially in machine learning applications with large 
objects and
+large quantities of data.</p>
+
+<h2 id="design-goals">Design Goals</h2>
+
+<p>As Ray is optimized for machine learning and AI applications, we have 
focused a
+lot on serialization and data handling, with the following design goals:</p>
+
+<ol>
+  <li>It should be very efficient with <strong>large numerical data</strong> 
(this includes
+NumPy arrays and Pandas DataFrames, as well as objects that recursively contain
+NumPy arrays and Pandas DataFrames).</li>
+  <li>It should be about as fast as Pickle for <strong>general Python 
types</strong>.</li>
+  <li>It should be compatible with <strong>shared memory</strong>, allowing 
multiple processes
+to use the same data without copying it.</li>
+  <li><strong>Deserialization</strong> should be extremely fast (when 
possible, it should not
+require reading the entire serialized object).</li>
+  <li>It should be <strong>language independent</strong> (eventually we’d 
like to enable Python
+workers to use objects created by workers in Java or other languages and vice
+versa).</li>
+</ol>
+
+<h2 id="our-approach-and-alternatives">Our Approach and Alternatives</h2>
+
+<p>The go-to serialization approach in Python is the <strong>pickle</strong> 
module. Pickle is
+very general, especially if you use variants like <a 
href="https://github.com/cloudpipe/cloudpickle/";>cloudpickle</a>. However, it
+does not satisfy requirements 1, 3, 4, or 5. Alternatives like 
<strong>json</strong> satisfy
+5, but not 1-4.</p>
+
+<p><strong>Our Approach:</strong> To satisfy requirements 1-5, we chose to use 
the
+<a href="https://arrow.apache.org/";>Apache Arrow</a> format as our underlying 
data representation. In collaboration
+with the Apache Arrow team, we built <a 
href="https://arrow.apache.org/docs/python/ipc.html#arbitrary-object-serialization";>libraries</a>
 for mapping general Python
+objects to and from the Arrow format. Some properties of this approach:</p>
+
+<ul>
+  <li>The data layout is language independent (requirement 5).</li>
+  <li>Offsets into a serialized data blob can be computed in constant time 
without
+reading the full object (requirements 1 and 4).</li>
+  <li>Arrow supports <strong>zero-copy reads</strong>, so objects can 
naturally be stored in
+shared memory and used by multiple processes (requirements 1 and 3).</li>
+  <li>We can naturally fall back to pickle for anything we can’t handle well
+(requirement 2).</li>
+</ul>
+
+<p><strong>Alternatives to Arrow:</strong> We could have built on top of
+<a href="https://developers.google.com/protocol-buffers/";><strong>Protocol 
Buffers</strong></a>, but protocol buffers really isn’t designed for
+numerical data, and that approach wouldn’t satisfy 1, 3, or 4. Building on 
top
+of <a 
href="https://google.github.io/flatbuffers/";><strong>Flatbuffers</strong></a> 
actually could be made to work, but it would have
+required implementing a lot of the facilities that Arrow already has and we
+preferred a columnar data layout more optimized for big data.</p>
+
+<h2 id="speedups">Speedups</h2>
+
+<p>Here we show some performance improvements over Python’s pickle module. 
The
+experiments were done using <code 
class="highlighter-rouge">pickle.HIGHEST_PROTOCOL</code>. Code for generating 
these
+plots is included at the end of the post.</p>
+
+<p><strong>With NumPy arrays:</strong> In machine learning and AI 
applications, data (e.g.,
+images, neural network weights, text documents) are typically represented as
+data structures containing NumPy arrays. When using NumPy arrays, the speedups
+are impressive.</p>
+
+<p>The fact that the Ray bars for deserialization are barely visible is not a
+mistake. This is a consequence of the support for zero-copy reads (the savings
+largely come from the lack of memory movement).</p>
+
+<div align="center">
+<img src="/assets/fast_python_serialization_with_ray_and_arrow/speedups0.png" 
width="365" height="255" />
+<img src="/assets/fast_python_serialization_with_ray_and_arrow/speedups1.png" 
width="365" height="255" />
+</div>
+
+<p>Note that the biggest wins are with deserialization. The speedups here are
+multiple orders of magnitude and get better as the NumPy arrays get larger
+(thanks to design goals 1, 3, and 4). Making <strong>deserialization</strong> 
fast is
+important for two reasons. First, an object may be serialized once and then
+deserialized many times (e.g., an object that is broadcast to all workers).
+Second, a common pattern is for many objects to be serialized in parallel and
+then aggregated and deserialized one at a time on a single worker, making
+deserialization the bottleneck.</p>
+
+<p><strong>Without NumPy arrays:</strong> When using regular Python objects, 
for which we
+cannot take advantage of shared memory, the results are comparable to 
pickle.</p>
+
+<div align="center">
+<img src="/assets/fast_python_serialization_with_ray_and_arrow/speedups2.png" 
width="365" height="255" />
+<img src="/assets/fast_python_serialization_with_ray_and_arrow/speedups3.png" 
width="365" height="255" />
+</div>
+
+<p>These are just a few examples of interesting Python objects. The most 
important
+case is the case where NumPy arrays are nested within other objects. Note that
+our serialization library works with very general Python types including custom
+Python classes and deeply nested objects.</p>
+
+<h2 id="the-api">The API</h2>
+
+<p>The serialization library can be used directly through pyarrow as follows. 
More
+documentation is available <a 
href="https://arrow.apache.org/docs/python/ipc.html#arbitrary-object-serialization";>here</a>.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">x</span> <span class="o">=</span> <span 
class="p">[(</span><span class="mi">1</span><span class="p">,</span> <span 
class="mi">2</span><span class="p">),</span> <span 
class="s">'hello'</span><span class="p">,</span> <span class="mi">3</span><span 
class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span 
class="n">np</span><span class="o">.</span><span class="n">array</span><span 
class="p">([</span><span class="mf">5.0</span><span class="p">,</span> <span 
class="mf">6.0</span><span class="p">])]</span>
+<span class="n">serialized_x</span> <span class="o">=</span> <span 
class="n">pyarrow</span><span class="o">.</span><span 
class="n">serialize</span><span class="p">(</span><span class="n">x</span><span 
class="p">)</span><span class="o">.</span><span class="n">to_buffer</span><span 
class="p">()</span>
+<span class="n">deserialized_x</span> <span class="o">=</span> <span 
class="n">pyarrow</span><span class="o">.</span><span 
class="n">deserialize</span><span class="p">(</span><span 
class="n">serialized_x</span><span class="p">)</span>
+</code></pre>
+</div>
+
+<p>It can be used directly through the Ray API as follows.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="n">x</span> <span class="o">=</span> <span 
class="p">[(</span><span class="mi">1</span><span class="p">,</span> <span 
class="mi">2</span><span class="p">),</span> <span 
class="s">'hello'</span><span class="p">,</span> <span class="mi">3</span><span 
class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span 
class="n">np</span><span class="o">.</span><span class="n">array</span><span 
class="p">([</span><span class="mf">5.0</span><span class="p">,</span> <span 
class="mf">6.0</span><span class="p">])]</span>
+<span class="n">x_id</span> <span class="o">=</span> <span 
class="n">ray</span><span class="o">.</span><span class="n">put</span><span 
class="p">(</span><span class="n">x</span><span class="p">)</span>
+<span class="n">deserialized_x</span> <span class="o">=</span> <span 
class="n">ray</span><span class="o">.</span><span class="n">get</span><span 
class="p">(</span><span class="n">x_id</span><span class="p">)</span>
+</code></pre>
+</div>
+
+<h2 id="data-representation">Data Representation</h2>
+
+<p>We use Apache Arrow as the underlying language-independent data layout. 
Objects
+are stored in two parts: a <strong>schema</strong> and a <strong>data 
blob</strong>. At a high level, the
+data blob is roughly a flattened concatenation of all of the data values
+recursively contained in the object, and the schema defines the types and
+nesting structure of the data blob.</p>
+
+<p><strong>Technical Details:</strong> Python containers (e.g., dictionaries, 
lists, tuples,
+sets) are encoded as Arrow <a 
href="http://arrow.apache.org/docs/memory_layout.html#dense-union-type">UnionArrays</a>
 of other types (e.g., bools, ints,
+strings, bytes, floats, doubles, date64s, tensors (i.e., NumPy arrays), lists,
+tuples, dicts and sets). Nested sequences are encoded using Arrow
+<a 
href="http://arrow.apache.org/docs/memory_layout.html#list-type">ListArrays</a>.
 All tensors are collected and appended to the end of the
+serialized object, and the UnionArray contains references to these tensors.</p>
+
+<p>To give a concrete example, consider the following object.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="p">[(</span><span class="mi">1</span><span 
class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span 
class="s">'hello'</span><span class="p">,</span> <span class="mi">3</span><span 
class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span 
class="n">np</span><span class="o">.</span><span class="n">array</span><span 
class="p">([</span><span class="mf">5.0</span><span class="p">,</span> <span 
class="mf">6.0</span><span class="p">])]</span>
+</code></pre>
+</div>
+
+<p>It would be represented in Arrow with the following structure.</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>UnionArray(type_ids=[tuple, string, int, int, ndarray],
+           tuples=ListArray(offsets=[0, 2],
+                            UnionArray(type_ids=[int, int],
+                                       ints=[1, 2])),
+           strings=['hello'],
+           ints=[3, 4],
+           ndarrays=[&lt;offset of numpy array&gt;])
+</code></pre>
+</div>
+
+<p>Arrow uses Flatbuffers to encode serialized schemas. <strong>Using only the 
schema, we
+can compute the offsets of each value in the data blob without scanning through
+the data blob</strong> (unlike with Pickle; this is what enables fast 
deserialization). This
+means that we can avoid copying or otherwise converting large arrays and other
+values during deserialization. Tensors are appended at the end of the 
UnionArray
+and can be efficiently shared and accessed using shared memory.</p>
+
+<p>Note that the actual object would be laid out in memory as shown below.</p>
+
+<div align="center">
+<img 
src="/assets/fast_python_serialization_with_ray_and_arrow/python_object.png" 
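width="600" alt="Diagram of a Python object in the heap: separately allocated boxes connected by pointer arrows" />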
+</div>
+<div><i>The layout of a Python object in the heap. Each box is allocated in a
+different memory region, and arrows between boxes represent pointers.</i></div>
+<p><br /></p>
+
+<p>The Arrow serialized representation would be as follows.</p>
+
+<div align="center">
+<img 
src="/assets/fast_python_serialization_with_ray_and_arrow/arrow_object.png" 
width="400" alt="Diagram of the memory layout of the Arrow-serialized object" />
+</div>
+<div><i>The memory layout of the Arrow-serialized object.</i></div>
+<p><br /></p>
+
+<h2 id="getting-involved">Getting Involved</h2>
+
+<p>We welcome contributions, especially in the following areas.</p>
+
+<ul>
+  <li>Use the C++ and Java implementations of Arrow to implement versions of 
this
+for C++ and Java.</li>
+  <li>Implement support for more Python types and better test coverage.</li>
+</ul>
+
+<h2 id="reproducing-the-figures-above">Reproducing the Figures Above</h2>
+
+<p>For reference, the figures can be reproduced with the following code.
+Benchmarking <code class="highlighter-rouge">ray.put</code> and <code 
class="highlighter-rouge">ray.get</code> instead of <code 
class="highlighter-rouge">pyarrow.serialize</code> and
+<code class="highlighter-rouge">pyarrow.deserialize</code> gives similar 
figures. The plots were generated at this
+<a 
href="https://github.com/apache/arrow/tree/894f7400977693b4e0e8f4b9845fd89481f6bf29">commit</a>.</p>
+
+<div class="language-python highlighter-rouge"><pre 
class="highlight"><code><span class="kn">import</span> <span 
class="nn">pickle</span>
+<span class="kn">import</span> <span class="nn">pyarrow</span>
+<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span 
class="kn">as</span> <span class="nn">plt</span>
+<span class="kn">import</span> <span class="nn">numpy</span> <span 
class="kn">as</span> <span class="nn">np</span>
+<span class="kn">import</span> <span class="nn">timeit</span>
+
+
+<span class="k">def</span> <span class="nf">benchmark_object</span><span 
class="p">(</span><span class="n">obj</span><span class="p">,</span> <span 
class="n">number</span><span class="o">=</span><span class="mi">10</span><span 
class="p">):</span>
+    <span class="c"># Time serialization and deserialization for pickle.</span>
+    <span class="n">pickle_serialize</span> <span class="o">=</span> <span 
class="n">timeit</span><span class="o">.</span><span 
class="n">timeit</span><span class="p">(</span>
+        <span class="k">lambda</span><span class="p">:</span> <span 
class="n">pickle</span><span class="o">.</span><span 
class="n">dumps</span><span class="p">(</span><span class="n">obj</span><span 
class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span 
class="n">pickle</span><span class="o">.</span><span 
class="n">HIGHEST_PROTOCOL</span><span class="p">),</span>
+        <span class="n">number</span><span class="o">=</span><span 
class="n">number</span><span class="p">)</span>
+    <span class="n">serialized_obj</span> <span class="o">=</span> <span 
class="n">pickle</span><span class="o">.</span><span 
class="n">dumps</span><span class="p">(</span><span class="n">obj</span><span 
class="p">,</span> <span class="n">pickle</span><span class="o">.</span><span 
class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
+    <span class="n">pickle_deserialize</span> <span class="o">=</span> <span 
class="n">timeit</span><span class="o">.</span><span 
class="n">timeit</span><span class="p">(</span><span 
class="k">lambda</span><span class="p">:</span> <span 
class="n">pickle</span><span class="o">.</span><span 
class="n">loads</span><span class="p">(</span><span 
class="n">serialized_obj</span><span class="p">),</span>
+                                       <span class="n">number</span><span 
class="o">=</span><span class="n">number</span><span class="p">)</span>
+
+    <span class="c"># Time serialization and deserialization for Ray.</span>
+    <span class="n">ray_serialize</span> <span class="o">=</span> <span 
class="n">timeit</span><span class="o">.</span><span 
class="n">timeit</span><span class="p">(</span>
+        <span class="k">lambda</span><span class="p">:</span> <span 
class="n">pyarrow</span><span class="o">.</span><span 
class="n">serialize</span><span class="p">(</span><span 
class="n">obj</span><span class="p">)</span><span class="o">.</span><span 
class="n">to_buffer</span><span class="p">(),</span> <span 
class="n">number</span><span class="o">=</span><span 
class="n">number</span><span class="p">)</span>
+    <span class="n">serialized_obj</span> <span class="o">=</span> <span 
class="n">pyarrow</span><span class="o">.</span><span 
class="n">serialize</span><span class="p">(</span><span 
class="n">obj</span><span class="p">)</span><span class="o">.</span><span 
class="n">to_buffer</span><span class="p">()</span>
+    <span class="n">ray_deserialize</span> <span class="o">=</span> <span 
class="n">timeit</span><span class="o">.</span><span 
class="n">timeit</span><span class="p">(</span>
+        <span class="k">lambda</span><span class="p">:</span> <span 
class="n">pyarrow</span><span class="o">.</span><span 
class="n">deserialize</span><span class="p">(</span><span 
class="n">serialized_obj</span><span class="p">),</span> <span 
class="n">number</span><span class="o">=</span><span 
class="n">number</span><span class="p">)</span>
+
+    <span class="k">return</span> <span class="p">[[</span><span 
class="n">pickle_serialize</span><span class="p">,</span> <span 
class="n">pickle_deserialize</span><span class="p">],</span>
+            <span class="p">[</span><span class="n">ray_serialize</span><span 
class="p">,</span> <span class="n">ray_deserialize</span><span 
class="p">]]</span>
+
+
+<span class="k">def</span> <span class="nf">plot</span><span 
class="p">(</span><span class="n">pickle_times</span><span class="p">,</span> 
<span class="n">ray_times</span><span class="p">,</span> <span 
class="n">title</span><span class="p">,</span> <span class="n">i</span><span 
class="p">):</span>
+    <span class="n">fig</span><span class="p">,</span> <span 
class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span 
class="o">.</span><span class="n">subplots</span><span class="p">()</span>
+    <span class="n">fig</span><span class="o">.</span><span 
class="n">set_size_inches</span><span class="p">(</span><span 
class="mf">3.8</span><span class="p">,</span> <span class="mf">2.7</span><span 
class="p">)</span>
+
+    <span class="n">bar_width</span> <span class="o">=</span> <span 
class="mf">0.35</span>
+    <span class="n">index</span> <span class="o">=</span> <span 
class="n">np</span><span class="o">.</span><span class="n">arange</span><span 
class="p">(</span><span class="mi">2</span><span class="p">)</span>
+    <span class="n">opacity</span> <span class="o">=</span> <span 
class="mf">0.6</span>
+
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">bar</span><span class="p">(</span><span class="n">index</span><span 
class="p">,</span> <span class="n">pickle_times</span><span class="p">,</span> 
<span class="n">bar_width</span><span class="p">,</span>
+            <span class="n">alpha</span><span class="o">=</span><span 
class="n">opacity</span><span class="p">,</span> <span 
class="n">color</span><span class="o">=</span><span class="s">'r'</span><span 
class="p">,</span> <span class="n">label</span><span class="o">=</span><span 
class="s">'Pickle'</span><span class="p">)</span>
+
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">bar</span><span class="p">(</span><span class="n">index</span> <span 
class="o">+</span> <span class="n">bar_width</span><span class="p">,</span> 
<span class="n">ray_times</span><span class="p">,</span> <span 
class="n">bar_width</span><span class="p">,</span>
+            <span class="n">alpha</span><span class="o">=</span><span 
class="n">opacity</span><span class="p">,</span> <span 
class="n">color</span><span class="o">=</span><span class="s">'c'</span><span 
class="p">,</span> <span class="n">label</span><span class="o">=</span><span 
class="s">'Ray'</span><span class="p">)</span>
+
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">title</span><span class="p">(</span><span class="n">title</span><span 
class="p">,</span> <span class="n">fontweight</span><span 
class="o">=</span><span class="s">'bold'</span><span class="p">)</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">ylabel</span><span class="p">(</span><span class="s">'Time 
(seconds)'</span><span class="p">,</span> <span class="n">fontsize</span><span 
class="o">=</span><span class="mi">10</span><span class="p">)</span>
+    <span class="n">labels</span> <span class="o">=</span> <span 
class="p">[</span><span class="s">'serialization'</span><span 
class="p">,</span> <span class="s">'deserialization'</span><span 
class="p">]</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">xticks</span><span class="p">(</span><span class="n">index</span> 
<span class="o">+</span> <span class="n">bar_width</span> <span 
class="o">/</span> <span class="mi">2</span><span class="p">,</span> <span 
class="n">labels</span><span class="p">,</span> <span 
class="n">fontsize</span><span class="o">=</span><span 
class="mi">10</span><span class="p">)</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">legend</span><span class="p">(</span><span 
class="n">fontsize</span><span class="o">=</span><span 
class="mi">10</span><span class="p">,</span> <span 
class="n">bbox_to_anchor</span><span class="o">=</span><span 
class="p">(</span><span class="mi">1</span><span class="p">,</span> <span 
class="mi">1</span><span class="p">))</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">tight_layout</span><span class="p">()</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">yticks</span><span class="p">(</span><span 
class="n">fontsize</span><span class="o">=</span><span 
class="mi">10</span><span class="p">)</span>
+    <span class="n">plt</span><span class="o">.</span><span 
class="n">savefig</span><span class="p">(</span><span class="s">'plot-'</span> 
<span class="o">+</span> <span class="nb">str</span><span 
class="p">(</span><span class="n">i</span><span class="p">)</span> <span 
class="o">+</span> <span class="s">'.png'</span><span class="p">,</span> <span 
class="n">format</span><span class="o">=</span><span 
class="s">'png'</span><span class="p">)</span>
+
+
+<span class="n">test_objects</span> <span class="o">=</span> <span 
class="p">[</span>
+    <span class="p">[</span><span class="n">np</span><span 
class="o">.</span><span class="n">random</span><span class="o">.</span><span 
class="n">randn</span><span class="p">(</span><span 
class="mi">50000</span><span class="p">)</span> <span class="k">for</span> 
<span class="n">i</span> <span class="ow">in</span> <span 
class="nb">range</span><span class="p">(</span><span class="mi">100</span><span 
class="p">)],</span>
+    <span class="p">{</span><span class="s">'weight-'</span> <span 
class="o">+</span> <span class="nb">str</span><span class="p">(</span><span 
class="n">i</span><span class="p">):</span> <span class="n">np</span><span 
class="o">.</span><span class="n">random</span><span class="o">.</span><span 
class="n">randn</span><span class="p">(</span><span 
class="mi">50000</span><span class="p">)</span> <span class="k">for</span> 
<span class="n">i</span> <span class="ow">in</span> <span 
class="nb">range</span><span class="p">(</span><span class="mi">100</span><span 
class="p">)},</span>
+    <span class="p">{</span><span class="n">i</span><span class="p">:</span> 
<span class="nb">set</span><span class="p">([</span><span 
class="s">'string1'</span> <span class="o">+</span> <span 
class="nb">str</span><span class="p">(</span><span class="n">i</span><span 
class="p">),</span> <span class="s">'string2'</span> <span class="o">+</span> 
<span class="nb">str</span><span class="p">(</span><span 
class="n">i</span><span class="p">)])</span> <span class="k">for</span> <span 
class="n">i</span> <span class="ow">in</span> <span 
class="nb">range</span><span class="p">(</span><span 
class="mi">100000</span><span class="p">)},</span>
+    <span class="p">[</span><span class="nb">str</span><span 
class="p">(</span><span class="n">i</span><span class="p">)</span> <span 
class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span 
class="nb">range</span><span class="p">(</span><span 
class="mi">200000</span><span class="p">)]</span>
+<span class="p">]</span>
+
+<span class="n">titles</span> <span class="o">=</span> <span class="p">[</span>
+    <span class="s">'List of large numpy arrays'</span><span class="p">,</span>
+    <span class="s">'Dictionary of large numpy arrays'</span><span 
class="p">,</span>
+    <span class="s">'Large dictionary of small sets'</span><span 
class="p">,</span>
+    <span class="s">'Large list of strings'</span>
+<span class="p">]</span>
+
+<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> 
<span class="nb">range</span><span class="p">(</span><span 
class="nb">len</span><span class="p">(</span><span 
class="n">test_objects</span><span class="p">)):</span>
+    <span class="n">plot</span><span class="p">(</span><span 
class="o">*</span><span class="n">benchmark_object</span><span 
class="p">(</span><span class="n">test_objects</span><span 
class="p">[</span><span class="n">i</span><span class="p">]),</span> <span 
class="n">titles</span><span class="p">[</span><span class="n">i</span><span 
class="p">],</span> <span class="n">i</span><span class="p">)</span>
+</code></pre>
+</div>
+
+
+
+    <hr/>
+<footer class="footer">
+  <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache 
Arrow project logo are either registered trademarks or trademarks of The Apache 
Software Foundation in the United States and other countries.</p>
+  <p>&copy; 2017 Apache Software Foundation</p>
+</footer>
+
+  </div>
+</body>
+</html>

Reply via email to