http://git-wip-us.apache.org/repos/asf/arrow-site/blob/4d4a3202/docs/python/_modules/pyarrow/hdfs.html ---------------------------------------------------------------------- diff --git a/docs/python/_modules/pyarrow/hdfs.html b/docs/python/_modules/pyarrow/hdfs.html new file mode 100644 index 0000000..b193931 --- /dev/null +++ b/docs/python/_modules/pyarrow/hdfs.html @@ -0,0 +1,283 @@ +<!DOCTYPE html> + + +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + + <title>pyarrow.hdfs — pyarrow documentation</title> + + <link rel="stylesheet" href="../../_static/bootstrap-sphinx.css" type="text/css" /> + <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> + + <script type="text/javascript"> + var DOCUMENTATION_OPTIONS = { + URL_ROOT: '../../', + VERSION: '', + COLLAPSE_INDEX: false, + FILE_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt' + }; + </script> + <script type="text/javascript" src="../../_static/jquery.js"></script> + <script type="text/javascript" src="../../_static/underscore.js"></script> + <script type="text/javascript" src="../../_static/doctools.js"></script> + <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> + <script type="text/javascript" src="../../_static/js/jquery-1.11.0.min.js"></script> + <script type="text/javascript" src="../../_static/js/jquery-fix.js"></script> + <script type="text/javascript" src="../../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script> + <script type="text/javascript" src="../../_static/bootstrap-sphinx.js"></script> + <link rel="index" title="Index" href="../../genindex.html" /> + <link rel="search" title="Search" href="../../search.html" /> +<meta charset='utf-8'> +<meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'> +<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1'> +<meta name="apple-mobile-web-app-capable" content="yes"> + + </head> + <body role="document"> + + <div id="navbar" class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <!-- .btn-navbar is used as the toggle for collapsed navbar content --> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a class="navbar-brand" href="../../index.html"> + pyarrow</a> + <span class="navbar-text navbar-version pull-left"><b></b></span> + </div> + + <div class="collapse navbar-collapse nav-collapse"> + <ul class="nav navbar-nav"> + + + <li class="dropdown globaltoc-container"> + <a role="button" + id="dLabelGlobalToc" + data-toggle="dropdown" + data-target="#" + href="../../index.html">Site <b class="caret"></b></a> + <ul class="dropdown-menu globaltoc" + role="menu" + aria-labelledby="dLabelGlobalToc"><p class="caption"><span class="caption-text">Getting Started</span></p> +<ul> +<li class="toctree-l1"><a class="reference internal" href="../../install.html">Install PyArrow</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../development.html">Development</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../memory.html">Memory and IO Interfaces</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../data.html">In-Memory Data Model</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../ipc.html">IPC: Fast Streaming and Serialization</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../filesystems.html">File System Interfaces</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../getting_involved.html">Getting Involved</a></li> +</ul> +</ul> +</li> + + <li class="dropdown"> + <a role="button" + id="dLabelLocalToc" + data-toggle="dropdown" + data-target="#" + href="#">Page <b class="caret"></b></a> + <ul class="dropdown-menu localtoc" + role="menu" + aria-labelledby="dLabelLocalToc"></ul> +</li> + + + + + + + + + + + <li class="hidden-sm"></li> + + </ul> + + + +<form class="navbar-form navbar-right" action="../../search.html" method="get"> + <div class="form-group"> + <input type="text" name="q" class="form-control" placeholder="Search" /> + </div> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> +</form> + + </div> + </div> + </div> + +<div class="container"> + <div class="row"> + <div class="col-md-12 content"> + + <h1>Source code for pyarrow.hdfs</h1><div class="highlight"><pre> +<span></span><span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span> +<span class="c1"># or more contributor license agreements. See the NOTICE file</span> +<span class="c1"># distributed with this work for additional information</span> +<span class="c1"># regarding copyright ownership. The ASF licenses this file</span> +<span class="c1"># to you under the Apache License, Version 2.0 (the</span> +<span class="c1"># "License"); you may not use this file except in compliance</span> +<span class="c1"># with the License. You may obtain a copy of the License at</span> +<span class="c1">#</span> +<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> +<span class="c1">#</span> +<span class="c1"># Unless required by applicable law or agreed to in writing,</span> +<span class="c1"># software distributed under the License is distributed on an</span> +<span class="c1"># "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span> +<span class="c1"># KIND, either express or implied. See the License for the</span> +<span class="c1"># specific language governing permissions and limitations</span> +<span class="c1"># under the License.</span> + +<span class="kn">import</span> <span class="nn">posixpath</span> + +<span class="kn">from</span> <span class="nn">pyarrow.util</span> <span class="k">import</span> <span class="n">implements</span> +<span class="kn">from</span> <span class="nn">pyarrow.filesystem</span> <span class="k">import</span> <span class="n">FileSystem</span> +<span class="kn">import</span> <span class="nn">pyarrow.lib</span> <span class="k">as</span> <span class="nn">lib</span> + + +<div class="viewcode-block" id="HadoopFileSystem"><a class="viewcode-back" href="../../api.html#pyarrow.HadoopFileSystem">[docs]</a><span class="k">class</span> <span class="nc">HadoopFileSystem</span><span class="p">(</span><span class="n">lib</span><span class="o">.</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="n">FileSystem</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> FileSystem interface for HDFS cluster. See pyarrow.hdfs.connect for full</span> +<span class="sd"> connection details</span> +<span class="sd"> """</span> + + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">host</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">user</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">kerb_ticket</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">driver</span><span class="o">=</span><span class="s1">'libhdfs'</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">_connect</span><span class="p">(</span><span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">,</span> <span class="n">user</span><span class="p">,</span> <span class="n">kerb_ticket</span><span class="p">,</span> <span class="n">driver</span><span class="p">)</span> + + <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">isdir</span><span class="p">)</span> + <span class="k">def</span> <span class="nf">isdir</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">isdir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> + + <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">isfile</span><span class="p">)</span> + <span class="k">def</span> <span class="nf">isfile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> + + <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">delete</span><span class="p">)</span> +<div class="viewcode-block" id="HadoopFileSystem.delete"><a class="viewcode-back" href="../../generated/pyarrow.HadoopFileSystem.delete.html#pyarrow.HadoopFileSystem.delete">[docs]</a> <span class="k">def</span> <span class="nf">delete</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">recursive</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">recursive</span><span class="p">)</span></div> + + <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">mkdir</span><span class="p">)</span> +<div class="viewcode-block" id="HadoopFileSystem.mkdir"><a class="viewcode-back" href="../../generated/pyarrow.HadoopFileSystem.mkdir.html#pyarrow.HadoopFileSystem.mkdir">[docs]</a> <span class="k">def</span> <span class="nf">mkdir</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">create_parents</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div> + + <span class="nd">@implements</span><span class="p">(</span><span class="n">FileSystem</span><span class="o">.</span><span class="n">rename</span><span class="p">)</span> +<div class="viewcode-block" id="HadoopFileSystem.rename"><a class="viewcode-back" href="../../generated/pyarrow.HadoopFileSystem.rename.html#pyarrow.HadoopFileSystem.rename">[docs]</a> <span class="k">def</span> <span class="nf">rename</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">new_path</span><span class="p">):</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">new_path</span><span class="p">)</span></div> + +<div class="viewcode-block" id="HadoopFileSystem.ls"><a class="viewcode-back" href="../../generated/pyarrow.HadoopFileSystem.ls.html#pyarrow.HadoopFileSystem.ls">[docs]</a> <span class="k">def</span> <span class="nf">ls</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">detail</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Retrieve directory contents and metadata, if requested.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> path : HDFS path</span> +<span class="sd"> detail : boolean, default False</span> +<span class="sd"> If False, only return list of paths</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> result : list of dicts (detail=True) or strings (detail=False)</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">HadoopFileSystem</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">ls</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">detail</span><span class="p">)</span></div> + + <span class="k">def</span> <span class="nf">walk</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">top_path</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Directory tree generator for HDFS, like os.walk</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> top_path : string</span> +<span class="sd"> Root directory for tree traversal</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> Generator yielding 3-tuple (dirpath, dirnames, filename)</span> +<span class="sd"> """</span> + <span class="n">contents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ls</span><span class="p">(</span><span class="n">top_path</span><span class="p">,</span> <span class="n">detail</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> + + <span class="n">directories</span><span class="p">,</span> <span class="n">files</span> <span class="o">=</span> <span class="n">_libhdfs_walk_files_dirs</span><span class="p">(</span><span class="n">top_path</span><span class="p">,</span> <span class="n">contents</span><span class="p">)</span> + <span class="k">yield</span> <span class="n">top_path</span><span class="p">,</span> <span class="n">directories</span><span class="p">,</span> <span class="n">files</span> + <span class="k">for</span> <span class="n">dirname</span> <span class="ow">in</span> <span class="n">directories</span><span class="p">:</span> + <span class="k">for</span> <span class="n">tup</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">walk</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path_join</span><span class="p">(</span><span class="n">top_path</span><span class="p">,</span> <span class="n">dirname</span><span class="p">)):</span> + <span class="k">yield</span> <span class="n">tup</span></div> + + +<span class="k">def</span> <span class="nf">_libhdfs_walk_files_dirs</span><span class="p">(</span><span class="n">top_path</span><span class="p">,</span> <span class="n">contents</span><span class="p">):</span> + <span class="n">files</span> <span class="o">=</span> <span class="p">[]</span> + <span class="n">directories</span> <span class="o">=</span> <span class="p">[]</span> + <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">contents</span><span class="p">:</span> + <span class="n">scrubbed_name</span> <span class="o">=</span> <span class="n">posixpath</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">c</span><span class="p">[</span><span class="s1">'name'</span><span class="p">])[</span><span class="mi">1</span><span class="p">]</span> + <span class="k">if</span> <span class="n">c</span><span class="p">[</span><span class="s1">'kind'</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'file'</span><span class="p">:</span> + <span class="n">files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">scrubbed_name</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="n">directories</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">scrubbed_name</span><span class="p">)</span> + + <span class="k">return</span> <span class="n">directories</span><span class="p">,</span> <span class="n">files</span> + + +<div class="viewcode-block" id="connect"><a class="viewcode-back" href="../../generated/pyarrow.hdfs.connect.html#pyarrow.connect">[docs]</a><span class="k">def</span> <span class="nf">connect</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="s2">"default"</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">user</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">kerb_ticket</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">driver</span><span class="o">=</span><span class="s1">'libhdfs'</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Connect to an HDFS cluster. All parameters are optional and should</span> +<span class="sd"> only be set if the defaults need to be overridden.</span> + +<span class="sd"> Authentication should be automatic if the HDFS cluster uses Kerberos.</span> +<span class="sd"> However, if a username is specified, then the ticket cache will likely</span> +<span class="sd"> be required.</span> + +<span class="sd"> Parameters</span> +<span class="sd"> ----------</span> +<span class="sd"> host : NameNode. Set to "default" for fs.defaultFS from core-site.xml.</span> +<span class="sd"> port : NameNode's port. Set to 0 for default or logical (HA) nodes.</span> +<span class="sd"> user : Username when connecting to HDFS; None implies login user.</span> +<span class="sd"> kerb_ticket : Path to Kerberos ticket cache.</span> +<span class="sd"> driver : {'libhdfs', 'libhdfs3'}, default 'libhdfs'</span> +<span class="sd"> Connect using libhdfs (JNI-based) or libhdfs3 (3rd-party C++</span> +<span class="sd"> library from Apache HAWQ (incubating) )</span> + +<span class="sd"> Notes</span> +<span class="sd"> -----</span> +<span class="sd"> The first time you call this method, it will take longer than usual due</span> +<span class="sd"> to JNI spin-up time.</span> + +<span class="sd"> Returns</span> +<span class="sd"> -------</span> +<span class="sd"> filesystem : HadoopFileSystem</span> +<span class="sd"> """</span> + <span class="n">fs</span> <span class="o">=</span> <span class="n">HadoopFileSystem</span><span class="p">(</span><span class="n">host</span><span class="o">=</span><span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="o">=</span><span class="n">port</span><span class="p">,</span> <span class="n">user</span><span class="o">=</span><span class="n">user</span><span class="p">,</span> + <span class="n">kerb_ticket</span><span class="o">=</span><span class="n">kerb_ticket</span><span class="p">,</span> <span class="n">driver</span><span class="o">=</span><span class="n">driver</span><span class="p">)</span> + <span class="k">return</span> <span class="n">fs</span></div> +</pre></div> + + </div> + + </div> +</div> +<footer class="footer"> + <div class="container"> + <p class="pull-right"> + <a href="#">Back to top</a> + + </p> + <p> + © Copyright 2016-2017 Apache Software Foundation.<br/> + Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.5.5.<br/> + </p> + </div> +</footer> + </body> +</html> \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/4d4a3202/docs/python/_modules/pyarrow/ipc.html ---------------------------------------------------------------------- diff --git a/docs/python/_modules/pyarrow/ipc.html b/docs/python/_modules/pyarrow/ipc.html index 49de091..9905c66 100644 --- a/docs/python/_modules/pyarrow/ipc.html +++ b/docs/python/_modules/pyarrow/ipc.html @@ -71,7 +71,8 @@ <li class="toctree-l1"><a class="reference internal" href="../../memory.html">Memory and IO Interfaces</a></li> <li class="toctree-l1"><a class="reference internal" href="../../data.html">In-Memory Data Model</a></li> <li class="toctree-l1"><a class="reference internal" href="../../ipc.html">IPC: Fast Streaming and Serialization</a></li> -<li class="toctree-l1"><a class="reference internal" href="../../filesystems.html">Filesystem Interfaces</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../filesystems.html">File System Interfaces</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../plasma.html">The Plasma In-Memory Object Store</a></li> <li class="toctree-l1"><a class="reference internal" href="../../pandas.html">Using PyArrow with pandas</a></li> <li class="toctree-l1"><a class="reference internal" href="../../parquet.html">Reading and Writing the Apache Parquet Format</a></li> <li class="toctree-l1"><a class="reference internal" href="../../api.html">API Reference</a></li>