http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.read_tensor.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.read_tensor.rst.txt b/docs/python/_sources/generated/pyarrow.read_tensor.rst.txt new file mode 100644 index 0000000..655e4ac --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.read_tensor.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.read\_tensor +===================== + +.. currentmodule:: pyarrow + +.. autofunction:: read_tensor \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.schema.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.schema.rst.txt b/docs/python/_sources/generated/pyarrow.schema.rst.txt new file mode 100644 index 0000000..5f19955 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.schema.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.schema +=============== + +.. currentmodule:: pyarrow + +.. autofunction:: schema \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.set_memory_pool.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.set_memory_pool.rst.txt b/docs/python/_sources/generated/pyarrow.set_memory_pool.rst.txt new file mode 100644 index 0000000..13a31af --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.set_memory_pool.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.set\_memory\_pool +========================== + +.. currentmodule:: pyarrow + +.. autofunction:: set_memory_pool \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.string.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.string.rst.txt b/docs/python/_sources/generated/pyarrow.string.rst.txt new file mode 100644 index 0000000..0b2ea4b --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.string.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.string +=============== + +.. currentmodule:: pyarrow + +.. 
autofunction:: string \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.struct.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.struct.rst.txt b/docs/python/_sources/generated/pyarrow.struct.rst.txt new file mode 100644 index 0000000..c780adc --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.struct.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.struct +=============== + +.. currentmodule:: pyarrow + +.. autofunction:: struct \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.time32.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.time32.rst.txt b/docs/python/_sources/generated/pyarrow.time32.rst.txt new file mode 100644 index 0000000..f397d57 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.time32.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.time32 +=============== + +.. currentmodule:: pyarrow + +.. autofunction:: time32 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.time64.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.time64.rst.txt b/docs/python/_sources/generated/pyarrow.time64.rst.txt new file mode 100644 index 0000000..012cff8 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.time64.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.time64 +=============== + +.. currentmodule:: pyarrow + +.. 
autofunction:: time64 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.timestamp.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.timestamp.rst.txt b/docs/python/_sources/generated/pyarrow.timestamp.rst.txt new file mode 100644 index 0000000..08ed790 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.timestamp.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.timestamp +================== + +.. currentmodule:: pyarrow + +.. autofunction:: timestamp \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.total_allocated_bytes.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.total_allocated_bytes.rst.txt b/docs/python/_sources/generated/pyarrow.total_allocated_bytes.rst.txt new file mode 100644 index 0000000..847fa68 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.total_allocated_bytes.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.total\_allocated\_bytes +================================ + +.. currentmodule:: pyarrow + +.. autofunction:: total_allocated_bytes \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.uint16.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.uint16.rst.txt b/docs/python/_sources/generated/pyarrow.uint16.rst.txt new file mode 100644 index 0000000..811a8a7 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.uint16.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.uint16 +=============== + +.. currentmodule:: pyarrow + +.. 
autofunction:: uint16 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.uint32.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.uint32.rst.txt b/docs/python/_sources/generated/pyarrow.uint32.rst.txt new file mode 100644 index 0000000..d955cdf --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.uint32.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.uint32 +=============== + +.. currentmodule:: pyarrow + +.. autofunction:: uint32 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.uint64.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.uint64.rst.txt b/docs/python/_sources/generated/pyarrow.uint64.rst.txt new file mode 100644 index 0000000..2a72c70 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.uint64.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.uint64 +=============== + +.. currentmodule:: pyarrow + +.. autofunction:: uint64 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.uint8.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.uint8.rst.txt b/docs/python/_sources/generated/pyarrow.uint8.rst.txt new file mode 100644 index 0000000..8c57c67 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.uint8.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.uint8 +============== + +.. currentmodule:: pyarrow + +.. 
autofunction:: uint8 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/generated/pyarrow.write_tensor.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/generated/pyarrow.write_tensor.rst.txt b/docs/python/_sources/generated/pyarrow.write_tensor.rst.txt new file mode 100644 index 0000000..2c19bc8 --- /dev/null +++ b/docs/python/_sources/generated/pyarrow.write_tensor.rst.txt @@ -0,0 +1,6 @@ +pyarrow\.write\_tensor +====================== + +.. currentmodule:: pyarrow + +.. autofunction:: write_tensor \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/getting_involved.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/getting_involved.rst.txt b/docs/python/_sources/getting_involved.rst.txt new file mode 100644 index 0000000..90fa3e4 --- /dev/null +++ b/docs/python/_sources/getting_involved.rst.txt @@ -0,0 +1,37 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Getting Involved +================ + +Right now the primary audience for Apache Arrow is the developers of data +systems; most people will use Apache Arrow indirectly through systems that use +it for internal data handling and interoperating with other Arrow-enabled +systems. + +Even if you do not plan to contribute to Apache Arrow itself or Arrow +integrations in other projects, we'd be happy to have you involved: + + * Join the mailing list: send an email to + `dev-subscribe@arrow.apache.org <mailto:dev-subscribe@arrow.apache.org>`_. + Share your ideas and use cases for the project or read through the + `Archive <http://mail-archives.apache.org/mod_mbox/arrow-dev/>`_. + * Follow our activity on `JIRA <https://issues.apache.org/jira/browse/ARROW>`_ + * Learn the `Format / Specification + <https://github.com/apache/arrow/tree/master/format>`_ + * Chat with us on `Slack <https://apachearrowslackin.herokuapp.com/>`_ + http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/index.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/index.rst.txt b/docs/python/_sources/index.rst.txt new file mode 100644 index 0000000..ecb8e8f --- /dev/null +++ b/docs/python/_sources/index.rst.txt @@ -0,0 +1,48 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. 
See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Apache Arrow (Python) +===================== + +Arrow is a columnar in-memory analytics layer designed to accelerate big data. +It houses a set of canonical in-memory representations of flat and hierarchical +data along with multiple language-bindings for structure manipulation. It also +provides IPC and common algorithm implementations. + +This is the documentation of the Python API of Apache Arrow. For more details +on the format and other language bindings see +`the main page for Arrow <https://arrow.apache.org/>`_. Here we will only +detail the usage of the Python API for Arrow and the leaf libraries that add +additional functionality such as reading Apache Parquet files into Arrow +structures. + +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + + install + pandas + filesystems + parquet + api + getting_involved + +.. toctree:: + :maxdepth: 2 + :caption: Additional Features + + jemalloc MemoryPool <jemalloc.rst> http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/install.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/install.rst.txt b/docs/python/_sources/install.rst.txt new file mode 100644 index 0000000..278b466 --- /dev/null +++ b/docs/python/_sources/install.rst.txt @@ -0,0 +1,151 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Install PyArrow +=============== + +Conda +----- + +To install the latest version of PyArrow from conda-forge using conda: + +.. code-block:: bash + + conda install -c conda-forge pyarrow + +Pip +--- + +Install the latest version from PyPI: + +.. code-block:: bash + + pip install pyarrow + +.. note:: + Currently there are only binary artifacts available for Linux and MacOS. + Otherwise this will only pull the python sources and assumes an existing + installation of the C++ part of Arrow. + To retrieve the binary artifacts, you'll need a recent ``pip`` version that + supports features like the ``manylinux1`` tag. + +Building from source +-------------------- + +First, clone the master git repository: + +.. code-block:: bash + + git clone https://github.com/apache/arrow.git arrow + +System requirements +~~~~~~~~~~~~~~~~~~~ + +Building pyarrow requires: + +* A C++11 compiler + + * Linux: gcc >= 4.8 or clang >= 3.5 + * OS X: XCode 6.4 or higher preferred + +* `CMake <https://cmake.org/>`_ + +Python requirements +~~~~~~~~~~~~~~~~~~~ + +You will need Python (CPython) 2.7, 3.4, or 3.5 installed. Earlier releases +are not being targeted. + +.. note:: + This library targets CPython only due to an emphasis on interoperability with + pandas and NumPy, which are only available for CPython. + +The build requires NumPy, Cython, and a few other Python dependencies: + +.. code-block:: bash + + pip install cython + cd arrow/python + pip install -r requirements.txt + +Installing Arrow C++ library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, you should choose an installation location for Arrow C++. In the future +using the default system install location will work, but for now we are being +explicit: + +.. 
code-block:: bash + + export ARROW_HOME=$HOME/local + +Now, we build Arrow: + +.. code-block:: bash + + cd arrow/cpp + + mkdir dev-build + cd dev-build + + cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME .. + + make + + # Use sudo here if $ARROW_HOME requires it + make install + +To get the optional Parquet support, you should also build and install +`parquet-cpp <https://github.com/apache/parquet-cpp/blob/master/README.md>`_. + +Install `pyarrow` +~~~~~~~~~~~~~~~~~ + + +.. code-block:: bash + + cd arrow/python + + # --with-parquet enables the Apache Parquet support in PyArrow + # --with-jemalloc enables the jemalloc allocator support in PyArrow + # --build-type=release disables debugging information and turns on + # compiler optimizations for native code + python setup.py build_ext --with-parquet --with-jemalloc --build-type=release install + python setup.py install + +.. warning:: + On XCode 6 and prior there are some known OS X `@rpath` issues. If you are + unable to import pyarrow, upgrading XCode may be the solution. + +.. note:: + In development installations, you will also need to set a correct + ``LD_LIBRARY_PATH``. This is most probably done with + ``export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH``. + + +.. code-block:: python + + In [1]: import pyarrow + + In [2]: pyarrow.array([1,2,3]) + Out[2]: + <pyarrow.array.Int64Array object at 0x7f899f3e60e8> + [ + 1, + 2, + 3 + ] http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/jemalloc.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/jemalloc.rst.txt b/docs/python/_sources/jemalloc.rst.txt new file mode 100644 index 0000000..8d7a5dc --- /dev/null +++ b/docs/python/_sources/jemalloc.rst.txt @@ -0,0 +1,48 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. 
regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +jemalloc MemoryPool +=================== + +Arrow's default :class:`~pyarrow.memory.MemoryPool` uses the system's allocator +through the POSIX APIs. Although this already provides aligned allocation, the +POSIX interface doesn't support aligned reallocation. The default reallocation +strategy is to allocate a new region, copy over the old data and free the +previous region. Using `jemalloc <http://jemalloc.net/>`_ we can simply extend +the existing memory allocation to the requested size. While this may still be +linear in the size of allocated memory, it is orders of magnitude faster as only the page +mapping in the kernel is touched, not the actual data. + +The :mod:`~pyarrow.jemalloc` allocator is not enabled by default to allow the +use of the system allocator and/or other allocators like ``tcmalloc``. You can +either explicitly make it the default allocator or pass it only to single +operations. + +.. code:: python + + import pyarrow as pa + + jemalloc_pool = pa.jemalloc_memory_pool() + + # Explicitly use jemalloc for allocating memory for an Arrow Array object + array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool) + + # Set the global pool + pa.set_memory_pool(jemalloc_pool) + # This operation has no explicit MemoryPool specified and will thus + # also use jemalloc for its allocations. 
+ array = pa.Array.from_pylist([1, 2, 3]) http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/pandas.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/pandas.rst.txt b/docs/python/_sources/pandas.rst.txt new file mode 100644 index 0000000..34445ae --- /dev/null +++ b/docs/python/_sources/pandas.rst.txt @@ -0,0 +1,119 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Pandas Interface +================ + +To interface with Pandas, PyArrow provides various conversion routines to +consume Pandas structures and convert back to them. + +DataFrames +---------- + +The equivalent to a Pandas DataFrame in Arrow is a :class:`pyarrow.table.Table`. +Both consist of a set of named columns of equal length. While Pandas only +supports flat columns, the Table also provides nested columns, thus it can +represent more data than a DataFrame, so a full conversion is not always possible. + +Conversion from a Table to a DataFrame is done by calling +:meth:`pyarrow.table.Table.to_pandas`. The inverse is then achieved by using +:meth:`pyarrow.Table.from_pandas`. This conversion routine provides the +convenience parameter ``timestamps_to_ms``. 
Although Arrow supports timestamps of +different resolutions, Pandas only supports nanosecond timestamps and most +other systems (e.g. Parquet) only work on millisecond timestamps. This parameter +can be used to already do the time conversion during the Pandas to Arrow +conversion. + +.. code-block:: python + + import pyarrow as pa + import pandas as pd + + df = pd.DataFrame({"a": [1, 2, 3]}) + # Convert from Pandas to Arrow + table = pa.Table.from_pandas(df) + # Convert back to Pandas + df_new = table.to_pandas() + + +Series +------ + +In Arrow, the most similar structure to a Pandas Series is an Array. +It is a vector that contains data of the same type as linear memory. You can +convert a Pandas Series to an Arrow Array using :meth:`pyarrow.array.from_pandas_series`. +As Arrow Arrays are always nullable, you can supply an optional mask using +the ``mask`` parameter to mark all null-entries. + +Type differences +---------------- + +With the current design of Pandas and Arrow, it is not possible to convert all +column types unmodified. One of the main issues here is that Pandas has no +support for nullable columns of arbitrary type. Also ``datetime64`` is currently +fixed to nanosecond resolution. On the other side, Arrow might be still missing +support for some types. 
+ +Pandas -> Arrow Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++------------------------+--------------------------+ +| Source Type (Pandas) | Destination Type (Arrow) | ++========================+==========================+ +| ``bool`` | ``BOOL`` | ++------------------------+--------------------------+ +| ``(u)int{8,16,32,64}`` | ``(U)INT{8,16,32,64}`` | ++------------------------+--------------------------+ +| ``float32`` | ``FLOAT`` | ++------------------------+--------------------------+ +| ``float64`` | ``DOUBLE`` | ++------------------------+--------------------------+ +| ``str`` / ``unicode`` | ``STRING`` | ++------------------------+--------------------------+ +| ``pd.Categorical`` | ``DICTIONARY`` | ++------------------------+--------------------------+ +| ``pd.Timestamp`` | ``TIMESTAMP(unit=ns)`` | ++------------------------+--------------------------+ +| ``datetime.date`` | ``DATE`` | ++------------------------+--------------------------+ + +Arrow -> Pandas Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++-------------------------------------+--------------------------------------------------------+ +| Source Type (Arrow) | Destination Type (Pandas) | ++=====================================+========================================================+ +| ``BOOL`` | ``bool`` | ++-------------------------------------+--------------------------------------------------------+ +| ``BOOL`` *with nulls* | ``object`` (with values ``True``, ``False``, ``None``) | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` | ``(u)int{8,16,32,64}`` | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` *with nulls* | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``FLOAT`` | ``float32`` | 
++-------------------------------------+--------------------------------------------------------+ +| ``DOUBLE`` | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``STRING`` | ``str`` | ++-------------------------------------+--------------------------------------------------------+ +| ``DICTIONARY`` | ``pd.Categorical`` | ++-------------------------------------+--------------------------------------------------------+ +| ``TIMESTAMP(unit=*)`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | ++-------------------------------------+--------------------------------------------------------+ +| ``DATE`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | ++-------------------------------------+--------------------------------------------------------+ http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_sources/parquet.rst.txt ---------------------------------------------------------------------- diff --git a/docs/python/_sources/parquet.rst.txt b/docs/python/_sources/parquet.rst.txt new file mode 100644 index 0000000..8e011e4 --- /dev/null +++ b/docs/python/_sources/parquet.rst.txt @@ -0,0 +1,91 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Reading/Writing Parquet files +============================= + +If you have built ``pyarrow`` with Parquet support, i.e. ``parquet-cpp`` was +found during the build, you can read files in the Parquet format to/from Arrow +memory structures. The Parquet support code is located in the +:mod:`pyarrow.parquet` module and your package needs to be built with the +``--with-parquet`` flag for ``build_ext``. + +Reading Parquet +--------------- + +To read a Parquet file into Arrow memory, you can use the following code +snippet. It will read the whole Parquet file into memory as an +:class:`~pyarrow.table.Table`. + +.. code-block:: python + + import pyarrow.parquet as pq + + table = pq.read_table('<filename>') + +As DataFrames stored as Parquet are often stored in multiple files, a +convenience method :meth:`~pyarrow.parquet.read_multiple_files` is provided. + +If you already have the Parquet available in memory or get it via non-file +source, you can utilize :class:`pyarrow.io.BufferReader` to read it from +memory. As input to the :class:`~pyarrow.io.BufferReader` you can either supply +a Python ``bytes`` object or a :class:`pyarrow.io.Buffer`. + +.. code:: python + + import pyarrow.io as paio + import pyarrow.parquet as pq + + buf = ... # either bytes or paio.Buffer + reader = paio.BufferReader(buf) + table = pq.read_table(reader) + +Writing Parquet +--------------- + +Given an instance of :class:`pyarrow.table.Table`, the most simple way to +persist it to Parquet is by using the :meth:`pyarrow.parquet.write_table` +method. + +.. code-block:: python + + import pyarrow as pa + import pyarrow.parquet as pq + + table = pa.Table(..) + pq.write_table(table, '<filename>') + +By default this will write the Table as a single RowGroup using ``DICTIONARY`` +encoding. To increase the potential of parallelism a query engine can process +a Parquet file, set the ``chunk_size`` to a fraction of the total number of rows. 
+ +If you also want to compress the columns, you can select a compression +method using the ``compression`` argument. Typically, ``GZIP`` is the choice if +you want to minimize size and ``SNAPPY`` for performance. + +Instead of writing to a file, you can also write to Python ``bytes`` by +utilizing an :class:`pyarrow.io.InMemoryOutputStream()`: + +.. code:: python + + import pyarrow.io as paio + import pyarrow.parquet as pq + + table = ... + output = paio.InMemoryOutputStream() + pq.write_table(table, output) + pybytes = output.get_result().to_pybytes() http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/ajax-loader.gif ---------------------------------------------------------------------- diff --git a/docs/python/_static/ajax-loader.gif b/docs/python/_static/ajax-loader.gif new file mode 100644 index 0000000..61faf8c Binary files /dev/null and b/docs/python/_static/ajax-loader.gif differ http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/basic.css ---------------------------------------------------------------------- diff --git a/docs/python/_static/basic.css b/docs/python/_static/basic.css new file mode 100644 index 0000000..dc88b5a --- /dev/null +++ b/docs/python/_static/basic.css @@ -0,0 +1,632 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox input[type="text"] { + width: 170px; +} + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li div.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; 
+} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + 
margin-right: 1em; +} + +img.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px 7px 0 7px; + background-color: #ffe; + width: 40%; + float: right; +} + +p.sidebar-title { + font-weight: bold; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px 7px 0 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +div.admonition dl { + margin-bottom: 0; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + border: 0; + border-collapse: collapse; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; 
+} + +/* -- figures --------------------------------------------------------------- */ + +div.figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +dl { + margin-bottom: 15px; +} + +dd p { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dt:target, .highlighted { + background-color: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display 
issues on Chrome browsers */ +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; +} + +td.linenos pre { + padding: 5px 0px; + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + margin-left: 0.5em; +} + +table.highlighttable td { + padding: 0 0.5em 0 0.5em; +} + +div.code-block-caption { + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +div.code-block-caption + div > div.highlight > pre { + margin-top: 0; +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + padding: 1em 1em 0; +} + +div.literal-block-wrapper div.highlight { + margin: 0; +} + +code.descname { + background-color: transparent; + font-weight: bold; + font-size: 1.2em; +} + +code.descclassname { + background-color: transparent; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: relative; + left: 0px; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at 
end of file http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/comment-bright.png ---------------------------------------------------------------------- diff --git a/docs/python/_static/comment-bright.png b/docs/python/_static/comment-bright.png new file mode 100644 index 0000000..15e27ed Binary files /dev/null and b/docs/python/_static/comment-bright.png differ http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/comment-close.png ---------------------------------------------------------------------- diff --git a/docs/python/_static/comment-close.png b/docs/python/_static/comment-close.png new file mode 100644 index 0000000..4d91bcf Binary files /dev/null and b/docs/python/_static/comment-close.png differ http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/comment.png ---------------------------------------------------------------------- diff --git a/docs/python/_static/comment.png b/docs/python/_static/comment.png new file mode 100644 index 0000000..dfbc0cb Binary files /dev/null and b/docs/python/_static/comment.png differ http://git-wip-us.apache.org/repos/asf/arrow-site/blob/679f060e/docs/python/_static/css/badge_only.css ---------------------------------------------------------------------- diff --git a/docs/python/_static/css/badge_only.css b/docs/python/_static/css/badge_only.css new file mode 100644 index 0000000..6362912 --- /dev/null +++ b/docs/python/_static/css/badge_only.css @@ -0,0 +1,2 @@ +.fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../font/fontawesome_webfont.eot");src:url("../font/fontawesome_webfont.eot?#iefix") format("embedded-opentype"),url("../font/fontawesome_webfont.woff") format("woff"),url("../font/fontawesome_webfont.ttf") 
format("truetype"),url("../font/fontawesome_webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:0.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:"\f02d"}.icon-book:before{content:"\f02d"}.fa-caret-down:before{content:"\f0d7"}.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before{content:"\f0d8"}.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before{content:"\f0d9"}.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;border-top:solid 10px #343131;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions
hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} +/*# sourceMappingURL=badge_only.css.map */
