This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 77c4fd51a ORC-1491: Update Python documentation with PyArrow 13.0.0
and Dask 2023.8.1
77c4fd51a is described below
commit 77c4fd51ad8e1cd358faa4b463f9e61b42f2924d
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Aug 23 19:59:19 2023 -0700
ORC-1491: Update Python documentation with PyArrow 13.0.0 and Dask 2023.8.1
---
docs/dask.html | 18 ++++++++++++++----
docs/pyarrow.html | 11 +++++++++--
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/docs/dask.html b/docs/dask.html
index ca575007e..a451a2a8e 100644
--- a/docs/dask.html
+++ b/docs/dask.html
@@ -920,7 +920,7 @@
<p><a href="https://dask.org">Dask</a> also supports Apache ORC.</p>
-<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>pip3 install "dask[dataframe]==2022.2.0"
+<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>pip3 install "dask[dataframe]==2023.8.1"
pip3 install pandas
</code></pre></div></div>
@@ -930,19 +930,29 @@ pip3 install pandas
In [2]: import dask.dataframe as dd
-In [3]: pf = pd.DataFrame(data={"col1": [1, 2, 3]})
+In [3]: pf = pd.DataFrame(data={"col1": [1, 2, 3], "col2": ["a", "b", None]})
In [4]: dd.to_orc(dd.from_pandas(pf, npartitions=2), path="/tmp/orc")
-Out[4]: (None,)
+Out[4]: (None, None)
In [5]: dd.read_orc(path="/tmp/orc").compute()
Out[5]:
+ col1 col2
+0 1 a
+1 2 b
+0 3 <NA>
+
+In [6]: dd.read_orc(path="/tmp/orc", columns=["col1"]).compute()
+Out[6]:
col1
0 1
1 2
-2 3
+0 3
</code></pre></div></div>
+<p><a href="https://docs.dask.org/en/stable/10-minutes-to-dask.html">10 Minutes to Dask</a> page
+provides a short overview.</p>
+
diff --git a/docs/pyarrow.html b/docs/pyarrow.html
index 60c357287..fe73a1745 100644
--- a/docs/pyarrow.html
+++ b/docs/pyarrow.html
@@ -920,7 +920,7 @@
<p><a href="https://arrow.apache.org">Apache Arrow</a> project’s <a
href="https://pypi.org/project/pyarrow/">PyArrow</a> is the recommended
package.</p>
-<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>pip3 install pyarrow==12.0.0
+<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>pip3 install pyarrow==13.0.0
pip3 install pandas
</code></pre></div></div>
@@ -930,10 +930,17 @@ pip3 install pandas
In [2]: from pyarrow import orc
-In [3]: orc.write_table(pa.table({"col1": [1, 2, 3]}), "test.orc", compression="zstd")
+In [3]: orc.write_table(pa.table({"col1": [1, 2, 3], "col2": ["a", "b", None]}), "test.orc", compression="zstd")
In [4]: orc.read_table("test.orc").to_pandas()
Out[4]:
+ col1 col2
+0 1 a
+1 2 b
+2 3 None
+
+In [5]: orc.read_table("test.orc", columns=["col1"]).to_pandas()
+Out[5]:
col1
0 1
1 2