This is an automated email from the ASF dual-hosted git repository. charlie pushed a commit to branch sphinx-docs in repository https://gitbox.apache.org/repos/asf/datasketches-python.git
commit 316f714ecc74ae54fa156426522b64d121143bdf Author: Charlie Dickens <[email protected]> AuthorDate: Mon Sep 11 18:57:20 2023 +0100 First attempt at sphinx documentation --- docs/Makefile | 20 +++++++++++ docs/README.md | 50 +++++++++++++++++++++++++++ docs/make.bat | 35 +++++++++++++++++++ docs/source/conf.py | 36 ++++++++++++++++++++ docs/source/count_min_sketch.rst | 6 ++++ docs/source/cpc.rst | 7 ++++ docs/source/frequent_items.rst | 6 ++++ docs/source/hyper_log_log.rst | 7 ++++ docs/source/index.rst | 73 ++++++++++++++++++++++++++++++++++++++++ docs/source/kll.rst | 19 +++++++++++ docs/source/quantiles_depr.rst | 19 +++++++++++ docs/source/req.rst | 14 ++++++++ docs/source/theta.rst | 8 +++++ requirements.txt | 25 ++++++++++++++ 14 files changed, 325 insertions(+) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d48c482 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,50 @@ +Follow these steps to build the documentation. +1. Clone the directory in an appropriate location `git clone https://github.com/apache/datasketches-python.git` +2. Switch to the correct branch: `git checkout python-docs`. +3. 
In project root run `source python-docs-venv/bin/activate` + +If there are problems running the virtual env then you may need to install `virtualenv` +and install the packages manually as below +(nb my environment has `python` aliased to `python3` so just use whichever is appropriate for your installation) +``` +python -m venv python-docs-venv # create a new virtual env named python-docs-venv +source python-docs-venv/bin/activate +python -m pip install sphinx +python -m pip install sphinx-rtd-theme +``` +4. In project root run `python3 -m pip install .` to build the python bindings. +5. Build and open the documentation: +``` +cd docs +make html +open build/html/index.html +``` + +## Problems +The `density_sketch` and `tuple_sketch` are not yet included. +I have not included the file to avoid cluttering the PR with things that may not work. +You can easily include them by making a `density_sketch.rst` file in the same location as +all of the other `X.rst` files for the sketches and copying in the following: + +``` +Density Sketch +-------------- + +.. autoclass:: datasketches.density_sketch + :members: + :undoc-members: + +.. autoclass:: datasketches.GaussianKernel + :members: +``` +Additionally, you will need to add the below to `index.rst` +``` +Density Estimation +################## + +.. toctree:: + :maxdepth: 1 + + density_sketch +``` + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. 
Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..d4be129 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,36 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import sys +import os + +# need to fix the paths so that sphinx can find the source code. +sys.path.insert(0, os.path.abspath("../../datasketches")) +sys.path.insert(0, os.path.abspath("../../src")) + + +project = 'datasketches' +copyright = '' +author = 'Apache Software Foundation' +release = '0.1' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.autodoc","sphinx.ext.autosummary"] + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] diff --git a/docs/source/count_min_sketch.rst b/docs/source/count_min_sketch.rst new file mode 100644 index 0000000..42ef3c0 --- /dev/null +++ b/docs/source/count_min_sketch.rst @@ -0,0 +1,6 @@ +CountMin Sketch +--------------- + +.. 
autoclass:: _datasketches.count_min_sketch + :members: + :undoc-members: diff --git a/docs/source/cpc.rst b/docs/source/cpc.rst new file mode 100644 index 0000000..ac18638 --- /dev/null +++ b/docs/source/cpc.rst @@ -0,0 +1,7 @@ +Compressed Probabilistic Counting (CPC) +--------------------------------------- +The *Compressed Probabilistic Counting* sketch is a space-efficient method for estimating cardinalities of sets. + +.. autoclass:: _datasketches.cpc_sketch + :members: + :undoc-members: diff --git a/docs/source/frequent_items.rst b/docs/source/frequent_items.rst new file mode 100644 index 0000000..c755d56 --- /dev/null +++ b/docs/source/frequent_items.rst @@ -0,0 +1,6 @@ +Frequent Items +-------------- + +.. autoclass:: _datasketches.frequent_items_sketch + :members: + :undoc-members: diff --git a/docs/source/hyper_log_log.rst b/docs/source/hyper_log_log.rst new file mode 100644 index 0000000..f1a61f7 --- /dev/null +++ b/docs/source/hyper_log_log.rst @@ -0,0 +1,7 @@ +HyperLogLog (HLL) +----------------- +The HyperLogLog (HLL) sketch is a space-efficient method for estimating cardinalities of sets. + +.. autoclass:: _datasketches.hll_sketch + :members: + :undoc-members: diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..9e3c47b --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,73 @@ +.. datasketches documentation master file, created by + sphinx-quickstart on Tue Jul 25 11:04:59 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Apache DataSketches +================================================= + +**DataSketches** are highly-efficient algorithms to analyze big data quickly. + + +Counting Distincts +################## +.. + maxdepth: 1 means only the heading is printed in the contents +.. toctree:: + :maxdepth: 1 + + hyper_log_log + cpc + theta + +Frequency Estimation +########################## + +.. 
toctree:: + :maxdepth: 1 + + count_min_sketch + + +Frequent Items +########################## +This problem may also be known as **heavy hitters** or **TopK** + +.. toctree:: + :maxdepth: 1 + + frequent_items + +Quantile Estimation +################### + +.. toctree:: + :maxdepth: 1 + + kll + req + quantiles_depr + +.. note:: + + This project is under active development. + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + +.. .. automodule:: datasketches +.. :members: + +.. .. automodule:: _datasketches +.. :members: + +.. +.. + +.. distinct_count diff --git a/docs/source/kll.rst b/docs/source/kll.rst new file mode 100644 index 0000000..312fd34 --- /dev/null +++ b/docs/source/kll.rst @@ -0,0 +1,19 @@ +KLL Sketch +---------- + +.. autoclass:: _datasketches.kll_ints_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.kll_floats_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.kll_doubles_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.kll_items_sketch + :members: + :undoc-members: + diff --git a/docs/source/quantiles_depr.rst b/docs/source/quantiles_depr.rst new file mode 100644 index 0000000..bbfa8cc --- /dev/null +++ b/docs/source/quantiles_depr.rst @@ -0,0 +1,19 @@ +Quantiles Sketch (Deprecated) +----------------------------- +This is a deprecated quantiles sketch that is included for cross-language compatibility. + +.. autoclass:: _datasketches.quantiles_ints_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.quantiles_floats_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.quantiles_doubles_sketch + :members: + :undoc-members: + +.. 
autoclass:: _datasketches.quantiles_items_sketch + :members: + :undoc-members: diff --git a/docs/source/req.rst b/docs/source/req.rst new file mode 100644 index 0000000..ceb7936 --- /dev/null +++ b/docs/source/req.rst @@ -0,0 +1,14 @@ +Relative Error Quantiles (REQ) Sketch +------------------------------------- + +.. autoclass:: _datasketches.req_ints_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.req_floats_sketch + :members: + :undoc-members: + +.. autoclass:: _datasketches.req_items_sketch + :members: + :undoc-members: \ No newline at end of file diff --git a/docs/source/theta.rst b/docs/source/theta.rst new file mode 100644 index 0000000..bf85d9f --- /dev/null +++ b/docs/source/theta.rst @@ -0,0 +1,8 @@ +Theta Sketch +------------ +The *Theta Sketch* is a space-efficient method for estimating cardinalities of sets. +It can also easily handle set operations (such as union, intersection, difference) while maintaining good accuracy. + +.. autoclass:: _datasketches.theta_sketch + :members: + :undoc-members: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..52684c5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +alabaster==0.7.13 +Babel==2.12.1 +certifi==2023.7.22 +charset-normalizer==3.2.0 +datasketches @ file:///Users/charlied/personal_dev/datasketches-python +docutils==0.18.1 +idna==3.4 +imagesize==1.4.1 +Jinja2==3.1.2 +MarkupSafe==2.1.3 +numpy==1.25.2 +packaging==23.1 +Pygments==2.16.1 +requests==2.31.0 +snowballstemmer==2.2.0 +Sphinx==7.2.5 +sphinx-rtd-theme==1.3.0 +sphinxcontrib-applehelp==1.0.7 +sphinxcontrib-devhelp==1.0.5 +sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-jquery==4.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.6 +sphinxcontrib-serializinghtml==1.1.9 +urllib3==2.0.4 --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
