Reorganize document
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/54f7335c Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/54f7335c Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/54f7335c Branch: refs/heads/trunk Commit: 54f7335cd9bcf13f96d62191b64ea179a61c1763 Parents: f2f3071 Author: Sylvain Lebresne <[email protected]> Authored: Tue Jun 21 19:52:52 2016 +0200 Committer: Sylvain Lebresne <[email protected]> Committed: Wed Jun 22 09:30:46 2016 +0200 ---------------------------------------------------------------------- .gitignore | 3 + doc/Makefile | 2 +- doc/convert_yaml_to_rst.py | 2 +- doc/source/_static/extra.css | 8 + doc/source/_templates/indexcontent.html | 33 + doc/source/architecture.rst | 217 - doc/source/architecture/dynamo.rst | 137 + doc/source/architecture/guarantees.rst | 20 + doc/source/architecture/index.rst | 29 + doc/source/architecture/overview.rst | 20 + doc/source/architecture/storage_engine.rst | 82 + doc/source/bugs.rst | 20 + doc/source/conf.py | 4 +- .../configuration/cassandra_config_file.rst | 1699 ++++++++ doc/source/configuration/index.rst | 25 + doc/source/cql.rst | 4114 ------------------ doc/source/cql/appendices.rst | 310 ++ doc/source/cql/changes.rst | 257 ++ doc/source/cql/ddl.rst | 682 +++ doc/source/cql/definitions.rst | 225 + doc/source/cql/dml.rst | 606 +++ doc/source/cql/functions.rst | 661 +++ doc/source/cql/index.rst | 47 + doc/source/cql/indexes.rst | 84 + doc/source/cql/json.rst | 146 + doc/source/cql/mvs.rst | 95 + doc/source/cql/security.rst | 637 +++ doc/source/cql/triggers.rst | 61 + doc/source/cql/types.rst | 516 +++ doc/source/cqlsh.rst | 447 -- doc/source/data_modeling/index.rst | 20 + doc/source/faq.rst | 20 - doc/source/faq/index.rst | 20 + doc/source/getting_started.rst | 269 -- doc/source/getting_started/configuring.rst | 67 + doc/source/getting_started/drivers.rst | 105 + doc/source/getting_started/index.rst | 33 + 
doc/source/getting_started/installing.rst | 99 + doc/source/getting_started/querying.rst | 38 + doc/source/index.rst | 19 +- doc/source/operating/backups.rst | 22 + doc/source/operating/bloom_filters.rst | 65 + doc/source/operating/cdc.rst | 89 + doc/source/operating/compaction.rst | 426 ++ doc/source/operating/compression.rst | 94 + doc/source/operating/hardware.rst | 87 + doc/source/operating/hints.rst | 22 + doc/source/operating/index.rst | 38 + doc/source/operating/metrics.rst | 619 +++ doc/source/operating/read_repair.rst | 22 + doc/source/operating/repair.rst | 22 + doc/source/operating/security.rst | 410 ++ doc/source/operating/snitch.rst | 78 + doc/source/operating/topo_changes.rst | 122 + doc/source/operations.rst | 1900 -------- doc/source/tools/cqlsh.rst | 455 ++ doc/source/tools/index.rst | 26 + doc/source/tools/nodetool.rst | 22 + doc/source/troubleshooting.rst | 20 - doc/source/troubleshooting/index.rst | 20 + 60 files changed, 9440 insertions(+), 6998 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 9cb8614..4f33eda 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,6 @@ lib/jsr223/jython/cachedir lib/jsr223/scala/*.jar /.ant-targets-build.xml + +# Generated files from the documentation +doc/source/cassandra_config_file.rst http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/Makefile ---------------------------------------------------------------------- diff --git a/doc/Makefile b/doc/Makefile index 778448a..14e4c7a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -14,7 +14,7 @@ ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) sou # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -MAKE_CASSANDRA_YAML = 
python convert_yaml_to_rst.py ../conf/cassandra.yaml source/cassandra_config_file.rst +MAKE_CASSANDRA_YAML = python convert_yaml_to_rst.py ../conf/cassandra.yaml source/configuration/cassandra_config_file.rst .PHONY: help help: http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/convert_yaml_to_rst.py ---------------------------------------------------------------------- diff --git a/doc/convert_yaml_to_rst.py b/doc/convert_yaml_to_rst.py index 426286a..398295d 100644 --- a/doc/convert_yaml_to_rst.py +++ b/doc/convert_yaml_to_rst.py @@ -58,7 +58,7 @@ def convert(yaml_file, dest_file): lines = f.readlines()[7:] with open(dest_file, 'w') as outfile: - outfile.write("Cassandra Config File\n") + outfile.write("Cassandra Configuration File\n") outfile.write("=====================\n") # since comments preceed an option, this holds all of the comment http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/_static/extra.css ---------------------------------------------------------------------- diff --git a/doc/source/_static/extra.css b/doc/source/_static/extra.css index 1b65a86..b55515e 100644 --- a/doc/source/_static/extra.css +++ b/doc/source/_static/extra.css @@ -33,3 +33,11 @@ a.reference.internal:visited code.literal { max-width: 100%; overflow: visible; } + +table.contentstable { + margin: 0; +} + +td.rightcolumn { + padding-left: 30px; +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/_templates/indexcontent.html ---------------------------------------------------------------------- diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html new file mode 100644 index 0000000..a71a7e9 --- /dev/null +++ b/doc/source/_templates/indexcontent.html @@ -0,0 +1,33 @@ +{% extends "defindex.html" %} +{% block tables %} +<p><strong>{% trans %}Main documentation parts:{% endtrans %}</strong></p> + <table class="contentstable" align="center"><tr> + <td width="50%"> + <p 
class="biglink"><a class="biglink" href="{{ pathto("getting_started/index") }}">{% trans %}Getting started{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}Newbie friendly starting point{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("architecture/index") }}">{% trans %}Cassandra Architecture{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}Cassandra's big picture{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("data_modeling/index") }}">{% trans %}Data Modeling{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}Or how to make square pegs fit round holes{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("cql/index") }}">{% trans %}Cassandra Query Language{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}CQL reference documentation{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("configuration/index") }}">{% trans %}Configuration{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}Cassandra's handles and knobs{% endtrans %}</span></p> + </td><td width="50%" class="rightcolumn"> + <p class="biglink"><a class="biglink" href="{{ pathto("operating/index") }}">{% trans %}Operating Cassandra{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}The operator's corner{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("tools/index") }}">{% trans %}Cassandra's Tools{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}cqlsh, nodetool, ...{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("troubleshooting/index") }}">{% trans %}Troubleshooting{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}What to look for when you have a problem{% endtrans %}</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("faq/index") }}">{% trans %}FAQs{% endtrans %}</a><br/> + <span class="linkdescr">{% trans %}Frequently 
Asked Questions (with answers!){% endtrans %}</span></p> + </td></tr> + </table> + +<p><strong>{% trans %}Meta information:{% endtrans %}</strong></p> + +<p class="biglink"><a class="biglink" href="{{ pathto("bugs") }}">{% trans %}Reporting bugs{% endtrans %}</a></p> +<p class="biglink"><a class="biglink" href="{{ pathto("contactus") }}">{% trans %}Contact us{% endtrans %}</a></p> + +{% endblock %} http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture.rst b/doc/source/architecture.rst deleted file mode 100644 index 9209414..0000000 --- a/doc/source/architecture.rst +++ /dev/null @@ -1,217 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at -.. -.. http://www.apache.org/licenses/LICENSE-2.0 -.. -.. Unless required by applicable law or agreed to in writing, software -.. distributed under the License is distributed on an "AS IS" BASIS, -.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -.. See the License for the specific language governing permissions and -.. limitations under the License. - -Architecture -============ - -Overview --------- - -.. todo:: todo - -Dynamo ------- - -Gossip -^^^^^^ - -.. todo:: todo - -Failure Detection -^^^^^^^^^^^^^^^^^ - -.. todo:: todo - -Token Ring/Ranges -^^^^^^^^^^^^^^^^^ - -.. todo:: todo - -.. _replication-strategy: - -Replication -^^^^^^^^^^^ - -The replication strategy of a keyspace determines which nodes are replicas for a given token range. 
The two main -replication strategies are :ref:`simple-strategy` and :ref:`network-topology-strategy`. - -.. _simple-strategy: - -SimpleStrategy -~~~~~~~~~~~~~~ - -SimpleStrategy allows a single integer ``replication_factor`` to be defined. This determines the number of nodes that -should contain a copy of each row. For example, if ``replication_factor`` is 3, then three different nodes should store -a copy of each row. - -SimpleStrategy treats all nodes identically, ignoring any configured datacenters or racks. To determine the replicas -for a token range, Cassandra iterates through the tokens in the ring, starting with the token range of interest. For -each token, it checks whether the owning node has been added to the set of replicas, and if it has not, it is added to -the set. This process continues until ``replication_factor`` distinct nodes have been added to the set of replicas. - -.. _network-topology-strategy: - -NetworkTopologyStrategy -~~~~~~~~~~~~~~~~~~~~~~~ - -NetworkTopologyStrategy allows a replication factor to be specified for each datacenter in the cluster. Even if your -cluster only uses a single datacenter, NetworkTopologyStrategy should be prefered over SimpleStrategy to make it easier -to add new physical or virtual datacenters to the cluster later. - -In addition to allowing the replication factor to be specified per-DC, NetworkTopologyStrategy also attempts to choose -replicas within a datacenter from different racks. If the number of racks is greater than or equal to the replication -factor for the DC, each replica will be chosen from a different rack. Otherwise, each rack will hold at least one -replica, but some racks may hold more than one. Note that this rack-aware behavior has some potentially `surprising -implications <https://issues.apache.org/jira/browse/CASSANDRA-3810>`_. For example, if there are not an even number of -nodes in each rack, the data load on the smallest rack may be much higher. 
Similarly, if a single node is bootstrapped -into a new rack, it will be considered a replica for the entire ring. For this reason, many operators choose to -configure all nodes on a single "rack". - -Tunable Consistency -^^^^^^^^^^^^^^^^^^^ - -Cassandra supports a per-operation tradeoff between consistency and availability through *Consistency Levels*. -Essentially, an operation's consistency level specifies how many of the replicas need to respond to the coordinator in -order to consider the operation a success. - -The following consistency levels are available: - -``ONE`` - Only a single replica must respond. - -``TWO`` - Two replicas must respond. - -``THREE`` - Three replicas must respond. - -``QUORUM`` - A majority (n/2 + 1) of the replicas must respond. - -``ALL`` - All of the replicas must respond. - -``LOCAL_QUORUM`` - A majority of the replicas in the local datacenter (whichever datacenter the coordinator is in) must respond. - -``EACH_QUORUM`` - A majority of the replicas in each datacenter must respond. - -``LOCAL_ONE`` - Only a single replica must respond. In a multi-datacenter cluster, this also gaurantees that read requests are not - sent to replicas in a remote datacenter. - -``ANY`` - A single replica may respond, or the coordinator may store a hint. If a hint is stored, the coordinator will later - attempt to replay the hint and deliver the mutation to the replicas. This consistency level is only accepted for - write operations. - -Write operations are always sent to all replicas, regardless of consistency level. The consistency level simply -controls how many responses the coordinator waits for before responding to the client. - -For read operations, the coordinator generally only issues read commands to enough replicas to satisfy the consistency -level. There are a couple of exceptions to this: - -- Speculative retry may issue a redundant read request to an extra replica if the other replicas have not responded - within a specified time window. 
-- Based on ``read_repair_chance`` and ``dclocal_read_repair_chance`` (part of a table's schema), read requests may be - randomly sent to all replicas in order to repair potentially inconsistent data. - -Picking Consistency Levels -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is common to pick read and write consistency levels that are high enough to overlap, resulting in "strong" -consistency. This is typically expressed as ``W + R > RF``, where ``W`` is the write consistency level, ``R`` is the -read consistency level, and ``RF`` is the replication factor. For example, if ``RF = 3``, a ``QUORUM`` request will -require responses from at least two of the three replicas. If ``QUORUM`` is used for both writes and reads, at least -one of the replicas is guaranteed to participate in *both* the write and the read request, which in turn guarantees that -the latest write will be read. In a multi-datacenter environment, ``LOCAL_QUORUM`` can be used to provide a weaker but -still useful guarantee: reads are guaranteed to see the latest write from within the same datacenter. - -If this type of strong consistency isn't required, lower consistency levels like ``ONE`` may be used to improve -throughput, latency, and availability. - -Storage Engine --------------- - -.. _commit-log: - -CommitLog -^^^^^^^^^ - -.. todo:: todo - -.. _memtables: - -Memtables -^^^^^^^^^ - -Memtables are in-memory structures where Cassandra buffers writes. In general, there is one active memtable per table. -Eventually, memtables are flushed onto disk and become immutable `SSTables`_. This can be triggered in several -ways: - -- The memory usage of the memtables exceeds the configured threshold (see ``memtable_cleanup_threshold``) -- The :ref:`commit-log` approaches its maximum size, and forces memtable flushes in order to allow commitlog segments to - be freed - -Memtables may be stored entirely on-heap or partially off-heap, depending on ``memtable_allocation_type``. 
- -SSTables -^^^^^^^^ - -SSTables are the immutable data files that Cassandra uses for persisting data on disk. - -As SSTables are flushed to disk from :ref:`memtables` or are streamed from other nodes, Cassandra triggers compactions -which combine multiple SSTables into one. Once the new SSTable has been written, the old SSTables can be removed. - -Each SSTable is comprised of multiple components stored in separate files: - -``Data.db`` - The actual data, i.e. the contents of rows. - -``Index.db`` - An index from partition keys to positions in the ``Data.db`` file. For wide partitions, this may also include an - index to rows within a partition. - -``Summary.db`` - A sampling of (by default) every 128th entry in the ``Index.db`` file. - -``Filter.db`` - A Bloom Filter of the partition keys in the SSTable. - -``CompressionInfo.db`` - Metadata about the offsets and lengths of compression chunks in the ``Data.db`` file. - -``Statistics.db`` - Stores metadata about the SSTable, including information about timestamps, tombstones, clustering keys, compaction, - repair, compression, TTLs, and more. - -``Digest.crc32`` - A CRC-32 digest of the ``Data.db`` file. - -``TOC.txt`` - A plain text list of the component files for the SSTable. - -Within the ``Data.db`` file, rows are organized by partition. These partitions are sorted in token order (i.e. by a -hash of the partition key when the default partitioner, ``Murmur3Partition``, is used). Within a partition, rows are -stored in the order of their clustering keys. - -SSTables can be optionally compressed using block-based compression. - -Guarantees ----------- - -.. 
todo:: todo http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture/dynamo.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture/dynamo.rst b/doc/source/architecture/dynamo.rst new file mode 100644 index 0000000..d146471 --- /dev/null +++ b/doc/source/architecture/dynamo.rst @@ -0,0 +1,137 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +Dynamo +------ + +Gossip +^^^^^^ + +.. todo:: todo + +Failure Detection +^^^^^^^^^^^^^^^^^ + +.. todo:: todo + +Token Ring/Ranges +^^^^^^^^^^^^^^^^^ + +.. todo:: todo + +.. _replication-strategy: + +Replication +^^^^^^^^^^^ + +The replication strategy of a keyspace determines which nodes are replicas for a given token range. The two main +replication strategies are :ref:`simple-strategy` and :ref:`network-topology-strategy`. + +.. _simple-strategy: + +SimpleStrategy +~~~~~~~~~~~~~~ + +SimpleStrategy allows a single integer ``replication_factor`` to be defined. This determines the number of nodes that +should contain a copy of each row. For example, if ``replication_factor`` is 3, then three different nodes should store +a copy of each row. 
+ +SimpleStrategy treats all nodes identically, ignoring any configured datacenters or racks. To determine the replicas +for a token range, Cassandra iterates through the tokens in the ring, starting with the token range of interest. For +each token, it checks whether the owning node has been added to the set of replicas, and if it has not, it is added to +the set. This process continues until ``replication_factor`` distinct nodes have been added to the set of replicas. + +.. _network-topology-strategy: + +NetworkTopologyStrategy +~~~~~~~~~~~~~~~~~~~~~~~ + +NetworkTopologyStrategy allows a replication factor to be specified for each datacenter in the cluster. Even if your +cluster only uses a single datacenter, NetworkTopologyStrategy should be preferred over SimpleStrategy to make it easier +to add new physical or virtual datacenters to the cluster later. + +In addition to allowing the replication factor to be specified per-DC, NetworkTopologyStrategy also attempts to choose +replicas within a datacenter from different racks. If the number of racks is greater than or equal to the replication +factor for the DC, each replica will be chosen from a different rack. Otherwise, each rack will hold at least one +replica, but some racks may hold more than one. Note that this rack-aware behavior has some potentially `surprising +implications <https://issues.apache.org/jira/browse/CASSANDRA-3810>`_. For example, if there are not an even number of +nodes in each rack, the data load on the smallest rack may be much higher. Similarly, if a single node is bootstrapped +into a new rack, it will be considered a replica for the entire ring. For this reason, many operators choose to +configure all nodes on a single "rack". + +Tunable Consistency +^^^^^^^^^^^^^^^^^^^ + +Cassandra supports a per-operation tradeoff between consistency and availability through *Consistency Levels*. 
+Essentially, an operation's consistency level specifies how many of the replicas need to respond to the coordinator in +order to consider the operation a success. + +The following consistency levels are available: + +``ONE`` + Only a single replica must respond. + +``TWO`` + Two replicas must respond. + +``THREE`` + Three replicas must respond. + +``QUORUM`` + A majority (n/2 + 1) of the replicas must respond. + +``ALL`` + All of the replicas must respond. + +``LOCAL_QUORUM`` + A majority of the replicas in the local datacenter (whichever datacenter the coordinator is in) must respond. + +``EACH_QUORUM`` + A majority of the replicas in each datacenter must respond. + +``LOCAL_ONE`` + Only a single replica must respond. In a multi-datacenter cluster, this also guarantees that read requests are not + sent to replicas in a remote datacenter. + +``ANY`` + A single replica may respond, or the coordinator may store a hint. If a hint is stored, the coordinator will later + attempt to replay the hint and deliver the mutation to the replicas. This consistency level is only accepted for + write operations. + +Write operations are always sent to all replicas, regardless of consistency level. The consistency level simply +controls how many responses the coordinator waits for before responding to the client. + +For read operations, the coordinator generally only issues read commands to enough replicas to satisfy the consistency +level. There are a couple of exceptions to this: + +- Speculative retry may issue a redundant read request to an extra replica if the other replicas have not responded + within a specified time window. +- Based on ``read_repair_chance`` and ``dclocal_read_repair_chance`` (part of a table's schema), read requests may be + randomly sent to all replicas in order to repair potentially inconsistent data. 
+ +Picking Consistency Levels +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is common to pick read and write consistency levels that are high enough to overlap, resulting in "strong" +consistency. This is typically expressed as ``W + R > RF``, where ``W`` is the write consistency level, ``R`` is the +read consistency level, and ``RF`` is the replication factor. For example, if ``RF = 3``, a ``QUORUM`` request will +require responses from at least two of the three replicas. If ``QUORUM`` is used for both writes and reads, at least +one of the replicas is guaranteed to participate in *both* the write and the read request, which in turn guarantees that +the latest write will be read. In a multi-datacenter environment, ``LOCAL_QUORUM`` can be used to provide a weaker but +still useful guarantee: reads are guaranteed to see the latest write from within the same datacenter. + +If this type of strong consistency isn't required, lower consistency levels like ``ONE`` may be used to improve +throughput, latency, and availability. http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture/guarantees.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture/guarantees.rst b/doc/source/architecture/guarantees.rst new file mode 100644 index 0000000..c0b58d8 --- /dev/null +++ b/doc/source/architecture/guarantees.rst @@ -0,0 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. 
distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +Guarantees +---------- + +.. todo:: todo http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture/index.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture/index.rst b/doc/source/architecture/index.rst new file mode 100644 index 0000000..58eda13 --- /dev/null +++ b/doc/source/architecture/index.rst @@ -0,0 +1,29 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +Architecture +============ + +This section describes the general architecture of Apache Cassandra. + +.. 
toctree:: + :maxdepth: 2 + + overview + dynamo + storage_engine + guarantees + http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture/overview.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture/overview.rst b/doc/source/architecture/overview.rst new file mode 100644 index 0000000..005b15b --- /dev/null +++ b/doc/source/architecture/overview.rst @@ -0,0 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +Overview +-------- + +.. todo:: todo http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/architecture/storage_engine.rst ---------------------------------------------------------------------- diff --git a/doc/source/architecture/storage_engine.rst b/doc/source/architecture/storage_engine.rst new file mode 100644 index 0000000..e4114e5 --- /dev/null +++ b/doc/source/architecture/storage_engine.rst @@ -0,0 +1,82 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. 
to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. + +Storage Engine +-------------- + +.. _commit-log: + +CommitLog +^^^^^^^^^ + +.. todo:: todo + +.. _memtables: + +Memtables +^^^^^^^^^ + +Memtables are in-memory structures where Cassandra buffers writes. In general, there is one active memtable per table. +Eventually, memtables are flushed onto disk and become immutable `SSTables`_. This can be triggered in several +ways: + +- The memory usage of the memtables exceeds the configured threshold (see ``memtable_cleanup_threshold``) +- The :ref:`commit-log` approaches its maximum size, and forces memtable flushes in order to allow commitlog segments to + be freed + +Memtables may be stored entirely on-heap or partially off-heap, depending on ``memtable_allocation_type``. + +SSTables +^^^^^^^^ + +SSTables are the immutable data files that Cassandra uses for persisting data on disk. + +As SSTables are flushed to disk from :ref:`memtables` or are streamed from other nodes, Cassandra triggers compactions +which combine multiple SSTables into one. Once the new SSTable has been written, the old SSTables can be removed. + +Each SSTable is comprised of multiple components stored in separate files: + +``Data.db`` + The actual data, i.e. the contents of rows. + +``Index.db`` + An index from partition keys to positions in the ``Data.db`` file. For wide partitions, this may also include an + index to rows within a partition. 
+ +``Summary.db`` + A sampling of (by default) every 128th entry in the ``Index.db`` file. + +``Filter.db`` + A Bloom Filter of the partition keys in the SSTable. + +``CompressionInfo.db`` + Metadata about the offsets and lengths of compression chunks in the ``Data.db`` file. + +``Statistics.db`` + Stores metadata about the SSTable, including information about timestamps, tombstones, clustering keys, compaction, + repair, compression, TTLs, and more. + +``Digest.crc32`` + A CRC-32 digest of the ``Data.db`` file. + +``TOC.txt`` + A plain text list of the component files for the SSTable. + +Within the ``Data.db`` file, rows are organized by partition. These partitions are sorted in token order (i.e. by a +hash of the partition key when the default partitioner, ``Murmur3Partitioner``, is used). Within a partition, rows are +stored in the order of their clustering keys. + +SSTables can be optionally compressed using block-based compression. http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/bugs.rst ---------------------------------------------------------------------- diff --git a/doc/source/bugs.rst b/doc/source/bugs.rst new file mode 100644 index 0000000..ef10aab --- /dev/null +++ b/doc/source/bugs.rst @@ -0,0 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. 
See the License for the specific language governing permissions and +.. limitations under the License. + +Reporting bugs +-------------- + +.. todo:: TODO http://git-wip-us.apache.org/repos/asf/cassandra/blob/54f7335c/doc/source/conf.py ---------------------------------------------------------------------- diff --git a/doc/source/conf.py b/doc/source/conf.py index 85c494d..9caf188 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -192,7 +192,9 @@ html_static_path = ['_static'] # Additional templates that should be rendered to pages, maps page names to # template names. # -# html_additional_pages = {} +html_additional_pages = { + 'index': 'indexcontent.html' +} # If false, no module index is generated. #
