Author: eevans
Date: Tue Dec 8 22:59:20 2009
New Revision: 888617
URL: http://svn.apache.org/viewvc?rev=888617&view=rev
Log:
add documentation to thrift interface
Patch by Gary Dusbabek; reviewed by eevans for CASSANDRA-324
Modified:
incubator/cassandra/trunk/interface/cassandra.thrift
Modified: incubator/cassandra/trunk/interface/cassandra.thrift
URL: http://svn.apache.org/viewvc/incubator/cassandra/trunk/interface/cassandra.thrift?rev=888617&r1=888616&r2=888617&view=diff
==============================================================================
--- incubator/cassandra/trunk/interface/cassandra.thrift (original)
+++ incubator/cassandra/trunk/interface/cassandra.thrift Tue Dec 8 22:59:20 2009
@@ -47,17 +47,38 @@
# data structures
#
+/** Basic unit of data within a ColumnFamily.
+ * @param name. A column name can act both as structure (a label) or as data (like a value). Regardless, the name of the column
+ * is used as a key to its value.
+ * @param value. Some data.
+ * @param timestamp. Used to record when data was sent to be written.
+ */
struct Column {
1: required binary name,
2: required binary value,
3: required i64 timestamp,
}
+/** A named list of columns.
+ * @param name. See Column.name.
+ * @param columns. A collection of standard Columns. The columns within a super column are defined in an ad hoc manner.
+ * Columns within a super column do not have to have matching structures (similarly named child columns).
+ */
struct SuperColumn {
1: required binary name,
2: required list<Column> columns,
}
+/**
+ Methods for fetching rows/records from Cassandra will return either a single instance of ColumnOrSuperColumn or a list
+ of ColumnOrSuperColumns (get_slice()). If you're looking up a SuperColumn (or list of SuperColumns) then the resulting
+ instances of ColumnOrSuperColumn will have the requested SuperColumn in the attribute super_column. For queries resulting
+ in Columns, those values will be in the attribute column. This change was made between 0.3 and 0.4 to standardize on
+ single query methods that may return either a SuperColumn or Column.
+
+ @param column. The Column returned by get() or get_slice().
+ @param super_column. The SuperColumn returned by get() or get_slice().
+ */
struct ColumnOrSuperColumn {
1: optional Column column,
2: optional SuperColumn super_column,
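
(Illustration, not part of the patch: a minimal Python sketch of unwrapping a ColumnOrSuperColumn result, assuming the Thrift-generated bindings are importable as the 'cassandra' package; the column and super column names below are made up.)

# Illustrative sketch: only one of .column / .super_column is set on a result.
from cassandra.ttypes import Column, SuperColumn, ColumnOrSuperColumn

def unwrap(cosc):
    """Return whichever of Column or SuperColumn the wrapper actually holds."""
    if cosc.column is not None:
        return cosc.column            # a plain Column: name, value, timestamp
    return cosc.super_column          # a SuperColumn: name plus a list of Columns

# Constructing the structs directly mirrors the Thrift definitions above.
col = Column(name='first', value='John', timestamp=0)
sc = SuperColumn(name='address', columns=[col])
print(unwrap(ColumnOrSuperColumn(column=col)).value)        # -> 'John'
print(unwrap(ColumnOrSuperColumn(super_column=sc)).name)    # -> 'address'
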
@@ -66,32 +87,55 @@
#
# Exceptions
+# (note that internal server errors will raise a TApplicationException, courtesy of Thrift)
#
-# a specific column was requested that does not exist
+/** A specific column was requested that does not exist. */
exception NotFoundException {
}
-# invalid request (keyspace / CF does not exist, etc.)
+/** Invalid request could mean keyspace or column family does not exist, required parameters are missing, or a parameter is malformed.
+ why contains an associated error message.
+*/
exception InvalidRequestException {
1: required string why
}
-# not all the replicas required could be created / read
+/** Not all the replicas required could be created and/or read. */
exception UnavailableException {
}
-# RPC timeout was exceeded. either a node failed mid-operation, or load was too high, or the requested op was too large.
+/** RPC timeout was exceeded. Either a node failed mid-operation, or load was too high, or the requested op was too large. */
exception TimedOutException {
}
-# (note that internal server errors will raise a TApplicationException, courtesy of Thrift)
-
#
# service api
#
-
+/** The ConsistencyLevel is an enum that controls both read and write behavior based on <ReplicationFactor> in your
+ * storage-conf.xml. The different consistency levels have different meanings, depending on whether you're doing a write or read
+ * operation. Note that if W + R > ReplicationFactor, where W is the number of nodes to block for on write, and R
+ * the number to block for on reads, you will have strongly consistent behavior; that is, readers will always see the most
+ * recent write. Of these, the most interesting is to do QUORUM reads and writes, which gives you consistency while still
+ * allowing availability in the face of node failures up to half of <ReplicationFactor>. Of course, if latency is more
+ * important than consistency then you can use lower values for either or both.
+ *
+ * Write:
+ *   ZERO    Ensure nothing. A write happens asynchronously in the background.
+ *   ONE     Ensure that the write has been written to at least 1 node's commit log and memory table before responding to the client.
+ *   QUORUM  Ensure that the write has been written to <ReplicationFactor> / 2 + 1 nodes before responding to the client.
+ *   ALL     Ensure that the write is written to <code><ReplicationFactor></code> nodes before responding to the client.
+ *
+ * Read:
+ *   ZERO    Not supported, because it doesn't make sense.
+ *   ONE     Will return the record returned by the first node to respond. A consistency check is always done in a
+ *           background thread to fix any consistency issues when ConsistencyLevel.ONE is used. This means subsequent
+ *           calls will have correct data even if the initial read gets an older value. (This is called 'read repair'.)
+ *   QUORUM  Will query all storage nodes and return the record with the most recent timestamp once it has at least a
+ *           majority of replicas reported. Again, the remaining replicas will be checked in the background.
+ *   ALL     Not yet supported, but we plan to eventually.
+*/
enum ConsistencyLevel {
ZERO = 0,
ONE = 1,
@@ -101,17 +145,48 @@
ALL = 5,
}
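
(Illustration, not part of the patch: a small Python sketch of the W + R > ReplicationFactor rule described above; the numbers are example values only.)

# Sketch of the consistency arithmetic (example numbers only).
replication_factor = 3
quorum = replication_factor // 2 + 1          # 3 // 2 + 1 == 2 nodes

# QUORUM writes (W=2) plus QUORUM reads (R=2) give W + R = 4 > 3,
# so readers always see the most recently acknowledged write.
W, R = quorum, quorum
assert W + R > replication_factor
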
+/**
+ ColumnParent is used when selecting groups of columns from the same ColumnFamily. In directory structure terms, imagine
+ ColumnParent as ColumnPath + '/../'.
+
+ See also <a href="cassandra.html#Struct_ColumnPath">ColumnPath</a>
+ */
struct ColumnParent {
3: required string column_family,
4: optional binary super_column,
}
+/** The ColumnPath is the path to a single column in Cassandra. It might make sense to think of ColumnPath and
+ * ColumnParent in terms of a directory structure.
+ *
+ * ColumnPath is used to look up a single column.
+ *
+ * @param column_family. The name of the CF of the column being looked up.
+ * @param super_column. The super column name.
+ * @param column. The column name.
+ */
struct ColumnPath {
3: required string column_family,
4: optional binary super_column,
5: optional binary column,
}
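
(Illustration, not part of the patch: constructing a ColumnParent versus a ColumnPath in Python, assuming generated bindings under the 'cassandra' package; the column family and column names are hypothetical samples.)

from cassandra.ttypes import ColumnParent, ColumnPath

# ColumnParent addresses a group of columns: a whole CF, or one SuperColumn within it.
parent = ColumnParent(column_family='Standard1')
super_parent = ColumnParent(column_family='Super1', super_column='address')

# ColumnPath addresses exactly one column -- effectively ColumnParent plus a column name.
path = ColumnPath(column_family='Standard1', column='first')
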
+/**
+ A slice range is a structure that stores basic range, ordering and limit information for a query that will return
+ multiple columns. It could be thought of as Cassandra's version of LIMIT and ORDER BY.
+
+ @param start. The column name to start the slice with. This attribute is not required, though there is no default value,
+ and can be safely set to '', i.e., an empty byte array, to start with the first column name. Otherwise, it
+ must be a valid value under the rules of the Comparator defined for the given ColumnFamily.
+ @param finish. The column name to stop the slice at. This attribute is not required, though there is no default value,
+ and can be safely set to an empty byte array to not stop until 'count' results are seen. Otherwise, it
+ must also be a valid value under the ColumnFamily Comparator.
+ @param reversed. Whether the results should be ordered in reversed order. Similar to ORDER BY blah DESC in SQL.
+ @param count. How many columns to return. Similar to LIMIT 100 in SQL. May be arbitrarily large, but Thrift will
+ materialize the whole result into memory before returning it to the client, so be aware that you may
+ be better served by iterating through slices by passing the last value of one call in as the 'start'
+ of the next instead of increasing 'count' arbitrarily large.
+ */
struct SliceRange {
1: required binary start,
2: required binary finish,
@@ -119,25 +194,52 @@
4: required i32 count=100,
}
+/**
+ A SlicePredicate is similar to a mathematical predicate (see http://en.wikipedia.org/wiki/Predicate_(mathematical_logic)),
+ which is described as "a property that the elements of a set have in common."
+
+ SlicePredicates in Cassandra are described with either a list of column_names or a SliceRange. If column_names is
+ specified, slice_range is ignored.
+
+ @param column_names. A list of column names to retrieve. This can be used similarly to Memcached's "multi-get" feature
+ to fetch N known column names. For instance, if you know you wish to fetch columns 'Joe', 'Jack',
+ and 'Jim' you can pass those column names as a list to fetch all three at once.
+ @param slice_range. A SliceRange describing how to range, order, and/or limit the slice.
+ */
struct SlicePredicate {
1: optional list<binary> column_names,
2: optional SliceRange slice_range,
}
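
(Illustration, not part of the patch: the two flavors of SlicePredicate in Python, assuming generated bindings under the 'cassandra' package; the column names are samples.)

from cassandra.ttypes import SlicePredicate, SliceRange

# Either name the columns you want (multi-get style)...
by_name = SlicePredicate(column_names=['Joe', 'Jack', 'Jim'])

# ...or describe a range: start/finish of '' mean "unbounded", count caps the result.
by_range = SlicePredicate(slice_range=SliceRange(start='', finish='', reversed=False, count=100))
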
+/**
+ A KeySlice is a key followed by the data it maps to. A collection of KeySlices is returned by the get_range_slice operation.
+
+ @param key. A row key.
+ @param columns. List of data represented by the key. Typically, the list is pared down to only the columns specified by
+ a SlicePredicate.
+ */
struct KeySlice {
1: required string key,
2: required list<ColumnOrSuperColumn> columns,
}
-
service Cassandra {
# retrieval methods
+
+ /**
+ Get the Column or SuperColumn at the given column_path. If no value is present, NotFoundException is thrown. (This is
+ the only method that can throw an exception under non-failure conditions.)
+ */
ColumnOrSuperColumn get(1:required string keyspace,
2:required string key,
3:required ColumnPath column_path,
4:required ConsistencyLevel consistency_level=1)
throws (1:InvalidRequestException ire, 2:NotFoundException nfe, 3:UnavailableException ue, 4:TimedOutException te),
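
(Illustration, not part of the patch: calling get() over Thrift from Python. Host, port, keyspace, and column names are hypothetical samples, and the imports assume the generated bindings live in a 'cassandra' package; adjust to your generated namespace.)

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from cassandra import Cassandra
from cassandra.ttypes import ColumnPath, ConsistencyLevel, NotFoundException

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9160))
client = Cassandra.Client(TBinaryProtocol.TBinaryProtocol(transport))
transport.open()

try:
    cosc = client.get('Keyspace1', 'jsmith',
                      ColumnPath(column_family='Standard1', column='first'),
                      ConsistencyLevel.QUORUM)
    print(cosc.column.value)
except NotFoundException:
    print('no such column')    # the only non-failure exception, per the comment above
transport.close()
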
+ /**
+ Get the group of columns contained by column_parent (either a ColumnFamily name or a ColumnFamily/SuperColumn name
+ pair) specified by the given SlicePredicate. If no matching values are found, an empty list is returned.
+ */
list<ColumnOrSuperColumn> get_slice(1:required string keyspace,
2:required string key,
3:required ColumnParent column_parent,
@@ -145,12 +247,20 @@
5:required ConsistencyLevel consistency_level=1)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
+ /**
+ Perform a get for column_path in parallel on the given list<string> keys. The return value maps keys to the
+ ColumnOrSuperColumn found. If no value corresponding to a key is present, the key will still be in the map, but both
+ the column and super_column references of the ColumnOrSuperColumn object it maps to will be null.
+ */
map<string,ColumnOrSuperColumn> multiget(1:required string keyspace,
2:required list<string> keys,
3:required ColumnPath column_path,
4:required ConsistencyLevel consistency_level=1)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
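
(Illustration, not part of the patch: multiget and its "key always present" contract, assuming an open `client` as in the earlier get() sketch; the keys and names are samples.)

from cassandra.ttypes import ColumnPath, ConsistencyLevel

results = client.multiget('Keyspace1', ['jsmith', 'no-such-key'],
                          ColumnPath(column_family='Standard1', column='first'),
                          ConsistencyLevel.ONE)
for key, cosc in results.items():
    if cosc.column is None and cosc.super_column is None:
        print('%s: not found' % key)     # missing keys still appear in the map
    else:
        print('%s: %s' % (key, cosc.column.value))
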
+ /**
+ Performs a get_slice for column_parent and predicate for the given keys in parallel.
+ */
map<string,list<ColumnOrSuperColumn>> multiget_slice(1:required string keyspace,
2:required list<string> keys,
3:required ColumnParent column_parent,
@@ -158,6 +268,9 @@
5:required ConsistencyLevel consistency_level=1)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
+ /**
+ Returns the number of columns for a particular <code>key</code> and <code>ColumnFamily</code> or <code>SuperColumn</code>.
+ */
i32 get_count(1:required string keyspace,
2:required string key,
3:required ColumnParent column_parent,
@@ -173,6 +286,9 @@
6:required ConsistencyLevel consistency_level=1)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
+ /**
+ Returns a subset of columns for a range of keys.
+ */
list<KeySlice> get_range_slice(1:required string keyspace,
2:required ColumnParent column_parent,
3:required SlicePredicate predicate,
@@ -183,6 +299,12 @@
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
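
(Illustration, not part of the patch: consuming the list<KeySlice> returned by get_range_slice. The method's key-range arguments fall outside this hunk, so the call itself is not shown; `key_slices` stands in for its return value.)

def print_key_slices(key_slices):
    for ks in key_slices:                 # each KeySlice pairs a row key with its columns
        names = [cosc.column.name for cosc in ks.columns if cosc.column is not None]
        print('%s: %s' % (ks.key, names))
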
# modification methods
+
+ /**
+ Insert a Column consisting of (column_path.column, value, timestamp) at the given column_path.column_family and optional
+ column_path.super_column. Note that column_path.column is required here, since a SuperColumn cannot directly contain binary
+ values -- it can only contain sub-Columns.
+ */
void insert(1:required string keyspace,
2:required string key,
3:required ColumnPath column_path,
@@ -191,12 +313,22 @@
6:required ConsistencyLevel consistency_level=0)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
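
(Illustration, not part of the patch: a single-column insert. The value and timestamp parameters sit outside this hunk's context; timestamps are supplied by the client, and microseconds here is only a convention. Assumes an open `client` and the sample names from the earlier sketches.)

import time
from cassandra.ttypes import ColumnPath, ConsistencyLevel

client.insert('Keyspace1', 'jsmith',
              ColumnPath(column_family='Standard1', column='first'),
              'John', int(time.time() * 1e6), ConsistencyLevel.ONE)
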
+ /**
+ Insert Columns or SuperColumns across different Column Families for the same row key. cfmap is a
+ map<string, list<ColumnOrSuperColumn>> -- a map which pairs column family names with the relevant ColumnOrSuperColumn
+ objects to insert.
+ */
void batch_insert(1:required string keyspace,
2:required string key,
3:required map<string, list<ColumnOrSuperColumn>> cfmap,
4:required ConsistencyLevel consistency_level=0)
throws (1:InvalidRequestException ire, 2:UnavailableException ue, 3:TimedOutException te),
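
(Illustration, not part of the patch: building the cfmap for batch_insert, which pairs column family names with the ColumnOrSuperColumns to write. Assumes an open `client`; names are samples.)

import time
from cassandra.ttypes import Column, ColumnOrSuperColumn, ConsistencyLevel

ts = int(time.time() * 1e6)
cfmap = {'Standard1': [ColumnOrSuperColumn(column=Column(name='first', value='John', timestamp=ts)),
                       ColumnOrSuperColumn(column=Column(name='last', value='Smith', timestamp=ts))]}
client.batch_insert('Keyspace1', 'jsmith', cfmap, ConsistencyLevel.ONE)
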
+ /**
+ Remove data from the row specified by key at the granularity specified by column_path, and the given timestamp. Note
+ that all the values in column_path besides column_path.column_family are truly optional: you can remove the entire
+ row by just specifying the ColumnFamily, or you can remove a SuperColumn or a single Column by specifying those levels too.
+ */
void remove(1:required string keyspace,
2:required string key,
3:required ColumnPath column_path,
@@ -208,13 +340,13 @@
// Meta-APIs -- APIs to get information about the node or cluster,
// rather than user data. The nodeprobe program provides usage examples.
- // get property whose value is of type "string"
+ /** get property whose value is of type string. */
string get_string_property(1:required string property),
- // get property whose value is list of "strings"
+ /** get property whose value is list of strings. */
list<string> get_string_list_property(1:required string property),
- // describe specified keyspace
+ /** describe specified keyspace */
map<string, map<string, string>> describe_keyspace(1:required string keyspace)
throws (1:NotFoundException nfe),
}
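
(Illustration, not part of the patch: the meta-APIs. describe_keyspace returns a map<string, map<string, string>> describing the keyspace (in practice, column family names to their settings); the property strings accepted by the get_*_property calls are not listed in this file, so 'version' below is a hypothetical example. Assumes an open `client` as in the earlier sketches.)

for cf, settings in client.describe_keyspace('Keyspace1').items():
    print('%s: %s' % (cf, settings))

print(client.get_string_property('version'))   # hypothetical property name
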