Drop support for protocol v1 and v2 patch by Benjamin Lerer; reviewed by Sylvain Lebresne for CASSANDRA-10146
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/8439e74e Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/8439e74e Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/8439e74e Branch: refs/heads/trunk Commit: 8439e74e6f39317c1731aeb438b6ee17c09fa57d Parents: b007316 Author: blerer <[email protected]> Authored: Thu Sep 10 22:05:01 2015 +0200 Committer: blerer <[email protected]> Committed: Thu Sep 10 22:05:20 2015 +0200 ---------------------------------------------------------------------- NEWS.txt | 1 + doc/native_protocol_v1.spec | 746 --------------- doc/native_protocol_v2.spec | 954 ------------------- .../org/apache/cassandra/cql3/QueryOptions.java | 18 +- src/java/org/apache/cassandra/cql3/Tuples.java | 6 - .../org/apache/cassandra/cql3/UserTypes.java | 8 - .../cql3/statements/ModificationStatement.java | 3 - .../cassandra/db/marshal/CollectionType.java | 26 +- .../serializers/CollectionSerializer.java | 58 +- .../cassandra/thrift/CassandraServer.java | 4 +- .../org/apache/cassandra/transport/Event.java | 7 +- .../org/apache/cassandra/transport/Frame.java | 58 +- .../org/apache/cassandra/transport/Server.java | 3 +- .../transport/messages/BatchMessage.java | 7 +- .../transport/messages/ExecuteMessage.java | 14 +- .../transport/messages/QueryMessage.java | 22 +- .../org/apache/cassandra/cql3/CQLTester.java | 68 +- .../cassandra/cql3/IndexQueryPagingTest.java | 3 +- .../validation/entities/UFPureScriptTest.java | 6 +- .../cql3/validation/entities/UFTest.java | 15 +- .../cassandra/service/ClientWarningsTest.java | 20 +- .../cassandra/transport/ProtocolErrorTest.java | 56 +- .../cassandra/transport/SerDeserTest.java | 10 +- 23 files changed, 118 insertions(+), 1995 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/NEWS.txt 
---------------------------------------------------------------------- diff --git a/NEWS.txt b/NEWS.txt index af2f64c..1af1bd5 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -42,6 +42,7 @@ New features Upgrading --------- + - The native protocol versions 1 and 2 are not supported anymore. - Max mutation size is now configurable via max_mutation_size_in_kb setting in cassandra.yaml; the default is half the size commitlog_segment_size_in_mb * 1024. - 3.0 requires Java 8u40 or later. http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/doc/native_protocol_v1.spec ---------------------------------------------------------------------- diff --git a/doc/native_protocol_v1.spec b/doc/native_protocol_v1.spec deleted file mode 100644 index 9c9b6b5..0000000 --- a/doc/native_protocol_v1.spec +++ /dev/null @@ -1,746 +0,0 @@ - - CQL BINARY PROTOCOL v1 - - -Table of Contents - - 1. Overview - 2. Frame header - 2.1. version - 2.2. flags - 2.3. stream - 2.4. opcode - 2.5. length - 3. Notations - 4. Messages - 4.1. Requests - 4.1.1. STARTUP - 4.1.2. CREDENTIALS - 4.1.3. OPTIONS - 4.1.4. QUERY - 4.1.5. PREPARE - 4.1.6. EXECUTE - 4.1.7. REGISTER - 4.2. Responses - 4.2.1. ERROR - 4.2.2. READY - 4.2.3. AUTHENTICATE - 4.2.4. SUPPORTED - 4.2.5. RESULT - 4.2.5.1. Void - 4.2.5.2. Rows - 4.2.5.3. Set_keyspace - 4.2.5.4. Prepared - 4.2.5.5. Schema_change - 4.2.6. EVENT - 5. Compression - 6. Data Type Serialization Formats - 7. Error codes - - -1. Overview - - The CQL binary protocol is a frame based protocol. Frames are defined as: - - 0 8 16 24 32 - +---------+---------+---------+---------+ - | version | flags | stream | opcode | - +---------+---------+---------+---------+ - | length | - +---------+---------+---------+---------+ - | | - . ... body ... . - . . - . . - +---------------------------------------- - - The protocol is big-endian (network byte order). - - Each frame contains a fixed size header (8 bytes) followed by a variable size - body. The header is described in Section 2. 
The content of the body depends - on the header opcode value (the body can in particular be empty for some - opcode values). The list of allowed opcode is defined Section 2.3 and the - details of each corresponding message is described Section 4. - - The protocol distinguishes 2 types of frames: requests and responses. Requests - are those frame sent by the clients to the server, response are the ones sent - by the server. Note however that while communication are initiated by the - client with the server responding to request, the protocol may likely add - server pushes in the future, so responses does not obligatory come right after - a client request. - - Note to client implementors: clients library should always assume that the - body of a given frame may contain more data than what is described in this - document. It will however always be safe to ignore the remaining of the frame - body in such cases. The reason is that this may allow to sometimes extend the - protocol with optional features without needing to change the protocol - version. - - -2. Frame header - -2.1. version - - The version is a single byte that indicate both the direction of the message - (request or response) and the version of the protocol in use. The up-most bit - of version is used to define the direction of the message: 0 indicates a - request, 1 indicates a responses. This can be useful for protocol analyzers to - distinguish the nature of the packet from the direction which it is moving. - The rest of that byte is the protocol version (1 for the protocol defined in - this document). In other words, for this version of the protocol, version will - have one of: - 0x01 Request frame for this protocol version - 0x81 Response frame for this protocol version - - -2.2. flags - - Flags applying to this frame. The flags have the following meaning (described - by the mask that allow to select them): - 0x01: Compression flag. If set, the frame body is compressed. 
The actual - compression to use should have been set up beforehand through the - Startup message (which thus cannot be compressed; Section 4.1.1). - 0x02: Tracing flag. For a request frame, this indicate the client requires - tracing of the request. Note that not all requests support tracing. - Currently, only QUERY, PREPARE and EXECUTE queries support tracing. - Other requests will simply ignore the tracing flag if set. If a - request support tracing and the tracing flag was set, the response to - this request will have the tracing flag set and contain tracing - information. - If a response frame has the tracing flag set, its body contains - a tracing ID. The tracing ID is a [uuid] and is the first thing in - the frame body. The rest of the body will then be the usual body - corresponding to the response opcode. - - The rest of the flags is currently unused and ignored. - -2.3. stream - - A frame has a stream id (one signed byte). When sending request messages, this - stream id must be set by the client to a positive byte (negative stream id - are reserved for streams initiated by the server; currently all EVENT messages - (section 4.2.6) have a streamId of -1). If a client sends a request message - with the stream id X, it is guaranteed that the stream id of the response to - that message will be X. - - This allow to deal with the asynchronous nature of the protocol. If a client - sends multiple messages simultaneously (without waiting for responses), there - is no guarantee on the order of the responses. For instance, if the client - writes REQ_1, REQ_2, REQ_3 on the wire (in that order), the server might - respond to REQ_3 (or REQ_2) first. Assigning different stream id to these 3 - requests allows the client to distinguish to which request an received answer - respond to. As there can only be 128 different simultaneous stream, it is up - to the client to reuse stream id. - - Note that clients are free to use the protocol synchronously (i.e. 
wait for - the response to REQ_N before sending REQ_N+1). In that case, the stream id - can be safely set to 0. Clients should also feel free to use only a subset of - the 128 maximum possible stream ids if it is simpler for those - implementation. - -2.4. opcode - - An integer byte that distinguish the actual message: - 0x00 ERROR - 0x01 STARTUP - 0x02 READY - 0x03 AUTHENTICATE - 0x04 CREDENTIALS - 0x05 OPTIONS - 0x06 SUPPORTED - 0x07 QUERY - 0x08 RESULT - 0x09 PREPARE - 0x0A EXECUTE - 0x0B REGISTER - 0x0C EVENT - - Messages are described in Section 4. - - -2.5. length - - A 4 byte integer representing the length of the body of the frame (note: - currently a frame is limited to 256MB in length). - - -3. Notations - - To describe the layout of the frame body for the messages in Section 4, we - define the following: - - [int] A 4 byte integer - [short] A 2 byte unsigned integer - [string] A [short] n, followed by n bytes representing an UTF-8 - string. - [long string] An [int] n, followed by n bytes representing an UTF-8 string. - [uuid] A 16 bytes long uuid. - [string list] A [short] n, followed by n [string]. - [bytes] A [int] n, followed by n bytes if n >= 0. If n < 0, - no byte should follow and the value represented is `null`. - [short bytes] A [short] n, followed by n bytes if n >= 0. - - [option] A pair of <id><value> where <id> is a [short] representing - the option id and <value> depends on that option (and can be - of size 0). The supported id (and the corresponding <value>) - will be described when this is used. - [option list] A [short] n, followed by n [option]. - [inet] An address (ip and port) to a node. It consists of one - [byte] n, that represents the address size, followed by n - [byte] representing the IP address (in practice n can only be - either 4 (IPv4) or 16 (IPv6)), following by one [int] - representing the port. - [consistency] A consistency level specification. 
This is a [short] - representing a consistency level with the following - correspondence: - 0x0000 ANY - 0x0001 ONE - 0x0002 TWO - 0x0003 THREE - 0x0004 QUORUM - 0x0005 ALL - 0x0006 LOCAL_QUORUM - 0x0007 EACH_QUORUM - 0x000A LOCAL_ONE - - [string map] A [short] n, followed by n pairs <k><v> where <k> and <v> - are [string]. - [string multimap] A [short] n, followed by n pairs <k><v> where <k> is a - [string] and <v> is a [string list]. - - -4. Messages - -4.1. Requests - - Note that outside of their normal responses (described below), all requests - can get an ERROR message (Section 4.2.1) as response. - -4.1.1. STARTUP - - Initialize the connection. The server will respond by either a READY message - (in which case the connection is ready for queries) or an AUTHENTICATE message - (in which case credentials will need to be provided using CREDENTIALS). - - This must be the first message of the connection, except for OPTIONS that can - be sent before to find out the options supported by the server. Once the - connection has been initialized, a client should not send any more STARTUP - messages. - - The body is a [string map] of options. Possible options are: - - "CQL_VERSION": the version of CQL to use. This option is mandatory and - currently, the only version supported is "3.0.0". Note that this is - different from the protocol version. - - "COMPRESSION": the compression algorithm to use for frames (See section 5). - This is optional, if not specified no compression will be used. - - -4.1.2. CREDENTIALS - - Provides credentials information for the purpose of identification. This - message comes as a response to an AUTHENTICATE message from the server, but - can be used later in the communication to change the authentication - information. - - The body is a list of key/value pairs. It is a [short] n, followed by n - pairs of [string].
These key/value pairs are passed as is to the Cassandra - IAuthenticator and thus the detail of which informations is needed depends on - that authenticator. - - The response to a CREDENTIALS is a READY message (or an ERROR message). - - -4.1.3. OPTIONS - - Asks the server to return what STARTUP options are supported. The body of an - OPTIONS message should be empty and the server will respond with a SUPPORTED - message. - - -4.1.4. QUERY - - Performs a CQL query. The body of the message consists of a CQL query as a [long - string] followed by the [consistency] for the operation. - - Note that the consistency is ignored by some queries (USE, CREATE, ALTER, - TRUNCATE, ...). - - The server will respond to a QUERY message with a RESULT message, the content - of which depends on the query. - - -4.1.5. PREPARE - - Prepare a query for later execution (through EXECUTE). The body consists of - the CQL query to prepare as a [long string]. - - The server will respond with a RESULT message with a `prepared` kind (0x0004, - see Section 4.2.5). - - -4.1.6. EXECUTE - - Executes a prepared query. The body of the message must be: - <id><n><value_1>....<value_n><consistency> - where: - - <id> is the prepared query ID. It's the [short bytes] returned as a - response to a PREPARE message. - - <n> is a [short] indicating the number of following values. - - <value_1>...<value_n> are the [bytes] to use for bound variables in the - prepared query. - - <consistency> is the [consistency] level for the operation. - - Note that the consistency is ignored by some (prepared) queries (USE, CREATE, - ALTER, TRUNCATE, ...). - - The response from the server will be a RESULT message. - - -4.1.7. REGISTER - - Register this connection to receive some type of events. The body of the - message is a [string list] representing the event types to register to. See - section 4.2.6 for the list of valid event types. - - The response to a REGISTER message will be a READY message. 
- - Please note that if a client driver maintains multiple connections to a - Cassandra node and/or connections to multiple nodes, it is advised to - dedicate a handful of connections to receive events, but to *not* register - for events on all connections, as this would only result in receiving - multiple times the same event messages, wasting bandwidth. - - -4.2. Responses - - This section describes the content of the frame body for the different - responses. Please note that to make room for future evolution, clients should - support extra informations (that they should simply discard) to the one - described in this document at the end of the frame body. - -4.2.1. ERROR - - Indicates an error processing a request. The body of the message will be an - error code ([int]) followed by a [string] error message. Then, depending on - the exception, more content may follow. The error codes are defined in - Section 7, along with their additional content if any. - - -4.2.2. READY - - Indicates that the server is ready to process queries. This message will be - sent by the server either after a STARTUP message if no authentication is - required, or after a successful CREDENTIALS message. - - The body of a READY message is empty. - - -4.2.3. AUTHENTICATE - - Indicates that the server require authentication. This will be sent following - a STARTUP message and must be answered by a CREDENTIALS message from the - client to provide authentication informations. - - The body consists of a single [string] indicating the full class name of the - IAuthenticator in use. - - -4.2.4. SUPPORTED - - Indicates which startup options are supported by the server. This message - comes as a response to an OPTIONS message. - - The body of a SUPPORTED message is a [string multimap]. This multimap gives - for each of the supported STARTUP options, the list of supported values. - - -4.2.5. RESULT - - The result to a query (QUERY, PREPARE or EXECUTE messages). 
- - The first element of the body of a RESULT message is an [int] representing the - `kind` of result. The rest of the body depends on the kind. The kind can be - one of: - 0x0001 Void: for results carrying no information. - 0x0002 Rows: for results to select queries, returning a set of rows. - 0x0003 Set_keyspace: the result to a `use` query. - 0x0004 Prepared: result to a PREPARE message. - 0x0005 Schema_change: the result to a schema altering query. - - The body for each kind (after the [int] kind) is defined below. - - -4.2.5.1. Void - - The rest of the body for a Void result is empty. It indicates that a query was - successful without providing more information. - - -4.2.5.2. Rows - - Indicates a set of rows. The rest of body of a Rows result is: - <metadata><rows_count><rows_content> - where: - - <metadata> is composed of: - <flags><columns_count><global_table_spec>?<col_spec_1>...<col_spec_n> - where: - - <flags> is an [int]. The bits of <flags> provides information on the - formatting of the remaining informations. A flag is set if the bit - corresponding to its `mask` is set. Supported flags are, given there - mask: - 0x0001 Global_tables_spec: if set, only one table spec (keyspace - and table name) is provided as <global_table_spec>. If not - set, <global_table_spec> is not present. - - <columns_count> is an [int] representing the number of columns selected - by the query this result is of. It defines the number of <col_spec_i> - elements in and the number of element for each row in <rows_content>. - - <global_table_spec> is present if the Global_tables_spec is set in - <flags>. If present, it is composed of two [string] representing the - (unique) keyspace name and table name the columns return are of. - - <col_spec_i> specifies the columns returned in the query. 
There is - <column_count> such column specification that are composed of: - (<ksname><tablename>)?<column_name><type> - The initial <ksname> and <tablename> are two [string] are only present - if the Global_tables_spec flag is not set. The <column_name> is a - [string] and <type> is an [option] that correspond to the column name - and type. The option for <type> is either a native type (see below), - in which case the option has no value, or a 'custom' type, in which - case the value is a [string] representing the full qualified class - name of the type represented. Valid option ids are: - 0x0000 Custom: the value is a [string], see above. - 0x0001 Ascii - 0x0002 Bigint - 0x0003 Blob - 0x0004 Boolean - 0x0005 Counter - 0x0006 Decimal - 0x0007 Double - 0x0008 Float - 0x0009 Int - 0x000A Text - 0x000B Timestamp - 0x000C Uuid - 0x000D Varchar - 0x000E Varint - 0x000F Timeuuid - 0x0010 Inet - 0x0020 List: the value is an [option], representing the type - of the elements of the list. - 0x0021 Map: the value is two [option], representing the types of the - keys and values of the map - 0x0022 Set: the value is an [option], representing the type - of the elements of the set - - <rows_count> is an [int] representing the number of rows present in this - result. Those rows are serialized in the <rows_content> part. - - <rows_content> is composed of <row_1>...<row_m> where m is <rows_count>. - Each <row_i> is composed of <value_1>...<value_n> where n is - <columns_count> and where <value_j> is a [bytes] representing the value - returned for the jth column of the ith row. In other words, <rows_content> - is composed of (<rows_count> * <columns_count>) [bytes]. - - -4.2.5.3. Set_keyspace - - The result to a `use` query. The body (after the kind [int]) is a single - [string] indicating the name of the keyspace that has been set. - - -4.2.5.4. Prepared - - The result to a PREPARE message. 
The rest of the body of a Prepared result is: - <id><metadata> - where: - - <id> is [short bytes] representing the prepared query ID. - - <metadata> is defined exactly as for a Rows RESULT (See section 4.2.5.2). - - Note that the prepared query ID returned is global to the node on which the query - has been prepared. It can be used on any connection to that node - until the node is restarted (after which the query must be reprepared). - -4.2.5.5. Schema_change - - The result to a schema altering query (creation/update/drop of a - keyspace/table/index). The body (after the kind [int]) is composed of 3 - [string]: - <change><keyspace><table> - where: - - <change> describes the type of change that has occurred. It can be one of - "CREATED", "UPDATED" or "DROPPED". - - <keyspace> is the name of the affected keyspace or the keyspace of the - affected table. - - <table> is the name of the affected table. <table> will be empty (i.e. - the empty string "") if the change was affecting a keyspace and not a - table. - - Note that queries to create and drop an index are considered changes - updating the table the index is on. Queries that create, alter, or drop - user-defined types (available in Cassandra 2.1+) are considered changes - updating the keyspace the type is defined in. - - -4.2.6. EVENT - - An event pushed by the server. A client will only receive events for the - types it has REGISTERed for. The body of an EVENT message will start with a - [string] representing the event type. The rest of the message depends on the - event type. The valid event types are: - - "TOPOLOGY_CHANGE": events related to change in the cluster topology. - Currently, events are sent when new nodes are added to the cluster, and - when nodes are removed. The body of the message (after the event type) - consists of a [string] and an [inet], corresponding respectively to the - type of change ("NEW_NODE", "REMOVED_NODE", or "MOVED_NODE") followed - by the address of the new/removed/moved node.
- - "STATUS_CHANGE": events related to change of node status. Currently, - up/down events are sent. The body of the message (after the event type) - consists of a [string] and an [inet], corresponding respectively to the - type of status change ("UP" or "DOWN") followed by the address of the - concerned node. - - "SCHEMA_CHANGE": events related to schema change. The body of the message - (after the event type) consists of 3 [string] corresponding respectively - to the type of schema change ("CREATED", "UPDATED" or "DROPPED"), - followed by the name of the affected keyspace and the name of the - affected table within that keyspace. For changes that affect a keyspace - directly, the table name will be empty (i.e. the empty string ""). - Changes to user-defined types (available in Cassandra 2.1+) will result - in an "UPDATED" change for the keyspace containing the type, and the - table name will be empty. - - All EVENT message have a streamId of -1 (Section 2.3). - - Please note that "NEW_NODE" and "UP" events are sent based on internal Gossip - communication and as such may be sent a short delay before the binary - protocol server on the newly up node is fully started. Clients are thus - advise to wait a short time before trying to connect to the node (1 seconds - should be enough), otherwise they may experience a connection refusal at - first. - - It is possible for the same event to be sent multiple times. Therefore, - a client library should ignore the same event if it has already been notified - of a change. - -5. Compression - - Frame compression is supported by the protocol, but then only the frame body - is compressed (the frame header should never be compressed). - - Before being used, client and server must agree on a compression algorithm to - use, which is done in the STARTUP message. As a consequence, a STARTUP message - must never be compressed. 
However, once the STARTUP frame has been received - by the server, messages can be compressed (including the response to the STARTUP - request). Frames do not have to be compressed however, even if compression has - been agreed upon (a server may only compress frames above a certain size at its - discretion). A frame body should be compressed if and only if the compressed - flag (see Section 2.2) is set. - - -6. Data Type Serialization Formats - - This section describes the serialization formats for all CQL data types - supported by Cassandra through the native protocol. These serialization - formats should be used by client drivers to encode values for EXECUTE - messages. Cassandra will use these formats when returning values in - RESULT messages. - - All values are represented as [bytes] in EXECUTE and RESULT messages. - The [bytes] format includes an int prefix denoting the length of the value. - For that reason, the serialization formats described here will not include - a length component. - - For legacy compatibility reasons, note that most non-string types support - "empty" values (i.e. a value with zero length). An empty value is distinct - from NULL, which is encoded with a negative length. - - As with the rest of the native protocol, all encodings are big-endian. - -6.1. ascii - - A sequence of bytes in the ASCII range [0, 127]. Bytes with values outside of - this range will result in a validation error. - -6.2 bigint - - An eight-byte two's complement integer. - -6.3 blob - - Any sequence of bytes. - -6.4 boolean - - A single byte. A value of 0 denotes "false"; any other value denotes "true". - (However, it is recommended that a value of 1 be used to represent "true".) - -6.5 decimal - - The decimal format represents an arbitrary-precision number. It contains an - [int] "scale" component followed by a varint encoding (see section 6.17) - of the unscaled value. The encoded value represents "<unscaled>E<-scale>". - In other words, "<unscaled> * 10 ^ (-1 * <scale>)".
- -6.6 double - - An eight-byte floating point number in the IEEE 754 binary64 format. - -6.7 float - - A four-byte floating point number in the IEEE 754 binary32 format. - -6.8 inet - - A 4 byte or 16 byte sequence denoting an IPv4 or IPv6 address, respectively. - -6.9 int - - A four-byte two's complement integer. - -6.10 list - - A [short] n indicating the number of elements in the list, followed by n - elements. Each element is [short bytes] representing the serialized value. - -6.11 map - - A [short] n indicating the number of key/value pairs in the map, followed by - n entries. Each entry is composed of two [short bytes] representing the key - and value. - -6.12 set - - A [short] n indicating the number of elements in the set, followed by n - elements. Each element is [short bytes] representing the serialized value. - -6.13 text - - A sequence of bytes conforming to the UTF-8 specifications. - -6.14 timestamp - - An eight-byte two's complement integer representing a millisecond-precision - offset from the unix epoch (00:00:00, January 1st, 1970). Negative values - represent a negative offset from the epoch. - -6.15 uuid - - A 16 byte sequence representing any valid UUID as defined by RFC 4122. - -6.16 varchar - - An alias of the "text" type. - -6.17 varint - - A variable-length two's complement encoding of a signed integer. - - The following examples may help implementors of this spec: - - Value | Encoding - ------|--------- - 0 | 0x00 - 1 | 0x01 - 127 | 0x7F - 128 | 0x0080 - -1 | 0xFF - -128 | 0x80 - -129 | 0xFF7F - - Note that positive numbers must use a most-significant byte with a value - less than 0x80, because a most-significant bit of 1 indicates a negative - value. Implementors should pad positive values that have a MSB >= 0x80 - with a leading 0x00 byte. - -6.18 timeuuid - - A 16 byte sequence representing a version 1 UUID as defined by RFC 4122. - - -7.
Error codes - - The supported error codes are described below: - 0x0000 Server error: something unexpected happened. This indicates a - server-side bug. - 0x000A Protocol error: some client message triggered a protocol - violation (for instance a QUERY message is sent before a STARTUP - one has been sent) - 0x0100 Bad credentials: CREDENTIALS request failed because Cassandra - did not accept the provided credentials. - - 0x1000 Unavailable exception. The rest of the ERROR message body will be - <cl><required><alive> - where: - <cl> is the [consistency] level of the query having triggered - the exception. - <required> is an [int] representing the number of node that - should be alive to respect <cl> - <alive> is an [int] representing the number of replica that - were known to be alive when the request has been - processed (since an unavailable exception has been - triggered, there will be <alive> < <required>) - 0x1001 Overloaded: the request cannot be processed because the - coordinator node is overloaded - 0x1002 Is_bootstrapping: the request was a read request but the - coordinator node is bootstrapping - 0x1003 Truncate_error: error during a truncation error. - 0x1100 Write_timeout: Timeout exception during a write request. The rest - of the ERROR message body will be - <cl><received><blockfor><writeType> - where: - <cl> is the [consistency] level of the query having triggered - the exception. - <received> is an [int] representing the number of nodes having - acknowledged the request. - <blockfor> is an [int] representing the number of replica whose - acknowledgement is required to achieve <cl>. - <writeType> is a [string] that describe the type of the write - that timeouted. The value of that string can be one - of: - - "SIMPLE": the write was a non-batched - non-counter write. - - "BATCH": the write was a (logged) batch write. 
- If this type is received, it means the batch log - has been successfully written (otherwise a - "BATCH_LOG" type would have been sent instead). - - "UNLOGGED_BATCH": the write was an unlogged - batch. No batch log write has been attempted. - - "COUNTER": the write was a counter write - (batched or not). - - "BATCH_LOG": the timeout occurred during the - write to the batch log when a (logged) batch - write was requested. - 0x1200 Read_timeout: Timeout exception during a read request. The rest - of the ERROR message body will be - <cl><received><blockfor><data_present> - where: - <cl> is the [consistency] level of the query having triggered - the exception. - <received> is an [int] representing the number of nodes having - answered the request. - <blockfor> is an [int] representing the number of replicas whose - response is required to achieve <cl>. Please note that it - is possible to have <received> >= <blockfor> if - <data_present> is false. And also in the (unlikely) - case where <cl> is achieved but the coordinator node - times out while waiting for read-repair - acknowledgement. - <data_present> is a single byte. If its value is 0, it means - the replica that was asked for data has not - responded. Otherwise, the value is != 0. - - 0x2000 Syntax_error: The submitted query has a syntax error. - 0x2100 Unauthorized: The logged user doesn't have the right to perform - the query. - 0x2200 Invalid: The query is syntactically correct but invalid. - 0x2300 Config_error: The query is invalid because of some configuration issue - 0x2400 Already_exists: The query attempted to create a keyspace or a - table that was already existing. The rest of the ERROR message - body will be <ks><table> where: - <ks> is a [string] representing either the keyspace that - already exists, or the keyspace in which the table that - already exists is. - <table> is a [string] representing the name of the table that - already exists.
If the query was attempting to create a - keyspace, <table> will be present but will be the empty - string. - 0x2500 Unprepared: Can be thrown while a prepared statement tries to be - executed if the provide prepared statement ID is not known by - this host. The rest of the ERROR message body will be [short - bytes] representing the unknown ID. http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/doc/native_protocol_v2.spec ---------------------------------------------------------------------- diff --git a/doc/native_protocol_v2.spec b/doc/native_protocol_v2.spec deleted file mode 100644 index b9cc51f..0000000 --- a/doc/native_protocol_v2.spec +++ /dev/null @@ -1,954 +0,0 @@ - - CQL BINARY PROTOCOL v2 - - -Table of Contents - - 1. Overview - 2. Frame header - 2.1. version - 2.2. flags - 2.3. stream - 2.4. opcode - 2.5. length - 3. Notations - 4. Messages - 4.1. Requests - 4.1.1. STARTUP - 4.1.2. AUTH_RESPONSE - 4.1.3. OPTIONS - 4.1.4. QUERY - 4.1.5. PREPARE - 4.1.6. EXECUTE - 4.1.7. BATCH - 4.1.8. REGISTER - 4.2. Responses - 4.2.1. ERROR - 4.2.2. READY - 4.2.3. AUTHENTICATE - 4.2.4. SUPPORTED - 4.2.5. RESULT - 4.2.5.1. Void - 4.2.5.2. Rows - 4.2.5.3. Set_keyspace - 4.2.5.4. Prepared - 4.2.5.5. Schema_change - 4.2.6. EVENT - 4.2.7. AUTH_CHALLENGE - 4.2.8. AUTH_SUCCESS - 5. Compression - 6. Data Type Serialization Formats - 7. Result paging - 8. Error codes - 9. Changes from v1 - - -1. Overview - - The CQL binary protocol is a frame based protocol. Frames are defined as: - - 0 8 16 24 32 - +---------+---------+---------+---------+ - | version | flags | stream | opcode | - +---------+---------+---------+---------+ - | length | - +---------+---------+---------+---------+ - | | - . ... body ... . - . . - . . - +---------------------------------------- - - The protocol is big-endian (network byte order). - - Each frame contains a fixed size header (8 bytes) followed by a variable size - body. The header is described in Section 2. 
The content of the body depends - on the header opcode value (the body can in particular be empty for some - opcode values). The list of allowed opcodes is defined in Section 2.4 and the - details of each corresponding message are described in Section 4. - - The protocol distinguishes 2 types of frames: requests and responses. Requests - are those frames sent by the clients to the server, responses are the ones sent - by the server. Note however that the protocol supports server pushes (events) - so responses do not necessarily come right after a client request. - - Note to client implementors: client libraries should always assume that the - body of a given frame may contain more data than what is described in this - document. It will however always be safe to ignore the remainder of the frame - body in such cases. The reason is that this may allow to sometimes extend the - protocol with optional features without needing to change the protocol - version. - - - -2. Frame header - -2.1. version - - The version is a single byte that indicates both the direction of the message - (request or response) and the version of the protocol in use. The up-most bit - of version is used to define the direction of the message: 0 indicates a - request, 1 indicates a response. This can be useful for protocol analyzers to - distinguish the nature of the packet from the direction which it is moving. - The rest of that byte is the protocol version (2 for the protocol defined in - this document). In other words, for this version of the protocol, version will - have one of: - 0x02 Request frame for this protocol version - 0x82 Response frame for this protocol version - - Please note that while every message ships with the version, only one version - of messages is accepted on a given connection. In other words, the first message - exchanged (STARTUP) sets the version for the connection for the lifetime of this - connection. - - This document describes version 2 of the protocol.
For the changes made since - version 1, see Section 9. - - -2.2. flags - - Flags applying to this frame. The flags have the following meaning (described - by the mask that allow to select them): - 0x01: Compression flag. If set, the frame body is compressed. The actual - compression to use should have been set up beforehand through the - Startup message (which thus cannot be compressed; Section 4.1.1). - 0x02: Tracing flag. For a request frame, this indicate the client requires - tracing of the request. Note that not all requests support tracing. - Currently, only QUERY, PREPARE and EXECUTE queries support tracing. - Other requests will simply ignore the tracing flag if set. If a - request support tracing and the tracing flag was set, the response to - this request will have the tracing flag set and contain tracing - information. - If a response frame has the tracing flag set, its body contains - a tracing ID. The tracing ID is a [uuid] and is the first thing in - the frame body. The rest of the body will then be the usual body - corresponding to the response opcode. - - The rest of the flags is currently unused and ignored. - -2.3. stream - - A frame has a stream id (one signed byte). When sending request messages, this - stream id must be set by the client to a positive byte (negative stream id - are reserved for streams initiated by the server; currently all EVENT messages - (section 4.2.6) have a streamId of -1). If a client sends a request message - with the stream id X, it is guaranteed that the stream id of the response to - that message will be X. - - This allow to deal with the asynchronous nature of the protocol. If a client - sends multiple messages simultaneously (without waiting for responses), there - is no guarantee on the order of the responses. For instance, if the client - writes REQ_1, REQ_2, REQ_3 on the wire (in that order), the server might - respond to REQ_3 (or REQ_2) first. 
Assigning different stream id to these 3 - requests allows the client to distinguish to which request an received answer - respond to. As there can only be 128 different simultaneous stream, it is up - to the client to reuse stream id. - - Note that clients are free to use the protocol synchronously (i.e. wait for - the response to REQ_N before sending REQ_N+1). In that case, the stream id - can be safely set to 0. Clients should also feel free to use only a subset of - the 128 maximum possible stream ids if it is simpler for those - implementation. - -2.4. opcode - - An integer byte that distinguish the actual message: - 0x00 ERROR - 0x01 STARTUP - 0x02 READY - 0x03 AUTHENTICATE - 0x05 OPTIONS - 0x06 SUPPORTED - 0x07 QUERY - 0x08 RESULT - 0x09 PREPARE - 0x0A EXECUTE - 0x0B REGISTER - 0x0C EVENT - 0x0D BATCH - 0x0E AUTH_CHALLENGE - 0x0F AUTH_RESPONSE - 0x10 AUTH_SUCCESS - - Messages are described in Section 4. - - (Note that there is no 0x04 message in this version of the protocol) - - -2.5. length - - A 4 byte integer representing the length of the body of the frame (note: - currently a frame is limited to 256MB in length). - - -3. Notations - - To describe the layout of the frame body for the messages in Section 4, we - define the following: - - [int] A 4 byte integer - [short] A 2 byte unsigned integer - [string] A [short] n, followed by n bytes representing an UTF-8 - string. - [long string] An [int] n, followed by n bytes representing an UTF-8 string. - [uuid] A 16 bytes long uuid. - [string list] A [short] n, followed by n [string]. - [bytes] A [int] n, followed by n bytes if n >= 0. If n < 0, - no byte should follow and the value represented is `null`. - [short bytes] A [short] n, followed by n bytes if n >= 0. - - [option] A pair of <id><value> where <id> is a [short] representing - the option id and <value> depends on that option (and can be - of size 0). The supported id (and the corresponding <value>) - will be described when this is used. 
- [option list] A [short] n, followed by n [option]. - [inet] An address (ip and port) to a node. It consists of one - [byte] n, that represents the address size, followed by n - [byte] representing the IP address (in practice n can only be - either 4 (IPv4) or 16 (IPv6)), following by one [int] - representing the port. - [consistency] A consistency level specification. This is a [short] - representing a consistency level with the following - correspondance: - 0x0000 ANY - 0x0001 ONE - 0x0002 TWO - 0x0003 THREE - 0x0004 QUORUM - 0x0005 ALL - 0x0006 LOCAL_QUORUM - 0x0007 EACH_QUORUM - 0x0008 SERIAL - 0x0009 LOCAL_SERIAL - 0x000A LOCAL_ONE - - [string map] A [short] n, followed by n pair <k><v> where <k> and <v> - are [string]. - [string multimap] A [short] n, followed by n pair <k><v> where <k> is a - [string] and <v> is a [string list]. - - -4. Messages - -4.1. Requests - - Note that outside of their normal responses (described below), all requests - can get an ERROR message (Section 4.2.1) as response. - -4.1.1. STARTUP - - Initialize the connection. The server will respond by either a READY message - (in which case the connection is ready for queries) or an AUTHENTICATE message - (in which case credentials will need to be provided using AUTH_RESPONSE). - - This must be the first message of the connection, except for OPTIONS that can - be sent before to find out the options supported by the server. Once the - connection has been initialized, a client should not send any more STARTUP - message. - - The body is a [string map] of options. Possible options are: - - "CQL_VERSION": the version of CQL to use. This option is mandatory and - currenty, the only version supported is "3.0.0". Note that this is - different from the protocol version. - - "COMPRESSION": the compression algorithm to use for frames (See section 5). - This is optional, if not specified no compression will be used. - - -4.1.2. AUTH_RESPONSE - - Answers a server authentication challenge. 
- - Authentication in the protocol is SASL based. The server sends authentication - challenges (a bytes token) to which the client answer with this message. Those - exchanges continue until the server accepts the authentication by sending a - AUTH_SUCCESS message after a client AUTH_RESPONSE. It is however that client that - initiate the exchange by sending an initial AUTH_RESPONSE in response to a - server AUTHENTICATE request. - - The body of this message is a single [bytes] token. The details of what this - token contains (and when it can be null/empty, if ever) depends on the actual - authenticator used. - - The response to a AUTH_RESPONSE is either a follow-up AUTH_CHALLENGE message, - an AUTH_SUCCESS message or an ERROR message. - - -4.1.3. OPTIONS - - Asks the server to return what STARTUP options are supported. The body of an - OPTIONS message should be empty and the server will respond with a SUPPORTED - message. - - -4.1.4. QUERY - - Performs a CQL query. The body of the message must be: - <query><query_parameters> - where <query> is a [long string] representing the query and - <query_parameters> must be - <consistency><flags>[<n><value_1>...<value_n>][<result_page_size>][<paging_state>][<serial_consistency>] - where: - - <consistency> is the [consistency] level for the operation. - - <flags> is a [byte] whose bits define the options for this query and - in particular influence what the remainder of the message contains. - A flag is set if the bit corresponding to its `mask` is set. Supported - flags are, given there mask: - 0x01: Values. In that case, a [short] <n> followed by <n> [bytes] - values are provided. Those value are used for bound variables in - the query. - 0x02: Skip_metadata. If present, the Result Set returned as a response - to that query (if any) will have the NO_METADATA flag (see - Section 4.2.5.2). - 0x04: Page_size. In that case, <result_page_size> is an [int] - controlling the desired page size of the result (in CQL3 rows). 
- See the section on paging (Section 7) for more details. - 0x08: With_paging_state. If present, <paging_state> should be present. - <paging_state> is a [bytes] value that should have been returned - in a result set (Section 4.2.5.2). If provided, the query will be - executed but starting from a given paging state. This also to - continue paging on a different node from the one it has been - started (See Section 7 for more details). - 0x10: With serial consistency. If present, <serial_consistency> should be - present. <serial_consistency> is the [consistency] level for the - serial phase of conditional updates. That consitency can only be - either SERIAL or LOCAL_SERIAL and if not present, it defaults to - SERIAL. This option will be ignored for anything else that a - conditional update/insert. - - Note that the consistency is ignored by some queries (USE, CREATE, ALTER, - TRUNCATE, ...). - - The server will respond to a QUERY message with a RESULT message, the content - of which depends on the query. - - -4.1.5. PREPARE - - Prepare a query for later execution (through EXECUTE). The body consists of - the CQL query to prepare as a [long string]. - - The server will respond with a RESULT message with a `prepared` kind (0x0004, - see Section 4.2.5). - - -4.1.6. EXECUTE - - Executes a prepared query. The body of the message must be: - <id><query_parameters> - where <id> is the prepared query ID. It's the [short bytes] returned as a - response to a PREPARE message. As for <query_parameters>, it has the exact - same definition than in QUERY (see Section 4.1.4). - - The response from the server will be a RESULT message. - - -4.1.7. BATCH - - Allows executing a list of queries (prepared or not) as a batch (note that - only DML statements are accepted in a batch). The body of the message must - be: - <type><n><query_1>...<query_n><consistency> - where: - - <type> is a [byte] indicating the type of batch to use: - - If <type> == 0, the batch will be "logged". 
This is equivalent to a - normal CQL3 batch statement. - - If <type> == 1, the batch will be "unlogged". - - If <type> == 2, the batch will be a "counter" batch (and non-counter - statements will be rejected). - - <n> is a [short] indicating the number of following queries. - - <query_1>...<query_n> are the queries to execute. A <query_i> must be of the - form: - <kind><string_or_id><n><value_1>...<value_n> - where: - - <kind> is a [byte] indicating whether the following query is a prepared - one or not. <kind> value must be either 0 or 1. - - <string_or_id> depends on the value of <kind>. If <kind> == 0, it should be - a [long string] query string (as in QUERY, the query string might contain - bind markers). Otherwise (that is, if <kind> == 1), it should be a - [short bytes] representing a prepared query ID. - - <n> is a [short] indicating the number (possibly 0) of following values. - - <value_1>...<value_n> are the [bytes] to use for bound variables. - - <consistency> is the [consistency] level for the operation. - - The server will respond with a RESULT message with a `Void` kind (0x0001, - see Section 4.2.5). - - -4.1.8. REGISTER - - Register this connection to receive some type of events. The body of the - message is a [string list] representing the event types to register to. See - section 4.2.6 for the list of valid event types. - - The response to a REGISTER message will be a READY message. - - Please note that if a client driver maintains multiple connections to a - Cassandra node and/or connections to multiple nodes, it is advised to - dedicate a handful of connections to receive events, but to *not* register - for events on all connections, as this would only result in receiving - multiple times the same event messages, wasting bandwidth. - - -4.2. Responses - - This section describes the content of the frame body for the different - responses. 
Please note that to make room for future evolution, clients should - support extra informations (that they should simply discard) to the one - described in this document at the end of the frame body. - -4.2.1. ERROR - - Indicates an error processing a request. The body of the message will be an - error code ([int]) followed by a [string] error message. Then, depending on - the exception, more content may follow. The error codes are defined in - Section 8, along with their additional content if any. - - -4.2.2. READY - - Indicates that the server is ready to process queries. This message will be - sent by the server either after a STARTUP message if no authentication is - required, or after a successful CREDENTIALS message. - - The body of a READY message is empty. - - -4.2.3. AUTHENTICATE - - Indicates that the server require authentication, and which authentication - mechanism to use. - - The authentication is SASL based and thus consists on a number of server - challenges (AUTH_CHALLENGE, Section 4.2.7) followed by client responses - (AUTH_RESPONSE, Section 4.1.2). The Initial exchange is however boostrapped - by an initial client response. The details of that exchange (including how - much challenge-response pair are required) are specific to the authenticator - in use. The exchange ends when the server sends an AUTH_SUCCESS message or - an ERROR message. - - This message will be sent following a STARTUP message if authentication is - required and must be answered by a AUTH_RESPONSE message from the client. - - The body consists of a single [string] indicating the full class name of the - IAuthenticator in use. - - -4.2.4. SUPPORTED - - Indicates which startup options are supported by the server. This message - comes as a response to an OPTIONS message. - - The body of a SUPPORTED message is a [string multimap]. This multimap gives - for each of the supported STARTUP options, the list of supported values. - - -4.2.5. 
RESULT - - The result to a query (QUERY, PREPARE, EXECUTE or BATCH messages). - - The first element of the body of a RESULT message is an [int] representing the - `kind` of result. The rest of the body depends on the kind. The kind can be - one of: - 0x0001 Void: for results carrying no information. - 0x0002 Rows: for results to select queries, returning a set of rows. - 0x0003 Set_keyspace: the result to a `use` query. - 0x0004 Prepared: result to a PREPARE message. - 0x0005 Schema_change: the result to a schema altering query. - - The body for each kind (after the [int] kind) is defined below. - - -4.2.5.1. Void - - The rest of the body for a Void result is empty. It indicates that a query was - successful without providing more information. - - -4.2.5.2. Rows - - Indicates a set of rows. The rest of body of a Rows result is: - <metadata><rows_count><rows_content> - where: - - <metadata> is composed of: - <flags><columns_count>[<paging_state>][<global_table_spec>?<col_spec_1>...<col_spec_n>] - where: - - <flags> is an [int]. The bits of <flags> provides information on the - formatting of the remaining informations. A flag is set if the bit - corresponding to its `mask` is set. Supported flags are, given there - mask: - 0x0001 Global_tables_spec: if set, only one table spec (keyspace - and table name) is provided as <global_table_spec>. If not - set, <global_table_spec> is not present. - 0x0002 Has_more_pages: indicates whether this is not the last - page of results and more should be retrieve. If set, the - <paging_state> will be present. The <paging_state> is a - [bytes] value that should be used in QUERY/EXECUTE to - continue paging and retrieve the remained of the result for - this query (See Section 7 for more details). 
- 0x0004 No_metadata: if set, the <metadata> is only composed of - these <flags>, the <column_count> and optionally the - <paging_state> (depending on the Has_more_pages flage) but - no other information (so no <global_table_spec> nor <col_spec_i>). - This will only ever be the case if this was requested - during the query (see QUERY and RESULT messages). - - <columns_count> is an [int] representing the number of columns selected - by the query this result is of. It defines the number of <col_spec_i> - elements in and the number of element for each row in <rows_content>. - - <global_table_spec> is present if the Global_tables_spec is set in - <flags>. If present, it is composed of two [string] representing the - (unique) keyspace name and table name the columns return are of. - - <col_spec_i> specifies the columns returned in the query. There is - <column_count> such column specifications that are composed of: - (<ksname><tablename>)?<name><type> - The initial <ksname> and <tablename> are two [string] are only present - if the Global_tables_spec flag is not set. The <column_name> is a - [string] and <type> is an [option] that correspond to the description - (what this description is depends a bit on the context: in results to - selects, this will be either the user chosen alias or the selection used - (often a colum name, but it can be a function call too). In results to - a PREPARE, this will be either the name of the bind variable corresponding - or the column name for the variable if it is "anonymous") and type of - the corresponding result. The option for <type> is either a native - type (see below), in which case the option has no value, or a - 'custom' type, in which case the value is a [string] representing - the full qualified class name of the type represented. Valid option - ids are: - 0x0000 Custom: the value is a [string], see above. 
- 0x0001 Ascii - 0x0002 Bigint - 0x0003 Blob - 0x0004 Boolean - 0x0005 Counter - 0x0006 Decimal - 0x0007 Double - 0x0008 Float - 0x0009 Int - 0x000A Text - 0x000B Timestamp - 0x000C Uuid - 0x000D Varchar - 0x000E Varint - 0x000F Timeuuid - 0x0010 Inet - 0x0020 List: the value is an [option], representing the type - of the elements of the list. - 0x0021 Map: the value is two [option], representing the types of the - keys and values of the map - 0x0022 Set: the value is an [option], representing the type - of the elements of the set - - <rows_count> is an [int] representing the number of rows present in this - result. Those rows are serialized in the <rows_content> part. - - <rows_content> is composed of <row_1>...<row_m> where m is <rows_count>. - Each <row_i> is composed of <value_1>...<value_n> where n is - <columns_count> and where <value_j> is a [bytes] representing the value - returned for the jth column of the ith row. In other words, <rows_content> - is composed of (<rows_count> * <columns_count>) [bytes]. - - -4.2.5.3. Set_keyspace - - The result to a `use` query. The body (after the kind [int]) is a single - [string] indicating the name of the keyspace that has been set. - - -4.2.5.4. Prepared - - The result to a PREPARE message. The rest of the body of a Prepared result is: - <id><metadata><result_metadata> - where: - - <id> is [short bytes] representing the prepared query ID. - - <metadata> is defined exactly as for a Rows RESULT (See section 4.2.5.2; you - can however assume that the Has_more_pages flag is always off) and - is the specification for the variable bound in this prepare statement. - - <result_metadata> is defined exactly as <metadata> but correspond to the - metadata for the resultSet that execute this query will yield. Note that - <result_metadata> may be empty (have the No_metadata flag and 0 columns, See - section 4.2.5.2) and will be for any query that is not a Select. 
There is - in fact never a guarantee that this will non-empty so client should protect - themselves accordingly. The presence of this information is an - optimization that allows to later execute the statement that has been - prepared without requesting the metadata (Skip_metadata flag in EXECUTE). - Clients can safely discard this metadata if they do not want to take - advantage of that optimization. - - Note that prepared query ID return is global to the node on which the query - has been prepared. It can be used on any connection to that node and this - until the node is restarted (after which the query must be reprepared). - -4.2.5.5. Schema_change - - The result to a schema altering query (creation/update/drop of a - keyspace/table/index). The body (after the kind [int]) is composed of 3 - [string]: - <change><keyspace><table> - where: - - <change> describe the type of change that has occured. It can be one of - "CREATED", "UPDATED" or "DROPPED". - - <keyspace> is the name of the affected keyspace or the keyspace of the - affected table. - - <table> is the name of the affected table. <table> will be empty (i.e. - the empty string "") if the change was affecting a keyspace and not a - table. - - Note that queries to create and drop an index are considered changes - updating the table the index is on. Queries that create, alter, or drop - user-defined types (availble in Cassandra 2.1+) are considered changes - updating the keyspace the type is defined in. - - -4.2.6. EVENT - - And event pushed by the server. A client will only receive events for the - type it has REGISTER to. The body of an EVENT message will start by a - [string] representing the event type. The rest of the message depends on the - event type. The valid event types are: - - "TOPOLOGY_CHANGE": events related to change in the cluster topology. - Currently, events are sent when new nodes are added to the cluster, and - when nodes are removed. 
The body of the message (after the event type) - consists of a [string] and an [inet], corresponding respectively to the - type of change ("NEW_NODE", "REMOVED_NODE", or "MOVED_NODE") followed - by the address of the new/removed/moved node. - - "STATUS_CHANGE": events related to change of node status. Currently, - up/down events are sent. The body of the message (after the event type) - consists of a [string] and an [inet], corresponding respectively to the - type of status change ("UP" or "DOWN") followed by the address of the - concerned node. - - "SCHEMA_CHANGE": events related to schema change. The body of the message - (after the event type) consists of 3 [string] corresponding respectively - to the type of schema change ("CREATED", "UPDATED" or "DROPPED"), - followed by the name of the affected keyspace and the name of the - affected table within that keyspace. For changes that affect a keyspace - directly, the table name will be empty (i.e. the empty string ""). - Changes to user-defined types (available in Cassandra 2.1+) will result - in an "UPDATED" change for the keyspace containing the type, and the - table name will be empty. - - All EVENT message have a streamId of -1 (Section 2.3). - - Please note that "NEW_NODE" and "UP" events are sent based on internal Gossip - communication and as such may be sent a short delay before the binary - protocol server on the newly up node is fully started. Clients are thus - advise to wait a short time before trying to connect to the node (1 seconds - should be enough), otherwise they may experience a connection refusal at - first. - - It is possible for the same event to be sent multiple times. Therefore, - a client library should ignore the same event if it has already been notified - of a change. - -4.2.7. AUTH_CHALLENGE - - A server authentication challenge (see AUTH_RESPONSE (Section 4.1.2) for more - details). - - The body of this message is a single [bytes] token. 
The details of what this - token contains (and when it can be null/empty, if ever) depend on the actual - authenticator used. - - Clients are expected to answer the server challenge by an AUTH_RESPONSE - message. - -4.2.8. AUTH_SUCCESS - - Indicates the success of the authentication phase. See Section 4.2.3 for more - details. - - The body of this message is a single [bytes] token holding final information - from the server that the client may require to finish the authentication - process. What that token contains and whether it can be null depends on the - actual authenticator used. - - -5. Compression - - Frame compression is supported by the protocol, but then only the frame body - is compressed (the frame header should never be compressed). - - Before being used, client and server must agree on a compression algorithm to - use, which is done in the STARTUP message. As a consequence, a STARTUP message - must never be compressed. However, once the STARTUP frame has been received - by the server, frames can be compressed (including the response to the STARTUP - request). Frames do not have to be compressed however, even if compression has - been agreed upon (a server may only compress frames above a certain size at its - discretion). A frame body should be compressed if and only if the compressed - flag (see Section 2.2) is set. - - As of this version 2 of the protocol, the following compressions are available: - - lz4 (https://code.google.com/p/lz4/). In that case, note that the first 4 bytes - of the body will be the uncompressed length (followed by the compressed - bytes). - - snappy (https://code.google.com/p/snappy/). This compression might not be - available as it depends on a native lib (server-side) that might not be - available on some installations. - - -6. Data Type Serialization Formats - - This section describes the serialization formats for all CQL data types - supported by Cassandra through the native protocol.
These serialization - formats should be used by client drivers to encode values for EXECUTE - messages. Cassandra will use these formats when returning values in - RESULT messages. - - All values are represented as [bytes] in EXECUTE and RESULT messages. - The [bytes] format includes an int prefix denoting the length of the value. - For that reason, the serialization formats described here will not include - a length component. - - For legacy compatibility reasons, note that most non-string types support - "empty" values (i.e. a value with zero length). An empty value is distinct - from NULL, which is encoded with a negative length. - - As with the rest of the native protocol, all encodings are big-endian. - -6.1. ascii - - A sequence of bytes in the ASCII range [0, 127]. Bytes with values outside of - this range will result in a validation error. - -6.2 bigint - - An eight-byte two's complement integer. - -6.3 blob - - Any sequence of bytes. - -6.4 boolean - - A single byte. A value of 0 denotes "false"; any other value denotes "true". - (However, it is recommended that a value of 1 be used to represent "true".) - -6.5 decimal - - The decimal format represents an arbitrary-precision number. It contains an - [int] "scale" component followed by a varint encoding (see section 6.17) - of the unscaled value. The encoded value represents "<unscaled>E<-scale>". - In other words, "<unscaled> * 10 ^ (-1 * <scale>)". - -6.6 double - - An eight-byte floating point number in the IEEE 754 binary64 format. - -6.7 float - - An four-byte floating point number in the IEEE 754 binary32 format. - -6.8 inet - - A 4 byte or 16 byte sequence denoting an IPv4 or IPv6 address, respectively. - -6.9 int - - A four-byte two's complement integer. - -6.10 list - - A [short] n indicating the number of elements in the list, followed by n - elements. Each element is [short bytes] representing the serialized value. 
- -6.11 map - - A [short] n indicating the number of key/value pairs in the map, followed by - n entries. Each entry is composed of two [short bytes] representing the key - and value. - -6.12 set - - A [short] n indicating the number of elements in the set, followed by n - elements. Each element is [short bytes] representing the serialized value. - -6.13 text - - A sequence of bytes conforming to the UTF-8 specifications. - -6.14 timestamp - - An eight-byte two's complement integer representing a millisecond-precision - offset from the unix epoch (00:00:00, January 1st, 1970). Negative values - represent a negative offset from the epoch. - -6.15 uuid - - A 16 byte sequence representing any valid UUID as defined by RFC 4122. - -6.16 varchar - - An alias of the "text" type. - -6.17 varint - - A variable-length two's complement encoding of a signed integer. - - The following examples may help implementors of this spec: - - Value | Encoding - ------|--------- - 0 | 0x00 - 1 | 0x01 - 127 | 0x7F - 128 | 0x0080 - 129 | 0x0081 - -1 | 0xFF - -128 | 0x80 - -129 | 0xFF7F - - Note that positive numbers must use a most-significant byte with a value - less than 0x80, because a most-significant bit of 1 indicates a negative - value. Implementors should pad positive values that have a MSB >= 0x80 - with a leading 0x00 byte. - -6.18 timeuuid - - A 16 byte sequence representing a version 1 UUID as defined by RFC 4122. - - -7. Result paging - - The protocol allows for paging the result of queries. For that, the QUERY and - EXECUTE messages have a <result_page_size> value that indicate the desired - page size in CQL3 rows. - - If a positive value is provided for <result_page_size>, the result set of the - RESULT message returned for the query will contain at most the - <result_page_size> first rows of the query result. If that first page of result - contains the full result set for the query, the RESULT message (of kind `Rows`) - will have the Has_more_pages flag *not* set. 
However, if some results are not - part of the first response, the Has_more_pages flag will be set and the result - will contain a <paging_state> value. In that case, the <paging_state> value - should be used in a QUERY or EXECUTE message (that has the *same* query than - the original one or the behavior is undefined) to retrieve the next page of - results. - - Only CQL3 queries that return a result set (RESULT message with a Rows `kind`) - support paging. For other type of queries, the <result_page_size> value is - ignored. - - Note to client implementors: - - While <result_page_size> can be as low as 1, it will likely be detrimental - to performance to pick a value too low. A value below 100 is probably too - low for most use cases. - - Clients should not rely on the actual size of the result set returned to - decide if there is more result to fetch or not. Instead, they should always - check the Has_more_pages flag (unless they did not enabled paging for the query - obviously). Clients should also not assert that no result will have more than - <result_page_size> results. While the current implementation always respect - the exact value of <result_page_size>, we reserve ourselves the right to return - slightly smaller or bigger pages in the future for performance reasons. - - -8. Error codes - - The supported error codes are described below: - 0x0000 Server error: something unexpected happened. This indicates a - server-side bug. - 0x000A Protocol error: some client message triggered a protocol - violation (for instance a QUERY message is sent before a STARTUP - one has been sent) - 0x0100 Bad credentials: CREDENTIALS request failed because Cassandra - did not accept the provided credentials. - - 0x1000 Unavailable exception. The rest of the ERROR message body will be - <cl><required><alive> - where: - <cl> is the [consistency] level of the query having triggered - the exception. 
- <required> is an [int] representing the number of nodes that - should be alive to respect <cl> - <alive> is an [int] representing the number of replicas that - were known to be alive when the request has been - processed (since an unavailable exception has been - triggered, there will be <alive> < <required>) - 0x1001 Overloaded: the request cannot be processed because the - coordinator node is overloaded - 0x1002 Is_bootstrapping: the request was a read request but the - coordinator node is bootstrapping - 0x1003 Truncate_error: error during a truncation. - 0x1100 Write_timeout: Timeout exception during a write request. The rest - of the ERROR message body will be - <cl><received><blockfor><writeType> - where: - <cl> is the [consistency] level of the query having triggered - the exception. - <received> is an [int] representing the number of nodes having - acknowledged the request. - <blockfor> is an [int] representing the number of replicas whose - acknowledgement is required to achieve <cl>. - <writeType> is a [string] that describes the type of the write - that timed out. The value of that string can be one - of: - - "SIMPLE": the write was a non-batched - non-counter write. - - "BATCH": the write was a (logged) batch write. - If this type is received, it means the batch log - has been successfully written (otherwise a - "BATCH_LOG" type would have been sent instead). - - "UNLOGGED_BATCH": the write was an unlogged - batch. No batch log write has been attempted. - - "COUNTER": the write was a counter write - (batched or not). - - "BATCH_LOG": the timeout occurred during the - write to the batch log when a (logged) batch - write was requested. - 0x1200 Read_timeout: Timeout exception during a read request. The rest - of the ERROR message body will be - <cl><received><blockfor><data_present> - where: - <cl> is the [consistency] level of the query having triggered - the exception. 
- <received> is an [int] representing the number of nodes having - answered the request. - <blockfor> is an [int] representing the number of replicas whose - response is required to achieve <cl>. Please note that it - is possible to have <received> >= <blockfor> if - <data_present> is false. And also in the (unlikely) - case where <cl> is achieved but the coordinator node - times out while waiting for read-repair - acknowledgement. - <data_present> is a single byte. If its value is 0, it means - the replica that was asked for data has not - responded. Otherwise, the value is != 0. - - 0x2000 Syntax_error: The submitted query has a syntax error. - 0x2100 Unauthorized: The logged user doesn't have the right to perform - the query. - 0x2200 Invalid: The query is syntactically correct but invalid. - 0x2300 Config_error: The query is invalid because of some configuration issue - 0x2400 Already_exists: The query attempted to create a keyspace or a - table that was already existing. The rest of the ERROR message - body will be <ks><table> where: - <ks> is a [string] representing either the keyspace that - already exists, or the keyspace in which the table that - already exists is. - <table> is a [string] representing the name of the table that - already exists. If the query was attempting to create a - keyspace, <table> will be present but will be the empty - string. - 0x2500 Unprepared: Can be thrown while a prepared statement tries to be - executed if the provided prepared statement ID is not known by - this host. The rest of the ERROR message body will be [short - bytes] representing the unknown ID. - -9. Changes from v1 - * Protocol is versioned to allow old clients to connect to a newer server; if a - newer client connects to an older server, it needs to check if it gets a - ProtocolException on connection and try connecting with a lower version. - * A query can now have bind variables even though the statement is not - prepared; see Section 4.1.4. 
- * A new BATCH message allows batching a set of queries (prepared or not); see - Section 4.1.7. - * Authentication now uses SASL. Concretely, the CREDENTIALS message has been - removed and replaced by a server/client challenge/response exchange (done - through the new AUTH_RESPONSE/AUTH_CHALLENGE messages). See Section 4.2.3 for - details. - * Query paging has been added (Section 7): QUERY and EXECUTE messages have an - additional <result_page_size> [int] and <paging_state> [bytes], and - the Rows kind of RESULT message has an additional flag and <paging_state> - value. Note that paging is optional, and a client that does not want to handle it - can simply avoid including the Page_size flag and parameter in QUERY and - EXECUTE. - * QUERY and EXECUTE statements can request that the metadata be skipped in - the result set returned (for efficiency reasons) if said metadata are known - in advance. Furthermore, the result to a PREPARE (section 4.2.5.4) now - includes the metadata for the result of executing the statement just - prepared (though those metadata will be empty for non SELECT statements). 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/src/java/org/apache/cassandra/cql3/QueryOptions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/QueryOptions.java b/src/java/org/apache/cassandra/cql3/QueryOptions.java index fb46b9b..672f8ea 100644 --- a/src/java/org/apache/cassandra/cql3/QueryOptions.java +++ b/src/java/org/apache/cassandra/cql3/QueryOptions.java @@ -47,14 +47,9 @@ public abstract class QueryOptions public static final CBCodec<QueryOptions> codec = new Codec(); - public static QueryOptions fromProtocolV1(ConsistencyLevel consistency, List<ByteBuffer> values) + public static QueryOptions fromThrift(ConsistencyLevel consistency, List<ByteBuffer> values) { - return new DefaultQueryOptions(consistency, values, false, SpecificOptions.DEFAULT, Server.VERSION_1); - } - - public static QueryOptions fromProtocolV2(ConsistencyLevel consistency, List<ByteBuffer> values) - { - return new DefaultQueryOptions(consistency, values, false, SpecificOptions.DEFAULT, Server.VERSION_2); + return new DefaultQueryOptions(consistency, values, false, SpecificOptions.DEFAULT, Server.VERSION_3); } public static QueryOptions forInternalCalls(ConsistencyLevel consistency, List<ByteBuffer> values) @@ -67,11 +62,6 @@ public abstract class QueryOptions return new DefaultQueryOptions(ConsistencyLevel.ONE, values, false, SpecificOptions.DEFAULT, Server.VERSION_3); } - public static QueryOptions fromPreV3Batch(ConsistencyLevel consistency) - { - return new DefaultQueryOptions(consistency, Collections.<ByteBuffer>emptyList(), false, SpecificOptions.DEFAULT, Server.VERSION_2); - } - public static QueryOptions forProtocolVersion(int protocolVersion) { return new DefaultQueryOptions(null, null, true, null, protocolVersion); @@ -301,8 +291,6 @@ public abstract class QueryOptions public QueryOptions decode(ByteBuf body, int version) { - assert version >= 2; - ConsistencyLevel consistency = 
CBUtil.readConsistencyLevel(body); EnumSet<Flag> flags = Flag.deserialize((int)body.readByte()); @@ -349,8 +337,6 @@ public abstract class QueryOptions public void encode(QueryOptions options, ByteBuf dest, int version) { - assert version >= 2; - CBUtil.writeConsistencyLevel(options.getConsistency(), dest); EnumSet<Flag> flags = gatherFlags(options); http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/src/java/org/apache/cassandra/cql3/Tuples.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Tuples.java b/src/java/org/apache/cassandra/cql3/Tuples.java index 89fecd0..933088f 100644 --- a/src/java/org/apache/cassandra/cql3/Tuples.java +++ b/src/java/org/apache/cassandra/cql3/Tuples.java @@ -199,8 +199,6 @@ public class Tuples private ByteBuffer[] bindInternal(QueryOptions options) throws InvalidRequestException { - int version = options.getProtocolVersion(); - ByteBuffer[] buffers = new ByteBuffer[elements.size()]; for (int i = 0; i < elements.size(); i++) { @@ -208,10 +206,6 @@ public class Tuples // Since A tuple value is always written in its entirety Cassandra can't preserve a pre-existing value by 'not setting' the new value. Reject the query. if (buffers[i] == ByteBufferUtil.UNSET_BYTE_BUFFER) throw new InvalidRequestException(String.format("Invalid unset value for tuple field number %d", i)); - // Inside tuples, we must force the serialization of collections to v3 whatever protocol - // version is in use since we're going to store directly that serialized value. 
- if (version < 3 && type.type(i).isCollection()) - buffers[i] = ((CollectionType)type.type(i)).getSerializer().reserializeToV3(buffers[i]); } return buffers; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/src/java/org/apache/cassandra/cql3/UserTypes.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/UserTypes.java b/src/java/org/apache/cassandra/cql3/UserTypes.java index de3f545..22c7987 100644 --- a/src/java/org/apache/cassandra/cql3/UserTypes.java +++ b/src/java/org/apache/cassandra/cql3/UserTypes.java @@ -21,11 +21,9 @@ import java.nio.ByteBuffer; import java.util.*; import org.apache.cassandra.cql3.functions.Function; -import org.apache.cassandra.db.marshal.CollectionType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.marshal.UserType; import org.apache.cassandra.exceptions.InvalidRequestException; -import org.apache.cassandra.transport.Server; import org.apache.cassandra.utils.ByteBufferUtil; /** @@ -171,8 +169,6 @@ public abstract class UserTypes private ByteBuffer[] bindInternal(QueryOptions options) throws InvalidRequestException { - int version = options.getProtocolVersion(); - ByteBuffer[] buffers = new ByteBuffer[values.size()]; for (int i = 0; i < type.size(); i++) { @@ -180,10 +176,6 @@ public abstract class UserTypes // Since A UDT value is always written in its entirety Cassandra can't preserve a pre-existing value by 'not setting' the new value. Reject the query. if (buffers[i] == ByteBufferUtil.UNSET_BYTE_BUFFER) throw new InvalidRequestException(String.format("Invalid unset value for field '%s' of user defined type %s", type.fieldNameAsString(i), type.getNameAsString())); - // Inside UDT values, we must force the serialization of collections to v3 whatever protocol - // version is in use since we're going to store directly that serialized value. 
- if (version < Server.VERSION_3 && type.fieldType(i).isCollection() && buffers[i] != null) - buffers[i] = ((CollectionType)type.fieldType(i)).getSerializer().reserializeToV3(buffers[i]); } return buffers; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/8439e74e/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java index 9ddf7b8..3855b6a 100644 --- a/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/ModificationStatement.java @@ -393,9 +393,6 @@ public abstract class ModificationStatement implements CQLStatement if (options.getConsistency() == null) throw new InvalidRequestException("Invalid empty consistency level"); - if (hasConditions() && options.getProtocolVersion() == 1) - throw new InvalidRequestException("Conditional updates are not supported by the protocol version in use. You need to upgrade to a driver using the native protocol v2."); - return hasConditions() ? executeWithCondition(queryState, options) : executeWithoutCondition(queryState, options);
