Repository: cassandra Updated Branches: refs/heads/trunk 582f9dc08 -> e42352763
Add support for CQL in pygments (proper syntax highlighting) Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e4235276 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e4235276 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e4235276 Branch: refs/heads/trunk Commit: e42352763089a41d4563d67fe800fd2878da842f Parents: 582f9dc Author: Sylvain Lebresne <[email protected]> Authored: Wed Jun 29 09:23:38 2016 +0200 Committer: Sylvain Lebresne <[email protected]> Committed: Wed Jun 29 12:36:54 2016 +0200 ---------------------------------------------------------------------- doc/source/_util/cql.py | 267 ++++++++++++++++++++++++++++++++++++ doc/source/conf.py | 6 +- doc/source/cql/appendices.rst | 2 +- doc/source/cql/changes.rst | 2 +- doc/source/cql/ddl.rst | 2 +- doc/source/cql/definitions.rst | 4 +- doc/source/cql/dml.rst | 4 +- doc/source/cql/functions.rst | 25 ++-- doc/source/cql/indexes.rst | 2 +- doc/source/cql/json.rst | 2 +- doc/source/cql/mvs.rst | 4 +- doc/source/cql/security.rst | 7 +- doc/source/cql/triggers.rst | 2 +- doc/source/cql/types.rst | 2 +- 14 files changed, 307 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/_util/cql.py ---------------------------------------------------------------------- diff --git a/doc/source/_util/cql.py b/doc/source/_util/cql.py new file mode 100644 index 0000000..b1c8cde --- /dev/null +++ b/doc/source/_util/cql.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +""" + CQL pygments lexer + ~~~~~~~~~~~~~~~~~~ + + Lexer for the Cassandra Query Language (CQL). + + This is heavily inspired from the pygments SQL lexer (and the Postgres one in particular) but adapted to CQL + keywords and specificities. + + TODO: This has been hacked quickly, but once it's more tested, we could submit it upstream. 
+ In particular, we have a lot of keywords whose meaning depends on the context and we could potentially improve + their handling. For instance, SET is a keyword, but also a type name (that's why currently we also consider + map and list as keywords, not types; we could disambiguate by looking if there is a '<' afterwards). Or things + like USERS, which is used in some documentation examples as a table name but is a keyword too (we could + only consider it a keyword if after LIST for instance). Similarly, type names are not reserved, so they + are sometimes used as column identifiers (also, timestamp is both a type and a keyword). I "think" we can + somewhat disambiguate through "states", but unclear how far it's worth going. + + We could also add the predefined functions? +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words +from pygments.token import Punctuation, Whitespace, Error, \ + Text, Comment, Operator, Keyword, Name, String, Number, Generic, Literal +from pygments.lexers import get_lexer_by_name, ClassNotFound +from pygments.util import iteritems + +__all__ = [ 'CQLLexer' ] + +language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) + +KEYWORDS = ( + 'SELECT', + 'FROM', + 'AS', + 'WHERE', + 'AND', + 'KEY', + 'KEYS', + 'ENTRIES', + 'FULL', + 'INSERT', + 'UPDATE', + 'WITH', + 'LIMIT', + 'PER', + 'PARTITION', + 'USING', + 'USE', + 'DISTINCT', + 'COUNT', + 'SET', + 'BEGIN', + 'UNLOGGED', + 'BATCH', + 'APPLY', + 'TRUNCATE', + 'DELETE', + 'IN', + 'CREATE', + 'KEYSPACE', + 'SCHEMA', + 'KEYSPACES', + 'COLUMNFAMILY', + 'TABLE', + 'MATERIALIZED', + 'VIEW', + 'INDEX', + 'CUSTOM', + 'ON', + 'TO', + 'DROP', + 'PRIMARY', + 'INTO', + 'VALUES', + 'TIMESTAMP', + 'TTL', + 'CAST', + 'ALTER', + 'RENAME', + 'ADD', + 'TYPE', + 'COMPACT', + 'STORAGE', + 'ORDER', + 'BY', + 'ASC', + 'DESC', + 'ALLOW', + 'FILTERING', + 'IF', + 'IS', + 'CONTAINS', + 'GRANT', + 'ALL', + 'PERMISSION', + 'PERMISSIONS', + 'OF', + 'REVOKE', + 
'MODIFY', + 'AUTHORIZE', + 'DESCRIBE', + 'EXECUTE', + 'NORECURSIVE', + 'MBEAN', + 'MBEANS', + 'USER', + 'USERS', + 'ROLE', + 'ROLES', + 'SUPERUSER', + 'NOSUPERUSER', + 'PASSWORD', + 'LOGIN', + 'NOLOGIN', + 'OPTIONS', + 'CLUSTERING', + 'TOKEN', + 'WRITETIME', + 'NULL', + 'NOT', + 'EXISTS', + 'MAP', + 'LIST', + 'NAN', + 'INFINITY', + 'TUPLE', + 'TRIGGER', + 'STATIC', + 'FROZEN', + 'FUNCTION', + 'FUNCTIONS', + 'AGGREGATE', + 'SFUNC', + 'STYPE', + 'FINALFUNC', + 'INITCOND', + 'RETURNS', + 'CALLED', + 'INPUT', + 'LANGUAGE', + 'OR', + 'REPLACE', + 'JSON', + 'LIKE', +) + +DATATYPES = ( + 'ASCII', + 'BIGINT', + 'BLOB', + 'BOOLEAN', + 'COUNTER', + 'DATE', + 'DECIMAL', + 'DOUBLE', + 'EMPTY', + 'FLOAT', + 'INET', + 'INT', + 'SMALLINT', + 'TEXT', + 'TIME', + 'TIMESTAMP', + 'TIMEUUID', + 'TINYINT', + 'UUID', + 'VARCHAR', + 'VARINT', +) + +def language_callback(lexer, match): + """Parse the content of a $-string using a lexer + + The lexer is chosen looking for a nearby LANGUAGE or assumed as + java if no LANGUAGE has been found. + """ + l = None + m = language_re.match(lexer.text[max(0, match.start()-100):match.start()]) + if m is not None: + l = lexer._get_lexer(m.group(1)) + else: + l = lexer._get_lexer('java') + + # 1 = $, 2 = delimiter, 3 = $ + yield (match.start(1), String, match.group(1)) + yield (match.start(2), String.Delimiter, match.group(2)) + yield (match.start(3), String, match.group(3)) + # 4 = string contents + if l: + for x in l.get_tokens_unprocessed(match.group(4)): + yield x + else: + yield (match.start(4), String, match.group(4)) + # 5 = $, 6 = delimiter, 7 = $ + yield (match.start(5), String, match.group(5)) + yield (match.start(6), String.Delimiter, match.group(6)) + yield (match.start(7), String, match.group(7)) + + +class CQLLexer(RegexLexer): + """ + Lexer for the Cassandra Query Language. 
+ """ + + name = 'Cassandra Query Language' + aliases = ['cql'] + filenames = ['*.cql'] + mimetypes = ['text/x-cql'] + + flags = re.IGNORECASE + tokens = { + 'root': [ + (r'\s+', Text), + (r'--.*\n?', Comment.Single), + (r'//.*\n?', Comment.Single), + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r'(' + '|'.join(s.replace(" ", "\s+") + for s in DATATYPES) + + r')\b', Name.Builtin), + (words(KEYWORDS, suffix=r'\b'), Keyword), + (r'[+*/<>=~!@#%^&|`?-]+', Operator), + (r'\$\d+', Name.Variable), + + # Using Number instead of the more accurate Literal because the latter don't seem to e highlighted in most + # styles + (r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', Number), # UUIDs + (r'0x[0-9a-fA-F]+', Number), # Blobs + + (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), + (r'[0-9]+', Number.Integer), + (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'), + # quoted identifier + (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'), + (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback), + (r'[a-z_]\w*', Name), + (r'[;:()\[\]{},.]', Punctuation), + ], + 'multiline-comments': [ + (r'/\*', Comment.Multiline, 'multiline-comments'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[^/*]+', Comment.Multiline), + (r'[/*]', Comment.Multiline) + ], + 'string': [ + (r"[^']+", String.Single), + (r"''", String.Single), + (r"'", String.Single, '#pop'), + ], + 'quoted-ident': [ + (r'[^"]+', String.Name), + (r'""', String.Name), + (r'"', String.Name, '#pop'), + ], + } + + def get_tokens_unprocessed(self, text, *args): + # Have a copy of the entire text to be used by `language_callback`. 
+ self.text = text + for x in RegexLexer.get_tokens_unprocessed(self, text, *args): + yield x + + def _get_lexer(self, lang): + return get_lexer_by_name(lang, **self.options) http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/conf.py ---------------------------------------------------------------------- diff --git a/doc/source/conf.py b/doc/source/conf.py index 2b36353..63697aa 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -21,7 +21,7 @@ # # This file is execfile()d with the current directory set to its containing # dir. -import re +import re, sys, os # Finds out the version (so we don't have to manually edit that file every # time we change the version) @@ -32,6 +32,10 @@ with open(cassandra_build_file) as f: raise RuntimeException("Problem finding version in build.xml file, this shouldn't happen.") cassandra_version = m.group(1) +def setup(sphinx): + sys.path.insert(0, os.path.abspath('./source/_util')) + from cql import CQLLexer + sphinx.add_lexer("cql", CQLLexer()) # -- General configuration ------------------------------------------------ http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/appendices.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/appendices.rst b/doc/source/cql/appendices.rst index c4bb839..8c63a32 100644 --- a/doc/source/cql/appendices.rst +++ b/doc/source/cql/appendices.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql Appendices ---------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/changes.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/changes.rst b/doc/source/cql/changes.rst index 263df13..d9aea85 100644 --- a/doc/source/cql/changes.rst +++ b/doc/source/cql/changes.rst @@ -14,7 +14,7 @@ .. 
See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql Changes ------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/ddl.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/ddl.rst b/doc/source/cql/ddl.rst index 7f3431a..029c1cb 100644 --- a/doc/source/cql/ddl.rst +++ b/doc/source/cql/ddl.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _data-definition: http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/definitions.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/definitions.rst b/doc/source/cql/definitions.rst index 6c3b522..e54bcd7 100644 --- a/doc/source/cql/definitions.rst +++ b/doc/source/cql/definitions.rst @@ -16,6 +16,8 @@ .. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier +.. highlight:: cql + Definitions ----------- @@ -145,7 +147,7 @@ Multi-line comments are also supported through enclosure within ``/*`` and ``*/` :: - â This is a comment + -- This is a comment // This is a comment too /* This is a multi-line comment */ http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/dml.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/dml.rst b/doc/source/cql/dml.rst index 989c0ca..b5f9e9f 100644 --- a/doc/source/cql/dml.rst +++ b/doc/source/cql/dml.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. 
_data-manipulation: @@ -202,7 +202,7 @@ The tuple notation may also be used for ``IN`` clauses on clustering columns:: SELECT * FROM posts WHERE userid = 'john doe' - AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01), ('Extreme Chess', '2014-06-01')) + AND (blog_title, posted_at) IN (('John''s Blog', '2012-01-01'), ('Extreme Chess', '2014-06-01')) The ``CONTAINS`` operator may only be used on collection columns (lists, sets, and maps). In the case of maps, ``CONTAINS`` applies to the map values. The ``CONTAINS KEY`` operator may only be used on map columns and applies to the http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/functions.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/functions.rst b/doc/source/cql/functions.rst index efcdf32..47026cd 100644 --- a/doc/source/cql/functions.rst +++ b/doc/source/cql/functions.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _cql-functions: @@ -39,6 +39,11 @@ functions. evil, but no sandbox is perfect so using user-defined functions is opt-in). See the ``enable_user_defined_functions`` in ``cassandra.yaml`` to enable them. +A function is identified by its name: + +.. productionlist:: + function_name: [ `keyspace_name` '.' ] `name` + .. 
_scalar-functions: Scalar functions @@ -236,15 +241,15 @@ User-defined functions can be used in ``SELECT``, ``INSERT`` and ``UPDATE`` stat The implicitly available ``udfContext`` field (or binding for script UDFs) provides the necessary functionality to create new UDT and tuple values:: - CREATE TYPE custom\_type (txt text, i int); + CREATE TYPE custom_type (txt text, i int); CREATE FUNCTION fct\_using\_udt ( somearg int ) RETURNS NULL ON NULL INPUT - RETURNS custom\_type + RETURNS custom_type LANGUAGE java AS $$ UDTValue udt = udfContext.newReturnUDTValue(); - udt.setString(âtxtâ, âsome stringâ); - udt.setInt(âiâ, 42); + udt.setString("txt", "some string"); + udt.setInt("i", 42); return udt; $$; @@ -447,25 +452,25 @@ statement):: CALLED ON NULL INPUT RETURNS tuple LANGUAGE java - AS ' + AS $$ if (val != null) { state.setInt(0, state.getInt(0)+1); state.setLong(1, state.getLong(1)+val.intValue()); } return state; - '; + $$; CREATE OR REPLACE FUNCTION averageFinal (state tuple<int,bigint>) CALLED ON NULL INPUT RETURNS double LANGUAGE java - AS ' + AS $$ double r = 0; if (state.getInt(0) == 0) return null; r = state.getLong(1); r /= state.getInt(0); - return Double.valueOf®; - '; + return Double.valueOf(r); + $$; CREATE OR REPLACE AGGREGATE average(int) SFUNC averageState http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/indexes.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/indexes.rst b/doc/source/cql/indexes.rst index fbe5827..81fe429 100644 --- a/doc/source/cql/indexes.rst +++ b/doc/source/cql/indexes.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. 
_secondary-indexes: http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/json.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/json.rst b/doc/source/cql/json.rst index 6482fd6..f83f16c 100644 --- a/doc/source/cql/json.rst +++ b/doc/source/cql/json.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _cql-json: http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/mvs.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/mvs.rst b/doc/source/cql/mvs.rst index 84c18e0..aabea10 100644 --- a/doc/source/cql/mvs.rst +++ b/doc/source/cql/mvs.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _materialized-views: @@ -46,7 +46,7 @@ For instance:: SELECT * FROM monkeySpecies WHERE population IS NOT NULL AND species IS NOT NULL PRIMARY KEY (population, species) - WITH comment=âAllow query by population instead of speciesâ; + WITH comment='Allow query by population instead of species'; The ``CREATE MATERIALIZED VIEW`` statement creates a new materialized view. Each such view is a set of *rows* which corresponds to rows which are present in the underlying, or base, table specified in the ``SELECT`` statement. A http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/security.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/security.rst b/doc/source/cql/security.rst index aa65383..9efe27f 100644 --- a/doc/source/cql/security.rst +++ b/doc/source/cql/security.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. 
highlight:: cql .. _cql-security: @@ -26,6 +26,11 @@ Security Database Roles ^^^^^^^^^^^^^^ +CQL uses database roles to represent users and groups of users. Syntactically, a role is defined by: + +.. productionlist:: + role_name: `identifier` | `string` + .. _create-role-statement: CREATE ROLE http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/triggers.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/triggers.rst b/doc/source/cql/triggers.rst index 3bba72d..db3f53e 100644 --- a/doc/source/cql/triggers.rst +++ b/doc/source/cql/triggers.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _cql-triggers: http://git-wip-us.apache.org/repos/asf/cassandra/blob/e4235276/doc/source/cql/types.rst ---------------------------------------------------------------------- diff --git a/doc/source/cql/types.rst b/doc/source/cql/types.rst index 80cf864..e452f35 100644 --- a/doc/source/cql/types.rst +++ b/doc/source/cql/types.rst @@ -14,7 +14,7 @@ .. See the License for the specific language governing permissions and .. limitations under the License. -.. highlight:: sql +.. highlight:: cql .. _UUID: https://en.wikipedia.org/wiki/Universally_unique_identifier
