To help clarify my understanding of Thrift's IDL and Protocol Structure, I transliterated the following two documents into ABNF (RFC5234) documents.
  http://wiki.apache.org/thrift/ThriftIDL
  http://svn.apache.org/viewvc/incubator/thrift/trunk/doc/thrift.bnf?view=markup

Due to discrepancies between the above documents and a few of the Thrift implementations that I sampled, the correct specification is unclear to me. I would appreciate any corrections and/or feedback on the attached ABNF documents.

Regards,
Joe N. -- nor...@alum.mit.edu
; ---------------------------------------------------------------------------
; Thrift IDL, transliterated from the numbered BNF productions into ABNF
; (RFC 5234).  Each rule is preceded by the BNF production it was derived
; from, quoted as a comment.
;
; NOTE(review): ABNF quoted strings are case-insensitive (RFC 5234, 2.3), so
;   every keyword below (e.g. "include") also matches "INCLUDE" -- confirm
;   whether that is acceptable or whether case-sensitive terminals are needed.
; NOTE(review): no rule here describes whitespace or comments between tokens;
;   the grammar is written at token level only.
; ---------------------------------------------------------------------------

; ===== Document =====

; [1] Document ::= Header* Definition*
Document         = *Header *Definition

; ===== Header =====

; [2] Header ::= Include | CppInclude | Namespace
Header           = Include / CppInclude / Namespace

; --- Thrift include ---
; [3] Include ::= 'include' Literal
Include          = "include" Literal

; --- C++ include ---
; [4] CppInclude ::= 'cpp_include' Literal
CppInclude       = "cpp_include" Literal

; --- Namespace ---
; [5] Namespace ::= ( 'namespace' ( NamespaceScope Identifier ) |
;                                 ( 'smalltalk.category' STIdentifier ) |
;                                 ( 'smalltalk.prefix' Identifier ) ) |
;                   ( 'php_namespace' Literal ) |
;                   ( 'xsd_namespace' Literal )
; The three forms that follow 'namespace' are factored out into the helper
; rule NamespaceValue.
Namespace        = ( "namespace" NamespaceValue )
                 / ( "php_namespace" Literal )
                 / ( "xsd_namespace" Literal )

NamespaceValue   = ( NamespaceScope Identifier )
                 / ( "smalltalk.category" STIdentifier )
                 / ( "smalltalk.prefix" Identifier )

; [6] NamespaceScope ::= '*' | 'cpp' | 'java' | 'py' | 'perl' | 'rb' |
;                        'cocoa' | 'csharp'
NamespaceScope   = "*" / "cpp" / "java" / "py" / "perl" / "rb"
                 / "cocoa" / "csharp"

; ===== Definition =====

; [7] Definition ::= Const | Typedef | Enum | Senum | Struct | Exception |
;                    Service
Definition       = Const / Typedef / Enum / Senum / Struct / Exception
                 / Service

; --- Const ---
; [8] Const ::= 'const' FieldType Identifier '=' ConstValue [ListSeparator]
Const            = "const" FieldType Identifier "=" ConstValue
                   [ListSeparator]

; --- Typedef ---
; [9] Typedef ::= 'typedef' DefinitionType Identifier
Typedef          = "typedef" DefinitionType Identifier

; --- Enum ---
; [10] Enum ::= 'enum' Identifier
;               '{' (Identifier ('=' IntConstant)? ListSeparator?)* '}'
Enum             = "enum" Identifier
                   "{" *( Identifier ["=" IntConstant] [ListSeparator] ) "}"

; --- Senum ---
; [11] Senum ::= 'senum' Identifier '{' (Literal ListSeparator?)* '}'
Senum            = "senum" Identifier "{" *( Literal [ListSeparator] ) "}"

; --- Struct ---
; [12] Struct ::= 'struct' Identifier 'xsd_all'? '{' Field* '}'
Struct           = "struct" Identifier ["xsd_all"] "{" *Field "}"

; --- Exception ---
; [13] Exception ::= 'exception' Identifier '{' Field* '}'
Exception        = "exception" Identifier "{" *Field "}"

; --- Service ---
; [14] Service ::= 'service' Identifier ( 'extends' Identifier )?
;                  '{' Function* '}'
Service          = "service" Identifier ["extends" Identifier]
                   "{" *Function "}"

; ===== Field =====

; [15] Field ::= FieldID? FieldReq? FieldType Identifier ('=' ConstValue)?
;                XsdFieldOptions ListSeparator?
Field            = [FieldID] [FieldReq] FieldType Identifier
                   ["=" ConstValue] XsdFieldOptions [ListSeparator]

; --- Field ID ---
; [16] FieldID ::= IntConstant ':'
FieldID          = IntConstant ":"

; --- Field requiredness ---
; [17] FieldReq ::= 'required' | 'optional'
FieldReq         = "required" / "optional"

; --- XSD options ---
; [18] XsdFieldOptions ::= 'xsd_optional'? 'xsd_nillable'? XsdAttrs?
; Every element is optional, so this rule can match the empty string.
XsdFieldOptions  = ["xsd_optional"] ["xsd_nillable"] [XsdAttrs]

; [19] XsdAttrs ::= 'xsd_attrs' '{' Field* '}'
XsdAttrs         = "xsd_attrs" "{" *Field "}"

; ===== Functions =====

; [20] Function ::= 'oneway'? FunctionType Identifier '(' Field* ')'
;                   Throws? ListSeparator?
Function         = ["oneway"] FunctionType Identifier "(" *Field ")"
                   [Throws] [ListSeparator]

; [21] FunctionType ::= FieldType | 'void'
FunctionType     = FieldType / "void"

; [22] Throws ::= 'throws' '(' Field* ')'
Throws           = "throws" "(" *Field ")"

; ===== Types =====

; [23] FieldType ::= Identifier | BaseType | ContainerType
FieldType        = Identifier / BaseType / ContainerType

; [24] DefinitionType ::= BaseType | ContainerType
DefinitionType   = BaseType / ContainerType

; [25] BaseType ::= 'bool' | 'byte' | 'i16' | 'i32' | 'i64' | 'double' |
;                   'string' | 'binary' | 'slist'
BaseType         = "bool" / "byte" / "i16" / "i32" / "i64" / "double"
                 / "string" / "binary" / "slist"

; [26] ContainerType ::= MapType | SetType | ListType
ContainerType    = MapType / SetType / ListType

; [27] MapType ::= 'map' CppType? '<' FieldType ',' FieldType '>'
MapType          = "map" [CppType] "<" FieldType "," FieldType ">"

; [28] SetType ::= 'set' CppType? '<' FieldType '>'
SetType          = "set" [CppType] "<" FieldType ">"

; [29] ListType ::= 'list' '<' FieldType '>' CppType?
; Unlike map and set, the optional CppType trails the element type here.
ListType         = "list" "<" FieldType ">" [CppType]

; [30] CppType ::= 'cpp_type' Literal
CppType          = "cpp_type" Literal

; ===== Constant values =====

; [31] ConstValue ::= IntConstant | DoubleConstant | Literal | Identifier |
;                     ConstList | ConstMap
ConstValue       = IntConstant / DoubleConstant / Literal / Identifier
                 / ConstList / ConstMap

; [32] IntConstant ::= ('+' | '-')? Digit+
IntConstant      = ["+" / "-"] 1*Digit

; [33] DoubleConstant ::= ('+' | '-')? Digit* ('.' Digit+)?
;                         ( ('E' | 'e') IntConstant )?
; "E" alone covers both 'E' and 'e' because ABNF strings are case-insensitive.
; NOTE(review): as in the BNF, every part is optional, so this rule matches
;   the empty string and overlaps IntConstant -- confirm intent.
DoubleConstant   = ["+" / "-"] *Digit ["." 1*Digit] ["E" IntConstant]

; [34] ConstList ::= '[' (ConstValue ListSeparator?)* ']'
ConstList        = "[" *( ConstValue [ListSeparator] ) "]"

; [35] ConstMap ::= '{' (ConstValue ':' ConstValue ListSeparator?)* '}'
ConstMap         = "{" *( ConstValue ":" ConstValue [ListSeparator] ) "}"

; ===== Basic definitions =====

; --- Literal ---
; [36] Literal ::= ('"' [^"]* '"') | ("'" [^']* "'")
; %x22 is the double quote and %x27 the single quote; each body excludes only
; its own delimiter from the range %x20-7E.
; NOTE(review): the BNF allows any character other than the delimiter, while
;   these ranges admit printable ASCII only (no tab, newline or non-ASCII) --
;   confirm whether the narrower range is intended.
Literal          = ( %x22 *( %x20-21 / %x23-7E ) %x22 )
                 / ( %x27 *( %x20-26 / %x28-7E ) %x27 )

; --- Identifier ---
; [37] Identifier ::= ( Letter | '_' ) ( Letter | Digit | '.' | '_' )*
Identifier       = ( Letter / "_" ) *( Letter / Digit / "." / "_" )

; [38] STIdentifier ::= ( Letter | '_' ) ( Letter | Digit | '.' | '_' | '-' )*
; Same as Identifier, with "-" additionally allowed after the first character.
STIdentifier     = ( Letter / "_" ) *( Letter / Digit / "." / "_" / "-" )

; --- List separator ---
; [39] ListSeparator ::= ',' | ';'
ListSeparator    = "," / ";"

; --- Letters and digits ---
; [40] Letter ::= ['A'-'Z'] | ['a'-'z']
Letter           = %x41-5A / %x61-7A

; [41] Digit ::= ['0'-'9']
Digit            = %x30-39
; ---------------------------------------------------------------------------
; Thrift protocol primitives and type/message codes (ABNF, RFC 5234).
; OCTET and UTF8-octets are not defined here; they must be supplied by the
; accompanying core-rule and UTF-8 grammars.
; ---------------------------------------------------------------------------

; ----- Primitive encodings -----
; Fixed-width values use the specific-repetition form "<n>element"
; (RFC 5234, 3.7).  The variable-repetition form "<n>*element" (3.6) means
; "at least n", which would let e.g. an I16 swallow any number of octets.
BOOL         = %x00 / %x01
BYTE         = OCTET
I08          = OCTET
I16          = 2OCTET
I32          = 4OCTET
U64          = 8OCTET
I64          = 8OCTET
DOUBLE       = 8OCTET

; NOTE(review): the leading I32 is presumably a length prefix; ABNF cannot tie
;   the number of following octets to that value -- confirm against the
;   protocol implementation.
STRING       = I32 UTF8-octets
BINARY       = I32 *OCTET

; ----- Message type codes -----
T-CALL       = %x01
T-REPLY      = %x02
T-EXCEPTION  = %x03
T-ONEWAY     = %x04

; ----- Field type codes -----
T-STOP       = %x00
T-VOID       = %x01
T-BOOL       = %x02
T-BYTE       = %x03
; T-I08 shares its code with T-BYTE, as in Thrift's TType enumeration
; (T_BYTE = T_I08 = 3); %x05 is not an assigned type code.
T-I08        = %x03
T-I16        = %x06
T-I32        = %x08
T-U64        = %x09
T-I64        = %x0a
T-DOUBLE     = %x04
; T-STRING and T-BINARY deliberately carry the same code.
T-STRING     = %x0b
T-BINARY     = %x0b
T-STRUCT     = %x0c
T-MAP        = %x0d
T-SET        = %x0e
T-LIST       = %x0f
; ---------------------------------------------------------------------------
; Thrift protocol structure, transliterated from BNF into ABNF (RFC 5234).
; Each rule is preceded by the BNF production it was derived from, quoted as
; a comment.  The upper-case primitives (STRING, I32, ...) and the T-* codes
; are defined in the accompanying primitives grammar.
;
; The *-end rules are the empty string: they mark a position in the structure
; and contribute no octets.
; ---------------------------------------------------------------------------

; ===== Message =====

; <message> ::= <message-begin> <struct> <message-end>
message         = message-begin struct message-end

; <message-begin> ::= <method-name> <message-type> <message-seqid>
message-begin   = method-name message-type message-seqid
message-end     = ""

; <method-name> ::= STRING
method-name     = STRING

; <message-type> ::= T_CALL | T_REPLY | T_EXCEPTION
; T-ONEWAY is accepted as well: it is defined alongside the other message
; type codes and is a member of Thrift's message-type enumeration, but the
; quoted BNF production leaves it out.
message-type    = T-CALL / T-REPLY / T-EXCEPTION / T-ONEWAY

; <message-seqid> ::= I32
message-seqid   = I32

; ===== Struct =====

; <struct> ::= <struct-begin> <field>* <field-stop> <struct-end>
struct          = struct-begin *field field-stop struct-end

; <struct-begin> ::= <struct-name>
struct-begin    = struct-name
struct-end      = ""

; <struct-name> ::= STRING
struct-name     = STRING

; <field-stop> ::= T_STOP
field-stop      = T-STOP

; ===== Field =====

; <field> ::= <field-begin> <field-data> <field-end>
field           = field-begin field-data field-end

; <field-begin> ::= <field-name> <field-type> <field-id>
field-begin     = field-name field-type field-id
field-end       = ""

; <field-name> ::= STRING
field-name      = STRING

; <field-type> ::= T_BOOL | T_BYTE | T_I8 | T_I16 | T_I32 | T_I64 | T_DOUBLE
;                | T_STRING | T_BINARY | T_STRUCT | T_MAP | T_SET | T_LIST
; NOTE(review): T-STOP, T-VOID and T-U64 are listed here but not in the BNF
;   production above; T-STOP is also the field-stop marker -- confirm whether
;   these three belong in field-type.
field-type      = T-STOP / T-VOID / T-BOOL / T-BYTE / T-I08 / T-I16 / T-I32
                / T-U64 / T-I64 / T-DOUBLE / T-STRING / T-BINARY / T-STRUCT
                / T-MAP / T-SET / T-LIST

; <field-id> ::= I16
field-id        = I16

; <field-data> ::= I8 | I16 | I32 | I64 | DOUBLE | STRING | BINARY
;                | <struct> | <map> | <list> | <set>
; BOOL and U64 are additional alternatives not present in the BNF production.
field-data      = BOOL / I08 / I16 / I32 / U64 / I64 / DOUBLE / STRING
                / BINARY / struct / map / list / set

; One map entry, written as two consecutive field-data values.
; TBD (original author) - is this correct?
field-datum     = field-data field-data

; ===== Map =====

; <map> ::= <map-begin> <field-datum>* <map-end>
map             = map-begin *field-datum map-end

; <map-begin> ::= <map-key-type> <map-value-type> <map-size>
map-begin       = map-key-type map-value-type map-size
map-end         = ""

; <map-key-type> ::= <field-type>
map-key-type    = field-type

; <map-value-type> ::= <field-type>
map-value-type  = field-type

; <map-size> ::= I32
map-size        = I32

; ===== List =====

; <list> ::= <list-begin> <field-data>* <list-end>
list            = list-begin *field-data list-end

; <list-begin> ::= <list-elem-type> <list-size>
list-begin      = list-elem-type list-size
list-end        = ""

; <list-elem-type> ::= <field-type>
list-elem-type  = field-type

; <list-size> ::= I32
list-size       = I32

; ===== Set =====

; <set> ::= <set-begin> <field-data>* <set-end>
set             = set-begin *field-data set-end

; <set-begin> ::= <set-elem-type> <set-size>
set-begin       = set-elem-type set-size
set-end         = ""

; <set-elem-type> ::= <field-type>
set-elem-type   = field-type

; <set-size> ::= I32
set-size        = I32
; ---------------------------------------------------------------------------
; ABNF core rules (compare RFC 5234, Appendix B.1), one rule per line.
; ---------------------------------------------------------------------------

; A-Z / a-z
ALPHA   = %x41-5A / %x61-7A

BIT     = "0" / "1"

; any 7-bit character except NUL
CHAR    = %x01-7F

; carriage return
CR      = %x0D

; NOTE(review): RFC 5234 defines CRLF as exactly CR LF; this variant also
;   accepts a bare LF -- confirm the relaxation is intentional.
CRLF    = LF / ( CR LF )

; control characters
CTL     = %x00-1F / %x7F

; 0-9
DIGIT   = %x30-39

; double quote
DQUOTE  = %x22

; quoted strings are case-insensitive, so "A"-"F" also match a-f
HEXDIG  = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

; horizontal tab
HTAB    = %x09

; line feed
LF      = %x0A

; linear white space: any run of WSP, possibly continued across line ends
LWSP    = *( WSP / CRLF WSP )

; any 8-bit value
OCTET   = %x00-FF

; space
SP      = %x20

; visible (printing) characters
VCHAR   = %x21-7E

; white space
WSP     = SP / HTAB
; ---------------------------------------------------------------------------
; Well-formed UTF-8 octet sequences (compare RFC 3629, section 4).
; The lead-octet ranges and the restricted second-octet ranges below admit
; only the listed forms; every remaining octet of a sequence is a UTF8-tail.
; ---------------------------------------------------------------------------

UTF8-octets  = *UTF8-char

UTF8-char    = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4

; one-octet form
UTF8-1       = %x00-7F

; two-octet form
UTF8-2       = %xC2-DF UTF8-tail

; three-octet forms
UTF8-3       = %xE0 %xA0-BF UTF8-tail
             / %xE1-EC 2UTF8-tail
             / %xED %x80-9F UTF8-tail
             / %xEE-EF 2UTF8-tail

; four-octet forms
UTF8-4       = %xF0 %x90-BF 2UTF8-tail
             / %xF1-F3 3UTF8-tail
             / %xF4 %x80-8F 2UTF8-tail

; continuation octet
UTF8-tail    = %x80-BF