To help clarify my understanding of Thrift's IDL and Protocol Structure, I
transliterated the following two documents into ABNF (RFC5234) documents.

http://wiki.apache.org/thrift/ThriftIDL
http://svn.apache.org/viewvc/incubator/thrift/trunk/doc/thrift.bnf?view=markup

Due to discrepancies between the above documents and a few of the thrift
implementations that I sampled, the correct specification is unclear to
me.  I'd appreciate any corrections and/or feedback on the attached ABNF
documents.

regards,

Joe N.


--
nor...@alum.mit.edu
;; Document ;;
;; A document is any number of headers followed by any number of
;; definitions.
;; [1] Document ::= Header* Definition*
Document       =  *Header *Definition

;; Header ;;
;; [2] Header ::= Include | CppInclude | Namespace
Header         =  Include / CppInclude / Namespace

;;; Thrift Include ;;;
;; [3] Include ::= 'include' Literal
Include        =  "include" Literal

;;; C++ Include ;;;
;; [4] CppInclude ::= 'cpp_include' Literal
CppInclude     =  "cpp_include" Literal

;;; Namespace ;;;
;; [5] Namespace ::= ( 'namespace' ( NamespaceScope Identifier ) |\
;;                                  ( 'smalltalk.category' STIdentifier ) |\
;;                                  ( 'smalltalk.prefix' Identifier ) ) |\
;;                    ( 'php_namespace' Literal ) |\
;;                    ( 'xsd_namespace' Literal )
;; The alternatives that may follow the 'namespace' keyword are factored
;; out into NamespaceValue.  Concatenation binds tighter than alternation
;; in ABNF, so the alternatives need no grouping parentheses.
Namespace      =  "namespace" NamespaceValue
                  / "php_namespace" Literal
                  / "xsd_namespace" Literal

NamespaceValue =  NamespaceScope Identifier
                  / "smalltalk.category" STIdentifier
                  / "smalltalk.prefix" Identifier

;; [6] NamespaceScope ::= '*' | 'cpp' | 'java' | 'py' | 'perl' | 'rb' |\
;;                        'cocoa' | 'csharp'
;; Lines are kept short so that mail transport does not re-wrap them; a
;; wrapped comment tail without a leading ';' is not valid ABNF.
NamespaceScope =  "*" / "cpp" / "java" / "py" / "perl" / "rb" /
                  "cocoa" / "csharp"

;; Definition ;;
;; [7] Definition ::= Const | Typedef | Enum | Senum | Struct |\
;;                    Exception | Service
Definition     =  Const / Typedef / Enum / Senum / Struct /
                  Exception / Service

;;; Const ;;;
;; [8] Const ::= 'const' FieldType Identifier '=' ConstValue
;;               [ListSeparator]
Const          =  "const" FieldType Identifier "=" ConstValue
                  [ListSeparator]

;;; Typedef ;;;
;; [9] Typedef ::= 'typedef' DefinitionType Identifier
Typedef        =  "typedef" DefinitionType Identifier

;;; Enum ;;;
;; [10] Enum ::= 'enum' Identifier '{'
;;               (Identifier ('=' IntConstant)? ListSeparator?)* '}'
;; Continuation lines of a rule must start with whitespace (RFC 5234,
;; c-wsp); a continuation at column 0 would be read as a new rule.
Enum           =  "enum" Identifier "{"
                  *( Identifier ["=" IntConstant] [ListSeparator] ) "}"

;;; Senum ;;;
;; [11] Senum ::= 'senum' Identifier '{' (Literal ListSeparator?)* '}'
Senum          =  "senum" Identifier "{" *( Literal [ListSeparator] ) "}"

;;; Struct ;;;
;; [12] Struct ::= 'struct' Identifier 'xsd_all'? '{' Field* '}'
Struct         =  "struct" Identifier ["xsd_all"] "{" *Field "}"

;;; Exception ;;;
;; [13] Exception ::= 'exception' Identifier '{' Field* '}'
Exception      =  "exception" Identifier "{" *Field "}"

;;; Service ;;;
;; [14] Service ::= 'service' Identifier ( 'extends' Identifier )?
;;                  '{' Function* '}'
Service        =  "service" Identifier ["extends" Identifier]
                  "{" *Function "}"

;; Field ;;
;; [15] Field ::= FieldID? FieldReq? FieldType Identifier
;;                ('=' ConstValue)? XsdFieldOptions ListSeparator?
;; XsdFieldOptions is not optional here, but every part of it is, so it
;; can match the empty string.
Field          =  [FieldID] [FieldReq] FieldType Identifier
                  ["=" ConstValue] XsdFieldOptions [ListSeparator]

;;; Field ID ;;;
;; [16] FieldID ::= IntConstant ':'
FieldID        =  IntConstant ":"

;;; Field Requiredness ;;;
;; [17] FieldReq ::= 'required' | 'optional'
FieldReq       =  "required" / "optional"

;;; XSD Options ;;;
;; [18] XsdFieldOptions ::= 'xsd_optional'? 'xsd_nillable'? XsdAttrs?
XsdFieldOptions =  ["xsd_optional"] ["xsd_nillable"] [XsdAttrs]

;; [19] XsdAttrs ::= 'xsd_attrs' '{' Field* '}'
XsdAttrs       =  "xsd_attrs" "{" *Field "}"

;; Functions ;;
;; [20] Function ::= 'oneway'? FunctionType Identifier '(' Field* ')'
;;                   Throws? ListSeparator?
;; The continuation line is indented: ABNF requires leading whitespace on
;; every continuation line of a rule.
Function       =  ["oneway"] FunctionType Identifier "(" *Field ")"
                  [Throws] [ListSeparator]

;; [21] FunctionType ::= FieldType | 'void'
FunctionType   =  FieldType / "void"

;; [22] Throws ::= 'throws' '(' Field* ')'
Throws         =  "throws" "(" *Field ")"

;; Types ;;
;; [23] FieldType ::= Identifier | BaseType | ContainerType
FieldType      =  Identifier / BaseType / ContainerType

;; [24] DefinitionType ::= BaseType | ContainerType
;; Unlike FieldType, a DefinitionType has no Identifier alternative.
DefinitionType =  BaseType / ContainerType

;; [25] BaseType ::= 'bool' | 'byte' | 'i16' | 'i32' | 'i64' | 'double' |\
;;                     'string' | 'binary' | 'slist'
BaseType       =  "bool" / "byte" / "i16" / "i32" / "i64" / "double"
                  / "string" / "binary" / "slist"

;; [26] ContainerType ::= MapType | SetType | ListType
ContainerType  =  MapType / SetType / ListType

;; [27] MapType ::= 'map' CppType? '<' FieldType ',' FieldType '>'
MapType        =  "map" [CppType] "<" FieldType "," FieldType ">"

;; [28] SetType ::= 'set' CppType? '<' FieldType '>'
SetType        =  "set" [CppType] "<" FieldType ">"

;; [29] ListType ::= 'list' '<' FieldType '>' CppType?
;; For lists the optional CppType comes after the element type, whereas
;; for maps and sets it comes before the '<'.
ListType       =  "list" "<" FieldType ">" [CppType]

;; [30] CppType ::= 'cpp_type' Literal
CppType        =  "cpp_type" Literal

;; Constant Values ;;
;; [31] ConstValue ::= IntConstant | DoubleConstant | Literal |\
;;                     Identifier | ConstList | ConstMap
ConstValue     =  IntConstant / DoubleConstant / Literal / Identifier /
                  ConstList / ConstMap

;; [32] IntConstant ::= ('+' | '-')? Digit+
IntConstant    =  ["+" / "-"] 1*Digit

;; [33] DoubleConstant ::= ('+' | '-')? Digit* ('.' Digit+)?
;;                         ( ('E' | 'e') IntConstant )?
;; ABNF quoted strings are case-insensitive (RFC 5234, section 2.3), so
;; the single literal "E" covers both 'E' and 'e'.
;; Every part is optional, so this rule also matches the empty string and
;; any plain IntConstant, exactly as production [33] does.
DoubleConstant =  ["+" / "-"] *Digit ["." 1*Digit] ["E" IntConstant]

;; [34] ConstList ::= '[' (ConstValue ListSeparator?)* ']'
ConstList      =  "[" *( ConstValue [ListSeparator] ) "]"

;; [35] ConstMap ::= '{' (ConstValue ':' ConstValue ListSeparator?)* '}'
ConstMap       =  "{" *( ConstValue ":" ConstValue [ListSeparator] ) "}"

;; Basic Definitions ;;
;;; Literal ;;;
;; [36] Literal ::= ('"' [^"]* '"') | ("'" [^']* "'")
;; %x22 is the double quote and %x27 the single quote.  The body ranges
;; admit printable ASCII (%x20-7E) minus the delimiting quote.
;; NOTE(review): production [36] admits any character except the quote;
;; this rule is narrower -- confirm that is intended.
Literal        =  ( %x22 *( %x20-21 / %x23-7E ) %x22 ) /
                  ( %x27 *( %x20-26 / %x28-7E ) %x27 )

;;; Identifier ;;;
;; [37] Identifier ::= ( Letter | '_' ) ( Letter | Digit | '.' | '_' )*
Identifier     =  ( Letter / "_" ) *( Letter / Digit / "." / "_" )

;; [38] STIdentifier ::= ( Letter | '_' )
;;                       ( Letter | Digit | '.' | '_' | '-' )*
;; Same as Identifier, with "-" also allowed after the first character.
STIdentifier   =  ( Letter / "_" ) *( Letter / Digit / "." / "_" / "-" )

;;; List Separator ;;;
;; [39] ListSeparator ::= ',' | ';'
ListSeparator  =  "," / ";"

;;; Letters and Digits ;;;
;; [40] Letter ::= ['A'-'Z'] | ['a'-'z']
Letter         =  %x41-5A / %x61-7A

;; [41] Digit ::= ['0'-'9']
Digit          =  %x30-39
;; Wire-level primitive values ;;
;; Fixed-width values use ABNF specific repetition "nElement" (RFC 5234,
;; section 3.7).  The form "n*Element" means "n or more", which would
;; make every fixed-width value open-ended.
BOOL           =  %x00 / %x01
BYTE           =  OCTET
I08            =  OCTET
I16            =  2OCTET
I32            =  4OCTET
U64            =  8OCTET
I64            =  8OCTET
DOUBLE         =  8OCTET
;; NOTE(review): the leading I32 of STRING and BINARY is presumably the
;; length of the data that follows; ABNF cannot express that constraint.
STRING         =  I32 UTF8-octets
BINARY         =  I32 *OCTET

;; Message type codes ;;
T-CALL         =  %x01
T-REPLY        =  %x02
T-EXCEPTION    =  %x03
T-ONEWAY       =  %x04

;; Type codes, listed in ascending numeric order ;;
;; T-BYTE and T-I08 share the value 3, and T-STRING and T-BINARY share
;; the value 11 (%x0B), as in Thrift's TType enumeration.  The values
;; 5 and 7 are not assigned to any rule here.
T-STOP         =  %x00
T-VOID         =  %x01
T-BOOL         =  %x02
T-BYTE         =  %x03
T-I08          =  %x03
T-DOUBLE       =  %x04
T-I16          =  %x06
T-I32          =  %x08
T-U64          =  %x09
T-I64          =  %x0A
T-STRING       =  %x0B
T-BINARY       =  %x0B
T-STRUCT       =  %x0C
T-MAP          =  %x0D
T-SET          =  %x0E
T-LIST         =  %x0F
;; Messages ;;
;; <message> ::= <message-begin> <struct> <message-end>
message        =  message-begin struct message-end

;; <message-begin> ::= <method-name> <message-type> <message-seqid>
message-begin  =  method-name message-type message-seqid
;; message-end matches the empty string.
message-end    =  ""

;; <method-name> ::= STRING
method-name    =  STRING

;; <message-type> ::= T_CALL | T_REPLY | T_EXCEPTION
message-type   =  T-CALL / T-REPLY / T-EXCEPTION

;; <message-seqid> ::= I32
message-seqid  =  I32

;; Structs ;;
;; <struct> ::= <struct-begin> <field>* <field-stop> <struct-end>
struct         =  struct-begin *field field-stop struct-end

;; <struct-begin> ::= <struct-name>
struct-begin   =  struct-name
;; struct-end matches the empty string.
struct-end     =  ""

;; <struct-name> ::= STRING
struct-name    =  STRING

;; <field-stop> ::= T_STOP
field-stop     =  T-STOP

;; Fields ;;
;; <field> ::= <field-begin> <field-data> <field-end>
field          =  field-begin field-data field-end

;; <field-begin> ::= <field-name> <field-type> <field-id>
field-begin    =  field-name field-type field-id
;; field-end matches the empty string.
field-end      =  ""

;; <field-name> ::= STRING
field-name     =  STRING

;; <field-type> ::= T_BOOL | T_BYTE | T_I8 | T_I16 | T_I32 | T_I64 |
;;                  T_DOUBLE | T_STRING | T_BINARY | T_STRUCT | T_MAP |
;;                  T_SET | T_LIST
;; NOTE(review): this rule also admits T-STOP, T-VOID and T-U64, which
;; the production quoted above does not list -- confirm that is intended.
field-type     =  T-STOP / T-VOID / T-BOOL / T-BYTE / T-I08 / T-I16 /
                  T-I32 / T-U64 / T-I64 / T-DOUBLE / T-STRING /
                  T-BINARY / T-STRUCT / T-MAP / T-SET / T-LIST

;; <field-id> ::= I16
field-id       =  I16

;; <field-data> ::= I8 | I16 | I32 | I64 | DOUBLE | STRING | BINARY |
;;                  <struct> | <map> | <list> | <set>
;; NOTE(review): this rule also admits BOOL and U64, which the production
;; quoted above does not list.
field-data     =  BOOL / I08 / I16 / I32 / U64 / I64 / DOUBLE / STRING /
                  BINARY / struct / map / list / set

;; TBD - is this correct?
;; One map entry: two consecutive field-data values.
;; NOTE(review): presumably the key followed by the value -- confirm.
field-datum    =  field-data field-data

;; Maps ;;
;; <map> ::= <map-begin> <field-datum>* <map-end>
map            =  map-begin *field-datum map-end

;; <map-begin> ::= <map-key-type> <map-value-type> <map-size>
map-begin      =  map-key-type map-value-type map-size
;; map-end matches the empty string.
map-end        =  ""

;; <map-key-type> ::= <field-type>
map-key-type   =  field-type

;; <map-value-type> ::= <field-type>
map-value-type =  field-type

;; <map-size> ::= I32
map-size       =  I32

;; Lists ;;
;; <list> ::= <list-begin> <field-data>* <list-end>
list           =  list-begin *field-data list-end

;; <list-begin> ::= <list-elem-type> <list-size>
list-begin     =  list-elem-type list-size
;; list-end matches the empty string.
list-end       =  ""

;; <list-elem-type> ::= <field-type>
list-elem-type =  field-type

;; <list-size> ::= I32
list-size      =  I32

;; Sets ;;
;; <set> ::= <set-begin> <field-data>* <set-end>
set            =  set-begin *field-data set-end

;; <set-begin> ::= <set-elem-type> <set-size>
set-begin      =  set-elem-type set-size
;; set-end matches the empty string.
set-end        =  ""

;; <set-elem-type> ::= <field-type>
set-elem-type  =  field-type

;; <set-size> ::= I32
set-size       =  I32
;; Core rules ;;
ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z

BIT            =  "0" / "1"

CHAR           =  %x01-7F             ; any 7-bit character except NUL

CR             =  %x0D                ; carriage return

;; NOTE(review): RFC 5234 Appendix B.1 defines CRLF as CR LF only; the
;; bare-LF alternative below is a deviation from it -- confirm intended.
CRLF           =  LF / CR LF

CTL            =  %x00-1F / %x7F      ; control characters

DIGIT          =  %x30-39             ; 0-9

DQUOTE         =  %x22                ; double quote

;; Quoted strings are case-insensitive, so "A"-"F" also match a-f.
HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

HTAB           =  %x09                ; horizontal tab

LF             =  %x0A                ; line feed

LWSP           =  *( WSP / CRLF WSP ) ; linear white space, may span lines

OCTET          =  %x00-FF             ; 8 bits of data

SP             =  %x20                ; space

VCHAR          =  %x21-7E             ; visible (printing) characters

WSP            =  SP / HTAB           ; white space

;; UTF-8 byte sequences ;;
;; These productions match the UTF-8 grammar in RFC 3629, section 4.
UTF8-octets    =  *UTF8-char
UTF8-char      =  UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
UTF8-1         =  %x00-7F
UTF8-2         =  %xC2-DF UTF8-tail

UTF8-3         =  %xE0 %xA0-BF UTF8-tail
                  / %xE1-EC 2UTF8-tail
                  / %xED %x80-9F UTF8-tail
                  / %xEE-EF 2UTF8-tail
UTF8-4         =  %xF0 %x90-BF 2UTF8-tail
                  / %xF1-F3 3UTF8-tail
                  / %xF4 %x80-8F 2UTF8-tail
UTF8-tail      =  %x80-BF

Reply via email to