Signed-off-by: John Snow <js...@redhat.com> ---
My hubris is infinite. OK, I only added a few -- to help me remember how the parser works at a glance. Signed-off-by: John Snow <js...@redhat.com> --- scripts/qapi/parser.py | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index dbbd0fcbc2f..8fc77808ace 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -51,7 +51,24 @@ def __init__(self, parser: 'QAPISchemaParser', msg: str): class QAPISchemaParser: + """ + Performs parsing of a QAPI schema source file. + :param fname: Path to the source file. + :param previously_included: + The absolute paths of previously included source files. + Only used by recursive calls to avoid re-parsing files. + :param incl_info: + `QAPISourceInfo` for the parent document. + This may be None if this is the root schema document. + + :ivar exprs: Resulting parsed expressions. + :ivar docs: Resulting parsed documentation blocks. + + :raise OSError: For problems opening the root schema document. + :raise QAPIParseError: For JSON or QAPIDoc syntax problems. + :raise QAPISemError: For various semantic issues with the schema. + """ def __init__(self, fname: str, previously_included: Optional[Set[str]] = None, @@ -77,6 +94,11 @@ def __init__(self, self._parse() def _parse(self) -> None: + """ + Parse the QAPI schema document. + + :return: None; results are stored in ``exprs`` and ``docs``. + """ cur_doc = None with open(self._fname, 'r', encoding='utf-8') as fp: @@ -197,6 +219,50 @@ def _check(name: str, value: object) -> List[str]: raise QAPISemError(info, "unknown pragma '%s'" % name) def accept(self, skip_comment: bool = True) -> None: + """ + Read the next lexeme and process it into a token. + + :Object state: + :tok: represents the token type. See below for values. + :pos: is the position of the first character in the lexeme. + :cursor: is the position of the next character. + :val: is the variable value of the token, if any. 
+ + Single-character tokens: + + These include ``LBRACE``, ``RBRACE``, ``COLON``, ``COMMA``, + ``LSQB``, and ``RSQB``. ``tok`` holds the single-character + lexeme. ``val`` is ``None``. + + Multi-character tokens: + + - ``COMMENT``: + + - This token is not normally yielded by the lexer, but it + can be when ``skip_comment`` is False. + - ``tok`` is the value ``"#"``. + - ``val`` is a string including all chars until end-of-line. + + - ``STRING``: + + - ``tok`` is ``"'"``, the single quote. + - ``val`` is the string, *excluding* the quotes. + + - ``TRUE`` and ``FALSE``: + + - ``tok`` is either ``"t"`` or ``"f"`` accordingly. + - ``val`` is either ``True`` or ``False`` accordingly. + + - ``NEWLINE`` and ``SPACE``: + + - These are consumed by the lexer directly. ``line_pos`` and + ``info`` are advanced when ``NEWLINE`` is encountered. + ``tok`` is set to ``None`` upon reaching EOF. + + :param skip_comment: + When false, return ``COMMENT`` tokens. + This is used when reading documentation blocks. + """ while True: self.tok = self.src[self.cursor] self.pos = self.cursor -- 2.30.2