Author: Armin Rigo <[email protected]>
Branch:
Changeset: r103:1ad5d2bb1fa5
Date: 2014-11-28 23:11 +0100
http://bitbucket.org/cffi/creflect/changeset/1ad5d2bb1fa5/
Log: Parsing simple C declarations, starting
diff --git a/creflect/src/c_decl_parser.c b/creflect/src/c_decl_parser.c
new file mode 100644
--- /dev/null
+++ b/creflect/src/c_decl_parser.c
@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include <string.h>
+#include "creflect.h"
+
+
+/* Kinds of token produced by next_token().
+ *
+ * Punctuation tokens use the character itself as their value, because
+ * next_token() stores the character code directly into the token kind.
+ * All other kinds are numbered from 256 upwards, i.e. above any
+ * single-byte character code.
+ */
+enum crxp_token_e {
+    /* one-character punctuation: value == character code */
+    TOK_STAR          = '*',
+    TOK_OPEN_PAREN    = '(',
+    TOK_CLOSE_PAREN   = ')',
+    TOK_OPEN_BRACKET  = '[',
+    TOK_CLOSE_BRACKET = ']',
+    TOK_COMMA         = ',',
+
+    /* tokenizer states and generic lexeme classes */
+    TOK_START = 256,    /* initial kind, before the first next_token() */
+    TOK_END,            /* reached the terminating NUL of the input */
+    TOK_ERROR,          /* sticky: next_token() does nothing in this state */
+    TOK_IDENTIFIER,     /* identifier that is not a recognized keyword */
+    TOK_INTEGER,
+
+    /* keywords */
+    TOK__BOOL,
+    TOK_CHAR,
+    //TOK__COMPLEX,
+    TOK_CONST,
+    TOK_DOUBLE,
+    TOK_FLOAT,
+    //TOK__IMAGINARY,
+    TOK_INT,
+    TOK_LONG,
+    TOK_SHORT,
+    TOK_SIGNED,
+    TOK_STRUCT,
+    TOK_UNION,
+    TOK_UNSIGNED,
+    TOK_VOID,
+};
+
+/* Tokenizer state: the current token plus the builder used while parsing. */
+typedef struct crxp_token_s {
+    enum crxp_token_e kind;   /* kind of the current token */
+    const char *p;            /* where the current token starts in the input */
+    size_t size;              /* length of the current token, in chars;
+                                 next_token() resumes scanning at p + size */
+    crx_builder_t *cb;        /* builder whose callbacks construct the types */
+} crxp_token_t;
+
+/* Return 1 if 'x' is one of the six whitespace characters
+   ' ', '\f', '\n', '\r', '\t', '\v'; return 0 otherwise. */
+static int is_space(char x)
+{
+    switch (x) {
+    case ' ':
+    case '\f':
+    case '\n':
+    case '\r':
+    case '\t':
+    case '\v':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* Return 1 if 'x' may start an identifier (an ASCII letter or '_'),
+   0 otherwise. */
+static int is_ident_first(char x)
+{
+    if (x == '_') {
+        return 1;
+    }
+    if (x >= 'a' && x <= 'z') {
+        return 1;
+    }
+    return (x >= 'A' && x <= 'Z');
+}
+
+/* Return 1 if 'x' may continue an identifier: anything accepted by
+   is_ident_first(), or a decimal digit.  Return 0 otherwise. */
+static int is_ident_next(char x)
+{
+    if (x >= '0' && x <= '9') {
+        return 1;
+    }
+    return is_ident_first(x) != 0;
+}
+
+/* Classify an identifier-shaped lexeme of 'size' chars starting at 'p':
+   return the matching keyword kind, or TOK_IDENTIFIER if it is not one of
+   the recognized keywords. */
+static enum crxp_token_e keyword_or_identifier(const char *p, size_t size)
+{
+    static const struct {
+        const char *text;
+        size_t size;
+        enum crxp_token_e kind;
+    } keywords[] = {
+        { "_Bool",    5, TOK__BOOL    },
+        { "char",     4, TOK_CHAR     },
+        { "const",    5, TOK_CONST    },
+        { "double",   6, TOK_DOUBLE   },
+        { "float",    5, TOK_FLOAT    },
+        { "int",      3, TOK_INT      },
+        { "long",     4, TOK_LONG     },
+        { "short",    5, TOK_SHORT    },
+        { "signed",   6, TOK_SIGNED   },
+        { "struct",   6, TOK_STRUCT   },
+        { "union",    5, TOK_UNION    },
+        { "unsigned", 8, TOK_UNSIGNED },
+        { "void",     4, TOK_VOID     },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+        /* compare lengths first: the lexeme is not NUL-terminated */
+        if (keywords[i].size == size && !memcmp(p, keywords[i].text, size)) {
+            return keywords[i].kind;
+        }
+    }
+    return TOK_IDENTIFIER;
+}
+
+/* Advance 'tok' to the next token of the input.
+ *
+ * Scanning resumes right after the current token (tok->p + tok->size) and
+ * skips whitespace.  On return, tok->p / tok->size delimit the new token
+ * and tok->kind is:
+ *   - TOK_END (size 0) at the terminating NUL of the input;
+ *   - a keyword kind or TOK_IDENTIFIER for a lexeme that starts with a
+ *     letter or '_' and continues with letters, digits or '_';
+ *   - otherwise the character code itself, as a one-character token.
+ *     Digits are not scanned as TOK_INTEGER here; they also come out as
+ *     one-character tokens.
+ *
+ * TOK_ERROR is sticky: if the token is already in that state, nothing is
+ * modified, so tok->p keeps pointing at the offending position.
+ */
+static void next_token(crxp_token_t *tok)
+{
+    const char *p;
+
+    if (tok->kind == TOK_ERROR) {
+        return;
+    }
+
+    p = tok->p + tok->size;
+    while (is_space(*p)) {
+        p++;
+    }
+    tok->p = p;
+
+    if (*p == '\0') {
+        tok->kind = TOK_END;
+        tok->size = 0;
+        return;
+    }
+
+    if (!is_ident_first(*p)) {
+        /* Go through unsigned char so that bytes >= 0x80 give 128..255
+           instead of a negative value where plain char is signed. */
+        tok->kind = (enum crxp_token_e)(unsigned char)*p;
+        tok->size = 1;
+        return;
+    }
+
+    tok->size = 1;
+    while (is_ident_next(p[tok->size])) {
+        tok->size++;
+    }
+    tok->kind = keyword_or_identifier(p, tok->size);
+}
+
+/* Parse what may follow the pointer/const part of a declaration.
+ *
+ * Function ("(") and array ("[") suffixes are not implemented yet.  Rather
+ * than abort()ing the whole process on such input, they are reported the
+ * same way parse_complete() reports an unsupported type: the token is put
+ * in the TOK_ERROR state (leaving tok->p at the offending character) and
+ * NULL is returned.
+ *
+ * For any other token, 't1' is returned unchanged and the token is left
+ * for the caller to inspect.
+ */
+static crx_type_t *parse_sequel_right(crxp_token_t *tok, crx_type_t *t1)
+{
+    switch (tok->kind) {
+
+    case TOK_OPEN_PAREN:      /* function types: not implemented yet */
+    case TOK_OPEN_BRACKET:    /* array types: not implemented yet */
+        tok->kind = TOK_ERROR;
+        return NULL;
+
+    default:
+        return t1;
+    }
+}
+
+/* Consume a run of '*' and 'const' tokens, wrapping 't1' in a pointer type
+   or a const type (via the builder callbacks) for each of them in order.
+   At the first token of any other kind, hand over to parse_sequel_right()
+   and return its result. */
+static crx_type_t *parse_sequel(crxp_token_t *tok, crx_type_t *t1)
+{
+    for (;;) {
+        if (tok->kind == TOK_STAR) {
+            t1 = tok->cb->get_pointer_type(tok->cb, t1);
+        }
+        else if (tok->kind == TOK_CONST) {
+            t1 = tok->cb->get_const_type(tok->cb, t1);
+        }
+        else {
+            return parse_sequel_right(tok, t1);
+        }
+        next_token(tok);
+    }
+}
+
+/* Parse a complete type: an optional leading 'const', a base type, then
+ * whatever parse_sequel() accepts.
+ *
+ * The only base type recognized so far is 'int'.  For any other token the
+ * token is put in the TOK_ERROR state and NULL is returned.
+ */
+static crx_type_t *parse_complete(crxp_token_t *tok)
+{
+    crx_type_t *base;
+    int leading_const = 0;
+
+    if (tok->kind == TOK_CONST) {
+        leading_const = 1;
+        next_token(tok);
+    }
+
+    if (tok->kind != TOK_INT) {
+        tok->kind = TOK_ERROR;
+        return NULL;
+    }
+    base = tok->cb->get_signed_type(tok->cb, sizeof(int), "int");
+    next_token(tok);
+
+    /* a leading 'const' qualifies the base type itself */
+    if (leading_const) {
+        base = tok->cb->get_const_type(tok->cb, base);
+    }
+    return parse_sequel(tok, base);
+}
+
+/* Parse the C type written in the string '*input', building it with the
+ * callbacks of 'cb'.
+ *
+ * Returns the resulting type if the whole string was consumed.  Otherwise
+ * returns NULL and sets '*input' to the start of the token at which
+ * parsing stopped.  On success '*input' is left untouched.
+ */
+crx_type_t *creflect_decl_parser(crx_builder_t *cb, const char **input)
+{
+    crxp_token_t token;
+    crx_type_t *result;
+
+    token.cb = cb;
+    token.kind = TOK_START;
+    token.p = *input;
+    token.size = 0;
+
+    next_token(&token);
+    result = parse_complete(&token);
+
+    if (token.kind != TOK_END) {
+        /* parse error or trailing input: report where we stopped */
+        *input = token.p;
+        return NULL;
+    }
+    return result;
+}
diff --git a/test/test_c_decl_parser.py b/test/test_c_decl_parser.py
new file mode 100644
--- /dev/null
+++ b/test/test_c_decl_parser.py
@@ -0,0 +1,72 @@
+import os, subprocess
+from .udir import udir
+
+
+# C source of the test driver.  It #includes the parser implementation
+# directly, parses argv[1] with creflect_decl_parser(), and prints either
+# the text of the resulting type or an "error:" line echoing the input,
+# followed by a line with a '^' preceded by one space per character of
+# offset of the reported error position.
+TESTER = r"""
+#include "c_decl_parser.c"
+#include "creflect_print.h"
+
+int main(int argc, char *argv[])
+{
+ const char *p = argv[1];
+ crx_type_t *t1 = creflect_decl_parser(&maincb, &p);
+ if (t1 != NULL)
+ printf("%s\n", t1->text);
+ else {
+ printf("error: %s\n ", argv[1]);
+ while (p > argv[1]) {
+ printf(" ");
+ p--;
+ }
+ printf("^\n");
+ }
+ return 0;
+}
+"""
+
+
+def setup_module(mod):
+    """Write the TESTER program into the test directory and compile it.
+
+    The path of the resulting executable is stored as ``mod.executable``
+    for parse() to use.  Fails (assertion) if gcc exits with a non-zero
+    status.
+    """
+    executable = str(udir.join('c_decl_parser_test'))
+    with open(executable + '.c', 'w') as f:
+        f.write(TESTER)
+    # Pass the arguments as a list instead of building a shell command, so
+    # that a test directory containing quotes or other shell
+    # metacharacters cannot break (or alter) the compilation command.
+    err = subprocess.call(['gcc', '-g', '-Werror', executable + '.c',
+                           '-o', executable, '-I../creflect/src'])
+    assert not err
+    mod.executable = executable
+
+
+def parse(input, expected_output):
+    """Run the compiled tester on `input` and check its exact output.
+
+    `expected_output` is the expected stdout without its final newline.
+    """
+    # universal_newlines=True makes check_output() return text rather than
+    # bytes on Python 3, so the comparison with a str works on both
+    # Python 2 and Python 3.
+    got = subprocess.check_output([executable, input],
+                                  universal_newlines=True)
+    assert got == expected_output + '\n'
+
+def parse_error(input, expected_location):
+    """Check that the tester reports a parse error for `input`, with the
+    caret preceded by `expected_location` extra spaces."""
+    caret_indent = " " * expected_location
+    expected = 'error: %s\n %s^' % (input, caret_indent)
+    parse(input, expected)
+
+def test_c_decl_parser():
+    """Check the printed form of the declarations parsed so far; the
+    remaining declarations are placeholders behind the skip."""
+    cases = [
+        ("int **", "PTR PTR int"),
+        ("const int **", "PTR PTR CONST int"),
+        ("int const **", "PTR PTR CONST int"),
+        ("int *const *", "PTR CONST PTR int"),
+        ("int ** const", "CONST PTR PTR int"),
+    ]
+    for declaration, printed in cases:
+        parse(declaration, printed)
+    # everything below is not reached yet
+    import py; py.test.skip("in-progress")
+    parse("int[2]")
+    parse("int*[2][3]")
+    parse("int(*)[2][3]")
+    parse("int(*[2])[3]")
+    parse("int()")
+    parse("int(void)")
+    parse("int(int)")
+    parse("int(int *, int const *)")
+    parse("int(*)(int)")
+    parse("unsigned int")
+    parse("unsigned long long *")
+    parse("const unsigned long long *")
+    parse("unsigned long long const *")
+    parse("char(*(*)(long))(int)")
+    parse("foo_t[]")
+
+def test_c_decl_error():
+    """Check the reported error position for inputs that do not parse."""
+    for text, column in [("*", 0), ("int ]**", 4)]:
+        parse_error(text, column)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit