I decided to play with Ragel by making a parser for an extended
version of JSON.

After falling into every single pitfall possible, I managed to get the
whole thing working pretty well, but I just had a look at the
generated code and it reached 600KB total, using -G1 (!)

So, what am I doing wrong? I'm certain there's probably a lot of
stuff wrong with the way I made the parser, but really, I'm not sure
what, so having experts eyeball this would be very nice...

There are 2 different json parsers in the file; one is for the string
constants, one (the main one) is for the language proper...


/*
 * IF YOU ARE LOOKING AT A .c FILE, YOU ARE LOOKING AT THE WRONG ONE
 *
 * This file is autogenerated from a .rl source file for the 'ragel'
 * parser generator.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include "json.h"

/*
 * Ask Ragel to emit the static data (state constants and any tables) for
 * the "json_str" machine at this point in the generated file.
 */
%%{
        machine json_str;
        write data;
}%%

/*
 * Encode one Unicode code point as UTF-8 and append it at dst.
 *
 * dst        destination; the caller must guarantee room for up to 4 bytes.
 *            No NUL terminator is written.
 * inUnicode  code point to encode.
 *
 * Returns a pointer one past the last byte written.
 *
 * Sequence length is chosen from the standard UTF-8 ranges (7, 11, 16 and
 * 21 payload bits). Values above U+10FFFF are not representable and are
 * replaced by U+FFFD. Surrogate values (U+D800..U+DFFF) are still encoded
 * as plain three-byte sequences, as the previous implementation did.
 */
static char * json_append_utf8_glyph(
        char * dst,
        unsigned long inUnicode )
{
        unsigned char *cur = (unsigned char *)dst;

        if (inUnicode > 0x10fffful)
                inUnicode = 0xfffdul;   /* REPLACEMENT CHARACTER */

        if (inUnicode < 0x80ul) {
                /* 0xxxxxxx */
                *cur++ = (unsigned char)inUnicode;
        } else if (inUnicode < 0x800ul) {
                /* 110xxxxx 10xxxxxx */
                *cur++ = (unsigned char)(0xc0 | (inUnicode >> 6));
                *cur++ = (unsigned char)(0x80 | (inUnicode & 0x3f));
        } else if (inUnicode < 0x10000ul) {
                /* 1110xxxx 10xxxxxx 10xxxxxx */
                *cur++ = (unsigned char)(0xe0 | (inUnicode >> 12));
                *cur++ = (unsigned char)(0x80 | ((inUnicode >> 6) & 0x3f));
                *cur++ = (unsigned char)(0x80 | (inUnicode & 0x3f));
        } else {
                /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
                *cur++ = (unsigned char)(0xf0 | (inUnicode >> 18));
                *cur++ = (unsigned char)(0x80 | ((inUnicode >> 12) & 0x3f));
                *cur++ = (unsigned char)(0x80 | ((inUnicode >> 6) & 0x3f));
                *cur++ = (unsigned char)(0x80 | (inUnicode & 0x3f));
        }
        return (char *)cur;
}

/*
 * Decode the backslash escapes of a JSON string body.
 *
 * str  first character of the raw (still escaped) string body.
 * end  one past the last character to decode, or NULL to decode up to the
 *      NUL terminator of str.
 * out  destination buffer, or NULL to decode in place over str.
 *
 * Every escape decodes to fewer bytes than it occupies in the input, so the
 * result (including the NUL terminator that is always appended) needs at
 * most (end - str) + 1 bytes.
 *
 * Returns 0 when the whole input was decoded, -1 when it is malformed
 * (for example a backslash-u escape without four hex digits, or a trailing
 * lone backslash). The output is NUL terminated in both cases.
 */
int json_parse_string(char * str, char *end, char * out)
{
        char *p = str, *pe = end ? end : str + strlen( str ), *eof = pe;
        int cs;
        uint16_t u = 0;         /* accumulator for the 4 hex digits of a unicode escape */

        out = out ? out : str;
        %%{
                machine json_str;

                # One hex digit, shifted into the accumulator.
                xxdigit = (
                        ([0-9] @{ u = (u << 4) | (fc - '0'); }) |
                        ([a-f] @{ u = (u << 4) | (fc - 'a' + 0xa); }) |
                        ([A-F] @{ u = (u << 4) | (fc - 'A' + 0xa); })
                );
                # Exactly four hex digits; emitted as UTF-8 on the last one.
                utf16 = ( xxdigit{4} ) >{ u = 0; } @{ out = json_append_utf8_glyph(out, u); };

                normal = any @{*out++ = fc;};
                # The last alternative copies any other escaped character
                # verbatim, so it must exclude every letter handled above;
                # 'r' was missing, which made "\r" emit 'r' and then CR.
                escape =
                                ('\\' %{ *out++ = '\\'; } ) |
                                ('t' %{ *out++ = '\t'; } )  |
                                ('b' %{ *out++ = '\b'; } )  |
                                ('f' %{ *out++ = '\f'; } )  |
                                ('n' %{ *out++ = '\n'; } )  |
                                ('r' %{ *out++ = '\r'; } )  |
                                ('u' utf16 ) |
                                ( normal -- [\\tbfnru] )
                        ;
                main := (
                        ('\\' escape) |
                        ( normal -- '\\' )
                )*;

                # Initialize and execute.
                write init;
                write exec;
        }%%
        *out = 0;

        /* Anything short of a final state means the input was rejected. */
        return cs < json_str_first_final ? -1 : 0;
}

/*
 * Ask Ragel to emit the static data (state constants and any tables) for
 * the "json" machine at this point in the generated file.
 */
%%{
        machine json;
        write data;
}%%

/*
 * Parse the extended-JSON text in str and report every element to the
 * callbacks of driver d.
 *
 * d    driver; open_object, close_object, open_array, close_array, set_name
 *      and set_value are called unconditionally, while add_flag, open_data,
 *      add_data and close_data are optional and skipped when NULL.
 * str  NUL terminated input text.
 *
 * On top of plain JSON the grammar accepts unquoted identifiers as field
 * names, an optional "(flag, flag...)" list after a field name, hexadecimal
 * integers, floats with an optional f/d suffix, base64 data between a pair
 * of '%' characters, and a trailing comma in arrays and objects.
 *
 * String values are handed to the driver as a raw start/end span that still
 * contains its backslash escapes.
 *
 * Returns 0 when the whole input was accepted, -1 on a syntax error or when
 * arrays/objects are nested deeper than JSON_MAX_DEPTH.
 */
int json_parse( json_driver_t *d, char * str )
{
        enum { JSON_MAX_DEPTH = 32 };   /* capacity of the fcall return stack */

        char *p = str, *pe = str + strlen( str ), *eof = pe;
        int cs;
        int stack[JSON_MAX_DEPTH], top = 0;
        int integer_sign = 1;   // for integer decode
        char * float_start = NULL;
        json_driver_value_t v;
        uint32_t b64 = 0;
        int b64_cnt = 0;

        %%{
                machine json;

                # fcall pushes its return state on stack[]; without this
                # guard, input nested deeper than the array would write past
                # its end.
                prepush {
                        if (top >= JSON_MAX_DEPTH) {
                                printf("### JSON Error : nesting deeper than %d levels\n", JSON_MAX_DEPTH);
                                return -1;
                        }
                }

                action obj_field_list_start { d->open_object(d); }
                action obj_field_list_done { d->close_object(d); }
                action obj_value_list_start { d->open_array(d); }
                action obj_value_list_done { d->close_array(d); }
                action obj_create_name { d->set_name(d, &v); }
                action obj_set_flag { if (d->add_flag) d->add_flag(d, &v); }
                action obj_set_string { d->set_value(d, json_driver_type_string, &v); }
                action obj_set_integer { d->set_value(d, json_driver_type_integer, &v); }
                action obj_set_float { d->set_value(d, json_driver_type_float, &v); }
                action obj_set_hex { d->set_value(d, json_driver_type_hex, &v); }
                action obj_set_true { v.u.v_bool = 1; d->set_value(d, json_driver_type_bool, &v); }
                action obj_set_false { v.u.v_bool = 0; d->set_value(d, json_driver_type_bool, &v); }
                action obj_set_null { d->set_value(d, json_driver_type_null, NULL); }

                action obj_start_data { if (d->open_data) d->open_data(d); }
                # Emit the b64_cnt decoded bytes of the current 24-bit group,
                # most significant byte first.
                action obj_flush_data { if (d->add_data) for (int s=16,i = 0; i<b64_cnt; i++,s-=8) d->add_data(d, (b64 >> s) & 0xff); }
                action obj_end_data { if (d->close_data) d->close_data(d); }

                W = [ \t\n]**;

                #
                # quoted or unquoted string
                #
                action str_init { v.u.v_str.start = v.u.v_str.end = fpc; }
                action str_done { v.u.v_str.end = fpc; }

                # The body is any run of characters other than '"' and '\',
                # or a backslash followed by any character. (The previous
                # '\"' literal is just '"' to Ragel, which made the body
                # match anything and left the closing quote ambiguous.)
                string = '"' ((([^"\\] | ('\\' any))*) >str_init %str_done)  '"';
                ident = ((alpha | '_') (alnum | '_')*) >str_init %str_done;

                #
                #       negative/positive Integer
                #
                action integer_init { v.u.v_int = 0; integer_sign = 1; }
                action integer_minus { integer_sign = -1; }
                action integer_digit { v.u.v_int = (v.u.v_int * 10) + (fc - '0'); }
                action integer_done {  v.u.v_int *= integer_sign; }

                integer = (('-' @integer_minus | '+')? (digit+ @integer_digit))
                        >integer_init %integer_done;

                #
                # hex integer
                #
                xxdigit = (
                        ([0-9] @{ v.u.v_int = (v.u.v_int << 4) | fc - '0'; }) |
                        ([a-f] @{ v.u.v_int = (v.u.v_int << 4) | fc - 'a' + 0xa; }) |
                        ([A-F] @{ v.u.v_int = (v.u.v_int << 4) | fc - 'A' + 0xa; })
                );
                hex = (('-' @integer_minus | '+')?( '0x' xxdigit+))
                        >integer_init %integer_done;

                #
                # float/double value
                #
                action float_init { float_start = fpc; }
                action float_done { sscanf(float_start, "%lf", &v.u.v_float); }
                #
                # float values
                #
                float = (
                        ('-' | '+')? digit* '.' digit+ [fd]?
                ) >float_init %float_done;

                #
                # base64 decoder
                #
                base64_char = (
                        ([A-Z] @{ b64 = (b64 << 6) | (fc - 'A'); }) |
                        ([a-z] @{ b64 = (b64 << 6) | (fc - 'a' + 26 ); }) |
                        ([0-9] @{ b64 = (b64 << 6) | (fc - '0' + 52 ); }) |
                        ('+' @{ b64 = (b64 << 6) | 62; }) |
                        ('/' @{ b64 = (b64 << 6) | 63; })
                );
                base64_pad = '=' @{ b64 = (b64 << 6); };
                base64_four = (
                        base64_char base64_char base64_char base64_char
                ) %{ b64_cnt = 3; } %obj_flush_data;
                base64_padder = (
                        base64_char base64_char
                        (
                                (( base64_char base64_pad )
                                        %{ b64_cnt = 2; } ) |
                                (( base64_pad base64_pad )
                                        %{ b64_cnt = 1; } )
                        )
                ) %obj_flush_data;

                base64 = ( base64_four** (base64_four | base64_padder) ) >{b64 = 0;}
                                %err{ printf("### base64 Error : '%s'\n", p); };

                #
                # JSON value, extended
                #
                # '{' and '[' are handed back (fhold) and re-read by the
                # sub-machine that fcall enters.
                json_value = (
                        (string %obj_set_string) |
                        (integer %obj_set_integer) |
                        (hex %obj_set_hex ) |
                        (float %obj_set_float) |
                        ('true' %obj_set_true) |
                        ('false' %obj_set_false) |
                        ('null' %obj_set_null) |
                        ('{' @{ fhold; fcall obj_field_list; } ) |
                        ('[' @{ fhold; fcall json_value_list; } ) |
                        (('%' (W base64)* W '%') >obj_start_data %obj_end_data)
                );

                json_value_list := (
                        '[' (
                                '' |
                                (W json_value (W ',' W json_value)* )
                        ) W ','? W ']'
                ) >obj_value_list_start @obj_value_list_done @{ fret; }
                                %err{ printf("### Array[%d] Error : '%s'\n", top, p); };

                obj_field_flag = ( ident ) %obj_set_flag;
                obj_field_flags = (
                        '(' W obj_field_flag (W ',' W obj_field_flag)** ')'
                );
                obj_field = ((string | ident) %obj_create_name) W obj_field_flags? W ':' W json_value;

                obj_field_list := (
                        '{' (
                                '' |
                                (W obj_field (W ',' W obj_field)** )
                        ) W ','? W '}'
                ) >obj_field_list_start @obj_field_list_done @{ fret; }
                                %err{ printf("### Object[%d] Error : '%s'\n", top, p); };

                # NOTE(review): %err attaches the error action to final
                # states only; $err may have been intended so that errors in
                # any state are reported -- confirm against the Ragel guide.
                main := (
                        W json_value
                ) %err{ printf("### JSON Error : '%s'\n", p); };

                # Initialize and execute.
                write init;
                write exec;
        }%%

        /* Anything short of a final state means the input was rejected. */
        return cs < json_first_final ? -1 : 0;
}

#ifdef JSON_TEST_UNIT
/* Test driver hook: print a field name as a quoted key followed by ": ". */
static void d_set_name(struct json_driver_t *d,
                json_driver_value_t * v)
{
        const char *first = v->u.v_str.start;
        int width = v->u.v_str.end - v->u.v_str.start;

        (void)d;        /* the test driver keeps no state */
        /* The name is a span, not a C string: bound it with width/precision. */
        printf("\"%*.*s\": ", width, width, first);
}

/* Test driver hook: echo the opening bracket of an array. */
static void d_open_array(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        fputs("[", stdout);
        fflush(stdout);
}

/* Test driver hook: echo the opening brace of an object. */
static void d_open_object(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        fputs("{", stdout);
        fflush(stdout);
}

/*
 * Test driver hook: print one parsed value according to its type.
 *
 * d     driver (unused by the test driver).
 * type  one of the json_driver_type_* constants; unknown types print nothing.
 * v     the value; not dereferenced for json_driver_type_null.
 *
 * String values arrive as a raw span that still contains its backslash
 * escapes, so they are decoded into a scratch buffer before printing. The
 * decoder never produces more bytes than it consumes, so span length + 1 is
 * always enough; the previous fixed 256-byte stack buffer overflowed on
 * longer strings.
 */
static void d_set_value(struct json_driver_t *d,
                int type,
                json_driver_value_t * v)
{
        (void)d;

        switch (type) {
                case json_driver_type_null:
                        printf("null, ");
                        break;
                case json_driver_type_bool:
                        printf("%s, ", v->u.v_bool ? "true" : "false");
                        break;
                case json_driver_type_integer:
                        printf("%d, ", (int)v->u.v_int);
                        break;
                case json_driver_type_hex:
                        printf("0x%x, ", (int)v->u.v_int);
                        break;
                case json_driver_type_float:
                        printf("%f, ", (float)v->u.v_float);
                        break;
                case json_driver_type_string: {
                        size_t raw_len = (size_t)(v->u.v_str.end - v->u.v_str.start);
                        char *buf = malloc(raw_len + 1);

                        if (!buf)
                                break;  /* out of memory: skip this value */
                        json_parse_string(v->u.v_str.start, v->u.v_str.end, buf);
                        printf("\"%s\": ", buf);
                        free(buf);
                }       break;
                default:
                        break;
        }
        fflush(stdout);
}

/* Test driver hook: echo the closing bracket of an array plus a separator. */
static void d_close_array(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        fputs("],", stdout);
        fflush(stdout);
}

/* Test driver hook: echo the closing brace of an object plus a separator. */
static void d_close_object(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        fputs("},", stdout);
        fflush(stdout);
}

/* Test driver hook: print the marker that opens a binary data section. */
static void d_open_data(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        printf("%% '");
        fflush(stdout);
}
/* Test driver hook: print one decoded data byte as a raw character. */
static void d_add_data(struct json_driver_t *d, uint8_t data)
{
        (void)d;        /* the test driver keeps no state */
        putchar(data);
        fflush(stdout);
}
/* Test driver hook: print the marker that closes a binary data section. */
static void d_close_data(struct json_driver_t *d)
{
        (void)d;        /* the test driver keeps no state */
        printf("' %%,");
        fflush(stdout);
}

/*
 * Driver used by the unit test: every hook simply echoes what the parser
 * reports to stdout. Members not listed here are left zero-initialized.
 */
json_driver_t driver = {
        /* containers */
        .open_object = d_open_object,
        .close_object = d_close_object,
        .open_array = d_open_array,
        .close_array = d_close_array,

        /* fields and values */
        .set_name = d_set_name,
        .set_value = d_set_value,

        /* binary data sections */
        .open_data = d_open_data,
        .add_data = d_add_data,
        .close_data = d_close_data,
};

/*
 * Unit-test entry point: treat every command line argument as a document,
 * parse it with the echoing test driver and end its output with a newline.
 */
int main(int argc, char * argv[])
{
        int arg = 1;

        while (arg < argc) {
                char *text = argv[arg++];

                printf("### parsing '%s'\n", text);
                json_parse(&driver, text);
                putchar('\n');
        }
        return 0;
}

#endif

_______________________________________________
ragel-users mailing list
[email protected]
http://www.complang.org/mailman/listinfo/ragel-users

Reply via email to