Hello everyone,

I was very impressed with Val Schorre's META-II paper that Dr. Kay gave me
to read, so I built a version of it for C. The metacircular part fits on
half a sheet of A4 or Letter paper. Here it is:

.syntax meta

arg = '$' <'emit_token();'>
    | .string <'emit(' $ ');'>;

output = '<' *arg '>' <'emit_nl();'>;

exp3 = .id <'meta_' $ '();'>
     | .string <'read_literal(' $ ');'>
     | '.id' <'read_id();'>
     | '.number' <'read_number();'>
     | '.string' <'read_string();'>
     | '(' exp1 ')'
     | '.e' <'test_flag = 1;'>
     | '*' <'do {'>
       exp3 <'} while (test_flag);'>
       <'test_flag = 1;'>;

exp2 = ( exp3 <'if (test_flag) {'>
       | output <'if (1) {'> )
       *( exp3 <'error_if_false();'>
        | output )
       <'}'>;

exp1 = <'do {'> exp2
       *( '|' <'if (test_flag) { break; }'> exp2 )
       <'} while (0);'>;

stat = .id <'void meta_' $ '(void)'>
       <'{'>
       '=' exp1 ';'
       <'}'>;

program = '.syntax' .id <'#include "support.h"'>
          *stat
          '.end';

.end

It can also be found at https://gist.github.com/4129159

Attached are the bootstrapping C code and support files. Error recovery is
so far non-existent and there may be bugs lurking in there. Nevertheless, I
wanted to share this with everyone. (This code is also in the public
domain.)

Best,
Long Nguyen
#include "support.h"

// Parser for the grammar rule
//   arg = '$' <'emit_token();'> | .string <'emit(' $ ');'>;
// On return test_flag is 1 if either alternative matched, 0 otherwise.
void meta_arg(void)
{
	// first alternative: '$' stands for "emit the current token"
	read_literal("$");
	if (test_flag) {
		emit("emit_token();");
		emit_nl();
		return;
	}

	// second alternative: a quoted string is emitted literally
	read_string();
	if (!test_flag) {
		return;
	}
	emit("emit(");
	emit_token();
	emit(");");
	emit_nl();
}

// Parser for the grammar rule
//   output = '<' *arg '>' <'emit_nl();'>;
// Leaves test_flag at 0 when the input does not start with '<'. Once '<' has
// been seen, a missing '>' is fatal (error_if_false).
void meta_output(void)
{
	read_literal("<");
	if (!test_flag) {
		return;
	}

	// *arg: consume arguments until one fails to match
	for (;;) {
		meta_arg();
		if (!test_flag) {
			break;
		}
	}
	// zero or more repetitions always counts as a success
	test_flag = 1;
	error_if_false();

	read_literal(">");
	error_if_false();

	emit("emit_nl();");
	emit_nl();
}

// Parser for the grammar rule exp3, whose alternatives are tried in order:
//   .id | .string | '.id' | '.number' | '.string' | '(' exp1 ')' | '.e'
//   | '*' exp3
// Each alternative emits the C code that recognizes the matched construct.
// On return test_flag is 1 if some alternative matched, 0 if none did.
void meta_exp3(void)
{
	// built-in recognizers: grammar keyword -> call emitted for it
	static const struct {
		const char *keyword;
		const char *call;
	} builtins[] = {
		{ ".id", "read_id();" },
		{ ".number", "read_number();" },
		{ ".string", "read_string();" },
	};
	int i;

	// rule reference: call the parser function of that rule
	read_id();
	if (test_flag) {
		emit("meta_");
		emit_token();
		emit("();");
		emit_nl();
		return;
	}

	// quoted string: match it literally in the input
	read_string();
	if (test_flag) {
		emit("read_literal(");
		emit_token();
		emit(");");
		emit_nl();
		return;
	}

	for (i = 0; i < (int)(sizeof builtins / sizeof builtins[0]); i++) {
		read_literal(builtins[i].keyword);
		if (test_flag) {
			emit(builtins[i].call);
			emit_nl();
			return;
		}
	}

	// parenthesized sub-expression; nothing is emitted here directly
	read_literal("(");
	if (test_flag) {
		meta_exp1();
		error_if_false();

		read_literal(")");
		error_if_false();
		return;
	}

	// '.e' (empty) always succeeds in the generated parser
	read_literal(".e");
	if (test_flag) {
		emit("test_flag = 1;");
		emit_nl();
		return;
	}

	// '*' exp3: repetition, generated as a do/while on test_flag
	read_literal("*");
	if (!test_flag) {
		return;
	}
	emit("do {");
	emit_nl();

	meta_exp3();
	error_if_false();

	emit("} while (test_flag);");
	emit_nl();

	emit("test_flag = 1;");
	emit_nl();
}

// Parser for the grammar rule
//   exp2 = ( exp3 <'if (test_flag) {'> | output <'if (1) {'> )
//          *( exp3 <'error_if_false();'> | output )
//          <'}'>;
// The first element of a sequence opens an `if` block in the generated code;
// every later exp3 is followed by a generated error_if_false() call.
// Leaves test_flag at 0 if the sequence has no first element.
void meta_exp2(void)
{
	// first element: decides whether the whole sequence is attempted
	meta_exp3();
	if (test_flag) {
		emit("if (test_flag) {");
		emit_nl();
	} else {
		meta_output();
		if (!test_flag) {
			return;
		}
		emit("if (1) {");
		emit_nl();
	}

	// remaining elements, until neither exp3 nor output matches
	for (;;) {
		meta_exp3();
		if (test_flag) {
			emit("error_if_false();");
			emit_nl();
			continue;
		}

		meta_output();
		if (!test_flag) {
			break;
		}
	}
	// zero or more repetitions always counts as a success
	test_flag = 1;
	error_if_false();

	emit("}");
	emit_nl();
}

// Parser for the grammar rule
//   exp1 = <'do {'> exp2 *( '|' <'if (test_flag) { break; }'> exp2 )
//          <'} while (0);'>;
// Alternatives are generated inside a do { ... } while (0) block so that a
// successful alternative can `break` past the remaining ones.
// A missing exp2 is fatal (error_if_false), so this returns with test_flag 1.
void meta_exp1(void)
{
	emit("do {");
	emit_nl();

	// the first alternative is mandatory
	meta_exp2();
	error_if_false();

	// further alternatives, each introduced by '|'
	for (;;) {
		read_literal("|");
		if (!test_flag) {
			break;
		}
		emit("if (test_flag) { break; }");
		emit_nl();

		meta_exp2();
		error_if_false();
	}
	// zero or more repetitions always counts as a success
	test_flag = 1;
	error_if_false();

	emit("} while (0);");
	emit_nl();
}

// Parser for the grammar rule
//   stat = .id <'void meta_' $ '(void)'> <'{'> '=' exp1 ';' <'}'>;
// Emits one C function definition per grammar rule. Leaves test_flag at 0 if
// the input does not start with an identifier; after the identifier, a missing
// '=', exp1 or ';' is fatal (error_if_false).
void meta_stat(void)
{
	read_id();
	if (!test_flag) {
		return;
	}

	// function header, named after the rule
	emit("void meta_");
	emit_token();
	emit("(void)");
	emit_nl();

	emit("{");
	emit_nl();

	read_literal("=");
	error_if_false();

	// function body
	meta_exp1();
	error_if_false();

	read_literal(";");
	error_if_false();

	emit("}");
	emit_nl();
}

// Parser for the grammar rule
//   program = '.syntax' .id <'#include "support.h"'> *stat '.end';
// Returns with test_flag 0 if the input does not begin with '.syntax'.
// After '.syntax', a missing name or a missing '.end' is fatal
// (error_if_false).
void meta_program(void)
{
	read_literal(".syntax");
	if (!test_flag) {
		return;
	}

	// the grammar name is required but not used in the output
	read_id();
	error_if_false();

	emit("#include \"support.h\"");
	emit_nl();

	// *stat: translate rules until one fails to match
	for (;;) {
		meta_stat();
		if (!test_flag) {
			break;
		}
	}
	// zero or more repetitions always counts as a success
	test_flag = 1;
	error_if_false();

	read_literal(".end");
	error_if_false();
}

// end
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

// Shared state of the support runtime.

// stream that emit(), emit_nl() and emit_token() write to; opened in main()
FILE *output = NULL;
// entire input text, NUL-terminated; allocated in main()
char *source = NULL;
// index into source of the next unread character
int pos = 0;
// most recently recognized token, heap-allocated (see make_token())
char *token = NULL;
// 1 if the last recognizer call matched, 0 if it did not
int test_flag = 0;

// Advances pos past any run of spaces, tabs, carriage returns and newlines.
// Stops at the first other character, including the terminating NUL.
void skip_whitespace(void)
{
	for (;;) {
		switch (source[pos]) {
		case '\x20':
		case '\t':
		case '\r':
		case '\n':
			pos++;
			break;
		default:
			return;
		}
	}
}

// Replaces token with a freshly allocated, NUL-terminated copy of
// source[start_pos .. pos-1].
//
// start_pos: index in source where the recognized text begins; callers pass
//            a value no greater than pos.
//
// Terminates the program with exit status 1 if memory cannot be allocated.
void make_token(int start_pos)
{
	int length = pos - start_pos;
	// allocate before releasing the old token, so token never dangles
	char *fresh = malloc((size_t)length + 1);

	if (fresh == NULL) {
		fprintf(stderr, "out of memory\n");
		// exit() flushes and closes the open streams
		exit(1);
	}
	memcpy(fresh, &source[start_pos], (size_t)length);
	fresh[length] = '\0';

	free(token);
	token = fresh;
}

// Releases everything the runtime owns: the token and source buffers and the
// output stream. Called right before the program ends.
void shutdown(void)
{
	// buffers first; free(NULL) is harmless
	free(token);
	free(source);
	// closing the stream also flushes the generated code
	fclose(output);
}

// Writes the currently recognized token to the output.
// A token that starts with a single quote is a grammar string: it is written
// as a C string literal, with double quotes around it and with every double
// quote and backslash inside it escaped. Any other token is copied verbatim.
void emit_token(void)
{
	const char *p;

	// not a string: emit as-is
	if (token[0] != '\'') {
		fputs(token, output);
		return;
	}

	fputc('"', output);
	// copy the contents between the single quotes
	for (p = token + 1; *p != '\0' && *p != '\''; p++) {
		if (*p == '"' || *p == '\\') {
			// these two characters need a backslash in C
			fputc('\\', output);
		}
		fputc(*p, output);
	}
	fputc('"', output);
}

// Writes str to the output stream, without adding a newline.
void emit(const char *str)
{
	fputs(str, output);
}

// Ends the current line of the output stream.
void emit_nl(void)
{
	fputc('\n', output);
}

// Tries to match `literal` at the current position, after skipping whitespace.
// On a match: pos moves past the literal, token becomes a copy of it and
// test_flag is set to 1.
// On a mismatch: pos is restored to just after the skipped whitespace and
// test_flag is set to 0.
void read_literal(const char *literal)
{
	const char *want;
	int start;

	skip_whitespace();
	start = pos;

	for (want = literal; *want != '\0'; want++, pos++) {
		// also fails at end of input, where source[pos] is NUL
		if (source[pos] != *want) {
			pos = start;
			test_flag = 0;
			return;
		}
	}

	// whole literal consumed
	make_token(start);
	test_flag = 1;
}

// Tries to recognize an identifier at the current position, after skipping
// whitespace: one letter (A-Z or a-z) followed by any number of letters and
// digits.
// On a match: pos moves past the identifier, token becomes a copy of it and
// test_flag is set to 1.
// On a mismatch: test_flag is set to 0 and pos stays just after the skipped
// whitespace.
void read_id(void)
{
	int entry_pos;

	skip_whitespace();
	// recognize initial alphabetic character
	entry_pos = pos;
	// the upper bound must be 'Z'; with 'A' only the letter A would count
	// as an uppercase letter
	if (('A' <= source[pos] && source[pos] <= 'Z') ||
			('a' <= source[pos] && source[pos] <= 'z')) {
		pos++;
		test_flag = 1;
	} else {
		test_flag = 0;
		return;
	}
	// recognize alphanumeric characters
	while (('A' <= source[pos] && source[pos] <= 'Z') ||
			('a' <= source[pos] && source[pos] <= 'z') ||
			('0' <= source[pos] && source[pos] <= '9')) {
		pos++;
	}
	// copy recognized identifier into token
	make_token(entry_pos);
}

// Tries to recognize a number (one or more decimal digits) at the current
// position, after skipping whitespace.
// On a match: pos moves past the digits, token becomes a copy of them and
// test_flag is set to 1.
// On a mismatch: test_flag is set to 0 and pos stays just after the skipped
// whitespace.
void read_number(void)
{
	int first_digit;

	skip_whitespace();
	first_digit = pos;

	// at least one digit is required
	test_flag = (source[pos] >= '0' && source[pos] <= '9');
	if (!test_flag) {
		return;
	}

	// consume the first digit and all that follow it
	do {
		pos++;
	} while (source[pos] >= '0' && source[pos] <= '9');

	make_token(first_digit);
}

// Tries to recognize a single-quoted string at the current position, after
// skipping whitespace. The string runs up to the next single quote; there is
// no escape for a quote inside it.
// On a match: pos moves past the closing quote, token becomes a copy of the
// string including both quotes, and test_flag is set to 1.
// On a mismatch (no opening quote, or no closing quote before the end of
// input): test_flag is set to 0 and pos stays just after the skipped
// whitespace.
void read_string(void)
{
	int open_quote;

	skip_whitespace();
	open_quote = pos;

	if (source[pos] != '\'') {
		test_flag = 0;
		return;
	}

	// step over the opening quote, then scan for the closing one
	for (pos++; source[pos] != '\0' && source[pos] != '\''; pos++) {
	}

	if (source[pos] == '\'') {
		pos++;
		make_token(open_quote);
		test_flag = 1;
		return;
	}

	// hit the end of input: unterminated string, rewind
	pos = open_quote;
	test_flag = 0;
}

// Aborts the run if the last recognizer failed: when test_flag is 0, prints
// "error" to stderr, releases resources via shutdown() and exits with
// status 1. Does nothing when test_flag is non-zero.
void error_if_false(void)
{
	if (test_flag) {
		return;
	}

	fprintf(stderr, "error\n");
	shutdown();
	exit(1);
}

// Parser functions defined in the generated code.
// meta_program is the entry point called from main() below.
void meta_program(void);
// NOTE(review): meta_exp1 is not called in this part of the file; presumably
// declared for the generated code, which calls it before defining it —
// confirm against support.h.
void meta_exp1(void);

// Entry point: meta <input> <output>
// Reads the whole grammar file argv[1] into source, runs the generated parser
// meta_program() over it and writes the generated C code to argv[2].
// Returns 0 on success. Exits with status 1 on a usage error, on any I/O or
// allocation failure, and when the input is not recognized as a grammar.
int main(int argc, char *argv[])
{
	FILE *input;
	long length;
	size_t bytes_read;

	if (argc < 3) {
		fprintf(stderr, "usage: meta <input> <output>\n");
		exit(1);
	}

	// open input and output
	input = fopen(argv[1], "r");
	if (input == NULL) {
		fprintf(stderr, "invalid input file\n");
		exit(1);
	}
	output = fopen(argv[2], "w");
	if (output == NULL) {
		fprintf(stderr, "invalid output file\n");
		fclose(input);
		exit(1);
	}

	// find the input size; ftell returns -1 on failure, which must not be
	// used as a buffer length
	if (fseek(input, 0, SEEK_END) != 0 ||
			(length = ftell(input)) < 0 ||
			fseek(input, 0, SEEK_SET) != 0) {
		fprintf(stderr, "cannot determine input size\n");
		fclose(input);
		shutdown();
		exit(1);
	}

	// read entire input into source
	source = malloc((size_t)length + 1);
	if (source == NULL) {
		fprintf(stderr, "out of memory\n");
		fclose(input);
		shutdown();
		exit(1);
	}
	bytes_read = fread(source, 1, (size_t)length, input);
	if (ferror(input)) {
		fprintf(stderr, "cannot read input file\n");
		fclose(input);
		shutdown();
		exit(1);
	}
	// terminate after what was actually read: in text mode fread may
	// deliver fewer bytes than ftell reported
	source[bytes_read] = '\0';
	fclose(input);

	// initially we have empty token; token is never NULL
	token = malloc(1);
	if (token == NULL) {
		fprintf(stderr, "out of memory\n");
		shutdown();
		exit(1);
	}
	token[0] = '\0';

	// run meta
	meta_program();
	// meta_program returns with test_flag 0 when the input does not start
	// with '.syntax'; report that instead of exiting successfully
	error_if_false();
	shutdown();
	return 0;
}

/* end */
_______________________________________________
fonc mailing list
[email protected]
http://vpri.org/mailman/listinfo/fonc

Reply via email to