Hello everyone,
I was very impressed with Val Schorre's META-II paper that Dr. Kay gave me
to read, so I built a version of it for C; the metacircular part of it
fits on half a sheet of A4 or Letter paper. Here it is:
.syntax meta
arg = '$' <'emit_token();'>
| .string <'emit(' $ ');'>;
output = '<' *arg '>' <'emit_nl();'>;
exp3 = .id <'meta_' $ '();'>
| .string <'read_literal(' $ ');'>
| '.id' <'read_id();'>
| '.number' <'read_number();'>
| '.string' <'read_string();'>
| '(' exp1 ')'
| '.e' <'test_flag = 1;'>
| '*' <'do {'>
exp3 <'} while (test_flag);'>
<'test_flag = 1;'>;
exp2 = ( exp3 <'if (test_flag) {'>
| output <'if (1) {'> )
*( exp3 <'error_if_false();'>
| output )
<'}'>;
exp1 = <'do {'> exp2
*( '|' <'if (test_flag) { break; }'> exp2 )
<'} while (0);'>;
stat = .id <'void meta_' $ '(void)'>
<'{'>
'=' exp1 ';'
<'}'>;
program = '.syntax' .id <'#include "support.h"'>
*stat
'.end';
.end
It can also be found at https://gist.github.com/4129159
Attached are the bootstrapping C code and support files. Error recovery is
so far non-existent and there may be bugs lurking in there. Nevertheless, I
wanted to share this with everyone. (This code is also in the public
domain.)
Best,
Long Nguyen
#include "support.h"
/*
 * arg = '$' <'emit_token();'> | .string <'emit(' $ ');'>;
 *
 * Recognizes one argument of an output directive and emits the C statement
 * that reproduces it. Leaves test_flag at 0 when neither alternative matches.
 */
void meta_arg(void)
{
    /* alternative 1: '$' copies the last recognized token */
    read_literal("$");
    if (test_flag) {
        emit("emit_token();");
        emit_nl();
        return;
    }

    /* alternative 2: a quoted string is emitted verbatim */
    read_string();
    if (!test_flag) {
        return;
    }
    emit("emit(");
    emit_token();
    emit(");");
    emit_nl();
}
/*
 * output = '<' *arg '>' <'emit_nl();'>;
 *
 * Recognizes an output directive. Once the opening '<' has been seen the
 * closing '>' is mandatory; its absence is reported through error_if_false().
 */
void meta_output(void)
{
    read_literal("<");
    if (!test_flag) {
        return;
    }

    /* *arg: consume arguments until one fails to match */
    meta_arg();
    while (test_flag) {
        meta_arg();
    }
    test_flag = 1;      /* a repetition always succeeds */
    error_if_false();

    read_literal(">");
    error_if_false();

    emit("emit_nl();");
    emit_nl();
}
/*
 * exp3 = .id | .string | '.id' | '.number' | '.string'
 *      | '(' exp1 ')' | '.e' | '*' exp3;
 *
 * Recognizes a single primary element of a rule and emits the C code that
 * recognizes that element. The alternatives are tried in order; the first one
 * that matches wins. test_flag is 0 on return when none matched.
 */
void meta_exp3(void)
{
    /* rule reference -> call of the generated rule function */
    read_id();
    if (test_flag) {
        emit("meta_");
        emit_token();
        emit("();");
        emit_nl();
        return;
    }

    /* quoted literal -> read_literal("...") */
    read_string();
    if (test_flag) {
        emit("read_literal(");
        emit_token();
        emit(");");
        emit_nl();
        return;
    }

    /* built-in token classes */
    read_literal(".id");
    if (test_flag) {
        emit("read_id();");
        emit_nl();
        return;
    }

    read_literal(".number");
    if (test_flag) {
        emit("read_number();");
        emit_nl();
        return;
    }

    read_literal(".string");
    if (test_flag) {
        emit("read_string();");
        emit_nl();
        return;
    }

    /* parenthesized sub-expression */
    read_literal("(");
    if (test_flag) {
        meta_exp1();
        error_if_false();
        read_literal(")");
        error_if_false();
        return;
    }

    /* '.e' always succeeds */
    read_literal(".e");
    if (test_flag) {
        emit("test_flag = 1;");
        emit_nl();
        return;
    }

    /* '*' exp3: repetition, which succeeds even after zero matches */
    read_literal("*");
    if (!test_flag) {
        return;
    }
    emit("do {");
    emit_nl();
    meta_exp3();
    error_if_false();
    emit("} while (test_flag);");
    emit_nl();
    emit("test_flag = 1;");
    emit_nl();
}
/*
 * exp2 = ( exp3 <'if (test_flag) {'> | output <'if (1) {'> )
 *        *( exp3 <'error_if_false();'> | output )
 *        <'}'>;
 *
 * Recognizes a sequence of elements. The first element decides whether the
 * sequence applies at all, so the generated code guards the rest with an
 * `if`. Every later exp3 element is mandatory and is followed by a call to
 * error_if_false() in the generated code.
 */
void meta_exp2(void)
{
    /* first element: exp3 or output */
    meta_exp3();
    if (test_flag) {
        emit("if (test_flag) {");
        emit_nl();
    } else {
        meta_output();
        if (test_flag) {
            emit("if (1) {");
            emit_nl();
        }
    }
    if (!test_flag) {
        return;
    }

    /* remaining elements: repeat until neither exp3 nor output matches */
    for (;;) {
        meta_exp3();
        if (test_flag) {
            emit("error_if_false();");
            emit_nl();
            continue;
        }
        meta_output();
        if (!test_flag) {
            break;
        }
    }
    test_flag = 1;      /* a repetition always succeeds */
    error_if_false();

    emit("}");
    emit_nl();
}
/*
 * exp1 = <'do {'> exp2 *( '|' <'if (test_flag) { break; }'> exp2 )
 *        <'} while (0);'>;
 *
 * Recognizes a list of alternatives separated by '|'. The generated code
 * wraps the alternatives in `do { ... } while (0);` so that a successful
 * alternative can `break` past the remaining ones.
 */
void meta_exp1(void)
{
    emit("do {");
    emit_nl();

    /* at least one alternative is required */
    meta_exp2();
    error_if_false();

    /* further alternatives, each introduced by '|' */
    for (;;) {
        read_literal("|");
        if (!test_flag) {
            break;
        }
        emit("if (test_flag) { break; }");
        emit_nl();
        meta_exp2();
        error_if_false();
    }
    test_flag = 1;      /* a repetition always succeeds */
    error_if_false();

    emit("} while (0);");
    emit_nl();
}
/*
 * stat = .id <'void meta_' $ '(void)'> <'{'> '=' exp1 ';' <'}'>;
 *
 * Recognizes one rule definition and emits it as a C function named
 * meta_<rule>. test_flag is 0 on return when no identifier starts the rule.
 */
void meta_stat(void)
{
    read_id();
    if (!test_flag) {
        return;
    }

    /* function header and opening brace */
    emit("void meta_");
    emit_token();
    emit("(void)");
    emit_nl();
    emit("{");
    emit_nl();

    /* '=' exp1 ';' are all mandatory once the rule name was seen */
    read_literal("=");
    error_if_false();
    meta_exp1();
    error_if_false();
    read_literal(";");
    error_if_false();

    emit("}");
    emit_nl();
}
/*
 * program = '.syntax' .id <'#include "support.h"'> *stat '.end';
 *
 * Top-level rule: recognizes a whole grammar and emits the matching C parser.
 * test_flag is 0 on return when the input does not start with '.syntax'.
 */
void meta_program(void)
{
    read_literal(".syntax");
    if (!test_flag) {
        return;
    }

    read_id();
    error_if_false();
    emit("#include \"support.h\"");
    emit_nl();

    /* *stat: translate rule definitions until one fails to match */
    meta_stat();
    while (test_flag) {
        meta_stat();
    }
    test_flag = 1;      /* a repetition always succeeds */
    error_if_false();

    read_literal(".end");
    error_if_false();
}
// end
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// stream that emit(), emit_token() and emit_nl() write the generated code to
FILE *output = NULL;
// complete input text; main() loads it and appends a terminating '\0'
char *source = NULL;
// index into source of the next character to be examined
int pos = 0;
// heap-allocated copy of the most recently recognized token (see make_token)
char *token = NULL;
// outcome of the latest recognition attempt: 1 = matched, 0 = did not match
int test_flag = 0;
// advances pos past any run of spaces, tabs, carriage returns and newlines
void skip_whitespace(void)
{
    for (;;) {
        char c = source[pos];
        if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
            return;
        }
        pos++;
    }
}
// Replaces token with a freshly allocated, NUL-terminated copy of
// source[start_pos .. pos-1].
//
// start_pos: index in source where the recognized text begins; the text ends
//            just before the current value of pos.
//
// The new buffer is allocated before the old one is released, so token never
// points at freed memory. Terminates the program if allocation fails.
void make_token(int start_pos)
{
    size_t length = (size_t)(pos - start_pos);
    char *fresh = malloc(length + 1);

    if (fresh == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }
    memcpy(fresh, &source[start_pos], length);
    fresh[length] = '\0';

    free(token);
    token = fresh;
}
// Releases the global resources: closes the output stream and frees the
// source and token buffers.
//
// Safe to call when output was never opened and safe to call more than once,
// because every pointer is reset to NULL after it is released. A failure to
// close the output stream (for example a failed final flush) is reported on
// stderr, since it means the generated file may be incomplete.
void shutdown(void)
{
    if (output != NULL) {
        if (fclose(output) != 0) {
            fprintf(stderr, "error closing output file\n");
        }
        output = NULL;
    }
    free(source);
    source = NULL;
    free(token);
    token = NULL;
}
// Writes the most recently recognized token to the output.
//
// A token that starts with a single quote is a string; it is written as a
// double-quoted C string literal, with embedded double quotes and backslashes
// escaped. Any other token is written unchanged.
void emit_token(void)
{
    const char *p;

    // not a string: emit as-is
    if (token[0] != '\'') {
        fputs(token, output);
        return;
    }

    fputc('"', output);
    // copy the contents up to (not including) the closing single quote
    for (p = token + 1; *p != '\0' && *p != '\''; p++) {
        if (*p == '"' || *p == '\\') {
            fputc('\\', output);
        }
        fputc(*p, output);
    }
    fputc('"', output);
}
// writes str to the output unchanged (no newline is appended)
void emit(const char *str)
{
    fputs(str, output);
}
// ends the current line of output
void emit_nl(void)
{
    fputc('\n', output);
}
// Tries to recognize the exact text `literal` at the current position, after
// skipping leading whitespace.
//
// On a match, pos moves past the literal, token receives a copy of it and
// test_flag is set to 1. Otherwise pos stays at the first non-whitespace
// character and test_flag is set to 0.
void read_literal(const char *literal)
{
    size_t wanted = strlen(literal);
    int start;

    skip_whitespace();
    start = pos;

    // source is NUL-terminated, so strncmp stops at the end of input
    if (strncmp(&source[start], literal, wanted) != 0) {
        test_flag = 0;
        return;
    }

    pos = start + (int)wanted;
    make_token(start);
    test_flag = 1;
}
// Tries to recognize an identifier: a letter followed by any number of
// letters and digits. Leading whitespace is skipped first.
//
// On a match, pos moves past the identifier, token receives a copy of it and
// test_flag is set to 1. Otherwise test_flag is set to 0 and no characters
// other than whitespace are consumed.
void read_id(void)
{
    int entry_pos;
    skip_whitespace();
    // recognize initial alphabetic character
    // (the upper-case range is 'A'..'Z'; it was previously written 'A'..'A',
    // which rejected every capital letter except 'A')
    entry_pos = pos;
    if (('A' <= source[pos] && source[pos] <= 'Z') ||
        ('a' <= source[pos] && source[pos] <= 'z')) {
        pos++;
        test_flag = 1;
    } else {
        test_flag = 0;
        return;
    }
    // recognize alphanumeric characters
    while (('A' <= source[pos] && source[pos] <= 'Z') ||
           ('a' <= source[pos] && source[pos] <= 'z') ||
           ('0' <= source[pos] && source[pos] <= '9')) {
        pos++;
    }
    // copy recognized identifier into token
    make_token(entry_pos);
}
// Tries to recognize an unsigned number: one or more decimal digits, after
// skipping leading whitespace.
//
// On a match, pos moves past the digits, token receives a copy of them and
// test_flag is set to 1. Otherwise test_flag is set to 0.
void read_number(void)
{
    int start;

    skip_whitespace();
    start = pos;

    // at least one digit is required
    if (source[pos] < '0' || source[pos] > '9') {
        test_flag = 0;
        return;
    }
    test_flag = 1;

    // consume the first digit and every digit that follows it
    do {
        pos++;
    } while (source[pos] >= '0' && source[pos] <= '9');

    make_token(start);
}
// Tries to recognize a string delimited by single quotes, after skipping
// leading whitespace. The string may not contain a single quote.
//
// On a match, pos moves past the closing quote, token receives a copy of the
// string including both quotes, and test_flag is set to 1. If the opening
// quote is missing, or the input ends before a closing quote, test_flag is
// set to 0; in the unterminated case pos is rewound to the opening quote.
void read_string(void)
{
    int start;

    skip_whitespace();
    start = pos;

    // opening quote
    if (source[pos] != '\'') {
        test_flag = 0;
        return;
    }
    pos++;

    // contents: everything up to the next quote or the end of input
    while (source[pos] != '\0' && source[pos] != '\'') {
        pos++;
    }

    // the scan can only have stopped on a quote or on the terminating NUL
    if (source[pos] != '\'') {
        pos = start;
        test_flag = 0;
        return;
    }

    // closing quote
    pos++;
    make_token(start);
    test_flag = 1;
}
// Aborts the run when the last recognition attempt failed: prints "error" to
// stderr, releases resources through shutdown() and exits with status 1.
// Does nothing when test_flag is set.
void error_if_false(void)
{
    if (test_flag) {
        return;
    }
    fprintf(stderr, "error\n");
    shutdown();
    exit(1);
}
// Forward declarations of parser rule functions that are called before their
// definitions are seen: main() calls meta_program(), and meta_exp3() calls
// meta_exp1().
void meta_program(void);
void meta_exp1(void);
// Entry point: meta <input> <output>
//
// Loads the whole input file into source, opens the output file, runs the
// parser starting at meta_program() and releases all resources.
//
// Returns 0 on success. Exits with status 1 on a usage error, when a file
// cannot be opened or read, when memory cannot be allocated, or when the
// input is not recognized.
int main(int argc, char *argv[])
{
    FILE *input;
    long length;
    size_t nread;

    if (argc < 3) {
        fprintf(stderr, "usage: meta <input> <output>\n");
        exit(1);
    }

    // open input and output
    input = fopen(argv[1], "r");
    if (input == NULL) {
        fprintf(stderr, "invalid input file\n");
        exit(1);
    }
    output = fopen(argv[2], "w");
    if (output == NULL) {
        fprintf(stderr, "invalid output file\n");
        fclose(input);
        exit(1);
    }

    // determine the input size; ftell returns -1 on failure, which must not
    // be used as a buffer length
    length = -1;
    if (fseek(input, 0, SEEK_END) == 0) {
        length = ftell(input);
    }
    if (length < 0 || fseek(input, 0, SEEK_SET) != 0) {
        fprintf(stderr, "invalid input file\n");
        fclose(input);
        shutdown();
        exit(1);
    }

    // initially we have empty token; token is never NULL
    source = malloc((size_t)length + 1);
    token = malloc(1);
    if (source == NULL || token == NULL) {
        fprintf(stderr, "out of memory\n");
        fclose(input);
        shutdown();
        exit(1);
    }
    token[0] = '\0';

    // read entire input into source; terminate after the bytes actually read,
    // which can be fewer than length (e.g. text-mode newline translation)
    nread = fread(source, 1, (size_t)length, input);
    source[nread] = '\0';
    if (ferror(input)) {
        fprintf(stderr, "invalid input file\n");
        fclose(input);
        shutdown();
        exit(1);
    }
    fclose(input);

    // run meta; meta_program() leaves test_flag at 0 when the input does not
    // even start with '.syntax', which must not be reported as success
    meta_program();
    error_if_false();

    shutdown();
    return 0;
}
/* end */
_______________________________________________
fonc mailing list
[email protected]
http://vpri.org/mailman/listinfo/fonc