[this is a resend because the mail was dropped somewhere before reaching the
mailing list]

Hi,

I wanted to use re2c to implement some matching-like functionality on a buffer
that is defined by its location in memory and its length (so the end of the
buffer cannot be detected by some symbol after the content). The matching
should find some regular expression. The match can be, for example, a string
in the middle of the buffer, e.g. ".*foobar", or two strings separated by some
other values, e.g. ".*foo.*bar". The patterns can also contain ranges, e.g.
".*bar[0-9]+foo". I would expect a buffer filled with "Bbar21fooXXXX" to be
reported as matching ".*bar[0-9]+foo".

re2c seemed to be able to implement something like this without sacrificing
too much speed. Unfortunately, my first approach doesn't seem to work and
reports "YYFILL called" for all strings.

I am currently completely baffled by the result and unsure how to proceed.
I've attached the original testcase (test.re) and a simpler form without the
"any" expressions (simple.re). Only "veryloneteststring" matches in the simple
example, but "foo" and "fooX" should match as well.

Also, the code taken from the comments of bug 2407218 (which explain how the
user's problem could be resolved) doesn't seem to work for me. The example is
attached; "a" and "abbbb" don't match and instead yield the return code that
indicates YYFILL was called.

My version of re2c is re2c 0.13.5 (0.13.5-1 from Debian)
-- 
Franz Schrober
// re2c --case-insensitive -i test.re > test.c && gcc test.c -o test && ./test

#include <stdio.h>
#include <string.h>


/*
 * matcher - run the re2c rules below over the buffer [cur, cur + len).
 *
 * This is re2c input, not plain C: the scanner code is generated from the
 * re2c block inside the function, and every return path comes either from a
 * rule action or from the matcher_stop macro.
 *
 * cur: start of the buffer; also used as the scanner cursor (YYCURSOR).
 * len: number of characters in the buffer; cur + len is used as YYLIMIT.
 *
 * Result codes, as written in the rule actions:
 *    1  any*"foo"
 *    2  "bar"
 *    3  any*"veryloneteststring"
 *    4  any*"split"any*"ted"
 *    5  "blob"
 *   99  [^] (fallback rule)
 *   72  YYFILL was invoked (see matcher_stop)
 *
 * NOTE(review): the accompanying report says every input yields 72.
 * Presumably the generated code invokes YYFILL(n) whenever fewer than n
 * characters remain before YYLIMIT, with n derived from the longest rule, so
 * short buffers bail out before any rule is tried - confirm against the re2c
 * documentation.
 */
int matcher(char *cur, size_t len)
{
/* Installed as YYFILL below: ignores its argument and makes matcher() return 72. */
#define matcher_stop(n) { return 72; }

	/* Backtracking position for the generated scanner (bound to YYMARKER). */
	char *marker;
	/* One past the last character of the buffer (bound to YYLIMIT). */
	char *cur_end = cur + len;

/*!re2c
	re2c:define:YYCTYPE  = "char";
	re2c:define:YYCURSOR = cur;
	re2c:define:YYMARKER = marker;
	re2c:define:YYLIMIT = cur_end ;
	re2c:define:YYFILL = matcher_stop;
	re2c:yyfill:enable   = 1;
	any	= [\000-\377];


	any*"foo"			{ return 1; }
	"bar"			{ return 2; }
	any*"veryloneteststring"	{ return 3; }
	any*"split"any*"ted"		{ return 4; }
	"blob"				{ return 5; }
	[^]             	{ return 99;}
*/
/* The helper macro is only meaningful inside this function. */
#undef matcher_stop
}

/*
 * match2string - translate a matcher() result code into a printable label.
 *
 * Returns a pointer to a string literal. Codes that have no entry in the
 * table yield "unknown id".
 */
char *match2string(int id)
{
	static const struct {
		int code;
		char *label;
	} labels[] = {
		{ 72, "YYFILL called" },
		{ 1, "foo" },
		{ 2, "bar" },
		{ 3, "veryloneteststring" },
		{ 4, "split.*ted" },
		{ 5, "blob" },
		{ 99, "End reached without match" },
	};
	size_t slot;

	for (slot = 0; slot < sizeof labels / sizeof labels[0]; slot++) {
		if (labels[slot].code == id)
			return labels[slot].label;
	}

	return "unknown id";
}

/*
 * Test driver: feeds each sample string to matcher() together with its
 * strlen() and prints the numeric result next to its match2string() label.
 */
int main()
{
	/* NULL-terminated list of sample inputs. */
	char *input[] = {"foo", "bar", "XXXXXXXfoo", "veryloneteststring", "Test-split: supported", "middle-foo-bar", "blobX", "blob", NULL};
	char **entry = input;

	while (*entry) {
		char *text = *entry++;
		int id = matcher(text, strlen(text));

		printf("parser says for %s %d (%s)\n", text, id, match2string(id));
	}

	return 0;
}
// re2c --case-insensitive -i simple.re > simple.c && gcc simple.c -o simple && ./simple

#include <stdio.h>
#include <string.h>


/*
 * matcher - run the re2c rules below over the buffer [cur, cur + len).
 *
 * Reduced variant of the test case: the rules are plain string literals and
 * none of them references the "any" definition.
 *
 * This is re2c input, not plain C: the scanner code is generated from the
 * re2c block inside the function, and every return path comes either from a
 * rule action or from the matcher_stop macro.
 *
 * cur: start of the buffer; also used as the scanner cursor (YYCURSOR).
 * len: number of characters in the buffer; cur + len is used as YYLIMIT.
 *
 * Result codes, as written in the rule actions:
 *    1  "foo"
 *    3  "veryloneteststring"
 *   99  [^] (fallback rule)
 *   72  YYFILL was invoked (see matcher_stop)
 *
 * NOTE(review): the accompanying report says only "veryloneteststring" is
 * matched here. Presumably the generated code invokes YYFILL(n) whenever
 * fewer than n characters remain before YYLIMIT, with n derived from the
 * longest rule, so shorter buffers return 72 before any rule is tried -
 * confirm against the re2c documentation.
 */
int matcher(char *cur, size_t len)
{
/* Installed as YYFILL below: ignores its argument and makes matcher() return 72. */
#define matcher_stop(n) { return 72; }

	/* Backtracking position for the generated scanner (bound to YYMARKER). */
	char *marker;
	/* One past the last character of the buffer (bound to YYLIMIT). */
	char *cur_end = cur + len;

/*!re2c
	re2c:define:YYCTYPE  = "char";
	re2c:define:YYCURSOR = cur;
	re2c:define:YYMARKER = marker;
	re2c:define:YYLIMIT = cur_end ;
	re2c:define:YYFILL = matcher_stop;
	re2c:yyfill:enable   = 1;
	any	= [\000-\377];


	"foo"			{ return 1; }
	"veryloneteststring"	{ return 3; }
	[^]             	{ return 99;}
*/
/* The helper macro is only meaningful inside this function. */
#undef matcher_stop
}

/*
 * match2string - human-readable name for a matcher() result code.
 *
 * Returns a pointer to a string literal; any code not listed below is
 * reported as "unknown id".
 */
char *match2string(int id)
{
	if (id == 72)
		return "YYFILL called";
	if (id == 1)
		return "foo";
	if (id == 3)
		return "veryloneteststring";
	if (id == 99)
		return "End reached without match";

	return "unknown id";
}

/*
 * Test driver: feeds each sample string to matcher() together with its
 * strlen() and prints the numeric result next to its match2string() label.
 */
int main()
{
	/* NULL-terminated list of sample inputs. */
	char *input[] = {"foo", "XXXXXXXfoo", "veryloneteststring", "fooX", NULL};
	char **entry;

	for (entry = input; *entry; entry++) {
		int id = matcher(*entry, strlen(*entry));

		printf("parser says for %s %d (%s)\n", *entry, id, match2string(id));
	}

	return 0;
}
// re2c --case-insensitive -i bug_2407218.re > bug_2407218.c && gcc bug_2407218.c -o bug_2407218 && ./bug_2407218

#include <stdio.h>
#include <string.h>


/*
 * matcher - run the re2c rules below over the buffer [cur, cur + len).
 *
 * This is re2c input, not plain C: the scanner code is generated from the
 * re2c block inside the function, and every return path comes either from a
 * rule action or from the matcher_stop macro.
 *
 * cur: start of the buffer; also used as the scanner cursor (YYCURSOR).
 * len: number of characters in the buffer; cur + len is used as YYLIMIT.
 *
 * Result codes, as written in the rule actions:
 *   1  "a"
 *   2  "b"{6}
 *   4  [^] (fallback rule)
 *   3  YYFILL was invoked (see matcher_stop)
 *
 * NOTE(review): the accompanying report says "a" and "abbbb" yield 3 instead
 * of 1. Presumably the generated code invokes YYFILL(n) whenever fewer than n
 * characters remain before YYLIMIT, with n derived from the longest rule, so
 * buffers shorter than that return 3 before any rule is tried - confirm
 * against the re2c documentation.
 */
int matcher(char *cur, size_t len)
{
/* Installed as YYFILL below: ignores its argument and makes matcher() return 3. */
#define matcher_stop(n) { return 3; }

	/* Backtracking position for the generated scanner (bound to YYMARKER). */
	char *marker;
	/* One past the last character of the buffer (bound to YYLIMIT). */
	char *cur_end = cur + len;

/*!re2c
	re2c:define:YYCTYPE  = "char";
	re2c:define:YYCURSOR = cur;
	re2c:define:YYMARKER = marker;
	re2c:define:YYLIMIT = cur_end ;
	re2c:define:YYFILL = matcher_stop;
	re2c:yyfill:enable   = 1;

	"a" { return 1; }
	"b"{6} { return 2; }
	[^] { return 4; }
*/
/* The helper macro is only meaningful inside this function. */
#undef matcher_stop
}

/*
 * match2string - human-readable name for a matcher() result code.
 *
 * Codes 1 through 4 index the table below; every other value is reported as
 * "unknown id". The returned pointer refers to a string literal.
 */
char *match2string(int id)
{
	static char *const names[] = {
		"a",                /* 1 */
		"bbbbbb",           /* 2 */
		"not enough input", /* 3 */
		"anything else",    /* 4 */
	};
	const int count = (int)(sizeof names / sizeof names[0]);

	if (id < 1 || id > count)
		return "unknown id";

	return names[id - 1];
}

/*
 * Test driver: feeds each sample string to matcher() together with its
 * strlen() and prints the numeric result next to its match2string() label.
 */
int main()
{
	/* NULL-terminated list of sample inputs. */
	char *input[] = {"a", "aaaaaaa", "bbbbbb", "bababa", "abbbb", NULL};
	char **entry = input;

	while (*entry) {
		char *text = *entry++;
		int id = matcher(text, strlen(text));

		printf("parser says for %s %d (%s)\n", text, id, match2string(id));
	}

	return 0;
}
------------------------------------------------------------------------------
Try New Relic Now & We'll Send You this Cool Shirt
New Relic is the only SaaS-based application performance monitoring service 
that delivers powerful full stack analytics. Optimize and monitor your
browser, app, & servers with just a few lines of code. Try New Relic
and get this awesome Nerd Life shirt! http://p.sf.net/sfu/newrelic_d2d_may
_______________________________________________
Re2c-general mailing list
Re2c-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/re2c-general

Reply via email to