cvsuser 04/11/19 05:53:40
Modified: compilers/p6ge Makefile p6ge.h p6ge_gen.c p6ge_parse.c
p6ge_parsep5.c
Log:
Changed malloc.h to stdlib.h.
Removed unused expression type strings.
Added test for .*? at beginning of pattern.
Revision Changes Path
1.2 +1 -0 parrot/compilers/p6ge/Makefile
Index: Makefile
===================================================================
RCS file: /cvs/public/parrot/compilers/p6ge/Makefile,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Makefile 18 Nov 2004 22:34:52 -0000 1.1
+++ Makefile 19 Nov 2004 13:53:40 -0000 1.2
@@ -1,5 +1,6 @@
PARROT = ../..
CFLAGS = -I $(PARROT)/include
+
p6ge.so: p6ge_parse.o p6ge_gen.o p6ge_parsep5.o
$(CC) -shared -fpic p6ge_parse.o p6ge_gen.o p6ge_parsep5.o -o p6ge.so
1.2 +1 -1 parrot/compilers/p6ge/p6ge.h
Index: p6ge.h
===================================================================
RCS file: /cvs/public/parrot/compilers/p6ge/p6ge.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- p6ge.h 18 Nov 2004 22:41:04 -0000 1.1
+++ p6ge.h 19 Nov 2004 13:53:40 -0000 1.2
@@ -8,6 +8,7 @@
#define P6GE_MAX_LITERAL_LEN 128
typedef enum {
+ P6GE_NULL_PATTERN,
P6GE_PATTERN_END, P6GE_DOT, P6GE_LITERAL,
P6GE_CONCAT, P6GE_GROUP, P6GE_ALT,
P6GE_ANCHOR_BOS, P6GE_ANCHOR_EOS,
@@ -17,7 +18,6 @@
enum { ctliteral=0x00, ctmeta=0x01, ctspace=0x02, ctket=0x04, ctquant=0x08 };
extern int p6ge_ctype[256];
extern int p6ge_cmeta[256];
-extern const char* p6ge_exp_s[];
typedef struct _P6GE_Text {
const unsigned char* text;
1.2 +34 -23 parrot/compilers/p6ge/p6ge_gen.c
Index: p6ge_gen.c
===================================================================
RCS file: /cvs/public/parrot/compilers/p6ge/p6ge_gen.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- p6ge_gen.c 18 Nov 2004 22:34:52 -0000 1.1
+++ p6ge_gen.c 19 Nov 2004 13:53:40 -0000 1.2
@@ -20,8 +20,8 @@
#include "p6ge.h"
#include "parrot/parrot.h"
-#include <malloc.h>
#include <stdarg.h>
+#include <stdlib.h>
static char* p6ge_cbuf = 0;
static int p6ge_cbuf_size = 0;
@@ -389,6 +389,7 @@
{
emitlcount();
switch (e->type) {
+ case P6GE_NULL_PATTERN: emit("R%d:\n", e->id); break;
case P6GE_PATTERN_END: p6ge_gen_pattern_end(e, succ); break;
case P6GE_DOT: p6ge_gen_dot(e, succ); break;
case P6GE_LITERAL: p6ge_gen_literal(e, succ); break;
@@ -445,6 +446,20 @@
return p6ge_cbuf;
}
+/*
+ * is_bos_anchored() returns true if an expression is anchored to the bos,
+ * i.e. a P6GE_ANCHOR_BOS node is reachable under the rules below.  The
+ * rule compilers call it to decide whether the non-greedy leading dot
+ * has to be prepended to the pattern.
+ *
+ *   ANCHOR_BOS - anchored
+ *   CONCAT     - anchored if either operand is anchored
+ *   GROUP      - anchored if its subexpression (exp1) is anchored
+ *   ALT        - anchored only if both alternatives are anchored
+ *   other      - not anchored
+ *
+ * A null expression is treated as not anchored.
+ *
+ * NOTE(review): quantifier bounds are not consulted here.  Nodes carry a
+ * min field; if a GROUP can have min == 0 its anchor is optional and the
+ * group arguably should not count as anchored -- confirm against the
+ * parser.
+ */
+static int
+is_bos_anchored(P6GE_Exp* e)
+{
+    /* GROUP nodes are built with a null second operand; never follow one */
+    if (!e)
+        return 0;
+
+    switch (e->type) {
+    case P6GE_ANCHOR_BOS:
+        return 1;
+    case P6GE_CONCAT:
+        return is_bos_anchored(e->exp1) || is_bos_anchored(e->exp2);
+    case P6GE_GROUP:
+        return is_bos_anchored(e->exp1);
+    case P6GE_ALT:
+        return is_bos_anchored(e->exp1) && is_bos_anchored(e->exp2);
+    default:
+        break;
+    }
+    return 0;
+}
+
/*
=item C<char* p6ge_p6rule_pir(const char* s)>
@@ -463,19 +478,17 @@
{
P6GE_Exp* e = 0;
P6GE_Exp* dot0 = 0;
- P6GE_Exp* group0 = 0;
- P6GE_Exp* end = 0;
char* pir = 0;
- dot0 = p6ge_parse_new(P6GE_DOT, 0, 0);
- dot0->min = 0; dot0->max = P6GE_INF; dot0->isgreedy = 0;
-
- group0 = p6ge_parse_new(P6GE_GROUP, p6ge_parse(s), 0);
-
- end = p6ge_parse_new(P6GE_PATTERN_END, 0, 0);
-
- e = p6ge_parse_new(P6GE_CONCAT, dot0,
- p6ge_parse_new(P6GE_CONCAT, group0, end));
+ e = p6ge_parse_new(P6GE_CONCAT,
+ p6ge_parse_new(P6GE_GROUP, p6ge_parse(s), 0),
+ p6ge_parse_new(P6GE_PATTERN_END, 0, 0));
+
+ if (!is_bos_anchored(e)) {
+ dot0 = p6ge_parse_new(P6GE_DOT, 0, 0);
+ dot0->min = 0; dot0->max = P6GE_INF; dot0->isgreedy = 0;
+ e = p6ge_parse_new(P6GE_CONCAT, dot0, e);
+ }
pir = p6ge_gen(e);
p6ge_parse_free(e);
@@ -501,19 +514,17 @@
{
P6GE_Exp* e = 0;
P6GE_Exp* dot0 = 0;
- P6GE_Exp* group0 = 0;
- P6GE_Exp* end = 0;
char* pir = 0;
- dot0 = p6ge_parse_new(P6GE_DOT, 0, 0);
- dot0->min = 0; dot0->max = P6GE_INF; dot0->isgreedy = 0;
-
- group0 = p6ge_parse_new(P6GE_GROUP, p6ge_parsep5(s), 0);
-
- end = p6ge_parse_new(P6GE_PATTERN_END, 0, 0);
-
- e = p6ge_parse_new(P6GE_CONCAT, dot0,
- p6ge_parse_new(P6GE_CONCAT, group0, end));
+    /* Build GROUP(pattern) . PATTERN_END.  Keep parsing with
+     * p6ge_parsep5() here, as the lines this hunk removes did;
+     * p6ge_parse() is the parser used by the p6 rule compiler. */
+    e = p6ge_parse_new(P6GE_CONCAT,
+            p6ge_parse_new(P6GE_GROUP, p6ge_parsep5(s), 0),
+            p6ge_parse_new(P6GE_PATTERN_END, 0, 0));
+
+ if (!is_bos_anchored(e)) {
+ dot0 = p6ge_parse_new(P6GE_DOT, 0, 0);
+ dot0->min = 0; dot0->max = P6GE_INF; dot0->isgreedy = 0;
+ e = p6ge_parse_new(P6GE_CONCAT, dot0, e);
+ }
pir = p6ge_gen(e);
p6ge_parse_free(e);
1.2 +1 -8 parrot/compilers/p6ge/p6ge_parse.c
Index: p6ge_parse.c
===================================================================
RCS file: /cvs/public/parrot/compilers/p6ge/p6ge_parse.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- p6ge_parse.c 18 Nov 2004 22:34:52 -0000 1.1
+++ p6ge_parse.c 19 Nov 2004 13:53:40 -0000 1.2
@@ -20,15 +20,8 @@
#include "p6ge.h"
#include <ctype.h>
-#include <malloc.h>
#include <stdio.h>
-
-const char* p6ge_exp_s[] = {
- "pattern end", "dot", "literal",
- "concat", "group", "alt",
- "^anchor", "anchor$",
- "^^anchor", "anchor$$"
-};
+#include <stdlib.h>
int p6ge_ctype[256];
int p6ge_cmeta[256];
1.2 +1 -1 parrot/compilers/p6ge/p6ge_parsep5.c
Index: p6ge_parsep5.c
===================================================================
RCS file: /cvs/public/parrot/compilers/p6ge/p6ge_parsep5.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- p6ge_parsep5.c 18 Nov 2004 22:34:52 -0000 1.1
+++ p6ge_parsep5.c 19 Nov 2004 13:53:40 -0000 1.2
@@ -20,8 +20,8 @@
#include "p6ge.h"
#include <ctype.h>
-#include <malloc.h>
#include <stdio.h>
+#include <stdlib.h>
static P6GE_Exp* p5re_parse_expr(P6GE_Text* t);