For amusement, I was having a go at the Python Challenge (http://www.pythonchallenge.com/) in Smalltalk. That made me think that our regex implementation really needs to be able to capture sub-expressions, so here is a patch that exposes them.

The reason for passing the C structure to Smalltalk is that it allows the user to access it directly as an optimization, and also to decide that they would prefer the results in something other than a Dictionary of Intervals.

Incidentally, the fact that both regex implementations are in the examples directory suggests that they are not 'official' packages. Could we not elevate one to 'official' status?

Regards to all,

Mike
Common subdirectories: smalltalk-2.1g-orig/examples/.deps and smalltalk-2.1g/examples/.deps
Common subdirectories: smalltalk-2.1g-orig/examples/.libs and smalltalk-2.1g/examples/.libs
diff -u smalltalk-2.1g-orig/examples/re.c smalltalk-2.1g/examples/re.c
--- smalltalk-2.1g-orig/examples/re.c	2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/re.c	2005-05-27 07:22:26.586941880 +0000
@@ -74,6 +74,9 @@
 static int reh_search (OOP srcOOP, OOP patternOOP, int from, int to),
 reh_match (OOP srcOOP, OOP patternOOP, int from, int to);
 
+static struct pre_registers *reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to); /* returns calloc'ed registers, or NULL if the pattern does not compile */
+static void reh_free_registers(struct pre_registers *regs); /* releases a register set returned by reh_search_full */
+
 static RegexCacheEntry cache[REGEX_CACHE_SIZE];
 
 /* Smalltalk globals */
@@ -252,6 +255,38 @@
   return res + 1;
 }
 
+/* Search helper function that hands the sub-expression registers back to
+   the caller.  The result must be released with reh_free_registers.  */
+struct pre_registers *
+reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to)
+{
+  const char *src;
+  struct pre_pattern_buffer *regex;
+  struct pre_registers *regs;
+  RegexCaching caching;
+
+  caching = lookupRegex (patternOOP, &regex);
+  if (caching != REGEX_CACHE_HIT && compileRegex (patternOOP, regex) != NULL)
+    return NULL;		/* non-NULL from compileRegex is treated as failure */
+
+  /* now search; from - 1 presumably converts a 1-based Smalltalk index */
+  src = &STRING_OOP_AT (OOP_TO_OBJ (srcOOP), 1);
+  regs = calloc (1, sizeof *regs);
+  if (regs != NULL)	/* the match outcome is read from the registers, not the return value */
+    pre_search (regex, src, to, from - 1, to - from + 1, regs);
+  if (caching == REGEX_NOT_CACHED)
+    pre_free_pattern (regex);
+
+  return regs;		/* NULL here means the allocation failed */
+}
+
+void	/* Release a register set returned by reh_search_full; NULL is a no-op.  */
+reh_free_registers(struct pre_registers *regs)
+{
+	if (regs != NULL) { pre_free_registers(regs); }	/* reh_search_full may have returned NULL */
+	free(regs);
+}
+
 /* Match helper function */
 int
 reh_match (OOP srcOOP, OOP patternOOP, int from, int to)
@@ -280,6 +315,8 @@
 {
   vmProxy = proxy;
   vmProxy->defineCFunc ("reh_search", reh_search);
+  vmProxy->defineCFunc ("reh_search_full", reh_search_full);
+  vmProxy->defineCFunc ("reh_free_registers", reh_free_registers);
   vmProxy->defineCFunc ("reh_match", reh_match);
   vmProxy->defineCFunc ("reh_make_cacheable", reh_make_cacheable);
 
Files smalltalk-2.1g-orig/examples/re.o and smalltalk-2.1g/examples/re.o differ
diff -u smalltalk-2.1g-orig/examples/regex.la smalltalk-2.1g/examples/regex.la
--- smalltalk-2.1g-orig/examples/regex.la	2004-11-02 21:07:15.000000000 +0000
+++ smalltalk-2.1g/examples/regex.la	2005-05-27 07:22:30.489348624 +0000
@@ -32,4 +32,4 @@
 dlpreopen=''
 
 # Directory that this library needs to be installed in:
-libdir='/usr/local/lib/smalltalk'
+libdir='/usr/lib/smalltalk'
diff -u smalltalk-2.1g-orig/examples/regex.st smalltalk-2.1g/examples/regex.st
--- smalltalk-2.1g-orig/examples/regex.st	2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/regex.st	2005-05-27 07:25:22.609182432 +0000
@@ -68,7 +68,31 @@
 	defineCFunc: 'reh_match'
 	withSelectorArgs: 'lengthOfRegexMatch: pattern from: from to: to'
 	returning: #int
-	args: #(#selfSmalltalk #smalltalk #int #int)!
+	args: #(#selfSmalltalk #smalltalk #int #int)
+!
+
+"Smalltalk view of the struct pre_registers pointer answered by reh_search_full.
+ NOTE(review): field order is assumed to match the C header - confirm."
+CStruct subclass: #CPreRegisters
+	declaration: #( (#allocated #int) (#numRegs #int)
+			(#beg (#ptr #int)) (#end (#ptr #int)) )
+	classVariableNames: ''
+	poolDictionaries: ''
+	category: 'Regex'
+!
+
+"searchRegexFull:from:to: is declared to answer a CPreRegisters;
+ freeCPreRegisters: passes such an object back to C as a #cObject."
+String defineCFunc: 'reh_search_full'
+	withSelectorArgs: 'searchRegexFull: pattern from: from to: to'
+	returning: CPreRegisters type
+	args: #(#selfSmalltalk #smalltalk #int #int).
+
+String defineCFunc: 'reh_free_registers'
+	withSelectorArgs: 'freeCPreRegisters: regs'
+	returning: #void
+	args: #(#cObject).
+!
 
 "--------------------------------------------------------------------------"
 
@@ -379,5 +403,21 @@
 tokenize
 
     ^self tokenize: '[\n\t ]+' from: 1 to: self size
-! !
+! 
+
+regexSubExprs: pattern from: from to: to
+	"Answer a Dictionary mapping register index (0 .. numRegs - 1) to the 1-based Interval it matched, or nil if the search answers nil or register 0 has a negative start. The C end offset is assumed exclusive (GNU regex convention), so it is used as the inclusive 1-based end without adding 1."
+	| cregs regs |
+	cregs := self searchRegexFull: pattern from: from to: to.
+	cregs isNil ifTrue: [ ^nil ].
+	[	cregs beg value value >= 0 ifFalse: [ ^nil ].
+		regs := Dictionary new.
+		0 to: cregs numRegs value - 1 do:
+			[ :i | regs at: i put:
+				((cregs beg value + i) value + 1
+					to: (cregs end value + i) value) ].
+		^regs ]
+		ensure: [ self freeCPreRegisters: cregs ]
+!
+!
 
_______________________________________________
help-smalltalk mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/help-smalltalk

Reply via email to