cvsuser 05/04/09 00:35:14
Modified: . MANIFEST
examples/assembly pcre.imc
runtime/parrot/library libpcre.imc pcre.imc
Added: t/library pcre.t
Log:
update pcre example + libraries + tests
i've attached a patch to update the pcre example, libraries, and added some
documentation and simple tests. notably, the pcre library now uses current
calling conventions, and is designed to work on win32 and unix.
Courtesy of Jerry Gay <[EMAIL PROTECTED]>
Revision Changes Path
1.863 +1 -0 parrot/MANIFEST
Index: MANIFEST
===================================================================
RCS file: /cvs/public/parrot/MANIFEST,v
retrieving revision 1.862
retrieving revision 1.863
diff -u -r1.862 -r1.863
--- MANIFEST 6 Apr 2005 14:02:51 -0000 1.862
+++ MANIFEST 9 Apr 2005 07:35:11 -0000 1.863
@@ -2947,6 +2947,7 @@
t/library/parrotlib.t []
t/library/perlhist.txt []
t/library/pge.t []
+t/library/pcre.t []
t/library/sort.t []
t/library/streams.t []
t/library/yaml_parser_syck.t []
1.1 parrot/t/library/pcre.t
Index: pcre.t
===================================================================
#! perl -w
# Copyright (C) 2001-2005 The Perl Foundation. All rights reserved.
# $Id: pcre.t,v 1.1 2005/04/09 07:35:12 leo Exp $

=head1 NAME

t/library/pcre.t - testing library/pcre.imc

=head1 SYNOPSIS

    % perl -Ilib t/library/pcre.t

=head1 DESCRIPTION

This program tests whether the 'pcre.imc' library accesses
the installed PCRE library, and matches patterns successfully.

=cut

use strict;
use Parrot::Test tests => 1;

# If we keep pcre, we need a config test.  Until then, PCRE is considered
# available when "pcre-config --version" exits with status 0.
# NOTE(review): '/dev/null' is a Unix path -- confirm that
# Parrot::Test::_run_command copes with it on Win32.
my $has_pcre = Parrot::Test::_run_command("pcre-config --version",
    STDERR => '/dev/null') == 0;

SKIP: {
    # Skip every planned test when PCRE is not installed.  The plan is read
    # from the Test::Builder singleton object; calling expected_tests() as a
    # class method relies on implementation details of Test::Builder.
    skip("no pcre-config", Test::Builder->new->expected_tests())
        unless $has_pcre;

## 1
# The PIR program below prints one "ok N" line per step:
#   1. the 'PCRE'/'init' sub is found        2. init() returns a library handle
#   3. compiling the pattern 'a' yields a defined regex
#   4. matching against '--a--' does not return a negative count
#   5. dollar() extracts result 0, which must equal 'a'
pir_output_is( <<'CODE', <<'OUT', 'soup to nuts' );
.include 'library/pcre.imc'
.sub main @MAIN
.local pmc func
.local pmc lib
find_global func, 'PCRE', 'init'
isnull func, NOK1
branch OK1
NOK1:
print 'not '
OK1:
print "ok 1\n"
lib= func()
isnull lib, NOK2
branch OK2
NOK2:
print 'not '
OK2:
print "ok 2\n"
.local string s
.local string pat
s= '--a--'
pat= 'a'
.local pmc code
.local string error
.local int errptr
func= find_global 'PCRE', 'compile'
( code, error, errptr )= func( pat, 0 )
.local int is_code_defined
is_code_defined= defined code
if is_code_defined goto OK3
print 'not '
OK3:
print "ok 3\n"
.local int ok
.local pmc result
func= find_global 'PCRE', 'match'
( ok, result )= func( code, s, 0, 0 )
unless ok < 0 goto OK4
print 'not '
OK4:
print "ok 4\n"
.local int i
i= 0
.local string match
func= find_global 'PCRE', 'dollar'
match= func( s, ok, result, i )
if 'a' == match goto OK5
print 'not '
OK5:
print "ok 5\n"
.end
CODE
ok 1
ok 2
ok 3
ok 4
ok 5
OUT
}
1.9 +79 -54 parrot/examples/assembly/pcre.imc
Index: pcre.imc
===================================================================
RCS file: /cvs/public/parrot/examples/assembly/pcre.imc,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- pcre.imc 30 Jan 2004 11:45:07 -0000 1.8
+++ pcre.imc 9 Apr 2005 07:35:13 -0000 1.9
@@ -3,7 +3,7 @@
=head1 NAME
-examples/assembly/pcre.imc - String matching
+examples/assembly/pcre.imc - perl-compatible regular expressions
=head1 SYNOPSIS
@@ -12,73 +12,98 @@
=head1 DESCRIPTION
Experimental string matching with PCRE (L<http://www.pcre.org/>).
-
Note that PCRE must be installed for this to work.
+=head1 AUTHORS
+
+Original code by Leo Toetsch, updated by Jerry Gay
+E<lt>jerry dot gay at gmail dot comE<gt>
+
=cut
-.pcc_sub _main prototyped
- .param pmc argv
-.include "library/pcre.imc"
- .sym int argc
- argc = argv
- if argc != 3 goto usage
-
- .sym string s
- .sym string pat
- s = argv[1]
- pat = argv[2]
- print s
- print " =~ /"
- print pat
- print "/\n"
- .sym pmc PCRE_LIB
- .PCRE_INIT(PCRE_LIB)
- .sym string error
- .sym int errptr
- .sym pmc code
- .PCRE_COMPILE(pat, 0, code, error, errptr)
- $I0 = defined code
- unless $I0 goto match_err
-
- .sym int ok
- .sym pmc result
- .PCRE_MATCH(code, s, 0, 0, ok, result)
- if ok < 0 goto nomatch
- print ok
- print " match(es):\n"
- .sym int i
- i = 0
- .sym string match
-lp: .PCRE_DOLLAR(s, ok, result, i, match)
+
+.include 'library/pcre.imc'
+
+
+.sub main @MAIN
+ .param pmc argv
+
+ .local int argc
+ argc= argv
+ if argc != 3 goto USAGE
+
+ .local pmc func
+ .local pmc lib
+
+ func= find_global 'PCRE', 'init'
+ lib= func()
+
+ .local string s
+ s= argv[1]
+ .local string pat
+ pat= argv[2]
+
+ print s
+ print " =~ /"
+ print pat
+ print "/\n"
+
+ .local pmc regex
+ .local string error
+ .local int errptr
+
+ func= find_global 'PCRE', 'compile'
+ ( regex, error, errptr )= func( pat, 0 )
+
+ .local int is_regex_defined
+ is_regex_defined= defined regex
+ unless is_regex_defined goto MATCH_ERR
+
+ .local int ok
+ .local pmc result
+
+ func= find_global 'PCRE', 'match'
+ ( ok, result )= func( regex, s, 0, 0 )
+
+ if ok < 0 goto NOMATCH
+ print ok
+ print " match(es):\n"
+ .local int i
+ i= 0
+ .local string match
+
+LP:
+ func= find_global 'PCRE', 'dollar'
+ match= func( s, ok, result, i )
print match
print "\n"
inc i
- if i < ok goto lp
+ if i < ok goto LP
end
-nomatch:
+
+NOMATCH:
print "no match\n"
- end
-match_err:
+ end
+
+MATCH_ERR:
print "error in regex: "
- #print error
print "at: '"
- length $I0, pat
- $I0 = $I0 - errptr
- substr $S0, pat, errptr, $I0
- print $S0
+ .local int pat_errloc
+ length pat_errloc, pat
+ pat_errloc = pat_errloc - errptr
+
+ .local string pattern_error
+ substr pattern_error, pat, errptr, pat_errloc
+ print pattern_error
print "'\n"
exit 1
-usage:
- .sym string prog
- prog = argv[0]
+
+USAGE:
+ .local string prog
+ prog= argv[0]
+ print 'usage: '
print prog
print " string pattern\n"
exit 1
.end
-=head1 SEE ALSO
-
-F<library/libpcre.imc>, F<library/pcre.imc>.
-
-=cut
1.2 +79 -99 parrot/runtime/parrot/library/libpcre.imc
Index: libpcre.imc
===================================================================
RCS file: /cvs/public/parrot/runtime/parrot/library/libpcre.imc,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- libpcre.imc 25 May 2004 18:06:26 -0000 1.1
+++ libpcre.imc 9 Apr 2005 07:35:14 -0000 1.2
@@ -1,132 +1,112 @@
=head1 TITEL
-libpcre.imc - pcre interface
+libpcre.imc - NCI interface to Perl-Compatible Regular Expression library
-=head1 SYNOPSIS
+=head1 DESCRIPTION
- .include "library/pcre.imc"
- .PCRE_INIT(lib)
- .PCRE_COMPILE(pattern, options, regex, error, errptr)
- .PCRE_MATCH(regex, string, nres, ovector)
- .PCRE_DOLLAR(string, ok, ovector, n, match)
+See 'library/pcre.imc' for details on the user interface.
-=head1 DESCRIPTION
+=cut
-Above macros call functions in libpcre.imc.
-=over 4
+.namespace ['PCRE::NCI']
-=item macro PCRE_INIT(var)
-Intialize the pcre library. The PMC B<var> is set to the library handle
-and additionally stored as global "pcre::lib".
+.sub compile prototyped
+ .param string pat
+ .param int options
-=item macro PCRE_COMPILE((pattern, options, regex, error, errptr)
+ .local string error
+ .local pmc PCRE_NCI_compile
+ .local int error_size
-Compile the string B<pattern> with int B<options>. Returns var B<regex>
-string B<error> and int B<errptr>.
+ .local pmc NULL
+ null NULL
-=cut
+ .local pmc errptr
+ errptr= new PerlInt
+ ## error message string size
+ error_size= 500
-.pcc_sub _pcre_compile prototyped
- .param string pat
- .param int options
+ ## allocate space in string for error message
+ repeat error, " ", error_size
+
+ PCRE_NCI_compile= find_global 'PCRE::NCI', 'PCRE_compile'
+
+ .local pmc code
+
+ code= PCRE_NCI_compile( pat, options, error, errptr, NULL )
+
+ .local int is_code_defined
+ is_code_defined = defined code
+ unless is_code_defined goto RETURN
- .sym pmc NULL
- .sym pmc code
- .sym string error
- .sym pmc COMPILE
- COMPILE = global "pcre::compile"
- repeat error, " ", 500 # could be enough
- .sym pmc errptr
- errptr = new PerlInt
- null NULL
- .pcc_begin prototyped
- .arg pat
- .arg options
- .arg error
- .arg errptr
- .arg NULL
- .nci_call COMPILE
- .result code
- .pcc_end
- $I0 = defined code
- unless $I0 goto nok
error = ""
-nok:
- .pcc_begin_return
- .return code
- .return error
- .return errptr
- .pcc_end_return
-.end
-=item macro PCRE_MATCH(regex, str, start, options, ok, res)
-Match the var B<regex> against string B<str> from int B<start> with
-int B<options>. Returns the number of matches in int B<ok> and the
-ovector in var B<res>.
+RETURN:
+ .return( code, error, errptr )
+.end
-=cut
-.pcc_sub _pcre_exec prototyped
+.sub exec prototyped
.param pmc regex
.param string s
.param int start
.param int options
- .sym int len
+
+ .local int len
length len, s
- .sym pmc NULL
+
+ .local pmc NULL
null NULL
- .sym pmc ovector
- ovector = new ManagedStruct
- ovector = 120 # 1/(2/3) * 4 * 2 * 10 for 10 result pairs
- .sym pmc EXEC # on 32 bit systems
- EXEC = global "pcre::exec"
- .pcc_begin prototyped
- .arg regex # p code
- .arg NULL # P extra
- .arg s # t subject
- .arg len # i length
- .arg start # i start
- .arg options
- .arg ovector # p ovector
- .arg 10 # i ovecsize
- .nci_call EXEC
- .local int ok
- .result ok
- .pcc_end
- .pcc_begin_return
- .return ok
- .return ovector
- .pcc_end_return
-.end
+ ## osize -- 1/(2/3) * 4 * 2
+ .local int osize
+ osize= 12
-=item macro PCRE_DOLLAR(str, ok, res, n, matched)
+ ## number of result pairs
+ .local int num_result_pairs
+ num_result_pairs= 10
-Extract from string B<str> the int B<n>th result into string B<match>.
+ .local int ovector_length
+ ovector_length= osize * num_result_pairs
-=cut
+ .local pmc ovector
+ ovector= new ManagedStruct
+ ovector= ovector_length
+
+ ## on 32 bit systems
+ .local pmc PCRE_NCI_exec
+ PCRE_NCI_exec = find_global 'PCRE::NCI', 'PCRE_exec'
+ .local int ok
+ ok= PCRE_NCI_exec( regex, NULL, s, len, start, options, ovector, 10 )
-.pcc_sub _pcre_result prototyped
+ .return( ok, ovector )
+.end
+
+
+.sub result prototyped
.param string s
.param int ok
.param pmc ovector
.param int n
- .sym string match
- match = ""
- if ok <= 0 goto nomatch
+ .local string match
+ match= ""
+ if ok <= 0 goto NOMATCH
.local int ovecs
.local int ovece
- .sym pmc struct
- struct = new SArray
- struct = 3
-.include "datatypes.pasm"
+
+ .local pmc struct
+ struct= new SArray
+ struct= 3
+
+ .include "datatypes.pasm"
+
struct[0] = .DATATYPE_INT
$I0 = ok * 2
struct[1] = $I0
@@ -137,16 +117,14 @@
inc $I0
ovece = ovector[0;$I0]
$I0 = ovece - ovecs
- if ovecs >= 0 goto m1
+ if ovecs >= 0 goto M1
match = ""
- goto m0
-m1:
+ goto M0
+M1:
substr match, s, ovecs, $I0
-m0:
-nomatch:
- .pcc_begin_return
- .return match
- .pcc_end_return
+M0:
+NOMATCH:
+ .return( match )
.end
=for todo
@@ -190,8 +168,10 @@
pcre(3)
-=head1 AUTHOR
+=head1 AUTHORS
-Leopold Toetsch
+Original code by Leo Toetsch, updated by Jerry Gay
+E<lt>jerry dot gay at gmail dot comE<gt>
+=cut
1.2 +181 -53 parrot/runtime/parrot/library/pcre.imc
Index: pcre.imc
===================================================================
RCS file: /cvs/public/parrot/runtime/parrot/library/pcre.imc,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- pcre.imc 25 May 2004 18:06:26 -0000 1.1
+++ pcre.imc 9 Apr 2005 07:35:14 -0000 1.2
@@ -1,60 +1,188 @@
-.macro PCRE_INIT (var)
- loadlib .var, "libpcre"
- store_global "pcre::lib", .var
- load_bytecode "library/libpcre.imc"
- # pcre *pcre_compile(const char *pattern, int options,
- # const char **errptr, int *erroffset,
- # const unsigned char *tableptr
- dlfunc $P0, .var, "pcre_compile", "ptiB3P"
- store_global "pcre::compile", $P0
+=head1 TITLE
+
+pcre.imc - user interface to Perl-Compatible Regular Expression library
+
+=head1 SYNOPSIS
+
+ .include 'library/pcre.imc'
+ lib= pcre_init()
+
+ func= find_global 'PCRE', 'compile'
+ ( regex, error, errptr )= func( pat, options )
+
+ func= find_global 'PCRE', 'match'
+ ( ok, result )= func( regex, string, start, options )
+
+ func= find_global 'PCRE', 'dollar'
+ match= func( string, ok, result, i )
+
+=head1 DESCRIPTION
+
+This is the user interface to PCRE. Use this to initialize the library,
+compile regexes, match against strings, and return the results.
+All functions are found in the 'PCRE' namespace.
+
+The NCI interface is contained in libpcre.imc.
+
+=cut
+
+
+.namespace ['PCRE']
+
+
+=over 4
+
+=item sub init()
+
+Initialize the pcre library. The library handle is returned as a PMC
+and is additionally stored as global 'PCRE', 'lib'.
+
+=cut
+
+.sub init
+ .local pmc libpcre
+ .local pmc pcre_function
+ .local pmc config
+ .local string osname
+
+ config= _config()
+ osname= config['osname']
+
+ if 'MSWin32' == osname goto LIB_WIN32
+
+LIB_DEFAULT:
+ loadlib libpcre, 'libpcre'
+ branch LIB_LOADED
+
+LIB_WIN32:
+ loadlib libpcre, 'pcre'
+
+LIB_LOADED:
+ store_global 'PCRE', 'lib', libpcre
+
+ load_bytecode 'library/libpcre.imc'
+
+ # pcre *pcre_compile(const char *pattern, int options,
+ # const char **errptr, int *erroffset,
+ # const unsigned char *tableptr
+ dlfunc pcre_function, libpcre, 'pcre_compile', 'ptiB3P'
+ store_global 'PCRE::NCI', 'PCRE_compile', pcre_function
+
#int pcre_exec(const pcre *code, const pcre_extra *extra,
# const char *subject, int length, int startoffset,
# int options, int *ovector, int ovecsize);
- dlfunc $P0, .var, "pcre_exec", "ipPtiiipi"
- store_global "pcre::exec", $P0
+ dlfunc pcre_function, libpcre, 'pcre_exec', 'ipPtiiipi'
+ store_global 'PCRE::NCI', 'PCRE_exec', pcre_function
#int pcre_copy_substring(const char *subject, int *ovector,
# int stringcount, int stringnumber, char *buffer,
# int buffersize);
- dlfunc $P0, .var, "pcre_copy_substring", "itpiibi"
- store_global "pcre::copy_substring", $P0
-.endm
-
-.macro PCRE_COMPILE(pattern, options, regex, error, errptr)
- $P0 = global "pcre::compile"
- $P1 = global "_pcre_compile"
- .pcc_begin prototyped
- .arg .pattern
- .arg .options
- .pcc_call $P1
- .result .regex
- .result .error
- .result .errptr
- .pcc_end
-.endm
-
-.macro PCRE_MATCH(regex, str, start, options, ok, res)
- $P0 = global "pcre::exec"
- $P1 = global "_pcre_exec"
- .pcc_begin prototyped
- .arg .regex
- .arg .str
- .arg .start
- .arg .options
- .pcc_call $P1
- .result .ok
- .result .res
- .pcc_end
-.endm
-
-.macro PCRE_DOLLAR(str, ok, res, n, matched)
- $P1 = global "_pcre_result"
- .pcc_begin prototyped
- .arg .str
- .arg .ok
- .arg .res
- .arg .n
- .pcc_call $P1
- .result .matched
- .pcc_end
-.endm
+ dlfunc pcre_function, libpcre, 'pcre_copy_substring', 'itpiibi'
+ store_global 'PCRE::NCI', 'PCRE_copy_substring', pcre_function
+
+ .return( libpcre )
+.end
+
+
+=item sub ( regex, error, errptr )= compile( pattern, options )
+
+Compile the string B<pattern> with int B<options>.
+Returns pmc B<regex>, string B<error> and int B<errptr>.
+
+=cut
+
+.sub compile prototyped
+ .param string pattern
+ .param int options
+ .local pmc pcre_function
+
+ pcre_function= find_global 'PCRE::NCI', 'compile'
+
+ .local pmc regex
+ .local string error
+ .local int errptr
+
+ ( regex, error, errptr )= pcre_function( pattern, options )
+
+ .return( regex, error, errptr )
+.end
+
+
+=item sub ( ok, result )= match( regex, string, start, options )
+ sub match ()= match()
+
+Match the pmc B<regex> against string B<string> from int B<start> with
+int B<options>. Returns the number of matches in int B<ok> and the ovector
+in pmc B<result>.
+
+=cut
+
+.sub match prototyped
+ .param pmc regex
+ .param string str
+ .param int start
+ .param int options
+ .local pmc pcre_function
+
+ pcre_function= find_global 'PCRE::NCI', 'exec'
+
+ .local int ok
+ .local pmc res
+
+ ( ok, res )= pcre_function( regex, str, start, options )
+
+ .return( ok, res )
+.end
+
+
+=item sub match= dollar( string, ok, result, i )
+
+Extract from string B<string> the int B<i>th result into string B<match>.
+Returns the match.
+
+=cut
+
+.sub dollar prototyped
+ .param string str
+ .param int ok
+ .param pmc res
+ .param int n
+ .local pmc pcre_function
+
+ pcre_function= find_global 'PCRE::NCI', 'result'
+
+ .local string matched
+
+ matched= pcre_function( str, ok, res, n )
+
+ .return( matched )
+.end
+
+
+.include "library/config.imc"
+
+
+=head1 BUGS
+
+None known, but this hasn't been well tested. This interface
+is designed to work on all platforms where PCRE and parrot
+are supported, but has not been tested on all of them.
+Send bug reports to E<lt>[EMAIL PROTECTED]E<gt>
+
+=cut
+
+=head1 FILES
+
+pcre.imc, libpcre.imc
+
+=head1 SEE ALSO
+
+pcre(3)
+
+=head1 AUTHORS
+
+Original code by Leo Toetsch, updated by Jerry Gay
+E<lt>jerry dot gay at gmail dot comE<gt>
+
+=cut
+