"Dr.Ruud" schreef:

> Slight revision, that fails on the last line:

A revision that makes more assumptions:

#!/usr/bin/perl
  use warnings ;
  use strict ;

  # Regex fragment: anchor at the absolute start of the string.
  sub SOB {
    return '\A' ;
  }
  # Regex fragment: anchor at the absolute end of the string.
  sub EOB {
    return '\z' ;
  }
  # Combine the given pattern fragments into a single alternation by
  # placing '|' between them. No grouping is added around the result.
  sub OR {
    return join( '|', @_ ) ;
  }

  # Regex fragment: one or more characters of the POSIX [:blank:] class.
  sub sp {
    return '[[:blank:]]+' ;
  }
  # Wrap the given fragment(s) in a capturing group. Several arguments
  # are joined the way array interpolation joins them (with $").
  sub capture {
    my $inner = "@_" ;
    return '(' . $inner . ')' ;
  }
  # Wrap the given fragment(s) in a non-capturing group quantified
  # with '?' (zero or one occurrence).
  sub optional {
    my $inner = "@_" ;
    return '(?:' . $inner . ')?' ;
  }
  # Wrap the given fragment(s) in a non-capturing group quantified
  # with '*' (zero or more occurrences).
  sub optimany {
    my $inner = "@_" ;
    return '(?:' . $inner . ')*' ;
  }
  # Append '?' to the given fragment(s). When the fragment ends in a
  # quantifier this turns that quantifier into its non-greedy form.
  sub ungreedy {
    my $quantified = "@_" ;
    return $quantified . '?' ;
  }
  # Wrap the given fragment(s) in a zero-width positive lookahead,
  # (?=...): the fragment must match at the current position but no
  # input is consumed and no capture group is created. Several
  # arguments are joined the way array interpolation joins them
  # (with $").
  sub ahead {
    my $inner = "@_" ;
    return '(?=' . $inner . ')' ;
  }

  # Regex fragment: one or more digits.
  sub REnumber {
    return '\d+' ;
  }
  # Regex fragment: one or more word characters.
  sub REword {
    return '\w+' ;
  }
  # Regex fragment: a non-capturing alternation of the accepted
  # two-letter codes. The literal contains newlines, so the pattern it
  # is embedded in must ignore whitespace (e.g. be compiled with /x).
  sub RElang {
    return '
(?:
a[ly]|b[gs]|cs|d[ae]|e[nst]|
f[ir]|gr|h[eruy]|it|ja|kk|lv|nl|
p[blt]|r[ou]|s[klqrv]|t[hr]|uk|zh)
' ;
  }

  # Regex fragment for a list of words: one word, then a non-greedy
  # repetition of "blanks + word", then the fragment ahead() builds
  # from the alternation "blanks or end of string".
  sub REwordlist {
    my $first_word = REword() ;
    my $more_words = ungreedy( optimany( sp() . REword() ) ) ;
    my $boundary   = ahead( OR( sp(), EOB() ) ) ;

    return $first_word . $more_words . $boundary ;
  }

  # Regex fragment for a comma-separated list of language codes: one
  # RElang code followed by zero or more ",code" repetitions.
  sub RElanglist {
    my $more_codes = optimany( ',' . RElang() ) ;

    return RElang() . $more_codes ;
  }

  # Assemble the full line pattern from its parts, in order:
  #   optional leading number, word list, optional language list,
  #   optional "<number>cd" suffix - anchored at both ends.
  my $re = join '',
           SOB(),
           optional( capture( REnumber() ) . sp() ),
           capture( REwordlist() ),
           optional( sp() . capture( RElanglist() ) ),
           optional( sp() . capture( REnumber() ) . 'cd' ),
           EOB() ;

  # Show the assembled pattern text before using it.
  print "re/$re/\n\n\n" ;

  # Compile once; /x makes the whitespace inside the fragments insignificant.
  my $qr = qr/ $re /x ;

  # For every line of the DATA section: echo the raw line, normalise its
  # whitespace, and print the four captured fields if it matches $qr.
  while ( my $line = <DATA> )
  {
    print "\n" ;
    print $line ;

    $line =~ s/\A[[:blank:]]+// ;   # drop leading blanks
    $line =~ s/\s+\z// ;            # drop trailing whitespace; chomps as well
    $line =~ s/[[:blank:]]+,[[:blank:]]*|,[[:blank:]]+/,/g ;  # no blanks around commas

    if ( $line =~ $qr )
    {
      # Optional groups that did not take part in the match are undef and
      # print as empty strings; silence only that warning category.
      no warnings 'uninitialized' ;
      print "($1) ($2) ($3) ($4)\n" ;
    }
  }

__DATA__
word
word word
word word word
1 word
1 word word word
1 word en,pt,sk
1 word en 1cd
1 word word en 1cd


-- 
Affijn, Ruud

"Gewoon is een tijger."



-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>


Reply via email to