On Tue, 25 May 2004, Tom Jackson wrote:
> On Tue, 2004-05-25 at 08:20, Daniël Mantione wrote:
>
> > To those unknown to the OpenACS compiler; OpenACS has its own adp parser
> > which is way more powerfull than the AOLserver adp parser.
>
> Actually the OpenACS parser doesn't exist, ATS uses ns_register_adptag
> and ns_adp_parse. One big problem with this setup is that recusion is
> difficult, making it difficult to extend the templating system, but it
> works great for tags which do not need to parse their content.
Hmmm. Are we looking at the same code? I'm quite sure it converts the
.adp & .tcl into a Tcl script, which it then evaluates. That would be
contradictory with ns_adp_parse. Well, time to take a closer look at it,
> > What's the idea? Well, such a compiler would not be a very large piece of
> > Tcl code as far as I can see,
> Tcl is very limited in data structures. You would need to be able to map
> each PHP data structure to tcl.
Yes, that's the hard part. I think it should be possible to map most
datastructures, but for classes I don't know yet if that'll be possible.
> > Well, I was that enthousiastic of the idea that I started doing
> > programming, and within 3 hours (!) I programmed a working tokenizer for
> > PHP. A parser would be a bit more work of course, but it seems the idea is
> > very feasible.
>
> I've done a template tokenizer/parser/compiler for a tcl-like templating
> language to tcl code, I'd like to do the same thing with PHP. It would
> be interesting to see what a tokenizer for PHP looks like.
Here 'ya go :) It tokenizes already a lot of php-files correctly, but
there will of course certainly be bugs.
Dani�l
#!/usr/bin/tclsh
#
#This script should become a Php -> Tcl compiler
array set keywords {
__CLASS__ _CLASS
__FILE__ _FILE
__FUNCTION _FUNCTION
__LINE__ _LINE
__METHOD _METHOD
exception EXCEPTION
and AND
array ARRAY
as AS
break BREAK
case CASE
cfunction CFUNCTION
class CLASS
const CONST
continue CONTINUE
declare DECLARE
default DEFAULT
die DIE
do DO
else ELSE
elseif ELSEIF
empty EMPTY
enddeclare ENDDECLARE
endfor ENDFOR
endforeach ENDFOREACH
endif ENDIF
endswitch ENDSWITCH
endwhile ENDWHILE
eval EVAL
exit EXIT
extends EXTENDS
for FOR
foreach FOREACH
function FUNCTION
global GLOBAL
if IF
include INCLUDE
include_once INCLUDEONCE
isset ISSET
list LIST
new NEW
old_function OLDFUNCTION
or OR
php_user_filter PHPUSERFILTER
print PRINT
require REQUIRE
require_once REQUIRE_ONCE
return RETURN
static STATIC
switch SWITCH
unset UNSET
use USE
var VAR
while WHILE
xor XOR
}
proc php_skip_whitespace {} {
global php ptr tokenendptr token
set c [string index $php $ptr]
while {[string match "\[ \t\n\r\]" $c]} {
incr ptr
set c [string index $php $ptr]
}
}
proc php_get_token {} {
global php ptr tokenendptr token endtoken keywords
php_skip_whitespace
switch -glob -- [string range $php $ptr [expr $ptr + 1]] {
"\\?>" {
# End of chunk
if {$endtoken == "?>"} then {
set token END_OF_CHUNK
set tokenendptr [expr $ptr + 2]
}
}
"%>" {
# End of chunk
if {$endtoken == "%>"} then {
set token END_OF_CHUNK
set tokenendptr [expr $ptr + 2]
}
}
"//" {
# Comment, skip until end of line
incr ptr 2
set c [string index $php $ptr]
while {$c != "\n"} {
incr ptr
set c [string index $php $ptr]
}
incr ptr
# Recurse to get really token (might be whitespace & comment again).
php_get_token
}
"/\\*" {
# Comment, skip until "*/"
incr ptr 2
set s [string range $php $ptr [expr $ptr + 1]]
while {$s != "*/"} {
incr ptr
set s [string range $php $ptr [expr $ptr + 1]]
}
incr ptr 2
# Recurse to get really token (might be whitespace & comment again).
php_get_token
}
"'*" {
# Literal string
set tokenendptr $ptr
incr tokenendptr
set c [string index $php $tokenendptr]
set ident ""
while {$c != "'"} {
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
}
incr tokenendptr
set token [list LSTRING $ident]
}
"\"*" {
# Expandable string
set tokenendptr $ptr
incr tokenendptr
set c [string index $php $tokenendptr]
if {$c == "\\"} then {
incr tokenendptr
set c [string index $php $tokenendptr]
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
}
set ident ""
while {$c != "\""} {
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
if {$c == "\\"} then {
incr tokenendptr
set c [string index $php $tokenendptr]
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
}
}
incr tokenendptr
set token [list ESTRING $ident]
}
"(*" {
set token LPAR
set tokenendptr [expr $ptr + 1]
}
")*" {
set token RPAR
set tokenendptr [expr $ptr + 1]
}
";*" {
set token SEMICOLON
set tokenendptr [expr $ptr + 1]
}
",*" {
set token COMMA
set tokenendptr [expr $ptr + 1]
}
"=*" {
set token ASSIGN
set tokenendptr [expr $ptr + 1]
}
"+*" {
set token PLUS
set tokenendptr [expr $ptr + 1]
}
"-*" {
set token MINUS
set tokenendptr [expr $ptr + 1]
}
"\\**" {
set token TIMES
set tokenendptr [expr $ptr + 1]
}
"/*" {
set token DIV
set tokenendptr [expr $ptr + 1]
}
".*" {
set token CONCAT
set tokenendptr [expr $ptr + 1]
}
"==" {
set token EQUAL
set tokenendptr [expr $ptr + 2]
}
"!=" {
set token UNEQUAL
set tokenendptr [expr $ptr + 2]
}
"<*" {
set token LT
set tokenendptr [expr $ptr + 1]
}
">*" {
set token GT
set tokenendptr [expr $ptr + 1]
}
"<=" {
set token LTE
set tokenendptr [expr $ptr + 2]
}
">=" {
set token GTE
set tokenendptr [expr $ptr + 2]
}
"\\?*" {
set token IFEXPR
set tokenendptr [expr $ptr + 1]
}
"{*" {
set token LBRACE
set tokenendptr [expr $ptr + 1]
}
"}*" {
set token RBRACE
set tokenendptr [expr $ptr + 1]
}
"\\\[*" {
set token LBRACKET
set tokenendptr [expr $ptr + 1]
}
"\\\]*" {
set token RBRACKET
set tokenendptr [expr $ptr + 1]
}
"$*" {
#Variable
set ident ""
set tokenendptr $ptr
incr tokenendptr
set c [string index $php $tokenendptr]
while {![regexp "\[\\\[\\\];(){},. \t\n\r\]" $c]} {
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
}
set token [list VARIABLE $ident]
}
default {
#Identifier or keyword
set ident ""
set tokenendptr $ptr
set c [string index $php $tokenendptr]
while {![regexp "\[\\\[\\\];(){},. \t\n\r\]" $c]} {
append ident $c
incr tokenendptr
set c [string index $php $tokenendptr]
}
if {$ident == ""} then {
set token [list ERROR "Invalid character '$c'."]
return
}
if [string is integer $ident] then {
# PHP integer notation is exactly the same as for TCL
set token [list INTEGER $ident]
} else {
if [info exists keywords($ident)] then {
set token [list $keywords($ident)]
} else {
set token [list IDENT $ident]
}
}
}
}
}
proc php_init_tokenizer {} {
global php ptr tokenendptr token
set tokenendptr $ptr
php_get_token
}
proc php_tokenize_chunk {} {
global php ptr token tokenendptr
php_init_tokenizer
set parsed ""
while {$token != "END_OF_CHUNK" && $token != "ERROR"} {
puts $token
lappend parsed $token
set ptr $tokenendptr
php_get_token
}
set ptr $tokenendptr
return $parsed
}
proc php_php2chunks {phpcode} {
global php ptr endtoken
set php $phpcode
set result ""
set ptr 0
set chunk ""
set state "notag"
while {$ptr < [string length $php]} {
switch -glob -- [string range $php $ptr [expr $ptr + 4]] {
"<\\?php" {
lappend result [list html $chunk]
set chunk ""
set endtoken "?>"
set ptr [expr $ptr + 5]
lappend result [list code [php_tokenize_chunk]]
}
"<\\?*" {
lappend result [list html $chunk]
set chunk ""
set endtoken "?>"
set ptr [expr $ptr + 2]
lappend result [list code [php_tokenize_chunk]]
}
"<%=*" {
lappend result [list html $chunk]
set chunk ""
set endtoken "%>"
set ptr [expr $ptr + 3]
lappend result [list expr [php_tokenize_chunk]]
}
"<%*" {
lappend result [list html $chunk]
set chunk ""
set endtoken "%>"
set ptr [expr $ptr + 2]
lappend result [list code [php_tokenize_chunk]]
}
"<*" {
incr ptr
append chunk "<"
set c [string index $php $ptr]
while {$c != ">"} {
if {$c == "\""} then {
append chunk $c
incr ptr
set c [string index $php $ptr]
while {$c != "\""} {
append chunk $c
incr ptr
set c [string index $php $ptr]
}
}
if {$c == "'"} then {
append chunk $c
incr ptr
set c [string index $php $ptr]
while {$c != "'"} {
append chunk $c
incr ptr
set c [string index $php $ptr]
}
}
append chunk $c
incr ptr
set c [string index $php $ptr]
}
}
}
set c [string index $php $ptr]
append chunk $c
incr ptr
}
lappend result [list html $chunk]
}
proc process_chunks {chunks} {
foreach chunk $chunks {
foreach {type data} $chunk {}
switch $type {
"html" {
puts "ns_puts {[string map "{ \\{ } \\}" $data]}"
}
"code" {
puts "code chunk"
}
"expr" {
puts "expr chunk"
}
}
}
}
proc php_compile {php} {
set chunks [php_php2chunks $php]
puts $chunks
process_chunks $chunks
}
if {$argc != 1} then {
puts file2 "Usage: phptcl <filename.php>"
} else {
set f [open [lindex $argv 0] r]
set php [read $f]
close $f
php_compile $php
}
--
AOLserver - http://www.aolserver.com/
To Remove yourself from this list, simply send an email to <[EMAIL PROTECTED]> with the
body of "SIGNOFF AOLSERVER" in the email message. You can leave the Subject: field of
your email blank.