ovidiu 02/01/17 10:06:16 Modified: src/scratchpad/schecoon/scheme sitemap.scm Log: Get serious: process the SXML representation of the sitemap and generate an equivalent sitemap function definition. Revision Changes Path 1.3 +397 -95 xml-cocoon2/src/scratchpad/schecoon/scheme/sitemap.scm Index: sitemap.scm =================================================================== RCS file: /home/cvs/xml-cocoon2/src/scratchpad/schecoon/scheme/sitemap.scm,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- sitemap.scm 8 Jan 2002 23:18:52 -0000 1.2 +++ sitemap.scm 17 Jan 2002 18:06:16 -0000 1.3 @@ -4,103 +4,405 @@ ;; Date: December 12, 2001 ;; -;; Pipeline definition. -;; -;; A pipeline describes a list of operations to be performed usually -;; on a resource, usually an XML document. A pipeline starts with a -;; generator, followed by zero or more transformers, and a -;; serializer. Such a pipeline is used to process XML documents and -;; generate some other representation of the original document -;; (usually HTML, WML, PDF, SVG etc.). -;; -;; For resources which just need to be passed through without any -;; modification, the pipeline could have a single step, composed of a -;; reader. -;; -;; To facilitate reuse, pipelines can have names. Pipeline names have -;; to be unique, or an error is signaled. -;; -;; Pipelines are simply functions that accept arguments. These -;; arguments are usually passed from the sitemap, where they are -;; computed usually from the HTTP request. These functions should -;; accept a variable number of arguments. This is because the actual -;; values for the parameters are generated as result of the pattern -;; matching. -;; -;; Below is an example of how pipelines definition look in -;; Scheme. Another Scheme module is responsible to mapping the -;; external XML representation of the pipelines definition into this -;; internal one. -;; -;;(define pipelines -;; (define-pipelines -;; (define-pipeline docbook-xhtml (file . rest) -;; (generate file) -;; (transform '((type xslt) -;; (name "docbook2xhtml.xsl") -;; (parameter "view-source" (concat "docs/samples/" file)))) -;; (serialize (type xml)))) -;; -;; (define-pipeline gif-image (file . rest) -;; (read (concat "src/" file ".gif") "image/gif"))) -;; )) -;; -;; In the above example, we have two pipelines, `docbook-xhtml' and -;; `gif-image'. They both take as arguments a file name component, -;; which is used to generate the initial resource file. Of course, you -;; can define as many arguments as you want for a pipeline definition -;; function. -;; -;; The `define-pipelines' definition takes such a structure and -;; returns an associative list whose key is a pipeline name and the -;; value is the procedure that defines the pipeline. - (load-module "sisc.modules.Regexp") +(load-module "sisc.modules.J2S") +(load-module "org.apache.cocoon.scheme.sitemap.SchemeSitemap") -(define define-pipelines - (lambda l l)) - -(define-syntax define-pipeline - (syntax-rules () - ((_ name args body ...) - (cons (quote name) - (lambda args (begin body ...)))))) - -;; Sitemap definition +;; A Cocoon XML sitemap description is processed by the Scheme code in +;; this file. The processing happens in several steps: ;; -;; The sitemap specifies how to map URLs to pipelines, or to Scheme -;; functions, to control the page flow in an application. +;; - in the first step, the XML sitemap file is translated into an +;; SXML representation. SXML is the Scheme representation of an XML +;; tree. This translation happens in an external Java class which uses +;; Cocoon's XML parser to do the parsing. +;; +;; - in the second step, the SXML representation of the sitemap is +;; translated into Scheme code, which is evaluated. This evaluation +;; process defines a Scheme function, which is invoked at runtime to +;; process the HTTP request. +;; +;; Here is a sample example. Suppose we have the following XML sitemap: +;; +;; <pipelines> +;; <pipeline> +;; <match pattern="documentation/(.*).html"> +;; <generate src="docs/{1}.xml" type="file"> +;; <param name="test" value="123"/> +;; </generate> +;; <transform src="stylesheets/document2html.xsl"> +;; <param name="test2" value="456"/> +;; </transform> +;; <serialize/> +;; </match> +;; +;; <match pattern="sites/images/(.*).gif"> +;; <read src="{1}" mime-type="image/gif"/> +;; </match> +;; </pipeline> +;; </pipelines> +;; +;; The SXML representation of the above XML fragment looks like this: +;; +;; (pipelines (@ (*line* 1)) +;; (pipeline (@ (*line* 2)) +;; (match (@ (pattern "documentation/(.*).html") (*line* 3)) +;; (generate (@ (src "docs/{1}.xml") (type "file") (*line* 4)) +;; (param (@ (name "test") (value "123") (*line* 5))) +;; (transform (@ (src "stylesheets/document2html.xsl") (@ (*line* 6))) +;; (param (@ (name "test2") (value "456") (@ (*line* 7)))) +;; (serialize (@ (*line* 8))) +;; ) +;; +;; (match (@ (pattern "sites/images/(.*).gif") (*line* 10)) +;; (read (@ (src "{1}") (mime-type "image/gif") (@ (*line* 11))))) +;; ))) +;; +;; The line numbers where an element starts are added by the XML +;; parser to the SXML representation as attributes of the element. +;; +;; The Scheme code translates the above SXML representation in the +;; following code. ;; ;;(define the-sitemap -;; (define-sitemap -;; (match "sql/(.*)" (file . rest) -;; (call-pipeline docbook-xhtml))) -;; -;; (match "slides/(.*)\.gif" (file . rest) -;; (call-pipeline gif-image)) -;; -;; (match "view-source/(*).(*)" (file type . rest) -;; (generate file)) -;; (transform '((type xslt) (name "xsp")))) -;; (serialize (type xml))))) -;; -;; (match "shopping-cart" (dummy . args) -;; (shopping-cart)) -;; )) - -(define-syntax define-sitemap - (syntax-rules () - ((_) #f) - ((_ m ...) - (lambda (url sitemap env) (or (m url sitemap env) ...))))) - -(define-syntax match - (syntax-rules () - ((_ pattern args body ...) - (let ((rx (regexp pattern))) - (lambda (url sitemap env) - (let ((result (regexp-match rx url))) - (if result - (apply (lambda args (begin body ...)) sitemap env (cdr result)) - #f))))) - )) +;; (let ((rx1 (regexp "documentation/(.*).html")) +;; (rx2 (regexp "sites/images/(.*).gif"))) +;; (define (p1 url sitemap env) +;; (let ((result (regexp-match rx1 url))) +;; (if result +;; (apply +;; (lambda (arg1 . rest) +;; (sitemap:process +;; sitemap env '() +;; (sitemap:serialize +;; sitemap env '() +;; (sitemap:transform +;; sitemap env +;; (list (cons 'params (list (cons "test" "123") +;; (cons "test2" "456"))) +;; (cons 'src "stylesheets/document2html.xsl")) +;; (sitemap:generate +;; sitemap env +;; (list (cons 'params (list (cons "test" "123") +;; (cons "test2" "456"))) +;; (cons 'src (string-append "docs/" arg1 ".xml")) +;; (cons 'type "file"))))))) +;; (cdr result)) +;; #f))) +;; +;; (define (p2 url sitemap env) +;; (let ((result (regexp-match rx2 url))) +;; (if result +;; (apply (lambda (arg1 . rest) +;; (sitemap:process +;; sitemap env '() +;; (sitemap:read +;; sitemap env +;; '(("src" . (string-append "" arg1 "")) +;; ("mime-type" . "image/gif"))))) +;; (cdr result)) +;; #f))) +;; +;; (lambda (url sitemap env) +;; (or (p1 url sitemap env) +;; (p2 url sitemap env))))) +;; +;; +;; Notice that all the {1}, {2}, ...{n} get expanded in the body of +;; the generated function. Thus there is no need to do a runtime +;; replacement of the {n} arguments in the URL string. +;; +;; The effect is that `the-sitemap' will be bound to a Scheme function +;; which, when executed, will process the HTTP request as described in +;; the original XML sitemap. + + +;; The main function to process an SXML representation of the sitemap, +;; and generate a function which is the executable version of the +;; sitemap. +;; +;; process-sitemap:: SXML -> (URL Sitemap Env -> #<void>) +;; +;; This returned function should be invoked at runtime to process an +;; HTTP request. +;; +;; The side effect of executing this returned function is the +;; processing of the input HTTP request as defined by the sitemap. +;; +(define (process-sitemap sitemap) + (let ((exit #f) + (arg-regexp (regexp "/({[0-9]+})/")) + (number-arg-regexp (regexp "[{}]")) + (match-pattern-regexp (regexp "/(\\([^)]+\\))/")) + (pattern-regexps-no 0) + (pcount 0)) + + ;; Print out an error message, showing the line in the XML document + ;; where the error occured, if such information is present in the + ;; SXML tree. + (define (xml-error node message) + (let ((line (sxml:attr node '*line*))) + (if line + (begin (display "In line ") (display line) (display ": "))) + (display message) (newline) (exit 'error))) + + ;; Returns the remaining pipeline after the first element has been + ;; removed. + (define (rest-of-nodes nodelist) + ((take-after (lambda (node) #t)) nodelist)) + + ;; Takes a string value and replaces in it all occurrences of + ;; '{n}', where 'n' is a number, with argN. If such an occurrence + ;; is found, the value returned is an expression of this form: + ;; + ;; "...{n}..." -> (string-append "..." argN "...") + ;; + ;; If no such occurrence is found, the value is simply returned. + (define (expand-value node value) + (let* ((exp (regexp-split arg-regexp value)) + (length (vector-length exp))) + (if (eq? length 1) + value + `(string-append + ,@(vector->list + (let loop ((index 1)) + (if (>= index length) + exp + (let* ((arg (vector-ref exp index)) + (n (vector-ref (regexp-split/delimiter + number-arg-regexp arg) 1))) + ;; Check to see if `n' is greater than the + ;; maximum number of paranthesised + ;; expressions in the original pattern. + (if (> (string->number n) pattern-regexps-no) + (xml-error node (format "Reference to inexistent regexp pattern ~a, maximum allowed is ~s" n pattern-regexps-no))) + (if (< (string->number n) 1) + (xml-error node (format "Regexp pattern argument should be greater than 1, got ~a" n))) + (vector-set! exp index + (string->symbol (string-append "arg" n))) + (loop (+ index 2))))))) + ))) + + ;; Collect embedded <param> elements into a list of name/value + ;; pairs and return it. + (define (get-params elements) + (if (eq? elements '()) + '() + (let* ((nodelist ((node-pos 1) elements)) + (node (if (null? nodelist) '() (car nodelist))) + (name (sxml:attr node 'name)) + (value (sxml:attr node 'value))) + (if (null? name) + (xml-error node "Attribute 'name' is required in <param>")) + (if (null? value) + (xml-error node "Attribute 'value' is required in <param>")) + (cons `(cons ,name ,(expand-value node value)) + (get-params (rest-of-nodes elements))) + ))) + + ;; Obtain the list of required and optional arguments, as well as + ;; the parameters, if they are needed. + (define (get-attributes node required optional allows-params) + (let* ((elem-name (sxml:element-name node)) + (args '()) + (params '()) + (required-attrs + (map + (lambda (attr-name) + (let ((attr (sxml:attr node attr-name))) + (if (not attr) + (xml-error node + (format "'~s' attribute required in <~s>" + attr-name elem-name)) + `(cons ',attr-name ,(expand-value node attr))))) + required)) + (optional-attrs '())) + (for-each + (lambda (attr-name) + (let ((attr (sxml:attr node attr-name))) + (if attr + (set! optional-attrs + (cons `(cons ',attr-name ,(expand-value node attr)) + optional-attrs))))) + optional) + + (if (not (null? required-attrs)) + (set! args (append args required-attrs))) + (if (not (null? optional-attrs)) + (set! args (append args optional-attrs))) + (if allows-params + (begin + (set! params (get-params (sxml:content node))) + (if (not (null? params)) + (set! args (cons `(cons 'params (list ,@params)) args))))) + (if (null? args) `('()) `((list ,@args))))) + + ;; The following match- functions should probably be transformed + ;; into a macro, and described at a much higher level than now. A + ;; grammar like approach seems appropriate here. + + ;; Translate a <generate> element. + (define (match-generate pipeline) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (if (not (eq? (sxml:element-name node) 'generate)) + #f + (begin + (let ((args (get-attributes node '(src) '(type) #t))) + (match-transform + (rest-of-nodes pipeline) + `(sitemap:generate sitemap env ,@args)) + ))))) + + ;; Translate zero or more <transform> elements + (define (match-transform pipeline compfunc) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (cond + ((eq? (sxml:element-name node) 'transform) + (let ((args (get-attributes node '(src) '(type) #t))) + (match-transform + (rest-of-nodes pipeline) + `(sitemap:transform sitemap env ,@args ,compfunc)) + )) + (else (match-serialize pipeline compfunc)) + ))) + + ;; Transform zero or one <serializer> elements + (define (match-serialize pipeline compfunc) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (cond + ;; A serializer has been explicitly defined + ((eq? (sxml:element-name node) 'serialize) + (let ((args (get-attributes node '() '(type mime-type) #t))) + (match-pipeline-end + (rest-of-nodes pipeline) + `(sitemap:serialize sitemap env ,@args ,compfunc)))) + + ;; End of the pipeline with no serializer specified + ((eq? node '()) + `(sitemap:serialize sitemap env ,compfunc)) + + ;; Anything else is an error + (else + (xml-error node "Only <transformer> or <serialize> allowed here")) + ))) + + ;; Translate a <read> element + (define (match-reader pipeline) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (cond + ((eq? (sxml:element-name node) 'read) + (let ((args (get-attributes node '(src) '(type mime-type) #t))) + (match-pipeline-end + (rest-of-nodes pipeline) + `(sitemap:read sitemap env ,@args)) + )) + (else #f) + ))) + + ;; Make sure nothing follows the pipeline definition + (define (match-pipeline-end pipeline compfunc) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (if (null? node) + compfunc + (xml-error node "No element allowed in this context")))) + + ;; The entry point in matching a pipeline. Transforms a pipeline + ;; definition into a Scheme function whose body executes the + ;; described pipeline. + (define (match-pipeline pipeline) + (let ((procname (string->symbol (format "p~a" pcount))) + (rxname (string->symbol (format "rx~a" pcount)))) + `(define (,procname url sitemap env) + (let ((result (regexp-match ,rxname url))) + (if result + (apply + (lambda ,(let loop ((index 1)) + (if (> index pattern-regexps-no) + 'rest + (cons + (string->symbol + (format "arg~a" (number->string index))) + (loop (+ index 1))))) + (sitemap:process + sitemap env '() + ,(or + (match-generate pipeline) + (match-reader pipeline) + (let* ((nodelist ((node-pos 1) pipeline)) + (node (if (null? nodelist) '() (car nodelist)))) + (xml-error node "Invalid pipeline definition"))))) + (cdr result)) + #f))) + )) + + ;; This is the main processing function for a 'match' node in the + ;; SXML representation of the sitemap. This function returns an + ;; entry like this: + ;; + ;; (regexp . matcher-function-representation) + ;; + ;; The `apply-templates' function which invokes `process-match' + ;; will collect all these pair and return them in a list. + (define (process-match node) + ;; Check for the presence of the 'pattern' attribute and signal + ;; an error if not present + (let ((pattern (sxml:attr node 'pattern))) + (if (not pattern) + (xml-error + node "required 'pattern' attribute for <match> is not present")) + ;; Increment the pipelines count + (set! pcount (+ pcount 1)) + ;; Translate the pipeline definitions into equivalent Scheme + ;; functions + (let ((pipeline (reverse (sxml:child-elements node))) + (exp-pattern (regexp-split match-pattern-regexp pattern)) + (rxname (string->symbol (format "rx~a" pcount)))) + (set! pattern-regexps-no (/ (- (vector-length exp-pattern) 1) 2)) + (list (cons `(,rxname (regexp ,pattern)) + (match-pipeline pipeline))) + ))) + + ;; Process the SXML representation of the sitemap. This is done by + ;; invoking the apply-templates function on the SXML representation + ;; of the sitemap. + ;; + ;; We first setup the exit function, which will be called in case we + ;; encounter semantic errors. + (call/cc + (lambda (k) + (set! exit k) + (set! pcount 0) + ;; `matchers' will contain a list of (regexp + ;; . matcher-function). We iterate on it to construct the top + ;; level function that represents the sitemap. + (let* ((matchers + (apply-templates + sitemap + `((match . ,(lambda (node) (process-match node)))))) + (sitemap-code + `(let ,(let loop ((ms matchers)) + (if (null? ms) + '() + (cons (caar ms) + (loop (cdr ms))))) + ,@(let loop ((ms matchers)) + (if (null? ms) + '() + (cons (cdar ms) + (loop (cdr ms))))) + (lambda (url sitemap env) + (or ,@(let loop ((index 1)) + (if (> index pcount) + '() + (cons + (list (string->symbol (format "p~a" index)) + 'url 'sitemap 'env) + (loop (+ index 1)))))))))) +; (newline) (write sitemap-code) (newline) + (eval sitemap-code (interaction-environment)) + ))) + ))
---------------------------------------------------------------------- In case of troubles, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]