taylor 02/03/27 09:43:58
Added: src/java/org/apache/jetspeed/util/rewriter
HTMLParserAdaptor.java HTMLRewriter.java
Rewriter.java SampleRewriter.java
SwingParserAdaptor.java
Log:
Start of a new rewriter framework. Still to be integrated with WebPagePortlet
configuration parameters.
Once that's done I will deprecate the util/HTMLRewriter.java class.
The new Rewriter does:
1. pluggable back-end parsers, so that we are not tied to the Swing parser
(for example, if someone wants to plug in an XHTML or XML parser).
2. you can get events and extend the basic rewriter to decouple any business logic
from the core functionality
The events are:
- generateNewUrl -- called when you need to generate a new url
- enterSimpleTagEvent
- exitSimpleTagEvent
- enterStartTagEvent
- exitStartTagEvent
- enterEndTagEvent
- exitEndTagEvent
- convertTagEvent
Revision Changes Path
1.1
jakarta-jetspeed/src/java/org/apache/jetspeed/util/rewriter/HTMLParserAdaptor.java
Index: HTMLParserAdaptor.java
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Jetspeed" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache" or
* "Apache Jetspeed", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jetspeed.util.rewriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.io.Reader;
/**
* Interface for HTML Parser Adaptors.
*
* An adaptor wraps one concrete HTML parser behind a single {@link #run}
* call, so that a rewriter can be used with different back-end parsers.
*
* @author <a href="mailto:[EMAIL PROTECTED]">David Sean Taylor</a>
* @version $Id: HTMLParserAdaptor.java,v 1.1 2002/03/27 17:43:58 taylor Exp $
*/
public interface HTMLParserAdaptor
{
/**
* Parses and rewrites a HTML document, rewriting all URLs as either fully
* proxied URLs or as web-application full URLs, not relative.
*
* Given a relative URL, such as "/content/images/my.gif", it can be
* rewritten as either a proxied URL, for example:
*
* "http://www.webserver.com?js_path=/content/images/my.gif"
*
* or a full path to the URL on the web server:
*
* "http://www.webserver.com/content/images/my.gif"
*
* @param html Reader supplying the HTML content to be converted.
* @param proxyRoot The root URL of the Proxy Server.
* @param baseUrl The Base URL of the host being proxied.
* @throws MalformedURLException If the baseUrl is not a valid URL or if an
* URL inside the document could not be converted.
* @return An HTML-String with rewritten URLs.
*/
String run(Reader html,
String proxyRoot,
String baseUrl)
throws MalformedURLException;
}
1.1
jakarta-jetspeed/src/java/org/apache/jetspeed/util/rewriter/HTMLRewriter.java
Index: HTMLRewriter.java
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Jetspeed" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache" or
* "Apache Jetspeed", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jetspeed.util.rewriter;
// java.io
import java.io.IOException;
import java.io.CharArrayWriter;
import java.io.Reader;
// java.net
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
// this makes it dependent on Swing...need an abstraction WTP
import javax.swing.text.html.HTML;
import javax.swing.text.MutableAttributeSet;
/**
 * Basic {@link Rewriter} for rewriting HTML content so that links point back
 * at a proxy server.
 *
 * All parser event callbacks are no-ops here; subclasses override the ones
 * they are interested in.
 *
 * @author <a href="mailto:[EMAIL PROTECTED]">David Sean Taylor</a>
 * @version $Id: HTMLRewriter.java,v 1.1 2002/03/27 17:43:58 taylor Exp $
 */
public class HTMLRewriter implements Rewriter
{
    /** Name of the query parameter that carries the proxied target URL. */
    public final static String DEFAULT_PROXY_PARAM = "js_path";

    /** Separator between URL path segments. */
    protected static final char PATH_SEPARATOR = '/';

    // Lower-case image extensions recognised by findImage.
    private static final char[] GIF = {'g', 'i', 'f'};
    private static final char[] PNG = {'p', 'n', 'g'};
    private static final char[] JPG = {'j', 'p', 'g'};

    // Values of the most recent rewrite() call; generateNewUrl reads proxyRoot.
    private String proxyRoot;
    private String baseURL;

    /**
     * Construct a basic HTML Rewriter.
     */
    public HTMLRewriter()
    {
    }

    /**
     * Entry point into rewriting HTML content.
     *
     * Remembers the two URLs on this instance, then runs a
     * {@link SwingParserAdaptor} bound to this rewriter over the input.
     *
     * @param input     reader supplying the HTML to rewrite.
     * @param proxyRoot the base URL of the proxy server.
     * @param baseURL   the base URL of the target host.
     * @return the rewritten HTML as returned by the parser adaptor.
     *
     * @exception MalformedURLException propagated from the parser adaptor.
     */
    public String rewrite(Reader input,
                          String proxyRoot,
                          String baseURL)
        throws MalformedURLException
    {
        this.proxyRoot = proxyRoot;
        this.baseURL = baseURL;

        HTMLParserAdaptor parser = new SwingParserAdaptor(this);
        return parser.run(input, proxyRoot, baseURL);
    }

    /**
     * Callback used by the HTMLParserAdaptor implementation to rewrite a URL
     * so that it points at the proxy server.
     *
     * The result has the form
     * <code>proxyRoot?js_path=&lt;URL-encoded absolute URL&gt;</code>, with
     * every '&amp;' in the final string replaced by '@'. If the URL cannot be
     * built, a diagnostic is printed to System.err and the URL-encoded
     * targetURL is returned instead.
     *
     * @param targetURL the URL to be rewritten back to the proxy server.
     * @param tag       the tag the URL was found in (not used here).
     * @param attribute the attribute the URL was found in (not used here).
     * @return the rewritten URL, or the encoded targetURL on failure.
     */
    public String generateNewUrl(String targetURL, HTML.Tag tag, HTML.Attribute attribute)
    {
        try
        {
            // NOTE(review): the target is resolved against proxyRoot, not the
            // stored baseURL - confirm this is intended.
            URL full = new URL(new URL(proxyRoot), targetURL);
            String fullPath = full.toString();

            // some attributes shouldn't be proxied, like images, stylesheetrefs
            StringBuffer buffer = new StringBuffer(proxyRoot);
            buffer.append("?");
            buffer.append(DEFAULT_PROXY_PARAM);
            buffer.append("=");
            buffer.append(URLEncoder.encode(fullPath));
            return buffer.toString().replace('&', '@');
        }
        catch (MalformedURLException e)
        {
            // Only URL construction can fail above; Errors are no longer swallowed.
            //FIXME: transient print to debug...
            System.err.println( "HTMLRewriter: BASE=" + proxyRoot);
            System.err.println( "target=" + targetURL);
            return URLEncoder.encode(targetURL);
        }
    }

    /**
     * Returns true if all rewritten URLs should be sent back to the proxy server.
     *
     * @return always true in this implementation.
     */
    public boolean proxyAllTags()
    {
        return true;
    }

    /**
     * Rewrites image references inside the double-quoted strings of a script.
     *
     * For every opening double quote whose string is recognised by
     * {@link #findImage}, <code>translate(proxyHost, base)</code> is inserted
     * right after the quote, followed by the original text. Exactly one path
     * separator is kept between <code>base</code> and the original text.
     * Inside quoted strings every '&amp;' is written as '@'.
     *
     * @param script    the script text to rewrite.
     * @param url       not used by this implementation.
     * @param proxyHost the proxy URL passed to {@link #translate}.
     * @param base      the target base URL passed to {@link #translate}.
     * @return the rewritten script, encoded with the platform default charset.
     * @throws IOException declared because the writer API declares it.
     */
    public static byte[] rewriteScript(String script,
                                       String url,
                                       String proxyHost,
                                       String base)
        throws IOException
    {
        int baseLength = base.length();
        // Guarded so an empty base no longer triggers charAt(-1).
        boolean baseEndsWithSeparator =
            baseLength > 0 && base.charAt(baseLength - 1) == PATH_SEPARATOR;

        char[] chars = script.toCharArray();
        CharArrayWriter writer = new CharArrayWriter(script.length() + 100);
        boolean translating = false; // true while inside a quoted string

        for (int ix = 0; ix < chars.length; ix++)
        {
            char current = chars[ix];
            if (current == '"')
            {
                if (translating)
                {
                    // Closing quote: falls through and is copied below.
                    translating = false;
                }
                else
                {
                    translating = true;
                    writer.write(current);
                    if (findImage(chars, ix + 1))
                    {
                        writer.write(translate(proxyHost, base));
                        // findImage returning true guarantees chars[ix + 1] exists.
                        boolean pathStartsWithSeparator = chars[ix + 1] == PATH_SEPARATOR;
                        if (!pathStartsWithSeparator && !baseEndsWithSeparator)
                        {
                            writer.write(PATH_SEPARATOR);
                        }
                        else if (pathStartsWithSeparator && baseEndsWithSeparator)
                        {
                            ix++; // skip the duplicate separator
                        }
                    }
                    continue;
                }
            }

            if (translating && current == '&')
            {
                writer.write('@');
            }
            else
            {
                writer.write(current);
            }
        }
        return writer.toString().getBytes();
    }

    /**
     * Builds <code>proxyURL?js_path=targetURL</code>, replacing every
     * '&amp;' in targetURL with '@'. The target is not URL-encoded.
     *
     * @param proxyURL  the proxy URL used as prefix.
     * @param targetURL the URL placed in the proxy parameter.
     * @return the combined URL string.
     */
    public static String translate(String proxyURL, String targetURL)
    {
        StringBuffer buffer = new StringBuffer(proxyURL);
        buffer.append("?");
        buffer.append(DEFAULT_PROXY_PARAM);
        buffer.append("=");
        buffer.append(targetURL.replace('&', '@'));
        return buffer.toString();
    }

    /**
     * Finds an image hyperlink in a quoted string.
     *
     * Scans from ix up to the next double quote. Only the first '.' is
     * examined: the result is true when it is immediately followed by
     * "gif", "png" or "jpg" (lower case). What follows the extension is not
     * checked.
     *
     * NOTE: this function is just the start of script-parsing.
     * A much more robust implementation will be necessary.
     *
     * @param chars The character array to search.
     * @param ix The starting index to search from in the character array.
     * @return If the image string is found, returns true otherwise false.
     */
    protected static boolean findImage(char[] chars, int ix)
    {
        for (int iy = ix; iy < chars.length; iy++)
        {
            if (chars[iy] == '"')
            {
                return false;
            }
            if (chars[iy] == '.')
            {
                int start = iy + 1;
                // One whole extension must match; mixing characters from
                // different extensions (e.g. ".gpg") is not an image.
                return matchesAt(chars, start, GIF)
                    || matchesAt(chars, start, PNG)
                    || matchesAt(chars, start, JPG);
            }
        }
        return false;
    }

    // True when chars holds exactly the given extension starting at start.
    private static boolean matchesAt(char[] chars, int start, char[] extension)
    {
        if (start + extension.length > chars.length)
        {
            return false;
        }
        for (int i = 0; i < extension.length; i++)
        {
            if (chars[start + i] != extension[i])
            {
                return false;
            }
        }
        return true;
    }

    /*
     * Simple Tag Events (no-ops in the base class)
     */
    public void enterSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
    {
    }

    public String exitSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
    {
        return null;
    }

    /*
     * Start Tag Events (no-ops in the base class)
     */
    public void enterStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
    {
    }

    public String exitStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
    {
        return null;
    }

    /*
     * End Tag Events (no-ops in the base class)
     */
    public void enterEndTagEvent(HTML.Tag tag)
    {
    }

    public String exitEndTagEvent(HTML.Tag tag)
    {
        return null;
    }

    /*
     * Convert Tag Events (no-op in the base class)
     */
    public void convertTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
    {
    }
}
1.1
jakarta-jetspeed/src/java/org/apache/jetspeed/util/rewriter/Rewriter.java
Index: Rewriter.java
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Jetspeed" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache" or
* "Apache Jetspeed", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jetspeed.util.rewriter;
// java.io
import java.io.IOException;
import java.io.Reader;
// java.net
import java.net.MalformedURLException;
import java.net.URL;
// this makes it dependent on Swing...need an abstraction WTP
import javax.swing.text.html.HTML;
import javax.swing.text.MutableAttributeSet;
/**
*
* Interface for URL rewriting.
*
* Besides the rewriting entry point, the interface declares the event
* callbacks that a parser adaptor invokes while it walks a document.
*
* @author <a href="mailto:[EMAIL PROTECTED]">David Sean Taylor</a>
* @version $Id: Rewriter.java,v 1.1 2002/03/27 17:43:58 taylor Exp $
*/
public interface Rewriter
{
/**
* Entry point into rewriting HTML content.
*
* Reads stream from proxied host, runs configured HTML parser against that
* stream, rewriting relevant links, and returns the rewritten content.
*
* @param input the HTML input stream.
* @param proxyRoot the base URL of the proxy server.
* @param baseURL the base URL of the target host.
* @return the rewritten HTML.
*
* @exception MalformedURLException if a URL could not be converted.
*/
String rewrite(Reader input, String proxyRoot, String baseURL)
throws MalformedURLException;
/**
* <p>
* This callback is called by the HTMLParserAdaptor implementation to write
* back all rewritten URLs to point to the proxy server. Each implementation
* writes specifically for their own type of resources.</p>
* <p>
* Given the targetURL, rewrites the link as a link back to the proxy server.
* </p>
* @param targetURL the URL to be rewritten back to the proxy server.
* @param tag the HTML tag the URL was found in.
* @param attribute the attribute of that tag holding the URL.
* @return the rewritten URL.
*/
String generateNewUrl(String targetURL, HTML.Tag tag, HTML.Attribute attribute);
/**
* Returns true if all rewritten URLs should be sent back to the proxy server.
*
* @return true if all URLs are rewritten back to proxy server.
*/
boolean proxyAllTags();
// Parser event handling.
// The enter/exit pairs exist for simple tags, start tags and end tags;
// convertTagEvent receives the tag's mutable attribute set.
// The exit callbacks return a String: HTMLRewriter returns null from all of
// them, while SampleRewriter returns extra markup for FORM start tags.
// NOTE(review): presumably a non-null result is emitted after the tag;
// confirm against the parser adaptor.
void enterSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs);
String exitSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs);
void enterStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs);
String exitStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs);
void enterEndTagEvent(HTML.Tag tag);
String exitEndTagEvent(HTML.Tag tag);
void convertTagEvent(HTML.Tag tag, MutableAttributeSet attrs);
}
1.1
jakarta-jetspeed/src/java/org/apache/jetspeed/util/rewriter/SampleRewriter.java
Index: SampleRewriter.java
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Jetspeed" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache" or
* "Apache Jetspeed", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jetspeed.util.rewriter;
// java.io
import java.io.IOException;
import java.io.CharArrayWriter;
import java.io.Reader;
// java.net
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
// this makes it dependent on Swing...need an abstraction WTP
import javax.swing.text.html.HTML;
import javax.swing.text.MutableAttributeSet;
/**
*
* Sample of extending HTML Rewriter for your specific needs
*
*
* @author <a href="mailto:[EMAIL PROTECTED]">David Sean Taylor</a>
* @version $Id: SampleRewriter.java,v 1.1 2002/03/27 17:43:58 taylor Exp $
*/
public class SampleRewriter extends HTMLRewriter
{
private boolean debug = false;
private String basePortalURL;
private String fullPortalURL;
private String sampleURL;
private String sessionID = "NONE";
private String formID = "NONE";
private boolean sampleEndFlag = false;
public String getSessionID()
{
return sessionID;
}
public String getFormID()
{
return formID;
}
public boolean getSampleEndFlag()
{
return sampleEndFlag;
}
/*
* Basic constructor for creating a Sample Rewriter.
*
* @param basePortalURL Base Portal URL
* @param fullPortalURL Full Portal URL with path parameters and query strings
(sessionid)
* @param sampleURL The sample URL.
*
*/
public SampleRewriter(String basePortalURL, String fullPortalURL, String
sampleURL )
{
this.basePortalURL = basePortalURL;
this.fullPortalURL = fullPortalURL;
this.sampleURL = sampleURL;
}
/*
* Entry point into rewriting HTML content.
*
* Reads stream from proxied host, runs configured HTML parser against that
stream,
* rewriting relevant links, and writes the parsed stream back to the client.
*
* @param input the HTML input stream.
* @param proxyRoot the base URL of the proxy server.
* @param baseURL the base URL of the target host.
* @return the rewritten HTML output stream.
*
* @exception MalformedURLException a servlet exception.
*/
public String rewrite(Reader input)
throws MalformedURLException
{
String rewrittenHTML = "";
HTMLParserAdaptor parser = new SwingParserAdaptor(this);
rewrittenHTML = parser.run(input, fullPortalURL, sampleURL);
return rewrittenHTML;
}
/*
* <p>
* This callback is called by the HTMLParserAdaptor implementation to write
* back all rewritten URLs to point to the proxy server. The MMS implementation
* writes specifically for network element ids and relative paths to MMS
* resources.
* </p>
* <p>
* Given the targetURL, rewrites the link as a link back to the proxy server.
* </p>
*
* Example format:
*
* http://proxyserver/proxy?neid=id?nepath=path
*
* @param targetURL the URL to be rewritten back to the proxy server.
* @param baseURL the base URL of the target host.
* @param proxyURL the base URL of the proxy server.
* @return the rewritten URL to the proxy server.
*
* @exception MalformedURLException a servlet exception.
*/
public String generateNewUrl(String targetURL, HTML.Tag tag, HTML.Attribute
attribute)
{
if (debug)
System.out.println("[rewriter] Tag: " + tag.toString() + " Attribute: "
+ attribute.toString() + " targetURL: " + targetURL + " target = " + fullPortalURL +
"]");
// The only URL we want to re-write is ACTION attribute of the <FORM> tag.
// Ignore all others
if (tag == HTML.Tag.FORM && attribute == HTML.Attribute.ACTION) {
// Strip the session Id value out of the ACTION attribute value
int sessionLocation = targetURL.indexOf( "?sessionId" );
if (sessionLocation > -1) {
int equalsLocation = targetURL.indexOf( "=", sessionLocation );
if (equalsLocation > -1) {
int ampLocation = targetURL.indexOf( "&", equalsLocation );
if (ampLocation > -1) {
sessionID = targetURL.substring( equalsLocation + 1,
ampLocation );
} else {
sessionID = targetURL.substring( equalsLocation + 1 );
}
}
}
if (sampleEndFlag) {
// The sample session is being terminated, make the form action
return to the portal home page
return basePortalURL;
} else {
// Make the form action run the same portal page
return fullPortalURL;
}
}
// This is a tag that we do not wish to re-write, return it's own value
unmodified
return targetURL;
}
/*
* Returns true if all rewritten URLs should be sent back to the proxy server.
*
* @return true if all URLs are rewritten back to proxy server.
*/
public boolean proxyAllTags()
{
return true;
}
/*
* Start Tag Events
*/
public String exitStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
{
if (tag == HTML.Tag.FORM)
{
String inputTag = "<input type='hidden' name='sessionId' value='" +
sessionID + "'/>";
return inputTag;
}
return null;
}
/*
* Simple Tag Events
*/
public void enterSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
{
if (tag == HTML.Tag.META)
{
Object o = attrs.getAttribute(HTML.Attribute.NAME);
if (o != null)
{
String s = o.toString();
if (s.equalsIgnoreCase("SampleEnd"))
{
sampleEndFlag = true;
}
}
}
}
/*
* Convert Tag Events
*/
public void convertTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
{
if (tag == HTML.Tag.FORM) {
// All forms from sample will have the same form NAME.
// Jetspeed will add its own FORM depending on the type of portlet
// being used. So if you have multiple forms, any Javascript will
// have to know which form to reference.
attrs.addAttribute("NAME","SampleForm");
}
// INPUT Tag
if (tag == HTML.Tag.INPUT)
{
Object o = attrs.getAttribute(HTML.Attribute.NAME);
if (o != null)
{
String s = o.toString();
if (s.equalsIgnoreCase("FormID"))
{
o = attrs.getAttribute(HTML.Attribute.VALUE);
if (o != null)
{
formID = o.toString();
}
}
}
}
}
}
1.1
jakarta-jetspeed/src/java/org/apache/jetspeed/util/rewriter/SwingParserAdaptor.java
Index: SwingParserAdaptor.java
===================================================================
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000-2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Jetspeed" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache" or
* "Apache Jetspeed", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jetspeed.util.rewriter;
// javax.swing.text
import javax.swing.text.*;
import javax.swing.text.html.parser.*;
import javax.swing.text.html.*;
import javax.swing.text.html.HTMLEditorKit;
// java.io
import java.io.*;
// java.util
import java.util.*;
// java.net
import java.net.*;
/*
* HTML Parser Adaptor for the Swing 'HotJava' parser.
*
* @author <a href="mailto:[EMAIL PROTECTED]">David Sean Taylor</a>
* @version $Id: SwingParserAdaptor.java,v 1.1 2002/03/27 17:43:58 taylor Exp $
*/
public class SwingParserAdaptor implements HTMLParserAdaptor
{
// Parser callback; run() configures it, hands it to the parser and reads the result from it.
private SwingParserAdaptor.Callback cb = new SwingParserAdaptor.Callback();
// Platform line separator, read from the "line.separator" property in the constructor.
private String lineSeparator;
// NOTE(review): no use of this flag is visible in this part of the file.
private boolean skippingImplied = false;
// Rewriter supplied to the constructor; the callback forwards tag events to it.
private Rewriter rewriter;
/*
* Construct a swing (hot java) parser adaptor
* Receives a Rewriter parameter, which is used as a callback when rewriting
URLs.
* The rewriter object executes the implementation specific URL rewriting.
*
* @param rewriter The rewriter object that is called back during URL rewriting
*/
public SwingParserAdaptor(Rewriter rewriter)
{
this.rewriter = rewriter;
lineSeparator = System.getProperty("line.separator", "\r\n");
}
/**
 * Parses and rewrites a HTML document, rewriting all URLs as either fully
 * proxied URLs or as web-application full URLs, not relative.
 *
 * Given a relative URL, such as "/content/images/my.gif", it can be rewritten
 * as either a proxied URL, for example:
 *
 * "http://proxyserver/proxy?pxpath=/content/images/my.gif"
 *
 * or a full path to the URL on the web server:
 *
 * "http://www.webserver.com/content/images/my.gif"
 *
 * The two URLs are handed to the parser callback, the Swing parser is run
 * over the reader, and the text accumulated by the callback is returned.
 *
 * @param reader Reader supplying the html content to be converted.
 * @param proxyRoot The root URL of the Proxy Server.
 * @param baseUrl The Base URL of the host being proxied.
 * @throws MalformedURLException on any failure while parsing; the original
 *         exception is attached as the cause.
 * @return An HTML-String with rewritten URLs.
 */
public String run(Reader reader,
                  String proxyRoot,
                  String baseUrl)
    throws MalformedURLException
{
    HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();
    try
    {
        cb.baseURL = baseUrl;
        cb.proxyURL = proxyRoot;
        // Third argument: ignore any charset declared inside the document.
        parser.parse(reader, cb, true);
        return cb.getResult();
    }
    catch (Exception e)
    {
        // The declared exception type has no cause-taking constructor, so the
        // cause is attached explicitly instead of dumping the trace to stderr.
        MalformedURLException wrapped = new MalformedURLException(e.toString());
        wrapped.initCause(e);
        throw wrapped;
    }
}
/**
 * Exposes the protected HTMLEditorKit.getParser() method, which is only
 * reachable from a subclass.
 */
class ParserGetter extends HTMLEditorKit
{
    public HTMLEditorKit.Parser getParser()
    {
        HTMLEditorKit.Parser kitParser = super.getParser();
        return kitParser;
    }
}
/*
* Swing Parser Callback from the HTMLEditorKit.
* This class handles all SAX-like events during parsing.
*
*/
class Callback extends HTMLEditorKit.ParserCallback
{
// The base URL the given html comes from, and the proxy root URL.
// Both are kept as plain strings (an earlier variant used java.net.URL):
// private URL baseURL;
// private URL proxyURL;
private String baseURL;
private String proxyURL;
// either handling of <FORM> is buggy, or I made some weird mistake ...
// ... JDK 1.3 sends double "</form>"-tags on closing <form>
// NOTE(review): inForm is set when a <form> tag is converted but is not read
// anywhere in the visible code; inScript is not used at all - confirm.
private boolean inForm = false;
private boolean inScript = false;
// false while inside <head>; nothing is written to the result in that state
private boolean emit = true;
// true while a simple tag is being serialized, so that it is closed with "/>"
private boolean simpleTag = false;
// accumulates the rewritten HTML
private StringWriter result = new StringWriter();
// Only the enclosing adaptor creates callbacks.
private Callback ()
{
}
//
// -------------- Hot Java event callbacks... --------------------
//
/**
 * Hot Java event callback for text (all data in between tags).
 * Nothing is written while output is suppressed (inside the head section),
 * and text whose first character is '>' is dropped.
 *
 * @param values The array of characters containing the text.
 * @param param The position reported by the parser (unused).
 */
public void handleText(char[] values,int param)
{
    if (!emit)
    {
        return;
    }
    // Guard the first-character test: an empty (or missing) chunk carries
    // no text and must not raise an index error.
    if (values == null || values.length == 0)
    {
        return;
    }
    if (values[0] == '>')
    {
        return;
    }
    addToResult(values);
}
/**
 * Hot Java event callback for handling a simple tag (without begin/end).
 * Notifies the rewriter, writes the tag in "&lt;tag .../&gt;" form and then
 * appends whatever the rewriter's exit event returns.
 *
 * @param tag The HTML tag being handled.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 * @param param the position of the tag (unused).
 *
 */
public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int
param)
{
    rewriter.enterSimpleTagEvent(tag, attrs);
    if (!emit)
    {
        // Suppressed output (inside <head>): leave simpleTag untouched so
        // the next start tag is not closed with "/>" by mistake.
        return;
    }

    // The flag only has to be set while the tag itself is serialized; it is
    // always cleared again, even if serialization fails.
    simpleTag = true;
    try
    {
        appendTagToResult(tag,attrs);
    }
    finally
    {
        simpleTag = false;
    }

    String tagName = tag.toString();
    if (tagName.equalsIgnoreCase("param") ||
        tagName.equalsIgnoreCase("object") ||
        tagName.equalsIgnoreCase("embed"))
    {
        result.write(lineSeparator);
    }

    String appended = rewriter.exitSimpleTagEvent(tag, attrs);
    if (null != appended)
    {
        result.write(appended);
    }
}
/**
 * Hot Java event callback for handling a start tag.
 * The rewriter is always notified first. The HTML and BODY tags are never
 * written, and a HEAD tag switches output off until its end tag is seen.
 *
 * @param tag The HTML tag being handled.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 * @param position the position of the tag.
 *
 */
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int
position)
{
    rewriter.enterStartTagEvent(tag, attrs);

    if (!emit || tag == HTML.Tag.HTML)
    {
        return;
    }
    if (tag == HTML.Tag.HEAD)
    {
        // everything up to </head> is dropped
        emit = false;
        return;
    }
    if (tag != HTML.Tag.BODY)
    {
        appendTagToResult(tag, attrs);
        formatLine(tag);
        String extra = rewriter.exitStartTagEvent(tag, attrs);
        if (extra != null)
        {
            result.write(extra);
        }
    }
}
/**
 * Hot Java event callback for handling an end tag.
 * The rewriter is always notified first. End tags for HTML, HEAD and BODY
 * are never written; the HEAD end tag switches output back on.
 *
 * @param tag The HTML tag being handled.
 * @param position the position of the tag.
 *
 */
public void handleEndTag(HTML.Tag tag, int position)
{
    rewriter.enterEndTagEvent(tag);

    if (tag == HTML.Tag.HEAD)
    {
        // leaving the head section: resume writing
        emit = true;
    }

    // the document frame tags are always stripped, since fragments are produced
    boolean frameTag = tag == HTML.Tag.HTML
                    || tag == HTML.Tag.HEAD
                    || tag == HTML.Tag.BODY;
    if (frameTag || !emit)
    {
        return;
    }

    addToResult("</");
    addToResult(tag);
    addToResult(">");
    formatLine(tag);

    String extra = rewriter.exitEndTagEvent(tag);
    if (extra != null)
    {
        result.write(extra);
    }
}
/*
* Hot Java event callback for handling errors.
* Parser errors are deliberately ignored; the method body is empty.
*
* @param str The error message from Swing.
* @param param A parameter passed to handler.
*
*/
public void handleError(java.lang.String str,int param)
{
// ignored
}
/*
* Hot Java event callback for HTML comments.
*
* @param values The character array of text comments.
* @param param A parameter passed to handler.
*
*/
public void handleComment(char[] values,int param)
{
// STRIP COMMENTS: addToResult(values);
// this is questionable, we may need to turn this on for scripts inside
comments
}
/**
 * Hot Java event callback for end of line strings.
 * The string is copied to the result unchanged.
 *
 * @param str The end-of-line string.
 *
 */
public void handleEndOfLineString(java.lang.String str)
{
    // goes through the generic Object overload, exactly like any other text
    final Object eol = str;
    this.addToResult(eol);
}
/**
 * Writes a line separator after block-level, flow-breaking, FRAME, FRAMESET
 * and SCRIPT tags, to make the output a little easier to read when debugging.
 *
 * @param tag The HTML tag being handled.
 *
 */
private void formatLine(HTML.Tag tag)
{
    boolean startsNewLine = tag.isBlock()
                         || tag.breaksFlow()
                         || tag == HTML.Tag.FRAME
                         || tag == HTML.Tag.FRAMESET
                         || tag == HTML.Tag.SCRIPT;
    if (!startsNewLine)
    {
        return;
    }
    result.write(lineSeparator);
}
/**
 * Writes the string form of a tag, attribute or other object to the result.
 * Any failure is reported on System.err and otherwise ignored.
 *
 * @param txt Any object to be written out using its toString method.
 * @return This callback, so that calls can be chained.
 *
 */
private Callback addToResult(Object txt)
{
    // kept behind one method so the backing store (StringBuffer or
    // StringWriter) can be swapped
    //if (ignoreLevel > 0 ) return this;
    try
    {
        String text = txt.toString();
        result.write(text);
    }
    catch (Exception e)
    {
        System.err.println("Error parsing:" + e);
    }
    return this;
}
/**
 * Writes raw character content to the result.
 * Failures are silently ignored.
 *
 * @param txt Any character text to be written out directly to stream.
 * @return This callback, so that calls can be chained.
 *
 */
private Callback addToResult(char[] txt)
{
    //if (ignoreLevel > 0) return this;
    try
    {
        // write the complete array
        result.write(txt, 0, txt.length);
    }
    catch (Exception e)
    {
        /* ignore */
    }
    return this;
}
/**
 * Accessor to the Callback's content-String.
 * The returned text always starts with one extra space character.
 *
 * @return Cleaned and rewritten HTML-Content
 */
public String getResult()
{
    try
    {
        result.flush();
    }
    catch (Exception e)
    {
        /* ignore */
    }
    // WARNING: doesn't work, if you remove the leading " " ... but don't know why
    return " ".concat(result.toString());
}
/*
* Flushes the output stream. NOT IMPLEMENTED
* The body is intentionally empty.
*
* @throws javax.swing.text.BadLocationException declared only; this
* implementation never throws it.
*/
public void flush() throws javax.swing.text.BadLocationException
{
// nothing to do here ...
}
/**
 * Writes a tag and all of its attributes to the result.
 * The URL conversion runs first, so the attribute values written here are
 * the rewritten ones. The tag is closed with "/&gt;" while a simple tag is
 * being handled and with "&gt;" otherwise.
 *
 * @param tag The HTML tag being output.
 * @param attrs The mutable HTML attribute set for the current HTML tag.
 *
 */
private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs)
{
    convertURLS(tag,attrs);
    addToResult("<").addToResult(tag);

    Enumeration names = attrs.getAttributeNames();
    while (names.hasMoreElements())
    {
        Object name = names.nextElement();
        String value = attrs.getAttribute(name).toString();
        // The value is emitted inside double quotes, so an embedded double
        // quote has to be escaped or it would terminate the attribute early.
        addToResult(" ").addToResult(name).addToResult("=\"").
        addToResult(escapeQuotes(value)).addToResult("\"");
    }

    if (simpleTag)
        addToResult("/>");
    else
        addToResult(">");
}

/*
 * Replaces every double quote in an attribute value with &quot;.
 */
private String escapeQuotes(String value)
{
    if (value.indexOf('"') == -1)
    {
        return value;
    }
    StringBuffer escaped = new StringBuffer(value.length() + 16);
    for (int i = 0; i < value.length(); i++)
    {
        char c = value.charAt(i);
        if (c == '"')
            escaped.append("&quot;");
        else
            escaped.append(c);
    }
    return escaped.toString();
}
/**
 * Decides, per HTML tag, which attribute holds a URL and hands it to the
 * matching converter. The rewriter's convertTagEvent is called first for
 * every tag. This method contains all the logic for determining how tags
 * are rewritten.
 *
 * Always proxied: A/AREA href, OPTION value, FRAME src, FORM action.
 * Proxied only if the rewriter's proxyAllTags() says so: IMG/INPUT src,
 * LINK href, APPLET codebase, SCRIPT src, BODY/TD background.
 *
 * TODO: it would be better to drive this logic off a state table that is not
 * tied to the Hot Java parser.
 *
 * @param tag TAG from the Callback-Interface.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 */
private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs )
{
    rewriter.convertTagEvent(tag, attrs);

    // ---- <A HREF= and <AREA HREF=
    if (tag == HTML.Tag.A || tag == HTML.Tag.AREA)
    {
        if (attrs.getAttribute(HTML.Attribute.HREF) != null)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.HREF, attrs);
        }
        return;
    }

    // ---- <IMG SRC= and <INPUT SRC=
    if (tag == HTML.Tag.IMG || tag == HTML.Tag.INPUT)
    {
        if (attrs.getAttribute(HTML.Attribute.SRC) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.SRC, attrs,
                                  rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <OPTION VALUE=
    if (tag == HTML.Tag.OPTION)
    {
        if (attrs.getAttribute(HTML.Attribute.VALUE) != null)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.VALUE, attrs);
        }
        return;
    }

    // ---- <LINK HREF=
    if (tag == HTML.Tag.LINK)
    {
        if (attrs.getAttribute(HTML.Attribute.HREF) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.HREF, attrs,
                                  rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <APPLET CODEBASE= (the converter checks for a missing attribute)
    if (tag == HTML.Tag.APPLET)
    {
        addConvertedAttribute(tag, HTML.Attribute.CODEBASE, attrs,
                              rewriter.proxyAllTags());
        return;
    }

    // ---- <FRAME SRC= (the converter checks for a missing attribute)
    if (tag == HTML.Tag.FRAME)
    {
        addProxiedConvertedAttribute(tag, HTML.Attribute.SRC, attrs);
        return;
    }

    // ---- <SCRIPT SRC= : only external scripts carry a URL; inline scripts
    // are left alone, and so are sources containing an encoded '>' (%3E)
    if (tag == HTML.Tag.SCRIPT)
    {
        Object scriptSrc = attrs.getAttribute(HTML.Attribute.SRC);
        if (scriptSrc != null && scriptSrc.toString().indexOf("%3E") == -1)
        {
            addConvertedAttribute(tag, HTML.Attribute.SRC, attrs,
                                  rewriter.proxyAllTags());
        }
        return;
    }

    // ---- <FORM ACTION=
    if (tag == HTML.Tag.FORM)
    {
        inForm = true; // buggy <form> handling in jdk 1.3
        boolean hasAction = attrs.getAttribute(HTML.Attribute.ACTION) != null;
        // always post
        attrs.addAttribute(HTML.Attribute.METHOD, "POST");
        if (hasAction)
        {
            addProxiedConvertedAttribute(tag, HTML.Attribute.ACTION, attrs);
        }
        else
        {
            // self referencing <FORM>
            attrs.addAttribute(HTML.Attribute.ACTION, baseURL);
        }
        return;
    }

    // ---- <BODY BACKGROUND= and <TD BACKGROUND=
    if (tag == HTML.Tag.BODY || tag == HTML.Tag.TD)
    {
        if (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null)
        {
            addConvertedAttribute(tag, HTML.Attribute.BACKGROUND, attrs,
                                  rewriter.proxyAllTags());
        }
    }
}
/**
 * Replaces the value of a URL carrying attribute with its rewritten form.
 * With proxy set the work is delegated to the proxied converter; otherwise
 * the attribute, if present, is replaced by the URL generated with the
 * proxied indicator set to false.
 *
 * @param tag The HTML tag that owns the attribute.
 * @param attr The HTML attribute to be converted.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 * @param proxy If set true, the URL is written back as a proxied URL,
 * otherwise it is written back as a fullpath back to the original host.
 *
 */
private void addConvertedAttribute( HTML.Tag tag,
HTML.Attribute attr,
MutableAttributeSet attrs,
boolean proxy )
{
    if (proxy)
    {
        addProxiedConvertedAttribute(tag, attr, attrs);
        return;
    }
    if (attrs.getAttribute(attr) == null)
    {
        // nothing to convert
        return;
    }
    String converted = generateNewUrl(tag, attrs, attr, false);
    attrs.addAttribute(attr, converted);
}
/**
 * Replaces the value of a URL carrying attribute with the URL generated
 * with the proxied indicator set to true. Does nothing when the attribute
 * is absent.
 *
 * Values starting with "javascript:" are written back as they are.
 * Values starting with "mailto:" take the same route as every other URL.
 *
 * @param tag The HTML tag that owns the attribute.
 * @param attr The HTML attribute to be proxied.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 *
 */
private void addProxiedConvertedAttribute( HTML.Tag tag,
HTML.Attribute attr,
MutableAttributeSet attrs ) {
    Object current = attrs.getAttribute(attr);
    if (current == null)
    {
        return;
    }
    String attrSource = current.toString();
    String replacement = attrSource.startsWith("javascript:")
        ? attrSource
        : generateNewUrl(tag, attrs, attr, true);
    attrs.addAttribute(attr, replacement);
}
/**
 * Reads the current value of the given attribute and passes it, together
 * with the tag and the attribute, to the rewriter's URL generator callback.
 *
 * @param tag The HTML tag that owns the attribute.
 * @param attrs The mutable HTML attribute set for the current HTML element.
 * @param attr The attribute whose value is the original URL.
 * @param proxied Boolean indicator denoting if the URL should be written back
 * as a proxied URL (true), or as a fully addressable address to the
 * original web server. It is not passed on to the rewriter here.
 * @return The translated new URL.
 *
 */
private String generateNewUrl(HTML.Tag tag,
MutableAttributeSet attrs,
HTML.Attribute attr,
boolean proxied)
{
    Object original = attrs.getAttribute(attr);
    return rewriter.generateNewUrl(original.toString(), tag, attr);
}
}
}
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>