taylor      2004/06/02 08:18:18

  Modified:    src/java/org/apache/jetspeed/portal/portlets
                        WebClippingPortlet.java
  Log:
  WebPageClipping portlet cannot handle encoding correctly - patch applied
  
  http://nagoya.apache.org/jira/browse/JS1-481
  
  patch from Shinsuke Sugaya
  
  CVS: ----------------------------------------------------------------------
  CVS: PR:
  CVS:   If this change addresses a PR in the problem report tracking
  CVS:   database, then enter the PR number(s) here.
  CVS: Obtained from:
  CVS:   If this change has been taken from another system, such as NCSA,
  CVS:   then name the system in this line, otherwise delete it.
  CVS: Submitted by:
  CVS:   If this code has been contributed to Apache by someone else; i.e.,
  CVS:   they sent us a patch or a new module, then include their name/email
  CVS:   address here. If this is your work then delete this line.
  CVS: Reviewed by:
  CVS:   If we are doing pre-commit code reviews and someone else has
  CVS:   reviewed your changes, include their name(s) here.
  CVS:   If you have not had it reviewed then delete this line.
  
  Revision  Changes    Path
  1.3       +391 -375  jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java
  
  Index: WebClippingPortlet.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/portal/portlets/WebClippingPortlet.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- WebClippingPortlet.java   23 Feb 2004 04:03:34 -0000      1.2
  +++ WebClippingPortlet.java   2 Jun 2004 15:18:18 -0000       1.3
  @@ -25,6 +25,7 @@
   import java.util.Enumeration;
   import java.util.Hashtable;
   import java.util.Iterator;
  +import java.util.StringTokenizer;
   
   import org.apache.ecs.ConcreteElement;
   import org.apache.jetspeed.portal.PortletConfig;
  @@ -48,379 +49,394 @@
   public class WebClippingPortlet extends AbstractInstancePortlet
   {
   
  -     /**
  -      * Static initialization of the logger for this class
  -      */
  -     private static final JetspeedLogger logger =
  -             
JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
  -
  -     // Define parameter name for the first tag to clip
  -     public static final String START = "startTag";
  -     // Define parameter name for the last tag to clip
  -     public static final String STOP = "stopTag";
  -     // Define parameter name for a single tag to clip
  -     public static final String TAG = "Tag";
  -     // Define parameter name for the number of the tag to clip
  -     public static final String TAGNUM = "startTagNumber";
  -     // Define parameter name for the URL of the page
  -     public static final String URL = "url";
  -     // Error message for startTag without stopTag
  -     private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
  -     // Error message for wrong startTagNumber parameter
  -     private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
  -
  -     protected boolean initDone = false;
  -     protected boolean contentStale = true;
  -     protected boolean cacheContent = false;
  -     protected String username = null;
  -     protected String password = null;
  -
  -     private Hashtable patterns = null;
  -
  -     /**
  -      * Initialize this portlet
  -      * @throws PortletException Initialization failed
  -      */
  -     public void init()
  -     {
  -             if (initDone)
  -                     return;
  -
  -             patterns = new Hashtable();
  -
  -             try
  -             {
  -                     loadParams();
  -             }
  -             catch (Exception e)
  -             {
  -                     logger.info("Exception occurred:" + e.toString());
  -                     e.printStackTrace();
  -             }
  -
  -             contentStale = true;
  -             initDone = true;
  -     }
  -
  -     /**
  -      * took this from FileServerPortlet as it was private 
  -      *
  -     */
  -
  -     // FIXME: Currently only the expiration the HTTP Response header is honored. 
  -     //        Expiration information in <meta> tags are not honored 
  -
  -     protected Reader getReader(String url) throws IOException
  -     {
  -             URL pageUrl = new URL(url);
  -
  -             URLConnection pageConn = pageUrl.openConnection();
  -             try
  -             {
  -                     // set HTTP Basic Authetication header if username and password are set
  -                     if (username != null && password != null)
  -                     {
  -                             pageConn.setRequestProperty(
  -                                     "Authorization",
  -                                     "Basic "
  -                                             + Base64.encodeAsString(username + ":" 
+ password));
  -                     }
  -
  -             }
  -             catch (Exception e)
  -             {
  -                     logger.info("Exception occurred:" + e.toString());
  -                     e.printStackTrace();
  -             }
  -
  -             long pageExpiration = pageConn.getExpiration();
  -             String encoding = pageConn.getContentEncoding();
  -             String tempString = null;
  -             String noCache = "no-cache";
  -
  -             if (encoding == null)
  -             {
  -                     // Standard HTTP encoding
  -                     encoding = "iso-8859-1";
  -             }
  -
  -             /*
  -              * Determing if content should be cached.
  -              */
  -             cacheContent = true; // Assume content is cached
  -             if (pageExpiration == 0)
  -             {
  -                     cacheContent = false;
  -             }
  -             // Check header field CacheControl
  -             tempString = pageConn.getHeaderField("Cache-Control");
  -             if (tempString != null)
  -             {
  -                     if (tempString.toLowerCase().indexOf(noCache) >= 0)
  -                     {
  -                             cacheContent = false;
  -                     }
  -             }
  -             // Check header field Pragma
  -             tempString = pageConn.getHeaderField("Pragma");
  -             if (tempString != null)
  -             {
  -                     if (tempString.toLowerCase().indexOf(noCache) >= 0)
  -                     {
  -                             cacheContent = false;
  -                     }
  -             }
  -
  -             // Assign a reader
  -             Reader rdr = new InputStreamReader(pageConn.getInputStream(), 
encoding);
  -
  -             // Only set the page expiration it the page has not expired
  -             if (pageExpiration > System.currentTimeMillis()
  -                     && (cacheContent == true))
  -             {
  -                     contentStale = false;
  -                     logger.debug(
  -                             "WebPagePortlet caching URL: "
  -                                     + url
  -                                     + " Expiration: "
  -                                     + pageExpiration
  -                                     + ", "
  -                                     + (pageExpiration - System.currentTimeMillis())
  -                                     + " milliseconds into the future");
  -                     setExpirationMillis(pageExpiration);
  -             }
  -             else
  -             {
  -                     contentStale = true;
  -             }
  -
  -             return rdr;
  -     }
  -
  -     /**
  -     This methods outputs the content of the portlet for a given 
  -     request.
  -     
  -     @param data the RunData object for the request
  -     @return the content to be displayed to the user-agent
  -     */
  -     public ConcreteElement getContent(RunData data)
  -     {
  -             PortletConfig config = this.getPortletConfig();
  -
  -             if (contentStale == true)
  -                     return getWebClippedContent(data, config);
  -
  -             if (null == getExpirationMillis())
  -                     return getContent(data, null, true);
  -
  -             if (getExpirationMillis().longValue() <= System.currentTimeMillis())
  -                     return getWebClippedContent(data, config);
  -
  -             return getContent(data, null, true);
  -     }
  -
  -     /*
  -      * This method returns the clipped part of the Web page
  -      */
  -     private ConcreteElement getWebClippedContent(
  -             RunData data,
  -             PortletConfig config)
  -     {
  -             String clippedString = ""; // HTML to visualize
  -             JetspeedClearElement element = null;
  -             int patternNumber = 1;
  -             int tagNumber = 0;
  -             Reader htmlReader;
  -             String defaultUrl = selectUrl(data, config);
  -
  -             try
  -             {
  -                     // Re-load parameters to see immediately the effect of changes
  -                     loadParams();
  -                     Enumeration en = patterns.keys();
  -
  -                     while (en.hasMoreElements())
  -                     {
  -                             String name = (String) en.nextElement();
  -
  -                             // Search for parameters in the right order
  -                             if (name.equals(START + String.valueOf(patternNumber))
  -                                     || name.equals(TAG + 
String.valueOf(patternNumber)))
  -                             {
  -                                     String start =
  -                                             (String) patterns.get(
  -                                                     START + 
String.valueOf(patternNumber));
  -                                     String simpleTag =
  -                                             (String) patterns.get(
  -                                                     TAG + 
String.valueOf(patternNumber));
  -                                     String stop =
  -                                             (String) patterns.get(
  -                                                     STOP + 
String.valueOf(patternNumber));
  -                                     String tagNum =
  -                                             (String) patterns.get(
  -                                                     TAGNUM + 
String.valueOf(patternNumber));
  -                                     // A group of params can have a specific url
  -                                     String url =
  -                                             (String) patterns.get(
  -                                                     URL + 
String.valueOf(patternNumber));
  -                                     url = controlUrl(url, defaultUrl);
  -                                     htmlReader = getReader(url);
  -
  -                                     if ((start != null) && (stop == null))
  -                                     {
  -                                             element = new 
JetspeedClearElement(BAD_PARAM);
  -                                             return element;
  -                                     }
  -
  -                                     if (tagNum != null)
  -                                     {
  -                                             try
  -                                             {
  -                                                     tagNumber = 
Integer.parseInt(tagNum);
  -                                             }
  -                                             catch (NumberFormatException e)
  -                                             {
  -                                                     logger.info("Exception 
occurred:" + e.toString());
  -                                                     e.printStackTrace();
  -                                                     element = new 
JetspeedClearElement(BAD_NUMBER);
  -                                                     return element;
  -                                             }
  -                                     }
  -
  -                                     if ((simpleTag != null) && (tagNum == null))
  -                                             clippedString =
  -                                                     clippedString
  -                                                             + 
Transformer.findElement(
  -                                                                     htmlReader,
  -                                                                     url,
  -                                                                     simpleTag);
  -                                     else if ((simpleTag != null) && (tagNum != 
null))
  -                                             clippedString =
  -                                                     clippedString
  -                                                             + 
Transformer.findElementNumber(
  -                                                                     htmlReader,
  -                                                                     url,
  -                                                                     simpleTag,
  -                                                                     tagNumber);
  -                                     else if (tagNum == null)
  -                                             clippedString =
  -                                                     clippedString
  -                                                             + 
Transformer.clipElements(
  -                                                                     htmlReader,
  -                                                                     url,
  -                                                                     start,
  -                                                                     stop);
  -                                     else if (tagNum != null)
  -                                             clippedString =
  -                                                     clippedString
  -                                                             + 
Transformer.clipElementsNumber(
  -                                                                     htmlReader,
  -                                                                     url,
  -                                                                     start,
  -                                                                     stop,
  -                                                                     tagNumber);
  -
  -                                     patternNumber = patternNumber + 1;
  -                                     //Restart Enumeration, because params could not be in the right order
  -                                     en = patterns.keys();
  -                                     htmlReader.close();
  -                             }
  -                     }
  -
  -                     element = new JetspeedClearElement(clippedString);
  -
  -                     //FIXME: We should do a clearContent() for the media type, not ALL media types
  -                     this.clearContent();
  -                     // doing this because setContent() is not overwriting current content.
  -                     this.setContent(element);
  -
  -             }
  -             catch (Exception e)
  -             {
  -                     logger.info("Exception occurred:" + e.toString());
  -                     e.printStackTrace();
  -             }
  -
  -             return element;
  -     }
  -
  -     /**
  -      * Usually called by caching system when portlet is marked as expired, but
  -      * has not be idle longer then TimeToLive.
  -      *
  -      * Any cached content that is expired need to be refreshed.
  -      */
  -     public void refresh()
  -     {
  -             if (cacheContent == true)
  -             {
  -                     getWebClippedContent(null, this.getPortletConfig());
  -             }
  -     }
  -
  -     /**
  -      * Select the URL to use for this portlet.
  -      * @return The URL to use for this portlet
  -      */
  -     protected String selectUrl(RunData data, PortletConfig config)
  -     {
  -             String url = config.getURL();
  -             return url;
  -     }
  -
  -     /*
  -      * Choose between a specific url and the default url
  -      */
  -     private String controlUrl(String url, String defaultUrl)
  -     {
  -             if (url == null)
  -             {
  -                     return defaultUrl;
  -             }
  -
  -             //if the given URL doesn not include a protocol... ie http:// or ftp://
  -             //then resolve it relative to the current URL context
  -             if (url.indexOf("://") < 0)
  -             {
  -                     url = TurbineServlet.getResource(url).toString();
  -             }
  -
  -             return url;
  -     }
  -
  -     /*
  -      * Load portlet parameters
  -      */
  -     private void loadParams() throws PortletException
  -     {
  -             Iterator en = this.getPortletConfig().getInitParameterNames();
  -
  -             try
  -             {
  -                     while (en.hasNext())
  -                     {
  -                             String name = (String) en.next();
  -
  -                             if (name.equals("username"))
  -                                     username =
  -                                             
this.getPortletConfig().getInitParameter("username");
  -                             else if (name.equals("password"))
  -                                     password =
  -                                             
this.getPortletConfig().getInitParameter("password");
  -                             else
  -                                     patterns.put(
  -                                             name,
  -                                             
this.getPortletConfig().getInitParameter(name));
  -
  -                     }
  -             }
  -             catch (Exception e)
  -             {
  -                     logger.info("Exception occurred:" + e.toString());
  -                     e.printStackTrace();
  -                     throw new PortletException(e.toString());
  -             }
  -     }
  +    /**
  +     * Static initialization of the logger for this class
  +     */
  +    private static final JetspeedLogger logger =
  +        JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
  +
  +    // Define parameter name for the first tag to clip
  +    public static final String START = "startTag";
  +    // Define parameter name for the last tag to clip
  +    public static final String STOP = "stopTag";
  +    // Define parameter name for a single tag to clip
  +    public static final String TAG = "Tag";
  +    // Define parameter name for the number of the tag to clip
  +    public static final String TAGNUM = "startTagNumber";
  +    // Define parameter name for the URL of the page
  +    public static final String URL = "url";
  +    // Error message for startTag without stopTag
  +    private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
  +    // Error message for wrong startTagNumber parameter
  +    private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
  +
  +    protected boolean initDone = false;
  +    protected boolean contentStale = true;
  +    protected boolean cacheContent = false;
  +    protected String username = null;
  +    protected String password = null;
  +
  +    private Hashtable patterns = null;
  +
  +    /**
  +     * Initialize this portlet
  +     * @throws PortletException Initialization failed
  +     */
  +    public void init()
  +    {
  +        if (initDone)
  +            return;
  +
  +        patterns = new Hashtable();
  +
  +        try
  +        {
  +            loadParams();
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        contentStale = true;
  +        initDone = true;
  +    }
  +
  +    /**
  +     * took this from FileServerPortlet as it was private 
  +     *
  +    */
  +
  +    // FIXME: Currently only the expiration the HTTP Response header is honored. 
  +    //        Expiration information in <meta> tags are not honored 
  +
  +    protected Reader getReader(String url) throws IOException
  +    {
  +        URL pageUrl = new URL(url);
  +
  +        URLConnection pageConn = pageUrl.openConnection();
  +        try
  +        {
  +            // set HTTP Basic Authetication header if username and password are set
  +            if (username != null && password != null)
  +            {
  +                pageConn.setRequestProperty(
  +                    "Authorization",
  +                    "Basic "
  +                        + Base64.encodeAsString(username + ":" + password));
  +            }
  +
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        long pageExpiration = pageConn.getExpiration();
  +        String encoding = "iso-8859-1";
  +        String contentType = pageConn.getContentType();
  +        String tempString = null;
  +        String noCache = "no-cache";
  +
  +        if (contentType != null)
  +        {
  +            StringTokenizer st = new StringTokenizer(contentType, "; =");
  +            while (st.hasMoreTokens())
  +            {
  +                if (st.nextToken().equalsIgnoreCase("charset"))
  +                {
  +                    try
  +                    {
  +                        encoding = st.nextToken();
  +                        break;
  +                    }
  +                    catch (Exception e)
  +                    {
  +                        break;
  +                    }
  +                }
  +            }
  +        }
  +
  +        /*
  +         * Determing if content should be cached.
  +         */
  +        cacheContent = true; // Assume content is cached
  +        if (pageExpiration == 0)
  +        {
  +            cacheContent = false;
  +        }
  +        // Check header field CacheControl
  +        tempString = pageConn.getHeaderField("Cache-Control");
  +        if (tempString != null)
  +        {
  +            if (tempString.toLowerCase().indexOf(noCache) >= 0)
  +            {
  +                cacheContent = false;
  +            }
  +        }
  +        // Check header field Pragma
  +        tempString = pageConn.getHeaderField("Pragma");
  +        if (tempString != null)
  +        {
  +            if (tempString.toLowerCase().indexOf(noCache) >= 0)
  +            {
  +                cacheContent = false;
  +            }
  +        }
  +
  +        // Assign a reader
  +        Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
  +
  +        // Only set the page expiration it the page has not expired
  +        if (pageExpiration > System.currentTimeMillis()
  +            && (cacheContent == true))
  +        {
  +            contentStale = false;
  +            logger.debug(
  +                "WebPagePortlet caching URL: "
  +                    + url
  +                    + " Expiration: "
  +                    + pageExpiration
  +                    + ", "
  +                    + (pageExpiration - System.currentTimeMillis())
  +                    + " milliseconds into the future");
  +            setExpirationMillis(pageExpiration);
  +        }
  +        else
  +        {
  +            contentStale = true;
  +        }
  +
  +        return rdr;
  +    }
  +
  +    /**
  +    This methods outputs the content of the portlet for a given 
  +    request.
  +    
  +    @param data the RunData object for the request
  +    @return the content to be displayed to the user-agent
  +    */
  +    public ConcreteElement getContent(RunData data)
  +    {
  +        PortletConfig config = this.getPortletConfig();
  +
  +        if (contentStale == true)
  +            return getWebClippedContent(data, config);
  +
  +        if (null == getExpirationMillis())
  +            return getContent(data, null, true);
  +
  +        if (getExpirationMillis().longValue() <= System.currentTimeMillis())
  +            return getWebClippedContent(data, config);
  +
  +        return getContent(data, null, true);
  +    }
  +
  +    /*
  +     * This method returns the clipped part of the Web page
  +     */
  +    private ConcreteElement getWebClippedContent(
  +        RunData data,
  +        PortletConfig config)
  +    {
  +        String clippedString = ""; // HTML to visualize
  +        JetspeedClearElement element = null;
  +        int patternNumber = 1;
  +        int tagNumber = 0;
  +        Reader htmlReader;
  +        String defaultUrl = selectUrl(data, config);
  +
  +        try
  +        {
  +            // Re-load parameters to see immediately the effect of changes
  +            loadParams();
  +            Enumeration en = patterns.keys();
  +
  +            while (en.hasMoreElements())
  +            {
  +                String name = (String) en.nextElement();
  +
  +                // Search for parameters in the right order
  +                if (name.equals(START + String.valueOf(patternNumber))
  +                    || name.equals(TAG + String.valueOf(patternNumber)))
  +                {
  +                    String start =
  +                        (String) patterns.get(
  +                            START + String.valueOf(patternNumber));
  +                    String simpleTag =
  +                        (String) patterns.get(
  +                            TAG + String.valueOf(patternNumber));
  +                    String stop =
  +                        (String) patterns.get(
  +                            STOP + String.valueOf(patternNumber));
  +                    String tagNum =
  +                        (String) patterns.get(
  +                            TAGNUM + String.valueOf(patternNumber));
  +                    // A group of params can have a specific url
  +                    String url =
  +                        (String) patterns.get(
  +                            URL + String.valueOf(patternNumber));
  +                    url = controlUrl(url, defaultUrl);
  +                    htmlReader = getReader(url);
  +
  +                    if ((start != null) && (stop == null))
  +                    {
  +                        element = new JetspeedClearElement(BAD_PARAM);
  +                        return element;
  +                    }
  +
  +                    if (tagNum != null)
  +                    {
  +                        try
  +                        {
  +                            tagNumber = Integer.parseInt(tagNum);
  +                        }
  +                        catch (NumberFormatException e)
  +                        {
  +                            logger.info("Exception occurred:" + e.toString());
  +                            e.printStackTrace();
  +                            element = new JetspeedClearElement(BAD_NUMBER);
  +                            return element;
  +                        }
  +                    }
  +
  +                    if ((simpleTag != null) && (tagNum == null))
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.findElement(
  +                                    htmlReader,
  +                                    url,
  +                                    simpleTag);
  +                    else if ((simpleTag != null) && (tagNum != null))
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.findElementNumber(
  +                                    htmlReader,
  +                                    url,
  +                                    simpleTag,
  +                                    tagNumber);
  +                    else if (tagNum == null)
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.clipElements(
  +                                    htmlReader,
  +                                    url,
  +                                    start,
  +                                    stop);
  +                    else if (tagNum != null)
  +                        clippedString =
  +                            clippedString
  +                                + Transformer.clipElementsNumber(
  +                                    htmlReader,
  +                                    url,
  +                                    start,
  +                                    stop,
  +                                    tagNumber);
  +
  +                    patternNumber = patternNumber + 1;
  +                    //Restart Enumeration, because params could not be in the right order
  +                    en = patterns.keys();
  +                    htmlReader.close();
  +                }
  +            }
  +
  +            element = new JetspeedClearElement(clippedString);
  +
  +            //FIXME: We should do a clearContent() for the media type, not ALL media types
  +            this.clearContent();
  +            // doing this because setContent() is not overwriting current content.
  +            this.setContent(element);
  +
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +        }
  +
  +        return element;
  +    }
  +
  +    /**
  +     * Usually called by caching system when portlet is marked as expired, but
  +     * has not be idle longer then TimeToLive.
  +     *
  +     * Any cached content that is expired need to be refreshed.
  +     */
  +    public void refresh()
  +    {
  +        if (cacheContent == true)
  +        {
  +            getWebClippedContent(null, this.getPortletConfig());
  +        }
  +    }
  +
  +    /**
  +     * Select the URL to use for this portlet.
  +     * @return The URL to use for this portlet
  +     */
  +    protected String selectUrl(RunData data, PortletConfig config)
  +    {
  +        String url = config.getURL();
  +        return url;
  +    }
  +
  +    /*
  +     * Choose between a specific url and the default url
  +     */
  +    private String controlUrl(String url, String defaultUrl)
  +    {
  +        if (url == null)
  +        {
  +            return defaultUrl;
  +        }
  +
  +        //if the given URL doesn not include a protocol... ie http:// or ftp://
  +        //then resolve it relative to the current URL context
  +        if (url.indexOf("://") < 0)
  +        {
  +            url = TurbineServlet.getResource(url).toString();
  +        }
  +
  +        return url;
  +    }
  +
  +    /*
  +     * Load portlet parameters
  +     */
  +    private void loadParams() throws PortletException
  +    {
  +        Iterator en = this.getPortletConfig().getInitParameterNames();
  +
  +        try
  +        {
  +            while (en.hasNext())
  +            {
  +                String name = (String) en.next();
  +
  +                if (name.equals("username"))
  +                    username =
  +                        this.getPortletConfig().getInitParameter("username");
  +                else if (name.equals("password"))
  +                    password =
  +                        this.getPortletConfig().getInitParameter("password");
  +                else
  +                    patterns.put(
  +                        name,
  +                        this.getPortletConfig().getInitParameter(name));
  +
  +            }
  +        }
  +        catch (Exception e)
  +        {
  +            logger.info("Exception occurred:" + e.toString());
  +            e.printStackTrace();
  +            throw new PortletException(e.toString());
  +        }
  +    }
   
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to