Dear all,

When I download a Web page, I look for the string
<META HTTP-EQUIV="refresh" CONTENT="0;URL=>

and identify the link that follows "URL=".

Then, I make another request for the link identified.

My program throws the exception listed below:
------------------------------------------------------------------------
java.lang.NullPointerException
        at sun.net.ProgressData.unregister(Compiled Code)
        at sun.net.www.http.HttpClient.parseHTTP(Compiled Code)
        at
sun.net.www.protocol.http.HttpURLConnection.getInputStream(Compiled Code)
        at
sun.net.www.protocol.http.HttpURLConnection.getHeaderField(Compiled Code)
        at GetPage.sendRequest(Compiled Code)
        at GetPage.main(Compiled Code)
------------------------------------------------------------------------

The class GetPage is listed below.

Could someone help me out?

Thank you for any help !

Claudine

-----------------------------------------------------------------------
import java.net.*;
import java.io.*;
import java.util.*;

import com.oroinc.text.perl.*;

public class GetPage
{
    private String url;
    private String response;

    public GetPage(String Url)
    {
        this.url = Url;
        this.response = null;
    } /* GetPage */

    // It was substituted by the method URL.getHost()
    private String findHost() {
        StringBuffer sb;
        StringTokenizer st;

        st = new StringTokenizer(this.url, "/");
        sb = new StringBuffer();

        if (st.countTokens() < 2) return "";

        st.nextToken();
        sb.append(st.nextToken());

        return sb.toString();
    }  /* findHost */

    public String getResponse()
    {
        return this.response;
    }  /* getResponse */

    public String getURL()
    {
        return this.url;
    } /* getURL */

    public void sendRequest() throws MalformedURLException, IOException,
ProtocolException, NullPointerException
    {
        HttpURLConnection con;
        String host;
        BufferedReader in;
        String line;
        String redirectedURL;
        URL myURL;
        StringBuffer out;

            myURL = new URL(this.url);

            host = myURL.getHost();

            con = (HttpURLConnection) myURL.openConnection();
            //Sets that the HTTP redirects should not be automatically
followed by this class.
            con.setFollowRedirects(false);

            con.setUseCaches(false);
            con.setRequestMethod("GET");
            con.setDoInput(true);

            con.setRequestProperty("Proxy-Connection","Keep-Alive");
            con.setRequestProperty("User-agent", "Mozilla/4.7 [en] (X11;
U; Linux 2.2.12 i686)");
            con.setRequestProperty("Pragma","no-cache");
            con.setRequestProperty("Host",host);
            con.setRequestProperty("Accept", "image/gif, image/x-xbitmap,
image/jpeg, image/pjpeg, image/png, */*");
            con.setRequestProperty("Accept-Language", "en");
            con.setRequestProperty("Accept-Charset",
"iso-8859-1,*,utf-8");

            con.connect();

            // Follows manually the HTTP redirects, in order to obtain the
remote directory containing the sources used in the html page.
            //If the requested resource has been assigned a new URI, the
response header contains the field "location" whose value is the new URI.
            redirectedURL = con.getHeaderField("Location");
            if (redirectedURL!=null)
            {
                this.url = redirectedURL;
                myURL = new URL(this.url);
                host = myURL.getHost();
                con = (HttpURLConnection) myURL.openConnection();
            }

            in = new BufferedReader(new
InputStreamReader(con.getInputStream()));
            out = new StringBuffer();
            line = null;

            while ((line = in.readLine()) != null)
            {
                out.append(line + "\n");
            }

            in.close();
            con.disconnect();

            this.response = out.toString();

    } /* sendRequest */

    public static void main(String[] args) throws MalformedURLException,
IOException, ProtocolException,NullPointerException
    {

        ModifyPage modifiedPage;
        String modifiedPageString;
        String originalPageString;
        GetPage page;
        Perl5Util perl;
        String refreshedURL;
        String url;

        if (args.length != 1) {
            System.out.println("Usage: GetPage <url>");
            System.exit(0);
        }

        url=args[0];

        page = new GetPage(url);
        page.sendRequest();
        originalPageString = page.getResponse();
        url = page.getURL();

        modifiedPage = new ModifyPage(url,originalPageString);
        modifiedPageString = modifiedPage.modifyPage();

        perl = new Perl5Util();
        while (perl.match("#<META
HTTP-EQUIV=\"?refresh\"?(\\s+)CONTENT=\"?0;(\\s*?)URL=\"?(.*?)\"?>#is",
modifiedPageString))
        {
            refreshedURL = perl.group(3);

            // Test if the url is invalid
            if (perl.match("#\\s#",refreshedURL) ||
refreshedURL.equals(url))
            {
                break;
            }

            page = new GetPage(refreshedURL);
            page.sendRequest();
            originalPageString = page.getResponse();
            refreshedURL = page.getURL();

            modifiedPage = new
ModifyPage(refreshedURL,originalPageString);
modifiedPageString = modifiedPage.modifyPage();

            url = refreshedURL;
        }

        System.out.print(modifiedPageString);

    }
} /* GetPage */





***********************************************************************
*                       CLAUDINE SANTOS BADUE                         *
*                                                                     *
*                   Msc. Student - Computer Science                   *
* Laboratory for Treating Information (http://www.dcc.ufmg.br/latin/) *
*                         DCC - UFMG - Brazil                         *
***********************************************************************
* "For God so loved the world that He gave His one and only Son,      *
*  that whoever believes in Him shall not perish but have             *
*  eternal life" (John 3:16)                                          *
*                                                                     *
*  "Jesus answered: I am the way and the truth and the life.          *
*  No one comes to the Father except through me" (John 14:6)          *
***********************************************************************

___________________________________________________________________________
To unsubscribe, send email to [EMAIL PROTECTED] and include in the body
of the message "signoff SERVLET-INTEREST".

Archives: http://archives.java.sun.com/archives/servlet-interest.html
Resources: http://java.sun.com/products/servlet/external-resources.html
LISTSERV Help: http://www.lsoft.com/manuals/user/user.html

Reply via email to