Dear all,
When I download a Web Page, I look for the string
<META HTTP-EQUIV="refresh" CONTENT="0;URL=>
and identify the link that follows "URL=".
Then, I make another request for the link identified.
My program throws the exception listed below:
------------------------------------------------------------------------
java.lang.NullPointerException
at sun.net.ProgressData.unregister(Compiled Code)
at sun.net.www.http.HttpClient.parseHTTP(Compiled Code)
at
sun.net.www.protocol.http.HttpURLConnection.getInputStream(Compiled Code)
at
sun.net.www.protocol.http.HttpURLConnection.getHeaderField(Compiled Code)
at GetPage.sendRequest(Compiled Code)
at GetPage.main(Compiled Code)
------------------------------------------------------------------------
The class GetPage is listed below.
Could someone help me out ????
Thank you for any help !
Claudine
-----------------------------------------------------------------------
import java.net.*;
import java.io.*;
import java.util.*;
import com.oroinc.text.perl.*;
public class GetPage
{
private String url;
private String response;
public GetPage(String Url)
{
this.url = Url;
this.response = null;
} /* GetPage */
// It was substituted by the method URL.getHost()
private String findHost() {
StringBuffer sb;
StringTokenizer st;
st = new StringTokenizer(this.url, "/");
sb = new StringBuffer();
if (st.countTokens() < 2) return "";
st.nextToken();
sb.append(st.nextToken());
return sb.toString();
} /* findHost */
public String getResponse()
{
return this.response;
} /* getResponse */
public String getURL()
{
return this.url;
} /* getURL */
public void sendRequest() throws MalformedURLException, IOException,
ProtocolException, NullPointerException
{
HttpURLConnection con;
String host;
BufferedReader in;
String line;
String redirectedURL;
URL myURL;
StringBuffer out;
myURL = new URL(this.url);
host = myURL.getHost();
con = (HttpURLConnection) myURL.openConnection();
//Sets that the HTTP redirects should not be automatically
followed by this class.
con.setFollowRedirects(false);
con.setUseCaches(false);
con.setRequestMethod("GET");
con.setDoInput(true);
con.setRequestProperty("Proxy-Connection","Keep-Alive");
con.setRequestProperty("User-agent", "Mozilla/4.7 [en] (X11;
U; Linux 2.2.12 i686)");
con.setRequestProperty("Pragma","no-cache");
con.setRequestProperty("Host",host);
con.setRequestProperty("Accept", "image/gif, image/x-xbitmap,
image/jpeg, image/pjpeg, image/png, */*");
con.setRequestProperty("Accept-Language", "en");
con.setRequestProperty("Accept-Charset",
"iso-8859-1,*,utf-8");
con.connect();
// Follows manually the HTTP redirects, in order to obtain the
remote directory containing the sources used in the html page.
//If the requested resource has been assigned a new URI, the
response header contains the field "location" whose value is the new URI.
redirectedURL = con.getHeaderField("Location");
if (redirectedURL!=null)
{
this.url = redirectedURL;
myURL = new URL(this.url);
host = myURL.getHost();
con = (HttpURLConnection) myURL.openConnection();
}
in = new BufferedReader(new
InputStreamReader(con.getInputStream()));
out = new StringBuffer();
line = null;
while ((line = in.readLine()) != null)
{
out.append(line + "\n");
}
in.close();
con.disconnect();
this.response = out.toString();
} /* sendRequest */
public static void main(String[] args) throws MalformedURLException,
IOException, ProtocolException,NullPointerException
{
ModifyPage modifiedPage;
String modifiedPageString;
String originalPageString;
GetPage page;
Perl5Util perl;
String refreshedURL;
String url;
if (args.length != 1) {
System.out.println("Usage: GetPage <url>");
System.exit(0);
}
url=args[0];
page = new GetPage(url);
page.sendRequest();
originalPageString = page.getResponse();
url = page.getURL();
modifiedPage = new ModifyPage(url,originalPageString);
modifiedPageString = modifiedPage.modifyPage();
perl = new Perl5Util();
while (perl.match("#<META
HTTP-EQUIV=\"?refresh\"?(\\s+)CONTENT=\"?0;(\\s*?)URL=\"?(.*?)\"?>#is",
modifiedPageString))
{
refreshedURL = perl.group(3);
// Test if the url is invalid
if (perl.match("#\\s#",refreshedURL) ||
refreshedURL.equals(url))
{
break;
}
page = new GetPage(refreshedURL);
page.sendRequest();
originalPageString = page.getResponse();
refreshedURL = page.getURL();
modifiedPage = new
ModifyPage(refreshedURL,originalPageString);
modifiedPageString = modifiedPage.modifyPage();
url = refreshedURL;
}
System.out.print(modifiedPageString);
}
} /* GetPage */
***********************************************************************
* CLAUDINE SANTOS BADUE *
* *
* Msc. Student - Computer Science *
* Laboratory for Treating Information (http://www.dcc.ufmg.br/latin/) *
* DCC - UFMG - Brazil *
***********************************************************************
* "For God so loved the world that He gave His one and only Son, *
* that whoever believes in Him shall not perish but have *
* eternal life" (John 3:16) *
* *
* "Jesus answered: I am the way and the truth and the life. *
* No one comes to the Father except through me" (John 14:6) *
***********************************************************************
___________________________________________________________________________
To unsubscribe, send email to [EMAIL PROTECTED] and include in the body
of the message "signoff SERVLET-INTEREST".
Archives: http://archives.java.sun.com/archives/servlet-interest.html
Resources: http://java.sun.com/products/servlet/external-resources.html
LISTSERV Help: http://www.lsoft.com/manuals/user/user.html