Hi
I have used the following example to fetch the HTML of web pages from my Java
program:
http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/module-client/src/examples/org/apache/http/examples/client/ClientExecuteDirect.java
The websites which I connect to all start with www.xxxxxxxxx.se * but they
have different endings.
So I start by creating one HttpHost and declaring an HttpEntity and an HttpResponse:
final HttpHost target = new HttpHost("www.xxxxxxx.se", 80, "http");
HttpEntity entity = null;
HttpResponse rsp = null;
After that I use a loop to connect to each subsite. This is what I write in the
loop:
HttpRequest req = createRequest(urlEnding);
rsp = client.execute(target, req);
entity = rsp.getEntity();
String[] line = EntityUtils.toString(entity).split("\n"); //this info is used
by the program
if (entity != null){
entity.consumeContent();
}
It basically works fine, but the problem is that I get a memory leak: after
having fetched the code from a few hundred websites I get a
java.lang.OutOfMemoryError: Java heap space. Is there anything I can do in the
loop, besides calling entity.consumeContent(), to prevent this from happening?
I attach my java code as a text file. It is just a slight modification of the
code in the tutorial.
* this is not the real domain.. if anyone wants to know the real domain, please
send me a private e-mail
import java.util.HashSet;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.scheme.SocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicHttpRequest;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
/**
 * Fetches a series of pages from one fixed host and splits each response
 * body into lines for further processing.
 *
 * <p>All state is static: {@link #setup setup} builds the scheme registry and
 * default parameters, and {@link #doWork doWork} drives the request loop.
 */
public class ClientExecuteDirect {

    /**
     * The default parameters.
     * Instantiated in {@link #setup setup}.
     */
    private static HttpParams defaultParameters = null;

    /**
     * The scheme registry.
     * Instantiated in {@link #setup setup}.
     */
    private static SchemeRegistry supportedSchemes;

    /**
     * Requests {@code urlEnding + "/"} from the target host for every element
     * of {@code urlEndings} and splits each response body on {@code "\n"}.
     *
     * <p>Responses that carry no entity are skipped. Each entity is consumed
     * as soon as its body has been read, and the connection manager created
     * for this call is shut down before the method returns.
     *
     * @param urlEndings the request URIs (without the trailing slash) to fetch
     * @throws Exception if executing a request or reading a response fails
     */
    public static void doWork(HashSet<String> urlEndings) throws Exception {
        final HttpHost target = new HttpHost("www.xxxxxxxxxxx.se", -1, "http");

        setup(); //some general setup

        HttpClient client = createHttpClient();
        try {
            for (String urlEnding : urlEndings) {
                HttpRequest req = createRequest(urlEnding);

                // Response and entity are scoped to one iteration so nothing
                // from the previous page stays referenced while the next one
                // is being fetched.
                HttpResponse rsp = client.execute(target, req);
                HttpEntity entity = rsp.getEntity();
                if (entity == null) {
                    // No body to read; check this BEFORE handing the entity
                    // to EntityUtils.toString(...).
                    continue;
                }

                String[] line;
                try {
                    line = EntityUtils.toString(entity).split("\n");
                } finally {
                    // EntityUtils.toString(...) can throw; consume the entity
                    // either way so the connection is released.
                    entity.consumeContent();
                }

                // NOTE(review): if the processing below keeps references to
                // these lines (or substrings of them) in a long-lived
                // collection, heap usage will grow with every page fetched;
                // verify what is retained here.
                for (int j = 0; j < line.length; j++) {
                    //process the html
                }
            }
        } finally {
            // A new connection manager is created on every call to doWork,
            // so it has to be shut down here or each call leaves one behind.
            client.getConnectionManager().shutdown();
        }
    } // doWork

    /**
     * Creates a client backed by a new {@link ThreadSafeClientConnManager}
     * that uses the parameters and schemes prepared by {@link #setup setup}.
     *
     * @return a new client; the caller is responsible for shutting down its
     *         connection manager
     */
    private final static HttpClient createHttpClient() {
        ClientConnectionManager ccm =
            new ThreadSafeClientConnManager(getParams(), supportedSchemes);

        DefaultHttpClient dhc =
            new DefaultHttpClient(ccm, getParams());

        return dhc;
    }

    /**
     * Performs general setup.
     * Only the first call does any work; later calls return immediately.
     */
    private final static void setup() {
        if (supportedSchemes != null && defaultParameters != null) {
            return;
        }

        supportedSchemes = new SchemeRegistry();

        // Register the "http" protocol scheme, it is required
        // by the default operator to look up socket factories.
        SocketFactory sf = PlainSocketFactory.getSocketFactory();
        supportedSchemes.register(new Scheme("http", sf, 80));

        // prepare parameters
        HttpParams params = new BasicHttpParams();
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");
        HttpProtocolParams.setUseExpectContinue(params, true);
        defaultParameters = params;
    } // setup

    /**
     * Returns the parameters prepared by {@link #setup setup}.
     *
     * @return the default parameters, or {@code null} before setup has run
     */
    private final static HttpParams getParams() {
        return defaultParameters;
    }

    /**
     * Creates a request to execute in this example.
     *
     * @param urlEnding the request URI; a trailing {@code "/"} is appended
     * @return a GET request without an entity
     */
    private final static HttpRequest createRequest(String urlEnding) {
        HttpRequest req = new BasicHttpRequest
            ("GET", urlEnding + "/", HttpVersion.HTTP_1_1);
        return req;
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]