Hi
 
I have used the following tutorial to fetch HTML code from websites in my Java 
program:
 
http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/module-client/src/examples/org/apache/http/examples/client/ClientExecuteDirect.java
 
The websites which I connect to all start with www.xxxxxxxxx.se *  but they 
have different endings.
 
So I start by making one HttpHost and declaring an HttpEntity and an HttpResponse:
 
final HttpHost target = new HttpHost("www.xxxxxxx.se", 80, "http");
HttpEntity entity = null;
HttpResponse rsp = null;

 
After that I use a loop to connect to each subsite. This is what I write in the 
loop:
 
HttpRequest req = createRequest(urlEnding);
rsp = client.execute(target, req);
entity = rsp.getEntity();
String[] line = EntityUtils.toString(entity).split("\n"); // this info is used by the program
if (entity != null){
entity.consumeContent(); 
}
 
It basically works fine, but the problem is that I get a memory leak: after 
fetching the HTML from a few hundred pages I get a 
java.lang.OutOfMemoryError: Java heap space. Is there anything I can do in the 
loop besides entity.consumeContent() to prevent this from happening?
 
I attach my java code as a text file. It is just a slight modification of the 
code in the tutorial.
 
* this is not the real domain.. if anyone wants to know the real domain, please 
send me a private e-mail


      
import java.util.HashSet;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.client.HttpClient;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.scheme.SocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicHttpRequest;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;

/**
 * Fetches a series of pages from a single host over plain HTTP and splits
 * each response body into lines for further processing.
 *
 * <p>All requests share one {@link HttpClient}; the paths to request are
 * supplied by the caller as "URL endings" relative to the fixed target host.
 */
public class ClientExecuteDirect {

    /**
     * Requests every path in {@code urlEndings} from the target host and
     * hands the lines of each response body to the processing loop.
     *
     * <p>Each response entity is consumed before the next request is sent,
     * and the client's connection manager is shut down before this method
     * returns, whether it completes normally or with an exception.
     *
     * @param urlEndings the request paths to fetch; a trailing {@code "/"}
     *                   is appended to each one by {@link #createRequest}
     * @throws Exception if executing a request or reading a response fails
     */
    public static void doWork(HashSet<String> urlEndings) throws Exception {

        // Port -1 is passed through unchanged from the original code.
        // NOTE(review): presumably this selects the scheme's default port
        // (80 is registered for "http" in setup()) -- confirm against the
        // HttpHost documentation for the library version in use.
        final HttpHost target = new HttpHost("www.xxxxxxxxxxx.se", -1, "http");

        setup();            // some general setup
        HttpClient client = createHttpClient();

        try {
            for (String urlEnding : urlEndings) {
                String[] lines = fetchLines(client, target, urlEnding);

                for (int j = 0; j < lines.length; j++) {
                    //process the html
                }
            }
        } finally {
            // A new connection manager is created on every call to
            // doWork(); shut it down so it does not outlive this call.
            client.getConnectionManager().shutdown();
        }
    } // doWork

    /**
     * Executes one GET request and returns the response body split on
     * {@code "\n"}.
     *
     * <p>The entity is always consumed before returning, so that the
     * connection is released even when reading the body throws.
     *
     * @param client    the client used to execute the request
     * @param target    the host to send the request to
     * @param urlEnding the request path, see {@link #createRequest}
     * @return the lines of the response body, or an empty array if the
     *         response carries no entity
     * @throws Exception if executing the request or reading the body fails
     */
    private static String[] fetchLines(HttpClient client, HttpHost target,
                                       String urlEnding) throws Exception {

        HttpRequest req = createRequest(urlEnding);
        HttpResponse rsp = client.execute(target, req);
        HttpEntity entity = rsp.getEntity();

        // Check for a missing entity BEFORE reading it: the original code
        // called EntityUtils.toString(entity) first, which made its later
        // null check unreachable for the null case.
        if (entity == null) {
            // if there is no entity, the connection is already released
            return new String[0];
        }

        try {
            return EntityUtils.toString(entity).split("\n");
        } finally {
            // EntityUtils.toString(...) can throw an exception, so the
            // entity is consumed here to release the connection gracefully.
            entity.consumeContent();
        }
    }

    /**
     * The default parameters.
     * Instantiated in {@link #setup setup}.
     */
    private static HttpParams defaultParameters = null;

    /**
     * The scheme registry.
     * Instantiated in {@link #setup setup}.
     */
    private static SchemeRegistry supportedSchemes;

    /**
     * Creates a client backed by a new {@link ThreadSafeClientConnManager}
     * that uses the parameters and scheme registry prepared by
     * {@link #setup setup}.
     *
     * @return a new client; the caller is responsible for shutting down its
     *         connection manager
     */
    private static HttpClient createHttpClient() {
        ClientConnectionManager ccm =
            new ThreadSafeClientConnManager(getParams(), supportedSchemes);

        DefaultHttpClient dhc =
            new DefaultHttpClient(ccm, getParams());

        return dhc;
    }

    /**
     * Performs general setup: creates the scheme registry and the default
     * parameters. Must run before {@link #createHttpClient}.
     *
     * <p>{@link #doWork} invokes this on every call, which replaces the
     * registry and the parameters with fresh instances each time.
     */
    private static void setup() {

        supportedSchemes = new SchemeRegistry();

        // Register the "http" protocol scheme, it is required
        // by the default operator to look up socket factories.
        SocketFactory sf = PlainSocketFactory.getSocketFactory();
        supportedSchemes.register(new Scheme("http", sf, 80));

        // prepare parameters
        HttpParams params = new BasicHttpParams();
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");
        HttpProtocolParams.setUseExpectContinue(params, true);
        defaultParameters = params;

    } // setup

    /**
     * Returns the default parameters prepared by {@link #setup setup}.
     *
     * @return the default parameters, or {@code null} if setup has not run
     */
    private static HttpParams getParams() {
        return defaultParameters;
    }

    /**
     * Creates the GET request for one page.
     *
     * @param urlEnding the request path; {@code "/"} is appended to it
     * @return  a request without an entity
     */
    private static HttpRequest createRequest(String urlEnding) {

        HttpRequest req = new BasicHttpRequest
            ("GET", urlEnding + "/", HttpVersion.HTTP_1_1);
        return req;
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to