Hi Everybody!
I have been using HttpClient v0.3, which doesn't support https.
The newer version 2.0 supports https, but its structure has changed completely;
for example, I use the HTTPResponse object and NVPair from v0.3, but they
are not found in 2.0.
Then I saw the patches to HTTPConnection and RedirectionModule for v0.3-3, and I
used them to make https calls.
This works for some sites, e.g.:
=====================
https://adwords.google.com/select/
https://console.website.yahoo.com/
https://console.website.yahoo.com/apps/recoverpassword/index.cgi
but it does not work for the following sites, because certificate authentication
fails (the error is "peer NOT authenticated"):
=====================================================================
https://www.etrade.com <https://www.etrade.com/>
https://www.sun.com/
MY CODE (the green lines are the ones interacting with HTTPClient 0.3-3)
====================================================
// Returns an array containing [ String content of web site, Document
object ]
private Object[] parse(Tidy tidy, String url, String sCharset, String
sMode, boolean bFullPage) {
long lStartTime = System.currentTimeMillis();
vLinks=new Vector();
counter=0;
String sFileName=new
String(""+System.currentTimeMillis()+Thread.currentThread().hashCode());
ByteArrayOutputStream baos=new ByteArrayOutputStream();
String sOriginalUrl = null;
try
{
boolean bPageNotLoaded = true;
HTTPClient.URI uri = null;
HTTPResponse rsp = null;
do {
//////Form the nvpairs from the url
NVPair nvpairs[] = null;
int index = url.indexOf("?");
sOriginalUrl = url;
if ( index >= 0 ) {
Vector v = new Vector();
String params = url.substring(index+1);
url = url.substring(0, index);
StringTokenizer st = new StringTokenizer ( params, "&" );
while ( st.hasMoreTokens() ) {
String token = st.nextToken();
int index2 = token.indexOf("=");
NVPair form_data = new NVPair(token.substring(0, index2),
token.substring(index2+1));
v.add( form_data );
}
if ( v.size() > 0 ) {
int len = v.size();
nvpairs = new NVPair[len];
for ( int i=0; i<len; i++ ) {
nvpairs[i] = (NVPair)v.elementAt(i);
}
}
}
//Use a loop to see if the website is redirecting u
//with a response code of 302 and if it is then go to the new
redirected url
/* Sample header looks like this for www.espn.com
Header:Content-type value=text/html
Header:Location value=http://msn.espn.go.com/
Header:Connection value=close
Header:Date value=Thu, 26 Jun 2003 22:33:21 GMT
Header:Content-length value=0
Header:Server value=Netscape-Enterprise/4.1
*/
// Send new user agent string in header
NVPair[] Headers = new NVPair[1];
NVPair Header = new NVPair("User-Agent", "Mozilla/4.0
(compatible; MSIE 6.0; Windows NT 5.0)");
Headers[0] = Header;
uri = new HTTPClient.URI(url);
// Prevents popup from appearing which prompts user to
accept/reject cookie
CookieModule.setCookiePolicyHandler(null);
We do not need to set security provider class if url is not https
if ( url.startsWith("https") ){
if ( ! bSSLProviderSet ) {
// This indicates JSSE module for SSL support
java.security.Security.addProvider(new
com.sun.net.ssl.internal.ssl.Provider());
bSSLProviderSet=true;
}
}
String a = uri.getPath().equals("")?"/":uri.getPath();
HTTPConnection con = new HTTPConnection(uri.getScheme(),
uri.getHost(), uri.getPort());
con.setRawMode(true);
rsp = con.Get(uri.getPath().equals("")?"/":uri.getPath(),
nvpairs, Headers);
if (rsp.getStatusCode() == 302){
Enumeration enum = rsp.listHeaders();
while ( enum.hasMoreElements() ){
String name = (String)enum.nextElement();
}
url = Common.decodeString( rsp.getHeader("Location").trim()
);
}
else
{
bPageNotLoaded = false;
}
}while( bPageNotLoaded );
if (rsp.getStatusCode() >= 300)
{
System.err.println("Received Error: "+rsp.getReasonLine()+"
code="+rsp.getStatusCode()+" Headers:"+rsp.listHeaders());
return null;
}
else{
if (
rsp.getHeader("Content-Type").toLowerCase().indexOf("image/") >= 0 ){
String sDocument = "<a
href='"+PortalServlet.SERVLET_NAME+"?x="+JABITAT_PARSER+
"&URL="+Common.encodeString(url)+"&IMAGE=1"+"'
title='"+ResourceText.Strings.get("Common_5")+"'><img src='"+url+"'
border='0' /></a>" ;
ByteArrayInputStream bais=new
ByteArrayInputStream(sDocument.getBytes());
Document d=tidy.parseDOM(bais, baos);
Common.ensureTitle(d);
return new Object[]{sDocument, d};
}
else{
String str=rsp.getText();
String sEffectiveURI=rsp.getEffectiveURI().toString();
String sEncoding = getEncoding(tidy, rsp, sCharset,
sEffectiveURI);
String unicodeString = null;
if ( sEncoding != null )
unicodeString = StringTools.toUnicode(str, sEncoding);
else
unicodeString = str;
ByteArrayInputStream bais=new
ByteArrayInputStream(unicodeString.getBytes("UTF-8"));
tidy.setBaseURL(sEffectiveURI);
Document d=tidy.parseDOM(bais, baos);
Common.ensureTitle(d);
if ( sEncoding != null )
changeEncodingOfPage ( d );
stripUnwantedTags (d);
String baseHref = getBaseHref (d, url, sEffectiveURI,
bFullPage);
eliminateUnnecessaryAttributesAndFixHref (d, baseHref,
uri.getScheme()+"://"+uri.getHost(), bFullPage);
putElementIdentifierInEachTag(d);
String
sDOMString=JabitatDomPrinter.printToString(d.getDocumentElement(), new
JabitatDomPrinter.XMLEscaper());
RequestInfo rinfo=RequestHandler.getRequestInfo();
if ( sMode != null && sMode.equals("REFRESH") ) {
return new Object[]{sDOMString, d};
}
saveAtagsToVector(d, sFileName);
saveVectorToFile(sDOMString, sOriginalUrl, sFileName);
Common.cleanupDirectory(getParserFilesDir());
if ( rinfo.getRequestParameter("RB") != null &&
rinfo.getRequestParameter("RB").toString().equals("FP") ) {
String sParams = null;
String sHeader = null;
if ( rinfo.getRequestParameter("FROM") == null ){
sHeader =
(String)ResourceText.Strings.get("InternetIntranet_6");
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&FID="+sFileName+"
&x="+JABITAT_PARSER;
}
else{
sHeader =
(String)ResourceText.Strings.get("SourceContent_1");
//
sParams="URL="+Common.encodeString(url)+"&STEP=2&RB=FP&CID="+sFileName+"
&ACTION=AP&MODE=ACTIVEX&SHOW=3&x="+JABITAT_PARSER;
sParams="x="+JABITAT_PARSER+"&ACTION=AP&MODE=ACTIVEX&SHOW=3&CID="+sFileN
ame+"&FID="+sFileName+"&URL="+Common.encodeString(url)+"&STEP=2&RB=FP";
}
return new Object[]{Common.print_header(sHeader,
(String)ResourceText.Strings.get("Common_5")+"<br/><a
class='p2'
href='"+PortalServlet.SERVLET_NAME+"?"+sParams+"'>"+url+"</a>")
, d};
}
else
return new Object[]{processDOM(d, sFileName), d};
}
}
}
catch (Exception e)
{
e.printStackTrace();
Logger.error("JabitatParser", e);
System.err.println("Error handling request: " + e.getMessage());
}
return null;
}
BottomLINE
=========
1) Is there any way to open those URLs as well using this patch of v0.3-3?
2) Is there any HttpClient version with the HTTPResponse and NVPair
classes that supports https (and is therefore compatible with my existing code)?
3) Or, if I have to upgrade to v2.0, which classes and methods do I
have to use to replace the NVPair and HTTPResponse usage?
Thanks
Kashif