The code below is already somewhat out of date, and the obvious problem
in it is that the attempt to QI nsIHTMLContentSink from nsParser
fails.

The main question is - where can I QI nsIHTMLContentSink from?

The other alternative is using NS_NewHTMLContentSink(), which is
supposed to be the solution. I've tried that one but ran into the
following problem: I linked against -lgkconhtmldoc_s in order to
have the implementation of HTMLContentSink (and
NS_NewHTMLContentSink()), but my KDevelop build failed at the link stage
saying "Exited with stats: 2" and nothing more...

Since my main goal is just extracting URLs from a given HTML page, my
first attempt was to write my own content sink which inherits from
nsIHTMLContentSink and overrides the OpenXXX/CloseXXX routines, to
catch all the anchors, links, frames, etc. and extract the URLs from
their attributes (href for <a>, src for <frame>, etc.). Unfortunately,
when I tried running it with the parser, only OpenHTML & OpenBody were
invoked. That's how I came to just building the DOMHTMLDocument from
the nsIHTMLContentSink and working with it.


[EMAIL PROTECTED] (Gil Fidel) wrote in message news:<[EMAIL PROTECTED]>...
> Hi,
> I'm trying to parse a URL into an nsIDOMHTMLDocument. Everything seems
> to be going fine, but when I want to go over the resulting DOM
> document, it turns out that it's empty (or contains empty nodes) -
> when I call routines such as GetTitle() or GetAnchors() they return
> NS_OK as their return value, but the data they return is either a
> blank string or an empty nsIDOMHTMLCollection.
> 
> Does anyone have any idea what could be wrong with my code?
> 
> Thanks in advance.
> 
> the code:
> //----------------------------------------------------------------------------
> void PrintHTMLDOM(nsString * urlName)
> {
>     char * tmpStr = ToNewCString(*urlName);
>     fprintf(stderr, "Start parsing %s\n", tmpStr);
> 
>     nsresult rv;
>     // Create url
>     nsIURI* url;
>     
>     nsCOMPtr<nsIIOService> service(do_GetService(kIOServiceCID, &rv));
>     
>     if (NS_FAILED(rv)) 
>     {
>         printf("Failed to create nsIIOService. error = [%x]\n", rv);
>         return;
>     }
>     
>     nsIURI *uri = nsnull;
>     NS_ConvertUCS2toUTF8 uriStr(*urlName);
>     rv = service->NewURI(uriStr, nsnull, nsnull, &uri);
>     if (NS_FAILED(rv))
>     
>     {
>         printf("Failed to create URI. error = [%x]\n", rv);
>         return;
>     }
> 
>     rv = uri->QueryInterface(NS_GET_IID(nsIURI), (void**)&url);
>     NS_RELEASE(uri);
>     if (NS_OK != rv) {
>         printf("invalid URL: '");
>         fputs(uriStr.get(), stderr);
>         printf("'\n");
>         return;
>     }
>     
>     //Create input stream
>     nsCOMPtr<nsIInputStream> inputStream;
>     rv = NS_OpenURI(getter_AddRefs(inputStream), url);
>     
>     //Create webshell
>     nsCOMPtr<nsIWebShell> webshell(do_GetService(kWebShellCID, &rv));
>         
>     //Create html document
>     nsCOMPtr<nsIDocument> doc(do_CreateInstance(kHTMLDocumentCID,
> &rv));
>     if(NS_FAILED(rv))
>     
>     {
>         fprintf(stderr, "Couldn't create an html document [%x]\n",
> rv);
>         return;
>     }
>     
>     //Create parser
>     nsCOMPtr<nsIParser> parser;
>     rv = nsComponentManager::CreateInstance(kCParserCID, 
>                                         nsnull, 
>                                         kCParserIID, 
>                                         (void **)&parser);
>     if (NS_FAILED(rv))
>     {
>         fprintf(stderr, "Unable to create a parser : 0x%x\n", rv);
>         return;
>     }
>     
>     //Create htmlcontentsink
>     nsCOMPtr<nsIHTMLContentSink> sink;
>     parser->QueryInterface(kIHTMLContentSinkIID,
> getter_AddRefs(sink));
>                                         
>     if(NS_FAILED(rv))
>     {
>         fprintf(stderr, "Unable to create an HTMLContentSink :
> 0x%x\n", rv);
>         return;
>     } 
>     //rv = NS_NewHTMLContentSink(getter_AddRefs(sink), doc, url,
> webshell, nsnull);
>     parser->SetContentSink(sink);
>     
>     rv = parser->Parse(inputStream, NS_LITERAL_CSTRING("text/html") );
>     if(NS_FAILED(rv))
>     {
>         fprintf(stderr, "Parsing failed : 0x%x\n", rv);
>         return;
>     }
>     fprintf(stderr, "Finished parsing %s\nStart printing HTML DOM:\n",
> tmpStr);
>     
>     nsCOMPtr<nsIDOMHTMLDocument> domdoc;
>     rv = doc->QueryInterface(nsIDOMHTMLDocument::GetIID(),
>     getter_AddRefs(domdoc));
>     
>     fprintf(stderr, "Parse_URL() - after nsIDOMHTMLDocument QI result
> = 0x%x\n", rv);
>     
>     nsString title;
>     nsString URL;
>     domdoc->GetTitle(title);
>     domdoc->GetDomain(URL);
>     fprintf(stderr, "Parse_URL() - Document title = %s\nID = %s\n",
> ToNewUTF8String(title), ToNewUTF8String(URL));
>     
>     nsIDOMHTMLCollection * anchors
>     rv = domdoc->GetAnchors(&anchors);
>     PRUint32 len;
>     links->GetLength(&len);
>     fprintf(stderr, "GetAnchors from DOCUMENHT. anchors count = %d, rv
> = 0x%x\n", len, rv);
>     
>     nsCOMPtr<nsIDOMHTMLElement> body;
>     rv = domdoc->GetBody(getter_AddRefs(body));
>     printf("Parse_URL() - after GetBody rv = 0x%x\n", rv);
_______________________________________________
mozilla-layout mailing list
[EMAIL PROTECTED]
http://mail.mozilla.org/listinfo/mozilla-layout

Reply via email to