The code below is already somewhat out of date, and the obvious problem
in it is that the attempt to QI nsIHTMLContentSink from nsParser
fails.
The main question is - where can I QI nsIHTMLContentSink from?
The other alternative is using NS_NewHTMLContentSink(), which is
supposed to be the solution. I've tried that one but ran into the
following problem: I linked against -lgkconhtmldoc_s in order to
have the implementation of HTMLContentSink (and
NS_NewHTMLContentSink()), but my kdevelop failed at the link stage
saying "Exited with stats: 2" and nothing more...
Since my main goal is just extracting URLs from a given HTML page, my
first attempt was to write my own content sink which inherits from
nsIHTMLContentSink and overrides the OpenXXX/CloseXXX routines,
catching all the anchors, links, frames etc. and extracting the URLs from
their attributes (href for <a>, src for <frame> etc.). Unfortunately,
when I tried running it with the parser, only OpenHTML & OpenBody were
invoked. That's how I came to just building the DOMHTMLDocument from
the nsIHTMLContentSink and working with it.
[EMAIL PROTECTED] (Gil Fidel) wrote in message news:<[EMAIL PROTECTED]>...
> Hi,
> I'm trying to parse a URL into an nsIDOMHTMLDocument. Everything seems
> to be going fine, but when I want to go over the resulting DOM
> document, it turns out that it's empty (or contains empty nodes) -
> when I call routines such as GetTitle() or GetAnchors() they return
> NS_OK as their return value, but the data they return is either a
> blank string or an empty nsIHTMLCollection.
>
> Does anyone have any idea what could be wrong with my code?
>
> Thanks in advance.
>
> the code:
> //----------------------------------------------------------------------------
> void PrintHTMLDOM(nsString * urlName)
> {
> char * tmpStr = ToNewCString(*urlName);
> fprintf(stderr, "Start parsing %s\n", tmpStr);
>
> nsresult rv;
> // Create url
> nsIURI* url;
>
> nsCOMPtr<nsIIOService> service(do_GetService(kIOServiceCID, &rv));
>
> if (NS_FAILED(rv))
> {
> printf("Failed to create nsIIOService. error = [%x]\n", rv);
> return;
> }
>
> nsIURI *uri = nsnull;
> NS_ConvertUCS2toUTF8 uriStr(*urlName);
> rv = service->NewURI(uriStr, nsnull, nsnull, &uri);
> if (NS_FAILED(rv))
>
> {
> printf("Failed to create URI. error = [%x]\n", rv);
> return;
> }
>
> rv = uri->QueryInterface(NS_GET_IID(nsIURI), (void**)&url);
> NS_RELEASE(uri);
> if (NS_OK != rv) {
> printf("invalid URL: '");
> fputs(uriStr.get(), stderr);
> printf("'\n");
> return;
> }
>
> //Create input stream
> nsCOMPtr<nsIInputStream> inputStream;
> rv = NS_OpenURI(getter_AddRefs(inputStream), url);
>
> //Create webshell
> nsCOMPtr<nsIWebShell> webshell(do_GetService(kWebShellCID, &rv));
>
> //Create html document
> nsCOMPtr<nsIDocument> doc(do_CreateInstance(kHTMLDocumentCID,
> &rv));
> if(NS_FAILED(rv))
>
> {
> fprintf(stderr, "Couldn't create an html document [%x]\n",
> rv);
> return;
> }
>
> //Create parser
> nsCOMPtr<nsIParser> parser;
> rv = nsComponentManager::CreateInstance(kCParserCID,
> nsnull,
> kCParserIID,
> (void **)&parser);
> if (NS_FAILED(rv))
> {
> fprintf(stderr, "Unable to create a parser : 0x%x\n", rv);
> return;
> }
>
> //Create htmlcontentsink
> nsCOMPtr<nsIHTMLContentSink> sink;
> parser->QueryInterface(kIHTMLContentSinkIID,
> getter_AddRefs(sink));
>
> if(NS_FAILED(rv))
> {
> fprintf(stderr, "Unable to create an HTMLContentSink :
> 0x%x\n", rv);
> return;
> }
> //rv = NS_NewHTMLContentSink(getter_AddRefs(sink), doc, url,
> webshell, nsnull);
> parser->SetContentSink(sink);
>
> rv = parser->Parse(inputStream, NS_LITERAL_CSTRING("text/html") );
> if(NS_FAILED(rv))
> {
> fprintf(stderr, "Parsing failed : 0x%x\n", rv);
> return;
> }
> fprintf(stderr, "Finished parsing %s\nStart printing HTML DOM:\n",
> tmpStr);
>
> nsCOMPtr<nsIDOMHTMLDocument> domdoc;
> rv = doc->QueryInterface(nsIDOMHTMLDocument::GetIID(),
> getter_AddRefs(domdoc));
>
> fprintf(stderr, "Parse_URL() - after nsIDOMHTMLDocument QI result
> = 0x%x\n", rv);
>
> nsString title;
> nsString URL;
> domdoc->GetTitle(title);
> domdoc->GetDomain(URL);
> fprintf(stderr, "Parse_URL() - Document title = %s\nID = %s\n",
> ToNewUTF8String(title), ToNewUTF8String(URL));
>
> nsIDOMHTMLCollection * anchors
> rv = domdoc->GetAnchors(&anchors);
> PRUint32 len;
> links->GetLength(&len);
> fprintf(stderr, "GetAnchors from DOCUMENHT. anchors count = %d, rv
> = 0x%x\n", len, rv);
>
> nsCOMPtr<nsIDOMHTMLElement> body;
> rv = domdoc->GetBody(getter_AddRefs(body));
> printf("Parse_URL() - after GetBody rv = 0x%x\n", rv);
_______________________________________________
mozilla-layout mailing list
[EMAIL PROTECTED]
http://mail.mozilla.org/listinfo/mozilla-layout