I am trying to crawl the pages of a website. Here is my sample code; I used
Stack Overflow just for testing, I don't actually want to crawl Stack Overflow.
In this code I want to get every link on the page and push it into an array, then
go to every link and search for "Node" (it's just a test).
Here is my code:
var request = require('request');
var cheerio = require('cheerio');
// Listing page whose question links seed the crawl queue.
var siteUrl =
"http://stackoverflow.com/unanswered/tagged/?page=1&tab=votes";
// Absolute URLs of the questions found on the listing page.
var queue = [];

/*
 * Fetch the listing page, collect the link of every question on it into
 * `queue`, then fetch each queued page and load it into cheerio.
 */
request(siteUrl, function(error, response, html){
    // Report the failure instead of silently continuing with an empty queue.
    if (error || !html) {
        console.error("Failed to fetch " + siteUrl + ":", error);
        return;
    }

    var $ = cheerio.load(html);

    // Extract All links in page
    $('.question-summary').each(function(){
        var url = $(this)
            .children().last()
            .children().first()
            .children().first()
            .attr('href');
        // attr() yields undefined when the expected element or its href is
        // missing; skip it rather than queueing "http://stackoverflow.comundefined".
        if (url) {
            queue.push("http://stackoverflow.com" + url);
        }
    });

    // Search For Node.js on every question.
    // Visits every queued link (the earlier loop started at index 3 and so
    // skipped the first three questions).
    queue.forEach(function(item){
        request(item, function(error, response, html){
            // On a failed request there is no body to parse. Handing an
            // undefined body to cheerio.load is presumably what raised
            // "cannot read property 'parent' of undefined" - TODO confirm.
            if (error || !html) {
                console.error("Failed to fetch " + item + ":", error);
                return;
            }
            var page = cheerio.load(html);
            console.log(page);
        });
    });
});
After I run this code, it gives this error:
TypeError: Cannot read property 'parent' of undefined
I think there is something wrong with cheerio, but I don't know how to fix this.
--
Job board: http://jobs.nodejs.org/
New group rules:
https://gist.github.com/othiym23/9886289#file-moderation-policy-md
Old group rules:
https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
---
You received this message because you are subscribed to the Google Groups
"nodejs" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/nodejs/bfc99f30-7116-471e-9919-a3eae643e9f0%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.