制作了一个基于 Node.js 的抓取小案例
code:
/**
 * Command-line scraper for lagou.com job listings.
 *
 * Usage: node <script> <keywords> <city>
 *
 * Result pages are fetched one after another until a page containing a
 * `.noresult` element is returned; the collected entries are then written
 * as JSON to data/<keywords>_<city>.json.
 */
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var path = require('path');

// Directory (relative to the working directory) that receives the JSON file.
var OUTPUT_DIR = 'data';

var page = 1;   // 1-based number of the result page being fetched
var out = [];   // job entries accumulated across all pages
var keywords;   // search keywords, taken from argv[2]
var city;       // city filter, taken from argv[3]
var links;      // listing URL without the trailing page number

main();

/**
 * Validates the command-line arguments, builds the base listing URL and
 * starts the crawl.
 */
function main() {
  if (process.argv.length !== 4) {
    console.log('请输入正确的参数');
    return;
  }

  keywords = process.argv[2];
  city = process.argv[3];

  // Encode user input so characters such as '#', '&', '+', spaces or
  // non-ASCII text cannot corrupt the path or the query string.
  var encodedKeywords = encodeURIComponent(keywords);
  links = 'http://www.lagou.com/jobs/list_' + encodedKeywords +
    '?kd=' + encodedKeywords +
    '&spc=1&labelWords=label%2Clabel&lc=&city=' + encodeURIComponent(city) +
    '&pn=';

  jober();
}

/**
 * Fetches the current result page, appends every job found on it to `out`,
 * then advances `page` and calls itself for the next one. When the page
 * contains a `.noresult` element the crawl stops and the results are saved.
 */
function jober() {
  var address = links + page;

  request.get(address, function (err, res, body) {
    if (err || res.statusCode !== 200) {
      return console.log('发生错误');
    }

    var $ = cheerio.load(body);

    if ($('.noresult').length) {
      if (page === 1) {
        return console.log('找不到相关数据');
      }
      saveResults();
      return;
    }

    console.log('-------------------');
    console.log('正在抓取第' + page + '页');

    $('.hot_pos > li').each(function (i, el) {
      var item = $(el);
      var firstLink = item.find('a').eq(0);

      out.push({
        title: firstLink.attr('title'),
        link: firstLink.attr('href'),
        city: item.find('a').next('span').text(),
        salary: item.find('.hot_pos_l > span').eq(0).text(),
        company: item.find('.hot_pos_r').find('.apply').next('div').find('a').attr('title')
      });
    });

    page++;
    jober();
  });
}

/**
 * Serializes the collected jobs and writes them to
 * data/<keywords>_<city>.json, creating the output directory first if it
 * does not exist yet.
 */
function saveResults() {
  var json = JSON.stringify(out);
  console.log('抓取结束,共' + (page - 1) + '页,' + out.length + '条数据');

  var file = path.join(OUTPUT_DIR, toFileNamePart(keywords) + '_' + toFileNamePart(city) + '.json');

  fs.mkdir(OUTPUT_DIR, function (mkdirErr) {
    // An already existing directory is the normal case, not a failure.
    if (mkdirErr && mkdirErr.code !== 'EEXIST') {
      return console.log('保存文件出错');
    }

    fs.writeFile(file, json, function (writeErr) {
      if (writeErr) {
        return console.log('保存文件出错');
      }
      console.log('保存数据成功');
    });
  });
}

/**
 * Replaces path separators in a user-supplied value so it can be used as
 * part of a single file name (e.g. the keyword "C/C++").
 *
 * @param {string} value - Raw command-line argument.
 * @returns {string} The value with '/' and '\' replaced by '_'.
 */
function toFileNamePart(value) {
  return String(value).replace(/[\\/]/g, '_');
}
效果: