2013-10-10 16:20| 发布者: tianzc| 查看: 1211| 评论: 0
表结构 01 -- 02 -- 表的结构 `collectvideo` 03 -- 04 05 CREATE TABLE `collectvideo` ( 06 `id` mediumint(7) unsigned NOT NULL auto_increment, 07 `rid` tinyint(2) NOT NULL default '9' COMMENT '类别 1:电影 2:电视 3:动画 4:综艺 9:其他', 08 `cid` char(10) NOT NULL COMMENT '类型', 09 `nid` char(10) NOT NULL COMMENT '地区', 10 `subject` varchar(100) NOT NULL COMMENT '标题', 11 `pic` char(200) NOT NULL COMMENT '图片', 12 `playactor` varchar(150) NOT NULL COMMENT '演员', 13 `director` varchar(100) NOT NULL COMMENT '导演', 14 `year` char(10) NOT NULL COMMENT '年份', 15 `language` char(10) NOT NULL COMMENT '语言', 16 `content` text NOT NULL COMMENT '内容', 17 `fromurl` char(200) NOT NULL COMMENT '来自', 18 `updatecheck` tinyint(1) NOT NULL default '1' COMMENT '是否需要更新至正式表', 19 `updatetime` int(10) NOT NULL default '0', 20 PRIMARY KEY (`id`), 21 KEY `fromurl` (`fromurl`) 22 ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ; 23 24 -- 25 -- 导出表中的数据 `collectvideo` 26 -- 27 28 29 -- -------------------------------------------------------- 30 31 -- 32 -- 表的结构 `collectvideo_url` 33 -- 34 35 CREATE TABLE `collectvideo_url` ( 36 `id` mediumint(8) unsigned NOT NULL auto_increment, 37 `vid` mediumint(7) NOT NULL COMMENT 'collectvideo id', 38 `type` tinyint(2) NOT NULL default '1' COMMENT '1:bdhd,2:gvod,3:qvod', 39 `size` int(11) NOT NULL COMMENT '影片大小', 40 `hash` varchar(40) NOT NULL COMMENT '影片HASH', 41 `name` varchar(100) NOT NULL COMMENT '影片名称', 42 PRIMARY KEY (`id`), 43 UNIQUE KEY `hash` (`hash`), 44 KEY `vid` (`vid`) 45 ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ; 46 47 -- 48 -- 导出表中的数据 `collectvideo_url` 49 -- ##########NextPage########## [代码] caiji.js 001 /** 002 * collect gvod.tv 003 * author Richard 004 * 2013-3-9 005 */ 006 007 var mysql = require('./mysql.js'); 008 var http = require('http'); 009 var $ = require('jquery'); 010 var fs = require('fs'); 011 var iconv = require('iconv-lite'); 012 var querystring = require('querystring'); 013 014 // set param 015 var conn = mysql.conn(); 016 var weburl = 'http://gvodtv.33k.cc'; 017 var rid_type = {'movie':'动作片,喜剧片,爱情片,科幻片,恐怖片,剧情片,战争片,其它片', 'tv':'国产剧,港台剧,欧美剧,日韩剧', 'animation':'动画片', 'variety':'综艺片'}; 018 //1:bdhd, 2:gvod, 3:qvod 019 020 // start 021 //for(var i = 1; i <= 740; i++){ 022 // start(i); 023 //} 024 start(1); 025 function start(page){ 026 getFromURL(page, function(html){ 027 if(html){ 028 029 $(html).find('table tr.row').each(function(){ 030 // get fromurl 031 var fromurl = $(this).find('td:eq(0) a').attr('href'); 032 033 // select fromurl from db 034 selectURL(fromurl, function(fromurl, results){ 035 036 if(!results.length){// db fromurl is no exists 037 // insert fromurl to db 038 insertURL(fromurl, function(fromlist){ 039 // get fromurl information 040 getInformation(fromlist, function(fromlist, html){ 041 if(html){ 042 // get list 043 var list = getList(html); 044 // 封装 数据入库 045 FcInsertInformation(fromlist, list); 046 } 047 }); 048 }); 049 }else if(results[0].updatetime){ 050 // 如果发现URL存在,则页面时间对比库里面的更新时间 051 var fromlist = {}; 052 fromlist['id'] = results[0].id; 053 fromlist['fromurl'] = fromurl; 054 fromlist['updatetime'] = results[0].updatetime; 055 getInformation(fromlist, function(fromlist, html){ 056 if(html){ 057 // get list 058 var list = getList(html); 059 // 现有时间是否 > 库时间 表示有更新 060 if(list.updatetime > fromlist.updatetime){ 061 // 更新 062 FcInsertInformation(fromlist, list); 063 } 064 } 065 }); 066 067 }else{//库时间为空 从新更新 068 var fromlist = {}; 069 fromlist['id'] = results[0].id; 070 fromlist['fromurl'] = fromurl; 071 getInformation(fromlist, function(fromlist, html){ 072 if(html){ 073 // get list 074 var list = getList(html); 075 // 更新 076 FcInsertInformation(fromlist, list); 077 } 078 }); 079 } 080 }); 081 }); 082 } 083 }); 084 }; 085 086 // get fromurl 087 function getFromURL(page, callback){ 088 var html = ''; 089 var req = http.get(weburl + '/?page=' + page, function(res){ 090 res.setEncoding('binary'); 091 res.on('data', function(data){ 092 html += data; 093 }).on('end', function(){ 094 var buf = new Buffer(html, 'binary');//这一步不可省略 095 var str = iconv.decode(buf, 'gbk');//将GBK编码的字符转换成utf8的 096 callback(str); 097 }).on('close', function(){ 098 console.log('Close received!'); 099 }); 100 }); 101 req.on('error', function(error){ 102 fs.appendFile('error.log', new Date().getTime()+' '+error+'\r\n', 'utf-8'); 103 }); 104 105 } 106 107 // get fromurl information 108 function getInformation(fromlist, callback){ 109 if(fromlist.fromurl){ 110 var html = ''; 111 var req = http.get(weburl + fromlist.fromurl, function(res){ 112 res.setEncoding('binary'); 113 res.on('data', function(data){ 114 html += data; 115 }).on('end', function(){ 116 var buf = new Buffer(html, 'binary');//这一步不可省略 117 var str = iconv.decode(buf, 'gbk');//将GBK编码的字符转换成utf8的 118 callback(fromlist, str); 119 }).on('close', function(){ 120 console.log('Close received!'); 121 }); 122 }); 123 req.on('error', function(error){ 124 fs.appendFile('error.log', new Date().getTime()+' '+error+'\r\n','utf-8'); 125 }); 126 } 127 } 128 129 function selectURL(fromurl, callback){ 130 if(fromurl){ 131 conn.query( 132 'SELECT id,fromurl,updatetime FROM collectvideo where fromurl="' + fromurl + '" limit 1', 133 function selectCb(err, results) { 134 if(err){ 135 throw err; 136 } 137 callback(fromurl, results); 138 } 139 ); 140 } 141 } 142 143 function insertURL(fromurl, callback){ 144 if(fromurl){ 145 conn.query( 146 'INSERT INTO collectvideo '+ 147 'SET fromurl = ?', 148 [fromurl], 149 function selectCb(err, results) { 150 if(err){ 151 throw err; 152 } 153 var fromlist = {}; 154 fromlist['id'] = results.insertId; 155 fromlist['fromurl'] = fromurl; 156 callback(fromlist); 157 } 158 ); 159 } 160 } 161 162 // 查询影片地址是片已存在 163 function selectHash(fromlist, list, callback){ 164 if(list[2]){ 165 conn.query( 166 'SELECT id FROM collectvideo_url where hash="' + list[2] + '" limit 1', 167 function selectCb(err, results) { 168 if(err){ 169 throw err; 170 } 171 callback(fromlist, list, results); 172 } 173 ); 174 } 175 } 176 177 // 封装 数据入库 178 function FcInsertInformation(fromlist, list){ 179 insertInformation(fromlist, list, function(list){ 180 console.log(list.cid, list.nid, list.subject); 181 }); 182 insertInformationUrl(fromlist, list); 183 } 184 185 // 数据入库 186 function insertInformation(fromlist, list, callback){ 187 if(fromlist.id && list){ 188 conn.query( 189 'UPDATE collectvideo '+ 190 'SET rid = ?, cid = ?, nid = ?, subject = ? ,pic = ?, playactor = ?, content = ?, updatecheck = ?, updatetime = ? '+ 191 'WHERE id=' + fromlist.id, 192 [list.rid, list.cid, list.nid, list.subject, list.pic, list.playactor, list.content, 0, list.updatetime], 193 function selectCb(err, results) { 194 if(err){ 195 throw err; 196 } 197 callback(list); 198 } 199 ); 200 } 201 } 202 203 // 数据入库 204 function insertInformationUrl(fromlist, list){ 205 if(fromlist.id && list.url){ 206 for(var x in list.url){ 207 if(fromlist.id && list.url[x][0] && list.url[x][1] && list.url[x][2] && list.url[x][3]){ 208 // 查询影片地址是片已存在 209 selectHash(fromlist, list.url[x], function(fromlist, list, results){ 210 if(results.length == 0){ 211 conn.query( 212 'REPLACE INTO collectvideo_url '+ 213 'SET vid = ?, type = ?, size = ?, hash = ?, name = ?', 214 [fromlist.id, list[0], list[1], list[2], list[3]] 215 ); 216 } 217 }); 218 }else{//入库失败写记录 219 fs.appendFile('error_db.log', JSON.stringify(newDate().getTime()+' '+fromlist)+' '+list.url[x]+'\r\n', 'utf-8'); 220 } 221 } 222 } 223 } 224 225 // 解析详细页面 226 function getList(html){ 227 // list 228 var list = {}; 229 230 // playinfo 231 var dom = $(html); 232 list['subject'] = dom.find('#mmc').text();//标题 233 list['playactor'] = dom.find('#myy').text();//主演 234 list['cid'] = dom.find('#mlx').text();//类别 235 list['nid'] = dom.find('#mdq').text().replace('地区', '');//地区 236 list['updatetime'] = new Date(dom.find('#msj').text().replace('-',',')).getTime()/1000;//更新时间 237 list['content'] = dom.find('#mjj').text().replace(/\s+/g, '');//介绍 238 list['pic'] = dom.find('td[valign="middle"] img').attr('src');//图片 239 list['rid'] = getRidType(list['cid']); 240 241 var bdhd = []; 242 var gvod = []; 243 $(html).find('table tr td[align="left"] a').each(function(){ 244 var str = $(this).text(); 245 if(str.indexOf('bdhd://') > -1){ 246 bdhd.push(str.replace('bdhd://', '').split('|')); 247 } 248 if(str.indexOf('gvod://') > -1){ 249 gvod.push(str.replace('gvod://', '').split('/')); 250 } 251 }); 252 list['url'] = getListurl(bdhd, gvod); 253 return list; 254 } 255 256 // 构造合适的链接地址数组入库 257 function getListurl(bdhd, gvod){ 258 //1:bdhd, 2:gvod, 3:qvod 259 var url = []; 260 if(bdhd){ 261 for(var x in bdhd){ 262 var b = []; 263 b.push(1); 264 b.push(bdhd[x][0]); 265 b.push(bdhd[x][1]); 266 b.push(bdhd[x][2]); 267 url.push(b); 268 } 269 } 270 if(gvod){ 271 for(var x in gvod){ 272 var g = []; 273 g.push(2); 274 g.push(gvod[x][2]); 275 g.push(gvod[x][1]); 276 g.push(gvod[x][3]); 277 url.push(g); 278 } 279 } 280 return url; 281 } 282 283 //影片类型 电影 1:电影 2:电视 3:动画 4:综艺 9:其他 284 //var rid_type = {'movie':'动作片,喜剧片,爱情片,科幻片,恐怖片,剧情片,战争片,其它片', 'tv':'国产剧,港台剧,欧美剧,日韩剧', 'animation':'动画片', 'variety':'综艺片'}; 285 function getRidType(cid){ 286 var rid = 9; 287 if(cid){ 288 if(rid_type.movie.indexOf(cid) > -1) rid = 1; 289 else if(rid_type.tv.indexOf(cid) > -1) rid = 2; 290 else if(rid_type.animation.indexOf(cid) > -1) rid = 3; 291 else if(rid_type.variety.indexOf(cid) > -1) rid = 4; 292 } 293 return rid; 294 } ##########NextPage########## [代码] mysql.js 01 var mysql = require('mysql'); 02 var mysql_options = { 03 host: 'localhost', 04 port: 3306, 05 user: 'root', 06 password: '', 07 database: 'test' 08 }; 09 10 // 返回连接 11 exports.conn = function(){ 12 return mysql.createConnection(mysql_options); 13 }; 14 15 // 自定义db 16 exports.conndb = function(db){ 17 if(db) mysql_options['database'] = db; 18 return mysql.createConnection(mysql_options); 19 }; 20 21 //var TEST_TABLE = 'test'; 22 23 //创建数据库 24 /*conn.query('CREATE DATABASE '+TEST_DATABASE, function(err) { 25 if (err && err.number != mysql.ERROR_DB_CREATE_EXISTS) { 26 throw err; 27 } 28 });*/ 29 30 //不指定回调函数,如果出错,则体现为客户端错误 31 //conn.query('USE '+TEST_DATABASE); 32 33 //创建表格,插入数据 34 /*conn.query( 35 'CREATE TABLE '+TEST_TABLE+ 36 '(id INT(11) AUTO_INCREMENT, '+ 37 'name VARCHAR(255), '+ 38 'PRIMARY KEY (id))' 39 ); */ 40 41 /*conn.query( 42 'INSERT INTO '+TEST_TABLE+' '+ 43 'SET name = ?', 44 ['nodejs1'] 45 ); 46 47 var query = conn.query( 48 'INSERT INTO '+TEST_TABLE+' '+ 49 'SET name = ?', 50 ['nodejs2'] 51 );*/ 52 53 //查询,并设置回调函数 54 /*conn.query( 55 'SELECT * FROM '+TEST_TABLE, 56 function selectCb(err, results, fields) { 57 if (err) { 58 throw err; 59 } 60 61 console.log(results); 62 console.log(fields); 63 conn.end(); 64 } 65 );*/ |