最近很头疼:用 PHP 写了个下载脚本,一天要看护 12 小时,还经常执行到一半出错,只能下载区区 1000 个文件,而且这 1000 个文件也不能确定是不是都 OK。

然后决定还是用 Node 写一写。之前经历了断网和内存爆掉的问题,现在终于成功了!

我使用的是 CentOS 系统,2G 内存。用 Windows 处理也可以,不过 Windows 就是个坑,尽量还是别用了。

由于 Node.js 是异步操作,因此在操作大量数据的时候必须使用队列来控制并发,否则所有请求会同时发起。

队列示例:

var array = [1,2,3,4];

/**
 * Take the next item off the end of `array` and hand it to load().
 * Does nothing once the queue is empty, which is what ends the chain.
 */
function check() {
    if (array.length > 0) {
        var url = array.pop();
        load(url);
    }
}

/**
 * Process one queue item, then ask check() for the next one.
 *
 * @param {*} url - the item to process (only printed in this demo)
 */
function load(url) {
    console.log(url);
    check();
}

// Start the queue through check(): a bare load() call would print
// `undefined` as a bogus first item before the real ones.
check();

return;

如果需要做一个 sleep(暂停)的效果,示例如下:

var array = [1,2,3,4];

/**
 * Block synchronously for about `milliSeconds` milliseconds.
 *
 * This is a busy-wait: it spins on the clock until the deadline passes, so
 * nothing else in the process runs while it waits.
 *
 * @param {number} milliSeconds - how long to spin before returning
 */
function sleep(milliSeconds){
    var deadline = Date.now() + milliSeconds;
    while (Date.now() < deadline) {
        // spin until the deadline is reached
    }
}

/**
 * Pop the next item off the end of `array` and pass it to load().
 * Returns without doing anything when the queue is empty.
 */
function check(){
    if(array.length === 0){
        return;
    }
    load(array.pop());
}

/**
 * Print one queue item, pause, then move on to the next item via check().
 *
 * @param {*} url - the item to process (only printed in this demo)
 */
function load(url){
    var pauseMs = 10000; // pause for 10 s between items

    console.log(url);
    console.log('waiting');
    sleep(pauseMs);
    check();
}

// Start the queue through check(): a bare load() call would print `undefined`
// and stall for 10 s on a bogus item before the first real one.
check();

我的完整代码如下:从数据库取出 3000 条数据,为每条数据在指定文件夹下载 4 个文件,也就是一次性下载 12000 个文件。实测下载 500 个文件夹、2000 个文件只用了 20 分钟,2G 内存下实际监控到的内存使用率不超过 15%。因为是异步加载,为了省事,我就先不关闭数据库连接了。但一次下载太多,有时候网络不好可能会报错,因此每次可以少运行一点、分多次运行;用命令行参数控制 limit 也是很不错的(这里就不改进了)。

"use strict";
var http = require("http");
var fs = require("fs");
// 加载编码转换模块  
var iconv = require('iconv-lite');   
/*
var server = http.createServer(function(req, res){
res.writeHead(200, {'Content-type' : 'text/html'});
 res.write('<h1>Node.js</h1>');
 res.end('<p>Hello World</p>');
}).listen(50082);
console.log("http start");*/

// Connect to the database
var mysql = require('mysql');
// Connection settings: local server, database `gftest`.
// NOTE(review): credentials are hard-coded (root with an empty password) —
// confirm this is only ever used against a local development database.
var connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '',
    database:'gftest'
});

// NOTE(review): `lee` is not referenced anywhere else in the visible code —
// presumably a leftover; confirm before removing.
var lee = 1;



/*
update giphy_posts set nodea = '' where id < 2001;
update giphy_posts set nodestate = 0 where id < 2001;
 http.get('http://media0.giphy.com/media/zXHZWGLWNQkrS/giphy.mp4', function(res){

    console.log(res.statusCode +.res.headers['content-type']);


    //writeFile('a.txt',res);
    
 })
return;*/

connection.connect();
//查询

connection.query('select * from `giphy_posts` limit 0,3000', function(err, rows, fields) {
    if (err) throw err;


    function check(){
            if(rows.length > 0){

                var row = rows.pop();
                load(row);
            }
        }
    
    function log(name,data){
        //var data = 'hello world';
        var date = new Date();
        var year = date.getFullYear();
        var month = date.getMonth()+1;
        var day = date.getDate();
        var hour = date.getHours();
        var minute = date.getMinutes();
        var second = date.getSeconds(); 
        var data = year+'.'+month+'.'+day+' '+hour+':'+minute+':'+second + ' - ' + data + '\n';
        fs.appendFile('./log/'+name+'.txt',data,'utf8',function(err){  
            if(err)  
            {  
                console.log('lee log fun 函数出错'+err);  
            }else{
                console.log(data);
            }  
        });  
    }
    
    function load(row){
            var url = new Array();
            var k =0; //用于记录执行次数 触发check()
            url[0] = "http://media0.giphy.com/media/"+row.dataid+"/200w_s.gif";
            url[1]= "http://media0.giphy.com/media/"+row.dataid+"/giphy.gif";
            url[2]= "http://media0.giphy.com/media/"+row.dataid+"/giphy.mp4";
            url[3]= "http://media0.giphy.com/media/"+row.dataid+"/giphy_s.gif";

            if(row.nodestate > 4 || row.nodestate == 4){
                //文件下载状态超过4
                log('sqlerr','文件下载状态超过4,取消下载 不操作数据库 - 执行ID'+row.id);
                check(); //执行函数
                return;

            }else if(row.nodestate > 0){
                log('sqlerr','文件超过1,取消下载 不操作数据库 - 执行ID'+row.id);
                check(); //执行函数
                return;
            }
           // return;
            url.forEach(function (v){
               // console.log(v);
                http.get(v, function(res){
                    if(res.statusCode==200){
                        
                        var imgData = "";

                        res.setEncoding("binary"); //一定要设置response的编码为binary否则会下载下来的图片打不开
                        res.on("data", function(chunk){
                            imgData+=chunk;
                        });

                        res.on("end", function(){
                            //计算文件名
                            var patharr = v.split('/');
                            var filename = patharr[patharr.length-1]; //计算出文件名
                            var fileendarr = patharr[patharr.length-1].split('.');
                            var fileend = fileendarr[fileendarr.length-1]; //计算出后缀

                            /*console.log(fileend);
                            return;*/
                            //创建目录
                            if(!fs.existsSync('./a/'+row.dataid)){
                                var creats = fs.mkdirSync('./a/'+row.dataid, 777);
                            }
                            

                            fs.writeFile("./a/"+row.dataid + '/' + Math.random() +"."+fileend, imgData, "binary", function(err){
                                if(err){
                                    log('filerun',"down fail 下载失败 - "+row.dataid+"."+fileend+' - query id ='.row.id + '\n' +err);
                                    if (err) throw err;
                                    
                                }else{
                                    //更新数据库把文件名输入进去
                                    connection.query("update giphy_posts set nodea=concat(nodea,'"+fileend+",') where id="+row.id, function(err, result) {
                                        if (err) throw err;
                                         connection.query("update giphy_posts set nodestate=nodestate+1 where id="+row.id, function(err, result) {
                                        if (err) throw err;
                                       // console.log('update '+row.id+'OK');
                                        //console.log(result);
                                        log('sqlrun','update nodestate +int '+row.id+' -OK');
                                        //console.log('\n');
                                       // console.log("down success - "+row.dataid+"."+fileend+' - query id ='+row.id);
                                        
                                        });

                                        log('sqlrun','update add nodea file +name '+row.id+' -OK');
                                        //console.log(result);
                                        console.log('\n');
                                        log('filerun',"down success - "+row.dataid+"."+fileend+' - query id ='+row.id);

                                        if(++k >3){ //大于3执行到最后一次执行chuck
                                            check();
                                        }
                                        
                                    });

                                     

                                    
                                }

                                
                            });
                        });
                    }else{
                         if(++k >3){ //大于3执行到最后一次执行chuck
                             check();
                             }
                        log('filenot200','ID'+row.id+'文件返回状态码不是200下载失败 - '+v);
                    }
                    


                });



                
            });

            //check(); //完整下载完再check执行 内存爆表
            

            
            
        }

        check(); //初次执行队列
    

  //  rows.forEach(function(val){
      //  console.log(val.id + ' - 查询结果为: ', val.dataid);

        

            
        
/*
 var url = "http://media0.giphy.com/media/"+val.dataid+"/giphy.mp4";
    http.get(url, function(res){
        var imgData = "";

        res.setEncoding("binary"); //一定要设置response的编码为binary否则会下载下来的图片打不开


        res.on("data", function(chunk){
            imgData+=chunk;
        });

        res.on("end", function(){
            fs.writeFile("./a/"+val.dataid+".mp4", imgData, "binary", function(err){
                if(err){
                    console.log("down fail - "+val.dataid+'query id ='.val.id);
                }
                console.log("down success - "+val.dataid+'query id ='+val.id);
            });
        });
});*/
   // });


});
//关闭连接
//connection.end();
return;


// Demo: fetch the same logo ten times and save the copies as ./a/0.png … ./a/9.png.
// NOTE(review): in this file the section follows a top-level `return;`, so it
// appears to be unreachable as written — confirm whether it is still needed.
var sitearr = [];
for(var i = 0; i < 10; i++){
    sitearr[i] = i;
}

sitearr.forEach(function (fname){
    var url = "http://daysvr.com/static/lee/images/logo.png";
    http.get(url, function(res){
        // Collect raw Buffers so the image bytes are written back unchanged
        var chunks = [];

        res.on("data", function(chunk){
            chunks.push(chunk);
        });

        res.on("end", function(){
            fs.writeFile("./a/"+fname+".png", Buffer.concat(chunks), function(err){
                if(err){
                    console.log("down fail - "+fname);
                    return; // do not also report success for a failed write
                }
                console.log("down success - "+fname);
            });
        });
    }).on("error", function(err){
        // a network failure is reported instead of crashing the process
        console.log("down fail - "+fname+" - "+err);
    });
});

QQ截图20161127142524