I tried to use Node.js to process an apache log file of 500MB, converting 
its syntax from

    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26

to

    ip.ip.ip.ip - - 02/Aug/2012:05:01:17 GET /path/of/access/ HTTP/1.1 302 
26

, then write to another text file.

For better memory control and performance, I used `fs.createReadStream` and 
`fs.createWriteStream`, but only managed to write the first line into 
`output.txt`, because the script end with an error:

`{ [Error: EBADF, write] errno: 9, code: 'EBADF' }`

Am I doing anything wrong?

Here I posted some info that may help debug.

Head of `input.txt`:

    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26
    ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26

Content of `output.txt`:

    ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ 
HTTP/1.1" 302 26


The whole script:

    var fs = require('fs');
    var data ='';
    var n=0;                    //For line control
    var r = fs.createReadStream('./input.txt',{
        encoding: 'ascii',
        start:0,
        // end: 100000,
    });
    var w = fs.createWriteStream('./output.txt',{
        encoding:'ascii'
    });
    function put(line){            //write into w;
        ++n;
        w.write(line+'\n');
    }
    function end(){
        r.destroy();
        w.destroy();
    }
    function onData(chunk){
        var hasNewline = chunk.indexOf('\n')!==-1;
        if(hasNewline){
            var arr = chunk.split('\n');
            var first = arr.shift();
            var last = arr.pop();
            data+=first;
            put(data);            //write a complete line
            arr.forEach(function(line){
                put(line);        //write a complete line
            });
            data=last;
        }else{
            data+=chunk;
        }
        if(n>100){
            end();
        }
    }
    function onErr(e){
        console.log(e);
    }
    
    r.addListener( "data", onData);
    r.addListener( "end", end);
    r.addListener('error',onErr);
    w.addListener('error',onErr);

-- 
Job Board: http://jobs.nodejs.org/
Posting guidelines: 
https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
You received this message because you are subscribed to the Google
Groups "nodejs" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/nodejs?hl=en?hl=en

Reply via email to