FWIW, ez-streams lets you handle this with a single reader/transform/reduce chain:

var ez = require('ez-streams');

function analyze(filename, callback) {
  ez.devices.file.text.reader(filename)      // read the file as a text stream
    .transform(ez.transforms.lines())        // re-chunk the stream into individual lines
    .reduce(callback, function(cb, result, line) {
      // classify each line by its length and accumulate it
      result[line.length > 10 ? 'longLines' : 'shortLines'].push(line);
      cb(null, result);
    }, { shortLines: [], longLines: [] });   // initial accumulator
}
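
A hypothetical call site (the file name below is just an example; the result keys match the accumulator above) would look like:

analyze('example_1_unified_unique.txt', function(err, result) {
  if (err) return console.error(err);
  console.log('long lines:', result.longLines.length);
  console.log('short lines:', result.shortLines.length);
});
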
Bruno
On Sunday, April 26, 2015 at 2:09:36 AM UTC+2, Marco Ippolito wrote:
>
> Hi all,
> this is my code:
>
> #!/usr/bin/env node
>
> var path = require('path');
> var fs = require('fs');
>
> var util = require('util');
> var stream = require('stream');
> var es = require('event-stream');
> var normalized_path = path.normalize(process.argv[2]);
> var unified_unique_urls_file_path = path.join(process.cwd(), normalized_path);
> var unified_unique_urls_file_name = normalized_path + "_unified_unique.txt";
> var unified_unique_urls_file = unified_unique_urls_file_path + "/" + unified_unique_urls_file_name;
> //console.log(unified_unique_urls_file);
> // http://stackoverflow.com/questions/16010915/parsing-huge-logfiles-in-node-js-read-in-line-by-line
>
> var lineNr = 1;
> var rs = fs.createReadStream(unified_unique_urls_file);
>
> rs.pipe(es.split()) // split stream to break on newlines
> rs.pipe(es.map(function(line) { // es.map turns this async function into a stream
>     // pause the readstream
>     rs.pause();
>     lineNr += 1;
>     (function() {
>       // process line here and call rs.resume() when ready
>       callback(line);
>       //logMemoryUsage(lineNr);
>       // resume the readstream
>       rs.resume();
>     })();
>   })
>   .on('error', function() {
>     console.log('Error while reading file.');
>   })
>   .on('end', function() {
>     console.log('Read entire file.');
>   })
> );
>
> function callback(line) {
>   var lineS = line.toString();
>   var lengthy_lines = [];
>   var shorty_lines = [];
>   //console.log("Here is the input data: ", lineS);
>   var lengthy = lineS.length;
>   console.log("length of the line: ", lengthy);
>   if (lengthy > 10)
>     lengthy_lines.push(lineS);
>   else
>     shorty_lines.push(lineS);
>
>   console.log("lengthy lines are: ", lengthy_lines);
>   console.log("short lines are: ", shorty_lines);
> }
>
> The objective is to read the txt file (located in a directory) as a stream,
> line by line, and get the length of each line.
>
> but the output is:
> time ./stream_callback.js example_1
> length of the line: 415
> lengthy lines are: [ 'Prova prova prova\nOur seat and site license
> programs allow schools, organizations and corporations to purchase digital
> access for multiple users at a group discount\nLa recaudación por las
> películas en ‘streaming’ se multiplicó por 10 en Europa en cinco años\nLe
> fantastiche immagini del telescopio spaziale Hubble: in un quarto di secolo
> ha cambiato il nostro punto di vista sul galassie, stelle, pianeti e
> nebulose\n' ]
> short lines are: []
>
> Instead of getting the length of each line, it gives the total length...
> Any ideas?
>
> Looking forward to your kind help.
> Marco
>
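
For reference, the immediate problem in the snippet above is that es.split() and
es.map() are each piped from rs separately, so the map function receives the raw
file chunks instead of individual lines; the two pipes have to be chained. Note
also that lengthy_lines and shorty_lines are re-created on every call to
callback, so they never accumulate across lines. A minimal, untested sketch of
the chained version, assuming event-stream's split() and mapSync():

var fs = require('fs');
var es = require('event-stream');

fs.createReadStream(unified_unique_urls_file)
  .pipe(es.split())                  // each chunk is now a single line
  .pipe(es.mapSync(function(line) {
    console.log('length of the line:', line.length);
    return line;
  }))
  .on('error', function(err) {
    console.log('Error while reading file.', err);
  })
  .on('end', function() {
    console.log('Read entire file.');
  });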