On Sunday, 30 September 2018 at 03:19:11 UTC, Adam D. Ruppe wrote:
On Saturday, 29 September 2018 at 23:46:26 UTC, helxi wrote:
Thanks. Would you say
https://dlang.org/library/std/encoding/get_bom.html is useful
in this context?
Eh, not really, most text files will not have one.
Hi,
I tried out https://dlang.org/library/std/utf/validate.html
before checking the encoding manually, and ended up with
the code below. I was fairly surprised that "*.o" (object) files
pass the UTF validation! Is that normal?
import std.stdio : File, lines, stdout;
/**
 * Reports a fatal error and terminates the program.
 *
 * Params:
 *   message  = text written to standard error, followed by a newline
 *   exitCode = status passed to the C runtime's exit(); defaults to 1
 *
 * This function does not return to its caller.
 */
void panic(in string message, int exitCode = 1) {
    import std.stdio : stderr;
    stderr.writeln(message);

    import core.stdc.stdlib : exit;
    exit(exitCode);
}
/**
 * Writes one search hit to `ofile`.
 *
 * The record has the form
 * `<occurrence>: L:<line number>: F:"<file name>":` followed by the matching
 * line on the next output line.
 *
 * Params:
 *   occerenceNumber = running number of this hit
 *   lineNumber      = line index of the hit, printed exactly as given
 *   fileName        = path of the file containing the hit
 *   line            = text of the matching line
 *   ofile           = destination stream; defaults to standard output
 *
 * The string parameters are plain `in` rather than `in ref`: a D string is
 * already a (pointer, length) pair, and dropping `ref` lets callers pass
 * rvalues such as literals while every existing lvalue call keeps working.
 */
void writeFunc(ulong occerenceNumber, ulong lineNumber, in string fileName,
        in string line, File ofile = stdout) {
    // writef is a member of File, so no extra import is needed here.
    ofile.writef("%s: L:%s: F:\"%s\":\n%s\n", occerenceNumber, lineNumber,
            fileName, line);
}
/**
 * Walks `path` (breadth-first, including sub-directories), searches every
 * regular file for `term`, prints each matching line via writeFunc and
 * finally prints a summary of the totals.
 *
 * Params:
 *   path = directory to search; the program exits through panic() if it
 *          does not exist or is not a directory
 *   term = text to look for in each line
 *
 * A file whose first line is not valid UTF, or that cannot be opened or
 * read, is counted as ignored and the walk continues with the next entry.
 * Only the first line is validated, so a binary file whose first line
 * happens to be valid UTF is still searched.
 */
void treverseDirectories(in string path, in string term) {
    import std.algorithm : canFind;
    import std.file : DirEntry, dirEntries, exists, isDir, SpanMode;
    import std.stdio : writefln;
    import std.utf : validate;

    // Checked in the body rather than in an `in` contract: contracts are
    // compiled out with -release, and isDir() throws on a missing path
    // instead of returning false.
    if (!exists(path) || !isDir(path))
        panic("Cannot access directory: " ~ path);

    ulong occerenceNumber, filesChecked, filesIgnored; // all start at 0

    foreach (DirEntry entry; dirEntries(path, SpanMode.breadth)) {
        try {
            // dirEntries also yields directories; only regular files are
            // searched and counted.
            if (!entry.isFile)
                continue;

            // Kept as a named lvalue so it can be passed by reference.
            string fileName = entry.name;

            // Scoped to this iteration so the handle is released as soon as
            // the file is done, including when an exception is thrown.
            auto currentFile = File(fileName, "r");
            ++filesChecked; // counted once opened, even if rejected below

            foreach (ulong lineNumber, string currentLine; lines(currentFile)) {
                if (lineNumber == 0) {
                    // Check that the file starts with proper UTF; validate()
                    // throws if it does not, which moves on to the next file.
                    validate(currentLine);
                }
                if (canFind(currentLine, term)) {
                    writeFunc(++occerenceNumber, lineNumber, fileName,
                            currentLine);
                }
            }
        }
        catch (Exception) {
            // Best effort: unreadable or non-UTF files are skipped.
            ++filesIgnored;
        }
    }

    // summarize
    writefln("Total match found:\t%s\nTotal files checked:\t%s\nTotal files ignored:\t%s\n",
            occerenceNumber, filesChecked, filesIgnored);
}
/**
 * Program entry point.
 *
 * Recognised options:
 *   --term / -t       text to search for (required)
 *   --directory / -d  directory to search (defaults to the current one)
 *
 * Exits through panic() when the options cannot be parsed or no search
 * term was supplied.
 */
void main(string[] args) {
    import std.getopt : getopt, GetOptException;

    string term, directory;
    try {
        getopt(args, "term|t", &term, "directory|d", &directory);
    }
    catch (GetOptException e) {
        // Report bad command lines as a plain message, not a stack trace.
        panic(e.msg);
    }

    // Test the length rather than the pointer: an explicitly empty value
    // (e.g. `--directory=`) may be a non-null, zero-length string.
    if (directory.length == 0) {
        // if directory not specified, start working with the current directory
        import std.file : getcwd;
        directory = getcwd();
    }
    if (term.length == 0)
        panic("Term not specified.");

    treverseDirectories(directory, term);
}
/*
Output: https://pastebin.com/PZ8nCaYf