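Read the BOM (byte-order mark) at the start of the file? Note that this only identifies files that actually begin with a BOM; anything else falls through to ASCII.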
module main;
import std.stdio;
/// Text encodings reported by GetFileEncoding.
/// Numeric values are spelled out explicitly; they match the implicit
/// zero-based numbering of the declaration order.
enum Encoding
{
    /// UTF-7.
    UTF7 = 0,
    /// UTF-8.
    UTF8 = 1,
    /// UTF-32.
    UTF32 = 2,
    /// UTF-16, little-endian byte order.
    Unicode = 3,
    /// UTF-16, big-endian byte order.
    BigEndianUnicode = 4,
    /// Used as the fallback when no byte-order mark is recognised.
    ASCII = 5
}
/**
 * Guesses the text encoding of a file from its byte-order mark (BOM).
 *
 * Only the first four bytes of the file are read. Files that are shorter
 * than a given BOM (including empty files) simply do not match it, instead
 * of triggering an out-of-bounds array access.
 *
 * Params:
 *     fileName = path of the file to inspect.
 *
 * Returns:
 *     The Encoding whose BOM the file starts with, or Encoding.ASCII when
 *     no known BOM is present. A file without a BOM is therefore always
 *     reported as ASCII, whatever its real encoding is.
 *
 * Throws:
 *     Whatever std.file.read throws when the file cannot be read.
 */
Encoding GetFileEncoding(string fileName)
{
    import std.file : read;

    // At most 4 bytes are requested; a shorter file yields a shorter array.
    auto bom = cast(ubyte[]) read(fileName, 4);

    // True when the file begins with exactly the bytes in `signature`.
    bool startsWithBytes(const(ubyte)[] signature)
    {
        return bom.length >= signature.length
            && bom[0 .. signature.length] == signature;
    }

    if (startsWithBytes([0x2b, 0x2f, 0x76]))
        return Encoding.UTF7;
    if (startsWithBytes([0xef, 0xbb, 0xbf]))
        return Encoding.UTF8;

    // The UTF-32 checks must come before the UTF-16 ones: the UTF-32LE BOM
    // (FF FE 00 00) begins with the UTF-16LE BOM (FF FE) and would otherwise
    // be misreported as UTF-16.
    if (startsWithBytes([0xff, 0xfe, 0x00, 0x00])) // UTF-32LE
        return Encoding.UTF32;
    if (startsWithBytes([0x00, 0x00, 0xfe, 0xff])) // UTF-32BE
        return Encoding.UTF32;

    if (startsWithBytes([0xff, 0xfe]))
        return Encoding.Unicode; // UTF-16LE
    if (startsWithBytes([0xfe, 0xff]))
        return Encoding.BigEndianUnicode; // UTF-16BE

    return Encoding.ASCII;
}
/// Entry point: reports on standard output whether "test.txt" starts with
/// a UTF-8 byte-order mark. The command-line arguments are not used.
void main(string[] args)
{
    immutable bool isUtf8 = GetFileEncoding("test.txt") == Encoding.UTF8;
    writeln(isUtf8 ? "The file is UTF8" : "File is not UTF8 :(");
}
On Tuesday, 22 July 2014 at 09:50:00 UTC, Sam Hu wrote:
Greetings!
As the subject says, how can I know whether a file is in UTF-8 encoding
or ANSI?
Thanks for the help in advance.
Regards,
Sam