On Monday, 24 March 2014 at 04:37:23 UTC, Michel Fortin wrote:
dchar front(char[] s)
{
 if (s[0] < 0b1000000)
   return s[0]; // ASCII
 auto indicator = (s[0] >> 5) & 0b11;
 auto tailLength = indicator ? indicator : 1;

 dchar result = s[0] & (0b00111111 >> tailLength);
 foreach (i; 0..tailLength)
     result = (result << 6) | (s[1+i] & 0b00111111);
 return result;
}

(Disclaimer: not tested, but I did check that all the expected code paths are present in the assembly this time.)

0b1000000 is missing a zero: 0b1000_0000
Fixing that, I still get a range violation from "s[1+i]".

----- Test program -----
void main()
{
        foreach (ubyte b0; 0..0x80)
        {
                char[] s = [b0];
                assert(front(s)==front2(s));
        }       writeln("Single byte done");
        foreach (ubyte b0; 0..0x40)
        foreach (ubyte b1; 0..0x20)
        {
                char[] s = [0xC0|b1, 0x80|b0];
                assert(front(s)==front2(s));
        }       writeln("Double byte done");
        foreach (ubyte b0; 0..0x40)
        foreach (ubyte b1; 0..0x40)
        foreach (ubyte b2; 0..0x10)
        {
                char[] s = [0xE0|b2, 0x80|b1, 0x80|b0];
                assert(front(s)==front2(s));
        }       writeln("Triple byte done");
        foreach (ubyte b0; 0..0x40)
        foreach (ubyte b1; 0..0x40)
        foreach (ubyte b2; 0..0x40)
        foreach (ubyte b3; 0..0x08)
        {
                char[] s = [0xF0|b3, 0x80|b2, 0x80|b1, 0x80|b0];
                assert(front(s)==front2(s));
        }
}

Reply via email to