On Thursday 18 April 2013 11:41:24 Graeme Geldenhuys wrote:
> On 2013-04-18 09:31, Martin Schreiber wrote:
> > It counts the number of a known constant Russian character in a random
> > string. In utf-16 and UCS4 this is an operation with numbers and string
> > index,
>
> Does that Russian character have a surrogate pair?
No character of the BMP (Basic Multilingual Plane) needs a surrogate pair. The
Cyrillic alphabet is in the range U+0400–U+04FF; the searched character is
U+042F. BTW even modern Chinese is in the BMP.
The test program is probably this:
"
// Benchmark: counts occurrences of the Cyrillic letter U+042F in the same
// random text held as UTF-8, UTF-16 and UTF-32, and prints the
// QueryPerformanceCounter tick difference for each scan.
// Compiler switches: H+ = long (Ansi) strings, R- = range checking off.
{$H+,R-}
uses SysUtils, Windows;//strings;
const
// Number of times each timed scan is repeated.
TRIES = 1;
// Length (in bytes) of the random source string.
CAPACITY = 1000000;
var
ansi: AnsiString;
utf8: Utf8String;
i, j: LongWord;
// Performance-counter readings taken before / after each timed scan.
t1, t2: Int64;
// Current search position inside the UTF-8 buffer.
p: PChar;
utf16: UnicodeString;
utf32: UCS4String;
// Number of matches found by the scan currently being timed.
TestCount: LongWord;
// The searched character in each of the three encodings.
c8: AnsiString;
c16: UCS2Char;
c32: UCS4Char;
begin
// Randomize is left disabled, so the random sequence is not re-seeded here.
//Randomize;
SetLength(ansi, CAPACITY);
// Fill the source with random byte values in the range 32..255.
for j := 1 to CAPACITY do
ansi[j] := Char(32 + Random(256 - 32));
// NOTE(review): which bytes become U+042F presumably depends on the ANSI
// code page AnsiToUtf8 assumes - confirm on the target system.
utf8 := AnsiToUtf8(ansi);
c8 := #$D0#$AF; // 'Я' in utf8
{ UTF-8 test }
// This variant uses a substring search (StrPos) for the two-byte sequence,
// whereas the UTF-16 and UTF-32 variants compare one element at a time.
QueryPerformanceCounter(t1);
TestCount := 0;
for i := 1 to TRIES do begin
p := @utf8[1];
while true do begin
p := StrPos(p, @c8[1]);
// nil means no further occurrence: this pass is finished.
if p = nil then break;
// Step one byte past the start of the match so the next search moves on,
// and count the hit.
Inc(p); Inc(TestCount);
end;
end;
QueryPerformanceCounter(t2);
// The value printed is the raw counter difference, not converted to time.
WriteLn('UTF-8: ', TestCount, ' entries in ', t2 - t1, ' ticks.');
{ UTF-16 test }
// The conversion happens before t1 is read, so it is not part of the timing.
utf16 := UTF8Decode(UTF8);//**
QueryPerformanceCounter(t1);
TestCount := 0;
for i := 1 to TRIES do begin
// Compare every UTF-16 code unit against U+042F.
for j := 1 to Length(utf16) do begin
c16 := utf16[j];
if c16 = #$042F then Inc(TestCount);
end;
end;
QueryPerformanceCounter(t2);
WriteLn('UTF-16: ', TestCount, ' entries in ', t2 - t1, ' ticks.');
{ UTF-32 test }
// The conversion happens before t1 is read, so it is not part of the timing.
utf32 := UnicodeStringToUCS4String(utf16);//**
QueryPerformanceCounter(t1);
TestCount := 0;
for i := 1 to TRIES do begin
// utf32 is indexed from 0, unlike the 1-based strings above.
// NOTE(review): presumably the array ends with a zero terminator element,
// which is scanned as well but cannot match - confirm.
for j := 0 to Length(utf32) - 1 do begin
c32 := utf32[j];
if c32 = $042F then Inc(TestCount);
end;
end;
QueryPerformanceCounter(t2);
WriteLn('UTF-32: ', TestCount, ' entries in ', t2 - t1, ' ticks.');
end.
"
Martin
------------------------------------------------------------------------------
Precog is a next-generation analytics platform capable of advanced
analytics on semi-structured data. The platform includes APIs for building
apps and a phenomenal toolset for data science. Developers can use
our toolset for easy data analysis & visualization. Get a free account!
http://www2.precog.com/precogplatform/slashdotnewsletter
_______________________________________________
mseide-msegui-talk mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mseide-msegui-talk