On Wed, 3 Mar 2010 07:24:35 +0800 Robin Hoo <[email protected]> wrote:
> Hi, Antonio > > Pls check the function I used for check UTF8 string. Hope it helpful You combine an IsText check (no special characters in #0-#31) with IsUTF8 - good idea. > function IsUTF8(UnknownStr:string):boolean; Maybe better to name it IsUTF8Text? > var > i :Integer; > begin > if length(UnknownStr)=0 then exit(true); > i:=1; > while i<length(UnknownStr) do > begin > // ASCII > if (UnknownStr[i] = #$09) or > (UnknownStr[i] = #$0A) or > (UnknownStr[i] = #$0D) or > (UnknownStr[i] in [#$20..#$7E]) then #12 (form feed) is a valid character in texts too. > begin > inc(i); > continue; > end; > // non-overlong 2-byte > if (UnknownStr[i] in [#$C2..#$DF]) and > (UnknownStr[i+1] in [#$80..#$BF]) then > begin > inc(i,2); > continue; > end; > // excluding overlongs > if ((UnknownStr[i]=#$E0) and > (UnknownStr[i+1] in [#$A0..#$BF]) and > (UnknownStr[i+2] in [#$80..#$BF])) > or > // straight 3-byte > (((UnknownStr[i] in [#$E1..#$EC]) or > (UnknownStr[i] = #$EE) or > (UnknownStr[i] = #$EF)) > and > (UnknownStr[i+1] in [#$80..#$BF]) and > (UnknownStr[i+2] in [#$80..#$BF])) > or > // excluding surrogates > ((UnknownStr[i]=#$ED) and > (UnknownStr[i+1] in [#$80..#$9F]) and > (UnknownStr[i+2] in [#$80..#$BF])) then > begin > inc(i,3); > continue; > end; > // planes 1-3 > if ((UnknownStr[i]=#$F0) and > (UnknownStr[i+1] in [#$90..#$BF]) and > (UnknownStr[i+2] in [#$80..#$BF]) and > (UnknownStr[i+3] in [#$80..#$BF])) > or > // planes 4-15 > ((UnknownStr[i] in [#$F1..#$F3]) and > (UnknownStr[i+1] in [#$80..#$BF]) and > (UnknownStr[i+2] in [#$80..#$BF]) and > (UnknownStr[i+3] in [#$80..#$BF])) > or > // plane 16 > ((UnknownStr[i]=#$F4) and > (UnknownStr[i+1] in [#$80..#$8F]) and > (UnknownStr[i+2] in [#$80..#$BF]) and > (UnknownStr[i+3] in [#$80..#$BF])) then > begin > inc(i,4); > continue; > end; > exit(false); > end; > exit(true); > end; Mattias -- _______________________________________________ Lazarus mailing list [email protected] 
http://lists.lazarus.freepascal.org/mailman/listinfo/lazarus
