Here is a patch against Lazarus SVN. It enables the {%encoding xxx} mechanism. Some
changes are not very elegant, but the patch enables a hack which allows the use of
cp1251/koi8r LFM files in gtk2 pseudo-UTF (Hint='{%encoding cp1251}'). That hack
works only partially on win32. Anyway, it is good for translating old/win32
projects. Of course, string constants such as those in ShowMessage('.......') are still bad.
diff -u -r --minimal /home/vvi/svn/lazarus/ide/main.pp /home/vvi/src/lazarus1/ide/main.pp
--- /home/vvi/svn/lazarus/ide/main.pp 2008-01-04 09:45:19.000000000 +0000
+++ /home/vvi/src/lazarus1/ide/main.pp 2008-01-10 19:24:28.000000000 +0000
@@ -11047,14 +11047,21 @@
const Filename: string; var Source, DiskEncoding, MemEncoding: string);
begin
//DebugLn(['TMainIDE.OnCodeBufferDecodeLoaded Filename=',Filename,' Encoding=',GuessEncoding(Source)]);
- //DiskEncoding:=GuessEncoding(Source);
- //MemEncoding:=EncodingUTF8;
+ DiskEncoding:=GuessEncoding(Source);
+ MemEncoding:=LazarusEncoding;
+ if DiskEncoding=MemEncoding then exit;
+ Source:=ConvertEncoding(Source,DiskEncoding,MemEncoding);
end;
procedure TMainIDE.OnCodeBufferEncodeSaving(Code: TCodeBuffer;
const Filename: string; var Source: string);
+var
+ DiskEncoding: String;
begin
-
+ DiskEncoding:=GuessEncoding(Source,true);
+ if DiskEncoding=EncodingNoTranslation then
+ exit;//This happens when no %encoding is found in the source
+ Source:=ConvertEncoding(Source,LazarusEncoding,DiskEncoding);
end;
procedure TMainIDE.CodeToolBossPrepareTree(Sender: TObject);
diff -u -r --minimal /home/vvi/svn/lazarus/ide/revision.inc /home/vvi/src/lazarus1/ide/revision.inc
--- /home/vvi/svn/lazarus/ide/revision.inc 2007-09-08 11:28:42.000000000 +0100
+++ /home/vvi/src/lazarus1/ide/revision.inc 2008-01-10 19:24:28.000000000 +0000
@@ -1,2 +1,2 @@
// Created by Svn2RevisionInc
-const RevisionStr = '11846';
+const RevisionStr = '13612';
diff -u -r --minimal /home/vvi/svn/lazarus/lcl/lconvencoding.pas /home/vvi/src/lazarus1/lcl/lconvencoding.pas
--- /home/vvi/svn/lazarus/lcl/lconvencoding.pas 2007-12-31 12:08:13.000000000 +0000
+++ /home/vvi/src/lazarus1/lcl/lconvencoding.pas 2008-01-10 19:24:28.000000000 +0000
@@ -31,8 +31,21 @@
const
EncodingUTF8 = 'utf8';
-
-function GuessEncoding(const s: string): string;
+ LazarusEncoding = {$ifdef MSWindows}
+ {$ifdef WindowsUnicodeSupport}
+ EncodingUTF8
+ {$else}
+ 'ANSI'
+ {$endif}
+ {$else}
+ {$ifndef LCLgtk}
+ EncodingUTF8
+ {$else}
+ 'ANSI'
+ {$endif}
+ {$endif};
+ EncodingNoTranslation = 'no_translation';
+function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string;
function ConvertEncoding(const s, FromEncoding, ToEncoding: string): string;
@@ -281,12 +294,13 @@
end;
end;
-function GuessEncoding(const s: string): string;
+function GuessEncoding(const s: string;OnlyForSaving:boolean=false): string;
var
l: Integer;
p: Integer;
EndPos: LongInt;
i: LongInt;
+ k: Int64;
function CompareI(p1, p2: PChar; Count: integer): boolean;
var
@@ -316,20 +330,29 @@
Result:='';
exit;
end;
+ // try %encoding eee
+ // Requiring %encoding at the very beginning would be cleaner, but it would not
+ // allow our "LFM hacks". This is not ideal, but it is the only reasonable
+ // way to make Lazarus really cross-platform before encodings are unified.
+ // Besides, the file beginning can contain spaces, CR+LF, a BOM or other junk,
+ // or just some auto-inserted comments or comments that must come first.
+ k:=pos('{'+'%encoding ',s);
- // try BOM
- if CompareI(@s[1],#$EF#$BB#$BF,3) then begin
- Result:=EncodingUTF8;
- exit;
- end;
-
- // try {%encoding eee}
- if CompareI(@s[1],'{%encoding ',11) then begin
+ if (k>0)and CompareI(@s[k],'{'+'%encoding ',11) then begin
p:=12;
- while (p<=l) and (s[p] in [' ',#9]) do inc(p);
+ while (p<=l) and (s[k+p-1] in [' ',#9]) do inc(p);
EndPos:=p;
- while (EndPos<=l) and (not (s[EndPos] in ['}',' ',#9])) do inc(EndPos);
- Result:=copy(s,p,EndPos-p);
+ while (EndPos<=l) and (not (s[k+EndPos-1] in ['}',' ',#9,#13,#10])) do inc(EndPos);
+ Result:=copy(s,k+p-1,EndPos-p);
+ exit;
+ end;
+
+ // OK, if no encoding found, we shouldn't translate anyway
+ if OnlyForSaving then begin Result:=EncodingNoTranslation;exit;end;
+
+ // try BOM. It is checked AFTER %encoding for the user's convenience
+ if CompareI(@s[1],#$EF#$BB#$BF,3) then begin
+ Result:=EncodingUTF8;
exit;
end;
@@ -358,8 +381,24 @@
var AFrom,ATo:string;
SL:TStringList;
FN1,FN2:string;
+ function NormEncoding(const Enc:string):string;//Or make it a separate function?
+ var
+ i: Integer;
+ begin
+ Result:=AnsiLowerCase(Enc);
+ i:=Pos('-',Result);
+ While i>0 do
+ begin
+ Delete(Result,i,1);
+ i:=Pos('-',Result);
+ end;
+ end;
begin
Result:=s;
+ if (FromEncoding=EncodingNoTranslation)or(ToEncoding=EncodingNoTranslation)
+ then exit;//Theoretically we shouldn't be here, but this should work anyway
+ if NormEncoding(FromEncoding)=NormEncoding(ToEncoding) then exit;
+
AFrom:=LowerCase(FromEncoding);
ATo:=LowerCase(ToEncoding);
if AFrom=ATo then exit;
@@ -375,6 +414,8 @@
if AFrom='cp1251' then begin Result:=Cp1251toUTF(s);exit;end;
if AFrom='koi8-r' then begin Result:=Cp1251toUTF(Koi8rToCP1251(s));exit;end;
end;
+ if (AFrom='ansi')and(ATo=EncodingUTF8) then begin Result:=AnsiToUtf8(s);exit; end;
+ if (AFrom=EncodingUTF8)and(ATo='ansi') then begin Result:=Utf8ToAnsi(s);exit; end;
//Stupid code. Works anyway, but extra-slow
{$ifdef Unix}
DebugLn(['CPConvert NOTE: using slow iconv workaround to convert from ',AFrom,' to ',ATo]);