Benchmarking included 3*10^6 calls to Add and Find methods
with the arguments of various lengths.
Average string length 5 characters:
ShortString: 1.15 s
AnsiString: 1.56 s
Average string length 45 characters:
ShortString: 12.0 s
AnsiString: 3.2 s
I agree that the first case is more relevant for the compiler,
but still you can see that ShortStrings are clearly not always faster.
Sure, see attachment.
Note that I made shortstring->ansistring text replacament
followed by minimal fixed to get working Add and Find methods.
Other methods may be broken as a result.
I've done some some optimizations of tFPHashList and results are (for
10^6 calls average length 30 characters):
from svn: 2.234s
uhla : 2.297
my : 2.044
that is 10% shorter (or even better, because string generation consume
about 1.4s)
full source in attachment (should I prepare it different?)
changes are made in: FPHash and some minor in StrExpand,AddStr, Add
Second: when I review assembler list I've notice some strange lines (all
optimizations are enabled):
# [124] dec(ii);
movl %esi,%eax
decl %eax
movl %eax,%esi
.Lj16:
movl %esi,%eax <- this is not necessary
testl %eax,%eax
jg .Lj15
In source this is
while ii>0 do begin
..
dec(ii)
end;
Can be added some optimizations to avoid this doubled move.
--
Darek
unit uhlm;
interface
{$mode objfpc}{$h+}{$r+}
uses
contnrs,sysutils,cutils, types;
const
SListIndexError = 'List index exceeds bounds (%d)';
SListCapacityError = 'The maximum list capacity is reached (%d)';
SListCapacityPower2Error = 'The capacity has to be a power of 2, but is set
to %d';
SListCountError = 'List count too large (%d)';
type
EListError = class(Exception);
type
THashItem=record
HashValue : LongWord;
StrIndex : Integer;
NextIndex : Integer;
Data : Pointer;
end;
PHashItem=^THashItem;
const
MaxHashListSize = Maxint div 16;
MaxHashStrSize = Maxint;
MaxHashTableSize = Maxint div 4;
MaxItemsPerHash = 3;
type
PHashItemList = ^THashItemList;
THashItemList = array[0..MaxHashListSize - 1] of THashItem;
PHashTable = ^THashTable;
THashTable = array[0..MaxHashTableSize - 1] of Integer;
TFPHashListM = class(TObject)
private
{ ItemList }
ffcc : integer;
FHashList : PHashItemList;
FCount,
FCapacity : Integer;
FCapacityMask: LongWord;
{ Hash }
FHashTable : PHashTable;
FHashCapacity : Integer;
{ Strings }
FStrs : PChar;
FStrCount,
FStrCapacity : Integer;
function InternalFind(AHash:LongWord;const AName:shortstring;out
PrevIndex:Integer):Integer;
protected
function Get(Index: Integer): Pointer;
procedure Put(Index: Integer; Item: Pointer);
procedure SetCapacity(NewCapacity: Integer);
procedure SetCount(NewCount: Integer);
Procedure RaiseIndexError(Index : Integer);
function AddStr(const s:shortstring): Integer;
procedure AddToHashTable(Index: Integer);inline;
procedure StrExpand;
procedure SetStrCapacity(NewCapacity: Integer);
procedure SetHashCapacity(NewCapacity: Integer);
procedure ReHash;
public
constructor Create;
destructor Destroy; override;
function Add(const AName:shortstring;Item: Pointer): Integer;
procedure Clear;
function NameOfIndex(Index: Integer): ShortString;
function HashOfIndex(Index: Integer): LongWord;
function GetNextCollision(Index: Integer): Integer;
procedure Delete(Index: Integer);
class procedure Error(const Msg: string; Data: PtrInt);
function Expand: TFPHashListM;
function Extract(item: Pointer): Pointer;
function IndexOf(Item: Pointer): Integer;
function Find(const AName:shortstring): Pointer;
function FindIndexOf(const AName:shortstring): Integer;
function FindWithHash(const AName:shortstring;AHash:LongWord): Pointer;
function Rename(const AOldName,ANewName:shortstring): Integer;
function Remove(Item: Pointer): Integer;
procedure Pack;
procedure ShowStatistics;
procedure ForEachCall(proc2call:TListCallback;arg:pointer);
procedure ForEachCall(proc2call:TListStaticCallback;arg:pointer);
property Capacity: Integer read FCapacity write SetCapacity;
property Count: Integer read FCount write SetCount;
property Items[Index: Integer]: Pointer read Get write Put; default;
property List: PHashItemList read FHashList;
property Strs: PChar read FStrs;
end;
implementation
{*****************************************************************************
TFPHashListM
*****************************************************************************}
function FPHash(const s:shortstring):LongWord;
Var
ii,i : integer;
p : pchar;
pw : ^longword;
begin
{$ifopt Q+}
{$define overflowon}
{$Q-}
{$endif}
pw:=...@s[0];
i:=length(s);
ii:=i shr 2;
i:=i and 3;
result:=0;
while ii>0 do begin
result:=LongWord(result *8010817 ) xor (Pw^);
inc(pw);
dec(ii);
end;
p:=pointer(pw);
while i>0 do begin
result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor
LongWord(p^);
dec(i);
inc(p);
end;
{$ifdef overflowon}
{$Q+}
{$undef overflowon}
{$endif}
end;
function FPHash(P: PChar; Len: Integer): LongWord;
Var
pmax : pchar;
begin
{$ifopt Q+}
{$define overflowon}
{$Q-}
{$endif}
result:=0;
pmax:=p+len;
while (p<pmax) do
begin
result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor
LongWord(P^);
inc(p);
end;
{$ifdef overflowon}
{$Q+}
{$undef overflowon}
{$endif}
end;
procedure TFPHashListM.RaiseIndexError(Index : Integer);
begin
Error(SListIndexError, Index);
end;
function TFPHashListM.Get(Index: Integer): Pointer;
begin
If (Index < 0) or (Index >= FCount) then
RaiseIndexError(Index);
Result:=FHashList^[Index].Data;
end;
procedure TFPHashListM.Put(Index: Integer; Item: Pointer);
begin
if (Index < 0) or (Index >= FCount) then
RaiseIndexError(Index);
FHashList^[Index].Data:=Item;
end;
function TFPHashListM.NameOfIndex(Index: Integer): shortstring;
begin
If (Index < 0) or (Index >= FCount) then
RaiseIndexError(Index);
with FHashList^[Index] do
begin
if StrIndex>=0 then
Result:=PShortString(@FStrs[StrIndex])^
else
Result:='';
end;
end;
function TFPHashListM.HashOfIndex(Index: Integer): LongWord;
begin
If (Index < 0) or (Index >= FCount) then
RaiseIndexError(Index);
Result:=FHashList^[Index].HashValue;
end;
function TFPHashListM.GetNextCollision(Index: Integer): Integer;
begin
Result:=-1;
if ((Index > -1) and (Index < FCount)) then
Result:=FHashList^[Index].NextIndex;
end;
function TFPHashListM.Extract(item: Pointer): Pointer;
var
i : Integer;
begin
result := nil;
i := IndexOf(item);
if i >= 0 then
begin
Result := item;
Delete(i);
end;
end;
procedure TFPHashListM.SetCapacity(NewCapacity: Integer);
var
power: longint;
begin
{ use a power of two to be able to quickly calculate the hash table index }
if NewCapacity <> 0 then
NewCapacity := nextpowerof2((NewCapacity+(MaxItemsPerHash-1)) div
MaxItemsPerHash, power) * MaxItemsPerHash;
if (NewCapacity < FCount) or (NewCapacity > MaxHashListSize) then
Error (SListCapacityError, NewCapacity);
if NewCapacity = FCapacity then
exit;
ReallocMem(FHashList, NewCapacity*SizeOf(THashItem));
FCapacity := NewCapacity;
{ Maybe expand hash also }
if FCapacity>FHashCapacity*MaxItemsPerHash then
SetHashCapacity(FCapacity div MaxItemsPerHash);
end;
procedure TFPHashListM.SetCount(NewCount: Integer);
begin
if (NewCount < 0) or (NewCount > MaxHashListSize)then
Error(SListCountError, NewCount);
If NewCount > FCount then
begin
If NewCount > FCapacity then
SetCapacity(NewCount);
If FCount < NewCount then
{ FCapacity is NewCount rounded up to the next power of 2 }
FillChar(FHashList^[FCount], (FCapacity-FCount) div Sizeof(THashItem),
0);
end;
FCount := Newcount;
end;
procedure TFPHashListM.SetStrCapacity(NewCapacity: Integer);
begin
If (NewCapacity < FStrCount) or (NewCapacity > MaxHashStrSize) then
Error (SListCapacityError, NewCapacity);
if NewCapacity = FStrCapacity then
exit;
ReallocMem(FStrs, NewCapacity);
FStrCapacity := NewCapacity;
end;
procedure TFPHashListM.SetHashCapacity(NewCapacity: Integer);
var
power: longint;
begin
If (NewCapacity < 1) then
Error (SListCapacityError, NewCapacity);
if FHashCapacity=NewCapacity then
exit;
if (NewCapacity<>0) and
not ispowerof2(NewCapacity,power) then
Error(SListCapacityPower2Error, NewCapacity);
FHashCapacity:=NewCapacity;
ReallocMem(FHashTable, FHashCapacity*sizeof(Integer));
FCapacityMask:=(1 shl power)-1;
ReHash;
end;
procedure TFPHashListM.ReHash;
var
i : Integer;
begin
FillDword(FHashTable^,FHashCapacity,LongWord(-1));
For i:=0 To FCount-1 Do
AddToHashTable(i);
end;
constructor TFPHashListM.Create;
begin
SetHashCapacity(1024);
end;
destructor TFPHashListM.Destroy;
begin
Clear;
if assigned(FHashTable) then
FreeMem(FHashTable);
inherited Destroy;
end;
function TFPHashListM.AddStr(const s:shortstring): Integer;
var
Len : Integer;
begin
len:=length(s)+1;
result:=FstrCount;
inc(FStrCount,Len);
if FStrCount >= FStrCapacity then
StrExpand;
System.Move(s[0],FStrs[result],Len);
end;
procedure TFPHashListM.AddToHashTable(Index: Integer);
var
HashIndex : Integer;
begin
with FHashList^[Index] do
begin
if not assigned(Data) then
exit;
HashIndex:=HashValue and FCapacityMask;
NextIndex:=FHashTable^[HashIndex];
FHashTable^[HashIndex]:=Index;
end;
end;
function TFPHashListM.Add(const AName:shortstring;Item: Pointer): Integer;
begin
if FCount = FCapacity then
Expand;
with FHashList^[FCount] do
begin
Data:=Item;
StrIndex:=AddStr(AName);
HashValue:=FPHash(AName);
end;
AddToHashTable(FCount);
Result := FCount;
inc(FCount);
end;
procedure TFPHashListM.Clear;
begin
if Assigned(FHashList) then
begin
FCount:=0;
SetCapacity(0);
FHashList := nil;
end;
SetHashCapacity(1);
FHashTable^[0]:=-1; // sethashcapacity does not always call rehash
if Assigned(FStrs) then
begin
FStrCount:=0;
SetStrCapacity(0);
FStrs := nil;
end;
end;
procedure TFPHashListM.Delete(Index: Integer);
begin
If (Index<0) or (Index>=FCount) then
Error (SListIndexError, Index);
{ Remove from HashList }
dec(FCount);
System.Move (FHashList^[Index+1], FHashList^[Index], (FCount - Index) *
Sizeof(THashItem));
{ All indexes are updated, we need to build the hashtable again }
Rehash;
{ Shrink the list if appropriate }
if (FCapacity > 256) and (FCount < FCapacity shr 2) then
begin
FCapacity := FCapacity shr 1;
ReallocMem(FHashList, Sizeof(THashItem) * FCapacity);
end;
end;
function TFPHashListM.Remove(Item: Pointer): Integer;
begin
Result := IndexOf(Item);
If Result <> -1 then
Self.Delete(Result);
end;
class procedure TFPHashListM.Error(const Msg: string; Data: PtrInt);
begin
Raise EListError.CreateFmt(Msg,[Data]) at get_caller_addr(get_frame);
end;
function TFPHashListM.Expand: TFPHashListM;
var
IncSize : Longint;
begin
Result := Self;
if FCount < FCapacity then
exit;
IncSize := sizeof(ptrint)*2;
SetCapacity(FCapacity + IncSize);
end;
procedure TFPHashListM.StrExpand;
var
IncSize : Longint;
begin
// if FStrCount+MinIncSize < FStrCapacity then
// exit;
IncSize := 1024*16;
// if FStrCapacity > 255 then
Inc(IncSize, FStrCapacity shr 2 );
SetStrCapacity(FStrCapacity + IncSize );
end;
function TFPHashListM.IndexOf(Item: Pointer): Integer;
var
psrc : PHashItem;
Index : integer;
begin
Result:=-1;
psrc:=...@fhashlist^[0];
For Index:=0 To FCount-1 Do
begin
if psrc^.Data=Item then
begin
Result:=Index;
exit;
end;
inc(psrc);
end;
end;
function TFPHashListM.InternalFind(AHash:LongWord;const AName:shortstring;out
PrevIndex:Integer):Integer;
//var
// HashIndex : Integer;
begin
Result:=FHashTable^[AHash and FCapacityMask];
PrevIndex:=-1;
while Result<>-1 do
begin
with FHashList^[Result] do
begin
if (HashValue=AHash) and assigned(Data) and
(AName=PShortString(@FStrs[StrIndex])^) then
exit;
inc(ffcc);
PrevIndex:=Result;
Result:=NextIndex;
end;
end;
end;
function TFPHashListM.Find(const AName:shortstring): Pointer;
var
Index,
PrevIndex : Integer;
begin
prefetch(AName);
Result:=nil;
Index:=InternalFind(FPHash(AName),AName,PrevIndex);
if Index=-1 then
exit;
Result:=FHashList^[Index].Data;
end;
function TFPHashListM.FindIndexOf(const AName:shortstring): Integer;
var
PrevIndex : Integer;
begin
Result:=InternalFind(FPHash(AName),AName,PrevIndex);
end;
function TFPHashListM.FindWithHash(const AName:shortstring;AHash:LongWord):
Pointer;
var
Index,
PrevIndex : Integer;
begin
Result:=nil;
Index:=InternalFind(AHash,AName,PrevIndex);
if Index=-1 then
exit;
Result:=FHashList^[Index].Data;
end;
function TFPHashListM.Rename(const AOldName,ANewName:shortstring): Integer;
var
PrevIndex,
Index : Integer;
OldHash : LongWord;
begin
Result:=-1;
OldHash:=FPHash(AOldName);
Index:=InternalFind(OldHash,AOldName,PrevIndex);
if Index=-1 then
exit;
{ Remove from current Hash }
if PrevIndex<>-1 then
FHashList^[PrevIndex].NextIndex:=FHashList^[Index].NextIndex
else
FHashTable^[OldHash and FCapacityMask]:=FHashList^[Index].NextIndex;
{ Set new name and hash }
with FHashList^[Index] do
begin
HashValue:=FPHash(ANewName);
StrIndex:=AddStr(ANewName);
end;
{ Insert back in Hash }
AddToHashTable(Index);
{ Return Index }
Result:=Index;
end;
procedure TFPHashListM.Pack;
var
NewCount,
i : integer;
pdest,
psrc : PHashItem;
begin
NewCount:=0;
psrc:=...@fhashlist^[0];
pdest:=psrc;
For I:=0 To FCount-1 Do
begin
if assigned(psrc^.Data) then
begin
pdest^:=psrc^;
inc(pdest);
inc(NewCount);
end;
inc(psrc);
end;
FCount:=NewCount;
{ We need to ReHash to update the IndexNext }
ReHash;
{ Release over-capacity }
SetCapacity(FCount);
SetStrCapacity(FStrCount);
end;
procedure TFPHashListM.ShowStatistics;
var
HashMean,
HashStdDev : Double;
Index,
i,j : Integer;
begin
{ Calculate Mean and StdDev }
HashMean:=0;
HashStdDev:=0;
for i:=0 to FHashCapacity-1 do
begin
j:=0;
Index:=FHashTable^[i];
while (Index<>-1) do
begin
inc(j);
Index:=FHashList^[Index].NextIndex;
end;
HashMean:=HashMean+j;
HashStdDev:=HashStdDev+Sqr(j);
end;
HashMean:=HashMean/FHashCapacity;
HashStdDev:=(HashStdDev-FHashCapacity*Sqr(HashMean));
If FHashCapacity>1 then
HashStdDev:=Sqrt(HashStdDev/(FHashCapacity-1))
else
HashStdDev:=0;
{ Print info to stdout }
Writeln('HashSize : ',FHashCapacity);
Writeln('HashMean : ',HashMean:1:4);
Writeln('HashStdDev : ',HashStdDev:1:4);
Writeln('ListSize : ',FCount,'/',FCapacity);
Writeln('StringSize : ',FStrCount,'/',FStrCapacity);
writeln('hash count ',ffcc);
end;
procedure TFPHashListM.ForEachCall(proc2call:TListCallback;arg:pointer);
var
i : integer;
p : pointer;
begin
For I:=0 To Count-1 Do
begin
p:=FHashList^[i].Data;
if assigned(p) then
proc2call(p,arg);
end;
end;
procedure TFPHashListM.ForEachCall(proc2call:TListStaticCallback;arg:pointer);
var
i : integer;
p : pointer;
begin
For I:=0 To Count-1 Do
begin
p:=FHashList^[i].Data;
if assigned(p) then
proc2call(p,arg);
end;
end;
end.
_______________________________________________
fpc-devel maillist - [email protected]
http://lists.freepascal.org/mailman/listinfo/fpc-devel