Benchmarking included 3*10^6 calls to Add and Find methods
with the arguments of various lengths.

Average string length 5 characters:
ShortString: 1.15 s
AnsiString: 1.56 s

Average string length 45 characters:
ShortString: 12.0 s
AnsiString: 3.2 s

I agree that the first case is more relevant for the compiler,
but still you can see that ShortStrings are clearly not always faster.

Sure, see attachment.
Note that I made shortstring->ansistring text replacament
followed by minimal fixed to get working Add and Find methods.
Other methods may be broken as a result.


I've done some some optimizations of tFPHashList and results are (for 10^6 calls average length 30 characters):
from svn: 2.234s
uhla : 2.297
my : 2.044

that is 10% shorter (or even better, because string generation consume about 1.4s)
full source in attachment (should I prepare it different?)


changes are made in: FPHash and some minor in StrExpand,AddStr, Add

Second: when I review assembler list I've notice some strange lines (all optimizations are enabled):

# [124] dec(ii);
    movl    %esi,%eax
    decl    %eax
    movl    %eax,%esi
.Lj16:
    movl    %esi,%eax <-  this is not necessary
    testl    %eax,%eax
    jg    .Lj15

In source this is
while ii>0 do begin
  ..
  dec(ii)
end;


Can be added some optimizations to avoid this doubled move.



--
  Darek




unit uhlm;

interface
{$mode objfpc}{$h+}{$r+}

uses
  contnrs,sysutils,cutils, types;

const
   SListIndexError = 'List index exceeds bounds (%d)';
   SListCapacityError = 'The maximum list capacity is reached (%d)';
   SListCapacityPower2Error = 'The capacity has to be a power of 2, but is set 
to %d';
   SListCountError = 'List count too large (%d)';
type
   EListError = class(Exception);



type
  THashItem=record
    HashValue : LongWord;
    StrIndex  : Integer;
    NextIndex : Integer;
    Data      : Pointer;
  end;
  PHashItem=^THashItem;

const
  MaxHashListSize = Maxint div 16;
  MaxHashStrSize  = Maxint;
  MaxHashTableSize = Maxint div 4;
  MaxItemsPerHash = 3;

type
  PHashItemList = ^THashItemList;
  THashItemList = array[0..MaxHashListSize - 1] of THashItem;
  PHashTable = ^THashTable;
  THashTable = array[0..MaxHashTableSize - 1] of Integer;

  TFPHashListM = class(TObject)
  private
    { ItemList }
    ffcc          : integer;
    FHashList     : PHashItemList;
    FCount,
    FCapacity : Integer;
    FCapacityMask: LongWord;
    { Hash }
    FHashTable    : PHashTable;
    FHashCapacity : Integer;
    { Strings }
    FStrs     : PChar;
    FStrCount,
    FStrCapacity : Integer;
    function InternalFind(AHash:LongWord;const AName:shortstring;out 
PrevIndex:Integer):Integer;
  protected
    function Get(Index: Integer): Pointer;
    procedure Put(Index: Integer; Item: Pointer);
    procedure SetCapacity(NewCapacity: Integer);
    procedure SetCount(NewCount: Integer);
    Procedure RaiseIndexError(Index : Integer);
    function  AddStr(const s:shortstring): Integer;
    procedure AddToHashTable(Index: Integer);inline;
    procedure StrExpand;
    procedure SetStrCapacity(NewCapacity: Integer);
    procedure SetHashCapacity(NewCapacity: Integer);
    procedure ReHash;
  public
    constructor Create;
    destructor Destroy; override;
    function Add(const AName:shortstring;Item: Pointer): Integer;
    procedure Clear;
    function NameOfIndex(Index: Integer): ShortString;
    function HashOfIndex(Index: Integer): LongWord;
    function GetNextCollision(Index: Integer): Integer;
    procedure Delete(Index: Integer);
    class procedure Error(const Msg: string; Data: PtrInt);
    function Expand: TFPHashListM;
    function Extract(item: Pointer): Pointer;
    function IndexOf(Item: Pointer): Integer;
    function Find(const AName:shortstring): Pointer;
    function FindIndexOf(const AName:shortstring): Integer;
    function FindWithHash(const AName:shortstring;AHash:LongWord): Pointer;
    function Rename(const AOldName,ANewName:shortstring): Integer;
    function Remove(Item: Pointer): Integer;
    procedure Pack;
    procedure ShowStatistics;
    procedure ForEachCall(proc2call:TListCallback;arg:pointer);
    procedure ForEachCall(proc2call:TListStaticCallback;arg:pointer);
    property Capacity: Integer read FCapacity write SetCapacity;
    property Count: Integer read FCount write SetCount;
    property Items[Index: Integer]: Pointer read Get write Put; default;
    property List: PHashItemList read FHashList;
    property Strs: PChar read FStrs;
  end;

implementation


{*****************************************************************************
                            TFPHashListM
*****************************************************************************}

    function FPHash(const s:shortstring):LongWord;
      Var

        ii,i : integer;
        p    : pchar;
        pw   : ^longword;
     begin
{$ifopt Q+}
{$define overflowon}
{$Q-}
{$endif}
        pw:=...@s[0];
        i:=length(s);
        ii:=i shr 2;
        i:=i and 3;
        result:=0;

          while ii>0 do begin
            result:=LongWord(result *8010817  ) xor (Pw^);
            inc(pw);
            dec(ii);
          end;
          p:=pointer(pw);
          while i>0 do begin
            result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor 
LongWord(p^);
            dec(i);
            inc(p);
          end;





{$ifdef overflowon}
{$Q+}
{$undef overflowon}
{$endif}
      end;

    function FPHash(P: PChar; Len: Integer): LongWord;
      Var
        pmax : pchar;
      begin
{$ifopt Q+}
{$define overflowon}
{$Q-}
{$endif}
        result:=0;
        pmax:=p+len;
        while (p<pmax) do
          begin
            result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor 
LongWord(P^);
            inc(p);
          end;
{$ifdef overflowon}
{$Q+}
{$undef overflowon}
{$endif}
      end;


procedure TFPHashListM.RaiseIndexError(Index : Integer);
begin
  Error(SListIndexError, Index);
end;


function TFPHashListM.Get(Index: Integer): Pointer;
begin
  If (Index < 0) or (Index >= FCount) then
    RaiseIndexError(Index);
  Result:=FHashList^[Index].Data;
end;


procedure TFPHashListM.Put(Index: Integer; Item: Pointer);
begin
  if (Index < 0) or (Index >= FCount) then
    RaiseIndexError(Index);
  FHashList^[Index].Data:=Item;
end;


function TFPHashListM.NameOfIndex(Index: Integer): shortstring;
begin
  If (Index < 0) or (Index >= FCount) then
    RaiseIndexError(Index);
  with FHashList^[Index] do
    begin
      if StrIndex>=0 then
        Result:=PShortString(@FStrs[StrIndex])^
      else
        Result:='';
    end;
end;


function TFPHashListM.HashOfIndex(Index: Integer): LongWord;
begin
  If (Index < 0) or (Index >= FCount) then
    RaiseIndexError(Index);
  Result:=FHashList^[Index].HashValue;
end;


function TFPHashListM.GetNextCollision(Index: Integer): Integer;
begin
  Result:=-1;
  if ((Index > -1) and (Index < FCount)) then
    Result:=FHashList^[Index].NextIndex;
end;


function TFPHashListM.Extract(item: Pointer): Pointer;
var
  i : Integer;
begin
  result := nil;
  i := IndexOf(item);
  if i >= 0 then
   begin
     Result := item;
     Delete(i);
   end;
end;


procedure TFPHashListM.SetCapacity(NewCapacity: Integer);
var
  power: longint;
begin
  { use a power of two to be able to quickly calculate the hash table index }
  if NewCapacity <> 0 then
    NewCapacity := nextpowerof2((NewCapacity+(MaxItemsPerHash-1)) div 
MaxItemsPerHash, power) * MaxItemsPerHash;
  if (NewCapacity < FCount) or (NewCapacity > MaxHashListSize) then
     Error (SListCapacityError, NewCapacity);
  if NewCapacity = FCapacity then
    exit;
  ReallocMem(FHashList, NewCapacity*SizeOf(THashItem));
  FCapacity := NewCapacity;
  { Maybe expand hash also }
  if FCapacity>FHashCapacity*MaxItemsPerHash then
    SetHashCapacity(FCapacity div MaxItemsPerHash);
end;


procedure TFPHashListM.SetCount(NewCount: Integer);
begin
  if (NewCount < 0) or (NewCount > MaxHashListSize)then
    Error(SListCountError, NewCount);
  If NewCount > FCount then
    begin
      If NewCount > FCapacity then
        SetCapacity(NewCount);
      If FCount < NewCount then
        { FCapacity is NewCount rounded up to the next power of 2 }
        FillChar(FHashList^[FCount], (FCapacity-FCount) div Sizeof(THashItem), 
0);
    end;
  FCount := Newcount;
end;


procedure TFPHashListM.SetStrCapacity(NewCapacity: Integer);
begin
  If (NewCapacity < FStrCount) or (NewCapacity > MaxHashStrSize) then
     Error (SListCapacityError, NewCapacity);
  if NewCapacity = FStrCapacity then
    exit;
  ReallocMem(FStrs, NewCapacity);
  FStrCapacity := NewCapacity;
end;


procedure TFPHashListM.SetHashCapacity(NewCapacity: Integer);
var
  power: longint;
begin
  If (NewCapacity < 1) then
    Error (SListCapacityError, NewCapacity);
  if FHashCapacity=NewCapacity then
    exit;
  if (NewCapacity<>0) and
     not ispowerof2(NewCapacity,power) then
    Error(SListCapacityPower2Error, NewCapacity);
  FHashCapacity:=NewCapacity;
  ReallocMem(FHashTable, FHashCapacity*sizeof(Integer));
  FCapacityMask:=(1 shl power)-1;
  ReHash;
end;


procedure TFPHashListM.ReHash;
var
  i : Integer;
begin
  FillDword(FHashTable^,FHashCapacity,LongWord(-1));
  For i:=0 To FCount-1 Do
    AddToHashTable(i);
end;


constructor TFPHashListM.Create;
begin
  SetHashCapacity(1024);
end;


destructor TFPHashListM.Destroy;
begin
  Clear;
  if assigned(FHashTable) then
    FreeMem(FHashTable);
  inherited Destroy;
end;


function TFPHashListM.AddStr(const s:shortstring): Integer;
var
  Len : Integer;
begin
  len:=length(s)+1;
  result:=FstrCount;
  inc(FStrCount,Len);

  if FStrCount >= FStrCapacity then
    StrExpand;
  System.Move(s[0],FStrs[result],Len);

end;



procedure TFPHashListM.AddToHashTable(Index: Integer);
var
  HashIndex : Integer;
begin
  with FHashList^[Index] do
    begin
      if not assigned(Data) then
        exit;
      HashIndex:=HashValue and FCapacityMask;
      NextIndex:=FHashTable^[HashIndex];
      FHashTable^[HashIndex]:=Index;
    end;
end;


function TFPHashListM.Add(const AName:shortstring;Item: Pointer): Integer;
begin
  if FCount = FCapacity then
    Expand;
  with FHashList^[FCount] do
    begin
     Data:=Item;
      StrIndex:=AddStr(AName);
      HashValue:=FPHash(AName);
    end;
  AddToHashTable(FCount);
  Result := FCount;
  inc(FCount);
end;

procedure TFPHashListM.Clear;
begin
  if Assigned(FHashList) then
    begin
      FCount:=0;
      SetCapacity(0);
      FHashList := nil;
    end;
  SetHashCapacity(1);
  FHashTable^[0]:=-1; // sethashcapacity does not always call rehash
  if Assigned(FStrs) then
    begin
      FStrCount:=0;
      SetStrCapacity(0);
      FStrs := nil;
    end;
end;

procedure TFPHashListM.Delete(Index: Integer);
begin
  If (Index<0) or (Index>=FCount) then
    Error (SListIndexError, Index);
  { Remove from HashList }
  dec(FCount);
  System.Move (FHashList^[Index+1], FHashList^[Index], (FCount - Index) * 
Sizeof(THashItem));
  { All indexes are updated, we need to build the hashtable again }
  Rehash;
  { Shrink the list if appropriate }
  if (FCapacity > 256) and (FCount < FCapacity shr 2) then
    begin
      FCapacity := FCapacity shr 1;
      ReallocMem(FHashList, Sizeof(THashItem) * FCapacity);
    end;
end;

function TFPHashListM.Remove(Item: Pointer): Integer;
begin
  Result := IndexOf(Item);
  If Result <> -1 then
    Self.Delete(Result);
end;

class procedure TFPHashListM.Error(const Msg: string; Data: PtrInt);
begin
  Raise EListError.CreateFmt(Msg,[Data]) at get_caller_addr(get_frame);
end;

function TFPHashListM.Expand: TFPHashListM;
var
  IncSize : Longint;
begin
  Result := Self;
  if FCount < FCapacity then
    exit;
  IncSize := sizeof(ptrint)*2;
  SetCapacity(FCapacity + IncSize);
end;

procedure TFPHashListM.StrExpand;
var
  IncSize : Longint;
begin
//  if FStrCount+MinIncSize < FStrCapacity then
//    exit;
  IncSize := 1024*16;
//  if FStrCapacity > 255 then
    Inc(IncSize, FStrCapacity shr 2 );
  SetStrCapacity(FStrCapacity + IncSize );
end;

function TFPHashListM.IndexOf(Item: Pointer): Integer;
var
  psrc  : PHashItem;
  Index : integer;
begin
  Result:=-1;
  psrc:=...@fhashlist^[0];
  For Index:=0 To FCount-1 Do
    begin
      if psrc^.Data=Item then
        begin
          Result:=Index;
          exit;
        end;
      inc(psrc);
    end;
end;

function TFPHashListM.InternalFind(AHash:LongWord;const AName:shortstring;out 
PrevIndex:Integer):Integer;
//var
//  HashIndex : Integer;
begin
  Result:=FHashTable^[AHash and FCapacityMask];
  PrevIndex:=-1;
  while Result<>-1 do
    begin
      with FHashList^[Result] do
        begin
          if (HashValue=AHash) and assigned(Data) and
             (AName=PShortString(@FStrs[StrIndex])^) then
            exit;
          inc(ffcc);
          PrevIndex:=Result;
          Result:=NextIndex;
        end;
    end;
end;


function TFPHashListM.Find(const AName:shortstring): Pointer;
var
  Index,
  PrevIndex : Integer;
begin
  prefetch(AName);
  Result:=nil;
  Index:=InternalFind(FPHash(AName),AName,PrevIndex);
  if Index=-1 then
    exit;
  Result:=FHashList^[Index].Data;
end;


function TFPHashListM.FindIndexOf(const AName:shortstring): Integer;
var
  PrevIndex : Integer;
begin
  Result:=InternalFind(FPHash(AName),AName,PrevIndex);
end;


function TFPHashListM.FindWithHash(const AName:shortstring;AHash:LongWord): 
Pointer;
var
  Index,
  PrevIndex : Integer;
begin
  Result:=nil;
  Index:=InternalFind(AHash,AName,PrevIndex);
  if Index=-1 then
    exit;
  Result:=FHashList^[Index].Data;
end;


function TFPHashListM.Rename(const AOldName,ANewName:shortstring): Integer;
var
  PrevIndex,
  Index : Integer;
  OldHash : LongWord;
begin
  Result:=-1;
  OldHash:=FPHash(AOldName);
  Index:=InternalFind(OldHash,AOldName,PrevIndex);
  if Index=-1 then
    exit;
  { Remove from current Hash }
  if PrevIndex<>-1 then
    FHashList^[PrevIndex].NextIndex:=FHashList^[Index].NextIndex
  else
    FHashTable^[OldHash and FCapacityMask]:=FHashList^[Index].NextIndex;
  { Set new name and hash }
  with FHashList^[Index] do
    begin
      HashValue:=FPHash(ANewName);
      StrIndex:=AddStr(ANewName);
    end;
  { Insert back in Hash }
  AddToHashTable(Index);
  { Return Index }
  Result:=Index;
end;

procedure TFPHashListM.Pack;
var
  NewCount,
  i : integer;
  pdest,
  psrc : PHashItem;
begin
  NewCount:=0;
  psrc:=...@fhashlist^[0];
  pdest:=psrc;
  For I:=0 To FCount-1 Do
    begin
      if assigned(psrc^.Data) then
        begin
          pdest^:=psrc^;
          inc(pdest);
          inc(NewCount);
        end;
      inc(psrc);
    end;
  FCount:=NewCount;
  { We need to ReHash to update the IndexNext }
  ReHash;
  { Release over-capacity }
  SetCapacity(FCount);
  SetStrCapacity(FStrCount);
end;


procedure TFPHashListM.ShowStatistics;
var
  HashMean,
  HashStdDev : Double;
  Index,
  i,j : Integer;
begin
  { Calculate Mean and StdDev }
  HashMean:=0;
  HashStdDev:=0;
  for i:=0 to FHashCapacity-1 do
    begin
      j:=0;
      Index:=FHashTable^[i];
      while (Index<>-1) do
        begin
          inc(j);
          Index:=FHashList^[Index].NextIndex;
        end;
      HashMean:=HashMean+j;
      HashStdDev:=HashStdDev+Sqr(j);
    end;
  HashMean:=HashMean/FHashCapacity;
  HashStdDev:=(HashStdDev-FHashCapacity*Sqr(HashMean));
  If FHashCapacity>1 then
    HashStdDev:=Sqrt(HashStdDev/(FHashCapacity-1))
  else
    HashStdDev:=0;
  { Print info to stdout }
  Writeln('HashSize   : ',FHashCapacity);
  Writeln('HashMean   : ',HashMean:1:4);
  Writeln('HashStdDev : ',HashStdDev:1:4);
  Writeln('ListSize   : ',FCount,'/',FCapacity);
  Writeln('StringSize : ',FStrCount,'/',FStrCapacity);
  writeln('hash count ',ffcc);
end;


procedure TFPHashListM.ForEachCall(proc2call:TListCallback;arg:pointer);
var
  i : integer;
  p : pointer;
begin
  For I:=0 To Count-1 Do
    begin
      p:=FHashList^[i].Data;
      if assigned(p) then
        proc2call(p,arg);
    end;
end;


procedure TFPHashListM.ForEachCall(proc2call:TListStaticCallback;arg:pointer);
var
  i : integer;
  p : pointer;
begin
  For I:=0 To Count-1 Do
    begin
      p:=FHashList^[i].Data;
      if assigned(p) then
        proc2call(p,arg);
    end;
end;

end.

_______________________________________________
fpc-devel maillist  -  [email protected]
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to