I get a little annoyed that yahoogroups strips out the leading spaces 
which then greatly reduces the readibility of the code.

Maybe producing ">" prefixes on each line by replying to my own 
message will overcome this.
I won't make a habit of this, this is just a test.
If anyone knows a way to preserve indents in posted code please let 
me know.

> Your basic loop to scan a file is pretty simple and
> will look like this:
> 
> function StringInFile(strFind, strFileName: string): boolean;
> const
>    BUFSIZE = 8192;
> var
>    fstm: TFileStream;
>    numread: Longint;
>    buffer: array [0..BUFSIZE-1] of char;
>    szFind: array [0..255] of char;
>    found: boolean;
> begin
>    StrPCopy(szFind, strFind);
>    found := False;
>    fstm := TFileStream.Create(strFileName, fmOpenRead);
>    repeat
>      numread := fstrm.Read(Buffer, BUFSIZE);
>      if BMFind(szFind, Buffer, numread) >= 0 then
>        found := True
>      else if numread = BUFSIZE then // more to scan
>        fstm.Position := fstmPosition - (Length(strFind)-1);
>    until found or (numread < BUFSIZE);
>    fstm.Free;
>    Result := found;
> end;
> 
> 1) The reason for backing up fstm.Position by nearly the length
> of strFind is in case strFind crosses buffer boundaries.
> 
> 2) The "BMFind" function used above is a Boyer-Moore search
> as shown below. This is the fastest string search known.
> 
> function BMFind(szSubStr, buf: PChar; iBufSize: integer): integer;
> { Returns -1 if substring not found,
>    or zero-based index into buffer if substring found }
> var
>    iSubStrLen: integer;
>    skip: array [char] of integer;
>    found: boolean;
>    iMaxSubStrIdx: integer;
>    iSubStrIdx: integer;
>    iBufIdx: integer;
>    iScanSubStr: integer;
>    mismatch: boolean;
>    iBufScanStart: integer;
>    ch: char;
> begin
>    { Initialisations }
>    found := False;
>    Result := -1;
>    { Check if trivial scan for empty string }
>    iSubStrLen := StrLen(szSubStr);
>    if iSubStrLen = 0 then
>    begin
>      Result := 0;
>      Exit
>    end;
> 
>    iMaxSubStrIdx := iSubStrLen - 1;
>    { Initialise the skip table }
>    for ch := Low(skip) to High(skip) do skip[ch] := iSubStrLen;
>    for iSubStrIdx := 0 to (iMaxSubStrIdx - 1) do
>      skip[szSubStr[iSubStrIdx]] := iMaxSubStrIdx - iSubStrIdx;
> 
>    { Scan the buffer, starting comparisons at the end of the 
substring }
>    iBufScanStart := iMaxSubStrIdx;
>    while (not found) and (iBufScanStart < iBufSize) do
>    begin
>      iBufIdx := iBufScanStart;
>      iScanSubStr := iMaxSubStrIdx;
>      repeat
>        mismatch := (szSubStr[iScanSubStr] <> buf[iBufIdx]);
>        if not mismatch then
>          if iScanSubStr > 0 then
>          begin // more characters to scan
>            Dec(iBufIdx); Dec(iScanSubStr)
>          end
>          else
>            found := True;
>      until mismatch or found;
>      if found then
>        Result := iBufIdx
>      else
>        iBufScanStart := iBufScanStart + skip[buf[iBufScanStart]];
>    end;
> end;
> 
> I have included a "wholeword_only" flag n the BMFind below.
> This confirms or rejects the "found" result, and will
> cause the loop to keep searching if match is rejected.
> 
> function BMFind(szSubStr, buf: PChar; iBufSize: integer;
>                  wholeword_only: boolean): integer;
> { Returns -1 if substring not found,
>    or zero-based index into buffer if substring found }
> var
>    iSubStrLen: integer;
>    skip: array [char] of integer;
>    found: boolean;
>    iMaxSubStrIdx: integer;
>    iSubStrIdx: integer;
>    iBufIdx: integer;
>    iScanSubStr: integer;
>    mismatch: boolean;
>    iBufScanStart: integer;
>    ch: char;
> begin
>    found := False;
>    Result := -1;
>    iSubStrLen := StrLen(szSubStr);
>    if iSubStrLen = 0 then
>    begin
>      Result := 0;
>      Exit
>    end;
> 
>    iMaxSubStrIdx := iSubStrLen - 1;
>    { Initialise the skip table }
>    for ch := Low(skip) to High(skip) do skip[ch] := iSubStrLen;
>    for iSubStrIdx := 0 to (iMaxSubStrIdx - 1) do
>      skip[szSubStr[iSubStrIdx]] := iMaxSubStrIdx - iSubStrIdx;
> 
>    { Scan the buffer, starting comparisons at the end of the 
substring }
>    iBufScanStart := iMaxSubStrIdx;
>    while (not found) and (iBufScanStart < iBufSize) do
>    begin
>      iBufIdx := iBufScanStart;
>      iScanSubStr := iMaxSubStrIdx;
>      repeat
>        mismatch := (szSubStr[iScanSubStr] <> buf[iBufIdx]);
>        if not mismatch then
>          if iScanSubStr > 0 then
>          begin // more characters to scan
>            Dec(iBufIdx); Dec(iScanSubStr)
>          end
>          else
>            found := True;
>      until mismatch or found;
>      if found and wholeword_only then
>      begin
>        if (iBufIdx > 0) then
>          found := not IsCharAlpha(buf[iBufIdx - 1]);
>        if found then
>          if iBufScanStart < (iBufSize - 1) then
>            found := not IsCharAlpha(buf[iBufScanStart + 1]);
>      end;
>      if found then
>        Result := iBufIdx
>      else
>        iBufScanStart := iBufScanStart + skip[buf[iBufScanStart]];
>    end;
> end;
> 
> Obviously you'll be tempted to increase BUFSIZE on the assumption
> that it will improve performance. My experience is that it does
> not, and that 8K is pretty optimum.
>


Reply via email to