2006/7/5, Bisma Jayadi <[EMAIL PROTECTED]>:
> Current Usage:
>  TCSVParser.ExtractFields(const S: string; pFieldList: TStrings);

Is it able to correctly parse these kinds of problems?

It handles a couple of the cases you mentioned (see above). I am sending a new
version of the functions, but I've been asked to do a widestring
version and to use an array of pchar... for which I have neither the time nor
the expertise.

But here we go, in attachment. I would like these two functions to be
added to the strutils unit of FPC once someone on the FPC core team decides
to do so.

DelimeterChar := ',';
QuoteChar := '''';

(1) field 1, field 2, field 3, field 4
(2) field 1, 'field 2, field 3', field 4
(3) field 1, 'field 2', field 3, field 4

No problems

(4) field 1, field''2, field 3, field 4

In strict CSV you must have no blank space; this will work with no problems
too. (The delimiter must be the first char detected.)

(4) field 1,field''2,field 3,field 4

(5) field 1, 'field' 2, field 3, field 4

From memory this will be a problem; I would have to test this one...
(My algo was simple, could be patched to include this check.) This is
malformed CSV.

(6) field 1, 'field 2, field 3, field 4

Problem: this is malformed CSV. This is also in contradiction with
example 7. Also in CSV notation, this would mean that the string is
continuing on the next line which is also accepted.

(7) field 1, 'field 2, field 3, field 4

(See previous.)

note:
- incorrect quote char considered as single value.
- case (6) and (7) can be set via property StrictQuoteCharPair: boolean;

If your method above is able to solve those problems, then I'm very interested
to know the source code because I'm still unable to solve the last three
problems. :)

These would not be hard problems to solve; the problem is to guess what
behaviour the user wants. Solving some of the cases you presented would
contradict normal CSV logic, like the ability to span a string
block over many lines. But some people will reject this behaviour.
Example:

John Doe;"123 My Street
SomeTown, Some Country
Some area code";Other information

The above is one record, but it spans 3 lines. Now you can't solve 4 and
5 and 6 without breaking the logic of someone else. This is why I tried
to do error checking but keep normal CSV logic. I do allow a record on
many lines.

In attachment the last version of the code. This is released under the
same licence as FPC unit fcl/rtl.

--
Alexandre Leclerc
{******************************************************************************}
{ Author:       Alexandre Leclerc                                              }
{ Description:  User defined string utilities (will be integrated to Lazarus). }
{ Creation:     2006/06/06                                                     }
{ Version:      0.00                                                           }
{ Copyright:    (c) 2006, Alexandre Leclerc                                    }
{ License:      See COPYING.FPC                                                }
{                                                                              }
{ History:                                                                     }
{ 2006/00/00    v0.00 Alexandre Leclerc - First release (not yet official).    }
{******************************************************************************}

unit uStrUtils;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils; 

type
  { Dynamic array of strings. DecodeCSVStr fills a variable of this type with
    one element per column of the decoded CSV record. }
  TArrayOfString = array of string;
//  TArrayOfPWideChar = array of PWideChar;

{ Splits the CSV record 's' into the array 'a'. The Integer result is an exit
  code: 0 on success, 1 when parsing ended inside a quoted block. See the
  comment above the implementation for details. }
function DecodeCSVStr(const s: string; var a: TArrayOfString;
  const stringDelimiter, valueDelimiter: Char; const looseQuoting: Boolean
  ): Integer;
{ Builds one CSV record (a single string) from the values in 'a'. See the
  comment above the implementation for details. }
function EncodeCSVStr(const a: array of string; const stringDelimiter,
  valueDelimiter: Char): string;
//TODO: Implement a widestring version
//TODO: Implement a version that returns an array of pchar (pwidechar?)

implementation

uses
  StrUtils;

{ Summary:
    Decodes a CSV string and fills an array of string.
  Arguments:
    s = CSV record (line) to parse
    a = array of string to fill (length will be set automatically)
    stringDelimiter = the string delimiter to use (usually '"')
    valueDelimiter = the value delimiter to use (usually ',')
    looseQuoting = In strict CSV the stringDelimiter must be found right after
      the valueDelimiter in order to be a valid quote. If looseQuoting is
      activated, space (#32) chars are accepted between the valueDelimiter and
      the stringDelimiter.
  Description:
    The function parses the 's' string and fills the 'a' array of string with
    one element for each part of 's' delimited by valueDelimiter. If a
    stringDelimiter is found first, every valueDelimiter inside the quoted
    block is ignored. Each value is trimmed; for a quoted value the closing
    stringDelimiter is removed and doubled stringDelimiters are collapsed into
    a single one.
      An empty 's' yields exactly one empty column, and a trailing
    valueDelimiter yields a trailing empty column.
      The function returns 1 as exit code if the process ended in a quoted
    block. This gives an indication to the caller that there is probably
    more data on the next CSV line, or that there is a problem in the CSV record
    itself. This is to support CR/LF inside a quoted block, which is legal CSV.
  Results:
    The array 'a' holds the content of each column in the CSV record.
    The function returns 0 if there are no errors, 1 if it looks like there
    is data missing (like stopping in the middle of a quoted block).
  Notes:
    Since this function is taking a string, we can't manage the aspect of
    embedded LF/CRLF in the CSV when reading from a file (since the function is
    not reading from the file itself).
      Also, the function is not intended to support improperly formed CSV
    records. Activating looseQuoting allows a breach in CSV interpretation:
    something like 'a, "b"c, d', which should give 'a','"b"c','d', will rather
    give 'a','b"c','d'.
  See also:
    EncodeCSVStr
  License:
    See COPYING.FPC
  Author:
    Alexandre Leclerc }

function DecodeCSVStr(const s: string; var a: TArrayOfString;
  const stringDelimiter, valueDelimiter: Char; const looseQuoting: Boolean
  ): Integer;
var
  i: LongInt;
  c: Char;
  sp: Integer; //start pos of the current column in s (0 = not started yet)
  count: Integer; //number of columns stored in a so far
  isQuoted: Boolean; //current column started with stringDelimiter
  inString: Boolean; //currently inside an open quoted block

  { Stores the column spanning s[sp..endPos-1] into a[count] and resets the
    per-column state. endPos is passed explicitly so that the code never
    depends on the value of the for-loop variable after the loop has ended. }
  procedure AddCSVColumn(const endPos: Integer);
  var
    value: string;
    len: Integer;
  begin
    if count mod 10 = 0 then
      SetLength(a, count+10); //increase array if we require more space
    value := Trim(Copy(s, sp, endPos-sp));
    if isQuoted then //remove double-quote and last quote char
    begin
      len := Length(value);
      //Guard against an empty value (e.g. an unterminated quote at the very
      //end of the line): indexing value[0] would read outside the string.
      if (len > 0) and (value[len] = stringDelimiter) then
        value := Copy(value, 1, len-1);
      value := StringReplace(value, stringDelimiter + stringDelimiter,
        stringDelimiter, [rfReplaceAll]);
    end;
    a[count] := value;
    Inc(count);
    sp := 0;
    isQuoted := False;
  end;

begin
  inString := False;
  isQuoted := False;
  sp := 0;
  count := 0;
  for i := 1 to Length(s) do
  begin
    c := s[i];
    //In loose mode spaces are transparent: they neither start a column nor
    //prevent a following stringDelimiter from being seen as an opening quote.
    if looseQuoting and (c = #32) then
      Continue;
    if c = stringDelimiter then
    begin
      if sp = 0 then //string delimiter is only good at start
      begin
        isQuoted := True;
        sp := i+1;
      end;
      if isQuoted then
        inString := (not inString);
    end else
    begin
      if sp = 0 then
        sp := i;
      if (not inString) and (c = valueDelimiter) then
        AddCSVColumn(i);
    end;
  end;
  //Flush the last column. If no column was started (empty input or trailing
  //valueDelimiter) the start is placed past the end so the column is empty.
  if sp = 0 then
    sp := Length(s)+1;
  AddCSVColumn(Length(s)+1);
  SetLength(a, count);
  if inString then
    Result := 1
  else
    Result := 0;
end;

{ Summary:
    Encodes a CSV string from an array of string.
  Arguments:
    a = array of string to encode in CSV
    stringDelimiter = the string delimiter to use (usually '"')
    valueDelimiter = the value delimiter to use (usually ',')
  Description:
    The function encodes a CSV compliant string from the data found in an array
    of string. Columns are separated by valueDelimiter. A value is quoted with
    stringDelimiter when it contains valueDelimiter, stringDelimiter, CR or LF,
    because any of these would otherwise change how the record is parsed back.
    A quoted value is trimmed and every stringDelimiter inside it is doubled.
    All other values are written unchanged.
  Results:
    Returns a string representing the array of string as a CSV record. An empty
    array gives an empty string.
  See also:
    DecodeCSVStr
  License:
    See COPYING.FPC
  Author:
    Alexandre Leclerc }

function EncodeCSVStr(const a: array of string; const stringDelimiter,
  valueDelimiter: Char): string;
var
  i: Integer;

  { True when 'value' holds a character that is only safe inside quotes. }
  function NeedsQuoting(const value: string): Boolean;
  begin
    Result := (Pos(valueDelimiter, value) > 0)
      or (Pos(stringDelimiter, value) > 0)
      or (Pos(#13, value) > 0)
      or (Pos(#10, value) > 0);
  end;

begin
  Result := '';
  for i := Low(a) to High(a) do
  begin
    if NeedsQuoting(a[i]) then
      Result := Result + stringDelimiter + StringReplace(Trim(a[i]),
        stringDelimiter, stringDelimiter + stringDelimiter, [rfReplaceAll]) +
        stringDelimiter
    else
      Result := Result + a[i];
    //No separator after the last column.
    if (i < High(a)) then
      Result := Result + valueDelimiter;
  end;
end;

end.

Reply via email to