Sergei Gorelkin pisze:
Dariusz Mazur wrote:

I'd suggest you to find a test suite (some examples are contained directly in the text of w3.org specification, others may be ripped from opensource projects that support canonicalization, like libxml2) and verify your unit against it.
I draw from libxml2. First i resolved problem with namespaces. That is not so clear in specification. Even simple xml canonization don't work. Now I can work with more complicate file.

New version should be send here?

I was intending to implement canonicalization in the existing writer (xmlwrite.pp), but namespace support is required before it can be done. (namespace support is present in dom and xmlread, but not yet in xmlwrite). Actually, the writer is already doing most of the things, including proper encoding and escaping. The attributes are already sorted by name.
What has to be changed:
1) sorting attributes with respect to namespace
2) write end-tags in full form
3) treat CDATA sections as text
4) don't omit defaulted attributes
5) omit xml declaration and DTD.
6) ...maybe missed something else.
The (1) is most difficult, (2) to (5) are pretty trivial.

So for me it seems a better idea to improve the existing writer rather than making a new one. I cannot speak on behalf of the whole team, however. Other opinions are welcome.

I've prepare new version of canonization. Its based on xmlwrite. Point 3,4 are not resolved (I don't it really need) but its trivial.

test against xmllib is not so easy, because we should follow the same indentation on xmlwrite


It works only with simple xml, but its good enough to make proper XADES signature
I can submit those unit also, if somebody help me and review it.



--
 Darek




{
    This file is part of the Free Component Library

    XML writing routines
    Copyright (c) 1999-2000 by Sebastian Guenther, s...@freepascal.org
    Modified in 2006 by Sergei Gorelkin, sergei_gorel...@mail.ru

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}


unit XMLWriteC14;


{$ifdef fpc}
{$MODE objfpc}
{$endif}
{$H+}

interface

uses Classes, DOM;

procedure WriteXMLFileC14(doc: TXMLDocument; const AFileName: String); overload;
procedure WriteXMLFileC14(doc: TXMLDocument; var AFile: Text); overload;
procedure WriteXMLFileC14(doc: TXMLDocument; AStream: TStream); overload;

procedure WriteXMLC14(Element: TDOMNode; const AFileName: String); overload;
procedure WriteXMLC14(Element: TDOMNode; var AFile: Text); overload;
procedure WriteXMLC14(Element: TDOMNode; AStream: TStream); overload;


// ===================================================================

implementation

uses SysUtils;

type
  TSpecialCharCallback = procedure(c: WideChar) of object;

  TXMLWriter = class(TObject)
  private
    FInsideTextNode: Boolean;
    FIndent: WideString;
    FIndentCount: Integer;
    FBuffer: PChar;
    FBufPos: PChar;
    FCapacity: Integer;
    FLineBreak: string;
    nsStack : TStringList;
    fNotFirst  : boolean;
    function nodeIsNS(aNode : tDomNode):boolean;
    function NSStackFind(const aName : string):boolean;

    procedure wrtChars(Src: PWideChar; Length: Integer);
    procedure IncIndent;
    procedure DecIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
    procedure wrtQuotedLiteral(const ws: WideString);
    procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
      const SpecialCharCallback: TSpecialCharCallback);
    procedure AttrSpecialCharCallback(c: WideChar);
    procedure BuildAttributes(Anode: TDOMNode);
    procedure TextNodeSpecialCharCallback(c: WideChar);
  protected

    procedure Write(const Buffer; Count: Longint); virtual; abstract;
    procedure WriteNode(Node: TDOMNode);
    procedure VisitDocument(Node: TDOMNode);
    procedure VisitElement(Node: TDOMNode);
    procedure VisitText(Node: TDOMNode);
    procedure VisitCDATA(Node: TDOMNode);
    procedure VisitComment(Node: TDOMNode);
    procedure VisitFragment(Node: TDOMNode);
    procedure VisitAttribute(Node: TDOMNode);
    procedure VisitEntityRef(Node: TDOMNode);
    procedure VisitDocumentType(Node: TDOMNode);
    procedure VisitPI(Node: TDOMNode);
  public
    constructor Create;
    destructor Destroy; override;
  end;

  TTextXMLWriter = Class(TXMLWriter)
  Private
    F : ^Text;
  Protected
    Procedure Write(Const Buffer; Count : Longint);override;
  Public
    constructor Create(var AFile: Text);
  end;

  TStreamXMLWriter = Class(TXMLWriter)
  Private
    F : TStream;
  Protected
    Procedure Write(Const Buffer; Count : Longint);override;
  Public
    constructor Create(AStream: TStream);
  end;

{ ---------------------------------------------------------------------
    TTextXMLWriter
  ---------------------------------------------------------------------}


constructor TTextXMLWriter.Create(var AFile: Text);
begin
  inherited Create;
  f := @AFile;
end;

procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
var
  s: string;
begin
  if Count>0 then
  begin
    SetString(s, PChar(@Buffer), Count);
    system.Write(f^, s);
  end;
end;

{ ---------------------------------------------------------------------
    TStreamXMLWriter
  ---------------------------------------------------------------------}

constructor TStreamXMLWriter.Create(AStream: TStream);
begin
  inherited Create;
  F := AStream;
end;


procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
begin
  if Count > 0 then
    F.Write(Buffer, Count);
end;


{ ---------------------------------------------------------------------
    TXMLWriter
  ---------------------------------------------------------------------}

constructor TXMLWriter.Create;
var
  I: Integer;
begin
  inherited Create;
  // some overhead - always be able to write at least one extra UCS4
  FBuffer := AllocMem(512+32);
  FBufPos := FBuffer;
  FCapacity := 512;
  // Initialize Indent string
  SetLength(FIndent, 100);
  FIndent[1] := #10;
  for I := 2 to 100 do FIndent[I] := ' ';
  FIndentCount := 0;
  // Later on, this may be put under user control
  // for now, take OS setting
  FLineBreak := sLineBreak;
  nsStack:=tStringList.create;

end;

destructor TXMLWriter.Destroy;
begin
  if FBufPos > FBuffer then
    write(FBuffer^, FBufPos-FBuffer);

  FreeMem(FBuffer);
  nsStack.Free;

  inherited Destroy;

end;

procedure TXMLWriter.wrtChars(Src: PWideChar; Length: Integer);
var
  pb: PChar;
  wc: Cardinal;
  SrcEnd: PWideChar;
begin
  pb := FBufPos;
  SrcEnd := Src + Length;
  while Src < SrcEnd do
  begin
    if pb >= @FBuffer[FCapacity] then
    begin
      write(FBuffer^, FCapacity);
      Dec(pb, FCapacity);
      if pb > FBuffer then
        Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
    end;

    wc := Cardinal(Src^);  Inc(Src);
    case wc of
//      $0A: pb := StrECopy(pb, PChar(FLineBreak));
      $0A: begin
        pb^ := char(wc); Inc(pb);
      end;

      $0D: begin
{        pb := StrECopy(pb, PChar(FLineBreak));
        if (Src < SrcEnd) and (Src^ = #$0A) then
          Inc(Src);                            }
      end;

      0..$09, $0B, $0C, $0E..$7F:  begin
        pb^ := char(wc); Inc(pb);
      end;

      $80..$7FF: begin
        pb^ := Char($C0 or (wc shr 6));
        pb[1] := Char($80 or (wc and $3F));
        Inc(pb,2);
      end;

      $D800..$DBFF: begin
        if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
        begin
          wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
          Inc(Src);

          pb^ := Char($F0 or (wc shr 18));
          pb[1] := Char($80 or ((wc shr 12) and $3F));
          pb[2] := Char($80 or ((wc shr 6) and $3F));
          pb[3] := Char($80 or (wc and $3F));
          Inc(pb,4);
        end
        else
          raise EConvertError.Create('High surrogate without low one');
      end;
      $DC00..$DFFF:
        raise EConvertError.Create('Low surrogate without high one');
      else   // $800 >= wc > $FFFF, excluding surrogates
      begin
        pb^ := Char($E0 or (wc shr 12));
        pb[1] := Char($80 or ((wc shr 6) and $3F));
        pb[2] := Char($80 or (wc and $3F));
        Inc(pb,3);
      end;
    end;
  end;
  FBufPos := pb;
end;

procedure TXMLWriter.wrtStr(const ws: WideString); { inline }
begin
  wrtChars(PWideChar(ws), Length(ws));
end;

{ No checks here - buffer always has 32 extra bytes }
procedure TXMLWriter.wrtChr(c: WideChar); { inline }
begin
  FBufPos^ := char(ord(c));
  Inc(FBufPos);
end;

procedure TXMLWriter.wrtIndent; { inline }
begin
  wrtChars(PWideChar(FIndent), FIndentCount*2+1);
end;

procedure TXMLWriter.IncIndent;
var
  I, NewLen, OldLen: Integer;
begin
  Inc(FIndentCount);
  if Length(FIndent) < 2 * FIndentCount then
  begin
    OldLen := Length(FIndent);
    NewLen := 4 * FIndentCount;
    SetLength(FIndent, NewLen);
    for I := OldLen to NewLen do
      FIndent[I] := ' ';
  end;
end;

procedure TXMLWriter.DecIndent; { inline }
begin
  if FIndentCount>0 then dec(FIndentCount);
end;

procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
var
  Quote: WideChar;
begin
  // TODO: need to check if the string also contains single quote
  // both quotes present is a error
  if Pos('"', ws) > 0 then
    Quote := ''''
  else
    Quote := '"';
  wrtChr(Quote);
  wrtStr(ws);
  wrtChr(Quote);
end;

const
  AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
  TextSpecialChars = ['<', '>', '&'];

procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: 
TSetOfChar;
  const SpecialCharCallback: TSpecialCharCallback);
var
  StartPos, EndPos: Integer;
begin
  StartPos := 1;
  EndPos := 1;
  while EndPos <= Length(s) do
  begin
    if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
    begin
      wrtChars(@s[StartPos], EndPos - StartPos);
      SpecialCharCallback(s[EndPos]);
      StartPos := EndPos + 1;
    end;
    Inc(EndPos);
  end;
  if StartPos <= length(s) then
    wrtChars(@s[StartPos], EndPos - StartPos);
end;

const
  QuotStr = '&quot;';
  AmpStr = '&amp;';
  ltStr = '&lt;';
  gtStr = '&gt;';

procedure TXMLWriter.AttrSpecialCharCallback(c: WideChar);
begin
  case c of
    '"': wrtStr(QuotStr);
    '&': wrtStr(AmpStr);
    '<': wrtStr(ltStr);
    // Escape whitespace using CharRefs to be consistent with W3 spec ยง 3.3.3
    #9: wrtStr('&#x9;');
    #10: wrtStr('&#xA;');
    #13: wrtStr('&#xD;');
  else
    wrtChr(c);
  end;
end;

procedure TXMLWriter.TextnodeSpecialCharCallback(c: WideChar);
begin
  case c of
    '<': wrtStr(ltStr);
    '>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
    '&': wrtStr(AmpStr);
  else
    wrtChr(c);
  end;
end;

function tXMLWriter.nodeIsNS(aNode : tDomNode):boolean;
begin
      result:= (system.Pos('xmlns', LowerCase(aNode.nodeName)) = 1);
end;

function tXMLWriter.nsStackFind(const aName : string):boolean;
var
  i : integer;
begin
  result:=nsStack.Find(aName,i);
  if not result then
    nsStack.add(aName);
end;


procedure TXMLWriter.WriteNode(node: TDOMNode);
begin
  case node.NodeType of
    ELEMENT_NODE:                VisitElement(node);
    ATTRIBUTE_NODE:              VisitAttribute(node);
    TEXT_NODE:                   VisitText(node);
    CDATA_SECTION_NODE:          VisitCDATA(node);
    ENTITY_REFERENCE_NODE:       VisitEntityRef(node);
    PROCESSING_INSTRUCTION_NODE: VisitPI(node);
    COMMENT_NODE:                VisitComment(node);
    DOCUMENT_NODE:               VisitDocument(node);
    DOCUMENT_TYPE_NODE:          VisitDocumentType(node);
    ENTITY_NODE,
    DOCUMENT_FRAGMENT_NODE:      VisitFragment(node);
  end;
end;

procedure TXMLWriter.BuildAttributes(Anode: TDOMNode);
var
  i: Integer;
  attributes, namespaces: TStringList;
  element: TDOMElement;
  xNSName: string;
  child: TDOMNode;

  procedure parseNS(aNode : tDomNode);
  var
    i : integer;
    xNSName : string;
  begin
      if aNode.Attributes=nil then exit;
      for i := 0 to aNode.attributes.length - 1 do
      begin
        if nodeIsNS(aNode.attributes.item[i]) then
        begin
          xNSName:=aNode.attributes.item[i].nodeName + 
'="'+aNode.attributes.item[i].nodeValue + '"';
          if not NsStackFind(xNSName) then
            namespaces.AddObject(xNSName,aNode.attributes.item[i]);
        end;
      end;
  end;


begin
  if aNode.nodeType<>Element_node then Exit;

  attributes := nil;
  namespaces := nil;
  try
    attributes := TStringList.Create();
    attributes.Sorted := True;

    namespaces := TStringList.Create();
    namespaces.Sorted := True;

    element := (ANode as TDOMElement);
    while (element.ParentNode <>nil) and (element.parentNode is tDomElement) do 
begin
       element:=element.parentNode as tDomElement;
       parseNS(element);
    end;
    element := (ANode as TDOMElement);

    for i := 0 to element.attributes.length - 1 do
    begin
      xNSName:=element.attributes.item[i].nodeName + 
'="'+element.attributes.item[i].nodeValue + '"';
      if nodeIsNS(element.attributes.item[i]) then
      begin
        if not NsStackFind(xNSName) then
          namespaces.AddObject(xNSName,element.attributes.item[i]);
      end
      else
      begin
        attributes.AddObject(xNSName,element.attributes.item[i]);
      end;
    end;
    for i := 0 to namespaces.Count - 1 do
    begin
      tObject(child) := nameSpaces.objects[i];
//      if TDOMAttr(child).Specified then
        VisitAttribute(child);
//      Result := Result + ' ' + Trim(namespaces[i]);
    end;
    for i := 0 to attributes.Count - 1 do
    begin
      tObject(child) := attributes.objects[i];
      if TDOMAttr(child).Specified then
        VisitAttribute(child);
//      Result := Result + ' ' + Trim(attributes[i]);
    end;
  finally
    namespaces.Free();
    attributes.Free();
  end;
end;



procedure TXMLWriter.VisitElement(node: TDOMNode);
var
  i: Integer;
  child: TDOMNode;
  SavedInsideTextNode: Boolean;
begin
  if not FInsideTextNode and FNotFirst then
    wrtIndent;
  fNotFirst:=true;
  wrtChr('<');
  wrtStr(TDOMElement(node).TagName);
  // FIX: Accessing Attributes was causing them to be created for every element 
:(
  buildAttributes(node);
  {
  if node.HasAttributes then

    for i := 0 to node.Attributes.Length - 1 do
    begin
      child := node.Attributes.Item[i];
      if TDOMAttr(child).Specified then
        VisitAttribute(child);
    end;
  }
  Child := node.FirstChild;
{  if Child = nil then
    wrtChars('/>', 2)
  else}
  wrtChr('>');
  if child<>nil then begin
    SavedInsideTextNode := FInsideTextNode;
    FInsideTextNode := Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE];
    IncIndent;
    repeat
      WriteNode(Child);
      Child := Child.NextSibling;
    until Child = nil;
    DecIndent;
    if not (node.LastChild.NodeType in [TEXT_NODE, CDATA_SECTION_NODE]) then
      wrtIndent;
    FInsideTextNode := SavedInsideTextNode;
  end;
    wrtChars('</', 2);
    wrtStr(TDOMElement(Node).TagName);
    wrtChr('>');
//  end;
end;

procedure TXMLWriter.VisitText(node: TDOMNode);
begin
  ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF 
f...@{$endif}textnodespecialcharcallback);
end;

procedure TXMLWriter.VisitCDATA(node: TDOMNode);
begin
  if not FInsideTextNode then
    wrtIndent;
  wrtChars('<![CDATA[', 9);
  wrtStr(TDOMCharacterData(node).Data);
  wrtChars(']]>', 3);
end;

procedure TXMLWriter.VisitEntityRef(node: TDOMNode);
begin
  wrtChr('&');
  wrtStr(node.NodeName);
  wrtChr(';');
end;

procedure TXMLWriter.VisitPI(node: TDOMNode);
begin
  if not FInsideTextNode then wrtIndent;
  wrtStr('<?');
  wrtStr(TDOMProcessingInstruction(node).Target);
  wrtChr(' ');
  wrtStr(TDOMProcessingInstruction(node).Data);
  wrtStr('?>');
end;

procedure TXMLWriter.VisitComment(node: TDOMNode);
begin
  if not FInsideTextNode then wrtIndent;
  wrtChars('<!--', 4);
  wrtStr(TDOMCharacterData(node).Data);
  wrtChars('-->', 3);
end;

procedure TXMLWriter.VisitDocument(node: TDOMNode);
var
  child: TDOMNode;
begin
  wrtStr('<?xml version="');
  // Definitely should not escape anything here
  if Length(TXMLDocument(node).XMLVersion) > 0 then
    wrtStr(TXMLDocument(node).XMLVersion)
  else
    wrtStr('1.0');
  wrtChr('"');
  
// DISABLED - we are only able write in UTF-8 which does not require labeling
// writing incorrect encoding will render xml unreadable...
(*
  if Length(TXMLDocument(node).Encoding) > 0 then
  begin
    wrtStr(' encoding="');
    wrtStr(TXMLDocument(node).Encoding);
    wrtChr('"');
  end;
*)
  wrtStr('?>');

  // TODO: now handled as a regular PI, remove this?
  if Length(TXMLDocument(node).StylesheetType) > 0 then
  begin
    wrtStr(#10'<?xml-stylesheet type="');
    wrtStr(TXMLDocument(node).StylesheetType);
    wrtStr('" href="');
    wrtStr(TXMLDocument(node).StylesheetHRef);
    wrtStr('"?>');
  end;

  child := node.FirstChild;
  while Assigned(Child) do
  begin
    WriteNode(Child);
    Child := Child.NextSibling;
  end;
  wrtChars(#10, 1);
end;





procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
var
  Child: TDOMNode;
begin
  wrtChr(' ');
  wrtStr(TDOMAttr(Node).Name);
  wrtChars('="', 2);
  Child := Node.FirstChild;
  while Assigned(Child) do
  begin
    case Child.NodeType of
      ENTITY_REFERENCE_NODE:
        VisitEntityRef(Child);
      TEXT_NODE:
        ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF 
f...@{$endif}attrspecialcharcallback);
    end;
    Child := Child.NextSibling;
  end;
  wrtChr('"');
end;

procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
begin
  wrtStr(#10'<!DOCTYPE ');
  wrtStr(Node.NodeName);
  wrtChr(' ');
  with TDOMDocumentType(Node) do
  begin
    if PublicID <> '' then
    begin
      wrtStr('PUBLIC ');
      wrtQuotedLiteral(PublicID);
      wrtChr(' ');
      wrtQuotedLiteral(SystemID);
    end
    else if SystemID <> '' then
    begin
      wrtStr('SYSTEM ');
      wrtQuotedLiteral(SystemID);
    end;
    if InternalSubset <> '' then
    begin
      wrtChr('[');
      wrtStr(InternalSubset);
      wrtChr(']');
    end;
  end;
  wrtChr('>');
end;

procedure TXMLWriter.VisitFragment(Node: TDOMNode);
var
  Child: TDOMNode;
begin
  // TODO: TextDecl is probably needed
  // Fragment itself should not be written, only its children should...
  Child := Node.FirstChild;
  while Assigned(Child) do
  begin
    WriteNode(Child);
    Child := Child.NextSibling;
  end;
end;


// -------------------------------------------------------------------
//   Interface implementation
// -------------------------------------------------------------------

procedure WriteXMLFileC14(doc: TXMLDocument; const AFileName: String);
var
  fs: TFileStream;
begin
  fs := TFileStream.Create(AFileName, fmCreate);
  try
    WriteXMLFileC14(doc, fs);
  finally
    fs.Free;
  end;
end;

procedure WriteXMLFileC14(doc: TXMLDocument; var AFile: Text);
begin
  with TTextXMLWriter.Create(AFile) do
  try
    WriteNode(doc);
  finally
    Free;
  end;
end;

procedure WriteXMLFileC14(doc: TXMLDocument; AStream: TStream);
var
  node    : tDomNode;
  element : tDOMElement;
begin
  with TStreamXMLWriter.Create(AStream) do
  try

    node:=doc;
    if node.nodetype =ELEMENT_NODE then begin
      element := node as tDomElement;
      while (element.ParentNode <>nil) and (element.parentNode is tDomElement) 
do begin
         element:=element.parentNode as tDomElement;
         IncIndent;
      end;
    end;
    WriteNode(doc);
  finally
    Free;
  end;
end;

procedure WriteXMLC14(Element: TDOMNode; const AFileName: String);
begin
  WriteXMLFileC14(TXMLDocument(Element), AFileName);
end;

procedure WriteXMLC14(Element: TDOMNode; var AFile: Text);
begin
  WriteXMLFileC14(TXMLDocument(Element), AFile);
end;

procedure WriteXMLC14(Element: TDOMNode; AStream: TStream);
begin
  WriteXMLFileC14(TXMLDocument(Element), AStream);
end;



end.
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to