Sergei Gorelkin pisze:
Dariusz Mazur wrote:
I'd suggest you to find a test suite (some examples are contained
directly in the text of w3.org specification, others may be ripped
from opensource projects that support canonicalization, like
libxml2) and verify your unit against it.
I draw from libxml2. First i resolved problem with namespaces. That
is not so clear in specification. Even simple xml canonization don't
work. Now I can work with more complicate file.
New version should be send here?
I was intending to implement canonicalization in the existing writer
(xmlwrite.pp), but namespace support is required before it can be done.
(namespace support is present in dom and xmlread, but not yet in
xmlwrite).
Actually, the writer is already doing most of the things, including
proper encoding and escaping. The attributes are already sorted by name.
What has to be changed:
1) sorting attributes with respect to namespace
2) write end-tags in full form
3) treat CDATA sections as text
4) don't omit defaulted attributes
5) omit xml declaration and DTD.
6) ...maybe missed something else.
The (1) is most difficult, (2) to (5) are pretty trivial.
So for me it seems a better idea to improve the existing writer rather
than making a new one. I cannot speak on behalf of the whole team,
however. Other opinions are welcome.
I've prepare new version of canonization. Its based on xmlwrite. Point
3,4 are not resolved (I don't it really need) but its trivial.
test against xmllib is not so easy, because we should follow the same
indentation on xmlwrite
It works only with simple xml, but its good enough to make proper XADES
signature
I can submit those unit also, if somebody help me and review it.
--
Darek
{
This file is part of the Free Component Library
XML writing routines
Copyright (c) 1999-2000 by Sebastian Guenther, s...@freepascal.org
Modified in 2006 by Sergei Gorelkin, sergei_gorel...@mail.ru
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit XMLWriteC14;
{$ifdef fpc}
{$MODE objfpc}
{$endif}
{$H+}
interface
uses Classes, DOM;
procedure WriteXMLFileC14(doc: TXMLDocument; const AFileName: String); overload;
procedure WriteXMLFileC14(doc: TXMLDocument; var AFile: Text); overload;
procedure WriteXMLFileC14(doc: TXMLDocument; AStream: TStream); overload;
procedure WriteXMLC14(Element: TDOMNode; const AFileName: String); overload;
procedure WriteXMLC14(Element: TDOMNode; var AFile: Text); overload;
procedure WriteXMLC14(Element: TDOMNode; AStream: TStream); overload;
// ===================================================================
implementation
uses SysUtils;
type
TSpecialCharCallback = procedure(c: WideChar) of object;
TXMLWriter = class(TObject)
private
FInsideTextNode: Boolean;
FIndent: WideString;
FIndentCount: Integer;
FBuffer: PChar;
FBufPos: PChar;
FCapacity: Integer;
FLineBreak: string;
nsStack : TStringList;
fNotFirst : boolean;
function nodeIsNS(aNode : tDomNode):boolean;
function NSStackFind(const aName : string):boolean;
procedure wrtChars(Src: PWideChar; Length: Integer);
procedure IncIndent;
procedure DecIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
procedure wrtQuotedLiteral(const ws: WideString);
procedure ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
const SpecialCharCallback: TSpecialCharCallback);
procedure AttrSpecialCharCallback(c: WideChar);
procedure BuildAttributes(Anode: TDOMNode);
procedure TextNodeSpecialCharCallback(c: WideChar);
protected
procedure Write(const Buffer; Count: Longint); virtual; abstract;
procedure WriteNode(Node: TDOMNode);
procedure VisitDocument(Node: TDOMNode);
procedure VisitElement(Node: TDOMNode);
procedure VisitText(Node: TDOMNode);
procedure VisitCDATA(Node: TDOMNode);
procedure VisitComment(Node: TDOMNode);
procedure VisitFragment(Node: TDOMNode);
procedure VisitAttribute(Node: TDOMNode);
procedure VisitEntityRef(Node: TDOMNode);
procedure VisitDocumentType(Node: TDOMNode);
procedure VisitPI(Node: TDOMNode);
public
constructor Create;
destructor Destroy; override;
end;
TTextXMLWriter = Class(TXMLWriter)
Private
F : ^Text;
Protected
Procedure Write(Const Buffer; Count : Longint);override;
Public
constructor Create(var AFile: Text);
end;
TStreamXMLWriter = Class(TXMLWriter)
Private
F : TStream;
Protected
Procedure Write(Const Buffer; Count : Longint);override;
Public
constructor Create(AStream: TStream);
end;
{ ---------------------------------------------------------------------
TTextXMLWriter
---------------------------------------------------------------------}
constructor TTextXMLWriter.Create(var AFile: Text);
begin
inherited Create;
f := @AFile;
end;
procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
var
s: string;
begin
if Count>0 then
begin
SetString(s, PChar(@Buffer), Count);
system.Write(f^, s);
end;
end;
{ ---------------------------------------------------------------------
TStreamXMLWriter
---------------------------------------------------------------------}
constructor TStreamXMLWriter.Create(AStream: TStream);
begin
inherited Create;
F := AStream;
end;
procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
begin
if Count > 0 then
F.Write(Buffer, Count);
end;
{ ---------------------------------------------------------------------
TXMLWriter
---------------------------------------------------------------------}
constructor TXMLWriter.Create;
var
I: Integer;
begin
inherited Create;
// some overhead - always be able to write at least one extra UCS4
FBuffer := AllocMem(512+32);
FBufPos := FBuffer;
FCapacity := 512;
// Initialize Indent string
SetLength(FIndent, 100);
FIndent[1] := #10;
for I := 2 to 100 do FIndent[I] := ' ';
FIndentCount := 0;
// Later on, this may be put under user control
// for now, take OS setting
FLineBreak := sLineBreak;
nsStack:=tStringList.create;
end;
destructor TXMLWriter.Destroy;
begin
if FBufPos > FBuffer then
write(FBuffer^, FBufPos-FBuffer);
FreeMem(FBuffer);
nsStack.Free;
inherited Destroy;
end;
procedure TXMLWriter.wrtChars(Src: PWideChar; Length: Integer);
var
pb: PChar;
wc: Cardinal;
SrcEnd: PWideChar;
begin
pb := FBufPos;
SrcEnd := Src + Length;
while Src < SrcEnd do
begin
if pb >= @FBuffer[FCapacity] then
begin
write(FBuffer^, FCapacity);
Dec(pb, FCapacity);
if pb > FBuffer then
Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
end;
wc := Cardinal(Src^); Inc(Src);
case wc of
// $0A: pb := StrECopy(pb, PChar(FLineBreak));
$0A: begin
pb^ := char(wc); Inc(pb);
end;
$0D: begin
{ pb := StrECopy(pb, PChar(FLineBreak));
if (Src < SrcEnd) and (Src^ = #$0A) then
Inc(Src); }
end;
0..$09, $0B, $0C, $0E..$7F: begin
pb^ := char(wc); Inc(pb);
end;
$80..$7FF: begin
pb^ := Char($C0 or (wc shr 6));
pb[1] := Char($80 or (wc and $3F));
Inc(pb,2);
end;
$D800..$DBFF: begin
if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
begin
wc := ((LongInt(wc) - $D7C0) shl 10) + LongInt(word(Src^) xor $DC00);
Inc(Src);
pb^ := Char($F0 or (wc shr 18));
pb[1] := Char($80 or ((wc shr 12) and $3F));
pb[2] := Char($80 or ((wc shr 6) and $3F));
pb[3] := Char($80 or (wc and $3F));
Inc(pb,4);
end
else
raise EConvertError.Create('High surrogate without low one');
end;
$DC00..$DFFF:
raise EConvertError.Create('Low surrogate without high one');
else // $800 >= wc > $FFFF, excluding surrogates
begin
pb^ := Char($E0 or (wc shr 12));
pb[1] := Char($80 or ((wc shr 6) and $3F));
pb[2] := Char($80 or (wc and $3F));
Inc(pb,3);
end;
end;
end;
FBufPos := pb;
end;
procedure TXMLWriter.wrtStr(const ws: WideString); { inline }
begin
wrtChars(PWideChar(ws), Length(ws));
end;
{ No checks here - buffer always has 32 extra bytes }
procedure TXMLWriter.wrtChr(c: WideChar); { inline }
begin
FBufPos^ := char(ord(c));
Inc(FBufPos);
end;
procedure TXMLWriter.wrtIndent; { inline }
begin
wrtChars(PWideChar(FIndent), FIndentCount*2+1);
end;
procedure TXMLWriter.IncIndent;
var
I, NewLen, OldLen: Integer;
begin
Inc(FIndentCount);
if Length(FIndent) < 2 * FIndentCount then
begin
OldLen := Length(FIndent);
NewLen := 4 * FIndentCount;
SetLength(FIndent, NewLen);
for I := OldLen to NewLen do
FIndent[I] := ' ';
end;
end;
procedure TXMLWriter.DecIndent; { inline }
begin
if FIndentCount>0 then dec(FIndentCount);
end;
procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
var
Quote: WideChar;
begin
// TODO: need to check if the string also contains single quote
// both quotes present is a error
if Pos('"', ws) > 0 then
Quote := ''''
else
Quote := '"';
wrtChr(Quote);
wrtStr(ws);
wrtChr(Quote);
end;
const
AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
TextSpecialChars = ['<', '>', '&'];
procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars:
TSetOfChar;
const SpecialCharCallback: TSpecialCharCallback);
var
StartPos, EndPos: Integer;
begin
StartPos := 1;
EndPos := 1;
while EndPos <= Length(s) do
begin
if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
begin
wrtChars(@s[StartPos], EndPos - StartPos);
SpecialCharCallback(s[EndPos]);
StartPos := EndPos + 1;
end;
Inc(EndPos);
end;
if StartPos <= length(s) then
wrtChars(@s[StartPos], EndPos - StartPos);
end;
const
QuotStr = '"';
AmpStr = '&';
ltStr = '<';
gtStr = '>';
procedure TXMLWriter.AttrSpecialCharCallback(c: WideChar);
begin
case c of
'"': wrtStr(QuotStr);
'&': wrtStr(AmpStr);
'<': wrtStr(ltStr);
// Escape whitespace using CharRefs to be consistent with W3 spec ยง 3.3.3
#9: wrtStr('	');
#10: wrtStr('
');
#13: wrtStr('
');
else
wrtChr(c);
end;
end;
procedure TXMLWriter.TextnodeSpecialCharCallback(c: WideChar);
begin
case c of
'<': wrtStr(ltStr);
'>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
'&': wrtStr(AmpStr);
else
wrtChr(c);
end;
end;
function tXMLWriter.nodeIsNS(aNode : tDomNode):boolean;
begin
result:= (system.Pos('xmlns', LowerCase(aNode.nodeName)) = 1);
end;
function tXMLWriter.nsStackFind(const aName : string):boolean;
var
i : integer;
begin
result:=nsStack.Find(aName,i);
if not result then
nsStack.add(aName);
end;
procedure TXMLWriter.WriteNode(node: TDOMNode);
begin
case node.NodeType of
ELEMENT_NODE: VisitElement(node);
ATTRIBUTE_NODE: VisitAttribute(node);
TEXT_NODE: VisitText(node);
CDATA_SECTION_NODE: VisitCDATA(node);
ENTITY_REFERENCE_NODE: VisitEntityRef(node);
PROCESSING_INSTRUCTION_NODE: VisitPI(node);
COMMENT_NODE: VisitComment(node);
DOCUMENT_NODE: VisitDocument(node);
DOCUMENT_TYPE_NODE: VisitDocumentType(node);
ENTITY_NODE,
DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
end;
end;
procedure TXMLWriter.BuildAttributes(Anode: TDOMNode);
var
i: Integer;
attributes, namespaces: TStringList;
element: TDOMElement;
xNSName: string;
child: TDOMNode;
procedure parseNS(aNode : tDomNode);
var
i : integer;
xNSName : string;
begin
if aNode.Attributes=nil then exit;
for i := 0 to aNode.attributes.length - 1 do
begin
if nodeIsNS(aNode.attributes.item[i]) then
begin
xNSName:=aNode.attributes.item[i].nodeName +
'="'+aNode.attributes.item[i].nodeValue + '"';
if not NsStackFind(xNSName) then
namespaces.AddObject(xNSName,aNode.attributes.item[i]);
end;
end;
end;
begin
if aNode.nodeType<>Element_node then Exit;
attributes := nil;
namespaces := nil;
try
attributes := TStringList.Create();
attributes.Sorted := True;
namespaces := TStringList.Create();
namespaces.Sorted := True;
element := (ANode as TDOMElement);
while (element.ParentNode <>nil) and (element.parentNode is tDomElement) do
begin
element:=element.parentNode as tDomElement;
parseNS(element);
end;
element := (ANode as TDOMElement);
for i := 0 to element.attributes.length - 1 do
begin
xNSName:=element.attributes.item[i].nodeName +
'="'+element.attributes.item[i].nodeValue + '"';
if nodeIsNS(element.attributes.item[i]) then
begin
if not NsStackFind(xNSName) then
namespaces.AddObject(xNSName,element.attributes.item[i]);
end
else
begin
attributes.AddObject(xNSName,element.attributes.item[i]);
end;
end;
for i := 0 to namespaces.Count - 1 do
begin
tObject(child) := nameSpaces.objects[i];
// if TDOMAttr(child).Specified then
VisitAttribute(child);
// Result := Result + ' ' + Trim(namespaces[i]);
end;
for i := 0 to attributes.Count - 1 do
begin
tObject(child) := attributes.objects[i];
if TDOMAttr(child).Specified then
VisitAttribute(child);
// Result := Result + ' ' + Trim(attributes[i]);
end;
finally
namespaces.Free();
attributes.Free();
end;
end;
procedure TXMLWriter.VisitElement(node: TDOMNode);
var
i: Integer;
child: TDOMNode;
SavedInsideTextNode: Boolean;
begin
if not FInsideTextNode and FNotFirst then
wrtIndent;
fNotFirst:=true;
wrtChr('<');
wrtStr(TDOMElement(node).TagName);
// FIX: Accessing Attributes was causing them to be created for every element
:(
buildAttributes(node);
{
if node.HasAttributes then
for i := 0 to node.Attributes.Length - 1 do
begin
child := node.Attributes.Item[i];
if TDOMAttr(child).Specified then
VisitAttribute(child);
end;
}
Child := node.FirstChild;
{ if Child = nil then
wrtChars('/>', 2)
else}
wrtChr('>');
if child<>nil then begin
SavedInsideTextNode := FInsideTextNode;
FInsideTextNode := Child.NodeType in [TEXT_NODE, CDATA_SECTION_NODE];
IncIndent;
repeat
WriteNode(Child);
Child := Child.NextSibling;
until Child = nil;
DecIndent;
if not (node.LastChild.NodeType in [TEXT_NODE, CDATA_SECTION_NODE]) then
wrtIndent;
FInsideTextNode := SavedInsideTextNode;
end;
wrtChars('</', 2);
wrtStr(TDOMElement(Node).TagName);
wrtChr('>');
// end;
end;
procedure TXMLWriter.VisitText(node: TDOMNode);
begin
ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, {$IFDEF
f...@{$endif}textnodespecialcharcallback);
end;
procedure TXMLWriter.VisitCDATA(node: TDOMNode);
begin
if not FInsideTextNode then
wrtIndent;
wrtChars('<![CDATA[', 9);
wrtStr(TDOMCharacterData(node).Data);
wrtChars(']]>', 3);
end;
procedure TXMLWriter.VisitEntityRef(node: TDOMNode);
begin
wrtChr('&');
wrtStr(node.NodeName);
wrtChr(';');
end;
procedure TXMLWriter.VisitPI(node: TDOMNode);
begin
if not FInsideTextNode then wrtIndent;
wrtStr('<?');
wrtStr(TDOMProcessingInstruction(node).Target);
wrtChr(' ');
wrtStr(TDOMProcessingInstruction(node).Data);
wrtStr('?>');
end;
procedure TXMLWriter.VisitComment(node: TDOMNode);
begin
if not FInsideTextNode then wrtIndent;
wrtChars('<!--', 4);
wrtStr(TDOMCharacterData(node).Data);
wrtChars('-->', 3);
end;
procedure TXMLWriter.VisitDocument(node: TDOMNode);
var
child: TDOMNode;
begin
wrtStr('<?xml version="');
// Definitely should not escape anything here
if Length(TXMLDocument(node).XMLVersion) > 0 then
wrtStr(TXMLDocument(node).XMLVersion)
else
wrtStr('1.0');
wrtChr('"');
// DISABLED - we are only able write in UTF-8 which does not require labeling
// writing incorrect encoding will render xml unreadable...
(*
if Length(TXMLDocument(node).Encoding) > 0 then
begin
wrtStr(' encoding="');
wrtStr(TXMLDocument(node).Encoding);
wrtChr('"');
end;
*)
wrtStr('?>');
// TODO: now handled as a regular PI, remove this?
if Length(TXMLDocument(node).StylesheetType) > 0 then
begin
wrtStr(#10'<?xml-stylesheet type="');
wrtStr(TXMLDocument(node).StylesheetType);
wrtStr('" href="');
wrtStr(TXMLDocument(node).StylesheetHRef);
wrtStr('"?>');
end;
child := node.FirstChild;
while Assigned(Child) do
begin
WriteNode(Child);
Child := Child.NextSibling;
end;
wrtChars(#10, 1);
end;
procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
var
Child: TDOMNode;
begin
wrtChr(' ');
wrtStr(TDOMAttr(Node).Name);
wrtChars('="', 2);
Child := Node.FirstChild;
while Assigned(Child) do
begin
case Child.NodeType of
ENTITY_REFERENCE_NODE:
VisitEntityRef(Child);
TEXT_NODE:
ConvWrite(TDOMCharacterData(Child).Data, AttrSpecialChars, {$IFDEF
f...@{$endif}attrspecialcharcallback);
end;
Child := Child.NextSibling;
end;
wrtChr('"');
end;
procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
begin
wrtStr(#10'<!DOCTYPE ');
wrtStr(Node.NodeName);
wrtChr(' ');
with TDOMDocumentType(Node) do
begin
if PublicID <> '' then
begin
wrtStr('PUBLIC ');
wrtQuotedLiteral(PublicID);
wrtChr(' ');
wrtQuotedLiteral(SystemID);
end
else if SystemID <> '' then
begin
wrtStr('SYSTEM ');
wrtQuotedLiteral(SystemID);
end;
if InternalSubset <> '' then
begin
wrtChr('[');
wrtStr(InternalSubset);
wrtChr(']');
end;
end;
wrtChr('>');
end;
procedure TXMLWriter.VisitFragment(Node: TDOMNode);
var
Child: TDOMNode;
begin
// TODO: TextDecl is probably needed
// Fragment itself should not be written, only its children should...
Child := Node.FirstChild;
while Assigned(Child) do
begin
WriteNode(Child);
Child := Child.NextSibling;
end;
end;
// -------------------------------------------------------------------
// Interface implementation
// -------------------------------------------------------------------
procedure WriteXMLFileC14(doc: TXMLDocument; const AFileName: String);
var
fs: TFileStream;
begin
fs := TFileStream.Create(AFileName, fmCreate);
try
WriteXMLFileC14(doc, fs);
finally
fs.Free;
end;
end;
procedure WriteXMLFileC14(doc: TXMLDocument; var AFile: Text);
begin
with TTextXMLWriter.Create(AFile) do
try
WriteNode(doc);
finally
Free;
end;
end;
procedure WriteXMLFileC14(doc: TXMLDocument; AStream: TStream);
var
node : tDomNode;
element : tDOMElement;
begin
with TStreamXMLWriter.Create(AStream) do
try
node:=doc;
if node.nodetype =ELEMENT_NODE then begin
element := node as tDomElement;
while (element.ParentNode <>nil) and (element.parentNode is tDomElement)
do begin
element:=element.parentNode as tDomElement;
IncIndent;
end;
end;
WriteNode(doc);
finally
Free;
end;
end;
procedure WriteXMLC14(Element: TDOMNode; const AFileName: String);
begin
WriteXMLFileC14(TXMLDocument(Element), AFileName);
end;
procedure WriteXMLC14(Element: TDOMNode; var AFile: Text);
begin
WriteXMLFileC14(TXMLDocument(Element), AFile);
end;
procedure WriteXMLC14(Element: TDOMNode; AStream: TStream);
begin
WriteXMLFileC14(TXMLDocument(Element), AStream);
end;
end.
_______________________________________________
fpc-devel maillist - fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel