Hi, I already wrote Expat binding for Smalltalk/X.
It does not support encodings and parses internal
streams as well as external (files & sockets :-).
Maybe you can reuse some know-how from my (very simple) code.
Good luck :-)
Jan
On Ne, 2007-09-23 at 22:23 +0200, Robin Redeker wrote:
> Hi!
>
> I just wanted to inform you that I've been writing an Expat
> extension for gnu smalltalk today. I'll add some more documentation,
> finish it up and send a patch within the next week.
>
> It's mainly a wrapper around the C API of Expat which emits SAX events,
> which can be processed by the existing XML.SAXDriver implementation.
>
> Expat is a stream parser and the smalltalk interface does allow parsing
> of unpositional streams like eg. sockets.
>
> The good with the Expat parser is that it supports new (yet
> currently not implemented in XML.st) SAX events which are defined by the
> latest SAX extensions (such as cdata start/end and dtd start/end
> detection, etc.).
>
> The bad with Expat is that it requires yet another C lib.
>
> Also the current interface returns String objects for all strings. Those
> Strings are multibyte encoded (UTF-8) unicode. But writing a subclass
> of the existing interface which decodes them to UnicodeString won't be a
> big problem when someone needs it.
>
> The expat parser also parses (encoded) byte streams (utf-8, iso, ascii
> or utf-16 encoded) and not unicode strings. The current XML.XMLParser
> implementation seems not to be able to parse byte streams and only
> unicode strings (at least I wasn't able to throw in a UTF-8 encoded
> document string).
>
> Open questions with regard to the implementation are:
>
> How/When do I correctly free the allocated expat C parser struct?
> I'm currently storing the XML_Parser C struct as OOP in the Smalltalk
> interface object. (Similar to the zlib implementation).
>
> But how do I ensure that the free function for it is called when my
> interface object is destroyed?
>
> I also put the package in packages/expat/ and used the zlib/ stuff as
> skeleton for the building system. Is that okay?
>
> I've also yet to discover how to create a patchset against the current
> tla tree. But I guess I'll just have to read the manual again ;-/ (GIT
> was a bit easier IMO :)
>
>
>
> Robin
>
>
> _______________________________________________
> help-smalltalk mailing list
> [email protected]
> http://lists.gnu.org/mailman/listinfo/help-smalltalk
"{ Package: 'stx:goodies/xmlsuite/xmlreaderimpl' }"
"{ NameSpace: XMLv2 }"
"ExpatXMLReader: an XMLReader subclass whose parsing is done by the Expat C
 library through the inline C primitives in this file. It declares a single
 instance variable, parser, in which the primitives keep the external address
 of the Expat XML_Parser.
 NOTE(review): inCDataSection, xmlVersion, xmlEncoding and standalone are
 assigned by methods of this class but are not declared here - presumably
 they are inherited from XMLReader; confirm."
XMLReader subclass:#ExpatXMLReader
instanceVariableNames:'parser'
classVariableNames:''
poolDictionaries:''
category:'XML Suite-SAX2-XMLReaders'
!
!ExpatXMLReader primitiveDefinitions!
%{
/*
* includes, defines, structure definitions
* and typedefs come here.
*/
#include <stdio.h>
#include <expat.h>
/* Number of bytes handed to the parser per call (see primitiveChunkSize). */
#define CHUNK_SIZE 8192

/*
 * Debug tracing.
 *
 * EXPAT_DEBUG takes ONE argument: a complete, parenthesized printf argument
 * list, e.g. EXPAT_DEBUG((" value is %d\n", v)).
 *
 * Tracing is compiled out unless EXPAT_XMLREADER_DEBUG is defined when this
 * file is compiled. Previously the macro was simply defined twice in a row
 * (tracing variant first, empty variant second), which is an incompatible
 * macro redefinition. The tracing variant is wrapped in do { } while (0) so
 * that both printf calls stay together when the macro is used as a single
 * statement.
 */
#ifdef EXPAT_XMLREADER_DEBUG
# define EXPAT_DEBUG(args) do { printf("ExpatXMLReader [debug]"); printf args; } while (0)
#else
# define EXPAT_DEBUG(args)
#endif
#define EXPAT_DEBUG_ENTER EXPAT_DEBUG((" Entering %s\n",__PRETTY_FUNCTION__))
#define EXPAT_DEBUG_LEAVE EXPAT_DEBUG((" Leaving %s\n",__PRETTY_FUNCTION__))
/*
HACK for development: the C functions of this class are written here, in the
primitive definitions, instead of in the "Primitive functions" field.
Functions written in that field are not filed out when a primitive method is
accepted in the browser, so the C compiler cannot compile native code that
uses them.
*/
/*
user_data: file-level variable holding a single OBJ pointer.
NOTE(review): nothing visible in this file reads or writes it - the Expat
user data is allocated with malloc in primitiveCreateExpatParser instead.
Presumably a leftover; confirm before removing.
*/
struct {
OBJ* this_object;
} user_data;
/*
 * Expat start-tag callback.
 *
 * readerPointer  user data installed by primitiveCreateExpatParser; a double
 *                indirection that ends at the reader object.
 * nameStr        element name as reported by Expat.
 * attributes     NULL-terminated vector of alternating attribute names and
 *                values.
 *
 * Builds a Smalltalk Array of alternating name/value Strings (nil when the
 * element has no attributes) and sends #expatStartElement:attributes: to
 * the reader.
 *
 * Changes against the previous version:
 *  - the element name is an OBJ (it was declared as char*),
 *  - every object reference that is still needed after an allocation is
 *    either protected around that allocation or already stored into the
 *    (protected) array; before, attName was held unprotected while the
 *    value String was allocated,
 *  - the debug format strings are single-line literals again.
 */
static void XMLCALL
startElement(OBJ** readerPointer, char* nameStr, char** attributes) {
    OBJ reader;
    OBJ name;
    OBJ attArray = nil;
    OBJ attName, attValue;
    int count;
    int i;

    EXPAT_DEBUG_ENTER;
    for (count = 0; attributes[count]; count += 2)
        ; /* count the name/value slots */
    EXPAT_DEBUG((" Element tiplet is %s\n",nameStr));
    EXPAT_DEBUG((" Number of attributes is %d\n",count));

    if (count > 0) { /* there are some attributes */
        attArray = __ARRAY_NEW_INT(count);
        EXPAT_DEBUG((" Attribute array allocated. Entering fill loop\n"));
        for (i = 0; i < count; i += 2) {
            EXPAT_DEBUG((" Processing attribute %s value %s\n",attributes[i],attributes[i+1]));

            /* keep the array reachable while the name String is allocated */
            __PROTECT__(attArray);
            attName = __MKSTRING(attributes[i]);
            __UNPROTECT__(attArray);
            /* store the name before allocating again, so that no bare
               reference to it is held across the next allocation */
            __ArrayInstPtr(attArray)->a_element[i] = attName;
            __STORE(attArray,attName); /* string may be in a different memory space than array */

            __PROTECT__(attArray);
            attValue = __MKSTRING(attributes[i+1]);
            __UNPROTECT__(attArray);
            __ArrayInstPtr(attArray)->a_element[i+1] = attValue;
            __STORE(attArray,attValue); /* string may be in a different memory space than array */
        }
        EXPAT_DEBUG((" Attribute array filled\n"));
    }

    __PROTECT__(attArray);
    name = __MKSTRING(nameStr);
    __UNPROTECT__(attArray);

    /* fetch the reader last, after all allocations are done */
    reader = **readerPointer;
    EXPAT_DEBUG((" Reader is at %x, sending #expatStartElement:attributes:\n",reader));
    __SSEND2(reader, @symbol(expatStartElement:attributes:),0, name, attArray);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat end-tag callback: wraps the element name in a Smalltalk String and
 * sends #expatEndElement: to the reader found through readerPointer.
 * The reader is read only after the String has been created.
 */
static void XMLCALL
endElement(OBJ** readerPointer, char* nameStr) {
    OBJ elementName;
    OBJ receiver;

    EXPAT_DEBUG_ENTER;
    elementName = __MKSTRING(nameStr);
    receiver = **readerPointer;
    __SSEND1(receiver, @symbol(expatEndElement:), 0, elementName);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat namespace-declaration-start callback.
 *
 * Sends #expatStartPrefix:mappingTo: to the reader with the prefix as first
 * and the namespace URI as second argument. (The previous version passed
 * the two arguments in swapped order.)
 *
 * Expat passes a NULL prefix for the default namespace and may pass a NULL
 * URI when a namespace is undeclared; both are mapped to an empty String,
 * matching the empty-string prefix this class uses for unprefixed names.
 * NOTE(review): confirm that the content handler expects '' rather than nil.
 *
 * The prefix String is protected while the URI String is allocated, in the
 * same way startElement protects its array.
 */
static void XMLCALL
startNSPrefixMapping(OBJ** readerPointer, char* prefixStr, char* uriStr) {
    OBJ reader;
    OBJ prefix;
    OBJ uri;

    EXPAT_DEBUG_ENTER;
    prefix = __MKSTRING(prefixStr == NULL ? "" : prefixStr);
    __PROTECT__(prefix);
    uri = __MKSTRING(uriStr == NULL ? "" : uriStr);
    __UNPROTECT__(prefix);
    /* fetch the reader last, after all allocations are done */
    reader = **readerPointer;
    __SSEND2(reader, @symbol(expatStartPrefix:mappingTo:),0, prefix, uri);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat namespace-declaration-end callback: sends #expatEndPrefixMapping:
 * to the reader with the prefix as a Smalltalk String.
 *
 * Expat passes a NULL prefix for the default namespace; it is mapped to an
 * empty String instead of being handed to __MKSTRING as a NULL pointer.
 * NOTE(review): confirm that the content handler expects '' rather than nil.
 */
static void XMLCALL
endNSPrefixMapping(OBJ** readerPointer, char* prefixStr) {
    OBJ reader;
    OBJ prefix;

    EXPAT_DEBUG_ENTER;
    prefix = __MKSTRING(prefixStr == NULL ? "" : prefixStr);
    /* fetch the reader after the allocation */
    reader = **readerPointer;
    __SSEND1(reader, @symbol(expatEndPrefixMapping:),0, prefix);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat XML-declaration callback.
 *
 * verStr    version string, or NULL; NULL is reported as "1.0".
 * encStr    encoding name, or NULL; NULL is reported as nil.
 * standInt  standalone flag as delivered by Expat, passed on as an integer.
 *
 * Sends #expatPrologVersion:encoding:standalone: to the reader.
 * The version String is protected while the encoding String is allocated,
 * in the same way startElement protects its array; before, it was held
 * unprotected across that allocation.
 */
static void XMLCALL
xmlProlog(OBJ** readerPointer, char* verStr, char* encStr, int standInt) {
    OBJ reader;
    OBJ version;
    OBJ encoding = nil;
    OBJ standalone;

    EXPAT_DEBUG_ENTER;
    version = __MKSTRING(verStr == NULL ? "1.0" : verStr);
    if (encStr) {
        __PROTECT__(version);
        encoding = __MKSTRING(encStr);
        __UNPROTECT__(version);
    }
    standalone = __MKINT(standInt);
    /* fetch the reader last, after all allocations are done */
    reader = **readerPointer;
    __SSEND3(reader, @symbol(expatPrologVersion:encoding:standalone:),
             0, version, encoding, standalone);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat character-data callback: copies charLen bytes starting at charStr
 * into a Smalltalk String and sends #expatCharacters: to the reader.
 * The reader is read after the String has been created.
 */
static void XMLCALL
characters(OBJ** readerPointer, char* charStr, int charLen) {
    OBJ text;
    OBJ receiver;

    EXPAT_DEBUG_ENTER;
    text = __MKSTRING_L(charStr, charLen);
    receiver = **readerPointer;
    __SSEND1(receiver, @symbol(expatCharacters:), 0, text);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat CDATA-section-start callback: sends #expatInCDataSection: with
 * true to the reader.
 */
static void XMLCALL
startCDataSection(OBJ** readerPointer) {
    OBJ receiver;

    EXPAT_DEBUG_ENTER;
    receiver = **readerPointer;
    __SSEND1(receiver, @symbol(expatInCDataSection:), 0, true);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat CDATA-section-end callback: sends #expatInCDataSection: with
 * false to the reader.
 */
static void XMLCALL
endCDataSection(OBJ** readerPointer) {
    OBJ receiver;

    EXPAT_DEBUG_ENTER;
    receiver = **readerPointer;
    __SSEND1(receiver, @symbol(expatInCDataSection:), 0, false);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Reports the current Expat error of parser p to the reader stored in the
 * parser user data, by sending #expatParseError:code:line:column: with the
 * error text, the error code and the current line and column numbers.
 * Called by primitiveParseChunk:len: when XML_Parse reports an error.
 */
static void XMLCALL
errorOccuredWhenParsing(XML_Parser p) {
    OBJ message;
    OBJ receiver;
    OBJ** cell;

    EXPAT_DEBUG_ENTER;
    message = __MKSTRING(XML_ErrorString(XML_GetErrorCode(p)));
    /* the reader is read after the message String has been created */
    cell = (OBJ**)XML_GetUserData(p);
    receiver = **cell;
    __SSEND4(receiver, @symbol(expatParseError:code:line:column:), 0,
             message,
             _MKSMALLINT(XML_GetErrorCode(p)),
             _MKSMALLINT(XML_GetCurrentLineNumber(p)),
             _MKSMALLINT(XML_GetCurrentColumnNumber(p)));
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat processing-instruction callback: sends #expatPI:data: to the reader
 * with the target and the data as Smalltalk Strings.
 *
 * The target String is protected while the data String is allocated, in
 * the same way startElement protects its array; before, it was held
 * unprotected across that allocation.
 */
static void XMLCALL
processingInstruction(OBJ** readerPointer, char* targetStr, char* dataStr) {
    OBJ reader;
    OBJ target;
    OBJ data;

    EXPAT_DEBUG_ENTER;
    target = __MKSTRING(targetStr);
    __PROTECT__(target);
    data = __MKSTRING(dataStr);
    __UNPROTECT__(target);
    /* fetch the reader last, after all allocations are done */
    reader = **readerPointer;
    __SSEND2(reader, @symbol(expatPI:data:),0, target,data);
    EXPAT_DEBUG_LEAVE;
}
/*
 * Expat comment callback: wraps the comment text in a Smalltalk String and
 * sends #expatComment: to the reader.
 */
static void XMLCALL
comment(OBJ** readerPointer, char* commentStr) {
    OBJ text;
    OBJ receiver;

    EXPAT_DEBUG_ENTER;
    text = __MKSTRING(commentStr);
    receiver = **readerPointer;
    __SSEND1(receiver, @symbol(expatComment:), 0, text);
    EXPAT_DEBUG_LEAVE;
}
%}
! !
!ExpatXMLReader primitiveVariables!
%{
/*
* any local C variables
* come here (please, define as static)
*/
%}
! !
!ExpatXMLReader class methodsFor:'testing'!
isAvailable
    "Answer the result of sending #primitiveIsAvailable to the receiver,
     or false if the receiver does not understand that message.

     self isAvailable
    "

    ^ self
        perform: #primitiveIsAvailable
        ifNotUnderstood: [ false ]

    "Created: / 29-10-2006 / 22:37:05 / janfrog"
    "Modified: / 12-04-2007 / 21:31:39 / janfrog"
!
primitiveIsAvailable
    "The inline C code answers true. The Smalltalk statement after the
     primitive answers false and is only reached if the C code does not
     return."

%{
    RETURN (true);
%}.
    ^ false

    "Created: / 09-02-2007 / 17:44:42 / janfrog"
! !
!ExpatXMLReader methodsFor:'SAX2 interface - Locator'!
getColumnNumber
    "Locator protocol: answer whatever #primitiveGetCurrentColumnNumber
     answers - the current column of the Expat parser, or nil when there
     is no parser."

    ^ self primitiveGetCurrentColumnNumber

    "Created: / 19-05-2005 / 09:43:25 / masca"
!
getLineNumber
    "Locator protocol: answer whatever #primitiveGetCurrentLineNumber
     answers - the current line of the Expat parser, or nil when there
     is no parser."

    ^ self primitiveGetCurrentLineNumber

    "Created: / 19-05-2005 / 09:43:37 / masca"
! !
!ExpatXMLReader methodsFor:'expat events'!
expatCharacters:aString
    "Character data event. Inside a CDATA section the text goes to the
     content handler via #cDataSection:, provided the handler responds to
     it. Otherwise text consisting only of XML whitespace is reported with
     #ignorableWhitespace: and any other text with #characters:."

    | contentHandler onlyWhitespace |

    contentHandler := self getContentHandler.
    inCDataSection ifTrue:[
        (contentHandler respondsTo:#cDataSection:) ifTrue:[
            ^ contentHandler cDataSection:aString
        ]
    ].
    onlyWhitespace := aString allSatisfy:[:each | each isXMLWhiteSpace].
    onlyWhitespace
        ifTrue:[ contentHandler ignorableWhitespace:aString ]
        ifFalse:[ contentHandler characters:aString ]

    "Created: / 17-04-2005 / 09:06:07 / janfrog"
    "Modified: / 28-12-2005 / 16:18:15 / janfrog"
!
expatComment:aString
    "Comment event: pass the comment text to the content handler with
     #comment:, but only if the handler responds to that message."

    | contentHandler |

    contentHandler := self getContentHandler.
    (contentHandler respondsTo:#comment:) ifFalse:[ ^ self ].
    contentHandler comment:aString

    "Created: / 17-04-2005 / 13:33:10 / janfrog"
    "Modified: / 28-12-2005 / 16:18:00 / janfrog"
!
expatEndDocument
    "Tell the content handler that the document has ended."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler endDocument

    "Created: / 17-04-2005 / 09:32:14 / janfrog"
!
expatEndDocumentFragment
    "Tell the content handler that the document fragment has ended."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler endDocumentFragment

    "Created: / 23-04-2005 / 14:16:39 / janfrog"
!
expatEndElement:name
    "End tag event. name is the element name String as delivered by Expat.
     It is split into a node name, whose local name, namespace and prefix
     are passed to the content handler."

    | qualifiedName contentHandler |

    qualifiedName := self nodeNameFromExpatExpandedElementName:name.
    contentHandler := self getContentHandler.
    contentHandler
        endElement:qualifiedName localName
        namespace:qualifiedName ns
        prefix:qualifiedName prefix

    "Created: / 16-04-2005 / 21:36:24 / janfrog"
    "Modified: / 29-09-2006 / 10:24:59 / janfrog"
!
expatEndPrefixMapping:prefix
    "Namespace declaration end event: forward the prefix to the content
     handler."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler endPrefixMapping:prefix

    "Created: / 17-04-2005 / 08:48:23 / janfrog"
!
expatInCDataSection: aBoolean
    "Remember whether the parser is currently inside a CDATA section.
     The flag is consulted by #expatCharacters:."

    inCDataSection := aBoolean

    "Created: / 28-12-2005 / 13:49:31 / janfrog"
    "Modified: / 28-12-2005 / 16:18:04 / janfrog"
!
expatPI:target data:data
    "Processing instruction event: forward target and data to the content
     handler."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler processingInstruction:target data:data.

    "Created: / 17-04-2005 / 13:32:50 / janfrog"
!
expatParseError:msg code:code line:line column:column
    "Parse error event, sent from the C code when XML_Parse fails.
     Builds a SAXParseError describing the failure, frees the Expat parser
     and then reports the error to the error handler as a fatal error.

     msg    - error text supplied by Expat
     code   - Expat error code
     line   - line number of the error
     column - column number of the error

     The message text is now a single line; the literal after the line
     number used to contain a line break."

    | error |

    error := SAXParseError new
                errorString:'Parse error at line ', line printString,
                            ' col ', column printString,
                            ' #', code printString, ': ', msg;
                line:line;
                col:column.
    "free the parser before the handler runs"
    self primitiveFreeExpatParser.
    self getErrorHandler fatalError:error.

    "Created: / 17-04-2005 / 09:54:19 / janfrog"
    "Modified: / 21-04-2005 / 19:41:59 / janfrog"
!
expatPrologVersion: vers encoding: enc standalone: stand
    "XML declaration event: record the version and encoding as given, and
     set standalone to true exactly when stand is 1."

    | isStandalone |

    isStandalone := (stand == 1).
    xmlVersion := vers.
    xmlEncoding := enc.
    standalone := isStandalone

    "Created: / 28-12-2005 / 16:18:14 / janfrog"
!
expatStartDocument
    "Tell the content handler that a document starts."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler startDocument

    "Created: / 17-04-2005 / 09:32:03 / janfrog"
!
expatStartDocumentFragment
    "Tell the content handler that a document fragment starts."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler startDocumentFragment

    "Created: / 23-04-2005 / 14:16:24 / janfrog"
!
expatStartElement:name attributes:attArray
    "Start tag event.
     name     - element name String as delivered by Expat
     attArray - Array of alternating attribute name and value Strings,
                or nil when the element has no attributes

     Converts both into their SAX representation and passes local name,
     namespace, prefix and attributes to the content handler."

    | qualifiedName saxAttributes contentHandler |

    qualifiedName := self nodeNameFromExpatExpandedElementName:name.
    attArray isNil
        ifTrue:[ saxAttributes := Attributes empty ]
        ifFalse:[
            saxAttributes := self
                                attributesFromExpatAttArray:attArray
                                inScopeOfNodeName:qualifiedName
        ].
    contentHandler := self getContentHandler.
    contentHandler
        startElement:qualifiedName localName
        namespace:qualifiedName ns
        prefix:qualifiedName prefix
        attributes:saxAttributes

    "Created: / 16-04-2005 / 21:36:33 / janfrog"
    "Modified: / 29-09-2006 / 10:24:25 / janfrog"
!
expatStartPrefix:prefix mappingTo:uri
    "Namespace declaration start event: forward the prefix and the URI it
     maps to, unchanged, to the content handler."

    | contentHandler |

    contentHandler := self getContentHandler.
    contentHandler startPrefix:prefix mappingTo:uri

    "Created: / 17-04-2005 / 08:47:52 / janfrog"
! !
!ExpatXMLReader methodsFor:'expat primitives'!
primitiveChunkSize
    "Answer the value of the C constant CHUNK_SIZE as a SmallInteger.
     The parse methods use it as the size of their read buffer."

%{
    RETURN (_MKSMALLINT(CHUNK_SIZE));
%}

    "Created: / 21-04-2005 / 19:38:18 / janfrog"
!
primitiveCreateExpatParser
    "Create and initialize a namespace-aware Expat parser, using the
     vertical bar as separator in expanded names, and store its address in
     the parser instance variable.

     The parser user data is a malloc-ed pointer to a malloc-ed cell that
     holds the receiver; the cell is registered with __ADD_REFCELL. All C
     callbacks defined in the primitive definitions reach the receiver
     through this double indirection.

     Answers the receiver on success. If the parser or one of the two
     allocations cannot be obtained, everything obtained so far is released
     and a SAXError is raised. (The allocation results used to be
     unchecked.)"

%{
    XML_Parser p;
    OBJ** data;
    OBJ* cell;
    OBJ parserAddress;

    p = XML_ParserCreateNS(NULL,'|');
    if (p) {
        data = (OBJ**)malloc(sizeof(OBJ*));
        cell = (OBJ*)malloc(sizeof(OBJ));
        if ((data == NULL) || (cell == NULL)) {
            /* free(NULL) is a no-op, so both calls are safe */
            free(cell);
            free(data);
            XML_ParserFree(p);
            /* fall through to the Smalltalk code, which raises the error */
        } else {
            EXPAT_DEBUG((" Allocating data=0x%x\n", data));
            *data = cell;
            EXPAT_DEBUG((" Allocating *data=0x%x\n", *data));
            **data = self;
            EXPAT_DEBUG((" Parser is **data=0x%x\n", **data));
            XML_SetUserData(p,data);
            __ADD_REFCELL(*data);

            XML_SetElementHandler(p, startElement, endElement);
            XML_SetStartNamespaceDeclHandler(p, startNSPrefixMapping);
            XML_SetEndNamespaceDeclHandler(p, endNSPrefixMapping);
            XML_SetCharacterDataHandler(p, characters);
            XML_SetCommentHandler(p, comment);
            XML_SetProcessingInstructionHandler(p, processingInstruction);
            XML_SetStartCdataSectionHandler(p, startCDataSection);
            XML_SetEndCdataSectionHandler(p, endCDataSection);
            XML_SetXmlDeclHandler(p, xmlProlog);
            XML_SetReturnNSTriplet(p,1);

            /* Store parser into instance variable */
            parserAddress = (__MKEXTERNALADDRESS(p));
            __INST(parser) = parserAddress;
            __STORE(self, parserAddress);
            /* return */
            RETURN(self);
        }
    }
%}.
    SAXError raiseErrorString:'Cannot create expat parser'

    "Created: / 21-04-2005 / 19:00:58 / janfrog"
    "Modified: / 28-12-2005 / 16:18:15 / janfrog"
!
primitiveFreeExpatParser
    "Release the Expat parser held in the parser instance variable, if
     there is one: unregister the reference cell, free the two user data
     allocations made by primitiveCreateExpatParser and free the parser
     itself. The instance variable is set to nil in any case."

%{
    XML_Parser expat;
    OBJ** userData;

    if (__isExternalAddress(__INST(parser))) {
        expat = __externalAddressVal(__INST(parser));
        userData = (OBJ**)XML_GetUserData(expat);
        __REMOVE_REFCELL(*userData);
        EXPAT_DEBUG((" Parser is **data=0x%x\n", **userData));
        EXPAT_DEBUG((" Freeing *data=0x%x\n", *userData));
        free(*userData);    /* the cell that held the reader */
        EXPAT_DEBUG((" Freeing data=0x%x\n", userData));
        free(userData);     /* the pointer to that cell */
        XML_ParserFree(expat);
    }
%}.
    parser := nil.

    "Created: / 21-04-2005 / 19:03:20 / janfrog"
    "Modified: / 11-08-2005 / 21:49:24 / janfrog"
!
primitiveGetCurrentColumnNumber
    "Answer the current column number of the Expat parser as a
     SmallInteger, or nil if there is no parser.
     (The comment used to be a copy of the one in primitiveFreeExpatParser,
     and the C code declared a local it never used.)"

%{
    XML_Parser p;

    if (__isExternalAddress(__INST(parser))) {
        p = __externalAddressVal(__INST(parser));
        RETURN(_MKSMALLINT(XML_GetCurrentColumnNumber(p)));
    }
%}.
    ^nil

    "Created: / 02-05-2005 / 12:03:19 / janfrog"
    "Modified: / 11-08-2005 / 21:31:33 / janfrog"
!
primitiveGetCurrentLineNumber
    "Answer the current line number of the Expat parser as a SmallInteger,
     or nil if there is no parser.
     (The comment used to be a copy of the one in primitiveFreeExpatParser,
     and the C code declared a local it never used.)"

%{
    XML_Parser p;

    if (__isExternalAddress(__INST(parser))) {
        p = __externalAddressVal(__INST(parser));
        RETURN(_MKSMALLINT(XML_GetCurrentLineNumber(p)));
    }
%}.
    ^nil

    "Created: / 02-05-2005 / 12:03:30 / janfrog"
    "Modified: / 11-08-2005 / 21:31:54 / janfrog"
!
primitiveParseChunk:aByteArray len:anInteger
    "Feed the first anInteger bytes of aByteArray to the Expat parser as a
     non-final chunk.

     Answers true if the chunk was parsed. Answers false if
       - anInteger is not a non-negative SmallInteger,
       - there is no parser,
       - aByteArray is not a ByteArray, or
       - Expat reports an error; in that case the error is first reported
         to the receiver via #expatParseError:code:line:column:.

     Before, a missing parser left the C variable p uninitialized and it
     was passed to XML_Parse anyway; anInteger was converted without a
     type check.
     NOTE(review): the caller must make sure anInteger does not exceed the
     size of aByteArray; this is not checked here."

%{
    unsigned char* chunk;
    int chunkLen;
    XML_Parser p;

    if (__isSmallInteger(anInteger)) {
        chunkLen = __intVal(anInteger);
    } else {
        RETURN(false);
    }
    if (chunkLen < 0) {
        RETURN(false);
    }
    EXPAT_DEBUG((" In #primitiveParseChunk:#[...] len:%d\n",chunkLen));

    if (__isExternalAddress(__INST(parser))) {
        p = __externalAddressVal(__INST(parser));
    } else {
        /* no parser: nothing to feed the data to */
        RETURN(false);
    }

    if (__isByteArray(aByteArray)) {
        chunk = __byteArrayVal(aByteArray);
        if (XML_Parse(p, (const char*)chunk, chunkLen, 0) == XML_STATUS_ERROR) {
            errorOccuredWhenParsing(p);
            RETURN(false);
        }
    } else {
        RETURN(false);
    }
    EXPAT_DEBUG((" ...finished\n"));
%}.
    ^true

    "Created: / 21-04-2005 / 20:09:08 / janfrog"
    "Modified: / 06-10-2006 / 13:08:55 / janfrog"
! !
!ExpatXMLReader methodsFor:'expat utilities'!
attributesFromExpatAttArray:anArray inScopeOfNodeName:ownerNodeName
    "Convert anArray, which holds alternating attribute names and values,
     into an Attributes collection of Attr objects. Attribute names are
     resolved in the scope of ownerNodeName. An empty array yields
     Attributes empty."

    | result |

    anArray isEmpty ifTrue:[ ^ Attributes empty ].
    result := Attributes new.
    anArray pairWiseDo:[:expandedName :attValue |
        | qualifiedName |

        qualifiedName := self
                            nodeNameFromExpatExpandedAttributeName:expandedName
                            inScopeOfNodeName:ownerNodeName.
        result add:(Attr named:qualifiedName value:attValue)
    ].
    ^ result

    "Created: / 11-08-2005 / 22:08:47 / janfrog"
    "Modified: / 29-09-2006 / 10:22:46 / janfrog"
!
nodeNameFromExpatExpandedAttributeName:expandedName
inScopeOfNodeName:ownerNodeName
    "Build a NodeName for an attribute from the name delivered by Expat.
     The name is split at the vertical bar. A name with a single part is a
     plain local name and takes prefix and namespace from ownerNodeName.
     Any other name is read as namespace, local name, prefix - in that
     order."

    | parts |

    parts := expandedName tokensBasedOn:$|.
    parts size = 1 ifTrue:[
        ^ NodeName new
            prefix:ownerNodeName prefix;
            ns:ownerNodeName ns;
            localName:parts first
    ].
    ^ NodeName new
        prefix:parts third;
        ns:parts first;
        localName:parts second

    "Created: / 11-08-2005 / 22:08:47 / janfrog"
    "Modified: / 29-09-2006 / 10:25:26 / janfrog"
!
nodeNameFromExpatExpandedElementName:expandedName
    "Build a NodeName for an element from the name delivered by Expat.
     The name is split at the vertical bar:
       three parts - namespace, local name, prefix
       two parts   - namespace, local name; the prefix is empty
       otherwise   - the whole name is the local name; namespace and
                     prefix are empty"

    | parts partCount |

    parts := expandedName tokensBasedOn:$|.
    partCount := parts size.
    partCount = 2 ifTrue:[
        ^ NodeName new
            prefix:'';
            ns:parts first;
            localName:parts second
    ].
    partCount = 3 ifTrue:[
        ^ NodeName new
            prefix:parts third;
            ns:parts first;
            localName:parts second
    ].
    ^ NodeName new
        prefix:'';
        ns:'';
        localName:expandedName

    "Created: / 11-08-2005 / 22:08:47 / janfrog"
    "Modified: / 06-10-2006 / 09:42:08 / janfrog"
! !
!ExpatXMLReader methodsFor:'initialization'!
initialize
    "Run the inherited initialization, then start outside of any CDATA
     section."

    super initialize.
    inCDataSection := false.

    "Created: / 28-12-2005 / 16:18:06 / janfrog"
! !
!ExpatXMLReader methodsFor:'parsing'!
parseFragmentStream:aStream
    "Parse the contents of aStream as a document fragment. The stream data
     is wrapped between an XML declaration plus an opening fragment tag
     and a closing fragment tag, so that Expat sees a complete document.
     The stream is switched to binary mode and read in chunks of
     #primitiveChunkSize bytes.

     If aStream is not readable, or if any chunk fails to parse, a fatal
     error is reported to the error handler, the parser is freed and
     parsing stops.

     Changes against the previous version:
      - header and footer are converted to ByteArrays, because
        #primitiveParseChunk:len: answers false for anything that is not
        a ByteArray
      - the method returns after reporting an unreadable stream
      - the error message is a single line again
      - the failure handling, formerly repeated three times, is in one
        block"

    | chunk chunkSize bytesRead fragmentHeader fragmentFooter feed |

    aStream isReadable ifFalse:[
        self getErrorHandler fatalError:
            (SAXError withMessage:'Stream is not readable').
        ^self
    ].
    chunk := ByteArray new:(chunkSize := self primitiveChunkSize).
    fragmentHeader := '<?xml version="1.0" ?><fragment>' asByteArray.
    fragmentFooter := '</fragment>' asByteArray.

    "feed one chunk to the parser; on failure report, clean up and leave
     the method"
    feed := [:bytes :count |
        (self primitiveParseChunk:bytes len:count) ifFalse:[
            self getErrorHandler fatalError:(SAXError withMessage:'Hmm...cannot parse').
            self primitiveFreeExpatParser.
            ^self
        ]
    ].

    self primitiveCreateExpatParser.
    aStream binary.
    self expatStartDocumentFragment.
    feed value:fragmentHeader value:fragmentHeader size.
    [aStream atEnd] whileFalse:[
        aStream readWait.
        bytesRead := aStream nextBytes:chunkSize into:chunk.
        feed value:chunk value:bytesRead
    ].
    feed value:fragmentFooter value:fragmentFooter size.
    self expatEndDocumentFragment.
    self primitiveFreeExpatParser.

    "Created: / 23-04-2005 / 14:16:13 / janfrog"
!
parseStream:aStream
    "Parse the contents of aStream as a complete XML document. The stream
     is switched to binary mode and read in chunks of #primitiveChunkSize
     bytes; each chunk is handed to the Expat parser.

     If aStream is not readable, or if a chunk fails to parse, a fatal
     error is reported to the error handler and parsing stops.

     Changes against the previous version:
      - after a failed chunk the method now returns, as
        #parseFragmentStream: does. Before, the read loop went on and
        called the parse primitive again although the parser had already
        been freed, and the end-of-document event was still sent
      - the method returns after reporting an unreadable stream
      - the error message is a single line again"

    | chunk chunkSize bytesRead |

    aStream isReadable ifFalse:[
        self getErrorHandler fatalError:
            (SAXError withMessage:'Stream is not readable').
        ^self
    ].
    chunk := ByteArray new:(chunkSize := self primitiveChunkSize).
    self primitiveCreateExpatParser.
    aStream binary.
    self expatStartDocument.
    [aStream atEnd] whileFalse:[
        aStream readWait.
        bytesRead := aStream nextBytes:chunkSize into:chunk.
        (self primitiveParseChunk:chunk len:bytesRead) ifFalse:[
            self getErrorHandler fatalError:(SAXError withMessage:'Hmm...cannot parse').
            self primitiveFreeExpatParser.
            ^self
        ]
    ].
    self expatEndDocument.
    self primitiveFreeExpatParser.

    "Created: / 21-04-2005 / 19:37:05 / janfrog"
! !
!ExpatXMLReader class methodsFor:'documentation'!
version
    "Answer the version control header string of this class.
     The literal is a single line again; it used to be broken over three
     lines, which put line breaks into the answered string."

    ^ '$Header: /opt/data/cvs/stx/goodies/xmlsuite/xmlreaderimpl/XMLv2__ExpatXMLReader.st,v 1.7 2007/04/12 20:16:00 vranyj1 Exp $'
! !
_______________________________________________
help-smalltalk mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/help-smalltalk