In my previous message I reported an error that occurs when extra whitespace
appears in the docType internal subset.
The problem is in DTD\DTDScanner.cpp
Method: void DTDScanner::scanAttListDecl()

I've slightly modified this function (see attachments) to get rid of this
bug.

The next problem(?) I've discovered in this area is that attribute
declarations in the emitted internal subset do not look the same as
they were originally declared. For example:

Input File:

<!DOCTYPE A [
    <!ELEMENT A ANY>
    <!ATTLIST A
        name1 CDATA #IMPLIED
        name2 CDATA #IMPLIED
    >
]>
<A/>

Output File
<!DOCTYPE A [
    <!ELEMENT A ANY>
    <!ATTLIST A name1 CDATA #IMPLIED>
        <!ATTLIST A name2 CDATA #IMPLIED>

]>
<A/>


Actually, it is not a problem to write each of an element's attributes as a
separate ATTLIST declaration,
but most users will agree with me that it would be better to keep
the original layout.
If you decide to keep separate ATTLIST declarations, then you need to
revise the code that deals with whitespace (skipping, docType notification,
and so on).

From my point of view, it would be better to ignore all whitespace and do
the formatting on your own.
(Not a bad idea to provide the options, like

DTD_FORMAT                    // Ignore all whitespace and apply your own
                              // formatting
DTD_KEEP_LAYOUT          // Keep the DTD as in the input source
DTD_NO_WHITESPACES   // Do not add any whitespace to the internal subset.
(A very useful feature)
)


Peter A. Volchek
Software Engineer
Metis International, Inc.
[EMAIL PROTECTED]

void DTDScanner::scanAttListDecl()
{
    // Space is required here, so check for a PE ref
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    //
    //  Next should be the name of the element it belongs to, so get a buffer
    //  and get the name into it.
    //
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedElementName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    //
    //  Find this element's declaration. If it has not been declared yet,
    //  we will force one into the list, but not mark it as declared.
    //
    DTDElementDecl* elemDecl = (DTDElementDecl*) 
fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), 
Grammar::TOP_LEVEL_SCOPE);
    if (!elemDecl)
    {
        //
        //  Lets fault in a declaration and add it to the pool. We mark
        //  it having been created because of an attlist. Later, if its
        //  declared, this will be updated.
        //
        elemDecl = new DTDElementDecl(bbName.getRawBuffer(), fEmptyNamespaceId);
        elemDecl->setCreateReason(XMLElementDecl::AttList);
        fDTDGrammar->putElemDecl((XMLElementDecl*) elemDecl);
    }

    // If we have a doc type handler, tell it the att list is starting
    if (fDocTypeHandler)
        fDocTypeHandler->startAttList(*elemDecl);

    //
    //  Now we loop until we are done with all of the attributes in this
    //  list. We need a buffer to use for local processing.
    //
    XMLBufBid   bbTmp(fBufMgr);
    XMLBuffer&  tmpBuf = bbTmp.getBuffer();
    bool        seenAnId = false;
    bool        gotAttr = false;
    while (true)
    {
        // Get the next char out and see what it tells us to do
        const XMLCh nextCh = fReaderMgr->peekNextChar();

        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

        if (nextCh == chCloseAngle)
        {
            // We are done with this attribute list
            fReaderMgr->getNextChar();
            break;
        }
         else if (XMLReader::isWhitespace(nextCh))
        {
            //
            //  If advanced callbacks are enabled and we have a doc
            //  type handler, then gather up the white space and call
            //  back on the doctype handler. Otherwise, just skip
            //  whitespace.
            //
            if (fDocTypeHandler && gotAttr )
            {
                fReaderMgr->getSpaces(tmpBuf);
                fDocTypeHandler->doctypeWhitespace
                (
                    tmpBuf.getRawBuffer()
                    , tmpBuf.getLen()
                );
            }
             else
            {
                fReaderMgr->skipPastSpaces();
            }
            gotAttr = false;
        }
         else if (nextCh == chPercent)
        {
            // Eat the percent and expand the ref
            fReaderMgr->getNextChar();
            expandPERef(false, false, true);
        }
         else
        {
            //
            //  It must be an attribute name, so scan it. We let
            //  it use our local buffer for its name scanning.
            //
            XMLAttDef* attDef = scanAttDef(*elemDecl, tmpBuf);
            gotAttr = true;

            if (!attDef)
            {
                fReaderMgr->skipPastChar(chCloseAngle);
                break;
            }

            //
            //  If we are validating and its an ID type, then we have to
            //  make sure that we have not seen an id attribute yet. Set
            //  the flag to say that we've seen one now also.
            //
            if (fScanner->getDoValidation())
            {
                if (attDef->getType() == XMLAttDef::ID)
                {
                    if (seenAnId)
                        fScanner->getValidator()->emitError(XMLValid::MultipleIdAttrs, 
elemDecl->getFullName());
                    seenAnId = true;
                }
            }
        }
    }

    // If we have a doc type handler, tell it the att list is ending
    if (fDocTypeHandler)
        fDocTypeHandler->endAttList(*elemDecl);
}

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to