The input file is 375 MB.
The XML file that DFDL parsing generates is 4.67 GB.

Time required for Daffodil to parse the input and generate the XML file is 16 
minutes, 24 seconds.

Ugh!

That is too long. My customers will laugh at me if I suggest they use a tool 
that takes 16 minutes to parse their data.

Below is the skeletal structure of my DFDL schema. I am pretty sure the 
"choice" is the cause of the slowness. I don't see an alternative to the 
choice; each record of the input could be one of the choices (i.e., the input 
records aren't in any order). Any suggestions for improving the performance?

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:fn="http://www.w3.org/2005/xpath-functions"
    xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/">
    
    <xs:annotation>
        <xs:appinfo source="http://www.ogf.org/dfdl/">
            <!-- Schema-wide DFDL format defaults: text representation, ASCII
                 encoding, delimited lengths, and no default initiator,
                 separator or terminator. Individual components override
                 these where needed (for example, a sequence that sets its
                 own separator). -->
            <dfdl:format
                alignment="1"
                alignmentUnits="bytes"
                choiceLengthKind="implicit"
                emptyValueDelimiterPolicy="none"
                encoding="ASCII"
                encodingErrorPolicy="replace"
                escapeSchemeRef=""
                fillByte="%SP;"
                floating="no"
                ignoreCase="yes"
                initiatedContent="no"
                initiator=""
                leadingSkip="0"
                lengthKind="delimited"
                lengthUnits="characters"
                nilValueDelimiterPolicy="none"
                occursCountKind="implicit"
                outputNewLine="%CR;%LF;"
                representation="text"
                separator=""
                separatorSuppressionPolicy="anyEmpty"
                sequenceKind="ordered"
                textBidi="no"
                textPadKind="none"
                textTrimKind="none"
                trailingSkip="0"
                truncateSpecifiedLengthString="no"
                terminator=""
                textNumberRep="standard"
                textStandardBase="10"
                textStandardZeroRep="0"
                textNumberRounding="pattern"
                textStandardExponentRep="E"
                textNumberCheckPolicy="strict"
            />
        </xs:appinfo>
    </xs:annotation>
    
    <!-- Root element: a list of "record" elements separated by newlines
         (%NL;, infix). Each record holds exactly one branch of the choice. -->
    <xs:element name="Test">
        <xs:complexType>
            <xs:sequence
                dfdl:separator="%NL;"
                dfdl:separatorPosition="infix">
                <xs:element
                    name="record"
                    maxOccurs="unbounded">
                    <xs:complexType>
                        <!-- NOTE(review): this choice sets neither
                             dfdl:choiceDispatchKey nor initiatedContent="yes",
                             so the parser presumably has to try the branches
                             one after another for every record. Confirm
                             whether the records carry a tag that would allow
                             direct dispatch. -->
                        <xs:choice>
                            <xs:element ref="A"/>
                            <xs:element ref="B"/>
                            <xs:element ref="C"/>
                            <xs:element ref="D"/>
                            <!-- A hundred more of these element ref's -->
                        </xs:choice>
                    </xs:complexType>
                </xs:element>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

Reply via email to