jkesselm    01/05/17 14:43:57

  Modified:    java/src/org/apache/xml/dtm/sax2dtm Tag: DTM_EXP
                        SAX2DTM.java
  Log:
  Revised text-coalescence code, improved handling of CDATA Sections.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.13  +47 -81    xml-xalan/java/src/org/apache/xml/dtm/sax2dtm/Attic/SAX2DTM.java
  
  Index: SAX2DTM.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/dtm/sax2dtm/Attic/SAX2DTM.java,v
  retrieving revision 1.1.2.12
  retrieving revision 1.1.2.13
  diff -u -r1.1.2.12 -r1.1.2.13
  --- SAX2DTM.java      2001/05/16 23:29:00     1.1.2.12
  +++ SAX2DTM.java      2001/05/17 21:43:55     1.1.2.13
  @@ -146,9 +146,14 @@
     /** Namespace support, only relevent at construction time. */
     transient private IntStack m_contextIndexes = new IntStack();
   
  -  /** Type of text event. */
  +  /** Type of next characters() event within text block in prgress. */
     transient private int m_textType = DTM.TEXT_NODE;
   
  +  /** Type of coalesced text block. See logic in the characters()
  +   * method.
  +   * */
  +  transient private int m_coalescedTextType = DTM.TEXT_NODE;
  +
     /** The SAX Document locator */
     transient private Locator m_locator = null;
   
  @@ -217,10 +222,11 @@
     private static final int ENTITY_FIELDS_PER = 4;
   
     /**
  -   * Flag tells if text is pending that we need to
  -   *  check for whitespace stripping. 
  +   * The starting offset within m_chars for the text or
  +   * CDATA_SECTION node currently being acumulated,
  +   * or -1 if there is no text node in progress
      */
  -  private boolean m_textPending = false;
  +  private int m_textPendingStart = -1;
   
     /**
      * Construct a SAX2DTM object ready to be constructed from SAX2
  @@ -799,9 +805,8 @@
       m_info.setElementAt(expandedTypeID, startInfo + OFFSET_EXPANDEDNAMEID);
       m_info.setElementAt(dataOrPrefix, startInfo + OFFSET_DATA_OR_QNAME);
   
  -    // Note that we don't want firstChild or nextSibling to be processed until
  -    // charactersFlush() is called.
  -    if (!m_textPending)
  +    // Text nodes no longer need special handling, because we
  +    // don't add them until they're complete
       {
         if (DTM.NULL != parentIndex && type != DTM.ATTRIBUTE_NODE
                 && type != DTM.NAMESPACE_NODE)
  @@ -1278,63 +1283,37 @@
     }
   
     /**
  -   * Check the last text node to see if it should be stripped.
  +   * Check whether accumulated text should be stripped; if not,
  +   * append the appropriate flavor of text/cdata node.
      */
     protected void charactersFlush()
     {
  -
  -    if (m_textPending)
  +    if (m_textPendingStart >= 0 ) // -1 indicates no-text-in-progress
       {
  -      m_textPending = false;
  -
  -      boolean didStrip = false;
  -      int lastNodeIdentity = m_size;
  +      int length=m_chars.size()-m_textPendingStart;
   
  +      boolean doStrip = false;
         if (getShouldStripWhitespace())
  -      {
  -        int dataIndex = getNodeInfoNoWait(lastNodeIdentity,
  -                                          OFFSET_DATA_OR_QNAME);
  -        int offset = m_data.elementAt(dataIndex);
  -        int length = m_data.elementAt(dataIndex + 1);
  -
  -        if (m_chars.isWhitespace(offset, length))
  -        {
  -          m_chars.setLength(m_chars.size() - length);
  -          m_info.setSize(m_info.size() - getNodeInfoBlockSize());
  -
  -          didStrip = true;
  -        }
  -      }
  -
  -      if (!didStrip)
  -      {
  -        m_size++;
  -
  -        int parentIndex = getNodeInfoNoWait(lastNodeIdentity, OFFSET_PARENT);
  -
  -        if (DTM.NULL != parentIndex)
  -        {
  -          int startParentInfo = parentIndex * NODEINFOBLOCKSIZE;
  -
  -          if (NOTPROCESSED
  -                  == m_info.elementAt(startParentInfo + OFFSET_FIRSTCHILD))
  -          {
  -            m_info.setElementAt(lastNodeIdentity,
  -                                startParentInfo + OFFSET_FIRSTCHILD);
  -          }
  -        }
  -
  -        int prev = getNodeInfoNoWait(lastNodeIdentity, OFFSET_PREVSIBLING);
  +     {
  +       doStrip=m_chars.isWhitespace(m_textPendingStart, length);
  +     }
   
  -        if (DTM.NULL != prev)
  -        {
  -          m_info.setElementAt(lastNodeIdentity,
  -                              (prev * getNodeInfoBlockSize())
  -                              + OFFSET_NEXTSIBLING);
  -        }
  +      if(doStrip)
  +     m_chars.setLength(m_textPendingStart); // Discard accumulated text
  +      else
  +     {
  +       int exName = m_ent.getExpandedNameID(DTM.TEXT_NODE);
   
  -        m_previous = lastNodeIdentity;
  -      }
  +       int nodeIndex= addNode(m_coalescedTextType,exName,m_level,
  +                              m_parents.peek(),m_previous,
  +                              m_textPendingStart,false);
  +       // %REVIEW% I _think_ I've got this right...
  +       m_data.setElementAt(nodeIndex,length);
  +     }
  +      
  +      // Reset for next text block
  +      m_textPendingStart=-1;
  +      m_textType=m_coalescedTextType=DTM.TEXT_NODE;
       }
     }
   
  @@ -1814,32 +1793,19 @@
      */
     public void characters(char ch[], int start, int length) throws SAXException
     {
  -
  -    int exName = m_ent.getExpandedNameID(DTM.TEXT_NODE);
  -    int dataIndex = m_data.size();
  -
  -    m_data.addElement(m_chars.length());
  -    m_data.addElement(length);
  -    m_chars.append(ch, start, length);
  -
  -    if (m_textPending)
  -    {
  -      int lastNodeIdentity = m_size;
  -
  -      dataIndex = getNodeInfoNoWait(lastNodeIdentity, OFFSET_DATA_OR_QNAME)
  -                  + 1;
  -
  -      m_data.setElementAt(m_data.elementAt(dataIndex) + length, dataIndex);
  -    }
  -    else
  -    {
  -      m_textPending = true;
  -
  -      addNode(m_textType, exName, m_level, m_parents.peek(), m_previous,
  -              dataIndex, false);
  +    if(m_textPendingStart==-1)       // First one in this block
  +      {
  +     m_textPendingStart=m_chars.size();
  +     m_coalescedTextType=m_textType;
  +      }
  +    m_chars.append(ch,start,length);
   
  -      m_size--;  // doesn't really exist until charactersFlush.
  -    }
  +    // Type logic: If all adjacent text is CDATASections, the
  +    // concatentated text is treated as a single CDATASection (see
  +    // initialization above).  If any were ordinary Text, the whole
  +    // thing is treated as Text. This may be worth %REVIEW%ing.
  +    if(m_textType==DTM.TEXT_NODE)
  +     m_coalescedTextType=DTM.TEXT_NODE;
     }
   
     /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to