Am Dienstag, 29. März 2005 15:30 schrieb Jesse Pelton:
> I'd agree if this were a violation of a specification, but it's not.
> If you have read the thread I referred to, you know that
> pretty-printing is only loosely specified. This means, in essence,
> that implementations can do whatever they like - there is no right or
> wrong. You can certainly propose that Xerces change its behavior
> because you don't like it, but you'll have a hard time making the
> case that what it's doing is incorrect.
>
> That said, this is at least the third time this has come up recently.
> Are there people who prefer empty lines between elements in their
> pretty-printed output? If not, and if it would be committed, I'd be
> happy to make a patch that gets rid of them.

Hi,

I'd like to propose here my preferred way of pretty-printing XML output. 
Instead of letting a writer class do the job, I think it's a matter 
of preparing the DOM tree by inserting whitespace-only text nodes before 
writing it out to a file.

The algorithm is rather simple, a two-step process: first, all 
whitespace-only text nodes are removed from the tree. The second step 
then inserts whitespace text nodes wherever they are needed for a pretty look :)

As 'pretty-looking' mostly depends on taste, there is some room for 
configuration: indent_string and newline_string. These are static const 
XMLCh strings and can be adapted to one's needs.

I've attached 'pretty_printer.h' and 'pretty_printer.cpp' which you may 
try out and use if desired. Any comments are welcome.

Cheers,
                        Axel

-- 
Humboldt-Universität zu Berlin
Institut für Informatik
Signalverarbeitung und Mustererkennung
Dipl.-Inf. Axel Weiß
Rudower Chaussee 25
12489 Berlin-Adlershof
+49-30-2093-3050
** www.freesp.de **
/*
 *    (C) 2005 Axel Weiss ([EMAIL PROTECTED])
 *
 * FreeSP is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with freeSP; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
 * MA  02111-1307  USA.
 */

#include "pretty_printer.h"

#include <vector>

#include <xercesc/util/XMLUniDefs.hpp>

using namespace xercesc;

// choose your indent string:
static const unsigned indent_size = 3;
static const XMLCh indent_string[indent_size] = {chSpace, chSpace, chSpace};

// choose your newline string:
static const unsigned newline_size = 1;
static const XMLCh newline_string[newline_size] = {chCR};

// Returns true only for TEXT_NODE nodes whose value consists entirely of
// whitespace; every other node type yields false.
static bool is_text_node_whitespace_only(const DOMNode *node){
	const bool is_text = (node->getNodeType() == DOMNode::TEXT_NODE);
	// short-circuit: the node value is inspected for text nodes only
	return is_text && XMLString::isAllWhiteSpace(node->getNodeValue());
}

static bool is_last_child(const DOMNode *node){
	const DOMNode *n = node->getNextSibling();
	if (!n) return true;
	if (is_text_node_whitespace_only(n)) return is_last_child(n);
	return false;
}

// Removes (and releases) every whitespace-only text node found among the
// children of 'node', then does the same recursively for the remaining
// children. Nodes that are not elements, and a null 'node', are left
// untouched.
static void remove_ws_nodes(DOMNode *node){
	if (!node || node->getNodeType() != DOMNode::ELEMENT_NODE) return;
	DOMNode *child = node->getFirstChild();
	while (child){
		// fetch the successor first: once 'child' is removed it is no longer
		// linked into the sibling chain
		DOMNode *next = child->getNextSibling();
		if (is_text_node_whitespace_only(child)){
			node->removeChild(child)->release();
		}
		else{
			// each surviving child is visited exactly once
			remove_ws_nodes(child);
		}
		child = next;
	}
}

// Writes the first l1 characters of s1 followed by the first l2 characters
// of s2 into 'result' and terminates it with chNull. 'result' must provide
// room for l1 + l2 + 1 characters. A source pointer is not dereferenced
// when its length is 0.
static void concat(XMLCh *result, const XMLCh *s1, unsigned l1, const XMLCh *s2, unsigned l2){
	XMLCh *out = result;
	for (unsigned k = 0; k != l1; ++k){
		*out++ = s1[k];
	}
	for (unsigned k = 0; k != l2; ++k){
		*out++ = s2[k];
	}
	*out = chNull;
}

// Inserts indentation text nodes below 'node' (recursive worker).
//
//   node           - node to process; only ELEMENT_NODE nodes get text
//                    nodes inserted. A null pointer is ignored.
//   leading_indent - indentation already in effect for 'node' itself
//                    (may be 0 when leading_size is 0).
//   leading_size   - number of characters in leading_indent.
//
// In front of every child element a text node "newline + leading_indent +
// indent_string" is inserted; if the last child is an element, a text node
// "newline + leading_indent" is appended so the closing tag lines up with
// the opening tag. When called with leading_size == 0 the document is
// normalized afterwards so adjacent text nodes are merged.
static void make_pretty_print(DOMNode *node, const XMLCh *leading_indent=0, unsigned leading_size=0){
	if (!node) return;
	if (node->getNodeType() == DOMNode::ELEMENT_NODE){
		const unsigned child_indent_size = leading_size + indent_size;
		// heap-backed buffers instead of variable-length arrays: VLAs are not
		// standard C++ and would grow the stack with every nesting level
		std::vector<XMLCh> new_indent(child_indent_size + 1);
		concat(&new_indent[0], leading_indent, leading_size, indent_string, indent_size);
		std::vector<XMLCh> begin(newline_size + child_indent_size + 1);
		concat(&begin[0], newline_string, newline_size, &new_indent[0], child_indent_size);
		for (DOMNode *child=node->getFirstChild(); child; child=child->getNextSibling()){
			if (child->getNodeType() == DOMNode::ELEMENT_NODE){
				// the new text node goes before 'child', so the iteration
				// continues correctly with child's next sibling
				DOMNode *n = node->getOwnerDocument()->createTextNode(&begin[0]);
				node->insertBefore(n, child);
				make_pretty_print(child, &new_indent[0], child_indent_size);
			}
		}
		DOMNode *last = node->getLastChild();
		if (last && last->getNodeType() == DOMNode::ELEMENT_NODE){
			std::vector<XMLCh> end(newline_size + leading_size + 1);
			concat(&end[0], newline_string, newline_size, leading_indent, leading_size);
			DOMNode *n = node->getOwnerDocument()->createTextNode(&end[0]);
			node->appendChild(n);
		}
	}
	if (!leading_size){
		// a Document node has no owner document; normalize the node itself
		// in that case instead of dereferencing a null pointer
		DOMNode *root = node->getOwnerDocument();
		if (!root) root = node;
		root->normalize();
	}
}


/*
 * Pretty-prints the DOM tree rooted at 'node' in place: first all
 * whitespace-only text nodes below it are removed, then newline/indent
 * text nodes are inserted.
 *
 * A null pointer is ignored. If 'node' is a Document node, the work is
 * done on its first element child (a Document has no owner document to
 * create text nodes from); a Document without an element child is left
 * unchanged.
 */
void make_pretty_print(DOMNode *node){
	if (!node) return;
	if (node->getNodeType() == DOMNode::DOCUMENT_NODE){
		DOMNode *root = node->getFirstChild();
		while (root && root->getNodeType() != DOMNode::ELEMENT_NODE){
			root = root->getNextSibling();
		}
		if (!root) return;
		node = root;
	}
	remove_ws_nodes(node);
	make_pretty_print(node, 0, 0);
}
/*
 *    (C) 2005 Axel Weiss ([EMAIL PROTECTED])
 *
 * FreeSP is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with freeSP; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, 
 * MA  02111-1307  USA.
 */

// Include guard. The name avoids a leading underscore, since such
// identifiers are reserved for the implementation at global scope.
#ifndef PRETTY_PRINTER_H
#define PRETTY_PRINTER_H

#include <xercesc/dom/DOM.hpp>

using xercesc::DOMNode;

/*
 * Turn the DOM tree rooted at 'node' into a pretty-printing DOM tree.
 *
 * The tree is modified in place: whitespace-only text nodes below 'node'
 * are removed, and new text nodes holding a line break plus indentation
 * are inserted in front of child elements (and before the end of an
 * element whose last child is an element). Serializing the tree afterwards
 * without any further formatting yields indented output.
 */

void make_pretty_print(DOMNode *node);

#endif

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to