(Resend, Dirk fixed the problem with the server eating my mail. Thanks 
Dirk :) )

Please review:

On Sunday 11 February 2007, Martijn Klingens wrote:
> In my patch to speed up formatStyleKeywords I already mentioned that there
> is one much bigger fish: the call to message.plainBody().
>
> It is used *ONLY* to determine whether a message is LTR or RTL for bidi
> locales. That is a lot of overkill.
>
> [...]
> 
> The alternative is to add an isRightToLeft() to Kopete::Message that caches
> the result.

Attached is a patch that does exactly that. It also kills some code 
duplication between de Message::Private ctor and setBody(). And it rewrites 
the regexp part of plainBody().

The reason is that with the caching we still need to call plainBody once per 
message, and even that single call is outrageously expensive.

The combined result is that plainBody() is about double as fast as it used to 
be (the single remaining regexp replace eating 35% of the time) and is also 
never called more than once.

The darn thing is still too slow, but this should at least give ~35% speedup 
compared to before both optimization patches.

Ok to commit to 3.5 branch?

-- 
Martijn

What may be done at any time will be done at no time. -Scottish proverb
Index: libkopete/kopetemessage.cpp
===================================================================
--- libkopete/kopetemessage.cpp	(revision 634870)
+++ libkopete/kopetemessage.cpp	(working copy)
@@ -47,7 +47,7 @@
 {
 public:
 	Private( const QDateTime &timeStamp, const Contact *from, const ContactPtrList &to,
-	         const QString &body, const QString &subject, MessageDirection direction, MessageFormat f,
+	         const QString &subject, MessageDirection direction,
 	         const QString &requestedPlugin, MessageType type );
 
 	QGuardedPtr<const Contact> from;
@@ -62,6 +62,7 @@
 	bool bgOverride;
 	bool fgOverride;
 	bool rtfOverride;
+	bool isRightToLeft;
 	QDateTime timeStamp;
 	QFont font;
 
@@ -72,46 +73,26 @@
 };
 
 Message::Private::Private( const QDateTime &timeStamp, const Contact *from,
-             const ContactPtrList &to, const QString &body, const QString &subject,
-				 MessageDirection direction, MessageFormat f, const QString &requestedPlugin, MessageType type )
-	: from(from), to(to), manager(0), direction(direction), format(f), type(type)
-	, requestedPlugin(requestedPlugin), importance( (to.count() <= 1) ? Normal : Low ), bgOverride(false), fgOverride(false)
-	, rtfOverride(false), timeStamp(timeStamp), body(body), subject(subject)
+		const ContactPtrList &to, const QString &subject,
+		MessageDirection direction, const QString &requestedPlugin, MessageType type )
+: from( from ), to( to ), manager( 0 ), direction( direction ), format( PlainText ), type( type ),
+	requestedPlugin( requestedPlugin ), importance( (to.count() <= 1) ? Normal : Low ),
+	bgOverride( false ), fgOverride( false ), rtfOverride( false ), isRightToLeft( false ),
+	timeStamp( timeStamp ), body( QString::null ), subject( subject )
 {
-	
-	//TODO: move that in ChatTextEditPart::contents
-	if( format == RichText )
-	{
-		//This is coming from the RichTextEditor component.
-		//Strip off the containing HTML document
-		this->body.replace( QRegExp( QString::fromLatin1(".*<body.*>\\s+(.*)\\s+</body>.*") ), QString::fromLatin1("\\1") );
-
-		//Strip <p> tags
-		this->body.replace( QString::fromLatin1("<p>"), QString::null );
-
-		//Replace </p> with a <br/>
-		this->body.replace( QString::fromLatin1("</p>") , QString::fromLatin1("<br/>") );
-
-		//Remove trailing <br/>
-		if ( this->body.endsWith( QString::fromLatin1("<br/>") ) )
-			this->body.truncate( this->body.length() - 5 );
-		this->body.remove(  QString::fromLatin1("\n") );
-		this->body.replace( QRegExp( QString::fromLatin1( "\\s\\s" ) ), QString::fromLatin1( "&nbsp; " ) );
-
-	}
 }
 
-
 Message::Message()
-    : d( new Private( QDateTime::currentDateTime(), 0L, QPtrList<Contact>(), QString::null, QString::null, Internal,
-	PlainText, QString::null, TypeNormal ) )
+: d( new Private( QDateTime::currentDateTime(), 0L, QPtrList<Contact>(), QString::null, Internal,
+	QString::null, TypeNormal ) )
 {
 }
 
 Message::Message( const Contact *fromKC, const QPtrList<Contact> &toKC, const QString &body,
 		  MessageDirection direction, MessageFormat f, const QString &requestedPlugin, MessageType type )
-    : d( new Private( QDateTime::currentDateTime(), fromKC, toKC, body, QString::null, direction, f, requestedPlugin, type ) )
+: d( new Private( QDateTime::currentDateTime(), fromKC, toKC, QString::null, direction, requestedPlugin, type ) )
 {
+	doSetBody( body, f );
 }
 
 Message::Message( const Contact *fromKC, const Contact *toKC, const QString &body,
@@ -119,26 +100,30 @@
 {
 	QPtrList<Contact> to;
 	to.append(toKC);
-	d = new Private( QDateTime::currentDateTime(), fromKC, to, body, QString::null, direction, f, requestedPlugin, type );
+	d = new Private( QDateTime::currentDateTime(), fromKC, to, QString::null, direction, requestedPlugin, type );
+	doSetBody( body, f );
 }
 
 Message::Message( const Contact *fromKC, const QPtrList<Contact> &toKC, const QString &body,
 		  const QString &subject, MessageDirection direction, MessageFormat f, const QString &requestedPlugin, MessageType type )
-    : d( new Private( QDateTime::currentDateTime(), fromKC, toKC, body, subject, direction, f, requestedPlugin, type ) )
+    : d( new Private( QDateTime::currentDateTime(), fromKC, toKC, subject, direction, requestedPlugin, type ) )
 {
+	doSetBody( body, f );
 }
 
 Message::Message( const QDateTime &timeStamp, const Contact *fromKC, const QPtrList<Contact> &toKC,
 		  const QString &body, MessageDirection direction, MessageFormat f, const QString &requestedPlugin, MessageType type )
-    : d( new Private( timeStamp, fromKC, toKC, body, QString::null, direction, f, requestedPlugin, type ) )
+    : d( new Private( timeStamp, fromKC, toKC, QString::null, direction, requestedPlugin, type ) )
 {
+	doSetBody( body, f );
 }
 
 
 Message::Message( const QDateTime &timeStamp, const Contact *fromKC, const QPtrList<Contact> &toKC,
 		  const QString &body, const QString &subject, MessageDirection direction, MessageFormat f, const QString &requestedPlugin, MessageType type )
-    : d( new Private( timeStamp, fromKC, toKC, body, subject, direction, f, requestedPlugin, type ) )
+    : d( new Private( timeStamp, fromKC, toKC, subject, direction, requestedPlugin, type ) )
 {
+	doSetBody( body, f );
 }
 
 Kopete::Message::Message( const Message &other )
@@ -205,40 +190,57 @@
 	d->font = font;
 }
 
-void Message::setBody( const QString &body, MessageFormat f )
+void Message::doSetBody( const QString &_body, Message::MessageFormat f )
 {
-	detach();
+	QString body = _body;
 
-	QString theBody = body;
 	//TODO: move that in ChatTextEditPart::contents
 	if( f == RichText )
 	{
 		//This is coming from the RichTextEditor component.
 		//Strip off the containing HTML document
-		theBody.replace( QRegExp( QString::fromLatin1(".*<body.*>\\s+(.*)\\s+</body>.*") ), QString::fromLatin1("\\1") );
+		body.replace( QRegExp( QString::fromLatin1(".*<body.*>\\s+(.*)\\s+</body>.*") ), QString::fromLatin1("\\1") );
 
 		//Strip <p> tags
-		theBody.replace( QString::fromLatin1("<p>"), QString::null );
+		body.replace( QString::fromLatin1("<p>"), QString::null );
 
 		//Replace </p> with a <br/>
-		theBody.replace( QString::fromLatin1("</p>"), QString::fromLatin1("<br/>") );
+		body.replace( QString::fromLatin1("</p>"), QString::fromLatin1("<br/>") );
 
 		//Remove trailing </br>
-		if ( theBody.endsWith( QString::fromLatin1("<br/>") ) )
-			theBody.truncate( theBody.length() - 5 );
+		if ( body.endsWith( QString::fromLatin1("<br/>") ) )
+			body.truncate( body.length() - 5 );
 	
-		theBody.remove( QString::fromLatin1("\n") );
-		theBody.replace( QRegExp( QString::fromLatin1( "\\s\\s" ) ), QString::fromLatin1( "&nbsp; " ) );
+		body.remove( QString::fromLatin1("\n") );
+		body.replace( QRegExp( QString::fromLatin1( "\\s\\s" ) ), QString::fromLatin1( "&nbsp; " ) );
 	}
-	/*	else if( f == ParsedHTML )
+	/*
+	else if( f == ParsedHTML )
 	{
-	kdWarning( 14000 ) << k_funcinfo << "using ParsedHTML which is internal !   message: " << body << kdBacktrace() << endl;
-	}*/
+		kdWarning( 14000 ) << k_funcinfo << "using ParsedHTML which is internal! Message: '" <<
+			body << "', Backtrace: " << kdBacktrace() << endl;
+	}
+	*/
 
-	d->body=theBody;
+	d->body = body;
 	d->format = f;
+
+	// unescaping is very expensive, do it only once and cache the result
+	d->isRightToLeft = ( f & RichText ? unescape( d->body ).isRightToLeft() : d->body.isRightToLeft() );
 }
 
+void Message::setBody( const QString &body, MessageFormat f )
+{
+	detach();
+
+	doSetBody( body, f );
+}
+
+bool Message::isRightToLeft() const
+{
+	return d->isRightToLeft;
+}
+
 void Message::setImportance(Message::MessageImportance i)
 {
 	detach();
@@ -252,12 +254,43 @@
 	//remove linebreak and multiple spaces
 	data.replace( QRegExp( QString::fromLatin1( "\\s*[\n\r\t]+\\s*" ) , false ), QString::fromLatin1(" " )) ;
 
-	data.replace( QRegExp( QString::fromLatin1( "< *img[^>]*title=\"([^>\"]*)\"[^>]*>" ) , false ), QString::fromLatin1( "\\1" ) );  //escape smeleys, return to the original code
-	data.replace( QRegExp( QString::fromLatin1( "< */ *p[^>]*>" ) , false ), QString::fromLatin1( "\n" ) );
-	data.replace( QRegExp( QString::fromLatin1( "< */ *div[^>]*>" ) , false ), QString::fromLatin1( "\n" ) );
-	data.replace( QRegExp( QString::fromLatin1( "< *br */? *>" ) , false ), QString::fromLatin1( "\n" ) );
-	data.replace( QRegExp( QString::fromLatin1( "<[^>]*>" ) ), QString::null );
+	// Loop over data, replacing all XML elements
+	int pos = 0;
+	QRegExp elementRX( QString::fromLatin1(
+		"< *"                                   // Elem start, whitespace
+		"(/? *([^ >]*)"                         // Elem name with optional leading /, but no trailing /
+		"(?: (?:[^>]* )?title=\"([^>\"]*)\")?"  // Optionally, a title attrib, used for <img>
+		"[^>]*>" ), false );                    // Everything else up to the closing '>'
 
+	while ( ( pos = elementRX.search( data, pos ) ) != -1 )
+	{
+		QString elem = elementRX.cap( 1 ).lower().stripWhiteSpace();
+		if ( elem == QString::fromLatin1( "img" ) )
+		{
+			// Replace smileys with their original text
+			// If this is an image that is not a smiley it will make the
+			// code effectively behave like the wildcard filter in the
+			// else {} below, which is intended
+			QString orig = elementRX.cap( 2 );
+			data.replace( pos, elementRX.matchedLength(), orig );
+			pos += orig.length();
+		}
+		else if ( elem == QString::fromLatin1( "/p" ) || elem == QString::fromLatin1( "/div" ) ||
+			elem == QString::fromLatin1( "br" ) )
+		{
+			// Replace paragraph, div and line breaks with a newline
+			data.replace( pos, elementRX.matchedLength(), '\n' );
+			pos++;
+		}
+		else
+		{
+			// Remove all other elements entirely
+			// Don't update pos, we restart at this position :)
+			data.remove( pos, elementRX.matchedLength() );
+		}
+	}
+
+	// Replace stuff starting with '&'
 	data.replace( QString::fromLatin1( "&gt;" ), QString::fromLatin1( ">" ) );
 	data.replace( QString::fromLatin1( "&lt;" ), QString::fromLatin1( "<" ) );
 	data.replace( QString::fromLatin1( "&quot;" ), QString::fromLatin1( "\"" ) );
Index: libkopete/kopetemessage.h
===================================================================
--- libkopete/kopetemessage.h	(revision 634870)
+++ libkopete/kopetemessage.h	(working copy)
@@ -361,13 +361,23 @@
 public:  /* static helpers */
 
 	/**
-	* Unescapes a string, removing XML entity references and return a plein text.
+	* Unescapes a string, removing XML entity references and returns a plain text.
 	*
+	* Note that this method is *VERY* expensive when called on rich text bodies,
+	* use with care!
+	*
 	* @param xml The string you want to unescape
 	*/
 	static QString unescape( const QString &xml );
 
 	/**
+	 * Indicate whether the string is right-to-left (Arabic or Hebrew are bidi locales)
+	 * or "normal" left-to-right. Calculating RTL on rich text is expensive, and
+	 * isRightToLeft() therefore uses a cached value.
+	 */
+	bool isRightToLeft() const;
+
+	/**
 	 * @brief Transform a pleintext message to an html.
 	 * it escape main entity like &gt; &lt; add some &lt;br /&gt; or &amp;nbsp;
 	 */
@@ -394,6 +404,12 @@
 	 */
 	void detach();
 
+	/**
+	 * Called internally by @ref setBody() and the constructor
+	 * Basically @ref setBody() without detach
+	 */
+	void doSetBody( const QString &body, MessageFormat format = PlainText );
+
 	class Private;
 	KSharedPtr<Private> d;
 
Index: kopete/chatwindow/chatmessagepart.cpp
===================================================================
--- kopete/chatwindow/chatmessagepart.cpp	(revision 634872)
+++ kopete/chatwindow/chatmessagepart.cpp	(working copy)
@@ -1060,9 +1060,7 @@
 	}
 
 	// Set message direction("rtl"(Right-To-Left) or "ltr"(Left-to-right))
-	// FIXME: The conversion to plainBody() is extremely expensive and should not be used
-	//        here. Use a cached value for isRightToLeft instead. -- Martijn, 20070218
-	resultHTML = resultHTML.replace( QString::fromUtf8("%messageDirection%"), message.plainBody().isRightToLeft() ? "rtl" : "ltr" );
+	resultHTML = resultHTML.replace( QString::fromUtf8("%messageDirection%"), message.isRightToLeft() ? "rtl" : "ltr" );
 
 	// These colors are used for coloring nicknames. I tried to use
 	// colors both visible on light and dark background.
_______________________________________________
kopete-devel mailing list
[email protected]
https://mail.kde.org/mailman/listinfo/kopete-devel

Reply via email to