Hello community,

here is the log from the commit of package kfilemetadata for openSUSE:Factory 
checked in at 2014-05-06 17:41:14
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/kfilemetadata (Old)
 and      /work/SRC/openSUSE:Factory/.kfilemetadata.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "kfilemetadata"

Changes:
--------
--- /work/SRC/openSUSE:Factory/kfilemetadata/kfilemetadata.changes  2014-04-18 12:37:53.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.kfilemetadata.new/kfilemetadata.changes  2014-05-06 17:41:16.000000000 +0200
@@ -1,0 +2,10 @@
+Sat May  3 23:06:19 UTC 2014 - [email protected]
+
+- Added mobipocket-devel BuildRequires to add epub support
+
+-------------------------------------------------------------------
+Fri May  2 10:27:24 UTC 2014 - [email protected]
+
+- Added optimize.patch: Optimize word count in PlainTextExtractor
+
+-------------------------------------------------------------------

New:
----
  optimize.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ kfilemetadata.spec ++++++
--- /var/tmp/diff_new_pack.UvMCS3/_old  2014-05-06 17:41:16.000000000 +0200
+++ /var/tmp/diff_new_pack.UvMCS3/_new  2014-05-06 17:41:16.000000000 +0200
@@ -23,10 +23,13 @@
 License:        GPL-2.0+ and LGPL-2.1+ and LGPL-3.0
 Group:          System/GUI/KDE
 Source0:        %{name}-%{version}.tar.xz
+# PATCH-FIX-UPSTREAM optimize.patch -- Optimize word count in PlainTextExtractor
+Patch0:         optimize.patch
 BuildRequires:  libepub-devel
 BuildRequires:  libexiv2-devel
 BuildRequires:  libkde4-devel
 BuildRequires:  libpoppler-qt4-devel
+BuildRequires:  mobipocket-devel
 BuildRequires:  taglib-devel
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 
@@ -44,6 +47,7 @@
 
 %prep
 %setup -q 
+%patch0 -p1
 
 %build
   %cmake_kde4 -d build

++++++ optimize.patch ++++++
From: Milian Wolff <[email protected]>
Date: Fri, 02 May 2014 09:53:33 +0000
Subject: Optimize word count in PlainTextExtractor.
X-Git-Url: http://quickgit.kde.org/?p=kfilemetadata.git&a=commitdiff&h=a5b76bbd287d504477a9f27d64747f9bcfe50dbc
---
Optimize word count in PlainTextExtractor.

Regular expressions are notoriously slow. Implementing a simple
word-count directly in C++ is much faster, as shown by the benchmark:

Before:
RESULT : IndexerExtractorTests::benchMarkPlainTextExtractor():
697.0 msecs per iteration (total: 6,970, iterations: 10)

After:
RESULT : IndexerExtractorTests::benchMarkPlainTextExtractor():
88.2 msecs per iteration (total: 883, iterations: 10)

REVIEW: 117789
---


--- a/src/extractors/plaintextextractor.cpp
+++ b/src/extractors/plaintextextractor.cpp
@@ -25,6 +25,32 @@
 
 using namespace KFileMetaData;
 
+namespace {
+inline bool isWordCharacter(const QChar& c)
+{
+    // The Qt docs say for word characters:
+    //      \w  - Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_').
+    // see also: http://qt-project.org/doc/qt-4.8/qregexp.html
+    return c.isLetterOrNumber() || c.isMark() || c.unicode() == '_';
+}
+
+inline int countWords(const QString &string)
+{
+    int words = 0;
+    bool inWord = false;
+    foreach(QChar c, string) {
+        if (isWordCharacter(c) != inWord) {
+            inWord = !inWord;
+            if (inWord) {
+                ++words;
+            }
+        }
+    }
+
+    return words;
+}
+}
+
 PlainTextExtractor::PlainTextExtractor(QObject* parent, const QVariantList&)
     : ExtractorPlugin(parent)
 {
@@ -48,15 +74,13 @@
     int lines = 0;
     int words = 0;
 
-    QRegExp wordsRegex("\\b\\w+\\b");
-
     QTextStream ts(&file);
     while (!ts.atEnd()) {
         QString str = ts.readLine();
         result->append(str);
 
         lines += 1;
-        words += str.count(wordsRegex);
+        words += countWords(str);
     }
 
     result->add(Property::WordCount, words);
-- 
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to