commit:     7811803f05a811dab3e9220060a9539844cf8e74
Author:     Andreas Sturmlechner <andreas.sturmlechner <AT> gmail <DOT> com>
AuthorDate: Sun Sep 11 16:07:57 2016 +0000
Commit:     Michael Palimaka <kensington <AT> gentoo <DOT> org>
CommitDate: Wed Sep 14 14:13:12 2016 +0000
URL:        https://gitweb.gentoo.org/proj/kde.git/commit/?id=7811803f

kde-frameworks/kfilemetadata: Fix segfault w/ ODF and EPUB files

Package-Manager: portage-2.3.0

 ...ilemetadata-5.26.0-epubextractor-segfault.patch | 149 +++++++++++++++++++++
 ...filemetadata-5.26.0-odfextractor-segfault.patch |  66 +++++++++
 .../kfilemetadata/kfilemetadata-5.26.0-r1.ebuild   |  55 ++++++++
 3 files changed, 270 insertions(+)

diff --git a/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-epubextractor-segfault.patch b/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-epubextractor-segfault.patch
new file mode 100644
index 0000000..b738d1a
--- /dev/null
+++ b/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-epubextractor-segfault.patch
@@ -0,0 +1,149 @@
+From: Christoph Cullmann <[email protected]>
+Date: Sun, 11 Sep 2016 17:14:51 +0000
+Subject: Improve epub extractor, less segfaults
+X-Git-Url: http://quickgit.kde.org/?p=kfilemetadata.git&a=commitdiff&h=47f6e57b2fa3768feb4f1f4a2cd3ce46660d90f2
+---
+Improve epub extractor, less segfaults
+
+Improve epub extractor:
+
+1) check for more nullpointers (e.g. data can be null for some fields, iterators, ...)
+2) actually close the epub file again at all
+3) iterator seems to handle clink as stated in docs, fix double free
+
+e.g. see bug 361727
+could be the double freed clink in the last iterator
+
+BUG: 361727
+REVIEW: 128888
+---
+
+
+--- a/src/extractors/epubextractor.cpp
++++ b/src/extractors/epubextractor.cpp
+@@ -1,5 +1,6 @@
+ /*
+     Copyright (C) 2013  Vishesh Handa <[email protected]>
++    Copyright (C) 2016  Christoph Cullmann <[email protected]>
+ 
+     This library is free software; you can redistribute it and/or
+     modify it under the terms of the GNU Lesser General Public
+@@ -46,11 +47,14 @@
+ QString fetchMetadata(struct epub* e, const epub_metadata& type)
+ {
+     int size = 0;
+-
+     unsigned char** data = epub_get_metadata(e, type, &size);
+     if (data) {
+         QStringList strList;
+         for (int i = 0; i < size; i++) {
++            // skip nullptr entries, can happen for broken xml files
++            if (!data[i])
++                continue;
++
+             strList << QString::fromUtf8((char*)data[i]);
+             free(data[i]);
+         }
+@@ -65,7 +69,8 @@
+ 
+ void EPubExtractor::extract(ExtractionResult* result)
+ {
+-    struct epub* ePubDoc = epub_open(result->inputUrl().toUtf8().constData(), 1);
++    // open epub, return on exit, file will be closed again at end of function
++    auto ePubDoc = epub_open(result->inputUrl().toUtf8().constData(), 1);
+     if (!ePubDoc) {
+         qWarning() << "Invalid document";
+         return;
+@@ -138,49 +143,49 @@
+     //
+     // Plain Text
+     //
+-    if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
+-        return;
++    if (result->inputFlags() & ExtractionResult::ExtractPlainText) {
++        if (auto iter = epub_get_iterator(ePubDoc, EITERATOR_SPINE, 0)) {
++            do {
++                char* curr = epub_it_get_curr(iter);
++                if (!curr)
++                    continue;
++
++                QString html = QString::fromUtf8(curr);
++                html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
++                result->append(html);
++            } while (epub_it_get_next(iter));
++
++            epub_free_iterator(iter);
++        }
++
++        auto tit = epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0);
++        if (!tit) {
++            tit = epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0);
++        }
++        if (tit) {
++            if (epub_tit_curr_valid(tit)) {
++                do {
++                    // get link, iterator handles freeing of it
++                    char* clink = epub_tit_get_curr_link(tit);
++
++                    // epub_get_data returns -1 on failure
++                    char* data = nullptr;
++                    const int size = epub_get_data(ePubDoc, clink, &data);
++                    if (size >= 0 && data) {
++                        QString html = QString::fromUtf8(data, size);
++                        // strip html tags
++                        html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
++
++                        result->append(html);
++                        free(data);
++                    }
++                } while (epub_tit_next(tit));
++            }
++            epub_free_titerator(tit);
++        }
+     }
+ 
+-    struct eiterator* iter = epub_get_iterator(ePubDoc, EITERATOR_SPINE, 0);
+-    do {
+-        char* curr = epub_it_get_curr(iter);
+-        if (!curr)
+-            continue;
+-        QString html = QString::fromUtf8(curr);
+-        html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
+-
+-        result->append(html);
+-    } while (epub_it_get_next(iter));
+-
+-    epub_free_iterator(iter);
+-
+-    struct titerator* tit;
+-
+-    tit = epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0);
+-    if (!tit) {
+-        tit = epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0);
+-    }
+-
+-    if (epub_tit_curr_valid(tit)) {
+-        do {
+-            char* clink = epub_tit_get_curr_link(tit);
+-
+-            char* data;
+-            int size = epub_get_data(ePubDoc, clink, &data);
+-            free(clink);
+-
+-            // epub_get_data returns -1 on failure
+-            if (size > 0 && data) {
+-                QString html = QString::fromUtf8(data, size);
+-                // strip html tags
+-                html.remove(QRegularExpression(QStringLiteral("<[^>]*>")));
+-
+-                result->append(html);
+-                free(data);
+-            }
+-        } while (epub_tit_next(tit));
+-    }
+-    epub_free_titerator(tit);
++    // close epub file again
++    epub_close(ePubDoc);
+ }
+ 
+

diff --git a/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-odfextractor-segfault.patch b/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-odfextractor-segfault.patch
new file mode 100644
index 0000000..9f3029b
--- /dev/null
+++ b/kde-frameworks/kfilemetadata/files/kfilemetadata-5.26.0-odfextractor-segfault.patch
@@ -0,0 +1,66 @@
+From: Christoph Cullmann <[email protected]>
+Date: Sun, 11 Sep 2016 13:07:47 +0000
+Subject: Make odf indexer more error prove, check if the files are there (and are files at all) (meta.xml + content.xml)
+X-Git-Url: http://quickgit.kde.org/?p=kfilemetadata.git&a=commitdiff&h=40730d75397aefb92145f86fc6abc9b303c56cfe
+---
+Make odf indexer more error prove, check if the files are there (and are files at all) (meta.xml + content.xml)
+
+REVIEW: 128886
+BUG 364748
+
+=> if you download this odt's to indexed directories your baloo will die on each index, be careful
+---
+
+
+--- a/src/extractors/odfextractor.cpp
++++ b/src/extractors/odfextractor.cpp
+@@ -2,6 +2,7 @@
+     <one line to give the library's name and an idea of what it does.>
+     Copyright (C) 2013  Vishesh Handa <[email protected]>
+     Copyright (C) 2012  Jörg Ehrichs <[email protected]>
++    Copyright (C) 2016  Christoph Cullmann <[email protected]>
+ 
+     This library is free software; you can redistribute it and/or
+     modify it under the terms of the GNU Lesser General Public
+@@ -59,19 +60,18 @@
+         return;
+     }
+ 
+-    const QStringList entries = directory->entries();
+-    if (!entries.contains(QStringLiteral("meta.xml"))) {
++    // we need a meta xml file in the archive!
++    const auto metaXml = directory->entry(QStringLiteral("meta.xml"));
++    if (!metaXml || !metaXml->isFile()) {
+         qWarning() << "Invalid document structure (meta.xml is missing)";
+         return;
+     }
+ 
+     QDomDocument metaData(QStringLiteral("metaData"));
+-    const KArchiveFile* file = static_cast<const KArchiveFile*>(directory->entry(QStringLiteral("meta.xml")));
+-    metaData.setContent(file->data());
++    metaData.setContent(static_cast<const KArchiveFile*>(metaXml)->data());
+ 
+     // parse metadata ...
+     QDomElement docElem = metaData.documentElement();
+-
+     QDomNode n = docElem.firstChild().firstChild(); // <office:document-meta> ... <office:meta> ... content
+     while (!n.isNull()) {
+         QDomElement e = n.toElement();
+@@ -129,9 +129,14 @@
+         return;
+     }
+ 
+-    const KArchiveFile* contentsFile = static_cast<const KArchiveFile*>(directory->entry(QStringLiteral("content.xml")));
+-    QXmlStreamReader xml(contentsFile->createDevice());
++    // for content indexing, we need content xml file
++    const auto contentXml = directory->entry(QStringLiteral("content.xml"));
++    if (!contentXml || !contentXml->isFile()) {
++        qWarning() << "Invalid document structure (content.xml is missing)";
++        return;
++    }
+ 
++    QXmlStreamReader xml(static_cast<const KArchiveFile*>(contentXml)->createDevice());
+     while (!xml.atEnd()) {
+         xml.readNext();
+         if (xml.isCharacters()) {
+

diff --git a/kde-frameworks/kfilemetadata/kfilemetadata-5.26.0-r1.ebuild b/kde-frameworks/kfilemetadata/kfilemetadata-5.26.0-r1.ebuild
new file mode 100644
index 0000000..ecf7e65
--- /dev/null
+++ b/kde-frameworks/kfilemetadata/kfilemetadata-5.26.0-r1.ebuild
@@ -0,0 +1,55 @@
+# Copyright 1999-2016 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=6
+
+inherit eutils kde5
+
+DESCRIPTION="Library for extracting file metadata"
+KEYWORDS="~amd64 ~arm ~x86"
+IUSE="epub exif ffmpeg libav pdf taglib"
+
+RDEPEND="
+       $(add_frameworks_dep karchive)
+       $(add_frameworks_dep ki18n)
+       $(add_qt_dep qtxml)
+       epub? ( app-text/ebook-tools )
+       exif? ( media-gfx/exiv2:= )
+       ffmpeg? (
+               libav? ( media-video/libav:= )
+               !libav? ( media-video/ffmpeg:0= )
+       )
+       pdf? ( app-text/poppler[qt5] )
+       taglib? ( media-libs/taglib )
+"
+DEPEND="${RDEPEND}
+       kernel_linux? ( sys-apps/attr )
+"
+
+PATCHES=(
+       "${FILESDIR}/${P}-odfextractor-segfault.patch"
+       "${FILESDIR}/${P}-epubextractor-segfault.patch"
+)
+
+src_configure() {
+       local mycmakeargs=(
+               $(cmake-utils_use_find_package epub EPub)
+               $(cmake-utils_use_find_package exif Exiv2)
+               $(cmake-utils_use_find_package ffmpeg FFmpeg)
+               $(cmake-utils_use_find_package pdf PopplerQt5)
+               $(cmake-utils_use_find_package taglib Taglib)
+       )
+
+       kde5_src_configure
+}
+
+pkg_postinst() {
+       kde5_pkg_postinst
+
+       if ! has_version app-text/catdoc || ! has_version dev-libs/libxls; then
+               elog "To get additional features, optional runtime dependencies may be installed:"
+               optfeature "indexing of Microsoft Word or Powerpoint files" app-text/catdoc
+               optfeature "indexing of Microsoft Excel files" dev-libs/libxls
+       fi
+}

Reply via email to