Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package qt6-speech for openSUSE:Factory checked in at 2026-03-28 20:12:48 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/qt6-speech (Old) and /work/SRC/openSUSE:Factory/.qt6-speech.new.8177 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "qt6-speech" Sat Mar 28 20:12:48 2026 rev:28 rq:1342815 version:6.11.0 Changes: -------- --- /work/SRC/openSUSE:Factory/qt6-speech/qt6-speech.changes 2026-02-03 21:28:16.272501947 +0100 +++ /work/SRC/openSUSE:Factory/.qt6-speech.new.8177/qt6-speech.changes 2026-03-28 20:13:22.965501368 +0100 @@ -1,0 +2,6 @@ +Mon Mar 23 10:37:54 UTC 2026 - Christophe Marin <[email protected]> + +- Update to 6.11.0 + https://www.qt.io/blog/qt-6.11-released + +------------------------------------------------------------------- Old: ---- qtspeech-everywhere-src-6.10.2.tar.xz New: ---- qtspeech-everywhere-src-6.11.0.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ qt6-speech.spec ++++++ --- /var/tmp/diff_new_pack.Cl4YIb/_old 2026-03-28 20:13:23.485522830 +0100 +++ /var/tmp/diff_new_pack.Cl4YIb/_new 2026-03-28 20:13:23.485522830 +0100 @@ -1,7 +1,7 @@ # # spec file for package qt6-speech # -# Copyright (c) 2025 SUSE LLC and contributors +# Copyright (c) 2026 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,8 +16,8 @@ # -%define real_version 6.10.2 -%define short_version 6.10 +%define real_version 6.11.0 +%define short_version 6.11 %define tar_name qtspeech-everywhere-src %define tar_suffix %{nil} # @@ -31,7 +31,7 @@ %bcond_without flite %endif Name: qt6-speech%{?pkg_suffix} -Version: 6.10.2 +Version: 6.11.0 Release: 0 Summary: Qt 6 TextToSpeech Library and Plugin License: GPL-2.0-only OR LGPL-3.0-only OR GPL-3.0-only @@ -42,10 +42,12 @@ BuildRequires: flite-devel > 2.0 %endif BuildRequires: pkgconfig +BuildRequires: cmake(Qt6Concurrent) = %{real_version} BuildRequires: cmake(Qt6Core) = %{real_version} BuildRequires: cmake(Qt6CorePrivate) = %{real_version} BuildRequires: cmake(Qt6Gui) = %{real_version} BuildRequires: cmake(Qt6Multimedia) = %{real_version} +BuildRequires: cmake(Qt6MultimediaPrivate) = %{real_version} BuildRequires: cmake(Qt6Qml) = %{real_version} BuildRequires: cmake(Qt6Widgets) = %{real_version} BuildRequires: pkgconfig(speech-dispatcher) ++++++ qtspeech-everywhere-src-6.10.2.tar.xz -> qtspeech-everywhere-src-6.11.0.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/.cmake.conf new/qtspeech-everywhere-src-6.11.0/.cmake.conf --- old/qtspeech-everywhere-src-6.10.2/.cmake.conf 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/.cmake.conf 2026-03-10 09:31:58.000000000 +0100 @@ -1,6 +1,7 @@ -set(QT_REPO_MODULE_VERSION "6.10.2") +set(QT_REPO_MODULE_VERSION "6.11.0") set(QT_EXTRA_INTERNAL_TARGET_DEFINES "QT_NO_AS_CONST=1" "QT_NO_CONTEXTLESS_CONNECT=1" "QT_NO_FOREACH=1" + "QT_NO_URL_CAST_FROM_STRING=1" ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/.tag new/qtspeech-everywhere-src-6.11.0/.tag --- old/qtspeech-everywhere-src-6.10.2/.tag 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/.tag 2026-03-10 09:31:58.000000000 +0100 @@ -1 +1 @@ -e16b1543cb2ff7238927ebc8a65c03442480d528 +bad8290b995a8990e64605dd60dcf4a5b898e29e diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/CMakeLists.txt new/qtspeech-everywhere-src-6.11.0/CMakeLists.txt --- old/qtspeech-everywhere-src-6.10.2/CMakeLists.txt 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/CMakeLists.txt 2026-03-10 09:31:58.000000000 +0100 @@ -13,14 +13,19 @@ LANGUAGES CXX C ) -find_package(Qt6 ${PROJECT_VERSION} CONFIG REQUIRED COMPONENTS BuildInternals Core) -find_package(Qt6 ${PROJECT_VERSION} CONFIG OPTIONAL_COMPONENTS Gui Multimedia Widgets Test QuickTest Qml) +find_package(Qt6 ${PROJECT_VERSION} CONFIG REQUIRED COMPONENTS BuildInternals) + +# This should be called as early as possible, just after find_package(BuildInternals) where it is +# defined. +qt_internal_project_setup() + +find_package(Qt6 ${PROJECT_VERSION} CONFIG REQUIRED COMPONENTS Core) +find_package(Qt6 ${PROJECT_VERSION} CONFIG OPTIONAL_COMPONENTS Gui Multimedia Widgets Test QuickTest + Qml) if(NOT TARGET Qt6::Multimedia) message(NOTICE "Skipping the build as the condition \"TARGET Qt6::Multimedia\" is not met.") return() endif() -qt_internal_project_setup() - qt_build_repo() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/REUSE.toml new/qtspeech-everywhere-src-6.11.0/REUSE.toml --- old/qtspeech-everywhere-src-6.10.2/REUSE.toml 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/REUSE.toml 2026-03-10 09:31:58.000000000 +0100 @@ -6,7 +6,7 @@ "src/plugins/**json"] precedence = "closest" comment = "src and plugin" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only" [[annotations]] @@ -14,40 +14,40 @@ "qt_cmdline.cmake", "**BLACKLIST", "**ci_config_linux.json", ".tag"] precedence = "closest" comment = "build system" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "BSD-3-Clause" [[annotations]] path = ["**/.gitattributes", "**.gitignore", "**.gitreview"] precedence = "closest" comment = "version control system. Infrastruture" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR BSD-3-Clause" [[annotations]] path = ["examples/**"] comment = "this must be after the build system table because example and snippets take precedence over build system" precedence = "closest" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR BSD-3-Clause" [[annotations]] path = ["**/doc/images/**", "**.qdocconf", "config_help.txt"] comment = "documentation" precedence = "closest" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR GFDL-1.3-no-invariants-only" [[annotations]] path = ["**.toml", "licenseRule.json"] comment = "infrastructure" precedence = "override" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR BSD-3-Clause" [[annotations]] path = ["**/qt_attribution.json"] comment = "documentation" precedence = "override" -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR GFDL-1.3-no-invariants-only" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/dependencies.yaml new/qtspeech-everywhere-src-6.11.0/dependencies.yaml --- old/qtspeech-everywhere-src-6.10.2/dependencies.yaml 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/dependencies.yaml 2026-03-10 09:31:58.000000000 +0100 @@ -1,10 +1,10 @@ dependencies: ../qtbase: - ref: 000d6c62f7880bb8d3054724e8da0b8ae244130e + ref: 8ba7ea4b77a4b8f1948760221e264917ddc9e1c8 required: true ../qtdeclarative: - ref: 09c70541c76659bcd8c49f05841b0e778c9ffd4c + ref: e602a097ca314e9610b1fd3b7dbfa467a868731a required: false ../qtmultimedia: - ref: 5f307447a3396215c35ce44856d3884447f44aab + ref: 8ffcce57ebc4a557fb07727c60b8511e39c6b28b required: true diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/dist/REUSE.toml new/qtspeech-everywhere-src-6.11.0/dist/REUSE.toml --- old/qtspeech-everywhere-src-6.10.2/dist/REUSE.toml 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/dist/REUSE.toml 2026-03-10 09:31:58.000000000 +0100 @@ -4,5 +4,5 @@ path = ["*"] precedence = "override" comment = "Licensed as documentation." -SPDX-FileCopyrightText = "Copyright (C) 2024 The Qt Company Ltd." +SPDX-FileCopyrightText = "Copyright (C) The Qt Company Ltd." SPDX-License-Identifier = "LicenseRef-Qt-Commercial OR GFDL-1.3-no-invariants-only" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/examples/speech/hello_speak/doc/src/hellospeakex.qdoc new/qtspeech-everywhere-src-6.11.0/examples/speech/hello_speak/doc/src/hellospeakex.qdoc --- old/qtspeech-everywhere-src-6.10.2/examples/speech/hello_speak/doc/src/hellospeakex.qdoc 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/examples/speech/hello_speak/doc/src/hellospeakex.qdoc 2026-03-10 09:31:58.000000000 +0100 @@ -17,5 +17,7 @@ volume, and rate of the speech. It also lets the user select an engine, the language, and a voice. - \image hellospeak-example.png + \image hellospeak-example.png {Screenshot showing a tool for text to speech + with options to change volume, rate, pitch, engine language and reader + voice} */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/examples/speech/quickspeech/doc/src/quickspeech.qdoc new/qtspeech-everywhere-src-6.11.0/examples/speech/quickspeech/doc/src/quickspeech.qdoc --- old/qtspeech-everywhere-src-6.10.2/examples/speech/quickspeech/doc/src/quickspeech.qdoc 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/examples/speech/quickspeech/doc/src/quickspeech.qdoc 2026-03-10 09:31:58.000000000 +0100 @@ -9,7 +9,9 @@ \brief The Quick Speech example reads out user-provided text. \meta tags {quick Android} - \image quickspeech-example.png + \image quickspeech-example.png {Screenshot showing a tool for text to speech + with options to change engine, locale, voice, volume, pitch, and + rate} The \e{Quick Speech} example demonstrates how the \l[QML]{TextToSpeech} type can be used in a \l{Qt Quick} application to read out text and to control @@ -26,7 +28,7 @@ \section1 Getting the status - \image status.gif + \image status.gif {Gif showing a status example of the tool} Switch cases are used to update the Label \c statusLabel in the footer. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/android/src/qtexttospeech_android.cpp new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/android/src/qtexttospeech_android.cpp --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/android/src/qtexttospeech_android.cpp 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/android/src/qtexttospeech_android.cpp 2026-03-10 09:31:58.000000000 +0100 @@ -122,7 +122,7 @@ Q_DECLARE_JNI_NATIVE_METHOD(notifyEndSynthesis) -Q_DECL_EXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void */*reserved*/) +extern "C" Q_DECL_EXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void */*reserved*/) { static bool initialized = false; if (initialized) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/CMakeLists.txt new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/CMakeLists.txt --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/CMakeLists.txt 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/CMakeLists.txt 2026-03-10 09:31:58.000000000 +0100 @@ -1,6 +1,8 @@ # Copyright (C) 2022 The Qt Company Ltd. # SPDX-License-Identifier: BSD-3-Clause +find_package(Qt6 ${PROJECT_VERSION} CONFIG REQUIRED COMPONENTS Concurrent MultimediaPrivate) + qt_internal_add_plugin(QTextToSpeechFlitePlugin OUTPUT_NAME qtexttospeech_flite PLUGIN_TYPE texttospeech @@ -10,7 +12,9 @@ qtexttospeech_flite_processor.cpp qtexttospeech_flite_processor.h LIBRARIES Flite::Flite + Qt::Concurrent Qt::Core Qt::Multimedia + Qt::MultimediaPrivate Qt::TextToSpeech ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite.cpp new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite.cpp --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite.cpp 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite.cpp 2026-03-10 09:31:58.000000000 +0100 @@ -4,7 +4,8 @@ #include "qtexttospeech_flite.h" -#include <QtCore/QCoreApplication> +#include <QtCore/qcoreapplication.h> +#include <QtCore/qsemaphore.h> QT_BEGIN_NAMESPACE @@ -23,7 +24,7 @@ m_errorReason = QTextToSpeech::ErrorReason::Playback; m_errorString = QCoreApplication::translate("QTextToSpeech", "No audio device available"); } - m_processor.reset(new QTextToSpeechProcessorFlite(audioDevice)); + m_processor = std::make_unique<QTextToSpeechProcessorFlite>(audioDevice); // Connect processor to engine for state changes and error connect(m_processor.get(), &QTextToSpeechProcessorFlite::stateChanged, @@ -56,6 +57,7 @@ m_thread.setObjectName("QTextToSpeechEngineFlite"); m_processor->moveToThread(&m_thread); m_thread.start(); + m_thread.setPriority(QThread::HighestPriority); // we feed data to the audio sink } else { m_errorReason = QTextToSpeech::ErrorReason::Configuration; m_errorString = QCoreApplication::translate("QTextToSpeech", "No voices available"); @@ -64,6 +66,12 @@ QTextToSpeechEngineFlite::~QTextToSpeechEngineFlite() { + if (m_processor->thread() != thread()) { + QMetaObject::invokeMethod(m_processor.get(), [&] { + m_processor.reset(); // ensure destruction on the correct thread + }, Qt::BlockingQueuedConnection); + } + m_thread.exit(); m_thread.wait(); } @@ -94,14 +102,14 @@ void QTextToSpeechEngineFlite::stop(QTextToSpeech::BoundaryHint boundaryHint) { - Q_UNUSED(boundaryHint); - QMetaObject::invokeMethod(m_processor.get(), &QTextToSpeechProcessorFlite::stop, Qt::QueuedConnection); + QMetaObject::invokeMethod(m_processor.get(), &QTextToSpeechProcessorFlite::stop, + Qt::QueuedConnection, boundaryHint); } void QTextToSpeechEngineFlite::pause(QTextToSpeech::BoundaryHint boundaryHint) { - Q_UNUSED(boundaryHint); - QMetaObject::invokeMethod(m_processor.get(), &QTextToSpeechProcessorFlite::pause, Qt::QueuedConnection); + QMetaObject::invokeMethod(m_processor.get(), &QTextToSpeechProcessorFlite::pause, + Qt::QueuedConnection, boundaryHint); } void QTextToSpeechEngineFlite::resume() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp 2026-03-10 09:31:58.000000000 +0100 @@ -8,17 +8,24 @@ #include <QtCore/qcoreapplication.h> #include <QtCore/qlocale.h> #include <QtCore/qmap.h> +#include <QtCore/qpointer.h> #include <QtCore/qprocessordetection.h> #include <QtCore/qspan.h> #include <QtCore/qstring.h> - -#include <thread> +#include <QtCore/qthreadpool.h> +#include <QtConcurrent/qtconcurrentrun.h> +#include <QtMultimedia/private/qaudiohelpers_p.h> +#include <QtMultimedia/private/qaudiosystem_p.h> #include <flite/flite.h> +#include <deque> +#include <utility> + QT_BEGIN_NAMESPACE using namespace Qt::StringLiterals; +using namespace std::chrono_literals; namespace { @@ -134,194 +141,383 @@ return fmt; } +// we use a dedicated thread pool for flite synthesis: +// * it has a higher priority than the system thread pool +// * synthesizing multiple voices in parallel does not really make sense, so we limit it to 2 +// threads (it will typically only be one) +std::shared_ptr<QThreadPool> getFliteThreadPool() +{ + static std::weak_ptr<QThreadPool> singleton; + static QMutex mutex; + std::lock_guard guard{ mutex }; + std::shared_ptr<QThreadPool> pool = singleton.lock(); + if (pool) + return pool; + + pool = std::make_shared<QThreadPool>(); + pool->setMaxThreadCount(2); + pool->setThreadPriority(QThread::HighPriority); + pool->setObjectName(u"QFliteThreadPool"_s); + + singleton = pool; + return pool; +} + } // namespace -QTextToSpeechProcessorFlite::QTextToSpeechProcessorFlite(const QAudioDevice &audioDevice) - : m_audioDevice(audioDevice) +/////////////////////////////////////////////////////////////////////////////////////////////////// + +class QFliteSynthesisProcess final : public QIODevice { - init(); -} + struct TokenInformation + { + QString word; + std::chrono::milliseconds startTime; + }; -QTextToSpeechProcessorFlite::~QTextToSpeechProcessorFlite() + using BoundaryHint = QTextToSpeech::BoundaryHint; + +public: + QFliteSynthesisProcess(cst_voice *voice, QTextToSpeechProcessorFlite *parent, QString text, + float pitch, float rate); + ~QFliteSynthesisProcess(); + + void pause(QTextToSpeech::BoundaryHint boundaryHint); + void stop(QTextToSpeech::BoundaryHint boundaryHint); + void resume(); + +private: + template <typename Closure> + void invokeOnParent(Closure c); + + // flite synthesis thread + void runFliteSynthesis(); + int outputCallback(const cst_wave *w, int start, int size, int last, + struct cst_audio_streaming_info_struct *asi); + static std::optional<TokenInformation> + detectNewToken(const cst_wave *w, int start, int size, + struct cst_audio_streaming_info_struct *asi); + + // QIODevice interface + qint64 readData(char *data, qint64 maxlen) override; + qint64 writeData(const char *, qint64) override { return -1; } + qint64 bytesAvailable() const override; + + // immutable state + QTextToSpeechProcessorFlite *const m_parent; + cst_voice *const m_voice; // borrowed + const QString m_text; + + // thread + std::shared_ptr<QThreadPool> m_threadPool = getFliteThreadPool(); + QFuture<void> m_task; + + // state + QAudioFormat m_format; + std::deque<char> m_audioBuffer; + std::deque<TokenInformation> m_tokens; + qsizetype m_currentBytePosition{}; // Position of m_audioBuffer.begin() + qsizetype m_currentTokenIndex{}; + bool m_lastChunkReceived{}; + + // pause/stop handling + bool m_paused{}; + // NOTE: at the moment only BoundaryHint::Word is supported + std::optional<QTextToSpeech::BoundaryHint> m_pauseRequest; + std::optional<QTextToSpeech::BoundaryHint> m_stopRequest; + + std::optional<qint64> bytesToNextWord() const; +}; + +QFliteSynthesisProcess::QFliteSynthesisProcess(cst_voice *voice, + QTextToSpeechProcessorFlite *parent, QString text, + float pitch, float rate) + : m_parent(parent), m_voice(voice), m_text(std::move(text)) { - for (const VoiceInfo &voice : std::as_const(m_voices)) - voice.unregister_func(voice.vox); + Q_ASSERT(m_voice); + Q_ASSERT(m_parent); + + // prepare voice + setRateForVoice(m_voice, rate); + setPitchForVoice(m_voice, pitch); + + m_task = QtConcurrent::run(m_threadPool.get(), [this] { + runFliteSynthesis(); + }); + + open(ReadOnly | Unbuffered); } -const QList<QTextToSpeechProcessorFlite::VoiceInfo> &QTextToSpeechProcessorFlite::voices() const +QFliteSynthesisProcess::~QFliteSynthesisProcess() { - return m_voices; + m_task.cancel(); + m_task.waitForFinished(); } -int QTextToSpeechProcessorFlite::audioOutputCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi) +void QFliteSynthesisProcess::pause(QTextToSpeech::BoundaryHint boundaryHint) { - auto *processor = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata); - Q_ASSERT(processor); + if (m_paused) + return; - if (!asi->item) - asi->item = relation_head(utt_relation(asi->utt, "Token")); + switch (boundaryHint) { + case BoundaryHint::Default: + case BoundaryHint::Immediate: + Q_UNREACHABLE_RETURN(); + break; - const float tokenStartTime = flite_ffeature_float( - asi->item, "R:Token.daughter1.R:SylStructure.daughter1.daughter1.R:Segment.p.end"); - const int tokenStartSample = int(tokenStartTime * float(w->sample_rate)); - if ((tokenStartSample >= start) && (tokenStartSample < start + size)) { - // a new token starts in this chunk - processor->audioHandleNewToken( - std::chrono::milliseconds(std::lround(tokenStartTime * 1000)), asi); - asi->item = item_next(asi->item); + default: + m_pauseRequest = boundaryHint; } - return processor->audioOutput(w, start, size, last, asi); } -int QTextToSpeechProcessorFlite::audioOutput(const cst_wave *w, int start, int size, int last, - cst_audio_streaming_info *) +void QFliteSynthesisProcess::stop(QTextToSpeech::BoundaryHint boundaryHint) { - Q_ASSERT(QThread::currentThread() == thread()); - if (size == 0) - return CST_AUDIO_STREAM_CONT; - if (start == 0 && !initAudio(w)) - return CST_AUDIO_STREAM_STOP; + switch (boundaryHint) { + case BoundaryHint::Default: + case BoundaryHint::Immediate: + Q_UNREACHABLE_RETURN(); + break; - QSpan fliteStream{ w->samples + start, size }; - QSpan fliteBytes = as_bytes(fliteStream); + default: + m_stopRequest = boundaryHint; + } +} - using namespace std::chrono_literals; +void QFliteSynthesisProcess::resume() +{ + m_paused = false; + m_pauseRequest = std::nullopt; +} - std::optional<std::chrono::steady_clock::time_point> startTime; - qsizetype totalBytesWritten = 0; +template <typename Closure> +void QFliteSynthesisProcess::invokeOnParent(Closure c) +{ + QMetaObject::invokeMethod( + m_parent, + [parent = m_parent, self = QPointer{ this }, closure = std::move(c)]() mutable { + if (!parent->m_synthesisProcess || (parent->m_synthesisProcess.get() != self)) + return; // Another synthesis process has started - auto handleStreamingError = [&] { - setError(QTextToSpeech::ErrorReason::Playback, - QCoreApplication::translate("QTextToSpeech", "Audio streaming error.")); - stop(); - return CST_AUDIO_STREAM_STOP; - }; + closure(parent); + }, Qt::QueuedConnection); +} - while (!fliteBytes.isEmpty()) { - qsizetype bytesWritten = m_audioIODevice->write( - reinterpret_cast<const char *>(fliteBytes.data()), fliteBytes.size()); +void QFliteSynthesisProcess::runFliteSynthesis() +{ + qCDebug(lcSpeechTtsFlite) << "QFliteSynthesisProcess() begin"; - if (bytesWritten < 0) // something really went wrong - return handleStreamingError(); + cst_audio_streaming_info *asi = new_audio_streaming_info(); + asi->asc = [](const cst_wave *w, int start, int size, int last, + struct cst_audio_streaming_info_struct *asi) { + auto *self = static_cast<QFliteSynthesisProcess *>(asi->userdata); + return self->outputCallback(w, start, size, last, asi); + }; + asi->userdata = (void *)this; + feat_set(m_voice->features, "streaming_info", audio_streaming_info_val(asi)); - totalBytesWritten += bytesWritten; - if (bytesWritten == fliteBytes.size()) - break; + float secsToSpeak = flite_text_to_speech(m_text.toUtf8().constData(), m_voice, "none"); - if (bytesWritten) - fliteBytes = fliteBytes.subspan(bytesWritten); // ranges::drop + if (secsToSpeak <= 0) { + invokeOnParent([](QTextToSpeechProcessorFlite *parent) { + parent->setError( + QTextToSpeech::ErrorReason::Input, + QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure.")); + }); + return; + }; - // we could not write (all) data to the QIODevice. Back off and retry for 5 seconds before - // we give up. We cannot query the state of the QAudioSink here, as that would require event - // loop interaction. - constexpr auto timeout = 5s; + qCDebug(lcSpeechTtsFlite) << "QFliteSynthesisProcess() end" << secsToSpeak << "Seconds"; +} - if (!startTime) - startTime = std::chrono::steady_clock::now(); - else if (std::chrono::steady_clock::now() - *startTime > timeout) - return handleStreamingError(); +int QFliteSynthesisProcess::outputCallback(const cst_wave *w, int start, int size, int last, + cst_audio_streaming_info_struct *asi) +{ + Q_ASSERT(w); - std::this_thread::sleep_for(5ms); + if (start == 0) { + invokeOnParent([this, format = getAudioFormat(*w)](QTextToSpeechProcessorFlite *parent) { + m_format = format; + parent->prepareAudioSink(format); + }); } - // Stats for debugging - ++numberChunks; - totalBytes += totalBytesWritten; + QSpan fliteStream{ + w->samples + start, + size, + }; + QByteArray chunk{ + reinterpret_cast<const char *>(fliteStream.data()), + fliteStream.size_bytes(), + }; - if (last == 1) { - qCDebug(lcSpeechTtsFlite) << "last data chunk written"; - m_audioIODevice->close(); - } + std::optional<TokenInformation> token = detectNewToken(w, start, size, asi); + + invokeOnParent([this, chunk = std::move(chunk), token = std::move(token), + last](QTextToSpeechProcessorFlite *) mutable { + m_audioBuffer.insert(m_audioBuffer.end(), chunk.begin(), chunk.end()); + + if (token) + m_tokens.push_back(std::move(*token)); + if (last) + m_lastChunkReceived = true; + + emit QIODevice::bytesAvailable(); + }); + + if (m_task.isCanceled()) + return CST_AUDIO_STREAM_STOP; return CST_AUDIO_STREAM_CONT; } -void QTextToSpeechProcessorFlite::audioHandleNewToken(std::chrono::milliseconds tokenStartTime, - cst_audio_streaming_info *asi) +qint64 QFliteSynthesisProcess::readData(char *data, qint64 maxlen) { - auto normalizeFeatureString = [&](const char *feature) -> const char * { - const char *featureString = flite_ffeature_string(asi->item, feature); - if (cst_streq("0", featureString)) - return ""; - return featureString; - }; + if (m_paused) + Q_ASSERT(m_pauseRequest || m_stopRequest); - const char *token = flite_ffeature_string(asi->item, "name"); - if (!token) { - Q_UNLIKELY_BRANCH; - qCWarning(lcSpeechTtsFlite) << "No token found, skipping"; - return; + const qint64 bytesAvailable = this->bytesAvailable(); + const qint64 bytesRequested = std::min(bytesAvailable, maxlen); + qint64 bytesToRead = bytesRequested; + + bool atWordBoundary = false; + if (!m_paused && (m_pauseRequest || m_stopRequest)) { + std::optional<qsizetype> bytesToNextWord = this->bytesToNextWord(); + if (bytesToNextWord && bytesToNextWord < bytesRequested) { + // We are at a word boundary, so we only read up to the next word. + bytesToRead = bytesToNextWord.value(); + atWordBoundary = true; + } } - qCDebug(lcSpeechTtsFlite).nospace() - << "Processing token start_time: " << tokenStartTime << " content: \"" - << flite_ffeature_string(asi->item, "whitespace") - << normalizeFeatureString("prepunctuation") << "'" << token << "'" - << normalizeFeatureString("punc") << "\""; + if (m_paused) { + // feed null to sink during async operation + std::fill_n(data, bytesToRead, 0); + } else { + std::copy_n(m_audioBuffer.begin(), bytesToRead, data); + std::fill_n(data, bytesRequested - bytesToRead, 0); + + m_audioBuffer.erase(m_audioBuffer.begin(), m_audioBuffer.begin() + bytesToRead); + + m_currentBytePosition += bytesToRead; + + const std::chrono::microseconds currentTimeStamp{ + m_format.durationForBytes(m_currentBytePosition), + }; + + while (!m_tokens.empty() && m_tokens.front().startTime <= currentTimeStamp) { + const TokenInformation &token = m_tokens.front(); + m_currentTokenIndex = m_text.indexOf(token.word, m_currentTokenIndex); + emit m_parent->sayingWord(token.word, m_currentTokenIndex, token.word.length()); + m_tokens.pop_front(); + } + } + + const bool stopSynthesisProcess = [&] { + if (m_lastChunkReceived && m_audioBuffer.empty()) + return true; // end of file reached + if (atWordBoundary && m_stopRequest == BoundaryHint::Word) + return true; // stop at word boundary + return false; + }(); + + if (stopSynthesisProcess) { + m_paused = true; // we feed silence to the audio sink until the stop is processed + + invokeOnParent([](QTextToSpeechProcessorFlite *parent) { + parent->stop(QTextToSpeech::BoundaryHint::Immediate); + }); + } else if (atWordBoundary && m_pauseRequest == BoundaryHint::Word) { + m_paused = true; + + invokeOnParent([](QTextToSpeechProcessorFlite *parent) { + parent->pause(QTextToSpeech::BoundaryHint::Immediate); + }); + } - QString currentToken = QString::fromUtf8(token); - m_index = m_text.indexOf(currentToken, m_index); - emit sayingWord(currentToken, m_index, currentToken.length()); + return bytesToRead; } -int QTextToSpeechProcessorFlite::dataOutputCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi) +qint64 QFliteSynthesisProcess::bytesAvailable() const { - auto *processor = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata); - Q_ASSERT(processor); - return processor->dataOutput(w, start, size, last, asi); + return qint64(m_audioBuffer.size()); } -int QTextToSpeechProcessorFlite::dataOutput(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *) +std::optional<qint64> QFliteSynthesisProcess::bytesToNextWord() const { - if (start == 0) - emit stateChanged(QTextToSpeech::Synthesizing); + if (m_tokens.empty()) + return std::nullopt; - if (!m_synthesisFormat) { - QAudioFormat format = getAudioFormat(*w); - if (!format.isValid()) - return CST_AUDIO_STREAM_STOP; - m_synthesisFormat = format; - } - - const qsizetype bytesToWrite = size * m_synthesisFormat->bytesPerSample(); - emit synthesized(*m_synthesisFormat, - QByteArray(reinterpret_cast<const char *>(&w->samples[start]), bytesToWrite)); - - if (last == 1) - emit stateChanged(QTextToSpeech::Ready); + using namespace std::chrono; - return CST_AUDIO_STREAM_CONT; + const microseconds currentTimeStamp{ + m_format.durationForBytes(m_currentBytePosition), + }; + const microseconds nextTokenStart{ + m_tokens.front().startTime, + }; + return m_format.bytesForDuration((nextTokenStart - currentTimeStamp).count()); } -void QTextToSpeechProcessorFlite::processText(const QString &text, int voiceId, float pitch, - float rate, OutputHandler outputHandler) +std::optional<QFliteSynthesisProcess::TokenInformation> +QFliteSynthesisProcess::detectNewToken(const cst_wave *w, int start, int size, + cst_audio_streaming_info_struct *asi) { - qCDebug(lcSpeechTtsFlite) << "processText() begin"; - if (!checkVoice(voiceId)) - return; + if (!asi->item) + asi->item = relation_head(utt_relation(asi->utt, "Token")); - m_text = text; - m_index = 0; - float secsToSpeak = -1; - const VoiceInfo &voiceInfo = m_voices.at(voiceId); - cst_voice *voice = voiceInfo.vox; - cst_audio_streaming_info *asi = new_audio_streaming_info(); - asi->asc = outputHandler; - asi->userdata = (void *)this; - feat_set(voice->features, "streaming_info", audio_streaming_info_val(asi)); - setRateForVoice(voice, rate); - setPitchForVoice(voice, pitch); - secsToSpeak = flite_text_to_speech(text.toUtf8().constData(), voice, "none"); + const float tokenStartTime = flite_ffeature_float( + asi->item, "R:Token.daughter1.R:SylStructure.daughter1.daughter1.R:Segment.p.end"); + const int tokenStartSample = int(tokenStartTime * float(w->sample_rate)); + if ((tokenStartSample >= start) && (tokenStartSample < start + size)) { - if (secsToSpeak <= 0) { - setError(QTextToSpeech::ErrorReason::Input, - QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure.")); - return; + const char *token = flite_ffeature_string(asi->item, "name"); + if (!token) { + Q_UNLIKELY_BRANCH; + qCWarning(lcSpeechTtsFlite) << "No token found, skipping"; + return std::nullopt; + } + + auto normalizeFeatureString = [&](const char *feature) -> const char * { + const char *featureString = flite_ffeature_string(asi->item, feature); + if (cst_streq("0", featureString)) + return ""; + return featureString; + }; + + auto tokenStartTimestamp = std::chrono::milliseconds(std::lround(tokenStartTime * 1'000)); + + qCDebug(lcSpeechTtsFlite).nospace() + << "Processing token start_time: " << tokenStartTimestamp << " content: \"" + << flite_ffeature_string(asi->item, "whitespace") + << normalizeFeatureString("prepunctuation") << "'" << token << "'" + << normalizeFeatureString("punc") << "\""; + + asi->item = item_next(asi->item); + return TokenInformation{ + QString::fromUtf8(token), + tokenStartTimestamp, + }; } + return std::nullopt; +} - qCDebug(lcSpeechTtsFlite) << "processText() end" << secsToSpeak << "Seconds"; +/////////////////////////////////////////////////////////////////////////////////////////////////// + +QTextToSpeechProcessorFlite::QTextToSpeechProcessorFlite(QAudioDevice audioDevice) + : m_audioDevice(std::move(audioDevice)) +{ + init(); +} + +QTextToSpeechProcessorFlite::~QTextToSpeechProcessorFlite() +{ + m_synthesisProcess.reset(); + for (const VoiceInfo &voice : std::as_const(m_voices)) + voice.unregister_func(voice.vox); +} + +const QList<QTextToSpeechProcessorFlite::VoiceInfo> &QTextToSpeechProcessorFlite::voices() const +{ + return m_voices; } typedef cst_voice*(*registerFnType)(); @@ -369,142 +565,57 @@ return !m_voices.isEmpty(); } -bool QTextToSpeechProcessorFlite::initAudio(const cst_wave *w) -{ - m_format = getAudioFormat(*w); - if (!checkFormat(m_format)) - return false; - - createSink(); - - return bool(m_audioSink); -} - -void QTextToSpeechProcessorFlite::deleteSink() +void QTextToSpeechProcessorFlite::prepareAudioSink(QAudioFormat format) { - if (m_audioSink) { - m_audioSink->disconnect(); - delete m_audioSink; - m_audioSink = nullptr; - m_audioIODevice = nullptr; - } -} + qCDebug(lcSpeechTtsFlite) << "QTextToSpeechProcessorFlite::prepareAudioSink" << format; -void QTextToSpeechProcessorFlite::createSink() -{ - using namespace std::chrono; - // Create new sink if none exists or the format has changed - if (!m_audioSink || (m_audioSink->format() != m_format)) { - // No signals while we create new sink with QIODevice - const bool sigs = signalsBlocked(); - auto resetSignals = qScopeGuard([this, sigs](){ blockSignals(sigs); }); - blockSignals(true); - deleteSink(); - m_audioSink = new QAudioSink(m_audioDevice, m_format, this); - m_audioSink->setVolume(m_volume); - constexpr auto bufferDuration = milliseconds(100); - m_audioSink->setBufferSize(m_format.bytesForDuration(microseconds(bufferDuration).count())); - connect(m_audioSink, &QAudioSink::stateChanged, this, - &QTextToSpeechProcessorFlite::changeState); - connect(QThread::currentThread(), &QThread::finished, m_audioSink, &QObject::deleteLater); - } else { - // stop before we can restart with a new QIODevice - m_audioSink->reset(); - } + m_audioSink = std::make_unique<QAudioSink>(m_audioDevice, format); + m_audioSink->setVolume(m_volume); + m_audioSink->setBufferSize(format.bytesForDuration(std::chrono::microseconds(100ms).count())); + + // LATER: use public API (compare QTBUG-138378) + QPlatformAudioSink *platformAudioSink = QPlatformAudioSink::get(*m_audioSink); + if (platformAudioSink) + platformAudioSink->setRole(QPlatformAudioSink::AudioEndpointRole::Accessibility); + + QObject::connect(m_audioSink.get(), &QAudioSink::stateChanged, m_audioSink.get(), + [&](QAudio::State state) { + if (state == QAudio::StoppedState && m_audioSink->error() != QAudio::NoError) { + setError(QTextToSpeech::ErrorReason::Playback, + QCoreApplication::translate("QTextToSpeech", "Audio IO.")); + } + }); - m_audioIODevice = m_audioSink->start(); - if (!m_audioIODevice) { - deleteSink(); + m_audioSink->start(m_synthesisProcess.get()); + if (m_audioSink->error() != QAudio::NoError) { setError(QTextToSpeech::ErrorReason::Playback, - QCoreApplication::translate("QTextToSpeech", "Audio Open error: No I/O device available.")); - } - - numberChunks = 0; - totalBytes = 0; -} - -// Wrapper for QAudioSink::stateChanged, bypassing early idle bug -void QTextToSpeechProcessorFlite::changeState(QAudio::State newState) -{ - if (m_state == newState) + QCoreApplication::translate("QTextToSpeech", "Audio Open error: %1") + .arg(m_audioSink->error())); return; - - qCDebug(lcSpeechTtsFlite) << "Audio sink state transition" << m_state << newState; - - m_state = newState; - const QTextToSpeech::State ttsState = audioStateToTts(newState); - emit stateChanged(ttsState); + } } void QTextToSpeechProcessorFlite::setError(QTextToSpeech::ErrorReason err, const QString &errorString) { - if (err == QTextToSpeech::ErrorReason::NoError) { - changeState(QAudio::IdleState); + if (err == QTextToSpeech::ErrorReason::NoError) return; - } - - qCDebug(lcSpeechTtsFlite) << "Error" << err << errorString; - emit stateChanged(QTextToSpeech::Error); - emit errorOccurred(err, errorString); -} -constexpr QTextToSpeech::State QTextToSpeechProcessorFlite::audioStateToTts(QAudio::State AudioState) -{ - switch (AudioState) { - case QAudio::ActiveState: - return QTextToSpeech::Speaking; - case QAudio::IdleState: - return QTextToSpeech::Ready; - case QAudio::SuspendedState: - return QTextToSpeech::Paused; - case QAudio::StoppedState: - return QTextToSpeech::Ready; - } - Q_UNREACHABLE(); -} + m_audioSink.reset(); + if (m_synthesisProcess) + m_synthesisProcess.reset(); -void QTextToSpeechProcessorFlite::deinitAudio() -{ - m_index = -1; - deleteSink(); + qCDebug(lcSpeechTtsFlite) << "Error" << err << errorString; + updateState(QTextToSpeech::Error); + emit errorOccurred(err, errorString); } -// Check format/device and set corresponding error messages -bool QTextToSpeechProcessorFlite::checkFormat(const QAudioFormat &format) +void QTextToSpeechProcessorFlite::updateState(QTextToSpeech::State state) { - auto streamToString = [](auto &&arg) { - QString string; - QDebug(&string) << arg; - return string; - }; - - bool formatOK = true; - - // Format must be valid - if (!format.isValid()) { - formatOK = false; - setError(QTextToSpeech::ErrorReason::Playback, - QCoreApplication::translate("QTextToSpeech", "Invalid audio format: %1") - .arg(streamToString(format))); - } - - // Device must exist - if (m_audioDevice.isNull()) { - formatOK = false; - setError(QTextToSpeech::ErrorReason::Playback, - QCoreApplication::translate("QTextToSpeech", "No audio device specified.")); - } - - // Device must support requested format - if (!m_audioDevice.isFormatSupported(format)) { - formatOK = false; - setError(QTextToSpeech::ErrorReason::Playback, - QCoreApplication::translate("QTextToSpeech", - "Audio device does not support format: %1") - .arg(streamToString(format))); - } - - return formatOK; + if (state == m_state) + return; + m_state = state; + qCDebug(lcSpeechTtsFlite) << "State changed to" << state; + emit stateChanged(state); } // Check voice validity @@ -518,36 +629,92 @@ return false; } -// Wrap QAudioSink::state and compensate early idle bug -QAudio::State QTextToSpeechProcessorFlite::audioSinkState() const -{ - return (m_audioSink) ? m_state : QAudio::StoppedState; -} // Stop current and cancel subsequent utterances -void QTextToSpeechProcessorFlite::stop() +void QTextToSpeechProcessorFlite::stop(QTextToSpeech::BoundaryHint boundaryHint) { - if (audioSinkState() == QAudio::ActiveState || audioSinkState() == QAudio::SuspendedState) { - deinitAudio(); - // Call manual state change as audio sink has been deleted - changeState(QAudio::StoppedState); + using BoundaryHint = QTextToSpeech::BoundaryHint; + + switch (m_state) { + case QTextToSpeech::Speaking: + case QTextToSpeech::Paused: { + switch (boundaryHint) { + case BoundaryHint::Sentence: { + qCDebug(lcSpeechTtsFlite) + << "Stopping after sentence not implemented. Stopping after next word"; + return stop(BoundaryHint::Word); + } + case BoundaryHint::Utterance: + Q_UNREACHABLE_RETURN(); // handled by QTextToSpeech + case BoundaryHint::Word: { + m_synthesisProcess->stop(BoundaryHint::Word); + return; + } + default: { + if (m_audioSink) { + m_audioSink->reset(); + m_audioSink.reset(); + } + m_synthesisProcess.reset(); + updateState(QTextToSpeech::Ready); + break; + } + } + } + + case QTextToSpeech::Synthesizing: + return; // we cannot stop a synthesis process, it will stop automatically + + case QTextToSpeech::Error: { + m_synthesisProcess.reset(); + + updateState(QTextToSpeech::Ready); + return; + } + case QTextToSpeech::Ready: + break; + + default: + Q_UNREACHABLE(); } } -void QTextToSpeechProcessorFlite::pause() +void QTextToSpeechProcessorFlite::pause(QTextToSpeech::BoundaryHint boundaryHint) { - if (audioSinkState() == QAudio::ActiveState) - m_audioSink->suspend(); + using BoundaryHint = QTextToSpeech::BoundaryHint; + + if (m_state == QTextToSpeech::Speaking) { + switch (boundaryHint) { + case BoundaryHint::Sentence: { + qCDebug(lcSpeechTtsFlite) + << "Pausing after sentence not implemented. Pausing after next word"; + return pause(BoundaryHint::Word); + } + case BoundaryHint::Utterance: + Q_UNREACHABLE_RETURN(); // handled by QTextToSpeech + + case BoundaryHint::Word: { + m_synthesisProcess->pause(BoundaryHint::Word); + return; + } + + default: + if (m_audioSink) + m_audioSink->suspend(); + updateState(QTextToSpeech::Paused); + } + } } void QTextToSpeechProcessorFlite::resume() { - if (audioSinkState() == QAudio::SuspendedState) { - m_audioSink->resume(); - // QAudioSink in push mode transitions to Idle when resumed, even if - // there is still data to play. Workaround this weird behavior if we - // know we are not done yet. - changeState(QAudio::ActiveState); + if (m_synthesisProcess) + m_synthesisProcess->resume(); + + if (m_state == QTextToSpeech::Paused) { + if (m_audioSink && m_synthesisProcess) + m_audioSink->resume(); + updateState(QTextToSpeech::Speaking); } } @@ -559,9 +726,28 @@ if (!checkVoice(voiceId)) return; - m_volume = volume; - processText(text, voiceId, float(pitch), float(rate), - QTextToSpeechProcessorFlite::audioOutputCb); + switch (m_state) { + case QTextToSpeech::Speaking: + case QTextToSpeech::Paused: + stop(QTextToSpeech::BoundaryHint::Immediate); + break; + + case QTextToSpeech::Synthesizing: + return; // we cannot synthesize and speak at the same time + + case QTextToSpeech::Ready: + case QTextToSpeech::Error: + break; + + default: + Q_UNREACHABLE(); + } + + const VoiceInfo &voiceInfo = m_voices.at(voiceId); + m_volume = float(volume); + updateState(QTextToSpeech::Speaking); + m_synthesisProcess = std::make_unique<QFliteSynthesisProcess>(voiceInfo.vox, this, text, + float(pitch), float(rate)); } void QTextToSpeechProcessorFlite::synthesize(const QString &text, int voiceId, double pitch, double rate, double volume) @@ -572,11 +758,69 @@ if (!checkVoice(voiceId)) return; + switch (m_state) { + case QTextToSpeech::Speaking: + case QTextToSpeech::Paused: + return; // we cannot synthesize and speak at the same time + + case QTextToSpeech::Synthesizing: + case QTextToSpeech::Ready: + case QTextToSpeech::Error: + break; + + default: + Q_UNREACHABLE(); + } + m_synthesisFormat = std::nullopt; - m_volume = volume; - processText(text, voiceId, float(pitch), float(rate), - QTextToSpeechProcessorFlite::dataOutputCb); + m_volume = float(volume); + + qCDebug(lcSpeechTtsFlite) << "processText() begin"; + + const VoiceInfo &voiceInfo = m_voices.at(voiceId); + cst_voice *voice = voiceInfo.vox; + cst_audio_streaming_info *asi = new_audio_streaming_info(); + + asi->asc = [](const cst_wave *w, int start, int size, int /*last*/, + cst_audio_streaming_info *asi) -> int { + auto *self = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata); + + if (!self->m_synthesisFormat) { + QAudioFormat format = getAudioFormat(*w); + if (!format.isValid()) + return CST_AUDIO_STREAM_STOP; + self->m_synthesisFormat = format; + } + + const qsizetype bytesToWrite = size * self->m_synthesisFormat->bytesPerSample(); + QByteArray chunk(reinterpret_cast<const char *>(w->samples + start), bytesToWrite); + + QAudioHelperInternal::applyVolume(self->m_volume, *self->m_synthesisFormat, + as_bytes(QSpan{ chunk }), + as_writable_bytes(QSpan{ chunk })); + + emit self->synthesized(*self->m_synthesisFormat, chunk); + return CST_AUDIO_STREAM_CONT; + }; + + asi->userdata = (void *)this; + feat_set(voice->features, "streaming_info", audio_streaming_info_val(asi)); + setRateForVoice(voice, float(rate)); + setPitchForVoice(voice, float(pitch)); + + updateState(QTextToSpeech::Synthesizing); + + float secsToSpeak = flite_text_to_speech(text.toUtf8().constData(), voice, "none"); + + if (secsToSpeak <= 0) { + setError(QTextToSpeech::ErrorReason::Input, + QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure.")); + return; + } + + qCDebug(lcSpeechTtsFlite) << "processText() end" << secsToSpeak << "Seconds"; m_synthesisFormat = std::nullopt; + updateState(QTextToSpeech::Ready); } QT_END_NAMESPACE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite_processor.h new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite_processor.h --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/flite/qtexttospeech_flite_processor.h 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/flite/qtexttospeech_flite_processor.h 2026-03-10 09:31:58.000000000 +0100 @@ -23,14 +23,18 @@ #include <flite/flite.h> +#include <optional> + QT_BEGIN_NAMESPACE +class QFliteSynthesisProcess; + class QTextToSpeechProcessorFlite : public QObject { Q_OBJECT public: - QTextToSpeechProcessorFlite(const QAudioDevice &audioDevice); + QTextToSpeechProcessorFlite(QAudioDevice audioDevice); ~QTextToSpeechProcessorFlite(); struct VoiceInfo @@ -46,41 +50,18 @@ Q_INVOKABLE void say(const QString &text, int voiceId, double pitch, double rate, double volume); Q_INVOKABLE void synthesize(const QString &text, int voiceId, double pitch, double rate, double volume); - Q_INVOKABLE void pause(); + Q_INVOKABLE void pause(QTextToSpeech::BoundaryHint boundaryHint); Q_INVOKABLE void resume(); - Q_INVOKABLE void stop(); + Q_INVOKABLE void stop(QTextToSpeech::BoundaryHint boundaryHint); const QList<QTextToSpeechProcessorFlite::VoiceInfo> &voices() const; - static constexpr QTextToSpeech::State audioStateToTts(QAudio::State audioState); private: - // Flite callbacks - static int audioOutputCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi); - static int dataOutputCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi); - - using OutputHandler = decltype(QTextToSpeechProcessorFlite::audioOutputCb); - // Process a single text - void processText(const QString &text, int voiceId, float pitch, float rate, - OutputHandler outputHandler); - int audioOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi); - void audioHandleNewToken(std::chrono::milliseconds tokenStartTime, - cst_audio_streaming_info *asi); - int dataOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi); - bool init(); - bool initAudio(const cst_wave *w); - void deinitAudio(); - bool checkFormat(const QAudioFormat &format); bool checkVoice(int voiceId); - void deleteSink(); - void createSink(); - QAudio::State audioSinkState() const; - void setError(QTextToSpeech::ErrorReason err, const QString &errorString = QString()); -private slots: - void changeState(QAudio::State newState); + void setError(QTextToSpeech::ErrorReason err, const QString &errorString = QString()); + void updateState(QTextToSpeech::State); Q_SIGNALS: void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString); @@ -89,23 +70,20 @@ void synthesized(const QAudioFormat &format, const QByteArray &array); private: - QString m_text; - qsizetype m_index = -1; + QTextToSpeech::State m_state = {}; - QAudioSink *m_audioSink = nullptr; - QAudio::State m_state = QAudio::IdleState; - QIODevice *m_audioIODevice = nullptr; - - QAudioDevice m_audioDevice; - QAudioFormat m_format; - double m_volume = 1; + std::unique_ptr<QAudioSink> m_audioSink; + const QAudioDevice m_audioDevice; + float m_volume = 1.f; std::optional<QAudioFormat> m_synthesisFormat; QList<VoiceInfo> m_voices; - // Statistics for debugging - qint64 numberChunks = 0; - qint64 totalBytes = 0; + // synthesis process + friend class QFliteSynthesisProcess; + std::unique_ptr<QFliteSynthesisProcess> m_synthesisProcess; + + void prepareAudioSink(QAudioFormat); }; QT_END_NAMESPACE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/winrt/CMakeLists.txt new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/winrt/CMakeLists.txt --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/winrt/CMakeLists.txt 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/winrt/CMakeLists.txt 2026-03-10 09:31:58.000000000 +0100 @@ -13,11 +13,7 @@ Qt::CorePrivate Qt::TextToSpeech Qt::Multimedia + Qt::MultimediaPrivate shlwapi runtimeobject ) - -qt_internal_extend_target(QTextToSpeechWinRTPlugin CONDITION MSVC - COMPILE_OPTIONS - /Zc:twoPhase- -) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/winrt/qtexttospeech_winrt.cpp new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/winrt/qtexttospeech_winrt.cpp --- old/qtspeech-everywhere-src-6.10.2/src/plugins/tts/winrt/qtexttospeech_winrt.cpp 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/src/plugins/tts/winrt/qtexttospeech_winrt.cpp 2026-03-10 09:31:58.000000000 +0100 @@ -5,9 +5,10 @@ #include "qtexttospeech_winrt.h" #include "qtexttospeech_winrt_audiosource.h" -#include <QtMultimedia/QAudioSink> -#include <QtMultimedia/QMediaDevices> -#include <QtMultimedia/QAudioDevice> +#include <QtMultimedia/qaudiodevice.h> +#include <QtMultimedia/qaudiosink.h> +#include <QtMultimedia/qmediadevices.h> +#include <QtMultimedia/private/qaudiosystem_p.h> #include <QtCore/QBasicTimer> #include <QtCore/QCoreApplication> @@ -339,6 +340,9 @@ d->forEachVoice([&data, &foundVoice](const ComPtr<IVoiceInformation> &voiceInfo) { HString voiceId; HRESULT hr = voiceInfo->get_Id(voiceId.GetAddressOf()); + if (FAILED(hr)) + return false; + if (data == QString::fromWCharArray(voiceId.GetRawBuffer(0))) { foundVoice = voiceInfo; return true; @@ -407,6 +411,11 @@ currentBoundary = boundaries.constBegin(); audioSink.reset(new QAudioSink(audioDevice, format)); + // LATER: use public API (compare QTBUG-138378) + QPlatformAudioSink *platformAudioSink = QPlatformAudioSink::get(*audioSink); + if (platformAudioSink) + platformAudioSink->setRole(QPlatformAudioSink::AudioEndpointRole::Accessibility); + QObject::connect(audioSink.get(), &QAudioSink::stateChanged, q, [this](QAudio::State sinkState) { sinkStateChanged(sinkState); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/qtspeech-everywhere-src-6.10.2/tests/auto/qtexttospeech/tst_qtexttospeech.cpp new/qtspeech-everywhere-src-6.11.0/tests/auto/qtexttospeech/tst_qtexttospeech.cpp --- old/qtspeech-everywhere-src-6.10.2/tests/auto/qtexttospeech/tst_qtexttospeech.cpp 2026-01-23 06:31:19.000000000 +0100 +++ new/qtspeech-everywhere-src-6.11.0/tests/auto/qtexttospeech/tst_qtexttospeech.cpp 2026-03-10 09:31:58.000000000 +0100 @@ -878,10 +878,6 @@ if (engine != "mock" && !hasDefaultAudioOutput()) QSKIP("No audio device present"); - if (engine == "flite") - QSKIP("QTBUG-137947 QTextToSpeech::pause(QTextToSpeech::BoundaryHint::Word) not implemented" - " for flite"); - QFETCH(QStringList, words); QFETCH(int, pauseAt);
