vlc | branch: master | Jean-Baptiste Kempf <[email protected]> | Sun Oct 11 18:26:11 2015 +0200| [a7eb0f0aa5c3003535a091364160c918db97c4ec] | committer: Jean-Baptiste Kempf
Add a SAPI synthetizer for Windows This is the work from Moti Zilberman, modified by me to build and integrate in-tree It's heavily inspired on the OS X one. Be careful: you need a very recent Mingw-W64 to hope to compile it. Ref #11893 > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=a7eb0f0aa5c3003535a091364160c918db97c4ec --- NEWS | 1 + modules/MODULES_LIST | 1 + modules/text_renderer/Makefile.am | 6 + modules/text_renderer/sapi.cpp | 236 +++++++++++++++++++++++++++++++++++++ po/POTFILES.in | 1 + 5 files changed, 245 insertions(+) diff --git a/NEWS b/NEWS index 2023bc9..16ea9e3 100644 --- a/NEWS +++ b/NEWS @@ -163,6 +163,7 @@ Misc * remove ZPL playlist format * Update libVLC doxygen modules * Add a text-to-speech renderer for subtitles on OS X/iOS + * Add a text-to-speech renderer for subtitles on Windows Removed modules * Atmo video filter diff --git a/modules/MODULES_LIST b/modules/MODULES_LIST index 472ad05..a34dc6e 100644 --- a/modules/MODULES_LIST +++ b/modules/MODULES_LIST @@ -319,6 +319,7 @@ $Id$ * rv32: RV32 image format conversion module * samplerate: Secret Rabbit Code (libsamplerate) audio resampler * sap: Interface module to read SAP/SDP announcements + * sapi: Windows Text to Speech Synthetizer using the SAPI 5.1 API * scale: Images rescaler * scaletempo: Scale audio tempo in sync with playback rate * scene: scene video filter diff --git a/modules/text_renderer/Makefile.am b/modules/text_renderer/Makefile.am index 43ccb67..7634a4f 100644 --- a/modules/text_renderer/Makefile.am +++ b/modules/text_renderer/Makefile.am @@ -42,6 +42,12 @@ libnsspeechsynthesizer_plugin_la_LDFLAGS = -Wl,-framework,Cocoa text_LTLIBRARIES += libnsspeechsynthesizer_plugin.la endif +libsapi_plugin_la_SOURCES = text_renderer/sapi.cpp +libsapi_plugin_la_LIBADD = -lole32 +if HAVE_WIN32 +text_LTLIBRARIES += libsapi_plugin.la +endif + libsvg_plugin_la_SOURCES = text_renderer/svg.c libsvg_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(SVG_CFLAGS) libsvg_plugin_la_LDFLAGS = 
$(AM_LDFLAGS) -rpath '$(textdir)' diff --git a/modules/text_renderer/sapi.cpp b/modules/text_renderer/sapi.cpp new file mode 100644 index 0000000..5df8dcf --- /dev/null +++ b/modules/text_renderer/sapi.cpp @@ -0,0 +1,236 @@ +/***************************************************************************** + * sapi.cpp: Simple text to Speech renderer for Windows, based on SAPI + ***************************************************************************** + * Copyright (c) 2015 Moti Zilberman + * + * Authors: Moti Zilberman + * Jean-Baptiste Kempf + * + * The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* VLC core API headers */
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_filter.h>
+#include <vlc_charset.h>
+
+#define INITGUID
+
+#include <windows.h>
+#include <sapi.h>
+#include <sphelper.h>
+
+static int Create (vlc_object_t *);
+static void Destroy(vlc_object_t *);
+static int RenderText(filter_t *,
+                      subpicture_region_t *,
+                      subpicture_region_t *,
+                      const vlc_fourcc_t *);
+
+vlc_module_begin ()
+    set_description(N_("Speech synthesis for Windows"))
+
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_SUBPIC)
+
+    set_capability("text renderer", 0)
+    set_callbacks(Create, Destroy)
+    /* With the default of -1, Create() makes no SetVoice() call; a value
+     * >= 0 is used as an index into the enumerated SPCAT_VOICES tokens. */
+    add_integer("sapi-voice", -1, "Voice Index", "Voice index", false)
+vlc_module_end ()
+
+/* Private state of one filter instance. */
+struct filter_sys_t
+{
+    ISpVoice* cpVoice;    /* voice object created in Create(), used by RenderText() */
+    char* lastString;     /* heap copy of the last text handed to Speak(), or nullptr */
+};
+
+/* MTA functions */
+
+/**
+ * Initialises COM on the calling thread with COINIT_MULTITHREADED.
+ *
+ * On failure the HRESULT is logged on obj and -1 is returned; 0 otherwise.
+ */
+static int TryEnterMTA(vlc_object_t *obj)
+{
+    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+    if (unlikely(FAILED(hr)))
+    {
+        msg_Err (obj, "cannot initialize COM (error 0x%lx)", hr);
+        return -1;
+    }
+    return 0;
+}
+/* Lets callers pass any VLC object pointer without casting it themselves. */
+#define TryEnterMTA(o) TryEnterMTA(VLC_OBJECT(o))
+
+/**
+ * Same initialisation as TryEnterMTA(), but for call sites that have no way
+ * to report an error: a failure aborts the process.
+ */
+static void EnterMTA(void)
+{
+    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+    if (unlikely(FAILED(hr)))
+        abort();
+}
+
+/** Undoes one successful TryEnterMTA() / EnterMTA() on the calling thread. */
+static void LeaveMTA(void)
+{
+    CoUninitialize();
+}
+
+/**
+ * Module open callback (registered through set_callbacks).
+ *
+ * Allocates the private state, creates the SpVoice object, optionally selects
+ * the voice designated by "sapi-voice", and installs RenderText() as the
+ * filter's pf_render.
+ *
+ * Returns VLC_SUCCESS, or VLC_EGENERIC when COM cannot be initialised, the
+ * private state cannot be allocated, or the SpVoice object cannot be created.
+ * Failures while enumerating or selecting a voice are not fatal.
+ */
+static int Create (vlc_object_t *p_this)
+{
+    filter_t *p_filter = (filter_t *)p_this;
+    filter_sys_t *p_sys;
+    HRESULT hr;
+
+    if (TryEnterMTA(p_this))
+        return VLC_EGENERIC;
+
+    p_filter->p_sys = p_sys = (filter_sys_t*) malloc(sizeof(filter_sys_t));
+    if (!p_sys)
+        goto error;
+
+    p_sys->cpVoice = nullptr;
+    p_sys->lastString = nullptr;
+
+    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_INPROC_SERVER, IID_ISpVoice, (void**) &p_sys->cpVoice);
+    if (SUCCEEDED(hr)) {
+        ISpObjectToken* cpVoiceToken = nullptr;
+        IEnumSpObjectTokens* cpEnum = nullptr;
+        ULONG ulCount = 0;
+
+        hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
+        if (SUCCEEDED(hr))
+        {
+            // Get the number of voices.
+            hr = cpEnum->GetCount(&ulCount);
+            if (SUCCEEDED (hr))
+            {
+                int voiceIndex = var_InheritInteger(p_this, "sapi-voice");
+                if (voiceIndex > -1)
+                {
+                    /* Indices are zero-based, so the last valid one is
+                     * ulCount - 1; an index equal to the count is rejected
+                     * here instead of failing silently inside Item(). */
+                    if ((unsigned)voiceIndex < ulCount) {
+                        hr = cpEnum->Item(voiceIndex, &cpVoiceToken);
+                        if (SUCCEEDED(hr)) {
+                            hr = p_sys->cpVoice->SetVoice(cpVoiceToken);
+                            if (SUCCEEDED(hr)) {
+                                msg_Dbg(p_this, "Selected voice %d", voiceIndex);
+                            }
+                            else {
+                                msg_Err(p_this, "Failed to set voice %d", voiceIndex);
+                            }
+                            cpVoiceToken->Release();
+                            cpVoiceToken = nullptr;
+                        }
+                    }
+                    else
+                        msg_Err(p_this, "Voice index exceeds available count");
+                }
+            }
+            cpEnum->Release();
+            cpEnum = nullptr;
+
+            /* Set Output */
+            /* The result is stored but not checked: a failure here does not
+             * make Create() fail. */
+            hr = p_sys->cpVoice->SetOutput(nullptr, TRUE);
+        }
+    }
+    else
+    {
+        msg_Err(p_filter, "Could not create SpVoice");
+        goto error;
+    }
+
+    LeaveMTA();
+
+    p_filter->pf_render = RenderText;
+
+    return VLC_SUCCESS;
+
+error:
+    /* Only reached before a voice exists, so there is nothing to Release();
+     * p_sys may be nullptr here, which free() accepts. */
+    LeaveMTA();
+    free(p_sys);
+    return VLC_EGENERIC;
+}
+
+/**
+ * Module close callback (registered through set_callbacks): releases the
+ * voice object and frees the private state allocated by Create().
+ */
+static void Destroy(vlc_object_t *p_this)
+{
+    filter_t *p_filter = (filter_t *)p_this;
+    filter_sys_t *p_sys = p_filter->p_sys;
+
+    if (p_sys->cpVoice) {
+        p_sys->cpVoice->Release();
+        p_sys->cpVoice = nullptr;
+    }
+
+    /* free(nullptr) is a no-op, so no guard is needed. */
+    free(p_sys->lastString);
+    p_sys->lastString = nullptr;
+
+    free(p_sys);
+}
+
+/**
+ * Render callback: speaks the text segments of p_region_in instead of
+ * drawing them. p_region_out and p_chroma_list are not used.
+ *
+ * A segment is skipped when it has no text, is empty, is a lone "\n", or is
+ * identical to the last string that was spoken.
+ *
+ * Returns VLC_EGENERIC when the region carries no text segment, VLC_SUCCESS
+ * otherwise (a failing Speak() is only logged).
+ */
+static int RenderText(filter_t *p_filter,
+                      subpicture_region_t *p_region_out,
+                      subpicture_region_t *p_region_in,
+                      const vlc_fourcc_t *p_chroma_list)
+{
+    VLC_UNUSED(p_region_out);
+    VLC_UNUSED(p_chroma_list);
+
+    filter_sys_t *p_sys = p_filter->p_sys;
+    text_segment_t *p_segment = p_region_in->p_text;
+
+    if (!p_segment)
+        return VLC_EGENERIC;
+
+    for (const text_segment_t *s = p_segment; s != nullptr; s = s->p_next ) {
+        if (!s->psz_text )
+            continue;
+
+        if (strlen(s->psz_text) == 0)
+            continue;
+
+        try {
+            if (p_sys->lastString && !strcmp(p_sys->lastString, s->psz_text))
+                continue;
+
+            if (!strcmp(s->psz_text, "\n"))
+                continue;
+
+            /* Drop the previously remembered text before replacing it;
+             * otherwise one string is leaked per spoken segment. */
+            free(p_sys->lastString);
+            p_sys->lastString = strdup(s->psz_text);
+            if (p_sys->lastString) {
+                msg_Dbg(p_filter, "Speaking '%s'", s->psz_text);
+
+                wchar_t* wideText = ToWide(s->psz_text);
+                if (wideText == nullptr) {
+                    /* Conversion failed: there is nothing to hand to SAPI. */
+                    msg_Err(p_filter, "Cannot convert text to wide characters");
+                    continue;
+                }
+
+                EnterMTA();
+                HRESULT hr = p_sys->cpVoice->Speak(wideText, SPF_ASYNC, nullptr);
+                free(wideText);
+                if (FAILED(hr)) {
+                    msg_Err(p_filter, "Speak() error");
+                }
+                LeaveMTA();
+            }
+        }
+        catch (...) {
+            msg_Err(p_filter, "Caught an exception!");
+        }
+    }
+
+    return VLC_SUCCESS;
+}
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 49e4d25..8077a0c 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -1063,6 +1063,7 @@ modules/stream_out/transcode/transcode.c
 modules/text_renderer/freetype.c
 modules/text_renderer/nsspeechsynthesizer.m
 modules/text_renderer/quartztext.c
+modules/text_renderer/sapi.cpp
 modules/text_renderer/svg.c
 modules/text_renderer/tdummy.c
 modules/text_renderer/win32text.c

_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits
