Attached is a first attempt at an implementation, for discussion. It uses gstreamer-plugins-espeak, which is already included in the latest images and is used by all the activities that do text-to-speech, so it does not add any new dependency.
A device is added to the frame to configure pitch and velocity (speech rate), and the hotkey alt-shift-s is used to say the selected text. The SpeechManager provides a simple say_text method that activities can use if needed. One pending piece of functionality is adding a list of languages, so that the list is translated once in a single place and does not need to be translated again in every activity. There is code in the Speak activity that does this; I need to look at it. Gonzalo On Tue, Nov 15, 2011 at 8:16 PM, Samuel Klein <meta...@gmail.com> wrote: > +1 This would be amazing. This would also encourage more people to > contribute to the speech engine for their language or dialect. > > On Tue, Nov 15, 2011 at 9:15 AM, Gonzalo Odiard <gonz...@laptop.org>wrote: > >> I want to propose the feature "Global Text to Speech" [1] >> >> In fact, the functionality was already designed, and partly implemented, >> but is not working in Sugar. >> >> We have code in Speak, Read and Memorize activities implementing the call >> to the backend, >> the only missing part is an icon device to configure pitch and velocity. >> >> Gonzalo >> >> [1] http://wiki.sugarlabs.org/go/Features/GlobalTextToSpeech >> >> >> _______________________________________________ >> Sugar-devel mailing list >> Sugar-devel@lists.sugarlabs.org >> http://lists.sugarlabs.org/listinfo/sugar-devel >> >> > > > -- > Samuel Klein identi.ca:sj w:user:sj +1 617 > 529 4266 > >
From 8126ce7de174e4b440d93f8987d43ed987c6d823 Mon Sep 17 00:00:00 2001 From: Gonzalo Odiard <godi...@gmail.com> Date: Fri, 18 Nov 2011 18:02:40 -0300 Subject: [PATCH] Implement text to speech in Sugar A device is added to the frame to configure pitch and rate; the voice is selected based on the LANG variable. This patch is an initial implementation of the feature http://wiki.sugarlabs.org/go/Features/Global_Text_To_Speech Signed-off-by: Gonzalo Odiard <gonz...@laptop.org> --- extensions/deviceicon/Makefile.am | 1 + extensions/deviceicon/speech.py | 137 ++++++++++++++++++++ extensions/globalkey/Makefile.am | 1 + extensions/globalkey/speech.py | 24 ++++ src/jarabe/model/Makefile.am | 1 + src/jarabe/model/speech.py | 250 +++++++++++++++++++++++++++++++++++++ src/jarabe/view/keyhandler.py | 29 +---- 7 files changed, 415 insertions(+), 28 deletions(-) create mode 100644 extensions/deviceicon/speech.py create mode 100644 extensions/globalkey/speech.py create mode 100644 src/jarabe/model/speech.py diff --git a/extensions/deviceicon/Makefile.am b/extensions/deviceicon/Makefile.am index 118d866..7ed1f77 100644 --- a/extensions/deviceicon/Makefile.am +++ b/extensions/deviceicon/Makefile.am @@ -5,5 +5,6 @@ sugar_PYTHON = \ battery.py \ network.py \ speaker.py \ + speech.py \ touchpad.py \ volume.py diff --git a/extensions/deviceicon/speech.py b/extensions/deviceicon/speech.py new file mode 100644 index 0000000..acf96e1 --- /dev/null +++ b/extensions/deviceicon/speech.py @@ -0,0 +1,137 @@ +# Copyright (C) 2008 Martin Dengler +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +from gettext import gettext as _ +import gconf + +import glib +import gtk + +from sugar.graphics.icon import Icon +from sugar.graphics.menuitem import MenuItem +from sugar.graphics.tray import TrayIcon +from sugar.graphics.palette import Palette +from sugar.graphics.xocolor import XoColor + +from jarabe.frame.frameinvoker import FrameWidgetInvoker +from jarabe.model import speech + +_ICON_NAME = 'microphone' + + +class SpeechDeviceView(TrayIcon): + + FRAME_POSITION_RELATIVE = 105 + + def __init__(self): + client = gconf.client_get_default() + self._color = XoColor(client.get_string('/desktop/sugar/user/color')) + + TrayIcon.__init__(self, icon_name=_ICON_NAME, xo_color=self._color) + + self.set_palette_invoker(FrameWidgetInvoker(self)) + + self._manager = speech.get_speech_manager() + + self.connect('expose-event', self.__expose_event_cb) + + self._icon_widget.connect('button-release-event', + self.__button_release_event_cb) + + def create_palette(self): + label = glib.markup_escape_text(_('Speech')) + palette = SpeechPalette(label, manager=self._manager) + palette.set_group_id('frame') + return palette + + def __button_release_event_cb(self, widget, event): + if event.button != 1: + return False + + self.palette_invoker.notify_right_click() + return True + + def __expose_event_cb(self, *args): + self._update_info() + + +class SpeechPalette(Palette): + + def __init__(self, primary_text, manager): + Palette.__init__(self, label=primary_text) + + self._manager = manager + + vbox = gtk.VBox() + self.set_content(vbox) + + pitch_step = 10 + self._adj_pitch = gtk.Adjustment(value=self._manager.get_pitch(), + lower=self._manager.MIN_PITCH, + upper=self._manager.MAX_PITCH, + step_incr=pitch_step, + 
page_incr=pitch_step, + page_size=pitch_step) + self._hscale_pitch = gtk.HScale(self._adj_pitch) + self._hscale_pitch.set_digits(0) + self._hscale_pitch.set_draw_value(False) + + hbox_pitch = gtk.HBox() + hbox_pitch.pack_start(gtk.Label(_('Pitch'))) + hbox_pitch.pack_start(self._hscale_pitch) + vbox.add(hbox_pitch) + + rate_step = 10 + self._adj_rate = gtk.Adjustment(value=self._manager.get_rate(), + lower=self._manager.MIN_RATE, + upper=self._manager.MAX_RATE, + step_incr=rate_step, + page_incr=rate_step, + page_size=rate_step) + self._hscale_rate = gtk.HScale(self._adj_rate) + self._hscale_rate.set_digits(0) + self._hscale_rate.set_draw_value(False) + + hbox_rate = gtk.HBox() + hbox_rate.pack_start(gtk.Label(_('Rate'))) + hbox_rate.pack_start(self._hscale_rate) + vbox.add(hbox_rate) + vbox.show_all() + + self._play_item = MenuItem('Speak selected text') + self._play_icon = Icon(icon_name='player_play', + icon_size=gtk.ICON_SIZE_MENU) + self._play_item.set_image(self._play_icon) + self.menu.append(self._play_item) + self._play_item.show() + + self._adj_pitch.connect('value_changed', self.__adj_pitch_changed_cb) + self._adj_rate.connect('value_changed', self.__adj_rate_changed_cb) + self._play_item.connect('activate', self.__play_activate_cb) + #self.connect('popup', self.__popup_cb) + + def __adj_pitch_changed_cb(self, adjustement): + self._manager.set_pitch(int(adjustement.value)) + + def __adj_rate_changed_cb(self, adjustement): + self._manager.set_rate(int(adjustement.value)) + + def __play_activate_cb(self, menuitem_): + self._manager.say_selected_text() + + +def setup(tray): + tray.add_device(SpeechDeviceView()) diff --git a/extensions/globalkey/Makefile.am b/extensions/globalkey/Makefile.am index 69afac2..b6cbbd6 100644 --- a/extensions/globalkey/Makefile.am +++ b/extensions/globalkey/Makefile.am @@ -3,4 +3,5 @@ sugardir = $(pkgdatadir)/extensions/globalkey sugar_PYTHON = \ __init__.py \ screenshot.py \ + speech.py \ viewsource.py diff --git 
a/extensions/globalkey/speech.py b/extensions/globalkey/speech.py new file mode 100644 index 0000000..6e55dd5 --- /dev/null +++ b/extensions/globalkey/speech.py @@ -0,0 +1,24 @@ +# Copyright (C) 2011 One Laptop Per Child +# Copyright (C) 2011 Gonzalo Odiard +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +from jarabe.model import speech + +BOUND_KEYS = ['<alt><shift>s'] + + +def handle_key_press(key): + speech.get_speech_manager().say_selected_text() diff --git a/src/jarabe/model/Makefile.am b/src/jarabe/model/Makefile.am index 92e8712..2fc6b1c 100644 --- a/src/jarabe/model/Makefile.am +++ b/src/jarabe/model/Makefile.am @@ -16,4 +16,5 @@ sugar_PYTHON = \ screen.py \ session.py \ sound.py \ + speech.py \ telepathyclient.py diff --git a/src/jarabe/model/speech.py b/src/jarabe/model/speech.py new file mode 100644 index 0000000..bb2da87 --- /dev/null +++ b/src/jarabe/model/speech.py @@ -0,0 +1,250 @@ +# Copyright (C) 2006-2008 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import gconf + +from sugar import env + +import gst +import gtk +import gobject + +import os +from gettext import gettext as _ +import logging + +# TRANS: The language pitch (range [0 - 99], default 50 for English) +# Look at http://espeak.sourceforge.net/commands.html for details +DEFAULT_PITCH = int(_('50')) + + +# TRANS: The diction speed, in average words per minute (range [80 - 390], +# default 170 for English). +# Look at http://espeak.sourceforge.net/commands.html for details +DEFAULT_RATE = int(_('170')) + +_speech_manager = None + + +class SpeechManager(): + + MIN_PITCH = 0 + MAX_PITCH = 99 + + MIN_RATE = 80 + MAX_RATE = 390 + + def __init__(self): + self.player = AudioGrabGst() + logging.debug('SpeechManager setting default parameters') + self._voice_name = self.player.get_default_voice() + self._pitch = DEFAULT_PITCH + self._rate = DEFAULT_RATE + try: + self._loading = True + self.restore() + self._loading = False + except: + pass + + def get_pitch(self): + return self._pitch + + def get_rate(self): + return self._rate + + def set_pitch(self, pitch): + self._pitch = pitch + if not self._loading: + self.save() + + def set_rate(self, rate): + self._rate = rate + if not self._loading: + self.save() + + def say_text(self, text): + if text: + self.player.speak(self._pitch, self._rate, self._voice_name, text) + + def say_selected_text(self): + clipboard = gtk.clipboard_get(selection='PRIMARY') + clipboard.request_text(self._primary_selection_cb) + + def _primary_selection_cb(self, clipboard, text, 
user_data): + logging.debug('SpeechManager._primary_selection_cb: %r', text) + self.say_text(text) + + def save(self): + client = gconf.client_get_default() + client.set_int('/desktop/sugar/speech/pitch', self.get_pitch()) + client.set_int('/desktop/sugar/speech/rate', + self.get_rate()) + logging.debug('saving speech configuration pitch %s rate %s' % + (self._pitch, self._rate)) + + def restore(self): + client = gconf.client_get_default() + self.set_pitch(client.get_int('/desktop/sugar/speech/pitch')) + self.set_rate(client.get_int('/desktop/sugar/speech/rate')) + logging.debug('loading speech configuration pitch %s rate %s' % + (self._pitch, self._rate)) + + +class AudioGrabGst(gobject.GObject): + + __gsignals__ = { + 'new-buffer': (gobject.SIGNAL_RUN_FIRST, None, [gobject.TYPE_PYOBJECT]) + } + + def __init__(self): + gobject.GObject.__init__(self) + self.pipeline = None + self.quiet = True + + def restart_sound_device(self): + self.quiet = False + + self.pipeline.set_state(gst.STATE_NULL) + self.pipeline.set_state(gst.STATE_PLAYING) + + def stop_sound_device(self): + if self.pipeline is None: + return + + self.pipeline.set_state(gst.STATE_NULL) + # Shut theirs mouths down + self._new_buffer('') + + self.quiet = True + + def make_pipeline(self, cmd): + if self.pipeline is not None: + self.stop_sound_device() + del self.pipeline + + # build a pipeline that reads the given file + # and sends it to both the real audio output + # and a fake one that we use to draw from + self.pipeline = gst.parse_launch( + cmd + ' ' \ + '! decodebin ' \ + '! tee name=tee ' \ + 'tee.! audioconvert ' \ + '! alsasink ' \ + 'tee.! queue ' \ + '! audioconvert ! 
fakesink name=sink') + + def on_buffer(element, buffer, pad): + # we got a new buffer of data, ask for another + gobject.timeout_add(100, self._new_buffer, str(buffer)) + return True + + sink = self.pipeline.get_by_name('sink') + sink.props.signal_handoffs = True + sink.connect('handoff', on_buffer) + + def gstmessage_cb(bus, message): + self._was_message = True + + if message.type == gst.MESSAGE_WARNING: + def check_after_warnings(): + if not self._was_message: + self.stop_sound_device() + return True + + logging.debug(message.type) + self._was_message = False + gobject.timeout_add(500, self._new_buffer, str(buffer)) + + elif message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR): + logging.debug(message.type) + self.stop_sound_device() + + self._was_message = False + bus = self.pipeline.get_bus() + bus.add_signal_watch() + bus.connect('message', gstmessage_cb) + + def _new_buffer(self, buf): + if not self.quiet: + # pass captured audio to anyone who is interested + self.emit("new-buffer", buf) + return False + + def speak(self, pitch, rate, voice_name, text): + # XXX workaround for http://bugs.sugarlabs.org/ticket/1801 + if not [i for i in text if i.isalnum()]: + return + + self.make_pipeline('espeak name=espeak ! 
wavenc') + src = self.pipeline.get_by_name('espeak') + + logging.debug('pitch=%d rate=%d voice=%s text=%s' % (pitch, rate, + voice_name, text)) + + src.props.text = text + src.props.pitch = pitch + src.props.rate = rate + src.props.voice = voice_name + + self.restart_sound_device() + + def get_all_voices(self): + all_voices = {} + for i in gst.element_factory_make('espeak').props.voices: + name, language, dialect = i + #if name in ('en-rhotic','english_rp','english_wmids'): + # these voices don't produce sound + # continue + all_voices[language] = name + return all_voices + + def get_default_voice(self): + """Try to figure out the default voice, from the current locale ($LANG) + Fall back to espeak's voice called Default.""" + voices = self.get_all_voices() + + try: + lang = os.environ['LANG'] + if lang.find('.') > --1: + lang = lang[0:lang.find('.')] + lang = lang.replace('_', '-').lower() + except: + lang = "" + + best = "default" + + try: + best = voices[lang] + except: + try: + lang = lang[0:lang.find('-')] + best = voices[lang] + except: + pass + + logging.debug('Best voice for LANG %s seems to be %s' % + (lang, best)) + return best + + +def get_speech_manager(): + global _speech_manager + + if _speech_manager == None: + _speech_manager = SpeechManager() + return _speech_manager diff --git a/src/jarabe/view/keyhandler.py b/src/jarabe/view/keyhandler.py index d79bfe6..a71f260 100644 --- a/src/jarabe/view/keyhandler.py +++ b/src/jarabe/view/keyhandler.py @@ -60,13 +60,9 @@ _actions_table = { '<alt><shift>f': 'frame', '<alt><shift>q': 'quit_emulator', 'XF86Search': 'open_search', - '<alt><shift>o': 'open_search', - '<alt><shift>s': 'say_text', + '<alt><shift>o': 'open_search' } -SPEECH_DBUS_SERVICE = 'org.laptop.Speech' -SPEECH_DBUS_PATH = '/org/laptop/Speech' -SPEECH_DBUS_INTERFACE = 'org.laptop.Speech' _instance = None @@ -77,7 +73,6 @@ class KeyHandler(object): self._key_pressed = None self._keycode_pressed = 0 self._keystate_pressed = 0 - self._speech_proxy = 
None self._key_grabber = KeyGrabber() self._key_grabber.connect('key-pressed', @@ -114,28 +109,6 @@ class KeyHandler(object): sound.set_volume(volume) sound.set_muted(volume == 0) - def _get_speech_proxy(self): - if self._speech_proxy is None: - bus = dbus.SessionBus() - speech_obj = bus.get_object(SPEECH_DBUS_SERVICE, SPEECH_DBUS_PATH, - follow_name_owner_changes=True) - self._speech_proxy = dbus.Interface(speech_obj, - SPEECH_DBUS_INTERFACE) - return self._speech_proxy - - def _on_speech_err(self, ex): - logging.error('An error occurred with the ESpeak service: %r', ex) - - def _primary_selection_cb(self, clipboard, text, user_data): - logging.debug('KeyHandler._primary_selection_cb: %r', text) - if text: - self._get_speech_proxy().SayText(text, reply_handler=lambda: None, - error_handler=self._on_speech_err) - - def handle_say_text(self, event_time): - clipboard = gtk.clipboard_get(selection='PRIMARY') - clipboard.request_text(self._primary_selection_cb) - def handle_previous_window(self, event_time): self._tabbing_handler.previous_activity(event_time) -- 1.7.7.3
_______________________________________________ Sugar-devel mailing list Sugar-devel@lists.sugarlabs.org http://lists.sugarlabs.org/listinfo/sugar-devel