Re: [Sugar-devel] [FEATURE] Global Text to Speech

Gonzalo Odiard Mon, 21 Nov 2011 18:23:28 -0800

Attached is a first attempt at an implementation, for discussion.

It uses gstreamer-plugins-espeak, which is already included in the latest
images and is used by all the activities that do text-to-speech, so it does
not add any new dependency.

A device icon is added to the frame to configure pitch and rate (speed),
and the hotkey alt-shift-s is used to speak the selected text.

The SpeechManager provides a simple say_text method that activities can use
if needed.

One pending piece of functionality is to add a list of languages, so that it
is translated once and shared as a single list, instead of having to
translate the list in every activity. There is code in the Speak activity
that does this;
I need to look at it.

Gonzalo

On Tue, Nov 15, 2011 at 8:16 PM, Samuel Klein <meta...@gmail.com> wrote:

> +1  This would be amazing.  This would also encourage more people to
> contribute to the speech engine for their language or dialect.
>
> On Tue, Nov 15, 2011 at 9:15 AM, Gonzalo Odiard <gonz...@laptop.org>wrote:
>
>> I want propose the feature "Global Text to Speech" [1]
>>
>> In fact, the functionality was already designed, and part implemented,
>> but is not working in Sugar.
>>
>> We have code in Speak, Read and Memorize activities implementing the call
>> to the backend,
>> the only missing part is a icon device to configure pitch and velocity.
>>
>> Gonzalo
>>
>> [1] http://wiki.sugarlabs.org/go/Features/GlobalTextToSpeech
>>
>>
>> _______________________________________________
>> Sugar-devel mailing list
>> Sugar-devel@lists.sugarlabs.org
>> http://lists.sugarlabs.org/listinfo/sugar-devel
>>
>>
>
>
> --
> Samuel Klein          identi.ca:sj           w:user:sj          +1 617
> 529 4266
>
>

From 8126ce7de174e4b440d93f8987d43ed987c6d823 Mon Sep 17 00:00:00 2001
From: Gonzalo Odiard <godi...@gmail.com>
Date: Fri, 18 Nov 2011 18:02:40 -0300
Subject: [PATCH] Implement text to speech in Sugar

A device is added to the frame to configure pitch and rate;
the voice is selected based on the LANG variable.

This patch is an initial implementation of the feature
http://wiki.sugarlabs.org/go/Features/Global_Text_To_Speech

Signed-off-by: Gonzalo Odiard <gonz...@laptop.org>
---
 extensions/deviceicon/Makefile.am |    1 +
 extensions/deviceicon/speech.py   |  137 ++++++++++++++++++++
 extensions/globalkey/Makefile.am  |    1 +
 extensions/globalkey/speech.py    |   24 ++++
 src/jarabe/model/Makefile.am      |    1 +
 src/jarabe/model/speech.py        |  250 +++++++++++++++++++++++++++++++++++++
 src/jarabe/view/keyhandler.py     |   29 +----
 7 files changed, 415 insertions(+), 28 deletions(-)
 create mode 100644 extensions/deviceicon/speech.py
 create mode 100644 extensions/globalkey/speech.py
 create mode 100644 src/jarabe/model/speech.py

diff --git a/extensions/deviceicon/Makefile.am b/extensions/deviceicon/Makefile.am
index 118d866..7ed1f77 100644
--- a/extensions/deviceicon/Makefile.am
+++ b/extensions/deviceicon/Makefile.am
@@ -5,5 +5,6 @@ sugar_PYTHON = 		\
 	battery.py	\
 	network.py	\
 	speaker.py	\
+	speech.py	\
 	touchpad.py	\
 	volume.py
diff --git a/extensions/deviceicon/speech.py b/extensions/deviceicon/speech.py
new file mode 100644
index 0000000..acf96e1
--- /dev/null
+++ b/extensions/deviceicon/speech.py
@@ -0,0 +1,137 @@
+# Copyright (C) 2008 Martin Dengler
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+from gettext import gettext as _
+import gconf
+
+import glib
+import gtk
+
+from sugar.graphics.icon import Icon
+from sugar.graphics.menuitem import MenuItem
+from sugar.graphics.tray import TrayIcon
+from sugar.graphics.palette import Palette
+from sugar.graphics.xocolor import XoColor
+
+from jarabe.frame.frameinvoker import FrameWidgetInvoker
+from jarabe.model import speech
+
+_ICON_NAME = 'microphone'
+
+
+class SpeechDeviceView(TrayIcon):
+
+    FRAME_POSITION_RELATIVE = 105
+
+    def __init__(self):
+        client = gconf.client_get_default()
+        self._color = XoColor(client.get_string('/desktop/sugar/user/color'))
+
+        TrayIcon.__init__(self, icon_name=_ICON_NAME, xo_color=self._color)
+
+        self.set_palette_invoker(FrameWidgetInvoker(self))
+
+        self._manager = speech.get_speech_manager()
+
+        self.connect('expose-event', self.__expose_event_cb)
+
+        self._icon_widget.connect('button-release-event',
+                                  self.__button_release_event_cb)
+
+    def create_palette(self):
+        label = glib.markup_escape_text(_('Speech'))
+        palette = SpeechPalette(label, manager=self._manager)
+        palette.set_group_id('frame')
+        return palette
+
+    def __button_release_event_cb(self, widget, event):
+        if event.button != 1:
+            return False
+
+        self.palette_invoker.notify_right_click()
+        return True
+
+    def __expose_event_cb(self, *args):
+        self._update_info()
+
+
+class SpeechPalette(Palette):
+
+    def __init__(self, primary_text, manager):
+        Palette.__init__(self, label=primary_text)
+
+        self._manager = manager
+
+        vbox = gtk.VBox()
+        self.set_content(vbox)
+
+        pitch_step = 10
+        self._adj_pitch = gtk.Adjustment(value=self._manager.get_pitch(),
+                                          lower=self._manager.MIN_PITCH,
+                                          upper=self._manager.MAX_PITCH,
+                                          step_incr=pitch_step,
+                                          page_incr=pitch_step,
+                                          page_size=pitch_step)
+        self._hscale_pitch = gtk.HScale(self._adj_pitch)
+        self._hscale_pitch.set_digits(0)
+        self._hscale_pitch.set_draw_value(False)
+
+        hbox_pitch = gtk.HBox()
+        hbox_pitch.pack_start(gtk.Label(_('Pitch')))
+        hbox_pitch.pack_start(self._hscale_pitch)
+        vbox.add(hbox_pitch)
+
+        rate_step = 10
+        self._adj_rate = gtk.Adjustment(value=self._manager.get_rate(),
+                                          lower=self._manager.MIN_RATE,
+                                          upper=self._manager.MAX_RATE,
+                                          step_incr=rate_step,
+                                          page_incr=rate_step,
+                                          page_size=rate_step)
+        self._hscale_rate = gtk.HScale(self._adj_rate)
+        self._hscale_rate.set_digits(0)
+        self._hscale_rate.set_draw_value(False)
+
+        hbox_rate = gtk.HBox()
+        hbox_rate.pack_start(gtk.Label(_('Rate')))
+        hbox_rate.pack_start(self._hscale_rate)
+        vbox.add(hbox_rate)
+        vbox.show_all()
+
+        self._play_item = MenuItem('Speak selected text')
+        self._play_icon = Icon(icon_name='player_play',
+                icon_size=gtk.ICON_SIZE_MENU)
+        self._play_item.set_image(self._play_icon)
+        self.menu.append(self._play_item)
+        self._play_item.show()
+
+        self._adj_pitch.connect('value_changed', self.__adj_pitch_changed_cb)
+        self._adj_rate.connect('value_changed', self.__adj_rate_changed_cb)
+        self._play_item.connect('activate', self.__play_activate_cb)
+        #self.connect('popup', self.__popup_cb)
+
+    def __adj_pitch_changed_cb(self, adjustement):
+        self._manager.set_pitch(int(adjustement.value))
+
+    def __adj_rate_changed_cb(self, adjustement):
+        self._manager.set_rate(int(adjustement.value))
+
+    def __play_activate_cb(self, menuitem_):
+        self._manager.say_selected_text()
+
+
+def setup(tray):
+    tray.add_device(SpeechDeviceView())
diff --git a/extensions/globalkey/Makefile.am b/extensions/globalkey/Makefile.am
index 69afac2..b6cbbd6 100644
--- a/extensions/globalkey/Makefile.am
+++ b/extensions/globalkey/Makefile.am
@@ -3,4 +3,5 @@ sugardir = $(pkgdatadir)/extensions/globalkey
 sugar_PYTHON = 		\
 	__init__.py	\
 	screenshot.py	\
+	speech.py	\
 	viewsource.py
diff --git a/extensions/globalkey/speech.py b/extensions/globalkey/speech.py
new file mode 100644
index 0000000..6e55dd5
--- /dev/null
+++ b/extensions/globalkey/speech.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2011 One Laptop Per Child
+# Copyright (C) 2011 Gonzalo Odiard
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+from jarabe.model import speech
+
+BOUND_KEYS = ['<alt><shift>s']
+
+
+def handle_key_press(key):
+    speech.get_speech_manager().say_selected_text()
diff --git a/src/jarabe/model/Makefile.am b/src/jarabe/model/Makefile.am
index 92e8712..2fc6b1c 100644
--- a/src/jarabe/model/Makefile.am
+++ b/src/jarabe/model/Makefile.am
@@ -16,4 +16,5 @@ sugar_PYTHON =			\
 	screen.py		\
         session.py		\
 	sound.py		\
+	speech.py		\
 	telepathyclient.py
diff --git a/src/jarabe/model/speech.py b/src/jarabe/model/speech.py
new file mode 100644
index 0000000..bb2da87
--- /dev/null
+++ b/src/jarabe/model/speech.py
@@ -0,0 +1,250 @@
+# Copyright (C) 2006-2008 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+import gconf
+
+from sugar import env
+
+import gst
+import gtk
+import gobject
+
+import os
+from gettext import gettext as _
+import logging
+
+# TRANS: The language pitch (range [0 - 99], default 50 for English)
+# Look at http://espeak.sourceforge.net/commands.html for details
+DEFAULT_PITCH = int(_('50'))
+
+
+# TRANS: The diction speed, in average words per minute (range [80 - 390],
+# default 170 for English).
+# Look at http://espeak.sourceforge.net/commands.html for details
+DEFAULT_RATE = int(_('170'))
+
+_speech_manager = None
+
+
+class SpeechManager():
+
+    MIN_PITCH = 0
+    MAX_PITCH = 99
+
+    MIN_RATE = 80
+    MAX_RATE = 390
+
+    def __init__(self):
+        self.player = AudioGrabGst()
+        logging.debug('SpeechManager setting default parameters')
+        self._voice_name = self.player.get_default_voice()
+        self._pitch = DEFAULT_PITCH
+        self._rate = DEFAULT_RATE
+        try:
+            self._loading = True
+            self.restore()
+            self._loading = False
+        except:
+            pass
+
+    def get_pitch(self):
+        return self._pitch
+
+    def get_rate(self):
+        return self._rate
+
+    def set_pitch(self, pitch):
+        self._pitch = pitch
+        if not self._loading:
+            self.save()
+
+    def set_rate(self, rate):
+        self._rate = rate
+        if not self._loading:
+            self.save()
+
+    def say_text(self, text):
+        if text:
+            self.player.speak(self._pitch, self._rate, self._voice_name, text)
+
+    def say_selected_text(self):
+        clipboard = gtk.clipboard_get(selection='PRIMARY')
+        clipboard.request_text(self._primary_selection_cb)
+
+    def _primary_selection_cb(self, clipboard, text, user_data):
+        logging.debug('SpeechManager._primary_selection_cb: %r', text)
+        self.say_text(text)
+
+    def save(self):
+        client = gconf.client_get_default()
+        client.set_int('/desktop/sugar/speech/pitch', self.get_pitch())
+        client.set_int('/desktop/sugar/speech/rate',
+                self.get_rate())
+        logging.debug('saving speech configuration pitch %s rate %s' %
+                (self._pitch, self._rate))
+
+    def restore(self):
+        client = gconf.client_get_default()
+        self.set_pitch(client.get_int('/desktop/sugar/speech/pitch'))
+        self.set_rate(client.get_int('/desktop/sugar/speech/rate'))
+        logging.debug('loading speech configuration pitch %s rate %s' %
+                (self._pitch, self._rate))
+
+
+class AudioGrabGst(gobject.GObject):
+
+    __gsignals__ = {
+        'new-buffer': (gobject.SIGNAL_RUN_FIRST, None, [gobject.TYPE_PYOBJECT])
+    }
+
+    def __init__(self):
+        gobject.GObject.__init__(self)
+        self.pipeline = None
+        self.quiet = True
+
+    def restart_sound_device(self):
+        self.quiet = False
+
+        self.pipeline.set_state(gst.STATE_NULL)
+        self.pipeline.set_state(gst.STATE_PLAYING)
+
+    def stop_sound_device(self):
+        if self.pipeline is None:
+            return
+
+        self.pipeline.set_state(gst.STATE_NULL)
+        # Shut theirs mouths down
+        self._new_buffer('')
+
+        self.quiet = True
+
+    def make_pipeline(self, cmd):
+        if self.pipeline is not None:
+            self.stop_sound_device()
+            del self.pipeline
+
+        # build a pipeline that reads the given file
+        # and sends it to both the real audio output
+        # and a fake one that we use to draw from
+        self.pipeline = gst.parse_launch(
+                cmd + ' ' \
+                '! decodebin ' \
+                '! tee name=tee ' \
+                'tee.! audioconvert ' \
+                    '! alsasink ' \
+                'tee.! queue ' \
+                    '! audioconvert ! fakesink name=sink')
+
+        def on_buffer(element, buffer, pad):
+            # we got a new buffer of data, ask for another
+            gobject.timeout_add(100, self._new_buffer, str(buffer))
+            return True
+
+        sink = self.pipeline.get_by_name('sink')
+        sink.props.signal_handoffs = True
+        sink.connect('handoff', on_buffer)
+
+        def gstmessage_cb(bus, message):
+            self._was_message = True
+
+            if message.type == gst.MESSAGE_WARNING:
+                def check_after_warnings():
+                    if not self._was_message:
+                        self.stop_sound_device()
+                    return True
+
+                logging.debug(message.type)
+                self._was_message = False
+                gobject.timeout_add(500, self._new_buffer, str(buffer))
+
+            elif  message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR):
+                logging.debug(message.type)
+                self.stop_sound_device()
+
+        self._was_message = False
+        bus = self.pipeline.get_bus()
+        bus.add_signal_watch()
+        bus.connect('message', gstmessage_cb)
+
+    def _new_buffer(self, buf):
+        if not self.quiet:
+            # pass captured audio to anyone who is interested
+            self.emit("new-buffer", buf)
+        return False
+
+    def speak(self, pitch, rate, voice_name, text):
+        # XXX workaround for http://bugs.sugarlabs.org/ticket/1801
+        if not [i for i in text if i.isalnum()]:
+            return
+
+        self.make_pipeline('espeak name=espeak ! wavenc')
+        src = self.pipeline.get_by_name('espeak')
+
+        logging.debug('pitch=%d rate=%d voice=%s text=%s' % (pitch, rate,
+                voice_name, text))
+
+        src.props.text = text
+        src.props.pitch = pitch
+        src.props.rate = rate
+        src.props.voice = voice_name
+
+        self.restart_sound_device()
+
+    def get_all_voices(self):
+        all_voices = {}
+        for i in gst.element_factory_make('espeak').props.voices:
+            name, language, dialect = i
+            #if name in ('en-rhotic','english_rp','english_wmids'):
+                # these voices don't produce sound
+             #   continue
+            all_voices[language] = name
+        return all_voices
+
+    def get_default_voice(self):
+        """Try to figure out the default voice, from the current locale ($LANG)
+           Fall back to espeak's voice called Default."""
+        voices = self.get_all_voices()
+
+        try:
+            lang = os.environ['LANG']
+            if lang.find('.') > --1:
+                lang = lang[0:lang.find('.')]
+                lang = lang.replace('_', '-').lower()
+        except:
+            lang = ""
+
+        best = "default"
+
+        try:
+            best = voices[lang]
+        except:
+            try:
+                lang = lang[0:lang.find('-')]
+                best = voices[lang]
+            except:
+                pass
+
+        logging.debug('Best voice for LANG %s seems to be %s' %
+                (lang, best))
+        return best
+
+
+def get_speech_manager():
+    global _speech_manager
+
+    if _speech_manager == None:
+        _speech_manager = SpeechManager()
+    return _speech_manager
diff --git a/src/jarabe/view/keyhandler.py b/src/jarabe/view/keyhandler.py
index d79bfe6..a71f260 100644
--- a/src/jarabe/view/keyhandler.py
+++ b/src/jarabe/view/keyhandler.py
@@ -60,13 +60,9 @@ _actions_table = {
     '<alt><shift>f': 'frame',
     '<alt><shift>q': 'quit_emulator',
     'XF86Search': 'open_search',
-    '<alt><shift>o': 'open_search',
-    '<alt><shift>s': 'say_text',
+    '<alt><shift>o': 'open_search'
 }
 
-SPEECH_DBUS_SERVICE = 'org.laptop.Speech'
-SPEECH_DBUS_PATH = '/org/laptop/Speech'
-SPEECH_DBUS_INTERFACE = 'org.laptop.Speech'
 
 _instance = None
 
@@ -77,7 +73,6 @@ class KeyHandler(object):
         self._key_pressed = None
         self._keycode_pressed = 0
         self._keystate_pressed = 0
-        self._speech_proxy = None
 
         self._key_grabber = KeyGrabber()
         self._key_grabber.connect('key-pressed',
@@ -114,28 +109,6 @@ class KeyHandler(object):
         sound.set_volume(volume)
         sound.set_muted(volume == 0)
 
-    def _get_speech_proxy(self):
-        if self._speech_proxy is None:
-            bus = dbus.SessionBus()
-            speech_obj = bus.get_object(SPEECH_DBUS_SERVICE, SPEECH_DBUS_PATH,
-                                        follow_name_owner_changes=True)
-            self._speech_proxy = dbus.Interface(speech_obj,
-                                                SPEECH_DBUS_INTERFACE)
-        return self._speech_proxy
-
-    def _on_speech_err(self, ex):
-        logging.error('An error occurred with the ESpeak service: %r', ex)
-
-    def _primary_selection_cb(self, clipboard, text, user_data):
-        logging.debug('KeyHandler._primary_selection_cb: %r', text)
-        if text:
-            self._get_speech_proxy().SayText(text, reply_handler=lambda: None,
-                error_handler=self._on_speech_err)
-
-    def handle_say_text(self, event_time):
-        clipboard = gtk.clipboard_get(selection='PRIMARY')
-        clipboard.request_text(self._primary_selection_cb)
-
     def handle_previous_window(self, event_time):
         self._tabbing_handler.previous_activity(event_time)
 
-- 
1.7.7.3

_______________________________________________
Sugar-devel mailing list
Sugar-devel@lists.sugarlabs.org
http://lists.sugarlabs.org/listinfo/sugar-devel

Re: [Sugar-devel] [FEATURE] Global Text to Speech

Reply via email to