[Tutor] a Pygtk question sort of

richard kappler Fri, 11 Jan 2013 17:20:34 -0800

Before anybody jumps me about this question being inappropriate for this
list, yes, I know it probably is BUT, the two places where it might be
appropriate are down pretty hard, so this is my only option (I think).


The question is about PyGTK, I think, and only obliquely about pocketsphinx.
Appended below is code from the CMU Sphinx website that I wish to modify
ever so slightly, but I am not quite sure how to proceed. This code creates
a GUI with a button that stops and starts the pocketsphinx speech
recognition engine. When the button reads "Speak" and you push it, the
button goes blank (IIRC), pocketsphinx listens, performs speech recognition,
and sends the resulting text to the GUI, changing the button text back to
"Speak" and putting the pocketsphinx engine ostensibly (though not in
actuality) on pause.

I understand how the code below does all of this, including how
pocketsphinx works. Here's the issue: I want to use this code, or code like
it, in my bot program, so I don't need the GUI, the button, or any of that.
(Pocketsphinx can be imported and run directly in Python, but apparently
only to decode WAV files; as I understand it, it works as a real-time
decoder only if you run it through gst as shown in the appended code.) I
need pocketsphinx to work exactly as below, but to send the text output back
to the main program or to a different program (a chatbot) instead of the
GUI. Does that make sense? I've been poring over this code off and on for
months as I've been learning, and it's not quite as simple as dumping the
GUI method and the button. The problem is that the button also controls the
vader (which determines the beginning and end of utterances). A detailed
explanation is here:
http://cmusphinx.sourceforge.net/wiki/gstreamer

So can anyone give me some guidance here or point me towards a place to
discuss this? The forums at Python.org are under construction, the
CMUSphinx forums at Sourceforge are down (404) so I'm not quite sure where
to go for help.

regards, Richard

#!/usr/bin/env python

# Copyright (c) 2008 Carnegie Mellon University.
#
# You may modify and redistribute this file under the same terms as
# the CMU Sphinx system.  See
# http://cmusphinx.sourceforge.net/html/LICENSE for more information.

import pygtk
pygtk.require('2.0')
import gtk

import gobject
import pygst
pygst.require('0.10')
gobject.threads_init()
import gst

class DemoApp(object):
    """GStreamer/PocketSphinx demo application.

    Builds a small GTK window containing a text view and a "Speak" toggle
    button, plus a GStreamer pipeline that feeds audio through the ``vader``
    and ``pocketsphinx`` elements.  Recognition hypotheses are relayed over
    the pipeline bus as application messages and inserted into the text
    view.
    """

    # gst-launch style description of the recognition pipeline.  Adjacent
    # string literals are joined by the parser, so every piece must be
    # complete on its own line: a literal that wraps across lines is a
    # SyntaxError.  The element names ``vad`` and ``asr`` are looked up
    # again in init_gst() and button_clicked().
    PIPELINE_DESCRIPTION = (
        'gconfaudiosrc ! audioconvert ! audioresample '
        '! vader name=vad auto-threshold=true '
        '! pocketsphinx name=asr ! fakesink'
    )

    def __init__(self):
        """Initialize a DemoApp object: build the GUI, then the pipeline."""
        self.init_gui()
        self.init_gst()

    def init_gui(self):
        """Initialize the GUI components.

        Creates ``self.window``, ``self.textbuf``, ``self.text`` and
        ``self.button`` and shows the window.
        """
        self.window = gtk.Window()
        # Closing the window ends the GTK main loop.
        self.window.connect("delete-event", gtk.main_quit)
        self.window.set_default_size(400, 200)
        self.window.set_border_width(10)
        vbox = gtk.VBox()
        self.textbuf = gtk.TextBuffer()
        self.text = gtk.TextView(self.textbuf)
        self.text.set_wrap_mode(gtk.WRAP_WORD)
        vbox.pack_start(self.text)
        self.button = gtk.ToggleButton("Speak")
        self.button.connect('clicked', self.button_clicked)
        vbox.pack_start(self.button, False, False, 5)
        self.window.add(vbox)
        self.window.show_all()

    def init_gst(self):
        """Initialize the speech components.

        Creates ``self.pipeline`` from PIPELINE_DESCRIPTION, hooks the
        recognizer's ``partial_result`` and ``result`` signals, watches the
        bus for application messages and leaves the pipeline paused.
        """
        self.pipeline = gst.parse_launch(self.PIPELINE_DESCRIPTION)
        asr = self.pipeline.get_by_name('asr')
        asr.connect('partial_result', self.asr_partial_result)
        asr.connect('result', self.asr_result)
        asr.set_property('configured', True)

        bus = self.pipeline.get_bus()
        bus.add_signal_watch()
        bus.connect('message::application', self.application_message)

        # Start paused; button_clicked() switches the pipeline to PLAYING.
        self.pipeline.set_state(gst.STATE_PAUSED)

    def _post_hypothesis(self, asr, name, text, uttid):
        """Post an application message called *name* carrying a hypothesis.

        The message structure has two fields, ``hyp`` (the recognized text)
        and ``uttid`` (the utterance id), which application_message() reads
        back.
        """
        struct = gst.Structure(name)
        struct.set_value('hyp', text)
        struct.set_value('uttid', uttid)
        asr.post_message(gst.message_new_application(asr, struct))

    def asr_partial_result(self, asr, text, uttid):
        """Forward partial result signals on the bus to the main thread."""
        self._post_hypothesis(asr, 'partial_result', text, uttid)

    def asr_result(self, asr, text, uttid):
        """Forward result signals on the bus to the main thread."""
        self._post_hypothesis(asr, 'result', text, uttid)

    def application_message(self, bus, msg):
        """Receive application messages from the bus.

        Dispatches on the structure name: ``partial_result`` updates the
        provisional text; ``result`` inserts the final text, pauses the
        pipeline and releases the toggle button.  Other names are ignored.
        """
        msgtype = msg.structure.get_name()
        if msgtype == 'partial_result':
            self.partial_result(msg.structure['hyp'], msg.structure['uttid'])
        elif msgtype == 'result':
            self.final_result(msg.structure['hyp'], msg.structure['uttid'])
            self.pipeline.set_state(gst.STATE_PAUSED)
            self.button.set_active(False)

    def partial_result(self, hyp, uttid):
        """Delete any previous selection, insert text and select it."""
        # All this stuff appears as one single action
        self.textbuf.begin_user_action()
        self.textbuf.delete_selection(True, self.text.get_editable())
        self.textbuf.insert_at_cursor(hyp)
        # Move only the insert mark back over the new text so the text stays
        # selected; the next partial or final result then replaces it via
        # delete_selection().
        ins = self.textbuf.get_insert()
        cursor = self.textbuf.get_iter_at_mark(ins)
        cursor.backward_chars(len(hyp))
        self.textbuf.move_mark(ins, cursor)
        self.textbuf.end_user_action()

    def final_result(self, hyp, uttid):
        """Insert the final result."""
        # All this stuff appears as one single action
        self.textbuf.begin_user_action()
        self.textbuf.delete_selection(True, self.text.get_editable())
        self.textbuf.insert_at_cursor(hyp)
        self.textbuf.end_user_action()

    def button_clicked(self, button):
        """Handle button presses.

        Toggled on: relabel the button "Stop" and start the pipeline.
        Toggled off: relabel it "Speak" and set the vader's ``silent``
        property.
        """
        if button.get_active():
            button.set_label("Stop")
            self.pipeline.set_state(gst.STATE_PLAYING)
        else:
            button.set_label("Speak")
            # NOTE(review): presumably this makes the vader end the current
            # utterance so pocketsphinx emits its final result -- confirm
            # against the vader element documentation.
            vader = self.pipeline.get_by_name('vad')
            vader.set_property('silent', True)

# Build the window and the recognition pipeline (DemoApp.__init__ calls
# init_gui() and init_gst()), then run the GTK main loop.  init_gui()
# connects the window's delete-event to gtk.main_quit.
app = DemoApp()
gtk.main()





-- 

quando omni flunkus moritati

_______________________________________________
Tutor maillist  -  [email protected]
To unsubscribe or change subscription options:
http://mail.python.org/mailman/listinfo/tutor

[Tutor] a Pygtk question sort of

Reply via email to