Hi!

For my application, pyglet's video playback performance isn't
sufficient when playing back HD MPEG-2 video streams. I found one
relatively easy way to reduce CPU load: let the GPU perform the
necessary YUV to RGB color space conversion, as pointed out in this
excellent blog post by Michael Dominic Kostrzewa:
http://www.mdk.org.pl/2007/11/17/gl-colorspace-conversions
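
For reference, here is the work being moved off the CPU. This is a
minimal NumPy sketch (NumPy is my addition here; the patch itself
doesn't use it) of the same conversion the fragment shader below
performs per fragment, with identical constants:

    import numpy as np

    def yuv420p_to_rgb(y, u, v):
        # y: (h, w) uint8 plane; u, v: (h/2, w/2) uint8 chroma planes
        yf = 1.1643 * (y.astype(np.float32) / 255.0 - 0.0625)
        # upsample the half-resolution chroma planes to full size
        uf = (u.astype(np.float32) / 255.0 - 0.5).repeat(2, 0).repeat(2, 1)
        vf = (v.astype(np.float32) / 255.0 - 0.5).repeat(2, 0).repeat(2, 1)
        r = yf + 1.5958 * vf
        g = yf - 0.39173 * uf - 0.81290 * vf
        b = yf + 2.017 * uf
        rgb = np.dstack((r, g, b))
        return (np.clip(rgb, 0.0, 1.0) * 255.0).astype(np.uint8)

Doing this for every pixel of a 1920x1080 frame is exactly the kind
of per-pixel arithmetic the GPU handles almost for free.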

I managed to make media_player.py work with a fragment shader that
does the color space conversion (see the attached patch). The shader
was borrowed from here:
http://www.fourcc.org/source/YUV420P-OpenGL-GLSLang.c

This patch must be considered very experimental since there are a few
problems with the code:

1. Unfortunately, AVbin explicitly calls img_convert() in its
avbin_decode_video() function. img_convert() does the color space
conversion on the CPU, which makes avbin_decode_video() pretty
useless for our purpose. So I reimplemented it in avbin.py without
calling img_convert(). I had to use a private AVbin structure as
well as ctypes declarations for some libavcodec structures and one
function; these will probably break with the next release of AVbin.
The issue could easily be resolved by adding a function to AVbin's
official API that does not call img_convert() (which is deprecated
anyway). I have sent a feature request to the AVbin project.

2. To let the GPU perform the YUV to RGB conversion, the three image
planes (Y, U and V) from the video decoder's output need to be
present in three separate textures. I made this happen by
  - adding an array 'textures' to pyglet.media.Player which holds the
    three textures (plus a potential fourth alpha plane) alongside the
    pre-existing 'texture' field, which is now equivalent to
    textures[0] (aka the luminance channel), and
  - making AVbinSource.get_next_video_frame() return a list of three
    image instances.
The above isn't exactly good design.

3. Although the final video output is generally as expected, there
are occasional artifacts. I suspect these are caused by concurrent
access to AVbinStream.frame, which is the decoder's target buffer. I
haven't investigated this further yet; one possible fix is sketched
after this list.
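
One untested idea for problem 3, assuming the artifacts really are
caused by the decoder reusing its frame buffer while a queued frame
is still being read: copy the planes out right after decoding instead
of handing pyglet raw pointers into libavcodec's memory. A sketch of
what decode_video_packet() could build instead (my code, not part of
the attached patch; it costs one extra copy per plane):

    frame = self._video_stream[0].frame[0]
    images = []
    for i, (w, h) in enumerate(((width, height),
                                (width / 2, height / 2),
                                (width / 2, height / 2))):
        pitch = frame.linesize[i]
        # ctypes.string_at() returns an immutable copy, so a later
        # decode can no longer scribble over a frame that is still
        # waiting to be blitted
        data = ctypes.string_at(frame.data[i], pitch * h)
        images.append(image.ImageData(w, h, 'L', data, pitch))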

I am using pyglet-shaders: http://pyglet-shaders.googlecode.com

I haven't run any benchmarks yet to confirm that this actually lowers
CPU load significantly, but I'd be surprised if it didn't. A rough
way to check is sketched below.
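
A quick, unscientific measurement would be to sample this process's
CPU time from the pyglet clock while a clip plays, once with and once
without the shader path (my sketch, POSIX-only since it relies on
os.times()):

    import os
    import time
    import pyglet

    _last = [os.times(), time.time()]

    def report_cpu(dt):
        # fraction of wall time this process spent on the CPU
        # (user + system) since the last report
        now, wall = os.times(), time.time()
        used = (now[0] - _last[0][0]) + (now[1] - _last[0][1])
        print 'CPU load: %.1f%%' % (100.0 * used / (wall - _last[1]))
        _last[0], _last[1] = now, wall

    pyglet.clock.schedule_interval(report_cpu, 5.0)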

Hoping this is useful for someone:
Jan

Index: pyglet/media/__init__.py
===================================================================
--- pyglet/media/__init__.py    (Revision 2539)
+++ pyglet/media/__init__.py    (Arbeitskopie)
@@ -909,6 +909,7 @@

     _last_video_timestamp = None
     _texture = None
+    _textures = [None] * 4

     # Spacialisation attributes, preserved between audio players
     _volume = 1.0
@@ -1098,12 +1099,19 @@

     time = property(_get_time)

-    def _create_texture(self):
+    def _create_texture(self, index=0, width=None, height=None):
         video_format = self.source.video_format
-        self._texture = pyglet.image.Texture.create(
-            video_format.width, video_format.height, rectangle=True)
-        self._texture = self._texture.get_transform(flip_y=True)
-        self._texture.anchor_y = 0
+        if width is None: width = video_format.width
+        if height is None: height = video_format.height
+        tex = pyglet.image.Texture.create(
+            width, height, rectangle=True)
+        tex = tex.get_transform(flip_y=True)
+        tex.anchor_y = 0
+
+        if '_textures' not in self.__dict__:
+            # give this player its own list; the class attribute
+            # would otherwise be shared by all Player instances
+            self._textures = [None] * 4
+        self._textures[index] = tex
+
+        if index == 0:
+            self._texture = tex

     def get_texture(self):
         return self._texture
@@ -1135,11 +1143,13 @@
             self._last_video_timestamp = None
             return

-        image = self._groups[0].get_next_video_frame()
-        if image is not None:
-            if self._texture is None:
-                self._create_texture()
-            self._texture.blit_into(image, 0, 0, 0)
+        images = self._groups[0].get_next_video_frame()
+        if images is not None:
+            for i in range(len(images)):
+                if self._textures[i] is None:
+                    print "creating texture #", i
+                    self._create_texture(i, images[i].width, images[i].height)
+                self._textures[i].blit_into(images[i], 0, 0, 0)
             self._last_video_timestamp = ts

     def _set_eos_action(self, eos_action):
Index: pyglet/media/avbin.py
===================================================================
--- pyglet/media/avbin.py       (Revision 2539)
+++ pyglet/media/avbin.py       (Arbeitskopie)
@@ -81,10 +81,42 @@
 AVbinLogLevel = ctypes.c_int

 AVbinFileP = ctypes.c_void_p
-AVbinStreamP = ctypes.c_void_p

 Timestamp = ctypes.c_int64

+# libavcodec internals as of revision 13661
+# These may change in future versions
+
+class AVFrame(ctypes.Structure):
+    _fields_ = [
+        ('data', ctypes.c_char_p * 4),
+        ('linesize', ctypes.c_int * 4),
+        # additional fields skipped
+    ]
+
+# int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+#     int *got_picture_ptr,
+#     const uint8_t *buf, int buf_size);
+
+av.avcodec_decode_video.restype = ctypes.c_int
+av.avcodec_decode_video.argtypes = [ctypes.c_void_p,
+    ctypes.POINTER(AVFrame), ctypes.POINTER(ctypes.c_int), ctypes.c_void_p, ctypes.c_size_t]
+
+# AVbin internal stream representation (non-public, probably unstable)
+# as of version 7
+
+class AVbinStream(ctypes.Structure):
+    _fields_ = [
+        ('type', ctypes.c_int),
+        ('format_context', ctypes.c_void_p),
+        ('codec_context', ctypes.c_void_p),
+        ('frame', ctypes.POINTER(AVFrame)),
+    ]
+
+AVbinStreamP = ctypes.POINTER(AVbinStream)
+
+### end of unstable APIs
+
 class AVbinFileInfo(ctypes.Structure):
     _fields_ = [
         ('structure_size', ctypes.c_size_t),
@@ -163,7 +195,7 @@
 av.avbin_stream_info.argtypes = [AVbinFileP, ctypes.c_int,
                                  ctypes.POINTER(AVbinStreamInfo8)]

-av.avbin_open_stream.restype = ctypes.c_void_p
+av.avbin_open_stream.restype = AVbinStreamP
 av.avbin_open_stream.argtypes = [AVbinFileP, ctypes.c_int]
 av.avbin_close_stream.argtypes = [AVbinStreamP]

@@ -219,6 +251,7 @@

         # Decoded image.  0 == not decoded yet; None == Error or discarded
         self.image = 0
+        self.images = None # YUV image planes

         self.id = self._next_id
         self.__class__._next_id += 1
@@ -489,15 +522,32 @@
         height = self.video_format.height
-        pitch = width * 3
-        buffer = (ctypes.c_uint8 * (pitch * height))()
-        result = av.avbin_decode_video(self._video_stream,
-                                       packet.data, packet.size,
-                                       buffer)
+        result = self._decode_video(packet.data, packet.size)
+
         if result < 0:
-            image_data = None
+            packet.image = None
+            packet.images = None
         else:
-            image_data = image.ImageData(width, height, 'RGB', buffer, pitch)
-
-        packet.image = image_data
+            images = [
+                image.ImageData(
+                    width, height, 'L',
+                    self._video_stream[0].frame[0].data[0],
+                    self._video_stream[0].frame[0].linesize[0]
+                ),
+                image.ImageData(
+                    width/2, height/2, 'L',
+                    self._video_stream[0].frame[0].data[1],
+                    self._video_stream[0].frame[0].linesize[1]
+                ),
+                image.ImageData(
+                    width/2, height/2, 'L',
+                    self._video_stream[0].frame[0].data[2],
+                    self._video_stream[0].frame[0].linesize[2]
+                ),
+            ]
+
+            packet.image = images[0]
+            packet.images = images

         # Notify get_next_video_frame() that another one is ready.
         self._condition.acquire()
@@ -550,8 +600,28 @@

             if _debug:
                 print 'Returning', packet
-            return packet.image
+            return packet.images

+
+    # Similar to avbin_decode_video(), but without calling
+    # img_convert(): the frame is decoded into the (private)
+    # "frame" field of self._video_stream.
+
+    def _decode_video(self, data_in, size_in):
+        width = self.video_format.width
+        height = self.video_format.height
+        stream = self._video_stream
+
+        got_picture = ctypes.c_int()
+        used = av.avcodec_decode_video(stream[0].codec_context,
+                                       stream[0].frame, ctypes.byref(got_picture),
+                                       data_in, size_in)
+        if not got_picture.value:
+            return AVBIN_RESULT_ERROR
+
+        return used
+
 av.avbin_init()
 if pyglet.options['debug_media']:
     _debug = True
Index: pyglet/image/__init__.py
===================================================================
--- pyglet/image/__init__.py    (Revision 2539)
+++ pyglet/image/__init__.py    (Arbeitskopie)
@@ -1607,7 +1607,7 @@

     # no implementation of blit_to_texture yet (could use aux buffer)

-    def blit(self, x, y, z=0, width=None, height=None):
+    def get_coord_array(self, x, y, z=0, width=None, height=None):
         t = self.tex_coords
         x1 = x - self.anchor_x
         y1 = y - self.anchor_y
@@ -1622,7 +1622,10 @@
              x2,    y2,    z,     1.,
              t[9],  t[10], t[11], 1.,
              x1,    y2,    z,     1.)
-
+        return array
+
+    def blit(self, x, y, z=0, width=None, height=None):
+        array = self.get_coord_array(x, y, z, width, height)
         glPushAttrib(GL_ENABLE_BIT)
         glEnable(self.target)
         glBindTexture(self.target, self.id)
Index: examples/media_player.py
===================================================================
--- examples/media_player.py    (Revision 2539)
+++ examples/media_player.py    (Arbeitskopie)
@@ -44,6 +44,7 @@
 from pyglet.gl import *
 import pyglet
 from pyglet.window import key
+from shader import FragmentShader, ShaderError, ShaderProgram

 def draw_rect(x, y, width, height):
     glBegin(GL_LINE_LOOP)
@@ -298,16 +299,84 @@

         # Video
         if self.player.source and self.player.source.video_format:
-            self.player.get_texture().blit(self.video_x,
-                                           self.video_y,
-                                           width=self.video_width,
-                                           height=self.video_height)
+            texs = self.player._textures
+            if texs[0] is not None and texs[1] is not None and texs[2] is not None:
+                # push only when we will reach the glPopAttrib below
+                glPushAttrib(GL_ENABLE_BIT)
+                glUseProgram(shader.id)
+
+                arrays = [None]*3
+                for i in range(3):
+                    arrays[i] = texs[i].get_coord_array(
+                        self.video_x,
+                        self.video_y,
+                        width=self.video_width,
+                        height=self.video_height)
+
+                    glActiveTexture((GL_TEXTURE0, GL_TEXTURE1, GL_TEXTURE2)[i])
+                    glClientActiveTexture((GL_TEXTURE0, GL_TEXTURE1, GL_TEXTURE2)[i])
+                    l = glGetUniformLocationARB(shader.id, ("Ytex", "Utex", "Vtex")[i])
+                    glUniform1iARB(l, i)
+
+                    glEnable(texs[i].target)
+                    glBindTexture(texs[i].target, texs[i].id)
+
+                glBegin(GL_QUADS)
+
+                for p in range(4):
+                    x, y = arrays[0][p*8+4:p*8+6]
+                    glColor4f(1.0, 1.0, 1.0, 1.0)
+                    for i in range(3):
+                        u, v = arrays[i][p*8+0:p*8+2]
+                        glMultiTexCoord2fARB(
+                            (GL_TEXTURE0_ARB,
+                             GL_TEXTURE1_ARB,
+                             GL_TEXTURE2_ARB)[i],
+                            u, v)
+                    glVertex2f(x, y)
+                glEnd()
+                glUseProgram(0)
+                glPopAttrib()

         # GUI
         self.slider.value = self.player.time
         for control in self.controls:
             control.draw()

+shader = None
+def install_shaders():
+    global shader
+
+    # the fragment shader was borrowed from:
+    # http://www.fourcc.org/source/YUV420P-OpenGL-GLSLang.c
+    fsrc = """
+    uniform sampler2DRect Ytex;
+    uniform sampler2DRect Utex,Vtex;
+    void main(void) {
+      float r,g,b,y,u,v;
+
+      y=texture2DRect(Ytex,gl_TexCoord[0].xy).r;
+      u=texture2DRect(Utex,gl_TexCoord[1].xy).r;
+      v=texture2DRect(Vtex,gl_TexCoord[2].xy).r;
+
+      y=1.1643*(y-0.0625);
+      u=u-0.5;
+      v=v-0.5;
+
+      r=y+1.5958*v;
+      g=y-0.39173*u-0.81290*v;
+      b=y+2.017*u;
+
+      gl_FragColor=vec4(r,g,b,1.0);
+    }
+    """
+    fshader = FragmentShader([fsrc])
+
+    shader = ShaderProgram(fshader)
+    shader.use()
+    glUseProgram(0)
+
 if __name__ == '__main__':
     if len(sys.argv) < 2:
         print 'Usage: media_player.py <filename> [<filename> ...]'
@@ -319,6 +388,12 @@
         player = pyglet.media.Player()
         window = PlayerWindow(player)

+        try:
+            install_shaders()
+        except ShaderError, e:
+            print str(e)
+            sys.exit(2)
+
         source = pyglet.media.load(filename)
         player.queue(source)

@@ -328,6 +403,10 @@
         window.set_default_video_size()
         window.set_visible(True)

+        if not pyglet.gl.gl_info.have_extension('GL_ARB_multitexture'):
+            print 'no GL_ARB_multitexture'
+            sys.exit(-1)
+
         player.play()
         window.gui_update_state()
