Hi, using libjpeg's raw decode facility, we can make it output planar YUV 4:4:4 (the new DSPF_YUV444P) or 4:2:0 (DSPF_I420) and then let the graphics card do the YUV->RGB conversion.
This will greatly increase decoding speed when this type of acceleration is available in the gfxdriver. André
>From 8c99bf306ae422e7575effb1055f916475590b9b Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Andr=C3=A9=20Draszik?= <andre.dras...@st.com> Date: Thu, 3 Dec 2009 04:34:56 +0000 Subject: [PATCH 1/1] jpeg: implement raw libjpeg decode for possible HW acceleration MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit using libjpeg's raw decode facility, we can make it output planar YUV 4:4:4 or 4:2:0 and then use the graphics card to do the YUV->RGB conversion. This will greatly increase speed when this type of acceleration is available in the gfxdriver. Signed-off-by: André Draszik <andre.dras...@st.com> --- .../idirectfbimageprovider_jpeg.c | 346 +++++++++++++++++++- 1 files changed, 328 insertions(+), 18 deletions(-) diff --git a/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c b/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c index 205ceca..f450562 100644 --- a/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c +++ b/interfaces/IDirectFBImageProvider/idirectfbimageprovider_jpeg.c @@ -72,6 +72,8 @@ Construct( IDirectFBImageProvider *thiz, DIRECT_INTERFACE_IMPLEMENTATION( IDirectFBImageProvider, JPEG ) +D_DEBUG_DOMAIN( JPEG, "image/jpeg", "ImageProvider: JPEG"); + /* * private data struct of IDirectFBImageProvider_JPEG */ @@ -91,6 +93,8 @@ typedef struct { int image_height; /* height of image data */ CoreDFB *core; + + CoreSurface *decode_surface; } IDirectFBImageProvider_JPEG_data; static DirectResult @@ -368,6 +372,9 @@ IDirectFBImageProvider_JPEG_Destruct( IDirectFBImageProvider *thiz ) if (data->image) D_FREE( data->image ); + if (data->decode_surface) + dfb_surface_unref( data->decode_surface ); + DIRECT_DEALLOCATE_INTERFACE( thiz ); } @@ -393,6 +400,20 @@ IDirectFBImageProvider_JPEG_Release( IDirectFBImageProvider *thiz ) return DFB_OK; } +static void +JPEG_stretchblit (CardState *state, + DFBRectangle *src_rect, + DFBRectangle *dst_rect) +{ + D_DEBUG_AT (JPEG, "StretchBlit %dx%d -> 
%dx%d (%s -> %s)\n", + src_rect->w, src_rect->h, dst_rect->w, dst_rect->h, + dfb_pixelformat_name (state->source->config_format), + dfb_pixelformat_name (state->destination->config_format)); + + /* thankfully this is intelligent enough to do a simple blit if possible */ + dfb_gfxcard_stretchblit (src_rect, dst_rect, state); +} + static DFBResult IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, IDirectFBSurface *destination, @@ -405,8 +426,9 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, DFBSurfacePixelFormat format; IDirectFBSurface_data *dst_data; CoreSurface *dst_surface; - CoreSurfaceBufferLock lock; + CoreSurfaceBufferLock lock = { .pitch = 0 }; DIRenderCallbackResult cb_result = DIRCR_OK; + bool try_raw = true; DIRECT_INTERFACE_GET_DATA(IDirectFBImageProvider_JPEG) @@ -439,10 +461,6 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, rect = dst_data->area.wanted; } - ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock ); - if (ret) - return ret; - if (data->image && (rect.x || rect.y || rect.w != data->image_width || rect.h != data->image_height)) { D_FREE( data->image ); @@ -452,7 +470,8 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, } /* actual loading and rendering */ - if (!data->image) { + if (!data->image + && !data->decode_surface) { struct jpeg_decompress_struct cinfo; struct my_error_mgr jerr; JSAMPARRAY buffer; /* Output row buffer */ @@ -464,6 +483,7 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = jpeglib_panic; + /* we don't get here in case of 'raw decode' errors */ if (setjmp(jerr.setjmp_buffer)) { D_ERROR( "ImageProvider/JPEG: Error during decoding!\n" ); @@ -472,7 +492,8 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, if (data->image) { dfb_scale_linear_32( data->image, data->image_width, data->image_height, lock.addr, 
lock.pitch, &rect, dst_surface, &clip ); - dfb_surface_unlock_buffer( dst_surface, &lock ); + if (lock.pitch) + dfb_surface_unlock_buffer( dst_surface, &lock ); if (data->render_callback) { DFBRectangle r = { 0, 0, data->image_width, data->image_height }; @@ -482,12 +503,13 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, return DFB_INCOMPLETE; } - else + else if (lock.pitch) dfb_surface_unlock_buffer( dst_surface, &lock ); return DFB_FAILURE; } +restart: jpeg_create_decompress(&cinfo); jpeg_buffer_src(&cinfo, data->buffer, 0); jpeg_read_header(&cinfo, TRUE); @@ -501,10 +523,7 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, #endif jpeg_calc_output_dimensions(&cinfo); - if (cinfo.output_width == rect.w && cinfo.output_height == rect.h) { - direct = true; - } - else if (rect.x == 0 && rect.y == 0) { + if (rect.x == 0 && rect.y == 0) { #if JPEG_LIB_VERSION >= 70 cinfo.scale_num = 16; while (cinfo.scale_num > 1) { @@ -526,6 +545,267 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, cinfo.output_components = 3; + data->image_width = cinfo.output_width; + data->image_height = cinfo.output_height; + + /* can we do a raw decode? 
*/ + if (cinfo.num_components == 3 + && cinfo.jpeg_color_space == JCS_YCbCr + && try_raw) { + CoreSurfaceConfig config; + int pitch_y, pitch_cb, pitch_cr; + int offset_cb, offset_cr; + + pitch_cb = (cinfo.cur_comp_info[1]->downsampled_width + 7L) & ~7L; + pitch_cr = (cinfo.cur_comp_info[2]->downsampled_width + 7L) & ~7L; + + if (cinfo.max_h_samp_factor == 1 + && cinfo.max_v_samp_factor == 1 + && cinfo.cur_comp_info[0]->h_samp_factor == 1 + && cinfo.cur_comp_info[0]->v_samp_factor == 1 + && cinfo.cur_comp_info[1]->h_samp_factor == 1 + && cinfo.cur_comp_info[1]->v_samp_factor == 1 + && cinfo.cur_comp_info[2]->h_samp_factor == 1 + && cinfo.cur_comp_info[2]->v_samp_factor == 1) { + D_DEBUG_AT (JPEG, "doing raw YCbCr 4:4:4 JPEG decode\n"); + config.format = DSPF_YUV444P; + config.size.w = (data->image_width + 7L) & ~7L; + config.size.h = (data->image_height + 7L) & ~7L; + pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 7L) & ~7L; + offset_cb = pitch_y * config.size.h; + offset_cr = pitch_y * config.size.h; + } + else if (cinfo.max_h_samp_factor == 2 + && cinfo.max_v_samp_factor == 2 + && cinfo.cur_comp_info[0]->h_samp_factor == 2 + && cinfo.cur_comp_info[0]->v_samp_factor == 2 + && cinfo.cur_comp_info[1]->h_samp_factor == 1 + && cinfo.cur_comp_info[1]->v_samp_factor == 1 + && cinfo.cur_comp_info[2]->h_samp_factor == 1 + && cinfo.cur_comp_info[2]->v_samp_factor == 1) { + D_DEBUG_AT (JPEG, "doing raw YCbCr 4:2:0 JPEG decode\n"); + config.format = DSPF_I420; + config.size.w = (data->image_width + 15L) & ~15L; + config.size.h = (data->image_height + 15L) & ~15L; + pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 15L) & ~15L; + offset_cb = pitch_y * config.size.h; + offset_cr = pitch_cb * config.size.h/2; + } +#if 0 + else if (cinfo.max_h_samp_factor == 2 + && cinfo.max_v_samp_factor == 1 + && cinfo.cur_comp_info[0]->h_samp_factor == 2 + && cinfo.cur_comp_info[0]->v_samp_factor == 1 + && cinfo.cur_comp_info[1]->h_samp_factor == 1 + && 
cinfo.cur_comp_info[1]->v_samp_factor == 1 + && cinfo.cur_comp_info[2]->h_samp_factor == 1 + && cinfo.cur_comp_info[2]->v_samp_factor == 1) { + D_DEBUG_AT (JPEG, "doing raw YCbCr 4:2:2 JPEG decode\n"); + config.format = DSPF_YUV422P; + config.size.w = (data->image_width + 15L) & ~15L; + config.size.h = (data->image_height + 15L) & ~15L; + pitch_y = (cinfo.cur_comp_info[0]->downsampled_width + 15L) & ~15L; + offset_cb = pitch_y * config.size.h; + offset_cr = pitch_cb * config.size.h/2; + } +#endif + else + goto pure_software; + + + /* yes, we can handle this raw format! */ + cinfo.raw_data_out = true; + + CoreSurfaceBufferLock lock2; + + D_DEBUG_AT (JPEG, " -> output w/h: %d/%d " + "downsampled(0,1,2) w/h: %d/%d %d/%d %d/%d " + "pitches y/cb/cr: %d/%d/%d\n", + data->image_width, data->image_height, + cinfo.cur_comp_info[0]->downsampled_width, + cinfo.cur_comp_info[0]->downsampled_height, + cinfo.cur_comp_info[1]->downsampled_width, + cinfo.cur_comp_info[1]->downsampled_height, + cinfo.cur_comp_info[2]->downsampled_width, + cinfo.cur_comp_info[2]->downsampled_height, + pitch_y, pitch_cb, pitch_cr); + + config.flags = CSCONF_SIZE | CSCONF_FORMAT | CSCONF_CAPS; + config.caps = DSCAPS_VIDEOONLY; + #ifdef DIRECT_BUILD_DEBUG + config.caps |= DSCAPS_SHARED; + #endif + if (dfb_surface_create (data->core, + &config, + CSTF_NONE, + 0, + NULL, + &data->decode_surface)) { + D_ERROR ("failed to create temporary decode surface\n"); + goto pure_software; + } + + ret = dfb_surface_lock_buffer (data->decode_surface, + CSBR_BACK, + CSAID_CPU, CSAF_WRITE, &lock2); + if (ret) { + dfb_surface_unref (data->decode_surface); + data->decode_surface = NULL; + goto pure_software; + } + + /* Worst case, 2x2 chroma subsampling where an MCU is + 16 lines. libjpeg won't decode more than one MCU in one + go. 
*/ +#define MAX_MCULINES 16 + JSAMPROW Yrows[MAX_MCULINES]; + JSAMPROW Cbrows[MAX_MCULINES]; + JSAMPROW Crrows[MAX_MCULINES]; + + JSAMPARRAY jpeg_buffer[3] = { + [0] = Yrows, + [1] = Cbrows, + [2] = Crrows, + }; + + /* Initialize the various pointers to build a planar YUV + buffer. */ + void *yaddr = lock2.addr; + void *cbaddr = yaddr + offset_cb; + void *craddr = cbaddr + offset_cr; + int l; + + for (l = 0; l < MAX_MCULINES; ++l) { + Yrows[l] = yaddr + l * pitch_y; + Cbrows[l] = cbaddr + l * pitch_cb; + Crrows[l] = craddr + l * pitch_cr; + } + + jpeg_start_decompress (&cinfo); + + /* init a state, so that we can use gfxcard/blit to convert + YUV to requested destination format */ + CardState state; + dfb_state_init (&state, data->core); + dfb_state_set_source (&state, data->decode_surface); + dfb_state_set_destination (&state, dst_surface); + dfb_state_set_clip (&state, &clip); + + while (cinfo.output_scanline < data->image_height + && cb_result == DIRCR_OK) { + int x = jpeg_read_raw_data (&cinfo, jpeg_buffer, + MAX_MCULINES); + if (x <= 0) + /* Actually, x == 0 means that we don't have enough + data to continue decoding the picture. 
*/ + break; + + D_DEBUG_AT (JPEG, " -> decoded %d scanlines (out of %d)\n", + cinfo.output_scanline, data->image_height); + + for (l = 0; l < MAX_MCULINES ; l++) { + Yrows[l] += ((x * cinfo.cur_comp_info[0]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_y; + Cbrows[l] += ((x * cinfo.cur_comp_info[1]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_cb; + Crrows[l] += ((x * cinfo.cur_comp_info[2]->h_samp_factor) / cinfo.max_h_samp_factor) * pitch_cr; + } + + if (data->render_callback) { + DFBRectangle src_rect = { + .x = 0, + .y = cinfo.output_scanline - x, + .w = data->image_width, + .h = x, + }; + DFBRectangle r = src_rect; + float factor = (rect.h + / (float) data->image_height); + DFBRectangle dst_rect = { + .x = rect.x, + .y = (int) (src_rect.y * factor), + .w = rect.w, + .h = (int) (src_rect.h * factor), + }; + + D_DEBUG_AT (JPEG, " -> render callback %d,%d %dx%d -> %d,%d %dx%d\n", + src_rect.x, src_rect.y, src_rect.w, + src_rect.h, dst_rect.x, dst_rect.y, + dst_rect.w, dst_rect.h); + + JPEG_stretchblit (&state, &src_rect, &dst_rect); + + cb_result = data->render_callback (&r, + data->render_callback_context); + } + } + + D_DEBUG_AT (JPEG, " -> decoded %d scanlines (out of %d)\n", + cinfo.output_scanline, data->image_height); + + if (cinfo.output_scanline < data->image_height + || cb_result != DIRCR_OK) { + if (cb_result != DIRCR_OK) + D_ERROR ("raw decode failed after %d of %d scanlines, " + "trying pure software\n", + cinfo.output_scanline, cinfo.output_height); + jpeg_abort_decompress (&cinfo); + jpeg_destroy_decompress (&cinfo); + dfb_surface_unlock_buffer (data->decode_surface, &lock2); + dfb_surface_unref (data->decode_surface); + data->decode_surface = NULL; + + dfb_state_set_source (&state, NULL); + dfb_state_set_destination (&state, NULL); + dfb_state_destroy (&state); + + if (cb_result != DIRCR_OK) + return DFB_INTERRUPTED; + + try_raw = false; + goto restart; + } + + jpeg_finish_decompress (&cinfo); + jpeg_destroy_decompress (&cinfo); + + 
dfb_surface_unlock_buffer (data->decode_surface, &lock2); + + /* use DFB to convert raw YUV to destination format, and + apply any necessary additional clip/stretch */ + { + DFBRectangle src_rect, r; + + src_rect.x = 0; + src_rect.y = 0; + src_rect.w = data->image_width; + src_rect.h = cinfo.output_height; + r = src_rect; + + JPEG_stretchblit (&state, &src_rect, &rect); + + /* remove the state */ + dfb_state_set_source (&state, NULL); + dfb_state_set_destination (&state, NULL); + dfb_state_destroy (&state); + + if (data->render_callback) + data->render_callback (&src_rect, + data->render_callback_context); + } + + return DFB_OK; + } + +pure_software: + ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock ); + if (ret) { + jpeg_abort_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + return ret; + } + + if (cinfo.output_width == rect.w && cinfo.output_height == rect.h) + direct = true; + switch (dst_surface->config.format) { case DSPF_NV16: uv_offset = dst_surface->config.size.h * lock.pitch; @@ -546,9 +826,6 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, jpeg_start_decompress(&cinfo); - data->image_width = cinfo.output_width; - data->image_height = cinfo.output_height; - row_stride = cinfo.output_width * 3; buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr) &cinfo, @@ -618,17 +895,51 @@ IDirectFBImageProvider_JPEG_RenderTo( IDirectFBImageProvider *thiz, jpeg_finish_decompress(&cinfo); } jpeg_destroy_decompress(&cinfo); + + dfb_surface_unlock_buffer( dst_surface, &lock ); + } + else if (data->decode_surface) { + CardState state; + DFBRectangle src_rect = { + .x = 0, + .y = 0, + .w = data->image_width, + .h = data->image_height + }; + + /* use DFB to convert raw YUV to destination format, and + apply any necessary additional clip/stretch */ + dfb_state_init (&state, data->core); + dfb_state_set_source (&state, data->decode_surface); + dfb_state_set_destination (&state, dst_surface); + dfb_state_set_clip 
(&state, &clip); + + JPEG_stretchblit (&state, &src_rect, &rect); + + /* remove the state */ + dfb_state_set_source (&state, NULL); + dfb_state_set_destination (&state, NULL); + dfb_state_destroy (&state); + + if (data->render_callback) { + DFBRectangle r = { 0, 0, data->image_width, data->image_height }; + data->render_callback (&r, data->render_callback_context); + } } else { + ret = dfb_surface_lock_buffer( dst_surface, CSBR_BACK, CSAID_CPU, CSAF_WRITE, &lock ); + if (ret) + return ret; + dfb_scale_linear_32( data->image, data->image_width, data->image_height, lock.addr, lock.pitch, &rect, dst_surface, &clip ); if (data->render_callback) { DFBRectangle r = { 0, 0, data->image_width, data->image_height }; data->render_callback( &r, data->render_callback_context ); } - } - dfb_surface_unlock_buffer( dst_surface, &lock ); + dfb_surface_unlock_buffer( dst_surface, &lock ); + } if (cb_result != DIRCR_OK) return DFB_INTERRUPTED; @@ -676,4 +987,3 @@ IDirectFBImageProvider_JPEG_GetImageDescription( IDirectFBImageProvider *thiz, return DFB_OK; } - -- 1.6.3.3
_______________________________________________ directfb-dev mailing list directfb-dev@directfb.org http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev