Hi Ilia, On 27/06/13 12:26, Ilia Mirkin wrote: > Adds H.264 and MPEG2 codec support via VP2, using firmware from the > blob. Acceleration is supported at the bitstream level for H.264 and > IDCT level for MPEG2. > > Known issues: > - H.264 interlaced doesn't render properly > - H.264 shows very occasional artifacts on a small fraction of videos > - MPEG2 + VDPAU shows frequent but small artifacts, which aren't there > when using XvMC on the same videos > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
Big thanks for working on this. I believe the hardware is capable of accelerating IDCT for VC1. Do you have any plans for it? As far as I know mesa in general is keen on keeping trailing statements on the next line, as well as a 78(80)-character line length. > --- > > I did try to work out the known issues above, but so far to no avail. > > The kernel support for these engines is not in mainline yet, but it's likely > going to appear in 3.11. I figured that there would be a bunch of feedback so > I might as well send it out early. > > I played around a lot with XvMC performance, the mb function shows up as #1 in > the profiles, with the SSE4.2 optimizations I put in, it drops to #2. > Something clever can likely be done to improve VDPAU performance as well, > e.g. using SSE to do the inverse quantization operations, but I've left that Curious how many machines with a VP2 card have an SSE4.2-capable CPU? Mine is only SSE4.1 ;( > out. (It gets tricky because a lot of the data is 0's, so it's unclear whether > it's faster to use SSE to do operations on everything or one-at-a-time on the > non-0's like I have it now.) Even with these, XvMC ends up ~20% faster than > plain CPU decoding, and likely that percent improves on older CPUs that can't > decode MPEG2 quite as quickly. VDPAU provides further improvements (likely > because it is able to skip mb's while XvMC can't), but there are artifacts for > reasons unknown. 
> > Note that in order to get XvMC to work, you need my previous patch (or > something similar) since otherwise libXvMCnouveau can't be dlopen'd: > http://lists.freedesktop.org/archives/mesa-dev/2013-June/040949.html > > If you want to test it out, the kernel patch: > http://lists.freedesktop.org/archives/nouveau/2013-June/012821.html > > Firmware: > https://github.com/imirkin/re-vp2/blob/master/extract_firmware.py > > src/gallium/drivers/nv50/Makefile.sources | 5 +- > src/gallium/drivers/nv50/nv50_context.c | 13 +- > src/gallium/drivers/nv50/nv50_context.h | 24 + > src/gallium/drivers/nv50/nv50_miptree.c | 27 ++ > src/gallium/drivers/nv50/nv50_resource.h | 1 + > src/gallium/drivers/nv50/nv50_screen.c | 13 +- > src/gallium/drivers/nv50/nv50_winsys.h | 4 + > src/gallium/drivers/nv50/nv84_video.c | 778 > ++++++++++++++++++++++++++++++ > src/gallium/drivers/nv50/nv84_video.h | 134 +++++ > src/gallium/drivers/nv50/nv84_video_bsp.c | 251 ++++++++++ > src/gallium/drivers/nv50/nv84_video_vp.c | 521 ++++++++++++++++++++ > 11 files changed, 1768 insertions(+), 3 deletions(-) > create mode 100644 src/gallium/drivers/nv50/nv84_video.c > create mode 100644 src/gallium/drivers/nv50/nv84_video.h > create mode 100644 src/gallium/drivers/nv50/nv84_video_bsp.c > create mode 100644 src/gallium/drivers/nv50/nv84_video_vp.c > ... > diff --git a/src/gallium/drivers/nv50/nv84_video.c > b/src/gallium/drivers/nv50/nv84_video.c > new file mode 100644 > index 0000000..064178c > --- /dev/null > +++ b/src/gallium/drivers/nv50/nv84_video.c > @@ -0,0 +1,778 @@ ... 
> +static int > +nv84_copy_firmware(const char *path, void *dest, size_t len) ssize_t len To prevent signed/unsigned issues in conditional below > +{ > + int fd = open(path, O_RDONLY | O_CLOEXEC); > + ssize_t r; > + if (fd < 0) { > + fprintf(stderr, "opening firmware file %s failed: %m\n", path); > + return 1; > + } > + r = read(fd, dest, len); > + close(fd); > + > + if (r != len) { Here ^^ > + fprintf(stderr, "reading firwmare file %s failed: %m\n", path); > + return 1; > + } > + > + return 0; > +} > + ... > +static void > +nv84_decoder_decode_bitstream_mpeg12(struct pipe_video_decoder *decoder, > + struct pipe_video_buffer *video_target, > + struct pipe_picture_desc *picture, > + unsigned num_buffers, > + const void *const *data, > + const unsigned *num_bytes) > +{ > + struct nv84_decoder *dec = (struct nv84_decoder *)decoder; > + struct nv84_video_buffer *target = (struct nv84_video_buffer > *)video_target; > + > + struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc > *)picture; > + > + assert(target->base.buffer_format == PIPE_FORMAT_NV12); This can be written as assert(video_target->buffer_format == PIPE_FORMAT_NV12); > + > + vl_mpg12_bs_decode(dec->mpeg12_bs, > + video_target, > + desc, > + num_buffers, > + data, > + num_bytes); And then the temporary variables can be removed, as you've done in nv84_decoder_end_frame_mpeg12() > +} > + ... 
> + > +struct pipe_video_decoder * > +nv84_create_decoder(struct pipe_context *context, > + enum pipe_video_profile profile, > + enum pipe_video_entrypoint entrypoint, > + enum pipe_video_chroma_format chroma_format, > + unsigned width, unsigned height, > + unsigned max_references, > + bool chunked_decode) > +{ > + struct nv50_context *nv50 = (struct nv50_context *)context; > + struct nouveau_screen *screen = &nv50->screen->base; > + struct nv84_decoder *dec; > + struct nouveau_pushbuf *bsp_push, *vp_push; > + struct nv50_surface surf; > + struct nv50_miptree mip; > + union pipe_color_union color; > + struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; > + int ret, i; > + int is_h264 = u_reduce_video_profile(profile) == > PIPE_VIDEO_CODEC_MPEG4_AVC; > + int is_mpeg12 = u_reduce_video_profile(profile) == > PIPE_VIDEO_CODEC_MPEG12; > + struct nouveau_pushbuf_refn fence_ref[] = { > + { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, > + }; > + > + > + if (getenv("XVMC_VL")) > + return vl_create_decoder(context, profile, entrypoint, > + chroma_format, width, height, > + max_references, chunked_decode); > + > + if ((is_h264 && entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) || > + (is_mpeg12 && entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT)) { > + debug_printf("%x\n", entrypoint); > + return NULL; > + } > + > + if (!is_h264 && !is_mpeg12) { > + debug_printf("invalid profile: %x\n", profile); > + return NULL; > + } > + > + dec = CALLOC_STRUCT(nv84_decoder); > + if (!dec) return NULL; > + > + dec->base.context = context; > + dec->base.profile = profile; > + dec->base.entrypoint = entrypoint; > + dec->base.chroma_format = chroma_format; > + dec->base.width = width; > + dec->base.height = height; > + dec->base.max_references = max_references; > + dec->base.destroy = nv84_decoder_destroy; > + dec->base.flush = nv84_decoder_flush; > + if (is_h264) { > + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_h264; > + dec->base.begin_frame = 
nv84_decoder_begin_frame_h264; > + dec->base.end_frame = nv84_decoder_end_frame_h264; > + > + dec->frame_mbs = mb(dec->base.width) * mb_half(dec->base.height) * 2; > + dec->frame_size = dec->frame_mbs << 8; > + dec->vpring_deblock = align(0x30 * dec->frame_mbs, 0x100); > + dec->vpring_residual = 0x2000 + MAX2(0x32000, 0x600 * dec->frame_mbs); > + dec->vpring_ctrl = MAX2(0x10000, align(0x1080 + 0x144 * > dec->frame_mbs, 0x100)); > + } else if (is_mpeg12) { > + dec->base.decode_macroblock = nv84_decoder_decode_macroblock; > + dec->base.begin_frame = nv84_decoder_begin_frame_mpeg12; > + dec->base.end_frame = nv84_decoder_end_frame_mpeg12; > + > + if (entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { > + dec->mpeg12_bs = CALLOC_STRUCT(vl_mpg12_bs); > + if (!dec->mpeg12_bs) > + goto fail; > + vl_mpg12_bs_init(dec->mpeg12_bs, &dec->base); > + dec->base.decode_bitstream = nv84_decoder_decode_bitstream_mpeg12; > + } > + } else { > + goto fail; Seems to be handled already by - if (!is_h264 && !is_mpeg12)... > + } > + > + ret = nouveau_client_new(screen->device, &dec->client); > + if (ret) > + goto fail; Is there any particular reason for using a variable to store the return value through this function? Me thinks it can be safely purged, making the code a bit cleaner > + > + if (is_h264) { > + ret = nouveau_object_new(&screen->device->object, 0, > + NOUVEAU_FIFO_CHANNEL_CLASS, > + &nv04_data, sizeof(nv04_data), > &dec->bsp_channel); ... > + if (is_h264) { > + /* Zero out some parts of mbring/vpring. there's gotta be some cleaner > way > + * of doing this... perhaps makes sense to just copy the relevant logic > + * here. 
*/ > + color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0; > + surf.offset = dec->frame_size; > + surf.width = 64; > + surf.height = (max_references + 1) * dec->frame_mbs / 4; > + surf.depth = 1; > + surf.base.format = PIPE_FORMAT_B8G8R8A8_UNORM; > + surf.base.u.tex.level = 0; > + surf.base.texture = &mip.base.base; > + mip.level[0].tile_mode = 0; > + mip.level[0].pitch = surf.width * 4; > + mip.base.domain = NOUVEAU_BO_VRAM; > + mip.base.bo = dec->mbring; > + context->clear_render_target(context, (struct pipe_surface *)&surf, > &color, 0, 0, 64, 4760); > + surf.offset = dec->vpring->size / 2 - 0x1000; > + surf.width = 1024; > + surf.height = 1; > + mip.level[0].pitch = surf.width * 4; > + mip.base.bo = dec->vpring; > + context->clear_render_target(context, (struct pipe_surface *)&surf, > &color, 0, 0, 1024, 1); > + surf.offset = dec->vpring->size - 0x1000; > + context->clear_render_target(context, (struct pipe_surface *)&surf, > &color, 0, 0, 1024, 1); > + > + PUSH_SPACE(screen->pushbuf, 5); > + fence_ref[0].bo = dec->fence; > + nouveau_pushbuf_refn(screen->pushbuf, fence_ref, 1); > + /* The clear_render_target is done via 3D engine, so use it to write > to a > + * sempahore to indicate that it's done. > + */ > + BEGIN_NV04(screen->pushbuf, SUBC_3D(0x1b00), 4); > + PUSH_DATAh(screen->pushbuf, dec->fence->offset); > + PUSH_DATA (screen->pushbuf, dec->fence->offset); > + PUSH_DATA (screen->pushbuf, 1); > + PUSH_DATA (screen->pushbuf, 0xf010); > + PUSH_KICK (screen->pushbuf); > + > + PUSH_SPACE(bsp_push, 2 + 12 + 2 + 4 + 3); > + > + BEGIN_NV04(bsp_push, SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); > + PUSH_DATA (bsp_push, dec->bsp->handle); > + > + BEGIN_NV04(bsp_push, SUBC_BSP(0x180), 11); > + for (i = 0; i < 11; i++) Any idea where 11 comes from ? Is it related to some other parameter ? > + PUSH_DATA(bsp_push, nv04_data.vram); > + BEGIN_NV04(bsp_push, SUBC_BSP(0x1b8), 1); > + PUSH_DATA (bsp_push, nv04_data.vram); > + ... 
> +struct pipe_video_buffer * > +nv84_video_buffer_create(struct pipe_context *pipe, > + const struct pipe_video_buffer *template) > +{ ... > + buffer->base.buffer_format = template->buffer_format; > + buffer->base.context = pipe; > + buffer->base.destroy = nv84_video_buffer_destroy; > + buffer->base.chroma_format = template->chroma_format; > + buffer->base.width = template->width; > + buffer->base.height = template->height; > + buffer->base.get_sampler_view_planes = > nv84_video_buffer_sampler_view_planes; > + buffer->base.get_sampler_view_components = > nv84_video_buffer_sampler_view_components; > + buffer->base.get_surfaces = nv84_video_buffer_surfaces; > + buffer->base.interlaced = true; By storing the number of planes, will be able to demagic some constants later on buffer->num_planes = 2; > + > + memset(&templ, 0, sizeof(templ)); > + templ.target = PIPE_TEXTURE_2D_ARRAY; > + templ.depth0 = 1; > + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; > + templ.format = PIPE_FORMAT_R8_UNORM; > + templ.width0 = align(template->width, 2); > + templ.height0 = align(template->height, 4) / 2; > + templ.flags = NV50_RESOURCE_FLAG_VIDEO; > + templ.array_size = 2; > + > + cfg.nv50.tile_mode = 0x20; > + cfg.nv50.memtype = 0x70; > + > + buffer->resources[0] = pipe->screen->resource_create(pipe->screen, > &templ); > + if (!buffer->resources[0]) > + goto error; > + > + templ.format = PIPE_FORMAT_R8G8_UNORM; > + templ.width0 /= 2; > + templ.height0 /= 2; > + buffer->resources[1] = pipe->screen->resource_create(pipe->screen, > &templ); > + if (!buffer->resources[1]) > + goto error; I believe that the nvc0 version of the code is easier to read (bikeshed) for (i = 1; i < buffer->num_planes; ++i) { buffer->resources[i] = pipe->screen->resource_create(pipe->screen, &templ); if (!buffer->resources[i]) goto error; } > + > + mt0 = nv50_miptree(buffer->resources[0]); > + mt1 = nv50_miptree(buffer->resources[1]); > + > + bo_size = mt0->total_size + mt1->total_size; > + if 
(nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, > 0, > + bo_size, &cfg, &buffer->interlaced)) > + goto error; > + /* XXX Change reference frame management so that this is only allocated in > + * the decoder when necessary. */ > + if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, > 0, > + bo_size, &cfg, &buffer->full)) > + goto error; > + > + mt0->base.bo = buffer->interlaced; > + mt0->base.domain = NOUVEAU_BO_VRAM; > + mt0->base.offset = 0; > + mt0->base.address = buffer->interlaced->offset; IMHO this looks a bit easier to grasp mt0->base.address = buffer->interlaced->offset + mt0->base.offset; > + nouveau_bo_ref(buffer->interlaced, &empty); > + > + mt1->base.bo = buffer->interlaced; > + mt1->base.domain = NOUVEAU_BO_VRAM; > + mt1->base.offset = mt0->layer_stride * 2; > + mt1->base.address = buffer->interlaced->offset + mt0->layer_stride * 2; Similar mt1->base.address = buffer->interlaced->offset + mt1->base.offset; > + nouveau_bo_ref(buffer->interlaced, &empty); > + > + memset(&sv_templ, 0, sizeof(sv_templ)); > + for (component = 0, i = 0; i < 2; ++i ) { for (component = 0, i = 0; i < buffer->num_planes; ++i ) { > + struct pipe_resource *res = buffer->resources[i]; > + unsigned nr_components = util_format_get_nr_components(res->format); > + > + u_sampler_view_default_template(&sv_templ, res, res->format); > + buffer->sampler_view_planes[i] = pipe->create_sampler_view(pipe, res, > &sv_templ); > + if (!buffer->sampler_view_planes[i]) > + goto error; > + > + for (j = 0; j < nr_components; ++j, ++component) { > + sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = > PIPE_SWIZZLE_RED + j; > + sv_templ.swizzle_a = PIPE_SWIZZLE_ONE; > + > + buffer->sampler_view_components[component] = > pipe->create_sampler_view(pipe, res, &sv_templ); > + if (!buffer->sampler_view_components[component]) > + goto error; > + } > + } > + > + memset(&surf_templ, 0, sizeof(surf_templ)); > + for (j = 0; j < 2; ++j) { for (j = 0; j < 
buffer->num_planes; ++j) { > + surf_templ.format = buffer->resources[j]->format; > + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0; > + buffer->surfaces[j * 2] = pipe->create_surface(pipe, > buffer->resources[j], &surf_templ); > + if (!buffer->surfaces[j * 2]) > + goto error; > + > + surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1; > + buffer->surfaces[j * 2 + 1] = pipe->create_surface(pipe, > buffer->resources[j], &surf_templ); > + if (!buffer->surfaces[j * 2 + 1]) > + goto error; > + } > + > + return &buffer->base; > + > +error: > + nv84_video_buffer_destroy(&buffer->base); > + return NULL; > +} > + > +int > +nv84_screen_get_video_param(struct pipe_screen *pscreen, > + enum pipe_video_profile profile, > + enum pipe_video_cap param) > +{ > + switch (param) { > + case PIPE_VIDEO_CAP_SUPPORTED: > + return u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG4_AVC || > + u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12; switch (u_reduce_video_profile(profile)) { case PIPE_VIDEO_CODEC_MPEG12: case PIPE_VIDEO_CODEC_MPEG4_AVC: return true; case PIPE_VIDEO_CODEC_VC1: /* TODO: Hardware is capable of IDCT acceleration for VC1*/ case PIPE_VIDEO_CODEC_MPEG4: default: return false; } > + case PIPE_VIDEO_CAP_NPOT_TEXTURES: > + return 1; > + case PIPE_VIDEO_CAP_MAX_WIDTH: > + case PIPE_VIDEO_CAP_MAX_HEIGHT: > + return 2048; > + case PIPE_VIDEO_CAP_PREFERED_FORMAT: > + return PIPE_FORMAT_NV12; > + case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: > + case PIPE_VIDEO_CAP_PREFERS_INTERLACED: > + return true; > + case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: > + return false; > + default: > + debug_printf("unknown video param: %d\n", param); > + return 0; > + } > +} > + > +boolean > +nv84_screen_video_supported(struct pipe_screen *screen, > + enum pipe_format format, > + enum pipe_video_profile profile) > +{ > + return format == PIPE_FORMAT_NV12; Will this work when we have XVMC_VL set ? 
> +} > diff --git a/src/gallium/drivers/nv50/nv84_video.h > b/src/gallium/drivers/nv50/nv84_video.h > new file mode 100644 > index 0000000..4ff8cf3 > --- /dev/null > +++ b/src/gallium/drivers/nv50/nv84_video.h ... > +struct nv84_video_buffer { > + struct pipe_video_buffer base; unsigned num_planes; > + struct pipe_resource *resources[VL_NUM_COMPONENTS]; > + struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS]; > + struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS]; > + struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; > + > + struct nouveau_bo *interlaced, *full; > + int mvidx; > + unsigned frame_num, frame_num_max; > +}; > + Looking at the params associated with each video engine, I was wondering about compacting it into a struct (names chosen are the first thing that came to mind) struct nv84_decoder_eng { struct nouveau_object *obj; struct nouveau_object *channel; struct nouveau_pushbuf *pushbuf; struct nouveau_bufctx *bufctx; struct nouveau_bo *fw; struct nouveau_bo *data; } and then having an enum for the different engine types enum nv84_decoder_eng_type { BSP = 0, VP }; #define NV84_DECODER_ENG_NUM VP + 1 > +struct nv84_decoder { > + struct pipe_video_decoder base; > + struct nouveau_client *client; > + struct nouveau_object *bsp_channel, *vp_channel, *bsp, *vp; > + struct nouveau_pushbuf *bsp_pushbuf, *vp_pushbuf; > + struct nouveau_bufctx *bsp_bufctx, *vp_bufctx; Then the struct will look a bit cleaner struct nv84_decoder { struct pipe_video_decoder base; struct nouveau_client *client; struct nv84_decoder_eng engs[NV84_DECODER_ENG_NUM]; > + > + struct nouveau_bo *bsp_fw, *bsp_data; > + struct nouveau_bo *vp_fw, *vp_data; > + struct nouveau_bo *mbring, *vpring; > + > + /* > + * states: > + * 0: init > + * 1: vpring/mbring cleared, bsp is ready > + * 2: bsp is done, vp is ready > + * and then vp it back to 1 > + */ > + struct nouveau_bo *fence; > + > + struct nouveau_bo *bitstream; > + struct nouveau_bo *vp_params; > + > + 
size_t vp_fw2_offset; > + > + unsigned frame_mbs, frame_size; > + /* VPRING layout: > + RESIDUAL > + CTRL > + DEBLOCK > + 0x1000 > + */ > + unsigned vpring_deblock, vpring_residual, vpring_ctrl; > + > + > + struct vl_mpg12_bs *mpeg12_bs; > + > + struct nouveau_bo *mpeg12_bo; > + void *mpeg12_mb_info; > + uint16_t *mpeg12_data; > + const int *zscan; > + uint8_t mpeg12_intra_matrix[64]; > + uint8_t mpeg12_non_intra_matrix[64]; > +}; > + ... > +static INLINE uint32_t mb(uint32_t coord) > +{ > + return (coord + 0xf)>>4; > +} > + > +static INLINE uint32_t mb_half(uint32_t coord) > +{ > + return (coord + 0x1f)>>5; > +} How about moving these into nouveau_video.h? (and removing the duplicate copy from nvc0_video.h) Might be better as a follow-on patch ... > diff --git a/src/gallium/drivers/nv50/nv84_video_vp.c > b/src/gallium/drivers/nv50/nv84_video_vp.c > new file mode 100644 > index 0000000..60c0848 > --- /dev/null > +++ b/src/gallium/drivers/nv50/nv84_video_vp.c > @@ -0,0 +1,521 @@ > +/* > + * Copyright 2013 Ilia Mirkin > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + */ > + > +#include <immintrin.h> Wrap this include in #ifdef __SSE4_2__ #endif considering its definitions are used in a similar block ... > +void > +nv84_decoder_vp_h264(struct nv84_decoder *dec, > + struct pipe_h264_picture_desc *desc, > + struct nv84_video_buffer *dest) > +{ ... > + for (i = 0; i < 2; i++) { for (i = 0; i < dest->num_planes; i++) { > + struct nv50_miptree *mt = nv50_miptree(dest->resources[i]); > + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; > + } > + > + PUSH_KICK (push); > +} > + ... > +void > +nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec, > + struct pipe_mpeg12_picture_desc *desc, > + const struct pipe_mpeg12_macroblock *macrob) > +{ ... > +#ifdef __SSE4_2__ IMHO this may produce non portable binaries in case of aggressive mtune/march flags. I'm not objecting against it just pointing out > +void > +nv84_decoder_vp_mpeg12(struct nv84_decoder *dec, > + struct pipe_mpeg12_picture_desc *desc, > + struct nv84_video_buffer *dest) > +{ ... > + for (i = 0; i < 2; i++) { for (i = 0; i < dest->num_planes; i++) { Cheers Emil > + struct nv50_miptree *mt = nv50_miptree(dest->resources[i]); > + mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; > + } > + PUSH_KICK (push); > +} > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev