This shifts the mid-point (after horizontal, before vertical) block state of the transform to match the C code. This forces shifting 8 vectors of 4 elements instead of 4 vectors of 8 elements and is thus slightly slower. --- libavcodec/riscv/vc1dsp_rvv.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index 4b7ab33307..7e1fb84b0c 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -257,6 +257,9 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x vsetivli zero, 4, e16, mf2, ta, ma vlseg8e16.v v0, (a2) jal t0, ff_vc1_inv_trans_8_rvv + .irp n,0,1,2,3,4,5,6,7 + vssra.vi v\n, v\n, 3 + .endr vsseg8e16.v v0, (a2) addi a3, a2, 1 * 8 * 2 vsetivli zero, 8, e16, m1, ta, ma @@ -266,10 +269,6 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x addi a5, a2, 3 * 8 * 2 vle16.v v2, (a4) vle16.v v3, (a5) - .irp n,0,1,2,3 - # shift 4 vectors of 8 elems after transpose instead of 8 of 4 - vssra.vi v\n, v\n, 3 - .endr li t1, 7 jal t0, ff_vc1_inv_trans_4_rvv add a3, a1, a0 -- 2.45.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".