On Tue, 25 Apr 2017, Martin Storsjö wrote:

On Tue, 25 Apr 2017, Alexandra Hájková wrote:

---
libavcodec/arm/hevc_idct.S | 40 +++++++++++++++++++++++++++------------
libavcodec/arm/hevcdsp_init_arm.c |  9 +++++++++
2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/libavcodec/arm/hevc_idct.S b/libavcodec/arm/hevc_idct.S
index ceded7a..41b1b29 100644
--- a/libavcodec/arm/hevc_idct.S
+++ b/libavcodec/arm/hevc_idct.S
@@ -30,26 +30,29 @@ const trans, align=4
        .short 57, 43, 25, 9
endconst

-function ff_hevc_idct_4x4_dc_8_neon, export=1
+.macro idct_4x4_dc bitdepth
+function ff_hevc_idct_4x4_dc_\bitdepth\()_neon, export=1
        ldrsh           r1, [r0]
-        ldr             r2, =0x20
+        ldr             r2, =(1 << (13 - \bitdepth))
        add             r1, #1
        asr             r1, #1
        add             r1, r2
-        asr             r1, #6
+        asr             r1, #(14 - \bitdepth)
        vdup.16         q0, r1
        vdup.16         q1, r1
        vst1.16         {q0, q1}, [r0, :128]
        bx              lr
endfunc
+.endm

-function ff_hevc_idct_8x8_dc_8_neon, export=1
+.macro idct_8x8_dc bitdepth
+function ff_hevc_idct_8x8_dc_\bitdepth\()_neon, export=1
        ldrsh           r1, [r0]
-        ldr             r2, =0x20
+        ldr             r2, =(1 << (13 - \bitdepth))
        add             r1, #1
        asr             r1, #1
        add             r1, r2
-        asr             r1, #6
+        asr             r1, #(14 - \bitdepth)
        vdup.16         q8, r1
        vdup.16         q9, r1
        vmov.16         q10, q8
@@ -61,14 +64,16 @@ function ff_hevc_idct_8x8_dc_8_neon, export=1
        vstm            r0, {q8-q15}
        bx              lr
endfunc
+.endm

-function ff_hevc_idct_16x16_dc_8_neon, export=1
+.macro idct_16x16_dc bitdepth
+function ff_hevc_idct_16x16_dc_\bitdepth\()_neon, export=1
        ldrsh           r1, [r0]
-        ldr             r2, =0x20
+        ldr             r2, =(1 << (13 - \bitdepth))
        add             r1, #1
        asr             r1, #1
        add             r1, r2
-        asr             r1, #6
+        asr             r1, #(14 - \bitdepth)
        vdup.16         q8, r1
        vdup.16         q9, r1
        vmov.16         q10, q8
@@ -83,14 +88,16 @@ function ff_hevc_idct_16x16_dc_8_neon, export=1
        vstm            r0, {q8-q15}
        bx              lr
endfunc
+.endm

-function ff_hevc_idct_32x32_dc_8_neon, export=1
+.macro idct_32x32_dc bitdepth
+function ff_hevc_idct_32x32_dc_\bitdepth\()_neon, export=1
        ldrsh           r1, [r0]
-        ldr             r2, =0x20
+        ldr             r2, =(1 << (13 - \bitdepth))
        add             r1, #1
        asr             r1, #1
        add             r1, r2
-        asr             r1, #6
+        asr             r1, #(14 - \bitdepth)
        mov             r3, #16
        vdup.16         q8, r1
        vdup.16         q9, r1
@@ -105,6 +112,7 @@ function ff_hevc_idct_32x32_dc_8_neon, export=1
        bne             1b
        bx              lr
endfunc
+.endm

.macro sum_sub out, in, c, op
  .ifc \op, +
@@ -496,8 +504,16 @@ tr_16x4 secondpass_10, 20 - 10
.ltorg

idct_4x4 8
+idct_4x4_dc 8
idct_4x4 10
+idct_4x4_dc 10
idct_8x8 8
+idct_8x8_dc 8
idct_8x8 10
+idct_8x8_dc 10
idct_16x16 8
+idct_16x16_dc 8
idct_16x16 10
+idct_16x16_dc 10
+idct_32x32_dc 8
+idct_32x32_dc 10

Mostly LGTM.

This section would probably look better if the dc ones were grouped together. I'll make that change, test in my configs, and push.

Actually, this version looks better as is - I won't make that change.

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to