vlc | branch: master | Martin Storsjö <[email protected]> | Wed Feb 7 11:25:37 2018 +0200| [4a9b6e51932b17763d3d53f42b9486af3e7f7297] | committer: Martin Storsjö
arm: Allow building assembly in thumb mode Windows on arm is thumb2 only. Add the necessary "it" instructions before conditionally executed instructions (which don't emit any extra instructions when not building in thumb mode). The number of "it" instructions could be reduced in some places by reordering the instructions, but they are kept as they were originally to avoid any impact on existing targets. Remove redundant .arm directives; the assembler normally starts out in that mode anyway, and for windows we shouldn't override the default mode. > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=4a9b6e51932b17763d3d53f42b9486af3e7f7297 --- modules/arm_neon/amplify.S | 2 +- modules/arm_neon/deinterleave_chroma.S | 1 + modules/arm_neon/i420_rgb.S | 3 +++ modules/arm_neon/i420_rv16.S | 3 +++ modules/arm_neon/i420_yuyv.S | 4 ++++ modules/arm_neon/i422_yuyv.S | 2 ++ modules/arm_neon/nv12_rgb.S | 3 +++ modules/arm_neon/nv21_rgb.S | 3 +++ modules/arm_neon/yuyv_i422.S | 2 ++ modules/video_filter/deinterlace/merge_arm.S | 5 ++++- 10 files changed, 26 insertions(+), 2 deletions(-) diff --git a/modules/arm_neon/amplify.S b/modules/arm_neon/amplify.S index 9e655afe16..59c945445b 100644 --- a/modules/arm_neon/amplify.S +++ b/modules/arm_neon/amplify.S @@ -21,7 +21,6 @@ #include "asm.S" .syntax unified - .arm #if HAVE_AS_FPU_DIRECTIVE .fpu neon #endif @@ -33,6 +32,7 @@ .align 2 function amplify_float_arm_neon cmp SIZE, #0 + it eq bxeq lr #ifdef __ARM_PCS vmov s0, r3 @ softfp diff --git a/modules/arm_neon/deinterleave_chroma.S b/modules/arm_neon/deinterleave_chroma.S index 9cd01c7aed..7c0723e2f6 100644 --- a/modules/arm_neon/deinterleave_chroma.S +++ b/modules/arm_neon/deinterleave_chroma.S @@ -52,6 +52,7 @@ function deinterleave_chroma_neon sub IPAD, IPITCH, WIDTH, lsl #1 sub OPAD, OPITCH, WIDTH 1: + ite gt movsgt COUNT, WIDTH pople {r4-r6,pc} 2: diff --git a/modules/arm_neon/i420_rgb.S b/modules/arm_neon/i420_rgb.S index 54fb38746e..6624c4e604 100644 --- 
a/modules/arm_neon/i420_rgb.S +++ b/modules/arm_neon/i420_rgb.S @@ -95,6 +95,7 @@ function i420_rgb_neon /* round the width to be a multiple of 16 */ ands OPAD, WIDTH, #15 sub WIDTH, WIDTH, OPAD + it ne addne WIDTH, WIDTH, #16 /* init constants (scale value by 64) */ @@ -115,10 +116,12 @@ function i420_rgb_neon sub YPAD, YPITCH, WIDTH loop_row: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH /* exit if all rows have been processed */ + itt le vpople {q4-q7} pople {r4-r8,r10-r11,pc} diff --git a/modules/arm_neon/i420_rv16.S b/modules/arm_neon/i420_rv16.S index 15d1e7b7bd..a3bbae2ed1 100644 --- a/modules/arm_neon/i420_rv16.S +++ b/modules/arm_neon/i420_rv16.S @@ -98,6 +98,7 @@ function i420_rv16_neon /* round the width to be a multiple of 16 */ ands OPAD, WIDTH, #15 sub WIDTH, WIDTH, OPAD + it ne addne WIDTH, WIDTH, #16 /* init constants (scale value by 64) */ @@ -117,10 +118,12 @@ function i420_rv16_neon sub YPAD, YPITCH, WIDTH loop_row: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH /* exit if all rows have been processed */ + itt le vpople {q4-q7} pople {r4-r8,r10-r11,pc} diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S index 29668e438e..22355e0b61 100644 --- a/modules/arm_neon/i420_yuyv.S +++ b/modules/arm_neon/i420_yuyv.S @@ -49,9 +49,11 @@ function i420_yuyv_neon sub OPAD, OPITCH, WIDTH, lsl #1 sub YPAD, YPITCH, WIDTH 1: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH + it le pople {r4-r8,r10-r11,pc} 2: pld [U, #64] @@ -86,9 +88,11 @@ function i420_uyvy_neon sub OPAD, OPITCH, WIDTH, lsl #1 sub YPAD, YPITCH, WIDTH 1: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH + it le pople {r4-r8,r10-r11,pc} 2: pld [U, #64] diff --git a/modules/arm_neon/i422_yuyv.S b/modules/arm_neon/i422_yuyv.S index 9119839ea5..9a5b8fc4b8 100644 --- a/modules/arm_neon/i422_yuyv.S +++ b/modules/arm_neon/i422_yuyv.S @@ -45,6 +45,7 @@ function i422_yuyv_neon sub OPAD, OPAD, WIDTH, lsl #1 sub 
YPAD, YPAD, WIDTH 1: + ite gt movsgt COUNT, WIDTH pople {r4-r6,pc} 2: @@ -76,6 +77,7 @@ function i422_uyvy_neon sub OPAD, OPAD, WIDTH, lsl #1 sub YPAD, YPAD, WIDTH 1: + ite gt movsgt COUNT, WIDTH pople {r4-r6,pc} 2: diff --git a/modules/arm_neon/nv12_rgb.S b/modules/arm_neon/nv12_rgb.S index 1bb924fc2b..ceef76cbef 100644 --- a/modules/arm_neon/nv12_rgb.S +++ b/modules/arm_neon/nv12_rgb.S @@ -91,6 +91,7 @@ function nv12_rgb_neon /* round the width to be a multiple of 16 */ ands OPAD, WIDTH, #15 sub WIDTH, WIDTH, OPAD + it ne addne WIDTH, WIDTH, #16 /* init constants (scale value by 64) */ @@ -111,10 +112,12 @@ function nv12_rgb_neon sub YPAD, YPITCH, WIDTH loop_row: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH /* exit if all rows have been processed */ + itt le vpople {q4-q7} pople {r4-r8,r10-r11,pc} diff --git a/modules/arm_neon/nv21_rgb.S b/modules/arm_neon/nv21_rgb.S index f775b5a6ac..0d75b9f7f9 100644 --- a/modules/arm_neon/nv21_rgb.S +++ b/modules/arm_neon/nv21_rgb.S @@ -91,6 +91,7 @@ function nv21_rgb_neon /* round the width to be a multiple of 16 */ ands OPAD, WIDTH, #15 sub WIDTH, WIDTH, OPAD + it ne addne WIDTH, WIDTH, #16 /* init constants (scale value by 64) */ @@ -111,10 +112,12 @@ function nv21_rgb_neon sub YPAD, YPITCH, WIDTH loop_row: + it gt movsgt COUNT, WIDTH add O2, O1, OPITCH add Y2, Y1, YPITCH /* exit if all rows have been processed */ + itt le vpople {q4-q7} pople {r4-r8,r10-r11,pc} diff --git a/modules/arm_neon/yuyv_i422.S b/modules/arm_neon/yuyv_i422.S index 637effe9bc..62d826c838 100644 --- a/modules/arm_neon/yuyv_i422.S +++ b/modules/arm_neon/yuyv_i422.S @@ -45,6 +45,7 @@ function yuyv_i422_neon sub YPAD, YPAD, WIDTH sub IPAD, IPAD, WIDTH, lsl #1 1: + ite gt movsgt COUNT, WIDTH pople {r4-r6,pc} 2: @@ -74,6 +75,7 @@ function uyvy_i422_neon sub YPAD, YPAD, WIDTH sub IPAD, IPAD, WIDTH, lsl #1 1: + ite gt movsgt COUNT, WIDTH pople {r4-r6,pc} 2: diff --git a/modules/video_filter/deinterlace/merge_arm.S 
b/modules/video_filter/deinterlace/merge_arm.S index d3f32c5ca6..4b211aeb31 100644 --- a/modules/video_filter/deinterlace/merge_arm.S +++ b/modules/video_filter/deinterlace/merge_arm.S @@ -21,7 +21,6 @@ #include "../arm_neon/asm.S" .syntax unified - .arm #if HAVE_AS_ARCH_DIRECTIVE .arch armv6 #endif @@ -67,6 +66,7 @@ function merge8_arm_neon vst1.u8 {q0-q1}, [DEST,:128]! 3: cmp SIZE, #16 + it lo bxlo lr vld1.u8 {q0}, [SRC1,:128]! sub SIZE, SIZE, #16 @@ -106,6 +106,7 @@ function merge16_arm_neon vst1.u16 {q0-q1}, [DEST,:128]! 3: cmp SIZE, #16 + it lo bxlo lr vld1.u16 {q0}, [SRC1,:128]! sub SIZE, SIZE, #16 @@ -131,6 +132,7 @@ function merge8_armv6 stm DEST!, {r4-r5} uhadd8 r7, r7, lr stm DEST!, {r6-r7} + it eq popeq {r4-r9,pc} b 1b @@ -151,5 +153,6 @@ function merge16_armv6 stm DEST!, {r4-r5} uhadd16 r7, r7, lr stm DEST!, {r6-r7} + it eq popeq {r4-r9,pc} b 1b _______________________________________________ vlc-commits mailing list [email protected] https://mailman.videolan.org/listinfo/vlc-commits
