Here is an update to dav1d 1.2.1.
Changes for 1.2.1 'Arctic Peregrine Falcon': ------------------------------------------- 1.2.1 is a small release of dav1d, adding more SIMD and fixes - Fix a threading race on task_thread.init_done - NEON z2 8bpc and high bit-depth optimizations - SSSE3 z2 high bit-depth optimizations - Fix a desynced luma/chroma planes issue with Film Grain - Reduce memory consumption - Improve dav1d_parse_sequence_header() speed - OBU: Improve header parsing and fix potential overflows - OBU: Improve ITU-T T.35 parsing speed - Misc buildsystems, CI and headers fixes Index: Makefile =================================================================== RCS file: /home/cvs/ports/multimedia/dav1d/Makefile,v retrieving revision 1.33 diff -u -p -u -p -r1.33 Makefile --- Makefile 19 May 2023 11:07:30 -0000 1.33 +++ Makefile 6 Jun 2023 20:03:17 -0000 @@ -1,6 +1,6 @@ COMMENT= small and fast AV1 decoder -VER= 1.2.0 +VER= 1.2.1 DISTNAME= dav1d-${VER} CATEGORIES= multimedia MASTER_SITES= https://downloads.videolan.org/pub/videolan/dav1d/${VER}/ Index: distinfo =================================================================== RCS file: /home/cvs/ports/multimedia/dav1d/distinfo,v retrieving revision 1.17 diff -u -p -u -p -r1.17 distinfo --- distinfo 19 May 2023 11:07:30 -0000 1.17 +++ distinfo 6 Jun 2023 20:03:24 -0000 @@ -1,2 +1,2 @@ -SHA256 (dav1d-1.2.0.tar.xz) = Ixvti8G7KKQdiNprTCwRjehLkuXx1nyv+ht/garqjG4= -SIZE (dav1d-1.2.0.tar.xz) = 866120 +SHA256 (dav1d-1.2.1.tar.xz) = TjPrYexUx2ihbaDPj6CSi0xFk/X4BKPIh9SiHDGDQLI= +SIZE (dav1d-1.2.1.tar.xz) = 873008 Index: patches/patch-src_arm_64_ipred16_S =================================================================== RCS file: /home/cvs/ports/multimedia/dav1d/patches/patch-src_arm_64_ipred16_S,v retrieving revision 1.2 diff -u -p -u -p -r1.2 patch-src_arm_64_ipred16_S --- patches/patch-src_arm_64_ipred16_S 19 May 2023 11:07:30 -0000 1.2 +++ patches/patch-src_arm_64_ipred16_S 6 Jun 2023 20:32:43 -0000 @@ -387,7 +387,7 @@ Index: src/arm/64/ipred16.S 
endfunc const padding_mask_buf -@@ -2445,13 +2463,13 @@ function ipred_filter_\bpc\()bpc_neon +@@ -3880,13 +3898,13 @@ function ipred_filter_\bpc\()bpc_neon add x6, x6, w5, uxtw ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [x6], #32 clz w9, w3 @@ -404,7 +404,7 @@ Index: src/arm/64/ipred16.S sxtl v18.8h, v18.8b sxtl v19.8h, v19.8b add x6, x0, x1 -@@ -2725,11 +2743,13 @@ function ipred_filter_\bpc\()bpc_neon +@@ -4160,11 +4178,13 @@ function ipred_filter_\bpc\()bpc_neon 9: ret @@ -422,7 +422,7 @@ Index: src/arm/64/ipred16.S endfunc .endm -@@ -2749,11 +2769,11 @@ endfunc +@@ -4184,11 +4204,11 @@ endfunc function pal_pred_16bpc_neon, export=1 ld1 {v30.8h}, [x2] clz w9, w4 @@ -437,7 +437,7 @@ Index: src/arm/64/ipred16.S br x6 40: AARCH64_VALID_JUMP_TARGET -@@ -2922,12 +2942,14 @@ function pal_pred_16bpc_neon, export=1 +@@ -4357,12 +4377,14 @@ function pal_pred_16bpc_neon, export=1 b.gt 64b ret @@ -457,7 +457,7 @@ Index: src/arm/64/ipred16.S endfunc // void ipred_cfl_128_16bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -2938,12 +2960,12 @@ endfunc +@@ -4373,12 +4395,12 @@ endfunc function ipred_cfl_128_16bpc_neon, export=1 dup v31.8h, w7 // bitdepth_max clz w9, w3 @@ -473,7 +473,7 @@ Index: src/arm/64/ipred16.S add x6, x0, x1 lsl x1, x1, #1 movi v30.8h, #0 -@@ -3075,12 +3097,14 @@ L(ipred_cfl_splat_w16): +@@ -4510,12 +4532,14 @@ L(ipred_cfl_splat_w16): b.gt 1b ret @@ -492,7 +492,7 @@ Index: src/arm/64/ipred16.S endfunc // void ipred_cfl_top_16bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -3091,12 +3115,12 @@ endfunc +@@ -4526,12 +4550,12 @@ endfunc function ipred_cfl_top_16bpc_neon, export=1 dup v31.8h, w7 // bitdepth_max clz w9, w3 @@ -508,7 +508,7 @@ Index: src/arm/64/ipred16.S add x6, x0, x1 lsl x1, x1, #1 movi v30.8h, #0 -@@ -3134,11 +3158,13 @@ function ipred_cfl_top_16bpc_neon, export=1 +@@ -4569,11 +4593,13 @@ function ipred_cfl_top_16bpc_neon, export=1 dup v0.8h, v0.h[0] b L(ipred_cfl_splat_w16) @@ -526,7 +526,7 @@ Index: src/arm/64/ipred16.S endfunc // void 
ipred_cfl_left_16bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -3151,15 +3177,15 @@ function ipred_cfl_left_16bpc_neon, export=1 +@@ -4586,15 +4612,15 @@ function ipred_cfl_left_16bpc_neon, export=1 sub x2, x2, w4, uxtw #1 clz w9, w3 clz w8, w4 @@ -548,7 +548,7 @@ Index: src/arm/64/ipred16.S add x6, x0, x1 lsl x1, x1, #1 movi v30.8h, #0 -@@ -3201,11 +3227,13 @@ L(ipred_cfl_left_h32): +@@ -4636,11 +4662,13 @@ L(ipred_cfl_left_h32): dup v0.8h, v0.h[0] br x9 @@ -566,7 +566,7 @@ Index: src/arm/64/ipred16.S endfunc // void ipred_cfl_16bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -3221,16 +3249,15 @@ function ipred_cfl_16bpc_neon, export=1 +@@ -4656,16 +4684,15 @@ function ipred_cfl_16bpc_neon, export=1 clz w9, w3 clz w6, w4 dup v16.4s, w8 // width + height @@ -587,7 +587,7 @@ Index: src/arm/64/ipred16.S ushr v16.4s, v16.4s, #1 // (width + height) >> 1 dup v17.4s, w8 // -ctz(width + height) add x6, x0, x1 -@@ -3354,15 +3381,17 @@ L(ipred_cfl_w32): +@@ -4789,15 +4816,17 @@ L(ipred_cfl_w32): dup v0.8h, v0.h[0] b L(ipred_cfl_splat_w16) @@ -613,7 +613,7 @@ Index: src/arm/64/ipred16.S endfunc // void cfl_ac_420_16bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ -3371,14 +3400,14 @@ endfunc +@@ -4806,14 +4835,14 @@ endfunc function ipred_cfl_ac_420_16bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -631,7 +631,7 @@ Index: src/arm/64/ipred16.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -3510,9 +3539,9 @@ L(ipred_cfl_ac_420_w8_hpad): +@@ -4945,9 +4974,9 @@ L(ipred_cfl_ac_420_w8_hpad): L(ipred_cfl_ac_420_w16): AARCH64_VALID_JUMP_TARGET @@ -644,7 +644,7 @@ Index: src/arm/64/ipred16.S br x7 L(ipred_cfl_ac_420_w16_wpad0): -@@ -3689,17 +3718,19 @@ L(ipred_cfl_ac_420_w16_hpad): +@@ -5124,17 +5153,19 @@ L(ipred_cfl_ac_420_w16_hpad): lsl w6, w6, #2 b L(ipred_cfl_ac_420_w4_calc_subtract_dc) @@ -672,7 +672,7 @@ Index: src/arm/64/ipred16.S endfunc // void cfl_ac_422_16bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ 
-3708,14 +3739,14 @@ endfunc +@@ -5143,14 +5174,14 @@ endfunc function ipred_cfl_ac_422_16bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -690,7 +690,7 @@ Index: src/arm/64/ipred16.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -3816,9 +3847,9 @@ L(ipred_cfl_ac_422_w8_wpad): +@@ -5251,9 +5282,9 @@ L(ipred_cfl_ac_422_w8_wpad): L(ipred_cfl_ac_422_w16): AARCH64_VALID_JUMP_TARGET @@ -703,7 +703,7 @@ Index: src/arm/64/ipred16.S br x7 L(ipred_cfl_ac_422_w16_wpad0): -@@ -3937,17 +3968,19 @@ L(ipred_cfl_ac_422_w16_wpad3): +@@ -5372,17 +5403,19 @@ L(ipred_cfl_ac_422_w16_wpad3): mov v1.16b, v3.16b b L(ipred_cfl_ac_420_w16_hpad) @@ -731,7 +731,7 @@ Index: src/arm/64/ipred16.S endfunc // void cfl_ac_444_16bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ -3956,14 +3989,14 @@ endfunc +@@ -5391,14 +5424,14 @@ endfunc function ipred_cfl_ac_444_16bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -749,7 +749,7 @@ Index: src/arm/64/ipred16.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -4072,10 +4105,11 @@ L(ipred_cfl_ac_444_w16_wpad): +@@ -5507,10 +5540,11 @@ L(ipred_cfl_ac_444_w16_wpad): L(ipred_cfl_ac_444_w32): AARCH64_VALID_JUMP_TARGET @@ -764,7 +764,7 @@ Index: src/arm/64/ipred16.S br x7 L(ipred_cfl_ac_444_w32_wpad0): -@@ -4190,15 +4224,17 @@ L(ipred_cfl_ac_444_w32_hpad): +@@ -5625,15 +5659,17 @@ L(ipred_cfl_ac_444_w32_hpad): lsl w6, w6, #3 b L(ipred_cfl_ac_420_w4_calc_subtract_dc) Index: patches/patch-src_arm_64_ipred_S =================================================================== RCS file: /home/cvs/ports/multimedia/dav1d/patches/patch-src_arm_64_ipred_S,v retrieving revision 1.2 diff -u -p -u -p -r1.2 patch-src_arm_64_ipred_S --- patches/patch-src_arm_64_ipred_S 19 May 2023 11:07:30 -0000 1.2 +++ patches/patch-src_arm_64_ipred_S 6 Jun 2023 20:32:55 -0000 @@ -387,7 +387,7 @@ Index: src/arm/64/ipred.S endfunc const padding_mask_buf -@@ -1614,11 +1632,11 @@ endfunc +@@ 
-1653,11 +1671,11 @@ endfunc // const int dx, const int max_base_x); function ipred_z1_fill1_8bpc_neon, export=1 clz w9, w3 @@ -402,7 +402,7 @@ Index: src/arm/64/ipred.S ld1r {v31.16b}, [x10] // padding mov w7, w5 mov w15, #64 -@@ -1777,12 +1795,14 @@ function ipred_z1_fill1_8bpc_neon, export=1 +@@ -1816,12 +1834,14 @@ function ipred_z1_fill1_8bpc_neon, export=1 mov w3, w12 b 169b @@ -422,7 +422,7 @@ Index: src/arm/64/ipred.S endfunc function ipred_z1_fill2_8bpc_neon, export=1 -@@ -1900,11 +1920,11 @@ endconst +@@ -3160,11 +3180,11 @@ endfunc function ipred_z3_fill1_8bpc_neon, export=1 cmp w6, #64 clz w9, w3 @@ -437,7 +437,7 @@ Index: src/arm/64/ipred.S movrel x11, increments ld1r {v31.16b}, [x10] // padding ld1 {v30.8h}, [x11] // increments -@@ -2243,17 +2263,20 @@ L(ipred_z3_fill1_large_h16): +@@ -3503,17 +3523,20 @@ L(ipred_z3_fill1_large_h16): 9: ret @@ -464,7 +464,7 @@ Index: src/arm/64/ipred.S b.gt L(ipred_z3_fill_padding_wide) // w3 = remaining width, w4 = constant height mov w12, w4 -@@ -2264,8 +2287,7 @@ function ipred_z3_fill_padding_neon, export=0 +@@ -3524,8 +3547,7 @@ function ipred_z3_fill_padding_neon, export=0 // power of two in the remaining width, and repeating. clz w9, w3 sub w9, w9, #25 @@ -474,7 +474,7 @@ Index: src/arm/64/ipred.S br x9 2: -@@ -2345,13 +2367,15 @@ function ipred_z3_fill_padding_neon, export=0 +@@ -3605,13 +3627,15 @@ function ipred_z3_fill_padding_neon, export=0 9: ret @@ -496,7 +496,7 @@ Index: src/arm/64/ipred.S L(ipred_z3_fill_padding_wide): // Fill a WxH rectangle with padding, with W > 16. 
-@@ -2506,13 +2530,13 @@ function ipred_filter_8bpc_neon, export=1 +@@ -3766,13 +3790,13 @@ function ipred_filter_8bpc_neon, export=1 add x6, x6, w5, uxtw ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [x6], #32 clz w9, w3 @@ -513,7 +513,7 @@ Index: src/arm/64/ipred.S sxtl v18.8h, v18.8b sxtl v19.8h, v19.8b add x6, x0, x1 -@@ -2653,11 +2677,13 @@ function ipred_filter_8bpc_neon, export=1 +@@ -3913,11 +3937,13 @@ function ipred_filter_8bpc_neon, export=1 9: ret @@ -531,7 +531,7 @@ Index: src/arm/64/ipred.S endfunc // void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -2666,11 +2692,11 @@ endfunc +@@ -3926,11 +3952,11 @@ endfunc function pal_pred_8bpc_neon, export=1 ld1 {v0.8h}, [x2] clz w9, w4 @@ -546,7 +546,7 @@ Index: src/arm/64/ipred.S add x2, x0, x1 lsl x1, x1, #1 br x6 -@@ -2748,12 +2774,14 @@ function pal_pred_8bpc_neon, export=1 +@@ -4008,12 +4034,14 @@ function pal_pred_8bpc_neon, export=1 b.gt 64b ret @@ -566,7 +566,7 @@ Index: src/arm/64/ipred.S endfunc // void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -2762,12 +2790,12 @@ endfunc +@@ -4022,12 +4050,12 @@ endfunc // const int16_t *ac, const int alpha); function ipred_cfl_128_8bpc_neon, export=1 clz w9, w3 @@ -582,7 +582,7 @@ Index: src/arm/64/ipred.S add x6, x0, x1 lsl x1, x1, #1 br x7 -@@ -2872,12 +2900,14 @@ L(ipred_cfl_splat_w16): +@@ -4132,12 +4160,14 @@ L(ipred_cfl_splat_w16): b.gt 1b ret @@ -601,7 +601,7 @@ Index: src/arm/64/ipred.S endfunc // void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -2886,12 +2916,12 @@ endfunc +@@ -4146,12 +4176,12 @@ endfunc // const int16_t *ac, const int alpha); function ipred_cfl_top_8bpc_neon, export=1 clz w9, w3 @@ -617,7 +617,7 @@ Index: src/arm/64/ipred.S add x6, x0, x1 lsl x1, x1, #1 br x7 -@@ -2926,11 +2956,13 @@ function ipred_cfl_top_8bpc_neon, export=1 +@@ -4186,11 +4216,13 @@ function ipred_cfl_top_8bpc_neon, export=1 dup v0.8h, v2.h[0] b L(ipred_cfl_splat_w16) @@ -635,7 +635,7 @@ Index: src/arm/64/ipred.S endfunc 
// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -2941,15 +2973,15 @@ function ipred_cfl_left_8bpc_neon, export=1 +@@ -4201,15 +4233,15 @@ function ipred_cfl_left_8bpc_neon, export=1 sub x2, x2, w4, uxtw clz w9, w3 clz w8, w4 @@ -657,7 +657,7 @@ Index: src/arm/64/ipred.S add x6, x0, x1 lsl x1, x1, #1 br x7 -@@ -2988,11 +3020,13 @@ L(ipred_cfl_left_h32): +@@ -4248,11 +4280,13 @@ L(ipred_cfl_left_h32): dup v0.8h, v2.h[0] br x9 @@ -675,7 +675,7 @@ Index: src/arm/64/ipred.S endfunc // void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride, -@@ -3006,16 +3040,15 @@ function ipred_cfl_8bpc_neon, export=1 +@@ -4266,16 +4300,15 @@ function ipred_cfl_8bpc_neon, export=1 clz w9, w3 clz w6, w4 dup v16.8h, w8 // width + height @@ -696,7 +696,7 @@ Index: src/arm/64/ipred.S ushr v16.8h, v16.8h, #1 // (width + height) >> 1 dup v17.8h, w8 // -ctz(width + height) add x6, x0, x1 -@@ -3132,15 +3165,17 @@ L(ipred_cfl_w32): +@@ -4392,15 +4425,17 @@ L(ipred_cfl_w32): dup v0.8h, v0.h[0] b L(ipred_cfl_splat_w16) @@ -722,7 +722,7 @@ Index: src/arm/64/ipred.S endfunc // void cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ -3149,14 +3184,14 @@ endfunc +@@ -4409,14 +4444,14 @@ endfunc function ipred_cfl_ac_420_8bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -740,7 +740,7 @@ Index: src/arm/64/ipred.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -3295,9 +3330,9 @@ L(ipred_cfl_ac_420_w8_subtract_dc): +@@ -4555,9 +4590,9 @@ L(ipred_cfl_ac_420_w8_subtract_dc): L(ipred_cfl_ac_420_w16): AARCH64_VALID_JUMP_TARGET @@ -753,7 +753,7 @@ Index: src/arm/64/ipred.S br x7 L(ipred_cfl_ac_420_w16_wpad0): -@@ -3454,17 +3489,19 @@ L(ipred_cfl_ac_420_w16_hpad): +@@ -4714,17 +4749,19 @@ L(ipred_cfl_ac_420_w16_hpad): lsl w6, w6, #1 b L(ipred_cfl_ac_420_w8_calc_subtract_dc) @@ -781,7 +781,7 @@ Index: src/arm/64/ipred.S endfunc // void cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ -3473,14 +3510,14 
@@ endfunc +@@ -4733,14 +4770,14 @@ endfunc function ipred_cfl_ac_422_8bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -799,7 +799,7 @@ Index: src/arm/64/ipred.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -3571,9 +3608,9 @@ L(ipred_cfl_ac_422_w8_wpad): +@@ -4831,9 +4868,9 @@ L(ipred_cfl_ac_422_w8_wpad): L(ipred_cfl_ac_422_w16): AARCH64_VALID_JUMP_TARGET @@ -812,7 +812,7 @@ Index: src/arm/64/ipred.S br x7 L(ipred_cfl_ac_422_w16_wpad0): -@@ -3676,17 +3713,19 @@ L(ipred_cfl_ac_422_w16_wpad3): +@@ -4936,17 +4973,19 @@ L(ipred_cfl_ac_422_w16_wpad3): mov v1.16b, v3.16b b L(ipred_cfl_ac_420_w16_hpad) @@ -840,7 +840,7 @@ Index: src/arm/64/ipred.S endfunc // void cfl_ac_444_8bpc_neon(int16_t *const ac, const pixel *const ypx, -@@ -3695,14 +3734,14 @@ endfunc +@@ -4955,14 +4994,14 @@ endfunc function ipred_cfl_ac_444_8bpc_neon, export=1 clz w8, w5 lsl w4, w4, #2 @@ -858,7 +858,7 @@ Index: src/arm/64/ipred.S sub w8, w6, w4 // height - h_pad rbit w9, w5 // rbit(width) rbit w10, w6 // rbit(height) -@@ -3823,9 +3862,10 @@ L(ipred_cfl_ac_444_w16_wpad): +@@ -5083,9 +5122,10 @@ L(ipred_cfl_ac_444_w16_wpad): L(ipred_cfl_ac_444_w32): AARCH64_VALID_JUMP_TARGET @@ -872,7 +872,7 @@ Index: src/arm/64/ipred.S br x7 L(ipred_cfl_ac_444_w32_wpad0): -@@ -3971,15 +4011,17 @@ L(ipred_cfl_ac_444_w32_hpad): +@@ -5231,15 +5271,17 @@ L(ipred_cfl_ac_444_w32_hpad): dup v4.8h, v4.h[0] b L(ipred_cfl_ac_420_w8_subtract_dc) Index: patches/patch-src_thread_task_c =================================================================== RCS file: patches/patch-src_thread_task_c diff -N patches/patch-src_thread_task_c --- patches/patch-src_thread_task_c 19 May 2023 11:07:30 -0000 1.1 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,28 +0,0 @@ -threading: Fix a race on task_thread.init_done - -Index: src/thread_task.c ---- src/thread_task.c.orig -+++ src/thread_task.c -@@ -327,6 +327,7 @@ int dav1d_task_create_tile_sbrow(Dav1dFrameContext *co - 
f->task_thread.pending_tasks.tail->next = &tasks[0]; - f->task_thread.pending_tasks.tail = prev_t; - atomic_store(&f->task_thread.pending_tasks.merge, 1); -+ atomic_store(&f->task_thread.init_done, 1); - pthread_mutex_unlock(&f->task_thread.pending_tasks.lock); - - return 0; -@@ -730,14 +731,11 @@ void *dav1d_worker_task(void *data) { - dav1d_decode_frame_exit(f, DAV1D_ERR(ENOMEM)); - f->n_tile_data = 0; - pthread_cond_signal(&f->task_thread.cond); -- atomic_store(&f->task_thread.init_done, 1); -- continue; - } else { - pthread_mutex_unlock(&ttd->lock); - } - } - } -- atomic_store(&f->task_thread.init_done, 1); - pthread_mutex_lock(&ttd->lock); - } else { - pthread_mutex_lock(&ttd->lock);