On Fri, 21 Dec 2012 18:49:53 -0000
"Ben Avison" <bavi...@riscosopen.org> wrote:

> Citing precedent of pixman-arm-neon-asm-bilinear.S, I'm moving the scaled
> operations into their own source file, intending it to include both
> nearest-neighbour and bilinear-interpolation scaled operations. At the
> moment, these two operations are merely cut-and-pasted from the previous
> revision of pixman-arm-simd-asm.S.
> 
> diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S
> new file mode 100644
> index 0000000..7110995
> --- /dev/null
> +++ b/pixman/pixman-arm-simd-asm-scaled.S
> @@ -0,0 +1,165 @@
> +/*
> + * Copyright © 2008 Mozilla Corporation
> + * Copyright © 2010 Nokia Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
> + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
> + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
> + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
> + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
> + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
> + * SOFTWARE.
> + *
> + * Author:  Jeff Muizelaar (j...@infidigm.net)
> + *
> + */
> +
> +/* Prevent the stack from becoming executable */
> +#if defined(__linux__) && defined(__ELF__)
> +.section .note.GNU-stack,"",%progbits
> +#endif
> +
> +     .text
> +     .arch armv6
> +     .object_arch armv4
> +     .arm
> +     .altmacro
> +     .p2align 2
> +
> +/* Supplementary macro for setting function attributes */
> +.macro pixman_asm_function fname
> +     .func fname
> +     .global fname
> +#ifdef __ELF__
> +     .hidden fname
> +     .type fname, %function
> +#endif
> +fname:
> +.endm
> +
> +/*
> + * Note: This code is only using armv5te instructions (not even armv6),
> + *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
> + *       be split into a few variants, tuned for each microarchitecture.
> + *
> + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
> + * have efficient write combining), it needs to be changed to use 16-byte
> + * aligned writes using STM instruction.
> + *
> + * Nearest scanline scaler macro template uses the following arguments:
> + *  fname                     - name of the function to generate
> + *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
> + *  t                         - type suffix for LDR/STR instructions
> + *  prefetch_distance         - prefetch in the source image by that many
> + *                              pixels ahead
> + *  prefetch_braking_distance - stop prefetching when that many pixels are
> + *                              remaining before the end of scanline
> + */
> +
> +.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
> +                                      prefetch_distance,        \
> +                                      prefetch_braking_distance
> +
> +pixman_asm_function fname
> +     W               .req    r0
> +     DST             .req    r1
> +     SRC             .req    r2
> +     VX              .req    r3
> +     UNIT_X          .req    ip
> +     TMP1            .req    r4
> +     TMP2            .req    r5
> +     VXMASK          .req    r6
> +     PF_OFFS         .req    r7
> +     SRC_WIDTH_FIXED .req    r8
> +
> +     ldr     UNIT_X, [sp]
> +     push    {r4, r5, r6, r7, r8, r10}
> +     mvn     VXMASK, #((1 << bpp_shift) - 1)
> +     ldr     SRC_WIDTH_FIXED, [sp, #28]
> +
> +     /* define helper macro */
> +     .macro  scale_2_pixels
> +             ldr&t   TMP1, [SRC, TMP1]
> +             and     TMP2, VXMASK, VX, asr #(16 - bpp_shift)
> +             adds    VX, VX, UNIT_X
> +             str&t   TMP1, [DST], #(1 << bpp_shift)
> +9:           subpls  VX, VX, SRC_WIDTH_FIXED
> +             bpl     9b
> +
> +             ldr&t   TMP2, [SRC, TMP2]
> +             and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
> +             adds    VX, VX, UNIT_X
> +             str&t   TMP2, [DST], #(1 << bpp_shift)
> +9:           subpls  VX, VX, SRC_WIDTH_FIXED
> +             bpl     9b
> +     .endm
> +
> +     /* now do the scaling */
> +     and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
> +     adds    VX, VX, UNIT_X
> +9:   subpls  VX, VX, SRC_WIDTH_FIXED
> +     bpl     9b
> +     subs    W, W, #(8 + prefetch_braking_distance)
> +     blt     2f
> +     /* calculate prefetch offset */
> +     mov     PF_OFFS, #prefetch_distance
> +     mla     PF_OFFS, UNIT_X, PF_OFFS, VX
> +1:   /* main loop, process 8 pixels per iteration with prefetch */
> +     pld     [SRC, PF_OFFS, asr #(16 - bpp_shift)]
> +     add     PF_OFFS, UNIT_X, lsl #3
> +     scale_2_pixels
> +     scale_2_pixels
> +     scale_2_pixels
> +     scale_2_pixels
> +     subs    W, W, #8
> +     bge     1b
> +2:
> +     subs    W, W, #(4 - 8 - prefetch_braking_distance)
> +     blt     2f
> +1:   /* process the remaining pixels */
> +     scale_2_pixels
> +     scale_2_pixels
> +     subs    W, W, #4
> +     bge     1b
> +2:
> +     tst     W, #2
> +     beq     2f
> +     scale_2_pixels
> +2:
> +     tst     W, #1
> +     ldrne&t TMP1, [SRC, TMP1]
> +     strne&t TMP1, [DST]
> +     /* cleanup helper macro */
> +     .purgem scale_2_pixels
> +     .unreq  DST
> +     .unreq  SRC
> +     .unreq  W
> +     .unreq  VX
> +     .unreq  UNIT_X
> +     .unreq  TMP1
> +     .unreq  TMP2
> +     .unreq  VXMASK
> +     .unreq  PF_OFFS
> +     .unreq  SRC_WIDTH_FIXED
> +     /* return */
> +     pop     {r4, r5, r6, r7, r8, r10}
> +     bx      lr
> +.endfunc
> +.endm
> +
> +generate_nearest_scanline_func \
> +    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
> +
> +generate_nearest_scanline_func \
> +    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32

If you are moving the code, just do the creation of the new file,
removal of this code from the old file and the makefile tweaks as one
commit.

-- 
Best regards,
Siarhei Siamashka
_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to