From: "Ronald S. Bultje" <[email protected]>
---
libavcodec/alpha/Makefile | 2 +
libavcodec/alpha/dsputil_alpha.c | 185 --------------------
libavcodec/alpha/dsputil_alpha.h | 2 -
libavcodec/alpha/dsputil_alpha_asm.S | 92 ----------
.../alpha/{dsputil_alpha.c => hpeldsp_alpha.c} | 136 +-------------
.../{bfin/vp3_bfin.h => alpha/hpeldsp_alpha.h} | 10 +-
libavcodec/alpha/hpeldsp_alpha_asm.S | 135 ++++++++++++++
libavcodec/hpeldsp.c | 1 +
libavcodec/hpeldsp.h | 1 +
9 files changed, 147 insertions(+), 417 deletions(-)
copy libavcodec/alpha/{dsputil_alpha.c => hpeldsp_alpha.c} (70%)
copy libavcodec/{bfin/vp3_bfin.h => alpha/hpeldsp_alpha.h} (76%)
create mode 100644 libavcodec/alpha/hpeldsp_alpha_asm.S
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
index e28200d..6f22137 100644
--- a/libavcodec/alpha/Makefile
+++ b/libavcodec/alpha/Makefile
@@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o
\
alpha/motion_est_mvi_asm.o \
alpha/simple_idct_alpha.o \
+OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \
+ alpha/hpeldsp_alpha_asm.o
OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 34fe2eb..7a41cb8 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -119,196 +119,11 @@ static void clear_blocks_axp(int16_t *blocks) {
} while (n);
}
-static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
-{
- return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
- return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-#if 0
-/* The XY2 routines basically utilize this scheme, but reuse parts in
- each iteration. */
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
- uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
- return r1 + r2;
-}
-#endif
-
-#define OP(LOAD, STORE) \
- do { \
- STORE(LOAD(pixels), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_X2(LOAD, STORE) \
- do { \
- uint64_t pix1, pix2; \
- \
- pix1 = LOAD(pixels); \
- pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- STORE(AVG2(pix1, pix2), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_Y2(LOAD, STORE) \
- do { \
- uint64_t pix = LOAD(pixels); \
- do { \
- uint64_t next_pix; \
- \
- pixels += line_size; \
- next_pix = LOAD(pixels); \
- STORE(AVG2(pix, next_pix), block); \
- block += line_size; \
- pix = next_pix; \
- } while (--h); \
- } while (0)
-
-#define OP_XY2(LOAD, STORE) \
- do { \
- uint64_t pix1 = LOAD(pixels); \
- uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
- + (pix2 & BYTE_VEC(0x03)); \
- uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
- \
- do { \
- uint64_t npix1, npix2; \
- uint64_t npix_l, npix_h; \
- uint64_t avg; \
- \
- pixels += line_size; \
- npix1 = LOAD(pixels); \
- npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- npix_l = (npix1 & BYTE_VEC(0x03)) \
- + (npix2 & BYTE_VEC(0x03)); \
- npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
- avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
- + pix_h + npix_h; \
- STORE(avg, block); \
- \
- block += line_size; \
- pix_l = npix_l; \
- pix_h = npix_h; \
- } while (--h); \
- } while (0)
-
-#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
-static void OPNAME ## _pixels ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- if ((size_t) pixels & 0x7) { \
- OPKIND(uldq, STORE); \
- } else { \
- OPKIND(ldq, STORE); \
- } \
-} \
- \
-static void OPNAME ## _pixels16 ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
- OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
-}
-
-#define PIXOP(OPNAME, STORE) \
- MAKE_OP(OPNAME, , OP, STORE) \
- MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
- MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
- MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
-
-/* Rounding primitives. */
-#define AVG2 avg2
-#define AVG4 avg4
-#define AVG4_ROUNDER BYTE_VEC(0x02)
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg, STORE);
-
-/* Not rounding primitives. */
-#undef AVG2
-#undef AVG4
-#undef AVG4_ROUNDER
-#undef STORE
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define AVG4_ROUNDER BYTE_VEC(0x01)
-#define STORE(l, b) stq(l, b)
-PIXOP(put_no_rnd, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg_no_rnd, STORE);
-
-static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- put_pixels_axp_asm(block, pixels, line_size, h);
- put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
-}
-
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
- c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
- c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
- c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
-
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_axp;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
-
- c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
- c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
- c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
- c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
-
- c->put_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_pixels_tab[1][1] = put_pixels_x2_axp;
- c->put_pixels_tab[1][2] = put_pixels_y2_axp;
- c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
-
- c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
-
- c->avg_pixels_tab[1][0] = avg_pixels_axp;
- c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
- c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
- c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-
c->clear_blocks = clear_blocks_axp;
}
diff --git a/libavcodec/alpha/dsputil_alpha.h b/libavcodec/alpha/dsputil_alpha.h
index fcea47c..d976c18 100644
--- a/libavcodec/alpha/dsputil_alpha.h
+++ b/libavcodec/alpha/dsputil_alpha.h
@@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block);
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
-void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
int line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
diff --git a/libavcodec/alpha/dsputil_alpha_asm.S
b/libavcodec/alpha/dsputil_alpha_asm.S
index 1f589aa..fe7e124 100644
--- a/libavcodec/alpha/dsputil_alpha_asm.S
+++ b/libavcodec/alpha/dsputil_alpha_asm.S
@@ -43,98 +43,6 @@
.text
/************************************************************************
- * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- * int line_size, int h)
- */
- .align 6
- .globl put_pixels_axp_asm
- .ent put_pixels_axp_asm
-put_pixels_axp_asm:
- .frame sp, 0, ra
- .prologue 0
-
- and a1, 7, t0
- beq t0, $aligned
-
- .align 4
-$unaligned:
- ldq_u t0, 0(a1)
- ldq_u t1, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t2, 0(a1)
- ldq_u t3, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t4, 0(a1)
- ldq_u t5, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t6, 0(a1)
- ldq_u t7, 8(a1)
- extql t0, a1, t0
- addq a1, a2, a1
-
- extqh t1, a1, t1
- addq a0, a2, t8
- extql t2, a1, t2
- addq t8, a2, t9
-
- extqh t3, a1, t3
- addq t9, a2, ta
- extql t4, a1, t4
- or t0, t1, t0
-
- extqh t5, a1, t5
- or t2, t3, t2
- extql t6, a1, t6
- or t4, t5, t4
-
- extqh t7, a1, t7
- or t6, t7, t6
- stq t0, 0(a0)
- stq t2, 0(t8)
-
- stq t4, 0(t9)
- subq a3, 4, a3
- stq t6, 0(ta)
- addq ta, a2, a0
-
- bne a3, $unaligned
- ret
-
- .align 4
-$aligned:
- ldq t0, 0(a1)
- addq a1, a2, a1
- ldq t1, 0(a1)
- addq a1, a2, a1
-
- ldq t2, 0(a1)
- addq a1, a2, a1
- ldq t3, 0(a1)
-
- addq a0, a2, t4
- addq a1, a2, a1
- addq t4, a2, t5
- subq a3, 4, a3
-
- stq t0, 0(a0)
- addq t5, a2, t6
- stq t1, 0(t4)
- addq t6, a2, a0
-
- stq t2, 0(t5)
- stq t3, 0(t6)
-
- bne a3, $aligned
- ret
- .end put_pixels_axp_asm
-
-/************************************************************************
* void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
* int line_size)
*/
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
similarity index 70%
copy from libavcodec/alpha/dsputil_alpha.c
copy to libavcodec/alpha/hpeldsp_alpha.c
index 34fe2eb..1f1699f 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/hpeldsp_alpha.c
@@ -19,106 +19,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libavutil/attributes.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_alpha.h"
+#include "libavcodec/hpeldsp.h"
+#include "hpeldsp_alpha.h"
#include "asm.h"
-void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- int line_size);
-void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
- int line_size);
-
-#if 0
-/* These functions were the base for the optimized assembler routines,
- and remain here for documentation purposes. */
-static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size)
-{
- int i = 8;
- uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
-
- do {
- uint64_t shorts0, shorts1;
-
- shorts0 = ldq(block);
- shorts0 = maxsw4(shorts0, 0);
- shorts0 = minsw4(shorts0, clampmask);
- stl(pkwb(shorts0), pixels);
-
- shorts1 = ldq(block + 4);
- shorts1 = maxsw4(shorts1, 0);
- shorts1 = minsw4(shorts1, clampmask);
- stl(pkwb(shorts1), pixels + 4);
-
- pixels += line_size;
- block += 8;
- } while (--i);
-}
-
-void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size)
-{
- int h = 8;
- /* Keep this function a leaf function by generating the constants
- manually (mainly for the hack value ;-). */
- uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
- uint64_t signmask = zap(-1, 0x33);
- signmask ^= signmask >> 1; /* 0x8000800080008000 */
-
- do {
- uint64_t shorts0, pix0, signs0;
- uint64_t shorts1, pix1, signs1;
-
- shorts0 = ldq(block);
- shorts1 = ldq(block + 4);
-
- pix0 = unpkbw(ldl(pixels));
- /* Signed subword add (MMX paddw). */
- signs0 = shorts0 & signmask;
- shorts0 &= ~signmask;
- shorts0 += pix0;
- shorts0 ^= signs0;
- /* Clamp. */
- shorts0 = maxsw4(shorts0, 0);
- shorts0 = minsw4(shorts0, clampmask);
-
- /* Next 4. */
- pix1 = unpkbw(ldl(pixels + 4));
- signs1 = shorts1 & signmask;
- shorts1 &= ~signmask;
- shorts1 += pix1;
- shorts1 ^= signs1;
- shorts1 = maxsw4(shorts1, 0);
- shorts1 = minsw4(shorts1, clampmask);
-
- stl(pkwb(shorts0), pixels);
- stl(pkwb(shorts1), pixels + 4);
-
- pixels += line_size;
- block += 8;
- } while (--h);
-}
-#endif
-
-static void clear_blocks_axp(int16_t *blocks) {
- uint64_t *p = (uint64_t *) blocks;
- int n = sizeof(int16_t) * 6 * 64;
-
- do {
- p[0] = 0;
- p[1] = 0;
- p[2] = 0;
- p[3] = 0;
- p[4] = 0;
- p[5] = 0;
- p[6] = 0;
- p[7] = 0;
- p += 8;
- n -= 8 * 8;
- } while (n);
-}
-
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
@@ -269,11 +173,8 @@ static void put_pixels16_axp_asm(uint8_t *block, const
uint8_t *pixels,
put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
}
-av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
+av_cold void ff_hpeldsp_init_alpha(HpelDSPContext* c, int flags)
{
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
@@ -308,35 +209,4 @@ av_cold void ff_dsputil_init_alpha(DSPContext *c,
AVCodecContext *avctx)
c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-
- c->clear_blocks = clear_blocks_axp;
- }
-
- /* amask clears all bits that correspond to present features. */
- if (amask(AMASK_MVI) == 0) {
- c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
- c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
-
- if (!high_bit_depth)
- c->get_pixels = get_pixels_mvi;
- c->diff_pixels = diff_pixels_mvi;
- c->sad[0] = pix_abs16x16_mvi_asm;
- c->sad[1] = pix_abs8x8_mvi;
- c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
- c->pix_abs[1][0] = pix_abs8x8_mvi;
- c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
- c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
- c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
- }
-
- put_pixels_clamped_axp_p = c->put_pixels_clamped;
- add_pixels_clamped_axp_p = c->add_pixels_clamped;
-
- if (avctx->bits_per_raw_sample <= 8 &&
- (avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
- c->idct_put = ff_simple_idct_put_axp;
- c->idct_add = ff_simple_idct_add_axp;
- c->idct = ff_simple_idct_axp;
- }
}
diff --git a/libavcodec/bfin/vp3_bfin.h b/libavcodec/alpha/hpeldsp_alpha.h
similarity index 76%
copy from libavcodec/bfin/vp3_bfin.h
copy to libavcodec/alpha/hpeldsp_alpha.h
index 5a2c5a4..304cf2c 100644
--- a/libavcodec/bfin/vp3_bfin.h
+++ b/libavcodec/alpha/hpeldsp_alpha.h
@@ -16,12 +16,12 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-
-#ifndef AVCODEC_BFIN_VP3_BFIN_H
-#define AVCODEC_BFIN_VP3_BFIN_H
+#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
+#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
#include <stdint.h>
-void ff_bfin_vp3_idct(int16_t *block);
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
-#endif /* AVCODEC_BFIN_VP3_BFIN_H */
+#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S
b/libavcodec/alpha/hpeldsp_alpha_asm.S
new file mode 100644
index 0000000..122b542
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+ .set noat
+ .set noreorder
+ .arch pca56
+ .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ * int line_size, int h)
+ */
+ .align 6
+ .globl put_pixels_axp_asm
+ .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+ .frame sp, 0, ra
+ .prologue 0
+
+ and a1, 7, t0
+ beq t0, $aligned
+
+ .align 4
+$unaligned:
+ ldq_u t0, 0(a1)
+ ldq_u t1, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t2, 0(a1)
+ ldq_u t3, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t4, 0(a1)
+ ldq_u t5, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t6, 0(a1)
+ ldq_u t7, 8(a1)
+ extql t0, a1, t0
+ addq a1, a2, a1
+
+ extqh t1, a1, t1
+ addq a0, a2, t8
+ extql t2, a1, t2
+ addq t8, a2, t9
+
+ extqh t3, a1, t3
+ addq t9, a2, ta
+ extql t4, a1, t4
+ or t0, t1, t0
+
+ extqh t5, a1, t5
+ or t2, t3, t2
+ extql t6, a1, t6
+ or t4, t5, t4
+
+ extqh t7, a1, t7
+ or t6, t7, t6
+ stq t0, 0(a0)
+ stq t2, 0(t8)
+
+ stq t4, 0(t9)
+ subq a3, 4, a3
+ stq t6, 0(ta)
+ addq ta, a2, a0
+
+ bne a3, $unaligned
+ ret
+
+ .align 4
+$aligned:
+ ldq t0, 0(a1)
+ addq a1, a2, a1
+ ldq t1, 0(a1)
+ addq a1, a2, a1
+
+ ldq t2, 0(a1)
+ addq a1, a2, a1
+ ldq t3, 0(a1)
+
+ addq a0, a2, t4
+ addq a1, a2, a1
+ addq t4, a2, t5
+ subq a3, 4, a3
+
+ stq t0, 0(a0)
+ addq t5, a2, t6
+ stq t1, 0(t4)
+ addq t6, a2, a0
+
+ stq t2, 0(t5)
+ stq t3, 0(t6)
+
+ bne a3, $aligned
+ ret
+ .end put_pixels_axp_asm
diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index 4b7623e..76ca85d 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -54,6 +54,7 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
hpel_funcs(avg, [3], 2);
hpel_funcs(avg_no_rnd,, 16);
+ if (ARCH_ALPHA) ff_hpeldsp_init_alpha (c, flags);
if (ARCH_ARM) ff_hpeldsp_init_arm (c, flags);
if (ARCH_BFIN) ff_hpeldsp_init_bfin (c, flags);
if (ARCH_PPC) ff_hpeldsp_init_ppc (c, flags);
diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h
index 6c267cf..d304ec2 100644
--- a/libavcodec/hpeldsp.h
+++ b/libavcodec/hpeldsp.h
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
void ff_hpeldsp_init(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_alpha(HpelDSPContext* c, int flags);
void ff_hpeldsp_init_arm(HpelDSPContext* c, int flags);
void ff_hpeldsp_init_bfin(HpelDSPContext* c, int flags);
void ff_hpeldsp_init_ppc(HpelDSPContext* c, int flags);
--
1.7.9.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel