diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 5c1039b..762d454 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -251,6 +251,23 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
 }
 
 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
+#if ARCH_X86_64
+    uint64_t       *dst64 = (uint64_t *)dst;
+    const uint64_t *src64 = (const uint64_t *)src;
+    int i;
+
+    for(i=0; i+4<=w>>1; i+=4){
+        uint64_t r0 = av_bswap64(src64[i+0]);
+        uint64_t r1 = av_bswap64(src64[i+1]);
+        uint64_t r2 = av_bswap64(src64[i+2]);
+        uint64_t r3 = av_bswap64(src64[i+3]);
+        dst64[i+0]= (r0>>32)|(r0<<32);
+        dst64[i+1]= (r1>>32)|(r1<<32);
+        dst64[i+2]= (r2>>32)|(r2<<32);
+        dst64[i+3]= (r3>>32)|(r3<<32);
+    }
+    i <<= 1;
+#else
     int i;
 
     for(i=0; i+8<=w; i+=8){
@@ -263,6 +280,7 @@ static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
         dst[i+6]= av_bswap32(src[i+6]);
         dst[i+7]= av_bswap32(src[i+7]);
     }
+#endif
     for(;i<w; i++){
         dst[i+0]= av_bswap32(src[i+0]);
     }
