# HG changeset patch
# User Yuvaraj Venkatesh yuva...@multicorewareinc.com
# Date 1383113029 -19800
# Wed Oct 30 11:33:49 2013 +0530
# Node ID c218021583e62a36c06d8f8787e5e5bec13beae6
# Parent c946d617fd9fbd2ae237d5ba7d8338a1a3f5ea7d
assembly code for pixel_sad_x3_4x16 and pixel_sad_x4_4x16
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383117858 -19800
# Wed Oct 30 12:54:18 2013 +0530
# Node ID e574ab7ef2d0dffc20a28097fc04cad30f742f0b
# Parent abf8286f3fa9a153220f17674803c9a0d985b990
asm: declare asm function pointers for sad_64xN
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383118037 -19800
# Wed Oct 30 12:57:17 2013 +0530
# Node ID f5e6b97fa8021cc40777a5112a90552f091ff523
# Parent e574ab7ef2d0dffc20a28097fc04cad30f742f0b
asm: created common asm macro for pixel_sad_64xN
# HG changeset patch
# User Praveen Tiwari
# Date 1383120856 -19800
# Node ID bb3c2693897146b41f26f8e6eb7b28a0b2b50e55
# Parent abf8286f3fa9a153220f17674803c9a0d985b990
chroma interp_4tap_vert_pp all blocks asm code
diff -r abf8286f3fa9 -r bb3c26938971 source/common/x86/asm-primitives.cpp
---
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383120938 -19800
# Wed Oct 30 13:45:38 2013 +0530
# Node ID c08f78ad73bf677164f3ffec707a1fb22de21d59
# Parent 147651ebea036e9378966a8e1e187dbab72d3896
asm: assembly code for pixel_sad_64x32
diff -r
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383122501 -19800
# Wed Oct 30 14:11:41 2013 +0530
# Node ID 1f32144338f3399a962fddbaf2f261242128425c
# Parent c08f78ad73bf677164f3ffec707a1fb22de21d59
asm: assembly code for pixel_sad_64x48 and
# HG changeset patch
# User Yuvaraj Venkatesh yuva...@multicorewareinc.com
# Date 1383124045 -19800
# Wed Oct 30 14:37:25 2013 +0530
# Node ID eca1142d1cec9303afad71108494f9076586ce05
# Parent 65462024832b4498cd9f05a5a81cb6b559bf378b
assembly code for pixel_sad_x3_24x32
diff -r 65462024832b
Hi,
I have written a patch for adding CRF to x265, including the following
features:
1. add a parameter of --crf to the command line,
2. modify the running branches of rateControlStart, using if (isAbr) instead
of switch (cfg->param.rc.rateControlMode), for the logic of classifying the
# HG changeset patch
# User Deepthi Devaki deepthidev...@multicorewareinc.com
# Date 1383126419 -19800
# Node ID 77db80a67f4e55f22bc02ed02930a269bfac6b50
# Parent 74bf8634037ce3e673b21738a5ffaf1c14381414
no-rdo: use bit estimates from ME to calculate RDcost.
bits estimated in ME stored in CU and
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383136861 -19800
# Wed Oct 30 18:11:01 2013 +0530
# Node ID 62a51fe2fcbfd76fc8476a6f714f961b3f3f23ef
# Parent eb7d9f928ee031a108b7c77e56f1d64f123d7157
asm: assembly code for pixel_sad_24x32
diff -r
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383142575 -19800
# Wed Oct 30 19:46:15 2013 +0530
# Node ID 5037cc891114619e32ceeff332884d0abfd138fd
# Parent 62a51fe2fcbfd76fc8476a6f714f961b3f3f23ef
asm: assembly code for pixel_sad_12x16
diff -r
# HG changeset patch
# User Min Chen chenm...@163.com
# Date 1383144464 -28800
# Node ID 6bfafdf72eaef415aba43f4579f222cccbac60d9
# Parent 77db80a67f4e55f22bc02ed02930a269bfac6b50
asm: filterConvertPelToShort
diff -r 77db80a67f4e -r 6bfafdf72eae source/Lib/TLibCommon/TComPrediction.cpp
---
+PROCESS_SAD_8x4
+PROCESS_SAD_8x4
+
+movqm1, [r2]
+movqm2, [r2 + r3]
+lea r2, [r2 + 2 * r3]
+movqm3, [r0]
+movqm4, [r0 + r1]
+lea r0, [r0 + 2 * r1]
+punpcklqdq m1, m2
+punpcklqdq m3, m4
+psadbw m1,
seems good___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
-- Forwarded message --
From: dnyanesh...@multicorewareinc.com
Date: Wed, Oct 30, 2013 at 7:47 PM
Subject: [x265] [PATCH] asm: assembly code for pixel_sad_12x16
To: x265-devel@videolan.org
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date
On Wed, Oct 30, 2013 at 3:16 AM, dnyanesh...@multicorewareinc.com wrote:
# HG changeset patch
# User Dnyaneshwar Gorade dnyanesh...@multicorewareinc.com
# Date 1383120938 -19800
# Wed Oct 30 13:45:38 2013 +0530
# Node ID c08f78ad73bf677164f3ffec707a1fb22de21d59
# Parent
+pxor m0, m0
+mov r4, 32
+.loop
+
+sub r4, 8
+cmp r4, 8
+
+jnz .loop
+PROCESS_SAD_64x4
+lea r2, [r2 + r3]
+lea r0, [r0 + r1]
1. The version below is shorter:
mov r4, 3
dec r4
jnz loop
2. The jnz line needs indentation.
3. Why not include the PROCESS_SAD_64x4 that comes after the jnz in the loop?
+psadbw m5, m3
+psadbw m6, m4
+pshufd m6, m6, 84
You want to clear the high 96 bits to zero, so why not use pand? Of course, we
can avoid this altogether; see below.
+paddd m5, m6
+paddd m0, m5
We can sum as 32xN and drop the high 64 bits in the last step.
+%macro SAD_X3_W24 0
+cglobal
-- Forwarded message --
From: yuva...@multicorewareinc.com
Date: Wed, Oct 30, 2013 at 2:38 PM
Subject: [x265] [PATCH] assembly code for pixel_sad_x3_24x32
To: x265-devel@videolan.org
# HG changeset patch
# User Yuvaraj Venkatesh yuva...@multicorewareinc.com
# Date 1383124045
+paddd m1, m2
+paddd m0, m1
+paddd m0, m3
We have enough registers for the sums; why make the dependency chain longer?
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
+%macro PROCESS_SAD_12x4 0
+movum1, [r2]
+movum2, [r0]
+pandm1, m4
+pandm2, m4
+psadbw m1, m2
+paddd m0, m1
+lea r2, [r2 + r3]
+lea r0, [r0 + r1]
+movum1, [r2]
+movum2, [r0]
+pandm1, m4
+pandm2,
If possible, each lea should do e.g. lea r0, [r0+r1*2], with
intermediate addresses being [r0] and [r0+r1]. That results in half
the LEAs.
Jason
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
On Wed, Oct 30, 2013 at 4:47 AM, deepthidev...@multicorewareinc.com wrote:
# HG changeset patch
# User Deepthi Devaki deepthidev...@multicorewareinc.com
# Date 1383126419 -19800
# Node ID 77db80a67f4e55f22bc02ed02930a269bfac6b50
# Parent 74bf8634037ce3e673b21738a5ffaf1c14381414
no-rdo: use
+pmulhrswm7,[tab_c_512]
+pmulhrswm6,[tab_c_512]
+pmulhrswm5,[tab_c_512]
+pmulhrswm4,[tab_c_512]
Could we load this into a temp instead of loading it 4 times?
+cglobal interp_8tap_vert_pp_%1x%2, 4, 7, 7
+mov r4d,
Steve,
This is part of an ongoing change to rd 0/1 where we want to replace cost =
distortion + lambda*(coeff + mv bits), as opposed to that derived from the
RDO process. Here, the coeff bits have not been added, only me bits are
considered.
I believe we'll need an exhaustive set of tests for
+shl r4,6
I think this should be r4d (general coding suggestion: use 32-bit
unless 64-bit/native-size is necessary, e.g. pointers).
Using r4d will generate an extra prefix byte (41H). No documentation shows a
difference in execution time (cycles), so we chose the shorter instruction.
26 matches
Mail list logo