# HG changeset patch
# User Satoshi Nakagawa <[email protected]>
# Date 1463052561 -32400
# Thu May 12 20:29:21 2016 +0900
# Node ID 3d6c4c1fcb9923e8215aefae62bfeeb118e173c0
# Parent a5362b9533f6a5b77740b4e8f97dba2555b6f929
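remove m_immedVals

Replace the heap-allocated Predict::m_immedVals scratch buffer with aligned,
stack-allocated intermediate buffers in the interpolation paths. Also compute
the chroma MV fraction once (mvx & 7, mvy & 7) instead of re-shifting it at
every filter call, and use the block width as the stride of the sub-pel
buffers in MotionEstimate::subpelCompare.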
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/ipfilter.cpp Thu May 12 20:29:21 2016 +0900
@@ -365,10 +365,10 @@
template<int N, int width, int height>
void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t
dstStride, int idxX, int idxY)
{
- short immedVals[(64 + 8) * (64 + 8)];
+ ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]);
- interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width,
idxX, 1);
- filterVertical_sp_c<N>(immedVals + 3 * width, width, dst, dstStride,
width, height, idxY);
+ interp_horiz_ps_c<N, width, height>(src, srcStride, immed, width, idxX, 1);
+ filterVertical_sp_c<N>(immed + (N / 2 - 1) * width, width, dst, dstStride,
width, height, idxY);
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.cpp
--- a/source/common/predict.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.cpp Thu May 12 20:29:21 2016 +0900
@@ -57,12 +57,10 @@
Predict::Predict()
{
- m_immedVals = NULL;
}
Predict::~Predict()
{
- X265_FREE(m_immedVals);
m_predShortYuv[0].destroy();
m_predShortYuv[1].destroy();
}
@@ -72,12 +70,8 @@
m_csp = csp;
m_hChromaShift = CHROMA_H_SHIFT(csp);
m_vChromaShift = CHROMA_V_SHIFT(csp);
- CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
return m_predShortYuv[0].create(MAX_CU_SIZE, csp) &&
m_predShortYuv[1].create(MAX_CU_SIZE, csp);
-
-fail:
- return false;
}
void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu,
Yuv& predYuv, bool bLuma, bool bChroma)
@@ -258,8 +252,8 @@
int partEnum = partitionFromSizes(pu.width, pu.height);
const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + srcOffset;
- int xFrac = mv.x & 0x3;
- int yFrac = mv.y & 0x3;
+ int xFrac = mv.x & 3;
+ int yFrac = mv.y & 3;
if (!(yFrac | xFrac))
primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
@@ -280,14 +274,14 @@
intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + srcOffset;
- int xFrac = mv.x & 0x3;
- int yFrac = mv.y & 0x3;
-
int partEnum = partitionFromSizes(pu.width, pu.height);
X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not
divisible by 4\n");
X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu
size\n");
+ int xFrac = mv.x & 3;
+ int yFrac = mv.y & 3;
+
if (!(yFrac | xFrac))
primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride);
else if (!yFrac)
@@ -296,11 +290,12 @@
primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride,
yFrac);
else
{
- int tmpStride = pu.width;
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals,
tmpStride, xFrac, 1);
- primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) *
tmpStride, tmpStride, dst, dstStride, yFrac);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA -
1)]);
+ int immedStride = pu.width;
+ int halfFilterSize = NTAPS_LUMA >> 1;
+
+ primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride,
xFrac, 1);
+ primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) *
immedStride, immedStride, dst, dstStride, yFrac);
}
}
@@ -309,10 +304,10 @@
intptr_t dstStride = dstYuv.m_csize;
intptr_t refStride = refPic.m_strideC;
- int shiftHor = (2 + m_hChromaShift);
- int shiftVer = (2 + m_vChromaShift);
+ int mvx = mv.x << (1 - m_hChromaShift);
+ int mvy = mv.y << (1 - m_vChromaShift);
- intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + refOffset;
const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + refOffset;
@@ -320,11 +315,11 @@
pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
- int xFrac = mv.x & ((1 << shiftHor) - 1);
- int yFrac = mv.y & ((1 << shiftVer) - 1);
+ int partEnum = partitionFromSizes(pu.width, pu.height);
- int partEnum = partitionFromSizes(pu.width, pu.height);
-
+ int xFrac = mvx & 7;
+ int yFrac = mvy & 7;
+
if (!(yFrac | xFrac))
{
primitives.chroma[m_csp].pu[partEnum].copy_pp(dstCb, dstStride, refCb,
refStride);
@@ -332,37 +327,36 @@
}
else if (!yFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride,
dstCb, dstStride, xFrac << (1 - m_hChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride,
dstCr, dstStride, xFrac << (1 - m_hChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCb, refStride,
dstCb, dstStride, xFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hpp(refCr, refStride,
dstCr, dstStride, xFrac);
}
else if (!xFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride,
dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride,
dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCb, refStride,
dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_vpp(refCr, refStride,
dstCr, dstStride, yFrac);
}
else
{
- int extStride = pu.width >> m_hChromaShift;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA
- 1)]);
+ int immedStride = pu.width >> m_hChromaShift;
+ int halfFilterSize = NTAPS_CHROMA >> 1;
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals +
(halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 -
m_vChromaShift));
-
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vsp(m_immedVals +
(halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 -
m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
}
}
void Predict::predInterChromaShort(const PredictionUnit& pu, ShortYuv&
dstSYuv, const PicYuv& refPic, const MV& mv) const
{
+ intptr_t dstStride = dstSYuv.m_csize;
intptr_t refStride = refPic.m_strideC;
- intptr_t dstStride = dstSYuv.m_csize;
- int shiftHor = (2 + m_hChromaShift);
- int shiftVer = (2 + m_vChromaShift);
+ int mvx = mv.x << (1 - m_hChromaShift);
+ int mvy = mv.y << (1 - m_vChromaShift);
- intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + refOffset;
const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx +
pu.puAbsPartIdx) + refOffset;
@@ -370,15 +364,15 @@
int16_t* dstCb = dstSYuv.getCbAddr(pu.puAbsPartIdx);
int16_t* dstCr = dstSYuv.getCrAddr(pu.puAbsPartIdx);
- int xFrac = mv.x & ((1 << shiftHor) - 1);
- int yFrac = mv.y & ((1 << shiftVer) - 1);
-
int partEnum = partitionFromSizes(pu.width, pu.height);
uint32_t cxWidth = pu.width >> m_hChromaShift;
X265_CHECK(((cxWidth | (pu.height >> m_vChromaShift)) % 2) == 0, "chroma
block size expected to be multiple of 2\n");
+ int xFrac = mvx & 7;
+ int yFrac = mvy & 7;
+
if (!(yFrac | xFrac))
{
primitives.chroma[m_csp].pu[partEnum].p2s(refCb, refStride, dstCb,
dstStride);
@@ -386,23 +380,24 @@
}
else if (!yFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0);
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
dstCb, dstStride, xFrac, 0);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
dstCr, dstStride, xFrac, 0);
}
else if (!xFrac)
{
- primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride,
dstCb, dstStride, yFrac << (1 - m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride,
dstCr, dstStride, yFrac << (1 - m_vChromaShift));
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCb, refStride,
dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_vps(refCr, refStride,
dstCr, dstStride, yFrac);
}
else
{
- int extStride = cxWidth;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals +
(halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 -
m_vChromaShift));
- primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1);
- primitives.chroma[m_csp].pu[partEnum].filter_vss(m_immedVals +
(halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 -
m_vChromaShift));
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_CHROMA
- 1)]);
+ int immedStride = cxWidth;
+ int halfFilterSize = NTAPS_CHROMA >> 1;
+
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCb, refStride,
immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(immed +
(halfFilterSize - 1) * immedStride, immedStride, dstCb, dstStride, yFrac);
+ primitives.chroma[m_csp].pu[partEnum].filter_hps(refCr, refStride,
immed, immedStride, xFrac, 1);
+ primitives.chroma[m_csp].pu[partEnum].filter_vss(immed +
(halfFilterSize - 1) * immedStride, immedStride, dstCr, dstStride, yFrac);
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.h
--- a/source/common/predict.h Wed May 04 21:08:09 2016 +0000
+++ b/source/common/predict.h Thu May 12 20:29:21 2016 +0900
@@ -73,7 +73,6 @@
};
ShortYuv m_predShortYuv[2]; /* temporary storage for weighted prediction
*/
- int16_t* m_immedVals;
// Unfiltered/filtered neighbours of the current partition.
pixel intraNeighbourBuf[2][258];
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/common/x86/asm-primitives.cpp Thu May 12 20:29:21 2016 +0900
@@ -861,12 +861,12 @@
template<int size>
void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst,
intptr_t dstStride, int idxX, int idxY)
{
- ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA)]);
- const int filterSize = NTAPS_LUMA;
- const int halfFilterSize = filterSize >> 1;
-
- primitives.pu[size].luma_hps(src, srcStride, immed, MAX_CU_SIZE, idxX, 1);
- primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * MAX_CU_SIZE,
MAX_CU_SIZE, dst, dstStride, idxY);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
+ const int halfFilterSize = NTAPS_LUMA >> 1;
+ const int immedStride = MAX_CU_SIZE;
+
+ primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1);
+ primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride,
immedStride, dst, dstStride, idxY);
}
#if HIGH_BIT_DEPTH
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/motion.cpp Thu May 12 20:29:21 2016 +0900
@@ -1180,15 +1180,17 @@
int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv,
pixelcmp_t cmp)
{
intptr_t refStride = ref->lumaStride;
- pixel *fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >>
2) * refStride;
+ const pixel* fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) +
(qmv.y >> 2) * refStride;
int xFrac = qmv.x & 0x3;
int yFrac = qmv.y & 0x3;
int cost;
- intptr_t lclStride = fencPUYuv.m_size;
- X265_CHECK(lclStride == FENC_STRIDE, "fenc buffer is assumed to have
FENC_STRIDE by sad_x3 and sad_x4\n");
+ const intptr_t fencStride = FENC_STRIDE;
+ X265_CHECK(fencPUYuv.m_size == FENC_STRIDE, "fenc buffer is assumed to
have FENC_STRIDE by sad_x3 and sad_x4\n");
+ ALIGN_VAR_32(pixel, subpelbuf[MAX_CU_SIZE * MAX_CU_SIZE]);
+
if (!(yFrac | xFrac))
- cost = cmp(fencPUYuv.m_buf[0], lclStride, fref, refStride);
+ cost = cmp(fencPUYuv.m_buf[0], fencStride, fref, refStride);
else
{
/* we are taking a short-cut here if the reference is weighted. To be
@@ -1196,15 +1198,13 @@
* the final 16bit values prior to rounding and down shifting. Instead
we
* are simply interpolating the weighted full-pel pixels. Not 100%
* accurate but good enough for fast qpel ME */
- ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
if (!yFrac)
- primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf,
lclStride, xFrac);
+ primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf,
blockwidth, xFrac);
else if (!xFrac)
- primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf,
lclStride, yFrac);
+ primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf,
blockwidth, yFrac);
else
- primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf,
lclStride, xFrac, yFrac);
-
- cost = cmp(fencPUYuv.m_buf[0], lclStride, subpelbuf, lclStride);
+ primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf,
blockwidth, xFrac, yFrac);
+ cost = cmp(fencPUYuv.m_buf[0], fencStride, subpelbuf, blockwidth);
}
if (bChromaSATD)
@@ -1212,12 +1212,12 @@
int csp = fencPUYuv.m_csp;
int hshift = fencPUYuv.m_hChromaShift;
int vshift = fencPUYuv.m_vChromaShift;
- int shiftHor = (2 + hshift);
- int shiftVer = (2 + vshift);
- lclStride = fencPUYuv.m_csize;
+ int mvx = qmv.x << (1 - hshift);
+ int mvy = qmv.y << (1 - vshift);
+ intptr_t fencStrideC = fencPUYuv.m_csize;
intptr_t refStrideC = ref->reconPic->m_strideC;
- intptr_t refOffset = (qmv.x >> shiftHor) + (qmv.y >> shiftVer) *
refStrideC;
+ intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStrideC;
const pixel* refCb = ref->getCbAddr(ctuAddr, absPartIdx) + refOffset;
const pixel* refCr = ref->getCrAddr(ctuAddr, absPartIdx) + refOffset;
@@ -1225,48 +1225,46 @@
X265_CHECK((hshift == 0) || (hshift == 1), "hshift must be 0 or 1\n");
X265_CHECK((vshift == 0) || (vshift == 1), "vshift must be 0 or 1\n");
- xFrac = qmv.x & (hshift ? 7 : 3);
- yFrac = qmv.y & (vshift ? 7 : 3);
+ xFrac = mvx & 7;
+ yFrac = mvy & 7;
if (!(yFrac | xFrac))
{
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, refCb,
refStrideC);
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, refCr,
refStrideC);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, refCb,
refStrideC);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, refCr,
refStrideC);
}
else
{
- ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
+ int blockwidthC = blockwidth >> hshift;
+
if (!yFrac)
{
- primitives.chroma[csp].pu[partEnum].filter_hpp(refCb,
refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCb,
refStrideC, subpelbuf, blockwidthC, xFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf,
blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_hpp(refCr,
refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCr,
refStrideC, subpelbuf, blockwidthC, xFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf,
blockwidthC);
}
else if (!xFrac)
{
- primitives.chroma[csp].pu[partEnum].filter_vpp(refCb,
refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCb,
refStrideC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf,
blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_vpp(refCr,
refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCr,
refStrideC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf,
blockwidthC);
}
else
{
- ALIGN_VAR_32(int16_t, immed[64 * (64 + NTAPS_CHROMA)]);
-
- int extStride = blockwidth >> hshift;
- int filterSize = NTAPS_CHROMA;
- int halfFilterSize = (filterSize >> 1);
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE +
NTAPS_LUMA - 1)]);
+ const int halfFilterSize = (NTAPS_CHROMA >> 1);
- primitives.chroma[csp].pu[partEnum].filter_hps(refCb,
refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1
- vshift));
- cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCb,
refStrideC, immed, blockwidthC, xFrac, 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[1], fencStrideC, subpelbuf,
blockwidthC);
- primitives.chroma[csp].pu[partEnum].filter_hps(refCr,
refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1
- vshift));
- cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf,
lclStride);
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCr,
refStrideC, immed, blockwidthC, xFrac, 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed +
(halfFilterSize - 1) * blockwidthC, blockwidthC, subpelbuf, blockwidthC, yFrac);
+ cost += chromaSatd(fencPUYuv.m_buf[2], fencStrideC, subpelbuf,
blockwidthC);
}
}
}
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Wed May 04 21:08:09 2016 +0000
+++ b/source/encoder/weightPrediction.cpp Thu May 12 20:29:21 2016 +0900
@@ -132,25 +132,25 @@
intptr_t fpeloffset = (mv.y >> 2) * stride + (mv.x >> 2);
pixel *temp = src + pixoff + fpeloffset;
- int xFrac = mv.x & 0x7;
- int yFrac = mv.y & 0x7;
- if ((yFrac | xFrac) == 0)
+ int xFrac = mv.x & 7;
+ int yFrac = mv.y & 7;
+ if (!(yFrac | xFrac))
{
primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout +
pixoff, stride, temp, stride);
}
- else if (yFrac == 0)
+ else if (!yFrac)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp,
stride, mcout + pixoff, stride, xFrac);
}
- else if (xFrac == 0)
+ else if (!xFrac)
{
primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp,
stride, mcout + pixoff, stride, yFrac);
}
else
{
- ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
- primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp,
stride, imm, bw, xFrac, 1);
- primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm +
((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
+ ALIGN_VAR_16(int16_t, immed[16 * (16 + NTAPS_CHROMA - 1)]);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp,
stride, immed, bw, xFrac, 1);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(immed +
((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
}
}
else
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel