[x265] constants: g_zscanToRaster, g_rasterToZscan as const table

2016-07-05 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1467719659 -32400
#  Tue Jul 05 20:54:19 2016 +0900
# Node ID cf06a60ee646199cf4f139a5c22cff7ac5626d68
# Parent  a932b4366235ab6597c8d124c1569dade6ff790a
constants: g_zscanToRaster, g_rasterToZscan as const table

diff -r a932b4366235 -r cf06a60ee646 source/common/common.h
--- a/source/common/common.h   Mon Jul 04 21:25:59 2016 +0530
+++ b/source/common/common.h   Tue Jul 05 20:54:19 2016 +0900
@@ -255,7 +255,9 @@
 #define LOG2_UNIT_SIZE  2   // log2(unitSize)
 #define UNIT_SIZE   (1 << LOG2_UNIT_SIZE)   // unit size of CU 
partition
 
-#define MAX_NUM_PARTITIONS  256
+#define LOG2_RASTER_SIZE    (MAX_LOG2_CU_SIZE - LOG2_UNIT_SIZE)
+#define RASTER_SIZE (1 << LOG2_RASTER_SIZE)
+#define MAX_NUM_PARTITIONS  (RASTER_SIZE * RASTER_SIZE)
 #define NUM_4x4_PARTITIONS  (1U << (g_unitSizeDepth << 1)) // number of 
4x4 units in max CU size
 
 #define MIN_PU_SIZE 4
diff -r a932b4366235 -r cf06a60ee646 source/common/constants.cpp
--- a/source/common/constants.cpp   Mon Jul 04 21:25:59 2016 +0530
+++ b/source/common/constants.cpp   Tue Jul 05 20:54:19 2016 +0900
@@ -166,9 +166,47 @@
 uint32_t g_maxCUSize = MAX_CU_SIZE;
 uint32_t g_unitSizeDepth = NUM_CU_DEPTH;
 uint32_t g_maxCUDepth= NUM_CU_DEPTH - 1;
-uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
-uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
 
+const uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] =
+{
+0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x20, 0x21, 0x30, 0x31, 
0x22, 0x23, 0x32, 0x33,
+0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17, 0x24, 0x25, 0x34, 0x35, 
0x26, 0x27, 0x36, 0x37,
+0x40, 0x41, 0x50, 0x51, 0x42, 0x43, 0x52, 0x53, 0x60, 0x61, 0x70, 0x71, 
0x62, 0x63, 0x72, 0x73,
+0x44, 0x45, 0x54, 0x55, 0x46, 0x47, 0x56, 0x57, 0x64, 0x65, 0x74, 0x75, 
0x66, 0x67, 0x76, 0x77,
+0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x28, 0x29, 0x38, 0x39, 
0x2A, 0x2B, 0x3A, 0x3B,
+0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F, 0x2C, 0x2D, 0x3C, 0x3D, 
0x2E, 0x2F, 0x3E, 0x3F,
+0x48, 0x49, 0x58, 0x59, 0x4A, 0x4B, 0x5A, 0x5B, 0x68, 0x69, 0x78, 0x79, 
0x6A, 0x6B, 0x7A, 0x7B,
+0x4C, 0x4D, 0x5C, 0x5D, 0x4E, 0x4F, 0x5E, 0x5F, 0x6C, 0x6D, 0x7C, 0x7D, 
0x6E, 0x6F, 0x7E, 0x7F,
+0x80, 0x81, 0x90, 0x91, 0x82, 0x83, 0x92, 0x93, 0xA0, 0xA1, 0xB0, 0xB1, 
0xA2, 0xA3, 0xB2, 0xB3,
+0x84, 0x85, 0x94, 0x95, 0x86, 0x87, 0x96, 0x97, 0xA4, 0xA5, 0xB4, 0xB5, 
0xA6, 0xA7, 0xB6, 0xB7,
+0xC0, 0xC1, 0xD0, 0xD1, 0xC2, 0xC3, 0xD2, 0xD3, 0xE0, 0xE1, 0xF0, 0xF1, 
0xE2, 0xE3, 0xF2, 0xF3,
+0xC4, 0xC5, 0xD4, 0xD5, 0xC6, 0xC7, 0xD6, 0xD7, 0xE4, 0xE5, 0xF4, 0xF5, 
0xE6, 0xE7, 0xF6, 0xF7,
+0x88, 0x89, 0x98, 0x99, 0x8A, 0x8B, 0x9A, 0x9B, 0xA8, 0xA9, 0xB8, 0xB9, 
0xAA, 0xAB, 0xBA, 0xBB,
+0x8C, 0x8D, 0x9C, 0x9D, 0x8E, 0x8F, 0x9E, 0x9F, 0xAC, 0xAD, 0xBC, 0xBD, 
0xAE, 0xAF, 0xBE, 0xBF,
+0xC8, 0xC9, 0xD8, 0xD9, 0xCA, 0xCB, 0xDA, 0xDB, 0xE8, 0xE9, 0xF8, 0xF9, 
0xEA, 0xEB, 0xFA, 0xFB,
+0xCC, 0xCD, 0xDC, 0xDD, 0xCE, 0xCF, 0xDE, 0xDF, 0xEC, 0xED, 0xFC, 0xFD, 
0xEE, 0xEF, 0xFE, 0xFF
+};
+
+const uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] =
+{
+0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15, 0x40, 0x41, 0x44, 0x45, 
0x50, 0x51, 0x54, 0x55,
+0x02, 0x03, 0x06, 0x07, 0x12, 0x13, 0x16, 0x17, 0x42, 0x43, 0x46, 0x47, 
0x52, 0x53, 0x56, 0x57,
+0x08, 0x09, 0x0C, 0x0D, 0x18, 0x19, 0x1C, 0x1D, 0x48, 0x49, 0x4C, 0x4D, 
0x58, 0x59, 0x5C, 0x5D,
+0x0A, 0x0B, 0x0E, 0x0F, 0x1A, 0x1B, 0x1E, 0x1F, 0x4A, 0x4B, 0x4E, 0x4F, 
0x5A, 0x5B, 0x5E, 0x5F,
+0x20, 0x21, 0x24, 0x25, 0x30, 0x31, 0x34, 0x35, 0x60, 0x61, 0x64, 0x65, 
0x70, 0x71, 0x74, 0x75,
+0x22, 0x23, 0x26, 0x27, 0x32, 0x33, 0x36, 0x37, 0x62, 0x63, 0x66, 0x67, 
0x72, 0x73, 0x76, 0x77,
+0x28, 0x29, 0x2C, 0x2D, 0x38, 0x39, 0x3C, 0x3D, 0x68, 0x69, 0x6C, 0x6D, 
0x78, 0x79, 0x7C, 0x7D,
+0x2A, 0x2B, 0x2E, 0x2F, 0x3A, 0x3B, 0x3E, 0x3F, 0x6A, 0x6B, 0x6E, 0x6F, 
0x7A, 0x7B, 0x7E, 0x7F,
+0x80, 0x81, 0x84, 0x85, 0x90, 0x91, 0x94, 0x95, 0xC0, 0xC1, 0xC4, 0xC5, 
0xD0, 0xD1, 0xD4, 0xD5,
+0x82, 0x83, 0x86, 0x87, 0x92, 0x93, 0x96, 0x97, 0xC2, 0xC3, 0xC6, 0xC7, 
0xD2, 0xD3, 0xD6, 0xD7,
+0x88, 0x89, 0x8C, 0x8D, 0x98, 0x99, 0x9C, 0x9D, 0xC8, 0xC9, 0xCC, 0xCD, 
0xD8, 0xD9, 0xDC, 0xDD,
+0x8A, 0x8B, 0x8E, 0x8F, 0x9A, 0x9B, 0x9E, 0x9F, 0xCA, 0xCB, 0xCE, 0xCF, 
0xDA, 0xDB, 0xDE, 0xDF,
+0xA0, 0xA1, 0xA4, 0xA5, 0xB0, 0xB1, 0xB4, 0xB5, 0xE0, 0xE1, 0xE4, 0xE5, 
0xF0, 0xF1, 0xF4, 0xF5,
+0xA2, 0xA3, 0xA6, 0xA7, 0xB2, 0xB3, 0xB6, 0xB7, 0xE2, 0xE3, 0xE6, 0xE7, 
0xF2, 0xF3, 0xF6, 0xF7,
+0xA8, 0xA9, 0xAC, 0xAD, 0xB8, 0xB9, 0xBC, 0xBD, 0xE8, 0xE9, 0xEC, 0xED, 
0xF8, 0xF9, 0xFC, 0xFD,
+0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 
0xFA, 0xFB, 0xFE, 0xFF
+};
+
 const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
 {
 0, 4, 0, 

[x265] remove m_immedVals

2016-05-12 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1463052561 -32400
#  Thu May 12 20:29:21 2016 +0900
# Node ID 3d6c4c1fcb9923e8215aefae62bfeeb118e173c0
# Parent  a5362b9533f6a5b77740b4e8f97dba2555b6f929
remove m_immedVals

diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cppWed May 04 21:08:09 2016 +
+++ b/source/common/ipfilter.cppThu May 12 20:29:21 2016 +0900
@@ -365,10 +365,10 @@
 template<int N, int width, int height>
 void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t 
dstStride, int idxX, int idxY)
 {
-short immedVals[(64 + 8) * (64 + 8)];
+ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]);
 
-interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width, 
idxX, 1);
-filterVertical_sp_c(immedVals + 3 * width, width, dst, dstStride, 
width, height, idxY);
+interp_horiz_ps_c<N, width, height>(src, srcStride, immed, width, idxX, 1);
+filterVertical_sp_c(immed + (N / 2 - 1) * width, width, dst, dstStride, 
width, height, idxY);
 }
 }
 
diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.cpp
--- a/source/common/predict.cpp Wed May 04 21:08:09 2016 +
+++ b/source/common/predict.cpp Thu May 12 20:29:21 2016 +0900
@@ -57,12 +57,10 @@
 
 Predict::Predict()
 {
-m_immedVals = NULL;
 }
 
 Predict::~Predict()
 {
-X265_FREE(m_immedVals);
 m_predShortYuv[0].destroy();
 m_predShortYuv[1].destroy();
 }
@@ -72,12 +70,8 @@
 m_csp = csp;
 m_hChromaShift = CHROMA_H_SHIFT(csp);
 m_vChromaShift = CHROMA_V_SHIFT(csp);
-CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1));
 
 return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && 
m_predShortYuv[1].create(MAX_CU_SIZE, csp);
-
-fail:
-return false;
 }
 
 void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, 
Yuv& predYuv, bool bLuma, bool bChroma)
@@ -258,8 +252,8 @@
 int partEnum = partitionFromSizes(pu.width, pu.height);
 const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + 
pu.puAbsPartIdx) + srcOffset;
 
-int xFrac = mv.x & 0x3;
-int yFrac = mv.y & 0x3;
+int xFrac = mv.x & 3;
+int yFrac = mv.y & 3;
 
 if (!(yFrac | xFrac))
 primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
@@ -280,14 +274,14 @@
 intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride;
 const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + 
pu.puAbsPartIdx) + srcOffset;
 
-int xFrac = mv.x & 0x3;
-int yFrac = mv.y & 0x3;
-
 int partEnum = partitionFromSizes(pu.width, pu.height);
 
 X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not 
divisible by 4\n");
 X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu 
size\n");
 
+int xFrac = mv.x & 3;
+int yFrac = mv.y & 3;
+
 if (!(yFrac | xFrac))
 primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride);
 else if (!yFrac)
@@ -296,11 +290,12 @@
 primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, 
yFrac);
 else
 {
-int tmpStride = pu.width;
-int filterSize = NTAPS_LUMA;
-int halfFilterSize = (filterSize >> 1);
-primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, 
tmpStride, xFrac, 1);
-primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * 
tmpStride, tmpStride, dst, dstStride, yFrac);
+ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 
1)]);
+int immedStride = pu.width;
+int halfFilterSize = NTAPS_LUMA >> 1;
+
+primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, 
xFrac, 1);
+primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * 
immedStride, immedStride, dst, dstStride, yFrac);
 }
 }
 
@@ -309,10 +304,10 @@
 intptr_t dstStride = dstYuv.m_csize;
 intptr_t refStride = refPic.m_strideC;
 
-int shiftHor = (2 + m_hChromaShift);
-int shiftVer = (2 + m_vChromaShift);
+int mvx = mv.x << (1 - m_hChromaShift);
+int mvy = mv.y << (1 - m_vChromaShift);
 
-intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride;
+intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride;
 
 const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + 
pu.puAbsPartIdx) + refOffset;
 const pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + 
pu.puAbsPartIdx) + refOffset;
@@ -320,11 +315,11 @@
 pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx);
 pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx);
 
-int xFrac = mv.x & ((1 << shiftHor) - 1);
-int yFrac = mv.y & ((1 << shiftVer) - 1);
+int partEnum = partitionFromSizes(pu.width, pu.height);
 
-int partEnum = partitionF

[x265] move tables from .h to .cpp

2016-03-28 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1459165671 -32400
#  Mon Mar 28 20:47:51 2016 +0900
# Node ID 68099f6e3cd9a8911b6e559a0387c8ff485f5afe
# Parent  5dbd6a0c8e17481a0c4d31243ebc8b46ad59e15d
move tables from .h to .cpp

diff -r 5dbd6a0c8e17 -r 68099f6e3cd9 source/common/contexts.h
--- a/source/common/contexts.h  Mon Mar 28 12:53:40 2016 +0530
+++ b/source/common/contexts.h  Mon Mar 28 20:47:51 2016 +0900
@@ -117,196 +117,8 @@
 #define sbacGetEntropyBits(S, V) (g_entropyBits[(S) ^ (V)])
 #define sbacGetEntropyBitsTrm(V) (g_entropyBits[126 ^ (V)])
 
-#define MAX_NUM_CHANNEL_TYPE 2
+static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 
2, 3, 4, 5, 6 } };
 
-static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 
2, 3, 4, 5, 6 } };
-static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] 
= { { 0,  9, 21 }, { 0,  9, 12 } };
-static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3]  
= { { 9, 12,  6 }, { 9,  3,  3 } };
-static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE]   
= {  6, 0  };
-static const uint32_t 
notFirstGroupNeighbourhoodContextOffset[MAX_NUM_CHANNEL_TYPE] = { 3, 0 };
-
-// initial probability for cu_transquant_bypass flag
-static const uint8_t 
INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
-{
-{ 154 },
-{ 154 },
-{ 154 },
-};
-
-// initial probability for split flag
-static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
-{
-{ 107,  139,  126, },
-{ 107,  139,  126, },
-{ 139,  141,  157, },
-};
-
-static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
-{
-{ 197,  185,  201, },
-{ 197,  185,  201, },
-{ CNU,  CNU,  CNU, },
-};
-
-static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
-{
-{ 154, },
-{ 110, },
-{ CNU, },
-};
-
-static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
-{
-{ 137, },
-{ 122, },
-{ CNU, },
-};
-
-static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
-{
-{ 154,  139,  154, 154 },
-{ 154,  139,  154, 154 },
-{ 184,  CNU,  CNU, CNU },
-};
-
-static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
-{
-{ 134, },
-{ 149, },
-{ CNU, },
-};
-
-static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
-{
-{ 183, },
-{ 154, },
-{ 184, },
-};
-
-static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
-{
-{ 152,  139, },
-{ 152,  139, },
-{  63,  139, },
-};
-
-static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
-{
-{  95,   79,   63,   31,  31, },
-{  95,   79,   63,   31,  31, },
-{ CNU,  CNU,  CNU,  CNU, CNU, },
-};
-
-static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
-{
-{ 169,  198, },
-{ 140,  198, },
-{ CNU,  CNU, },
-};
-
-static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
-{
-{ 153,  153 },
-{ 153,  153 },
-{ CNU,  CNU },
-};
-
-static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
-{
-{ 154,  154,  154, },
-{ 154,  154,  154, },
-{ 154,  154,  154, },
-};
-
-static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
-{
-{ 153,  111,  149,   92,  167,  154,  154 },
-{ 153,  111,  149,  107,  167,  154,  154 },
-{ 111,  141,   94,  138,  182,  154,  154 },
-};
-
-static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
-{
-{  79, },
-{  79, },
-{ CNU, },
-};
-
-static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
-{
-{ 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  
111,  111,   79,
-  108,  123,   93 },
-{ 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  
111,   95,   94,
-  108,  123,  108 },
-{ 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  
127,  111,   79,
-  108,  123,   63 },
-};
-
-static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
-{
-{ 121,  140,
-  61,  154, },
-{ 121,  140,
-  61,  154, },
-{  91,  171,
-   134,  141, },
-};
-
-static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
-{
-{ 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  
136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  
153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  
140,  151,  183,  140,  },
-{ 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  
136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  
153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  
140,  151,  183,  140,  },
-{ 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  
179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  
153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  
111,  136,  139,  111,  },
-};
-
-static

[x265] remove broadcast of non-leaf CBF

2016-03-24 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1458869254 -32400
#  Fri Mar 25 10:27:34 2016 +0900
# Node ID a2acf2e18b329b39f9e69d2ea818d834179b8a77
# Parent  c8ec86965e546f271ef54bad508a82e8a4911008
remove broadcast of non-leaf CBF

diff -r c8ec86965e54 -r a2acf2e18b32 source/common/cudata.h
--- a/source/common/cudata.hFri Feb 19 14:36:52 2016 +0530
+++ b/source/common/cudata.hFri Mar 25 10:27:34 2016 +0900
@@ -247,7 +247,7 @@
 void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx);
 
 uint8_t  getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) 
const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; }
-uint8_t  getQtRootCbf(uint32_t absPartIdx) const   
  { if (m_chromaFormat == X265_CSP_I400) return m_cbf[0][absPartIdx] || false; 
else { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || 
m_cbf[2][absPartIdx];} }
+bool getQtRootCbf(uint32_t absPartIdx) const   
  { return (m_cbf[0][absPartIdx] || ((m_chromaFormat != X265_CSP_I400) && 
(m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]))); }
 int8_t   getRefQP(uint32_t currAbsIdxInCTU) const;
 uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, 
MVField (*candMvField)[2], uint8_t* candDir) const;
 void clipMv(MV& outMV) const;
diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cppFri Feb 19 14:36:52 2016 +0530
+++ b/source/encoder/entropy.cppFri Mar 25 10:27:34 2016 +0900
@@ -721,16 +721,12 @@
 bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
 if (!curDepth || !bSmallChroma)
 {
-if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1))
+uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - 
LOG2_UNIT_SIZE) * 2);
+if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
 codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
-if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1))
+if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
 codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
 }
-else
-{
-X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == 
cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match 
failure\n");
-X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == 
cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match 
failure\n");
-}
 
 if (subdiv)
 {
@@ -753,7 +749,7 @@
 X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been 
set\n");
 }
 else
-codeQtCbfLuma(cu, absPartIdx, curDepth);
+codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
 
 uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
 uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
@@ -874,7 +870,7 @@
 X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been 
set\n");
 }
 else
-codeQtCbfLuma(cu, absPartIdx, curDepth);
+codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
 
 uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
 
diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/entropy.h
--- a/source/encoder/entropy.h  Fri Feb 19 14:36:52 2016 +0530
+++ b/source/encoder/entropy.h  Fri Mar 25 10:27:34 2016 +0900
@@ -162,7 +162,6 @@
 
 void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
 void codePredInfo(const CUData& cu, uint32_t absPartIdx);
-inline void codeQtCbfLuma(const CUData& cu, uint32_t absPartIdx, uint32_t 
tuDepth) { codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth), tuDepth); }
 
 void codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType 
ttype, uint32_t tuDepth, bool lowestLevel);
 void codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, 
const uint32_t depthRange[2]);
diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Feb 19 14:36:52 2016 +0530
+++ b/source/encoder/search.cpp Fri Mar 25 10:27:34 2016 +0900
@@ -222,9 +222,10 @@
 
 if (!(log2TrSize - m_hChromaShift < 2))
 {
-if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
+uint32_t parentIdx = absPartIdx & (0xFF << (log2TrSize + 1 - 
LOG2_UNIT_SIZE) * 2);
+if (!tuDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, tuDepth - 1))
 m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, 
tuDepth, !subdiv);
-if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
+if (!tuDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, tuDepth - 1))
  

[x265] backout implicit inter TU split condition

2015-02-16 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1424139217 -32400
#  Tue Feb 17 11:13:37 2015 +0900
# Node ID cf88e808db61a66344978bcc6b16d19825f2ade2
# Parent  cbec71924b09f27e80f2c752caad9e0e7bf7878b
backout implicit inter TU split condition

diff -r cbec71924b09 -r cf88e808db61 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cppMon Feb 16 18:26:29 2015 +0530
+++ b/source/encoder/entropy.cppTue Feb 17 11:13:37 2015 +0900
@@ -694,7 +694,8 @@
 {
 X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
 }
-else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
+else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
+ !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
 {
 X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N 
part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
 }
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] quant: add m_tqBypass

2015-01-28 Thread Satoshi Nakagawa

 this patch looks ok except for the fact that this assignment looks like
 it should be done in setupQPForQuant(cu) itself.

setQPForQuant() is intended to operate at the CTU level, but tqBypass control
is at the CU level.

# s/ctu/cu/ for setQPForQuant ?


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Thursday, January 29, 2015 1:13 AM
 To: Development for x265
 Subject: Re: [x265] quant: add m_tqBypass
 
 On 01/28, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1422456196 -32400
  #  Wed Jan 28 23:43:16 2015 +0900
  # Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e
  # Parent  c1371f175178edcc0d0402a745b7478aa240c3b4
  quant: add m_tqBypass
 
  diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp
  --- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600
  +++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900
  @@ -401,14 +401,22 @@
   if (!bs)
   continue;
 
  -int32_t qpQ = cuQ-m_qp[partQ];
  -
   // Derive neighboring PU index
   uint32_t partP;
   const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP,
  partQ) : cuQ-getPUAbove(partP, partQ));
 
  +if (bCheckNoFilter)
  +{
  +// check if each of PUs is lossless coded
  +maskP = cuP-m_tqBypass[partP] - 1;
  +maskQ = cuQ-m_tqBypass[partQ] - 1;
  +if (!(maskP | maskQ))
  +continue;
  +}
  +
  +int32_t qpQ = cuQ-m_qp[partQ];
   int32_t qpP = cuP-m_qp[partP];
  -int32_t qp = (qpP + qpQ + 1)  1;
  +int32_t qp  = (qpP + qpQ + 1)  1;
 
   int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
 
  @@ -428,13 +436,6 @@
   if (d = beta)
   continue;
 
  -if (bCheckNoFilter)
  -{
  -// check if each of PUs is lossless coded
  -maskP = (cuP-m_tqBypass[partP] ? 0 : -1);
  -maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1);
  -}
  -
   int32_t indexTC = x265_clip3(0, QP_MAX_SPEC +
 DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs -
 1) + tcOffset));
   int32_t tc = s_tcTable[indexTC]  bitdepthShift;
 
  @@ -506,33 +507,29 @@
   if (bs = 1)
   continue;
 
  -int32_t qpQ = cuQ-m_qp[partQ];
  -
   // Derive neighboring PU index
   uint32_t partP;
   const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP,
  partQ) : cuQ-getPUAbove(partP, partQ));
 
  -int32_t qpP = cuP-m_qp[partP];
  -
   if (bCheckNoFilter)
   {
   // check if each of PUs is lossless coded
   maskP = (cuP-m_tqBypass[partP] ? 0 : -1);
   maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1);
  +if (!(maskP | maskQ))
  +continue;
   }
 
  +int32_t qpQ = cuQ-m_qp[partQ];
  +int32_t qpP = cuP-m_qp[partP];
  +int32_t qpA = (qpP + qpQ + 1)  1;
  +
   intptr_t unitOffset = idx * srcStep  LOG2_UNIT_SIZE;
   for (uint32_t chromaIdx = 0; chromaIdx  2; chromaIdx++)
   {
  -int32_t chromaQPOffset  =
 pps-chromaQpOffset[chromaIdx];
  -int32_t qp = ((qpP + qpQ + 1)  1) + chromaQPOffset;
  +int32_t qp = qpA + pps-chromaQpOffset[chromaIdx];
   if (qp = 30)
  -{
  -if (chFmt == X265_CSP_I420)
  -qp = g_chromaScale[qp];
  -else
  -qp = X265_MIN(qp, 51);
  -}
  +qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] :
  + X265_MIN(qp, 51);
 
   int32_t indexTC = x265_clip3(0, QP_MAX_SPEC +
 DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET +
 tcOffset));
   const int32_t bitdepthShift = X265_DEPTH - 8; diff -r
  c1371f175178 -r 231f1a91eaef source/common/quant.cpp
  --- a/source/common/quant.cpp   Mon Jan 26 15:31:42 2015 -0600
  +++ b/source/common/quant.cpp   Wed Jan 28 23:43:16 2015 +0900
  @@ -169,6 +169,7 @@
   m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE
 * 2);
   m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
   m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE *
 MAX_TR_SIZE);
  +m_tqBypass = false;
 
   return m_resiDctCoeff  m_fencShortBuf;  } @@ -326,7 +327,7 @@
coeff_t* coeff, uint32_t log2TrSize,
  TextType ttype, uint32_t absPartIdx, bool useTransformSkip)  {
   const uint32_t sizeIdx = log2TrSize - 2;
  -if (cu.m_tqBypass[absPartIdx])
  +if (m_tqBypass)
   {
   X265_CHECK(log2TrSize = 2  log2TrSize = 5, Block size
 mistake!\n);
   return primitives.cu[sizeIdx].copy_cnt(coeff, residual,
  resiStride); @@ -406,11 +407,11 @@
   }
   }
 
  -void Quant

[x265] quant: add m_tqBypass

2015-01-28 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1422456196 -32400
#  Wed Jan 28 23:43:16 2015 +0900
# Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e
# Parent  c1371f175178edcc0d0402a745b7478aa240c3b4
quant: add m_tqBypass

diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp
--- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900
@@ -401,14 +401,22 @@
 if (!bs)
 continue;
 
-int32_t qpQ = cuQ->m_qp[partQ];
-
 // Derive neighboring PU index
 uint32_t partP;
 const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : 
cuQ->getPUAbove(partP, partQ));
 
+if (bCheckNoFilter)
+{
+// check if each of PUs is lossless coded
+maskP = cuP->m_tqBypass[partP] - 1;
+maskQ = cuQ->m_tqBypass[partQ] - 1;
+if (!(maskP | maskQ))
+continue;
+}
+
+int32_t qpQ = cuQ->m_qp[partQ];
 int32_t qpP = cuP->m_qp[partP];
-int32_t qp = (qpP + qpQ + 1) >> 1;
+int32_t qp  = (qpP + qpQ + 1) >> 1;
 
 int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
 
@@ -428,13 +436,6 @@
 if (d = beta)
 continue;
 
-if (bCheckNoFilter)
-{
-// check if each of PUs is lossless coded
-maskP = (cuP-m_tqBypass[partP] ? 0 : -1);
-maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1);
-}
-
 int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, 
int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
 int32_t tc = s_tcTable[indexTC]  bitdepthShift;
 
@@ -506,33 +507,29 @@
 if (bs = 1)
 continue;
 
-int32_t qpQ = cuQ-m_qp[partQ];
-
 // Derive neighboring PU index
 uint32_t partP;
 const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP, partQ) : 
cuQ-getPUAbove(partP, partQ));
 
-int32_t qpP = cuP-m_qp[partP];
-
 if (bCheckNoFilter)
 {
 // check if each of PUs is lossless coded
 maskP = (cuP-m_tqBypass[partP] ? 0 : -1);
 maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1);
+if (!(maskP | maskQ))
+continue;
 }
 
+int32_t qpQ = cuQ-m_qp[partQ];
+int32_t qpP = cuP-m_qp[partP];
+int32_t qpA = (qpP + qpQ + 1)  1;
+
 intptr_t unitOffset = idx * srcStep  LOG2_UNIT_SIZE;
 for (uint32_t chromaIdx = 0; chromaIdx  2; chromaIdx++)
 {
-int32_t chromaQPOffset  = pps-chromaQpOffset[chromaIdx];
-int32_t qp = ((qpP + qpQ + 1)  1) + chromaQPOffset;
+int32_t qp = qpA + pps-chromaQpOffset[chromaIdx];
 if (qp = 30)
-{
-if (chFmt == X265_CSP_I420)
-qp = g_chromaScale[qp];
-else
-qp = X265_MIN(qp, 51);
-}
+qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 
51);
 
 int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + 
DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
 const int32_t bitdepthShift = X265_DEPTH - 8;
diff -r c1371f175178 -r 231f1a91eaef source/common/quant.cpp
--- a/source/common/quant.cpp   Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/quant.cpp   Wed Jan 28 23:43:16 2015 +0900
@@ -169,6 +169,7 @@
 m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
 m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
 m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
+m_tqBypass = false;
 
 return m_resiDctCoeff  m_fencShortBuf;
 }
@@ -326,7 +327,7 @@
  coeff_t* coeff, uint32_t log2TrSize, TextType 
ttype, uint32_t absPartIdx, bool useTransformSkip)
 {
 const uint32_t sizeIdx = log2TrSize - 2;
-if (cu.m_tqBypass[absPartIdx])
+if (m_tqBypass)
 {
 X265_CHECK(log2TrSize = 2  log2TrSize = 5, Block size 
mistake!\n);
 return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
@@ -406,11 +407,11 @@
 }
 }
 
-void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t 
resiStride, const coeff_t* coeff,
+void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const 
coeff_t* coeff,
 uint32_t log2TrSize, TextType ttype, bool bIntra, 
bool useTransformSkip, uint32_t numSig)
 {
 const uint32_t sizeIdx = log2TrSize - 2;
-if (transQuantBypass)
+if (m_tqBypass)
 {
 primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
 return;
diff -r c1371f175178 -r 231f1a91eaef source/common/quant.h
--- a/source/common/quant.h Mon Jan 26 15:31:42 2015 -0600
+++ b/source/common/quant.h Wed Jan 28 23:43:16 2015 +0900
@@ -93,6 +93,7 @@
 
 NoiseReduction*m_nr

[x265] more use CUGeom

2015-01-17 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1421487172 -32400
#  Sat Jan 17 18:32:52 2015 +0900
# Node ID 270c9786681069d34c8eb709b74412843e37373a
# Parent  65e71f08c55a0e9303d51691b3435cb5fdf6c6a1
more use CUGeom

diff -r 65e71f08c55a -r 270c97866810 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Sat Jan 17 10:12:34 2015 +0530
+++ b/source/common/cudata.cpp  Sat Jan 17 18:32:52 2015 +0900
@@ -57,51 +57,51 @@
 void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); }
 
 /* Check whether 2 addresses point to the same column */
-inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualCol(int addrA, int addrB, int numUnits)
 {
-// addrA % numUnitsPerRow == addrB % numUnitsPerRow
-return ((addrA ^ addrB) &  (numUnitsPerRow - 1)) == 0;
+// addrA % numUnits == addrB % numUnits
+return ((addrA ^ addrB) &  (numUnits - 1)) == 0;
 }
 
 /* Check whether 2 addresses point to the same row */
-inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualRow(int addrA, int addrB, int numUnits)
 {
-// addrA / numUnitsPerRow == addrB / numUnitsPerRow
-return ((addrA ^ addrB) & ~(numUnitsPerRow - 1)) == 0;
+// addrA / numUnits == addrB / numUnits
+return ((addrA ^ addrB) & ~(numUnits - 1)) == 0;
 }
 
 /* Check whether 2 addresses point to the same row or column */
-inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow)
+inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
 {
-return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, 
numUnitsPerRow);
+return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, 
numUnits);
 }
 
 /* Check whether one address points to the first column */
-inline bool isZeroCol(int addr, int numUnitsPerRow)
+inline bool isZeroCol(int addr, int numUnits)
 {
-// addr % numUnitsPerRow == 0
-return (addr & (numUnitsPerRow - 1)) == 0;
+// addr % numUnits == 0
+return (addr & (numUnits - 1)) == 0;
 }
 
 /* Check whether one address points to the first row */
-inline bool isZeroRow(int addr, int numUnitsPerRow)
+inline bool isZeroRow(int addr, int numUnits)
 {
-// addr / numUnitsPerRow == 0
-return (addr & ~(numUnitsPerRow - 1)) == 0;
+// addr / numUnits == 0
+return (addr & ~(numUnits - 1)) == 0;
 }
 
 /* Check whether one address points to a column whose index is smaller than a 
given value */
-inline bool lessThanCol(int addr, int val, int numUnitsPerRow)
+inline bool lessThanCol(int addr, int val, int numUnits)
 {
-// addr % numUnitsPerRow < val
-return (addr & (numUnitsPerRow - 1)) < val;
+// addr % numUnits < val
+return (addr & (numUnits - 1)) < val;
 }
 
 /* Check whether one address points to a row whose index is smaller than a 
given value */
-inline bool lessThanRow(int addr, int val, int numUnitsPerRow)
+inline bool lessThanRow(int addr, int val, int numUnits)
 {
-// addr / numUnitsPerRow < val
-return addr < val * numUnitsPerRow;
+// addr / numUnits < val
+return addr < val * numUnits;
 }
 
 inline MV scaleMv(MV mv, int scale)
@@ -1533,17 +1533,17 @@
 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + 
g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
 {
 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
-uint32_t numPartInCUSize = s_numPartInCUSize;
-bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, 
numPartInCUSize); // is not at the last column of CTU
-bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, 
numPartInCUSize); // is not at the last rowof CTU
+uint32_t numUnits = s_numPartInCUSize;
+bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, 
numUnits); // is not at the last column of CTU
+bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, 
numUnits); // is not at the last rowof CTU
 
 if (bNotLastCol  bNotLastRow)
 {
-absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 
1];
+absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1];
 ctuIdx = m_cuAddr;
 }
 else if (bNotLastCol)
-absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize 
+ 1) & (numPartInCUSize - 1)];
+absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) & 
(numUnits - 1)];
 else if (bNotLastRow)
 {
 absPartAddr = g_rasterToZscan[absPartIdxRB + 1];
@@ -1760,17 +1760,17 @@
 m_encData->getPicCTU(m_cuAddr)->m_cuPelY + 
g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
 {
 uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
-uint32_t numPartInCUSize = s_numPartInCUSize;
-bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1

Re: [x265] [PATCH] slicetype: allow queue to fill past full to prevent bottlenecks

2015-01-08 Thread Satoshi Nakagawa
Steve,

This patch causes a deadlock/freeze on short clips, i.e. clips shorter than the lookahead.

# my test script often uses -f 17

Please check.

Satoshi

 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Tuesday, January 06, 2015 9:23 PM
 To: x265-devel@videolan.org
 Subject: [x265] [PATCH] slicetype: allow queue to fill past full to
 prevent bottlenecks
 
 # HG changeset patch
 # User Steve Borho st...@borho.org
 # Date 1420538938 -19800
 #  Tue Jan 06 15:38:58 2015 +0530
 # Node ID d36211d0190f5aafdf7ecf6657e8d1a5ba14657c
 # Parent  95f1e1f0efa4541e253125e7f564ecfbf8e647f9
 slicetype: allow queue to fill past full to prevent bottlenecks
 
 Allow the lookahead to grow just past full before we begin pulling off
 output frames and handing them to frame encoders.  This lag of about one
 mini-gop allows slicetypeDecide to stay ahead of the frame encoders and
 always have frames in the output queue when they are needed.  It's a
 non-trivial performance boost for most presets that used b-adapt 2.
 
 diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cpp  Tue Jan 06 12:33:36 2015 +0530
 +++ b/source/encoder/encoder.cpp  Tue Jan 06 15:38:58 2015 +0530
 @@ -291,10 +291,7 @@
  delete [] m_threadLocalData;
 
  if (m_lookahead)
 -{
 -m_lookahead->destroy();
 -delete m_lookahead;
 -}
 +m_lookahead->stop();
 
  delete m_dpb;
  if (m_rateControl)
 @@ -302,10 +299,17 @@
  m_rateControl-destroy();
  delete m_rateControl;
  }
 +
  // thread pool release should always happen last
  if (m_threadPool)
  m_threadPool-release();
 
 +if (m_lookahead)
 +{
 +m_lookahead-destroy();
 +delete m_lookahead;
 +}
 +
  X265_FREE(m_cuOffsetY);
  X265_FREE(m_cuOffsetC);
  X265_FREE(m_buOffsetY);
 diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.cpp
 --- a/source/encoder/slicetype.cppTue Jan 06 12:33:36 2015 +0530
 +++ b/source/encoder/slicetype.cppTue Jan 06 15:38:58 2015 +0530
 @@ -59,11 +59,12 @@
  : JobProvider(pool)
  , m_est(pool)
  {
 -m_bReady = 0;
 +m_bReady = false;
 +m_bBusy = false;
  m_param = param;
  m_lastKeyframe = -m_param-keyframeMax;
  m_lastNonB = NULL;
 -m_bFilling = true;
 +m_bFilled = false;
  m_bFlushed = false;
  m_widthInCU = ((m_param-sourceWidth / 2) + X265_LOWRES_CU_SIZE -
 1)  X265_LOWRES_CU_BITS;
  m_heightInCU = ((m_param-sourceHeight / 2) + X265_LOWRES_CU_SIZE
 - 1)  X265_LOWRES_CU_BITS; @@ -79,17 +80,26 @@
  ((m_param-bFrameAdaptive  m_param-bframes) ||
   m_param-rc.cuTree || m_param-scenecutThreshold ||
   (m_param-lookaheadDepth  m_param-rc.vbvBufferSize)))
 -m_pool = m_pool; /* allow use of worker thread */
 +{
 +JobProvider::enqueue();
 +}
  else
  m_pool = NULL; /* disable use of worker thread */  }
 
 +void Lookahead::stop()
 +{
 +/* do not allow slicetypeDecide() to get started again */
 +m_bReady = false;
 +m_bFlushed = false;
 +m_bBusy = false;
 +
 +if (m_pool)
 +JobProvider::flush(); // flush will dequeue, if it is necessary
 +}
 +
  void Lookahead::destroy()
  {
 -if (m_pool)
 -// flush will dequeue, if it is necessary
 -JobProvider::flush();
 -
  // these two queues will be empty unless the encode was aborted
  while (!m_inputQueue.empty())
  {
 @@ -120,47 +130,52 @@
 
  if (m_inputQueue.size() >= m_param->lookaheadDepth)
  {
 -/* when queue fills the first time, run slicetypeDecide
 synchronously,
 - * since the encoder will always be blocked here */
 -if (m_pool  !m_bFilling)
 +if (m_pool)
  {
 +m_bReady = !m_bBusy;
  m_inputQueueLock.release();
 -m_bReady = 1;
  m_pool-pokeIdleThread();
  }
  else
  slicetypeDecide();
 -
 -if (m_bFilling  m_pool)
 -JobProvider::enqueue();
 -m_bFilling = false;
  }
  else
  m_inputQueueLock.release();
 +
 +/* determine if the lookahead is (over) filled enough for frames
 to begin to
 + * be consumed by frame encoders */
 +if (!m_bFilled)
 +{
 +if (!m_param-bframes  !m_param-lookaheadDepth)
 +m_bFilled = true; /* zero-latency */
 +else if (curFrame-m_poc = m_param-lookaheadDepth + 2 +
 m_param-bframes)
 +m_bFilled = true; /* full capacity plus mini-gop lag */
 +}
  }
 
  /* Called by API thread */
  void Lookahead::flush()
  {
 +m_bFilled = true;
 +
  /* just in case the input queue is never allowed to fill */
 -m_bFilling = false;
 -
 -/* flush synchronously */
  m_inputQueueLock.acquire();
 -if (!m_inputQueue.empty())
 +if (m_inputQueue.empty())
  {
 -slicetypeDecide();
 +   

[x265] slicetype: fix flush

2015-01-08 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1420711444 -32400
#  Thu Jan 08 19:04:04 2015 +0900
# Node ID 25fb38350e81cda31a5e4af4f2814d12b968a8d1
# Parent  6dce2b87f0fe4aa37f9c7d66ec99447919b19c64
slicetype: fix flush

diff -r 6dce2b87f0fe -r 25fb38350e81 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp  Thu Jan 08 10:29:09 2015 +0530
+++ b/source/encoder/slicetype.cpp  Thu Jan 08 19:04:04 2015 +0900
@@ -66,6 +66,7 @@
 m_lastNonB = NULL;
 m_bFilled = false;
 m_bFlushed = false;
+m_bFlush = false;
 m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> 
X265_LOWRES_CU_BITS;
 m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> 
X265_LOWRES_CU_BITS;
 m_scratch = (int*)x265_malloc(m_widthInCU * sizeof(int));
@@ -92,6 +93,7 @@
 /* do not allow slicetypeDecide() to get started again */
 m_bReady = false;
 m_bFlushed = false;
+m_bFlush = false;
 m_bBusy = false;
 
 if (m_pool)
@@ -156,6 +158,7 @@
 /* Called by API thread */
 void Lookahead::flush()
 {
+m_bFlush = true;
 m_bFilled = true;
 
 /* just in case the input queue is never allowed to fill */
@@ -233,7 +236,7 @@
 break;
 }
 while (m_inputQueue.size() >= m_param->lookaheadDepth ||
-   (m_bFlushed  m_inputQueue.size()));
+   (m_bFlush  m_inputQueue.size()));
 
 m_bBusy = false;
 m_inputQueueLock.release();
diff -r 6dce2b87f0fe -r 25fb38350e81 source/encoder/slicetype.h
--- a/source/encoder/slicetype.hThu Jan 08 10:29:09 2015 +0530
+++ b/source/encoder/slicetype.hThu Jan 08 19:04:04 2015 +0900
@@ -163,6 +163,7 @@
 bool  m_bBusy;/* input lock - slicetypeDecide() is running */
 bool  m_bFilled;  /* enough frames in lookahead for output to be available 
*/
 bool  m_bFlushed; /* no more frames will be received */
+bool  m_bFlush;
 
 bool  findJob(int);
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] sao: minimize skipped lines [CHANGES OUTPUT]

2015-01-05 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1420337650 -32400
#  Sun Jan 04 11:14:10 2015 +0900
# Node ID 78cf196b3982a327cd38a5f89fcc43fdb94fe5a5
# Parent  f255e8d06423231cb8c58ab5d3b10de7fb27b424
sao: minimize skipped lines [CHANGES OUTPUT]

diff -r f255e8d06423 -r 78cf196b3982 source/encoder/sao.cpp
--- a/source/encoder/sao.cppFri Jan 02 18:22:38 2015 +0530
+++ b/source/encoder/sao.cppSun Jan 04 11:14:10 2015 +0900
@@ -605,8 +605,8 @@
 int32_t* stats;
 int32_t* count;
 
-int skipB = plane ? 2 : 4;
-int skipR = plane ? 3 : 5;
+int skipR, skipB;
+int skipD = plane ? 1 : 3;
 
 int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
 int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
@@ -615,11 +615,9 @@
 {
 const int boShift = X265_DEPTH - SAO_BO_BITS;
 
-if (m_param-bSaoNonDeblocked)
-{
-skipB = plane ? 1 : 3;
-skipR = plane ? 2 : 4;
-}
+skipR = skipD;
+skipB = skipD;
+
 stats = m_offsetOrg[plane][SAO_BO];
 count = m_count[plane][SAO_BO];
 
@@ -646,11 +644,9 @@
 {
 // SAO_EO_0: // dir: -
 {
-if (m_param-bSaoNonDeblocked)
-{
-skipB = plane ? 1 : 3;
-skipR = plane ? 3 : 5;
-}
+skipR = skipD + 1;
+skipB = skipD;
+
 stats = m_offsetOrg[plane][SAO_EO_0];
 count = m_count[plane][SAO_EO_0];
 
@@ -679,11 +675,9 @@
 
 // SAO_EO_1: // dir: |
 {
-if (m_param-bSaoNonDeblocked)
-{
-skipB = plane ? 2 : 4;
-skipR = plane ? 2 : 4;
-}
+skipR = skipD;
+skipB = skipD + 1;
+
 stats = m_offsetOrg[plane][SAO_EO_1];
 count = m_count[plane][SAO_EO_1];
 
@@ -726,11 +720,9 @@
 
 // SAO_EO_2: // dir: 135
 {
-if (m_param-bSaoNonDeblocked)
-{
-skipB = plane ? 2 : 4;
-skipR = plane ? 3 : 5;
-}
+skipR = skipD + 1;
+skipB = skipD + 1;
+
 stats = m_offsetOrg[plane][SAO_EO_2];
 count = m_count[plane][SAO_EO_2];
 
@@ -772,11 +764,9 @@
 
 // SAO_EO_3: // dir: 45
 {
-if (m_param-bSaoNonDeblocked)
-{
-skipB = plane ? 2 : 4;
-skipR = plane ? 3 : 5;
-}
+skipR = skipD + 1;
+skipB = skipD + 1;
+
 stats = m_offsetOrg[plane][SAO_EO_3];
 count = m_count[plane][SAO_EO_3];
 
@@ -846,7 +836,8 @@
 int32_t* stats;
 int32_t* count;
 
-int skipB, skipR;
+int skipR, skipB;
+int skipD = 3;
 
 int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
 int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
@@ -861,6 +852,7 @@
 if (plane == 1)
 {
 stride = frame-m_reconPic-m_strideC;
+skipD = 1;
 picWidth  = m_hChromaShift;
 picHeight = m_vChromaShift;
 ctuWidth  = m_hChromaShift;
@@ -873,8 +865,8 @@
 
 // SAO_BO:
 
-skipB = plane ? 1 : 3;
-skipR = plane ? 2 : 4;
+skipR = skipD;
+skipB = skipD;
 
 stats = m_offsetOrgPreDblk[addr][plane][SAO_BO];
 count = m_countPreDblk[addr][plane][SAO_BO];
@@ -902,8 +894,8 @@
 
 // SAO_EO_0: // dir: -
 {
-skipB = plane ? 1 : 3;
-skipR = plane ? 3 : 5;
+skipR = skipD + 1;
+skipB = skipD;
 
 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0];
 count = m_countPreDblk[addr][plane][SAO_EO_0];
@@ -938,8 +930,8 @@
 
 // SAO_EO_1: // dir: |
 {
-skipB = plane ? 2 : 4;
-skipR = plane ? 2 : 4;
+skipR = skipD;
+skipB = skipD + 1;
 
 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1];
 count = m_countPreDblk[addr][plane][SAO_EO_1];
@@ -983,8 +975,8 @@
 
 // SAO_EO_2: // dir: 135
 {
-skipB = plane ? 2 : 4;
-skipR = plane ? 3 : 5;
+skipR = skipD + 1;
+skipB = skipD + 1;
 
 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2];
 count = m_countPreDblk[addr][plane][SAO_EO_2];
@@ -1035,8 +1027,8 @@
 
 // SAO_EO_3: // dir: 45
 {
-skipB = plane ? 2 : 4;
-skipR = plane ? 3 : 5;
+skipR = skipD + 1;
+skipB = skipD + 1;
 
 stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3];
 count = m_countPreDblk[addr][plane][SAO_EO_3];
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix weightCost() [CHANGES OUTPUT]

2015-01-05 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1420511389 -32400
#  Tue Jan 06 11:29:49 2015 +0900
# Node ID a260403b0d21cd2948fb2546997269c102249369
# Parent  f255e8d06423231cb8c58ab5d3b10de7fb27b424
fix weightCost() [CHANGES OUTPUT]

diff -r f255e8d06423 -r a260403b0d21 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp   Fri Jan 02 18:22:38 2015 +0530
+++ b/source/encoder/weightPrediction.cpp   Tue Jan 06 11:29:49 2015 +0900
@@ -193,9 +193,9 @@
 if (bLuma)
 {
 int cu = 0;
-for (int y = 8; y < height; y += 8, r += 8 * stride, f += 8 * stride)
+for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride)
 {
-for (int x = 8; x < width; x += 8, cu++)
+for (int x = 0; x < width; x += 8, cu++)
 {
 int cmp = primitives.satd[LUMA_8x8](r + x, stride, f + x, 
stride);
 cost += X265_MIN(cmp, cache.intraCost[cu]);
@@ -203,12 +203,12 @@
 }
 }
 else if (cache.csp == X265_CSP_I444)
-for (int y = 16; y < height; y += 16, r += 16 * stride, f += 16 * 
stride)
-for (int x = 16; x < width; x += 16)
+for (int y = 0; y < height; y += 16, r += 16 * stride, f += 16 * 
stride)
+for (int x = 0; x < width; x += 16)
 cost += primitives.satd[LUMA_16x16](r + x, stride, f + x, 
stride);
 else
-for (int y = 8; y < height; y += 8, r += 8 * stride, f += 8 * stride)
-for (int x = 8; x < width; x += 8)
+for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride)
+for (int x = 0; x < width; x += 8)
 cost += primitives.satd[LUMA_8x8](r + x, stride, f + x, 
stride);
 
 return cost;
@@ -381,9 +381,9 @@
 break;
 
 case 2:
-fref = refFrame-m_fencPic-m_picOrg[2];
 orig = fencPic-m_picOrg[2];
 stride = fencPic-m_strideC;
+fref = refFrame-m_fencPic-m_picOrg[2];
 width =  ((fencPic-m_picWidth   4)  4)  cache.hshift;
 height = ((fencPic-m_picHeight  4)  4)  cache.vshift;
 if (mvs)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] refine intra neighbors

2014-12-24 Thread Satoshi Nakagawa
This change may improve code maintainability.

# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1419480956 -32400
#  Thu Dec 25 13:15:56 2014 +0900
# Node ID d400c836b3796e68bb08538a5c20f16f8966ee18
# Parent  5f9f7194267b76f733e9ffb0f9e8b474dfe89a71
refine intra neighbors

diff -r 5f9f7194267b -r d400c836b379 source/common/common.h
--- a/source/common/common.hTue Dec 23 17:40:53 2014 +0900
+++ b/source/common/common.hThu Dec 25 13:15:56 2014 +0900
@@ -163,6 +163,9 @@
 template<typename T>
 inline T x265_max(T a, T b) { return a > b ? a : b; }
 
+template<typename T>
+inline T x265_clip3(T minVal, T maxVal, T a) { return 
x265_min(x265_max(minVal, a), maxVal); }
+
 typedef int16_t  coeff_t;  // transform coefficient
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
diff -r 5f9f7194267b -r d400c836b379 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Tue Dec 23 17:40:53 2014 +0900
+++ b/source/common/cudata.cpp  Thu Dec 25 13:15:56 2014 +0900
@@ -608,7 +608,7 @@
 {
 if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT - 
s_numPartInCUSize + 1])
 {
-uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInCTU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - 
s_numPartInCUSize + 1];
 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, 
s_numPartInCUSize))
 return m_encData-getPicCTU(m_cuAddr);
@@ -689,8 +689,6 @@
 return NULL;
 }
 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * 
s_numPartInCUSize - 1];
-if (!m_cuLeft || !m_cuLeft-m_slice)
-return NULL;
 return m_cuLeft;
 }
 
@@ -723,8 +721,6 @@
 return NULL;
 }
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - 
s_numPartInCUSize + partUnitOffset];
-if (!m_cuAbove || !m_cuAbove-m_slice)
-return NULL;
 return m_cuAbove;
 }
 
@@ -732,8 +728,6 @@
 return NULL;
 
 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + 
partUnitOffset - 1];
-if ((m_cuAboveRight == NULL || m_cuAboveRight-m_slice == NULL || 
(m_cuAboveRight-m_cuAddr)  m_cuAddr))
-return NULL;
 return m_cuAboveRight;
 }
 
@@ -904,7 +898,7 @@
 tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize;
 tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize;
 
-tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - 
(m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));
+tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize 
- (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag));
 }
 
 void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t 
absPartIdx) const
@@ -916,7 +910,7 @@
 tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize;
 tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize;
 
-tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - 
(quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));
+tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize 
- (quadtreeTUMaxDepth - 1 + splitFlag));
 }
 
 uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
@@ -1363,14 +1357,6 @@
 return outPartIdxRB;
 }
 
-void CUData::deriveLeftRightTopIdxAdi(uint32_t outPartIdxLT, uint32_t 
outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const
-{
-uint32_t numPartInWidth = 1  (m_log2CUSize[0] - LOG2_UNIT_SIZE - 
partDepth);
-
-outPartIdxLT = m_absIdxInCTU + partOffset;
-outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + 
numPartInWidth - 1];
-}
-
 bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData candCU, 
uint32_t candAbsPartIdx) const
 {
 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
diff -r 5f9f7194267b -r d400c836b379 source/common/cudata.h
--- a/source/common/cudata.hTue Dec 23 17:40:53 2014 +0900
+++ b/source/common/cudata.hThu Dec 25 13:15:56 2014 +0900
@@ -212,7 +212,6 @@
 
 void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) 
const;
 int  getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* 
intraDirPred) const;
-void deriveLeftRightTopIdxAdi(uint32_t partIdxLT, uint32_t 
partIdxRT, uint32_t partOffset, uint32_t partDepth) const;
 
 uint32_t getSCUAddr() const  { return (m_cuAddr  
g_maxFullDepth * 2) + m_absIdxInCTU; }
 uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const;
diff -r 5f9f7194267b -r d400c836b379 source/common/predict.cpp
--- a/source/common/predict.cpp Tue Dec 23 17:40:53 2014 +0900
+++ b/source/common/predict.cpp Thu Dec 25 13:15:56 2014 +0900
@@ -654,11 +654,8 @@
 }
 }
 
-void Predict::initAdiPattern(const CUData

[x265] rdcost: unify scaleChromaDist*()

2014-12-23 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1419324053 -32400
#  Tue Dec 23 17:40:53 2014 +0900
# Node ID 36bde0fab6510684879e6ad996ab7d5acab86a5e
# Parent  9fdab427a1918939293539f07b49ce77c5104912
rdcost: unify scaleChromaDist*()

diff -r 9fdab427a191 -r 36bde0fab651 source/encoder/rdcost.h
--- a/source/encoder/rdcost.h   Tue Dec 23 12:17:08 2014 +0530
+++ b/source/encoder/rdcost.h   Tue Dec 23 17:40:53 2014 +0900
@@ -37,15 +37,12 @@
 /* all weights and factors stored as FIX8 */
 uint64_t  m_lambda2;
 uint64_t  m_lambda;
-uint64_t  m_cbDistortionWeight;
-uint64_t  m_crDistortionWeight;
+uint32_t  m_chromaDistWeight[2];
 uint32_t  m_psyRdBase;
 uint32_t  m_psyRd;
 int   m_qp;
 
 void setPsyRdScale(double scale){ m_psyRdBase = 
(uint32_t)floor(256.0 * scale * 0.33); }
-void setCbDistortionWeight(uint16_t weightFix8) { m_cbDistortionWeight = 
weightFix8; }
-void setCrDistortionWeight(uint16_t weightFix8) { m_crDistortionWeight = 
weightFix8; }
 
 void setQP(const Slice slice, int qp)
 {
@@ -62,7 +59,7 @@
 qpCb = X265_MIN(qp + slice.m_pps-chromaQpOffset[0], QP_MAX_SPEC);
 int chroma_offset_idx = X265_MIN(qp - qpCb + 12, 
MAX_CHROMA_LAMBDA_OFFSET);
 uint16_t lambdaOffset = m_psyRd ? 
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
-setCbDistortionWeight(lambdaOffset);
+m_chromaDistWeight[0] = lambdaOffset;
 
 if (slice.m_sps-chromaFormatIdc == X265_CSP_I420)
 qpCr = Clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + 
slice.m_pps-chromaQpOffset[0]]);
@@ -70,7 +67,7 @@
 qpCr = X265_MIN(qp + slice.m_pps-chromaQpOffset[0], QP_MAX_SPEC);
 chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET);
 lambdaOffset = m_psyRd ? 
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
-setCrDistortionWeight(lambdaOffset);
+m_chromaDistWeight[1] = lambdaOffset;
 }
 
 void setLambda(double lambda2, double lambda)
@@ -82,7 +79,7 @@
 inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const
 {
 X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
-   "calcRdCost wrap detected dist: %d, bits %d, lambda: %d\n", 
distortion, bits, (int)m_lambda2);
+   "calcRdCost wrap detected dist: %u, bits %u, lambda: "
X265_LL"\n", distortion, bits, m_lambda2);
 return distortion + ((bits * m_lambda2 + 128) >> 8);
 }
 
@@ -107,22 +104,15 @@
 inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
 {
 X265_CHECK(bits = (UINT64_MAX - 128) / m_lambda,
-   calcRdSADCost wrap detected dist: %d, bits %d, lambda: 
X265_LL\n, sadCost, bits, m_lambda);
+   calcRdSADCost wrap detected dist: %u, bits %u, lambda: 
X265_LL\n, sadCost, bits, m_lambda);
 return sadCost + ((bits * m_lambda + 128)  8);
 }
 
-inline uint32_t scaleChromaDistCb(uint32_t dist) const
+inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const
 {
-X265_CHECK(dist = (UINT64_MAX - 128) / m_cbDistortionWeight,
-   scaleChromaDistCb wrap detected dist: %d, lambda: 
X265_LL\n, dist, m_cbDistortionWeight);
-return (uint32_t)(((dist * m_cbDistortionWeight) + 128)  8);
-}
-
-inline uint32_t scaleChromaDistCr(uint32_t dist) const
-{
-X265_CHECK(dist = (UINT64_MAX - 128) / m_crDistortionWeight,
-   scaleChromaDistCr wrap detected dist: %d, lambda: 
X265_LL\n, dist, m_crDistortionWeight);
-return (uint32_t)(((dist * m_crDistortionWeight) + 128)  8);
+X265_CHECK(dist = (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
+   scaleChromaDist wrap detected dist: %u, lambda: %u\n, 
dist, m_chromaDistWeight[plane - 1]);
+return (uint32_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 
128) >> 8);
 }
 
 inline uint32_t getCost(uint32_t bits) const
diff -r 9fdab427a191 -r 36bde0fab651 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Dec 23 12:17:08 2014 +0530
+++ b/source/encoder/search.cpp Tue Dec 23 17:40:53 2014 +0900
@@ -813,7 +813,6 @@
 
 primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
 uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, 
stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
-uint32_t tmpDist;
 if (numSig)
 {
 m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, 
coeffC, log2TrSizeC, ttype, true, false, numSig);
@@ -827,8 +826,7 @@
 cu.setCbfPartRange(0, ttype, absPartIdxC, 
tuIterator.absPartIdxStep);
 }
 
-tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, 
fenc, stride);
-outDist += (ttype == TEXT_CHROMA_U) ? 
m_rdCost.scaleChromaDistCb(tmpDist) : m_rdCost.scaleChromaDistCr

[x265] refine intra neighbors

2014-12-22 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1419313799 -32400
#  Tue Dec 23 14:49:59 2014 +0900
# Node ID 6b59452a17d75c42c1750d47e2318c8da80c39fb
# Parent  8d2f418829c894c25da79daa861f16c61e5060d7
refine intra neighbors

diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/common.h
--- a/source/common/common.hSat Dec 20 21:27:14 2014 +0900
+++ b/source/common/common.hTue Dec 23 14:49:59 2014 +0900
@@ -163,6 +163,9 @@
 template<typename T>
 inline T x265_max(T a, T b) { return a > b ? a : b; }
 
+template<typename T>
+inline T x265_clip3(T minVal, T maxVal, T a) { return 
x265_min(x265_max(minVal, a), maxVal); }
+
 typedef int16_t  coeff_t;  // transform coefficient
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/cudata.cpp  Tue Dec 23 14:49:59 2014 +0900
@@ -608,7 +608,7 @@
 {
 if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT - 
s_numPartInCUSize + 1])
 {
-uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInCTU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - 
s_numPartInCUSize + 1];
 if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, 
s_numPartInCUSize))
 return m_encData-getPicCTU(m_cuAddr);
@@ -689,8 +689,6 @@
 return NULL;
 }
 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * 
s_numPartInCUSize - 1];
-if (!m_cuLeft || !m_cuLeft-m_slice)
-return NULL;
 return m_cuLeft;
 }
 
@@ -723,8 +721,6 @@
 return NULL;
 }
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - 
s_numPartInCUSize + partUnitOffset];
-if (!m_cuAbove || !m_cuAbove-m_slice)
-return NULL;
 return m_cuAbove;
 }
 
@@ -732,8 +728,6 @@
 return NULL;
 
 arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + 
partUnitOffset - 1];
-if ((m_cuAboveRight == NULL || m_cuAboveRight-m_slice == NULL || 
(m_cuAboveRight-m_cuAddr)  m_cuAddr))
-return NULL;
 return m_cuAboveRight;
 }
 
@@ -904,7 +898,7 @@
 tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize;
 tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize;
 
-tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - 
(m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1]));
+tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize 
- (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag));
 }
 
 void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t 
absPartIdx) const
@@ -916,7 +910,7 @@
 tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize;
 tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize;
 
-tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - 
(quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1]));
+tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize 
- (quadtreeTUMaxDepth - 1 + splitFlag));
 }
 
 uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
@@ -1363,14 +1357,6 @@
 return outPartIdxRB;
 }
 
-void CUData::deriveLeftRightTopIdxAdi(uint32_t outPartIdxLT, uint32_t 
outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const
-{
-uint32_t numPartInWidth = 1  (m_log2CUSize[0] - LOG2_UNIT_SIZE - 
partDepth);
-
-outPartIdxLT = m_absIdxInCTU + partOffset;
-outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + 
numPartInWidth - 1];
-}
-
 bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData candCU, 
uint32_t candAbsPartIdx) const
 {
 if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.h
--- a/source/common/cudata.hSat Dec 20 21:27:14 2014 +0900
+++ b/source/common/cudata.hTue Dec 23 14:49:59 2014 +0900
@@ -212,7 +212,6 @@
 
 void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) 
const;
 int  getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* 
intraDirPred) const;
-void deriveLeftRightTopIdxAdi(uint32_t partIdxLT, uint32_t 
partIdxRT, uint32_t partOffset, uint32_t partDepth) const;
 
 uint32_t getSCUAddr() const  { return (m_cuAddr  
g_maxFullDepth * 2) + m_absIdxInCTU; }
 uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const;
diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.cpp
--- a/source/common/predict.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/common/predict.cpp Tue Dec 23 14:49:59 2014 +0900
@@ -654,11 +654,8 @@
 }
 }
 
-void Predict::initAdiPattern(const CUData cu, const CUGeom cuGeom, uint32_t

[x265] (no subject)

2014-12-20 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1419078434 -32400
#  Sat Dec 20 21:27:14 2014 +0900
# Node ID 2894938c4de707ae69f8ae560bee2b3c323fd357
# Parent  78ae7996a1ceb60d24cff790cc2fa233d4c31435
fix 4:4:4 rd=1

diff -r 78ae7996a1ce -r 2894938c4de7 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Dec 17 14:31:50 2014 -0600
+++ b/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900
@@ -1591,17 +1591,19 @@
 uint32_t log2TrSizeC = cu.m_log2CUSize[0] - m_hChromaShift;
 uint32_t tuSize = 1 << log2TrSizeC;
 int32_t scaleTuSize = tuSize;
+uint32_t tuDepth = 0;
 int32_t costShift = 0;
 
 if (tuSize > 32)
 {
 scaleTuSize = 32;
+tuDepth = 1;
 costShift = 2;
 log2TrSizeC = 5;
 }
 
-Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 1);
-Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 2);
+Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1);
+Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2);
 cu.getAllowedChromaDir(0, modeList);
 
 // check chroma modes
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix 4:4:4 rd=1

2014-12-20 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1419078434 -32400
#  Sat Dec 20 21:27:14 2014 +0900
# Node ID 2894938c4de707ae69f8ae560bee2b3c323fd357
# Parent  78ae7996a1ceb60d24cff790cc2fa233d4c31435
fix 4:4:4 rd=1

diff -r 78ae7996a1ce -r 2894938c4de7 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Dec 17 14:31:50 2014 -0600
+++ b/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900
@@ -1591,17 +1591,19 @@
 uint32_t log2TrSizeC = cu.m_log2CUSize[0] - m_hChromaShift;
 uint32_t tuSize = 1 << log2TrSizeC;
 int32_t scaleTuSize = tuSize;
+uint32_t tuDepth = 0;
 int32_t costShift = 0;
 
 if (tuSize > 32)
 {
 scaleTuSize = 32;
+tuDepth = 1;
 costShift = 2;
 log2TrSizeC = 5;
 }
 
-Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 1);
-Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 2);
+Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1);
+Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2);
 cu.getAllowedChromaDir(0, modeList);
 
 // check chroma modes
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix for old gcc

2014-11-20 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1416475509 -32400
#  Thu Nov 20 18:25:09 2014 +0900
# Node ID c3a72e736de53af55fba25a5a5ba2da27722669f
# Parent  3649fabf90d348c51d7e155989d1bf629ec27f6e
fix for old gcc

diff -r 3649fabf90d3 -r c3a72e736de5 source/common/pixel.cpp
--- a/source/common/pixel.cpp   Thu Nov 20 14:27:53 2014 +0530
+++ b/source/common/pixel.cpp   Thu Nov 20 18:25:09 2014 +0900
@@ -175,7 +175,7 @@
 }
 
 template<int lx, int ly, class T1, class T2>
-int sse(T1* pix1, intptr_t stride_pix1, T2* pix2, intptr_t stride_pix2)
+int sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t 
stride_pix2)
 {
 int sum = 0;
 int iTemp;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] replace char to int8_t, where it should be signed char

2014-11-19 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1416450633 -32400
#  Thu Nov 20 11:30:33 2014 +0900
# Node ID 46ae5bd20c8c317b8f71fbce0d7ad6bd6b8bba21
# Parent  d059cfa88f1ac79b319bd8a05bc70704d454f0ba
replace char to int8_t, where it should be signed char

diff -r d059cfa88f1a -r 46ae5bd20c8c source/common/cudata.cpp
--- a/source/common/cudata.cpp  Tue Nov 18 14:11:12 2014 -0600
+++ b/source/common/cudata.cpp  Thu Nov 20 11:30:33 2014 +0900
@@ -227,12 +227,12 @@
 /* Each CU's data is layed out sequentially within the charMemBlock */
 uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * 
BytesPerPartition) * instance;
 
-m_qp  = (char*)charBuf; charBuf += m_numPartitions;
+m_qp= (int8_t*)charBuf; charBuf += m_numPartitions;
 m_log2CUSize = charBuf; charBuf += m_numPartitions;
 m_lumaIntraDir   = charBuf; charBuf += m_numPartitions;
 m_tqBypass   = charBuf; charBuf += m_numPartitions;
-m_refIdx[0]   = (char*)charBuf; charBuf += m_numPartitions;
-m_refIdx[1]   = (char*)charBuf; charBuf += m_numPartitions;
+m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
+m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
 m_cuDepth= charBuf; charBuf += m_numPartitions;
 m_predMode   = charBuf; charBuf += m_numPartitions; /* the order 
up to here is important in initCTU() and initSubCU() */
 m_partSize   = charBuf; charBuf += m_numPartitions;
@@ -772,7 +772,7 @@
 }
 
 /* Get reference QP from left QpMinCu or latest coded QP */
-char CUData::getRefQP(uint32_t curAbsIdxInCTU) const
+int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
 {
 uint32_t lPartIdx = 0, aPartIdx = 0;
 const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + 
curAbsIdxInCTU);
@@ -794,7 +794,7 @@
 return lastValidPartIdx;
 }
 
-char CUData::getLastCodedQP(uint32_t absPartIdx) const
+int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
 {
 uint32_t quPartIdxMask = 0xFF  (g_maxFullDepth - 
m_slice-m_pps-maxCuDQPDepth) * 2;
 int lastValidPartIdx = getLastValidPartIdx(absPartIdx  quPartIdxMask);
@@ -808,7 +808,7 @@
 else if (m_cuAddr  0  !(m_slice-m_pps-bEntropyCodingSyncEnabled 
 !(m_cuAddr % m_slice-m_sps-numCuInWidth)))
 return m_encData-getPicCTU(m_cuAddr - 
1)-getLastCodedQP(NUM_CU_PARTITIONS);
 else
-return (char)m_slice-m_sliceQp;
+return (int8_t)m_slice-m_sliceQp;
 }
 }
 
@@ -936,7 +936,7 @@
 return ctx;
 }
 
-bool CUData::setQPSubCUs(char qp, uint32_t absPartIdx, uint32_t depth)
+bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
 {
 uint32_t curPartNumb = NUM_CU_PARTITIONS  (depth  1);
 uint32_t curPartNumQ = curPartNumb  2;
@@ -1211,7 +1211,7 @@
 setAllPU(m_mv[list], mv, absPartIdx, puIdx);
 }
 
-void CUData::setPURefIdx(int list, char refIdx, int absPartIdx, int puIdx)
+void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
 {
 setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
 }
diff -r d059cfa88f1a -r 46ae5bd20c8c source/common/cudata.h
--- a/source/common/cudata.hTue Nov 18 14:11:12 2014 -0600
+++ b/source/common/cudata.hThu Nov 20 11:30:33 2014 +0900
@@ -127,11 +127,11 @@
 int   m_vChromaShift;
 
 /* Per-part data, stored contiguously */
-char* m_qp;   // array of QP values
+int8_t*   m_qp;   // array of QP values
 uint8_t*  m_log2CUSize;   // array of cu log2Size TODO: seems 
redundant to depth
 uint8_t*  m_lumaIntraDir; // array of intra directions (luma)
 uint8_t*  m_tqBypass; // array of CU lossless flags
-char* m_refIdx[2];// array of motion reference indices per 
list
+int8_t*   m_refIdx[2];// array of motion reference indices per 
list
 uint8_t*  m_cuDepth;  // array of depths
 uint8_t*  m_predMode; // array of prediction modes
 uint8_t*  m_partSize; // array of partition sizes
@@ -177,7 +177,7 @@
 void clearCbf(){ m_partSet(m_cbf[0], 0); 
m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); }
 
 /* these functions all take depth as an absolute depth from CTU, it is 
used to calculate the number of parts to copy */
-void setQPSubParts(char qp, uint32_t absPartIdx, uint32_t depth)   
   { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }
+void setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth) 
   { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }
 void setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t 
depth) { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); }
 void setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, 
uint32_t depth){ s_partSet

[x265] fseeko for mingw32

2014-11-18 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1416379165 -32400
#  Wed Nov 19 15:39:25 2014 +0900
# Node ID 591547ce9293eef8bfe68a8687e81c5aa1650e2a
# Parent  d059cfa88f1ac79b319bd8a05bc70704d454f0ba
fseeko for mingw32

diff -r d059cfa88f1a -r 591547ce9293 source/common/common.h
--- a/source/common/common.hTue Nov 18 14:11:12 2014 -0600
+++ b/source/common/common.hWed Nov 19 15:39:25 2014 +0900
@@ -56,6 +56,10 @@
 #define x265_stack_align(func, ...) func(__VA_ARGS__)
 #endif
 
+#if defined(__MINGW32__)
+#define fseeko fseeko64
+#endif
+
 #elif defined(_MSC_VER)
 
 #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] modify MV default constructor to do nothing

2014-11-17 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1416221075 -32400
#  Mon Nov 17 19:44:35 2014 +0900
# Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2
# Parent  27d36c4b4a27d2872430c6a6fc538fbddcf791e6
modify MV default constructor to do nothing

diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Mon Nov 17 01:30:26 2014 +0530
+++ b/source/common/cudata.cpp  Mon Nov 17 19:44:35 2014 +0900
@@ -1237,7 +1237,7 @@
 else
 {
 // OUT OF BOUNDARY
-outMvField.mv.word = 0;
+outMvField.mv = 0;
 outMvField.refIdx = REF_NOT_VALID;
 }
 }
@@ -1399,6 +1399,8 @@
 
 for (uint32_t i = 0; i  maxNumMergeCand; ++i)
 {
+mvFieldNeighbours[i][0].mv = 0;
+mvFieldNeighbours[i][1].mv = 0;
 mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID;
 mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID;
 }
@@ -1646,7 +1648,7 @@
 while (count  maxNumMergeCand)
 {
 interDirNeighbours[count] = 1;
-mvFieldNeighbours[count][0].mv.word = 0;
+mvFieldNeighbours[count][0].mv = 0;
 mvFieldNeighbours[count][0].refIdx = r;
 
 if (isInterB)
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h
--- a/source/common/lowres.hMon Nov 17 01:30:26 2014 +0530
+++ b/source/common/lowres.hMon Nov 17 19:44:35 2014 +0900
@@ -56,11 +56,10 @@
 {
 int hpelA = (qmv.y  2) | ((qmv.x  2)  1);
 pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x  2) + 
(qmv.y  2) * lumaStride;
-
-MV qmvB = qmv + MV((qmv.x  1) * 2, (qmv.y  1) * 2);
-int hpelB = (qmvB.y  2) | ((qmvB.x  2)  1);
-
-pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x  2) + 
(qmvB.y  2) * lumaStride;
+int qmvx = qmv.x + (qmv.x  1);
+int qmvy = qmv.y + (qmv.y  1);
+int hpelB = (qmvy  2) | ((qmvx  2)  1);
+pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx  2) + 
(qmvy  2) * lumaStride;
 primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, 
lumaStride, frefB, lumaStride, 32);
 return buf;
 }
@@ -79,9 +78,10 @@
 ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
 int hpelA = (qmv.y  2) | ((qmv.x  2)  1);
 pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x  2) + 
(qmv.y  2) * lumaStride;
-MV qmvB = qmv + MV((qmv.x  1) * 2, (qmv.y  1) * 2);
-int hpelB = (qmvB.y  2) | ((qmvB.x  2)  1);
-pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x  2) + 
(qmvB.y  2) * lumaStride;
+int qmvx = qmv.x + (qmv.x  1);
+int qmvy = qmv.y + (qmv.y  1);
+int hpelB = (qmvy  2) | ((qmvx  2)  1);
+pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx  2) + 
(qmvy  2) * lumaStride;
 primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, 
frefB, lumaStride, 32);
 return comp(fenc, FENC_STRIDE, subpelbuf, 8);
 }
diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h
--- a/source/common/mv.hMon Nov 17 01:30:26 2014 +0530
+++ b/source/common/mv.hMon Nov 17 19:44:35 2014 +0900
@@ -44,19 +44,19 @@
 int32_t word;
 };
 
-MV() : word(0) {}
-
+MV()   {}
+MV(int32_t w) : word(w){}
 MV(int16_t _x, int16_t _y) : x(_x), y(_y)  {}
 
-const MV operator =(uint32_t w)   { word = w; return *this; }
+MV operator =(uint32_t w) { word = w; return *this; }
 
-const MV operator +=(const MV other) { x += other.x; y += other.y; 
return *this; }
+MV operator +=(const MV other)   { x += other.x; y += other.y; 
return *this; }
 
-const MV operator -=(const MV other) { x -= other.x; y -= other.y; 
return *this; }
+MV operator -=(const MV other)   { x -= other.x; y -= other.y; 
return *this; }
 
-const MV operator =(int i)  { x = i; y = i; return 
*this; }
+MV operator =(int i){ x = i; y = i; return 
*this; }
 
-const MV operator =(int i)  { x = i; y = i; return 
*this; }
+MV operator =(int i){ x = i; y = i; return 
*this; }
 
 MV operator (int i) const{ return MV(x  i, y  i); }
 
@@ -64,16 +64,18 @@
 
 MV operator *(int16_t i) const { return MV(x * i, y * i); }
 
-const MV operator -(const MV other) const { return MV(x - other.x, y - 
other.y); }
+MV operator -(const MV other) const   { return MV(x - other.x, y - 
other.y); }
 
-const MV operator +(const MV other) const { return MV(x + other.x, y + 
other.y); }
+MV operator +(const MV other) const   { return MV(x + other.x, y + 
other.y); }
 
 bool operator ==(const MV other) const{ return word == other.word; }
 
 bool operator !=(const MV

[x265] analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT]

2014-11-13 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415871635 -32400
#  Thu Nov 13 18:40:35 2014 +0900
# Node ID cc70f51c5b6dd6009c5f2b9876c9fc8108c75c62
# Parent  18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c
analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT]

diff -r 18aefbde72ab -r cc70f51c5b6d source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Wed Nov 12 17:17:56 2014 -0600
+++ b/source/encoder/analysis.cpp   Thu Nov 13 18:40:35 2014 +0900
@@ -762,7 +762,7 @@
 checkBestMode(*splitPred, depth);
 }
 
-if (!depth || md.bestMode-cu.isInter(0))
+if (mightNotSplit  (!depth || md.bestMode-cu.isInter(0)))
 {
 /* early-out statistics */
 FrameData curEncData = const_castFrameData(*m_frame-m_encData);
@@ -1044,7 +1044,7 @@
 md.bestMode = splitPred;
 }
 
-if (!depth || md.bestMode-cu.isInter(0))
+if (mightNotSplit  (!depth || md.bestMode-cu.isInter(0)))
 {
 /* early-out statistics */
 FrameData curEncData = const_castFrameData(*m_frame-m_encData);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT]

2014-11-13 Thread Satoshi Nakagawa


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Friday, November 14, 2014 2:39 AM
 To: Development for x265
 Subject: Re: [x265] analysis: don't add the cost of picture boundary CU
 to avgCost [CHANGES OUTPUT]
 
 On 11/13, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415871635 -32400
  #  Thu Nov 13 18:40:35 2014 +0900
  # Node ID cc70f51c5b6dd6009c5f2b9876c9fc8108c75c62
  # Parent  18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c
  analysis: don't add the cost of picture boundary CU to avgCost
  [CHANGES OUTPUT]
 
 it took me a while to understand what you are doing here.
 
  diff -r 18aefbde72ab -r cc70f51c5b6d source/encoder/analysis.cpp
  --- a/source/encoder/analysis.cpp   Wed Nov 12 17:17:56 2014 -0600
  +++ b/source/encoder/analysis.cpp   Thu Nov 13 18:40:35 2014 +0900
  @@ -762,7 +762,7 @@
   checkBestMode(*splitPred, depth);
   }
 
  -if (!depth || md.bestMode-cu.isInter(0))
  +if (mightNotSplit  (!depth || md.bestMode-cu.isInter(0)))
 
 if mightNotSplit is false, then a split was forced by a picture edge (the
 current depth CU is too large) and so this CU is never coded.
 
 ok, it makes sense to not count the cost of this partially coded splitCU
 against the average cost at this depth
 
 Although looking at this code again it's not clear why depth 0 has a
 special exemption. I wonder if it would be better as:
 
 if (md.bestMode != &md.pred[PRED_SPLIT] && md.bestMode->cu.isInter(0))
 {
   ..
 }
 
 So it only counts costs at the level they were CU coded, and this would
 implicitly handle the forced splits at picture edges.


In the non-partial case, it seems better to take the split cost into account
(the sum of the 4 sub-CUs, if that is better than 1 CU).
Also, the inter check may not be needed (e.g. when intra is better than inter).

So, it would be better to simply use:

if (mightNotSplit)
{
  
}






 
   {
   /* early-out statistics */
   FrameData curEncData =
  const_castFrameData(*m_frame-m_encData);
  @@ -1044,7 +1044,7 @@
   md.bestMode = splitPred;
   }
 
  -if (!depth || md.bestMode-cu.isInter(0))
  +if (mightNotSplit  (!depth || md.bestMode-cu.isInter(0)))
   {
   /* early-out statistics */
   FrameData curEncData =
  const_castFrameData(*m_frame-m_encData);
  ___
  x265-devel mailing list
  x265-devel@videolan.org
  https://mailman.videolan.org/listinfo/x265-devel
 
 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] rdcost: weight chroma lambda for rdo [CHANGES OUTPUT]

2014-11-13 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415930754 -32400
#  Fri Nov 14 11:05:54 2014 +0900
# Node ID 269376f3e1d8e4942f504303909516aca9a0ba75
# Parent  17f2fb0996db9b761f13953408d810608e24397b
rdcost: weight chroma lambda for rdo [CHANGES OUTPUT]

The lambdas for rdoq, psy-rdo and psy-rdoq are weighted.
Why is the lambda for rdo not weighted?


diff -r 17f2fb0996db -r 269376f3e1d8 source/encoder/rdcost.h
--- a/source/encoder/rdcost.h   Thu Nov 13 17:16:07 2014 -0600
+++ b/source/encoder/rdcost.h   Fri Nov 14 11:05:54 2014 +0900
@@ -59,12 +59,12 @@
 
 int qpCb = Clip3(QP_MIN, QP_MAX_MAX, qp + 
slice.m_pps-chromaQpOffset[0]);
 int chroma_offset_idx = X265_MIN(qp - qpCb + 12, 
MAX_CHROMA_LAMBDA_OFFSET);
-uint16_t lambdaOffset = m_psyRd ? 
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
+uint16_t lambdaOffset = 
x265_chroma_lambda2_offset_tab[chroma_offset_idx];
 setCbDistortionWeight(lambdaOffset);
 
 int qpCr = Clip3(QP_MIN, QP_MAX_MAX, qp + 
slice.m_pps-chromaQpOffset[1]);
 chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET);
-lambdaOffset = m_psyRd ? 
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
+lambdaOffset = x265_chroma_lambda2_offset_tab[chroma_offset_idx];
 setCrDistortionWeight(lambdaOffset);
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] nits

2014-11-12 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415863775 -32400
#  Thu Nov 13 16:29:35 2014 +0900
# Node ID 49141f28397dd294bb6e590dad2bb8d2b01bf97b
# Parent  18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c
nits

diff -r 18aefbde72ab -r 49141f28397d source/common/cudata.cpp
--- a/source/common/cudata.cpp  Wed Nov 12 17:17:56 2014 -0600
+++ b/source/common/cudata.cpp  Thu Nov 13 16:29:35 2014 +0900
@@ -1953,8 +1953,8 @@
 
 bool CUData::getColMVP(MV outMV, int outRefIdx, int picList, int cuAddr, int 
partUnitIdx) const
 {
-Frame *colPic = m_slice-m_refPicList[m_slice-isInterB() ? 1 - 
m_slice-m_colFromL0Flag : 0][m_slice-m_colRefIdx];
-CUData *colCU = colPic-m_encData-getPicCTU(cuAddr);
+const Frame* colPic = m_slice-m_refPicList[m_slice-isInterB()  
!m_slice-m_colFromL0Flag][m_slice-m_colRefIdx];
+const CUData* colCU = colPic-m_encData-getPicCTU(cuAddr);
 
 if (colCU-m_predMode[partUnitIdx] == MODE_NONE)
 return false;
diff -r 18aefbde72ab -r 49141f28397d source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Wed Nov 12 17:17:56 2014 -0600
+++ b/source/encoder/analysis.cpp   Thu Nov 13 16:29:35 2014 +0900
@@ -138,7 +138,7 @@
 
 if (m_param-analysisMode == X265_ANALYSIS_SAVE  
m_frame-m_intraData)
 {
-CUData *bestCU = m_modeDepth[0].bestMode-cu;
+const CUData* bestCU = m_modeDepth[0].bestMode-cu;
 memcpy(m_frame-m_intraData-depth[ctu.m_cuAddr * 
numPartition], bestCU-m_cuDepth, sizeof(uint8_t) * numPartition);
 memcpy(m_frame-m_intraData-modes[ctu.m_cuAddr * 
numPartition], bestCU-m_lumaIntraDir, sizeof(uint8_t) * numPartition);
 memcpy(m_frame-m_intraData-partSizes[ctu.m_cuAddr * 
numPartition], bestCU-m_partSize, sizeof(uint8_t) * numPartition);
@@ -268,23 +268,23 @@
 
 for (uint32_t subPartIdx = 0; subPartIdx  4; subPartIdx++)
 {
-const CUGeom childCuData = *(cuGeom + cuGeom.childOffset + 
subPartIdx);
-if (childCuData.flags  CUGeom::PRESENT)
+const CUGeom childGeom = *(cuGeom + cuGeom.childOffset + 
subPartIdx);
+if (childGeom.flags  CUGeom::PRESENT)
 {
-m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, 
childCuData.encodeIdx);
+m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, 
childGeom.encodeIdx);
 m_rqt[nextDepth].cur.load(*nextContext);
-compressIntraCU(parentCTU, childCuData, shared, zOrder);
+compressIntraCU(parentCTU, childGeom, shared, zOrder);
 
 // Save best CU and pred data for this sub CU
-splitCU-copyPartFrom(nd.bestMode-cu, childCuData, 
subPartIdx);
+splitCU-copyPartFrom(nd.bestMode-cu, childGeom, subPartIdx);
 splitPred-addSubCosts(*nd.bestMode);
-nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, 
childCuData.numPartitions * subPartIdx);
+nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, 
childGeom.numPartitions * subPartIdx);
 nextContext = nd.bestMode-contexts;
 }
 else
 {
 /* record the depth of this non-present sub-CU */
-splitCU-setEmptyPart(childCuData, subPartIdx);
+splitCU-setEmptyPart(childGeom, subPartIdx);
 zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
 }
 }
@@ -735,22 +735,22 @@
 
 for (uint32_t subPartIdx = 0; subPartIdx  4; subPartIdx++)
 {
-const CUGeom childCuData = *(cuGeom + cuGeom.childOffset + 
subPartIdx);
-if (childCuData.flags  CUGeom::PRESENT)
+const CUGeom childGeom = *(cuGeom + cuGeom.childOffset + 
subPartIdx);
+if (childGeom.flags  CUGeom::PRESENT)
 {
-m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, 
childCuData.encodeIdx);
+m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, 
childGeom.encodeIdx);
 m_rqt[nextDepth].cur.load(*nextContext);
-compressInterCU_dist(parentCTU, childCuData);
+compressInterCU_dist(parentCTU, childGeom);
 
 // Save best CU and pred data for this sub CU
-splitCU-copyPartFrom(nd.bestMode-cu, childCuData, 
subPartIdx);
+splitCU-copyPartFrom(nd.bestMode-cu, childGeom, subPartIdx);
 splitPred-addSubCosts(*nd.bestMode);
 
-nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, 
childCuData.numPartitions * subPartIdx);
+nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, 
childGeom.numPartitions * subPartIdx);
 nextContext = nd.bestMode-contexts;
 }
 else
-splitCU-setEmptyPart(childCuData, subPartIdx);
+splitCU-setEmptyPart(childGeom, subPartIdx

[x265] refine initializeGeoms()

2014-11-11 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415701819 -32400
#  Tue Nov 11 19:30:19 2014 +0900
# Node ID 5638df706f0833bd211c73612ba0d4403c813d9e
# Parent  32513a4c3bd435757347e729dc14b5a1c1c6ceef
refine initializeGeoms()

diff -r 32513a4c3bd4 -r 5638df706f08 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Mon Nov 10 12:39:54 2014 +0900
+++ b/source/common/cudata.cpp  Tue Nov 11 19:30:19 2014 +0900
@@ -2078,7 +2078,7 @@
 
 #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield)  
(~(flag))) | ((~((value) - 1))  (flag))
 
-void CUData::calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t 
maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const
+void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t 
maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
 {
 // Initialize the coding blocks inside the CTB
 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; 
log2CUSize = MIN_LOG2_CU_SIZE; log2CUSize--)
@@ -2093,10 +2093,10 @@
 uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
 uint32_t cuIdx = rangeCUIdx + depthIdx;
 uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx 
 2);
-uint32_t px = m_cuPelX + sbX * blockSize;
-uint32_t py = m_cuPelY + sbY * blockSize;
-int32_t presentFlag = px  picWidth  py  picHeight;
-int32_t splitMandatoryFlag = presentFlag  !lastLevelFlag  
(px + blockSize  picWidth || py + blockSize  picHeight);
+uint32_t px = sbX * blockSize;
+uint32_t py = sbY * blockSize;
+int32_t presentFlag = px  ctuWidth  py  ctuHeight;
+int32_t splitMandatoryFlag = presentFlag  !lastLevelFlag  
(px + blockSize  ctuWidth || py + blockSize  ctuHeight);
 
 /* Offset of the luma CU in the X, Y direction in terms of 
pixels from the CTU origin */
 uint32_t xOffset = (sbX * blockSize)  3;
diff -r 32513a4c3bd4 -r 5638df706f08 source/common/cudata.h
--- a/source/common/cudata.hMon Nov 10 12:39:54 2014 +0900
+++ b/source/common/cudata.hTue Nov 11 19:30:19 2014 +0900
@@ -158,7 +158,7 @@
 CUData();
 
 void initialize(const CUDataMemPool dataPool, uint32_t depth, int 
csp, int instance);
-void calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t 
maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const;
+static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, 
uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
 
 void initCTU(const Frame frame, uint32_t cuAddr, int qp);
 void initSubCU(const CUData ctu, const CUGeom cuGeom);
diff -r 32513a4c3bd4 -r 5638df706f08 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Mon Nov 10 12:39:54 2014 +0900
+++ b/source/encoder/frameencoder.cpp   Tue Nov 11 19:30:19 2014 +0900
@@ -138,11 +138,12 @@
 }
 
 /* Generate a complete list of unique geom sets for the current picture 
dimensions */
-bool FrameEncoder::initializeGeoms(const FrameData encData)
+bool FrameEncoder::initializeGeoms()
 {
 /* Geoms only vary between CTUs in the presence of picture edges */
-int heightRem = m_param-sourceHeight  (m_param-maxCUSize - 1);
-int widthRem = m_param-sourceWidth  (m_param-maxCUSize - 1);
+int maxCUSize = m_param-maxCUSize;
+int heightRem = m_param-sourceHeight  (maxCUSize - 1);
+int widthRem = m_param-sourceWidth  (maxCUSize - 1);
 int allocGeoms = 1; // body
 if (heightRem  widthRem)
 allocGeoms = 4; // body, right, bottom, corner
@@ -154,33 +155,45 @@
 if (!m_cuGeoms || !m_ctuGeomMap)
 return false;
 
-CUGeom cuLocalData[CUGeom::MAX_GEOMS];
-memset(cuLocalData, 0, sizeof(cuLocalData)); // temporal fix for memcmp
+// body
+CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, m_cuGeoms);
+memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols);
+if (allocGeoms == 1)
+return true;
 
-int countGeoms = 0;
-for (uint32_t ctuAddr = 0; ctuAddr  m_numRows * m_numCols; ctuAddr++)
+int countGeoms = 1;
+if (widthRem)
 {
-/* TODO: detach this logic from TComDataCU */
-encData.m_picCTU[ctuAddr].initCTU(*m_frame, ctuAddr, 0);
-encData.m_picCTU[ctuAddr].calcCTUGeoms(m_param-sourceWidth, 
m_param-sourceHeight, m_param-maxCUSize, cuLocalData);
+// right
+CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, m_cuGeoms + 
countGeoms * CUGeom::MAX_GEOMS);
+for (int i = 0; i  m_numRows; i++)
+{
+uint32_t ctuAddr = m_numCols * (i + 1) - 1;
+m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS;
+}
+countGeoms++;
+}
+if (heightRem)
+{
+// bottom
+CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, m_cuGeoms + 
countGeoms * CUGeom::MAX_GEOMS

[x265] cleanup SIZE_NONE. empty CU has MODE_NONE.

2014-11-09 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415590794 -32400
#  Mon Nov 10 12:39:54 2014 +0900
# Node ID f31250e3eb5625275318bc69633e0fbc31ccdb3a
# Parent  1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8
cleanup SIZE_NONE. empty CU has MODE_NONE.

diff -r 1e04e178a349 -r f31250e3eb56 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/cudata.cpp  Mon Nov 10 12:39:54 2014 +0900
@@ -229,13 +229,13 @@
 
 m_qp  = (char*)charBuf; charBuf += m_numPartitions;
 m_log2CUSize = charBuf; charBuf += m_numPartitions;
-m_partSize   = charBuf; charBuf += m_numPartitions;
 m_lumaIntraDir   = charBuf; charBuf += m_numPartitions;
 m_tqBypass   = charBuf; charBuf += m_numPartitions;
 m_refIdx[0]   = (char*)charBuf; charBuf += m_numPartitions;
 m_refIdx[1]   = (char*)charBuf; charBuf += m_numPartitions;
 m_cuDepth= charBuf; charBuf += m_numPartitions;
 m_predMode   = charBuf; charBuf += m_numPartitions; /* the order 
up to here is important in initCTU() and initSubCU() */
+m_partSize   = charBuf; charBuf += m_numPartitions;
 m_mergeFlag  = charBuf; charBuf += m_numPartitions;
 m_interDir   = charBuf; charBuf += m_numPartitions;
 m_mvpIdx[0]  = charBuf; charBuf += m_numPartitions;
@@ -277,7 +277,6 @@
 /* sequential memsets */
 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
 m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
-m_partSet(m_partSize, (uint8_t)SIZE_NONE);
 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
 m_partSet(m_tqBypass, (uint8_t)frame.m_encData-m_param-bLossless);
 if (m_slice-m_sliceType != I_SLICE)
@@ -289,7 +288,7 @@
 X265_CHECK(!(frame.m_encData-m_param-bLossless  
!m_slice-m_pps-bTransquantBypassEnabled), lossless enabled without TQbypass 
in PPS\n);
 
 /* initialize the remaining CU data in one memset */
-memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
+memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
 
 uint32_t widthInCU = m_slice-m_sps-numCuInWidth;
 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData-getPicCTU(m_cuAddr - 1) : 
NULL;
@@ -316,7 +315,6 @@
 /* sequential memsets */
 m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
 m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
-m_partSet(m_partSize, (uint8_t)SIZE_NONE);
 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
 m_partSet(m_tqBypass, (uint8_t)m_encData-m_param-bLossless);
 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
@@ -324,7 +322,7 @@
 m_partSet(m_cuDepth,  (uint8_t)cuGeom.depth);
 
 /* initialize the remaining CU data in one memset */
-memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
+memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -336,13 +334,13 @@
 
 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
-m_subPartCopy(m_partSize + offset, subCU.m_partSize);
 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
 m_subPartCopy(m_predMode + offset, subCU.m_predMode);
+m_subPartCopy(m_partSize + offset, subCU.m_partSize);
 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
 m_subPartCopy(m_interDir + offset, subCU.m_interDir);
 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
@@ -423,13 +421,13 @@
 
 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
-m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, 
(uint8_t*)m_refIdx[0]);
 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, 
(uint8_t*)m_refIdx[1]);
 m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
 m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
+m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
 m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
 m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
 m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
@@ -472,12 +470,13 @@
 /* copy out all prediction info for this part */
 m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
 m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
-m_partCopy

[x265] fix typo

2014-11-08 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415494975 -32400
#  Sun Nov 09 10:02:55 2014 +0900
# Node ID cbe34d7fef367ad9603513fdae34dfee99d9a03d
# Parent  3f2d6836855411597ef25b4f9786dcaa0fe7394a
fix typo

diff -r 3f2d68368554 -r cbe34d7fef36 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Sat Nov 08 12:30:10 2014 -0600
+++ b/source/encoder/analysis.cpp   Sun Nov 09 10:02:55 2014 +0900
@@ -794,7 +794,7 @@
 
 if (m_param-bEnableRectInter)
 {
-md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
 checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
 if (md.pred[PRED_Nx2N].sa8dCost  bestInter-sa8dCost)
 bestInter = md.pred[PRED_Nx2N];
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix typo

2014-11-08 Thread Satoshi Nakagawa
Please ignore the previous one.

# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1415498145 -32400
#  Sun Nov 09 10:55:45 2014 +0900
# Node ID 51bec6878d7bfe46f92c039a7eb2af66b5d07e09
# Parent  3f2d6836855411597ef25b4f9786dcaa0fe7394a
fix typo

diff -r 3f2d68368554 -r 51bec6878d7b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Sat Nov 08 12:30:10 2014 -0600
+++ b/source/encoder/analysis.cpp   Sun Nov 09 10:55:45 2014 +0900
@@ -794,12 +794,12 @@
 
 if (m_param-bEnableRectInter)
 {
-md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
 checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
 if (md.pred[PRED_Nx2N].sa8dCost  bestInter-sa8dCost)
 bestInter = md.pred[PRED_Nx2N];
 
-md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
+md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
 checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
 if (md.pred[PRED_2NxN].sa8dCost  bestInter-sa8dCost)
 bestInter = md.pred[PRED_2NxN];
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix typo

2014-11-07 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415348521 -32400
#  Fri Nov 07 17:22:01 2014 +0900
# Node ID ddc90f87dbe7dd704e9f0b0fe15c4752f9156c16
# Parent  bc4f3dab51db5fb0a164fe0667f1556e2111d3c2
fix typo

diff -r bc4f3dab51db -r ddc90f87dbe7 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Fri Nov 07 11:43:15 2014 +0900
+++ b/source/encoder/analysis.cpp   Fri Nov 07 17:22:01 2014 +0900
@@ -1739,7 +1739,7 @@
 }
 
 // give 60% weight to all CU's and 40% weight to neighbour CU's
-if (neighCost + cuCount)
+if (neighCount + cuCount)
 {
 uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + 
(2 * neighCount));
 uint64_t curCost = m_param-rdLevel  1 ? bestMode.rdCost : 
bestMode.sa8dCost;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix bug in 522baf03fbbd

2014-11-07 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415355446 -32400
#  Fri Nov 07 19:17:26 2014 +0900
# Node ID 8a0b4706d8114ac8caa6b2bcb4359c672265ef75
# Parent  522baf03fbbd17ab3844f8190f78607089ce0a8d
fix bug in 522baf03fbbd

diff -r 522baf03fbbd -r 8a0b4706d811 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Nov 05 16:23:42 2014 +0530
+++ b/source/encoder/search.cpp Fri Nov 07 19:17:26 2014 +0900
@@ -2879,10 +2879,10 @@
 if (nullCostY  singleCostY)
 {
 cbfFlag[TEXT_LUMA][0] = 0;
+primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
 #if CHECKED_BUILD || _DEBUG
 uint32_t numCoeffY = 1  (log2TrSize  1);
 memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
-primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
 #endif
 if (checkTransformSkipY)
 minCost[TEXT_LUMA][0] = nullCostY;
@@ -2955,10 +2955,10 @@
 if (nullCostC  singleCostC)
 {
 cbfFlag[chromaId][tuIterator.section] = 0;
+primitives.blockfill_s[partSizeC](curResiC, 
strideResiC, 0);
 #if CHECKED_BUILD || _DEBUG
 uint32_t numCoeffC = 1  (log2TrSizeC  1);
 memset(coeffCurC + subTUOffset, 0, 
sizeof(coeff_t) * numCoeffC);
-primitives.blockfill_s[partSizeC](curResiC, 
strideResiC, 0);
 #endif
 if (checkTransformSkipC)
 minCost[chromaId][tuIterator.section] = 
nullCostC;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix bug in 522baf03fbbd

2014-11-07 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415356167 -32400
#  Fri Nov 07 19:29:27 2014 +0900
# Node ID cdb4f8e542d3d37710464ecc8279469024d24584
# Parent  4f034e3adef8d52853b88c6631a905dd96713d77
fix bug in 522baf03fbbd

diff -r 4f034e3adef8 -r cdb4f8e542d3 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Nov 07 17:22:01 2014 +0900
+++ b/source/encoder/search.cpp Fri Nov 07 19:29:27 2014 +0900
@@ -2857,10 +2857,10 @@
 {
 cbfFlag[TEXT_LUMA][0] = 0;
 singleBits[TEXT_LUMA][0] = 0;
+primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
 #if CHECKED_BUILD || _DEBUG
 uint32_t numCoeffY = 1 << (log2TrSize << 1);
 memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
-primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
 #endif
 if (checkTransformSkipY)
 minCost[TEXT_LUMA][0] = nullCostY;
@@ -2956,10 +2956,10 @@
 {
 cbfFlag[chromaId][tuIterator.section] = 0;
 singleBits[chromaId][tuIterator.section] = 0;
+primitives.blockfill_s[partSizeC](curResiC, 
strideResiC, 0);
 #if CHECKED_BUILD || _DEBUG
 uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
 memset(coeffCurC + subTUOffset, 0, 
sizeof(coeff_t) * numCoeffC);
-primitives.blockfill_s[partSizeC](curResiC, 
strideResiC, 0);
 #endif
 if (checkTransformSkipC)
 minCost[chromaId][tuIterator.section] = 
nullCostC;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] cudata: remove default argument

2014-11-06 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415328195 -32400
#  Fri Nov 07 11:43:15 2014 +0900
# Node ID f4853d3e81678e487b16a8c6f716a8f9418d6aad
# Parent  0ebd0b00bf9bc447d89892ef935bc017b186fa9d
cudata: remove default argument

diff -r 0ebd0b00bf9b -r f4853d3e8167 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Thu Nov 06 19:37:39 2014 -0600
+++ b/source/common/cudata.cpp  Fri Nov 07 11:43:15 2014 +0900
@@ -546,7 +546,7 @@
 return m_cuLeft;
 }
 
-const CUData* CUData::getPUAbove(uint32_t aPartUnitIdx, uint32_t 
curPartUnitIdx, bool planarAtCTUBoundary) const
+const CUData* CUData::getPUAbove(uint32_t aPartUnitIdx, uint32_t 
curPartUnitIdx) const
 {
 uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
 
@@ -557,15 +557,10 @@
 if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
 return m_encData->getPicCTU(m_cuAddr);
 else
-{
 aPartUnitIdx -= m_absIdxInCTU;
-return this;
-}
+return this;
 }
 
-if (planarAtCTUBoundary)
-return NULL;
-
 aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - 
s_numPartInCUSize];
 return m_cuAbove;
 }
@@ -853,7 +848,7 @@
 leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? 
tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
 
 // Get intra direction of above PU
-tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx, true);
+tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? 
getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
 
 aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? 
tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
 
diff -r 0ebd0b00bf9b -r f4853d3e8167 source/common/cudata.h
--- a/source/common/cudata.hThu Nov 06 19:37:39 2014 -0600
+++ b/source/common/cudata.hFri Nov 07 11:43:15 2014 +0900
@@ -222,7 +222,7 @@
 void getTUEntropyCodingParameters(TUEntropyCodingParameters result, 
uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const;
 
 const CUData* getPULeft(uint32_t lPartUnitIdx, uint32_t curPartUnitIdx) 
const;
-const CUData* getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx, 
bool planarAtCTUBoundary = false) const;
+const CUData* getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx) 
const;
 const CUData* getPUAboveLeft(uint32_t alPartUnitIdx, uint32_t 
curPartUnitIdx) const;
 const CUData* getPUAboveRight(uint32_t arPartUnitIdx, uint32_t 
curPartUnitIdx) const;
 const CUData* getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t 
curPartUnitIdx) const;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] refine deblocking filter

2014-11-05 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415178450 -32400
#  Wed Nov 05 18:07:30 2014 +0900
# Node ID ce18e3c8e9af1633d4c8ead10197296d0542d0e1
# Parent  2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
refine deblocking filter

diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/deblock.cpp
--- a/source/common/deblock.cpp Tue Nov 04 09:46:14 2014 +0530
+++ b/source/common/deblock.cpp Wed Nov 05 18:07:30 2014 +0900
@@ -33,18 +33,42 @@
 #define DEBLOCK_SMALLEST_BLOCK  8
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-void Deblock::deblockCTU(CUData* cu, int32_t dir)
+void Deblock::deblockCTU(const CUData* ctu, int32_t dir)
 {
-uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+uint8_t blockStrength[MAX_NUM_PARTITIONS];
 
-memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
+memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions);
 
-deblockCU(cu, 0, 0, dir, blockingStrength);
+deblockCU(ctu, 0, 0, dir, blockStrength);
+}
+
+static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t 
dir)
+{
+if (dir == Deblock::EDGE_VER)
+{
+if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
+{
+uint32_t tempPartIdx;
+const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
+return tempCU ? 2 : 0;
+}
+}
+else
+{
+if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
+{
+uint32_t tempPartIdx;
+const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
+return tempCU ? 2 : 0;
+}
+}
+
+return 0;
 }
 
 /* Deblocking filter process in CU-based (the same function as conventional's)
  * param Edge the direction of the edge in block boundary 
(horizonta/vertical), which is added newly */
-void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const 
int32_t dir, uint8_t blockingStrength[])
+void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, 
const int32_t dir, uint8_t blockStrength[])
 {
 if (cu->m_partSize[absPartIdx] == SIZE_NONE)
 return;
@@ -60,23 +84,21 @@
 uint32_t ymax = sps.picHeightInLumaSamples - cu->m_cuPelY;
 for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absPartIdx += 
qNumParts)
 if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] 
< ymax)
-deblockCU(cu, absPartIdx, depth + 1, dir, blockingStrength);
+deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength);
 return;
 }
 
-const uint32_t widthInBaseUnits = sps.numPartInCUSize >> depth;
-Param params;
-setLoopfilterParam(cu, absPartIdx, params);
-setEdgefilterPU(cu, absPartIdx, dir, blockingStrength, widthInBaseUnits);
-setEdgefilterTU(cu, absPartIdx, depth, dir, blockingStrength);
-setEdgefilterMultiple(cu, absPartIdx, dir, 0, (dir == EDGE_VER ? 
params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits);
+const uint32_t numUnits  = sps.numPartInCUSize >> depth;
+setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
+setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength);
+setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, 
dir), blockStrength, numUnits);
 
 for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + curNumParts; 
partIdx++)
 {
 uint32_t bsCheck = !(partIdx & (1 << dir));
 
-if (bsCheck && blockingStrength[partIdx])
-getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
+if (bsCheck && blockStrength[partIdx])
+blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, 
blockStrength);
 }
 
 const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
@@ -87,34 +109,33 @@
 
 for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
 {
-edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockingStrength);
+edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
 if (!((e0 + e) & chromaMask))
-edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockingStrength);
+edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
 }
 }
 
-static inline uint32_t calcBsIdx(CUData* cu, uint32_t absPartIdx, int32_t dir, 
int32_t edgeIdx, int32_t baseUnitIdx)
+static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, 
int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
 {
-uint32_t ctuWidthInBaseUnits = cu-m_slice-m_sps-numPartInCUSize;
+uint32_t numPartInCUSize = cu-m_slice-m_sps-numPartInCUSize;
 
 if (dir)
-return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * 
ctuWidthInBaseUnits + baseUnitIdx];
+return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * 
numPartInCUSize + baseUnitIdx];
 else
-return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * 
ctuWidthInBaseUnits + edgeIdx];
+return g_rasterToZscan

[x265] cleanup CUData::m_skipFlag

2014-11-03 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1415001734 -32400
#  Mon Nov 03 17:02:14 2014 +0900
# Node ID ef411645295a51cf276e7830d9a98ffe50d85f63
# Parent  eebb372eec893efc50e66806fcc19b1c1bd89683
cleanup CUData::m_skipFlag

diff -r eebb372eec89 -r ef411645295a source/common/cudata.cpp
--- a/source/common/cudata.cpp  Fri Oct 31 16:29:20 2014 -0500
+++ b/source/common/cudata.cpp  Mon Nov 03 17:02:14 2014 +0900
@@ -230,13 +230,12 @@
 m_qp  = (char*)charBuf; charBuf += m_numPartitions;
 m_log2CUSize = charBuf; charBuf += m_numPartitions;
 m_partSize   = charBuf; charBuf += m_numPartitions;
-m_predMode   = charBuf; charBuf += m_numPartitions;
 m_lumaIntraDir   = charBuf; charBuf += m_numPartitions;
 m_tqBypass   = charBuf; charBuf += m_numPartitions;
 m_refIdx[0]   = (char*)charBuf; charBuf += m_numPartitions;
 m_refIdx[1]   = (char*)charBuf; charBuf += m_numPartitions;
 m_cuDepth= charBuf; charBuf += m_numPartitions;
-m_skipFlag   = charBuf; charBuf += m_numPartitions; /* the order 
up to here is important in initCTU() and initSubCU() */
+m_predMode   = charBuf; charBuf += m_numPartitions; /* the order 
up to here is important in initCTU() and initSubCU() */
 m_mergeFlag  = charBuf; charBuf += m_numPartitions;
 m_interDir   = charBuf; charBuf += m_numPartitions;
 m_mvpIdx[0]  = charBuf; charBuf += m_numPartitions;
@@ -279,7 +278,6 @@
 m_partSet((uint8_t*)m_qp, (uint8_t)qp);
 m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
 m_partSet(m_partSize, (uint8_t)SIZE_NONE);
-m_partSet(m_predMode, (uint8_t)MODE_NONE);
 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
 m_partSet(m_tqBypass, (uint8_t)frame.m_encData-m_param-bLossless);
 if (m_slice-m_sliceType != I_SLICE)
@@ -291,7 +289,7 @@
 X265_CHECK(!(frame.m_encData->m_param->bLossless && 
!m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass 
in PPS\n");
 
 /* initialize the remaining CU data in one memset */
-memset(m_cuDepth, 0, (BytesPerPartition - 8) * m_numPartitions);
+memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
 
 uint32_t widthInCU = m_slice-m_sps-numCuInWidth;
 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData-getPicCTU(m_cuAddr - 1) : 
NULL;
@@ -319,7 +317,6 @@
 m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
 m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
 m_partSet(m_partSize, (uint8_t)SIZE_NONE);
-m_partSet(m_predMode, (uint8_t)MODE_NONE);
 m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
 m_partSet(m_tqBypass, (uint8_t)m_encData-m_param-bLossless);
 m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
@@ -327,7 +324,7 @@
 m_partSet(m_cuDepth,  (uint8_t)cuGeom.depth);
 
 /* initialize the remaining CU data in one memset */
-memset(m_skipFlag, 0, (BytesPerPartition - 9) * m_numPartitions);
+memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -340,13 +337,12 @@
 m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
 m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
 m_subPartCopy(m_partSize + offset, subCU.m_partSize);
-m_subPartCopy(m_predMode + offset, subCU.m_predMode);
 m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
 m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
 m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
 m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
 m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
-m_subPartCopy(m_skipFlag + offset, subCU.m_skipFlag);
+m_subPartCopy(m_predMode + offset, subCU.m_predMode);
 m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
 m_subPartCopy(m_interDir + offset, subCU.m_interDir);
 m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
@@ -410,7 +406,7 @@
 m_partSet(m_tqBypass, true);
 
 /* clear residual coding flags */
-m_partSet(m_skipFlag, 0);
+m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
 m_partSet(m_tuDepth, 0);
 m_partSet(m_transformSkip[0], 0);
 m_partSet(m_transformSkip[1], 0);
@@ -428,13 +424,12 @@
 m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
 m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
 m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
-m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
 m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
 m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
 m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, 
(uint8_t*)m_refIdx[0]);
 m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, 
(uint8_t*)m_refIdx[1]);
 m_partCopy

[x265] fix uninitialized

2014-10-29 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1414570941 -32400
#  Wed Oct 29 17:22:21 2014 +0900
# Node ID 1f0b295a6dfac9457deea4efe81261edf21f5039
# Parent  da5ba239bf59a48d9b586c54bf2d0a5320043044
fix uninitialized

diff -r da5ba239bf59 -r 1f0b295a6dfa source/common/predict.cpp
--- a/source/common/predict.cpp Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.cpp Wed Oct 29 17:22:21 2014 +0900
@@ -144,12 +144,17 @@
 primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 
0);
 }
 
-void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
+void Predict::initMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
 {
 m_predSlice = cu.m_slice;
 cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
 m_ctuAddr = cu.m_cuAddr;
 m_cuAbsPartIdx = cuGeom.encodeIdx;
+}
+
+void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
+{
+initMotionCompensation(cu, cuGeom, partIdx);
 
 m_refIdx0  = cu.m_refIdx[0][m_puAbsPartIdx];
 m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
diff -r da5ba239bf59 -r 1f0b295a6dfa source/common/predict.h
--- a/source/common/predict.h   Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.h   Wed Oct 29 17:22:21 2014 +0900
@@ -117,6 +117,7 @@
 public:
 
 /* prepMotionCompensation needs to be called to prepare MC with 
CU-relevant data */
+void initMotionCompensation(const CUData cu, const CUGeom cuGeom, int 
partIdx);
 void prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int 
partIdx);
 void motionCompensation(Yuv predYuv, bool bLuma, bool bChroma);
 
diff -r da5ba239bf59 -r 1f0b295a6dfa source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Wed Oct 29 09:13:25 2014 +0530
+++ b/source/encoder/analysis.cpp   Wed Oct 29 17:22:21 2014 +0900
@@ -882,7 +882,7 @@
 if (m_bTryLossless)
 tryLossless(cuGeom);
 
-if (mightSplit && m_param->rdLevel > 1)
+if (mightSplit)
 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 }
 
@@ -934,7 +934,7 @@
 if (mightNotSplit)
 addSplitFlagCost(*splitPred, cuGeom.depth);
 else if (m_param->rdLevel <= 1)
-splitPred->sa8dCost = 
m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->totalBits);
+splitPred->sa8dCost = 
m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
 else
 updateModeCost(*splitPred);
 
@@ -1539,6 +1539,7 @@
 intraMode.totalBits = bbits;
 intraMode.distortion = bsad;
 intraMode.sa8dCost = bcost;
+intraMode.sa8dBits = bbits;
 }
 
 void Analysis::encodeIntraInInter(Mode intraMode, const CUGeom cuGeom)
diff -r da5ba239bf59 -r 1f0b295a6dfa source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Oct 29 09:13:25 2014 +0530
+++ b/source/encoder/search.cpp Wed Oct 29 17:22:21 2014 +0900
@@ -1728,7 +1728,7 @@
 for (int puIdx = 0; puIdx  numPart; puIdx++)
 {
 /* sets m_puAbsPartIdx, m_puWidth, m_puHeight */
-prepMotionCompensation(cu, cuGeom, puIdx);
+initMotionCompensation(cu, cuGeom, puIdx);
 
 pixel* pu = fencPic-getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + 
m_puAbsPartIdx);
 m_me.setSourcePU(pu - fencPic-m_picOrg[0], m_puWidth, m_puHeight);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix rd=0,1

2014-10-29 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1414600429 -32400
#  Thu Oct 30 01:33:49 2014 +0900
# Branch stable
# Node ID 2a5e13c6ee9351095e9a7aade3c52e8b4092b7f8
# Parent  da5ba239bf59a48d9b586c54bf2d0a5320043044
fix rd=0,1

diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.cpp
--- a/source/common/predict.cpp Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.cpp Thu Oct 30 01:33:49 2014 +0900
@@ -144,12 +144,17 @@
 primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 
0);
 }
 
-void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
+void Predict::initMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
 {
 m_predSlice = cu.m_slice;
 cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight);
 m_ctuAddr = cu.m_cuAddr;
 m_cuAbsPartIdx = cuGeom.encodeIdx;
+}
+
+void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, 
int partIdx)
+{
+initMotionCompensation(cu, cuGeom, partIdx);
 
 m_refIdx0  = cu.m_refIdx[0][m_puAbsPartIdx];
 m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx];
diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.h
--- a/source/common/predict.h   Wed Oct 29 09:13:25 2014 +0530
+++ b/source/common/predict.h   Thu Oct 30 01:33:49 2014 +0900
@@ -117,6 +117,7 @@
 public:
 
 /* prepMotionCompensation needs to be called to prepare MC with 
CU-relevant data */
+void initMotionCompensation(const CUData cu, const CUGeom cuGeom, int 
partIdx);
 void prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int 
partIdx);
 void motionCompensation(Yuv predYuv, bool bLuma, bool bChroma);
 
diff -r da5ba239bf59 -r 2a5e13c6ee93 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Wed Oct 29 09:13:25 2014 +0530
+++ b/source/encoder/analysis.cpp   Thu Oct 30 01:33:49 2014 +0900
@@ -882,7 +882,7 @@
 if (m_bTryLossless)
 tryLossless(cuGeom);
 
-if (mightSplit && m_param->rdLevel > 1)
+if (mightSplit)
 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 }
 
@@ -934,7 +934,7 @@
 if (mightNotSplit)
 addSplitFlagCost(*splitPred, cuGeom.depth);
 else if (m_param->rdLevel <= 1)
-splitPred->sa8dCost = 
m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->totalBits);
+splitPred->sa8dCost = 
m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
 else
 updateModeCost(*splitPred);
 
@@ -1539,6 +1539,7 @@
 intraMode.totalBits = bbits;
 intraMode.distortion = bsad;
 intraMode.sa8dCost = bcost;
+intraMode.sa8dBits = bbits;
 }
 
 void Analysis::encodeIntraInInter(Mode intraMode, const CUGeom cuGeom)
@@ -1601,8 +1602,6 @@
 uint32_t absPartIdx = cuGeom.encodeIdx;
 int sizeIdx = cuGeom.log2CUSize - 2;
 
-/* at RD 0, the prediction pixels are accumulated into the top depth 
predYuv */
-Yuv predYuv = m_modeDepth[0].bestMode-predYuv;
 Yuv fencYuv = m_modeDepth[0].fencYuv;
 
 /* reuse the bestMode data structures at the current depth */
@@ -1615,18 +1614,13 @@
 
 if (cu.m_predMode[0] == MODE_INTRA)
 {
-uint32_t initTrDepth = cu.m_partSize[0] == SIZE_2Nx2N ? 0 : 1;
-
 uint32_t tuDepthRange[2];
 cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
+uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
 residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, 
tuDepthRange);
 getBestIntraModeChroma(*bestMode, cuGeom);
 residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
-
-/* copy the reconstructed part to the recon pic for later intra
- * predictions */
-reconYuv.copyToPicYuv(*m_frame-m_reconPicYuv, cu.m_cuAddr, 
absPartIdx);
 }
 else
 {
@@ -1636,16 +1630,22 @@
 
 ShortYuv resiYuv = m_rqt[cuGeom.depth].tmpResiYuv;
 
+/* at RD 0, the prediction pixels are accumulated into the top depth 
predYuv */
+Yuv predYuv = m_modeDepth[0].bestMode-predYuv;
+pixel* predY = predYuv.getLumaAddr(absPartIdx);
+pixel* predU = predYuv.getCbAddr(absPartIdx);
+pixel* predV = predYuv.getCrAddr(absPartIdx);
+
 primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size,
-fencYuv.getLumaAddr(absPartIdx), 
predYuv.getLumaAddr(absPartIdx),
+fencYuv.getLumaAddr(absPartIdx), predY,
 fencYuv.m_size, predYuv.m_size);
 
 primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], 
resiYuv.m_csize,
-fencYuv.getCbAddr(absPartIdx), 
predYuv.getCbAddr(absPartIdx),
+fencYuv.getCbAddr(absPartIdx), predU,
 fencYuv.m_csize, predYuv.m_csize);
 
 primitives.chroma[m_csp].sub_ps[sizeIdx

[x265] sao: refine sao merge mode

2014-10-14 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1413341637 -32400
#  Wed Oct 15 11:53:57 2014 +0900
# Node ID 8014e8d2c321148c7d68942b6f4552b8eede6e1c
# Parent  02ff8eaad63232e958153e8b7cdcd5907141a7b6
sao: refine sao merge mode

diff -r 02ff8eaad632 -r 8014e8d2c321 source/common/common.h
--- a/source/common/common.hMon Oct 13 18:17:00 2014 +0530
+++ b/source/common/common.hWed Oct 15 11:53:57 2014 +0900
@@ -251,18 +251,23 @@
 uint32_t count[MAX_NUM_TR_CATEGORIES];
 };
 
+enum SaoMergeMode
+{
+SAO_MERGE_NONE,
+SAO_MERGE_LEFT,
+SAO_MERGE_UP
+};
+
 struct SaoCtuParam
 {
-bool mergeUpFlag;
-bool mergeLeftFlag;
+SaoMergeMode mergeMode;
 int  typeIdx;
 uint32_t bandPos;// BO band position
 int  offset[SAO_NUM_OFFSET];
 
 void reset()
 {
-mergeUpFlag = false;
-mergeLeftFlag = false;
+mergeMode = SAO_MERGE_NONE;
 typeIdx = -1;
 bandPos = 0;
 offset[0] = 0;
diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Mon Oct 13 18:17:00 2014 +0530
+++ b/source/encoder/frameencoder.cpp   Wed Oct 15 11:53:57 2014 +0900
@@ -413,7 +413,7 @@
 const uint32_t lastCUAddr = (slice-m_endCUAddr + NUM_CU_PARTITIONS - 1) / 
NUM_CU_PARTITIONS;
 const int numSubstreams = m_param-bEnableWavefront ? 
m_frame-getPicSym()-getFrameHeightInCU() : 1;
 
-SAOParam *saoParam = slice-m_pic-getPicSym()-m_saoParam;
+SAOParam* saoParam = slice-m_sps-bUseSAO ? 
slice-m_pic-getPicSym()-m_saoParam : NULL;
 for (uint32_t cuAddr = 0; cuAddr  lastCUAddr; cuAddr++)
 {
 uint32_t col = cuAddr % widthInLCUs;
@@ -430,12 +430,12 @@
 m_entropyCoder.loadContexts(m_rows[lin - 1].bufferedEntropy);
 }
 
-if (slice-m_sps-bUseSAO)
+if (saoParam)
 {
 if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
 {
-int mergeLeft = saoParam->ctuParam[0][cuAddr].mergeLeftFlag && 
col;
-int mergeUp = saoParam->ctuParam[0][cuAddr].mergeUpFlag && lin;
+int mergeLeft = col && saoParam->ctuParam[0][cuAddr].mergeMode 
== SAO_MERGE_LEFT;
+int mergeUp = lin && saoParam->ctuParam[0][cuAddr].mergeMode 
== SAO_MERGE_UP;
 if (col)
 m_entropyCoder.codeSaoMerge(mergeLeft);
 if (lin && !mergeLeft)
diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/sao.cpp
--- a/source/encoder/sao.cppMon Oct 13 18:17:00 2014 +0530
+++ b/source/encoder/sao.cppWed Oct 15 11:53:57 2014 +0900
@@ -90,7 +90,7 @@
 m_depthSaoRate[1][3] = 0;
 }
 
-bool SAO::create(x265_param *param)
+bool SAO::create(x265_param* param)
 {
 m_param = param;
 m_hChromaShift = CHROMA_H_SHIFT(param-internalCsp);
@@ -161,7 +161,7 @@
 }
 
 /* allocate memory for SAO parameters */
-void SAO::allocSaoParam(SAOParam *saoParam) const
+void SAO::allocSaoParam(SAOParam* saoParam) const
 {
 saoParam-numCuInWidth  = m_numCuInWidth;
 
@@ -170,14 +170,7 @@
 saoParam-ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth];
 }
 
-/* reset SAO parameters once per frame */
-void SAO::resetSAOParam(SAOParam *saoParam)
-{
-saoParam-bSaoFlag[0] = false;
-saoParam-bSaoFlag[1] = false;
-}
-
-void SAO::startSlice(Frame *pic, Entropy initState, int qp)
+void SAO::startSlice(Frame* pic, Entropy initState, int qp)
 {
 Slice* slice = pic-m_picSym-m_slice;
 
@@ -213,7 +206,6 @@
 pic-getPicSym()-m_saoParam = saoParam;
 }
 
-resetSAOParam(saoParam);
 rdoSaoUnitRowInit(saoParam);
 
 // NOTE: Disable SAO automatic turn-off when frame parallelism is
@@ -229,7 +221,7 @@
 void SAO::processSaoCu(int addr, int typeIdx, int plane)
 {
 int x, y;
-TComDataCU *cu = m_pic-getCU(addr);
+const TComDataCU* cu = m_pic-getCU(addr);
 pixel* rec = m_pic-getPicYuvRec()-getPlaneAddr(plane, addr);
 int stride = plane ? m_pic-getCStride() : m_pic-getStride();
 uint32_t picWidth  = m_param-sourceWidth;
@@ -454,12 +446,12 @@
 
 if (!idxY)
 {
-pixel *rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane) : 
m_pic-getPicYuvRec()-getLumaAddr();
+pixel* rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane) : 
m_pic-getPicYuvRec()-getLumaAddr();
 memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
 }
 
 int addr = idxY * m_numCuInWidth;
-pixel *rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane, addr) : 
m_pic-getPicYuvRec()-getLumaAddr(addr);
+pixel* rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane, addr) : 
m_pic-getPicYuvRec()-getLumaAddr(addr);
 
 for (int i = 0; i  ctuHeight + 1; i++)
 {
@@ -477,8 +469,8 @@
 {
 addr = idxY * m_numCuInWidth + idxX;
 
+bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT;
 int typeIdx = ctuParam[addr].typeIdx;
-bool mergeLeftFlag = ctuParam[addr].mergeLeftFlag

Re: [x265] sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]

2014-10-09 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1412847192 -32400
#  Thu Oct 09 18:33:12 2014 +0900
# Node ID ab734f79e1d071d75e4bbea1742fd125698e9ff3
# Parent  0a18adcecd7deb4f861e6436c9f17e05da994625
sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]

diff -r 0a18adcecd7d -r ab734f79e1d0 source/common/common.h
--- a/source/common/common.hWed Oct 08 14:18:20 2014 -0500
+++ b/source/common/common.hThu Oct 09 18:33:12 2014 +0900
@@ -132,6 +132,12 @@
 return std::min<T>(std::max<T>(minVal, a), maxVal);
 }
 
+template<typename T>
+inline T x265_min(T a, T b) { return a < b ? a : b; }
+
+template<typename T>
+inline T x265_max(T a, T b) { return a > b ? a : b; }
+
 typedef int16_t  coeff_t;  // transform coefficient
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -224,17 +230,15 @@
 bool mergeUpFlag;
 bool mergeLeftFlag;
 int  typeIdx;
-int  subTypeIdx;// indicates EO class or BO band position
+uint32_t bandPos;// BO band position
 int  offset[SAO_NUM_OFFSET];
-int  partIdx;
-int  partIdxTmp;
 
 void reset()
 {
 mergeUpFlag = false;
 mergeLeftFlag = false;
 typeIdx = -1;
-subTypeIdx = 0;
+bandPos = 0;
 offset[0] = 0;
 offset[1] = 0;
 offset[2] = 0;
@@ -246,7 +250,6 @@
 {
 SaoCtuParam* ctuParam[3];
 bool bSaoFlag[2];
-int  numCuInHeight;
 int  numCuInWidth;
 
 SAOParam()
@@ -254,6 +257,7 @@
 for (int i = 0; i  3; i++)
 ctuParam[i] = NULL;
 }
+
 ~SAOParam()
 {
 delete[] ctuParam[0];
diff -r 0a18adcecd7d -r ab734f79e1d0 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cppWed Oct 08 14:18:20 2014 -0500
+++ b/source/encoder/entropy.cppThu Oct 09 18:33:12 2014 +0900
@@ -512,7 +512,7 @@
 }
 
 // We need to split, so don't try these modes.
-if (cuSplitFlag) 
+if (cuSplitFlag)
 codeSplitFlag(ctu, absPartIdx, depth);
 
 if (depth  ctu-getDepth(absPartIdx)  depth  g_maxCUDepth)
@@ -864,74 +864,40 @@
 encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, 
absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
 }
 
-void Entropy::codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx)
+void Entropy::codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane)
 {
-uint32_t symbol;
-int i;
+int typeIdx = saoLcuParam-typeIdx;
 
-symbol = saoLcuParam-typeIdx + 1;
-if (compIdx != 2)
-codeSaoTypeIdx(symbol);
+if (plane != 2)
+{
+encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
+if (typeIdx >= 0)
+encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
+}
 
-if (symbol)
+if (typeIdx >= 0)
 {
-if (saoLcuParam-typeIdx  SAO_BO  compIdx != 2)
-saoLcuParam-subTypeIdx = saoLcuParam-typeIdx;
+enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
 
-int offsetTh = 1  X265_MIN(X265_DEPTH - 5, 5);
-if (saoLcuParam-typeIdx == SAO_BO)
+if (typeIdx == SAO_BO)
 {
-for (i = 0; i  SAO_BO_LEN; i++)
-{
-uint32_t absOffset = ((saoLcuParam-offset[i]  0) ? 
-saoLcuParam-offset[i] : saoLcuParam-offset[i]);
-codeSaoMaxUvlc(absOffset, offsetTh - 1);
-}
+for (int i = 0; i  SAO_BO_LEN; i++)
+codeSaoMaxUvlc(abs(saoLcuParam-offset[i]), OFFSET_THRESH - 1);
 
-for (i = 0; i  SAO_BO_LEN; i++)
-{
+for (int i = 0; i  SAO_BO_LEN; i++)
 if (saoLcuParam-offset[i] != 0)
-{
-uint32_t sign = (saoLcuParam-offset[i]  0) ? 1 : 0;
-codeSAOSign(sign);
-}
-}
+encodeBinEP(saoLcuParam-offset[i]  0);
 
-symbol = (uint32_t)(saoLcuParam-subTypeIdx);
-codeSaoUflc(5, symbol);
+encodeBinsEP(saoLcuParam-bandPos, 5);
 }
-else // if (saoLcuParam-typeIdx  SAO_BO)
+else // if (typeIdx  SAO_BO)
 {
-codeSaoMaxUvlc(saoLcuParam-offset[0], offsetTh - 1);
-codeSaoMaxUvlc(saoLcuParam-offset[1], offsetTh - 1);
-codeSaoMaxUvlc(-saoLcuParam-offset[2], offsetTh - 1);
-codeSaoMaxUvlc(-saoLcuParam-offset[3], offsetTh - 1);
-if (compIdx != 2)
-{
-symbol = (uint32_t)(saoLcuParam-subTypeIdx);
-codeSaoUflc(2, symbol);
-}
-}
-}
-}
-
-void Entropy::codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int 
ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int 
allowMergeLeft, int allowMergeUp)
-{
-if (saoFlag)
-{
-if (rx  0  cuAddrInSlice != 0  allowMergeLeft)
-codeSaoMerge(saoLcuParam-mergeLeftFlag);
-else
-saoLcuParam-mergeLeftFlag = 0

[x265] (no subject)

2014-10-08 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1412754238 -32400
#  Wed Oct 08 16:43:58 2014 +0900
# Node ID 84c960cf1552f3f317690fa1d35f6536bf4b36b4
# Parent  46c4b98d92ece7ff25d790b0fc69a8185d575524
fix invalid copy source context in rdLevel==0

diff -r 46c4b98d92ec -r 84c960cf1552 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Mon Oct 06 22:07:54 2014 -0500
+++ b/source/encoder/analysis.cpp   Wed Oct 08 16:43:58 2014 +0900
@@ -1275,6 +1275,8 @@
 
 uint32_t nextDepth = depth + 1;
 invalidateContexts(nextDepth);
+// initialize RD with previous depth buffer
+m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
 TComDataCU* subTempPartCU = m_tempCU[nextDepth];
 for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
 {
@@ -1285,10 +1287,8 @@
 
 if (child_cu->flags & CU::PRESENT)
 {
-if (partUnitIdx) // initialize RD with previous depth buffer
+if (partUnitIdx && m_param->rdLevel)
 
m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
-else
-m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
 
 compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, 
nextDepth, child_cu, cu_unsplit_flag, partUnitIdx, minDepth);
 
@@ -1372,7 +1372,8 @@
 std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]);
 std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);
 // copy 'next' state from last CU of next depth as next state 
of this CU
-m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
+if (m_param-rdLevel)
+
m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
 }
 }
 else
@@ -1381,7 +1382,8 @@
 std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]);
 std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);
 // copy 'next' state from last CU of next depth as next state of 
this CU
-m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
+if (m_param-rdLevel)
+m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
 }
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix invalid copy source context in rdLevel==0

2014-10-08 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1412754238 -32400
#  Wed Oct 08 16:43:58 2014 +0900
# Node ID 84c960cf1552f3f317690fa1d35f6536bf4b36b4
# Parent  46c4b98d92ece7ff25d790b0fc69a8185d575524
fix invalid copy source context in rdLevel==0

diff -r 46c4b98d92ec -r 84c960cf1552 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Mon Oct 06 22:07:54 2014 -0500
+++ b/source/encoder/analysis.cpp   Wed Oct 08 16:43:58 2014 +0900
@@ -1275,6 +1275,8 @@
 
 uint32_t nextDepth = depth + 1;
 invalidateContexts(nextDepth);
+// initialize RD with previous depth buffer
+m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
 TComDataCU* subTempPartCU = m_tempCU[nextDepth];
 for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
 {
@@ -1285,10 +1287,8 @@
 
 if (child_cu->flags & CU::PRESENT)
 {
-if (partUnitIdx) // initialize RD with previous depth buffer
+if (partUnitIdx && m_param->rdLevel)
 
m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next);
-else
-m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur);
 
 compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, 
nextDepth, child_cu, cu_unsplit_flag, partUnitIdx, minDepth);
 
@@ -1372,7 +1372,8 @@
 std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]);
 std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);
 // copy 'next' state from last CU of next depth as next state 
of this CU
-m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
+if (m_param->rdLevel)
+
m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
 }
 }
 else
@@ -1381,7 +1382,8 @@
 std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]);
 std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);
 // copy 'next' state from last CU of next depth as next state of 
this CU
-m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
+if (m_param->rdLevel)
+m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next);
 }
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]

2014-10-05 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1412500756 -32400
#  Sun Oct 05 18:19:16 2014 +0900
# Node ID 64ea900398eb29ddd1c12df8126fa9866a280c81
# Parent  b6d49505b179cb509aa76f3a065192f0b4926579
sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]

diff -r b6d49505b179 -r 64ea900398eb source/common/common.h
--- a/source/common/common.hThu Oct 02 16:47:55 2014 -0500
+++ b/source/common/common.hSun Oct 05 18:19:16 2014 +0900
@@ -132,6 +132,12 @@
 return std::min<T>(std::max<T>(minVal, a), maxVal);
 }
 
+template<typename T>
+inline T x265_min(T a, T b) { return a < b ? a : b; }
+
+template<typename T>
+inline T x265_max(T a, T b) { return a > b ? a : b; }
+
 typedef int16_t  coeff_t;  // transform coefficient
 
 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -224,17 +230,15 @@
 bool mergeUpFlag;
 bool mergeLeftFlag;
 int  typeIdx;
-int  subTypeIdx;// indicates EO class or BO band position
+uint32_t bandPos;// BO band position
 int  offset[SAO_NUM_OFFSET];
-int  partIdx;
-int  partIdxTmp;
 
 void reset()
 {
 mergeUpFlag = false;
 mergeLeftFlag = false;
 typeIdx = -1;
-subTypeIdx = 0;
+bandPos = 0;
 offset[0] = 0;
 offset[1] = 0;
 offset[2] = 0;
@@ -246,7 +250,6 @@
 {
 SaoCtuParam* ctuParam[3];
 bool bSaoFlag[2];
-int  numCuInHeight;
 int  numCuInWidth;
 
 SAOParam()
@@ -254,6 +257,7 @@
 for (int i = 0; i < 3; i++)
 ctuParam[i] = NULL;
 }
+
 ~SAOParam()
 {
 delete[] ctuParam[0];
diff -r b6d49505b179 -r 64ea900398eb source/encoder/entropy.cpp
--- a/source/encoder/entropy.cppThu Oct 02 16:47:55 2014 -0500
+++ b/source/encoder/entropy.cppSun Oct 05 18:19:16 2014 +0900
@@ -511,7 +511,7 @@
 }
 
 // We need to split, so don't try these modes.
-if (cuSplitFlag) 
+if (cuSplitFlag)
 codeSplitFlag(ctu, absPartIdx, depth);
 
 if (depth < ctu->getDepth(absPartIdx) && depth < g_maxCUDepth)
@@ -863,74 +863,40 @@
 encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, 
absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
 }
 
-void Entropy::codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx)
+void Entropy::codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane)
 {
-uint32_t symbol;
-int i;
+int typeIdx = saoLcuParam->typeIdx;
 
-symbol = saoLcuParam->typeIdx + 1;
-if (compIdx != 2)
-codeSaoTypeIdx(symbol);
+if (plane != 2)
+{
+encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
+if (typeIdx >= 0)
+encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
+}
 
-if (symbol)
+if (typeIdx >= 0)
 {
-if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2)
-saoLcuParam->subTypeIdx = saoLcuParam->typeIdx;
+enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
 
-int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5);
-if (saoLcuParam->typeIdx == SAO_BO)
+if (typeIdx == SAO_BO)
 {
-for (i = 0; i < SAO_BO_LEN; i++)
-{
-uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? -saoLcuParam->offset[i] : saoLcuParam->offset[i]);
-codeSaoMaxUvlc(absOffset, offsetTh - 1);
-}
+for (int i = 0; i < SAO_BO_LEN; i++)
+codeSaoMaxUvlc(abs(saoLcuParam->offset[i]), OFFSET_THRESH - 1);
 
-for (i = 0; i < SAO_BO_LEN; i++)
-{
+for (int i = 0; i < SAO_BO_LEN; i++)
 if (saoLcuParam->offset[i] != 0)
-{
-uint32_t sign = (saoLcuParam->offset[i] < 0) ? 1 : 0;
-codeSAOSign(sign);
-}
-}
+encodeBinEP(saoLcuParam->offset[i] < 0);
 
-symbol = (uint32_t)(saoLcuParam->subTypeIdx);
-codeSaoUflc(5, symbol);
+encodeBinsEP(saoLcuParam->bandPos, 5);
 }
-else // if (saoLcuParam->typeIdx < SAO_BO)
+else // if (typeIdx < SAO_BO)
 {
-codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1);
-codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1);
-codeSaoMaxUvlc(-saoLcuParam->offset[2], offsetTh - 1);
-codeSaoMaxUvlc(-saoLcuParam->offset[3], offsetTh - 1);
-if (compIdx != 2)
-{
-symbol = (uint32_t)(saoLcuParam->subTypeIdx);
-codeSaoUflc(2, symbol);
-}
-}
-}
-}
-
-void Entropy::codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int 
ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int 
allowMergeLeft, int allowMergeUp)
-{
-if (saoFlag)
-{
-if (rx  0  cuAddrInSlice != 0  allowMergeLeft)
-codeSaoMerge(saoLcuParam-mergeLeftFlag);
-else
-saoLcuParam-mergeLeftFlag = 0

[x265] fix bug in 73c6c9086577 for rdLevel=0

2014-10-01 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1412209540 -32400
#  Thu Oct 02 09:25:40 2014 +0900
# Node ID 2efc3c19dd26944506c2c5e801abc96b1c048b40
# Parent  d0fa09e9cca540c6eab84308dea481f8368b1cb1
fix bug in 73c6c9086577 for rdLevel=0

diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Wed Oct 01 09:39:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Thu Oct 02 09:25:40 2014 +0900
@@ -454,19 +454,15 @@
 m_cuAboveRight  = cu-getCUAboveRight();
 }
 
-void TComDataCU::copyToSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, 
uint32_t depth)
+void TComDataCU::copyFromPic(TComDataCU* ctu, CU* cuData)
 {
-X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
+m_pic  = ctu->m_pic;
+m_slice= ctu->m_slice;
+m_cuAddr   = ctu->getAddr();
+m_absIdxInCTU  = cuData->encodeIdx;
 
-uint32_t partOffset = cuData->numPartitions * partUnitIdx;
-
-m_pic  = cu->m_pic;
-m_slice= cu->m_slice;
-m_cuAddr   = cu->getAddr();
-m_absIdxInCTU  = cuData->encodeIdx + partOffset;
-
-m_cuPelX   = cu->getCUPelX() + ((partUnitIdx &  1) << (g_maxLog2CUSize - depth));
-m_cuPelY   = cu->getCUPelY() + ((partUnitIdx >> 1) << (g_maxLog2CUSize - depth));
+m_cuPelX   = ctu->getCUPelX() + g_zscanToPelX[m_absIdxInCTU];
+m_cuPelY   = ctu->getCUPelY() + g_zscanToPelY[m_absIdxInCTU];
 
 m_psyEnergy= 0;
 m_totalPsyCost = MAX_INT64;
@@ -478,18 +474,17 @@
 m_coeffBits= 0;
 m_numPartitions= cuData-numPartitions;
 
-TComDataCU* otherCU = m_pic-getCU(m_cuAddr);
 int sizeInChar  = sizeof(char) * m_numPartitions;
 
-memcpy(m_skipFlag, otherCU-getSkipFlag() + m_absIdxInCTU, 
sizeof(*m_skipFlag) * m_numPartitions);
-memcpy(m_qp, otherCU-getQP() + m_absIdxInCTU, sizeInChar);
+memcpy(m_skipFlag, ctu-getSkipFlag() + m_absIdxInCTU, sizeof(*m_skipFlag) 
* m_numPartitions);
+memcpy(m_qp, ctu-getQP() + m_absIdxInCTU, sizeInChar);
 
-memcpy(m_partSizes, otherCU-getPartitionSize() + m_absIdxInCTU, 
sizeof(*m_partSizes) * m_numPartitions);
-memcpy(m_predModes, otherCU-getPredictionMode() + m_absIdxInCTU, 
sizeof(*m_predModes) * m_numPartitions);
+memcpy(m_partSizes, ctu-getPartitionSize() + m_absIdxInCTU, 
sizeof(*m_partSizes) * m_numPartitions);
+memcpy(m_predModes, ctu-getPredictionMode() + m_absIdxInCTU, 
sizeof(*m_predModes) * m_numPartitions);
 
-memcpy(m_lumaIntraDir, otherCU-getLumaIntraDir() + m_absIdxInCTU, 
sizeInChar);
-memcpy(m_depth, otherCU-getDepth() + m_absIdxInCTU, sizeInChar);
-memcpy(m_log2CUSize, otherCU-getLog2CUSize() + m_absIdxInCTU, sizeInChar);
+memcpy(m_lumaIntraDir, ctu-getLumaIntraDir() + m_absIdxInCTU, sizeInChar);
+memcpy(m_depth, ctu-getDepth() + m_absIdxInCTU, sizeInChar);
+memcpy(m_log2CUSize, ctu-getLog2CUSize() + m_absIdxInCTU, sizeInChar);
 }
 
 // 

@@ -2411,6 +2406,8 @@
 void TComDataCU::loadCTUData(uint32_t maxCUSize)
 {
 // Initialize the coding blocks inside the CTB
+int picWidth  = m_pic-m_origPicYuv-m_picWidth;
+int picHeight = m_pic-m_origPicYuv-m_picHeight;
 for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; 
log2CUSize = MIN_LOG2_CU_SIZE; log2CUSize--)
 {
 uint32_t blockSize  = 1  log2CUSize;
@@ -2425,8 +2422,8 @@
 uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + 
(depth_idx  2);
 uint32_t px = m_cuPelX + sb_x * blockSize;
 uint32_t py = m_cuPelY + sb_y * blockSize;
-int32_t present_flag = px  m_pic-m_origPicYuv-m_picWidth  
py  m_pic-m_origPicYuv-m_picHeight;
-int32_t split_mandatory_flag = present_flag  
!last_level_flag  (px + blockSize  m_pic-m_origPicYuv-m_picWidth || py + 
blockSize  m_pic-m_origPicYuv-m_picHeight);
+int32_t present_flag = px  picWidth  py  picHeight;
+int32_t split_mandatory_flag = present_flag  
!last_level_flag  (px + blockSize  picWidth || py + blockSize  picHeight);
 
 /* Offset of the luma CU in the X, Y direction in terms of 
pixels from the CTU origin */
 uint32_t xOffset = (sb_x * blockSize)  3;
diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.hWed Oct 01 09:39:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.hThu Oct 02 09:25:40 2014 +0900
@@ -276,7 +276,7 @@
 void  initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, 
uint32_t depth, int qp);
 void  loadCTUData(uint32_t maxCUSize);
 
-void  copyToSubCU(TComDataCU* ctu, CU* cuData, uint32_t 
partUnitIdx, uint32_t depth);
+void

Re: [x265] [PATCH 1 of 2] TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure

2014-09-30 Thread Satoshi Nakagawa

decoder crash (invalid stream output) for --rd=0.

./x265 --rd=0 -f 9 --b-adapt=0 --bframes=3 --input RaceHorses_416x240_30.yuv 
--input-res 416x240 --fps 30 --output o.bin --recon o.yuv



From: santhosh...@multicorewareinc.com
Subject: [x265] [PATCH 1 of 2] TComDataCU: replace getZorderIdxInCU() with 
encodeIdx of CU structure
Date: Tue, 30 Sep 2014 09:04:28 +0530

 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1412047376 -19800
 #  Tue Sep 30 08:52:56 2014 +0530
 # Node ID 21b1e8daa7e97e3828dfd948ff776951b939f423
 # Parent  5a6845566d1492d29af29ecc0cf75d644994735c
 TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure
 
 diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComDataCU.cpp
 --- a/source/Lib/TLibCommon/TComDataCU.cppMon Sep 29 17:37:47 2014 -0500
 +++ b/source/Lib/TLibCommon/TComDataCU.cppTue Sep 30 08:52:56 2014 +0530
 @@ -387,16 +387,15 @@
  }
  
  // initialize Sub partition
 -void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t 
 depth, int qp)
 +void TComDataCU::initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, 
 uint32_t depth, int qp)
  {
  X265_CHECK(partUnitIdx  4, part unit should be less than 4\n);
  uint8_t log2CUSize = g_maxLog2CUSize - depth;
 -uint32_t partOffset = (cu-getTotalNumPart()  2) * partUnitIdx;
  
  m_pic  = cu-m_pic;
  m_slice= cu-m_slice;
  m_cuAddr   = cu-getAddr();
 -m_absIdxInLCU  = cu-getZorderIdxInCU() + partOffset;
 +m_absIdxInLCU  = cuData-encodeIdx;
  
  m_cuPelX   = cu-getCUPelX() + ((partUnitIdx   1)  
 log2CUSize);
  m_cuPelY   = cu-getCUPelY() + ((partUnitIdx  1)  
 log2CUSize);
 @@ -453,7 +452,7 @@
  m_cuAboveRight  = cu-getCUAboveRight();
  }
  
 -void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t 
 depth)
 +void TComDataCU::copyToSubCU(TComDataCU* cu, CU* cuData, uint32_t 
 partUnitIdx, uint32_t depth)
  {
  X265_CHECK(partUnitIdx  4, part unit should be less than 4\n);
  
 @@ -462,7 +461,7 @@
  m_pic  = cu-m_pic;
  m_slice= cu-m_slice;
  m_cuAddr   = cu-getAddr();
 -m_absIdxInLCU  = cu-getZorderIdxInCU() + partOffset;
 +m_absIdxInLCU  = cuData-encodeIdx + partOffset;
  
  m_cuPelX   = cu-getCUPelX() + ((partUnitIdx   1)  
 (g_maxLog2CUSize - depth));
  m_cuPelY   = cu-getCUPelY() + ((partUnitIdx  1)  
 (g_maxLog2CUSize - depth));
 @@ -1067,9 +1066,9 @@
  }
  else
  {
 -if (getZorderIdxInCU()  0)
 +if (m_pic-getCU(m_cuAddr)-m_CULocalData-encodeIdx  0)
  {
 -return 
 m_pic-getCU(getAddr())-getLastCodedQP(getZorderIdxInCU());
 +return 
 m_pic-getCU(getAddr())-getLastCodedQP(m_pic-getCU(m_cuAddr)-m_CULocalData-encodeIdx);
  }
  else if (getAddr()  0  
 !(m_slice-m_pps-bEntropyCodingSyncEnabled 
  getAddr() % m_pic-getFrameWidthInCU() 
 == 0))
 @@ -2434,7 +2433,7 @@
  CU *cu = m_CULocalData + cuIdx;
  cu-log2CUSize = log2CUSize;
  cu-childIdx = child_idx;
 -cu-encodeIdx = g_depthScanIdx[yOffset][xOffset];
 +cu-encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4;
  cu-flags = 0;
  
  CU_SET_FLAG(cu-flags, CU::PRESENT, present_flag);
 diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComDataCU.h
 --- a/source/Lib/TLibCommon/TComDataCU.h  Mon Sep 29 17:37:47 2014 -0500
 +++ b/source/Lib/TLibCommon/TComDataCU.h  Tue Sep 30 08:52:56 2014 +0530
 @@ -272,10 +272,10 @@
  
  void  initCU(Frame* pic, uint32_t cuAddr);
  void  initEstData();
 -void  initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t 
 depth, int qp);
 +void  initSubCU(TComDataCU* cu, CU* cuData, uint32_t 
 partUnitIdx, uint32_t depth, int qp);
  void  loadCTUData(uint32_t maxCUSize);
  
 -void  copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, 
 uint32_t depth);
 +void  copyToSubCU(TComDataCU* lcu, CU* cuData, uint32_t 
 partUnitIdx, uint32_t depth);
  void  copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, 
 uint32_t depth, bool isRDObasedAnalysis = true);
  
  void  copyToPic(uint32_t depth);
 @@ -288,8 +288,6 @@
  
  uint32_t getAddr(){ return m_cuAddr; }
  
 -uint32_t getZorderIdxInCU()   { return m_absIdxInLCU; }
 -
  uint32_t  getSCUAddr() const   { return (m_cuAddr  
 g_maxFullDepth * 2) + m_absIdxInLCU; }
  
  
 diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComPattern.cpp
 --- a/source/Lib/TLibCommon/TComPattern.cpp   Mon Sep 29 17:37:47 2014 -0500
 +++ b/source/Lib/TLibCommon/TComPattern.cpp   Tue Sep 30 08:52:56 2014 +0530
 @@ -49,7 

[x265] sao: remove frame-based SAO

2014-09-29 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1412038092 -32400
#  Tue Sep 30 09:48:12 2014 +0900
# Node ID 3eacdaa304400b0100dcf1d1515ae1d24cbf4305
# Parent  5a6845566d1492d29af29ecc0cf75d644994735c
sao: remove frame-based SAO

diff -r 5a6845566d14 -r 3eacdaa30440 source/common/common.h
--- a/source/common/common.hMon Sep 29 17:37:47 2014 -0500
+++ b/source/common/common.hTue Sep 30 09:48:12 2014 +0900
@@ -212,34 +212,6 @@
 uint32_t count[8];
 };
 
-struct SAOQTPart
-{
-enum { NUM_DOWN_PART = 4 };
-
-int bestType;
-int subTypeIdx;  // indicates EO class or BO band position
-int offset[SAO_NUM_OFFSET];
-int startCUX;
-int startCUY;
-int endCUX;
-int endCUY;
-
-int partIdx;
-int partLevel;
-int partCol;
-int partRow;
-
-int downPartsIdx[NUM_DOWN_PART];
-int upPartIdx;
-
-boolbSplit;
-
-boolbProcessed;
-double  minCost;
-int64_t minDist;
-int minRate;
-};
-
 struct SaoLcuParam
 {
 bool mergeUpFlag;
@@ -266,10 +238,7 @@
 struct SAOParam
 {
 SaoLcuParam* saoLcuParam[3];
-SAOQTPart*   saoPart[3];
 bool bSaoFlag[2];
-bool oneUnitFlag[3];
-int  maxSplitLevel;
 int  numCuInHeight;
 int  numCuInWidth;
 
@@ -277,15 +246,11 @@
 {
 for (int i = 0; i  3; i++)
 {
-saoPart[i] = NULL;
 saoLcuParam[i] = NULL;
 }
 }
 ~SAOParam()
 {
-delete[] saoPart[0];
-delete[] saoPart[1];
-delete[] saoPart[2];
 delete[] saoLcuParam[0];
 delete[] saoLcuParam[1];
 delete[] saoLcuParam[2];
diff -r 5a6845566d14 -r 3eacdaa30440 source/common/param.cpp
--- a/source/common/param.cpp   Mon Sep 29 17:37:47 2014 -0500
+++ b/source/common/param.cpp   Tue Sep 30 09:48:12 2014 +0900
@@ -169,7 +169,6 @@
 /* SAO Loop Filter */
 param-bEnableSAO = 1;
 param-saoLcuBoundary = 0;
-param-saoLcuBasedOptimization = 1;
 
 /* Coding Quality */
 param-cbQpOffset = 0;
@@ -625,7 +624,6 @@
 OPT(lft) p-bEnableLoopFilter = atobool(value);
 OPT(sao) p-bEnableSAO = atobool(value);
 OPT(sao-lcu-bounds) p-saoLcuBoundary = atoi(value);
-OPT(sao-lcu-opt) p-saoLcuBasedOptimization = atoi(value);
 OPT(ssim) p-bEnableSsim = atobool(value);
 OPT(psnr) p-bEnablePsnr = atobool(value);
 OPT(hash) p-decodedPictureHashSEI = atoi(value);
@@ -1165,13 +1163,7 @@
 fprintf(stderr, nr=%d , param-noiseReduction);
 
 TOOLOPT(param-bEnableLoopFilter, lft);
-if (param-bEnableSAO)
-{
-if (param-saoLcuBasedOptimization)
-fprintf(stderr, sao-lcu );
-else
-fprintf(stderr, sao-frame );
-}
+TOOLOPT(param-bEnableSAO, sao);
 TOOLOPT(param-bEnableSignHiding, signhide);
 TOOLOPT(param-bCULossless, cu-lossless);
 TOOLOPT(param-bEnableFastIntra, fast-intra);
@@ -1245,7 +1237,6 @@
 BOOL(p-bEnableLoopFilter, lft);
 BOOL(p-bEnableSAO, sao);
 s += sprintf(s,  sao-lcu-bounds=%d, p-saoLcuBoundary);
-s += sprintf(s,  sao-lcu-opt=%d, p-saoLcuBasedOptimization);
 BOOL(p-bBPyramid, b-pyramid);
 BOOL(p-rc.cuTree, cutree);
 s += sprintf(s,  rc=%s, p-rc.rateControlMode == X265_RC_ABR ? (
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/encoder.cppTue Sep 30 09:48:12 2014 +0900
@@ -1247,10 +1247,6 @@
 x265_log(p, X265_LOG_INFO, Parallelism disabled, single thread 
mode\n);
 p-bEnableWavefront = 0;
 }
-if (!p-saoLcuBasedOptimization  p-frameNumThreads  1)
-{
-x265_log(p, X265_LOG_INFO, Warning: picture-based SAO used with frame 
parallelism\n);
-}
 
 if (p-keyframeMax  0)
 {
diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Mon Sep 29 17:37:47 2014 -0500
+++ b/source/encoder/frameencoder.cpp   Tue Sep 30 09:48:12 2014 +0900
@@ -85,7 +85,7 @@
 m_param = top-m_param;
 m_numRows = numRows;
 m_numCols = numCols;
-m_filterRowDelay = (m_param-bEnableSAO  
m_param-saoLcuBasedOptimization  m_param-saoLcuBoundary) ?
+m_filterRowDelay = (m_param-bEnableSAO  m_param-saoLcuBoundary) ?
 2 : (m_param-bEnableSAO || m_param-bEnableLoopFilter 
? 1 : 0);
 m_filterRowDelayCus = m_filterRowDelay * numCols;
 
@@ -323,17 +323,6 @@
 m_frameStats.percentSkip  = (double)totalSkip / totalCuCount;
 }
 
-if (slice-m_sps-bUseSAO  !m_param-saoLcuBasedOptimization)
-{
-/* frame based SAO */
-m_frameFilter.m_sao.SAOProcess(m_frame-getPicSym()-m_saoParam);
-restoreLFDisabledOrigYuv(m_frame);
-
-// Extend border after whole-frame SAO is finished
-for (int row = 0; row  m_numRows; row

[x265] refine deblocking filter

2014-09-26 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1411727676 -32400
#  Fri Sep 26 19:34:36 2014 +0900
# Node ID 06237deb460b629d6100d5b613d42033cc3477bd
# Parent  7dccbbed034970de161b361cd6e17ed4efca7226
refine deblocking filter

diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.hFri Sep 26 19:34:36 2014 +0900
@@ -155,6 +155,8 @@
 
 pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { 
return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
+int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return 
m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
 uint32_t getCUHeight(int rowNum);
 
 void  copyFromPicture(const x265_picture, int padx, int pady);
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -115,10 +115,10 @@
 uint32_t g_maxCUSize = MAX_CU_SIZE;
 uint32_t g_maxFullDepth  = NUM_FULL_DEPTH - 1;
 uint32_t g_maxCUDepth= NUM_CU_DEPTH - 1;
-uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
-uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
+uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
+uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
 
-const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
 {
 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28,
@@ -138,7 +138,7 @@
 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60
 };
 
-const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] =
+const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] =
 {
 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12,
diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h   Wed Sep 24 18:26:45 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h   Fri Sep 26 19:34:36 2014 +0900
@@ -54,6 +54,8 @@
 #define UNIT_SIZE   (1  LOG2_UNIT_SIZE)   // unit size of CU 
partition
 #define TMVP_UNIT_MASK  0xF0// mask for 
mapping index to CompressMV field
 
+#define MAX_NUM_PARTITIONS  256
+
 #define MIN_PU_SIZE 4
 #define MIN_TU_SIZE 4
 #define MAX_NUM_SPU_W   (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number 
of SPU in horizontal line
@@ -75,15 +77,15 @@
 extern const uint8_t g_chroma422IntraAngleMappingTable[36];
 
 // flexible conversion from relative to absolute index
-extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
+extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
 
 void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t 
startVal, uint32_t* curIdx);
 void initRasterToZscan(uint32_t maxFullDepth);
 
 // conversion of partition index to picture pel position
-extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
-extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
+extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
+extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS];
 
 // global variable (LCU width/height, max. CU depth)
 extern uint32_t g_maxLog2CUSize;
diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.cpp
--- a/source/common/deblock.cpp Wed Sep 24 18:26:45 2014 -0500
+++ b/source/common/deblock.cpp Fri Sep 26 19:34:36 2014 +0900
@@ -32,23 +32,24 @@
 #define DEBLOCK_SMALLEST_BLOCK  8
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-void Deblock::deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], 
uint8_t blockingStrength[])
+void Deblock::deblockCTU(TComDataCU* cu, int32_t dir)
 {
+uint8_t blockingStrength[MAX_NUM_PARTITIONS];
+
 memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions);
-memset(edgeFilter, 0, sizeof(bool) * m_numPartitions);
 
-deblockCU(cu, 0, 0, dir, edgeFilter, blockingStrength);
+deblockCU(cu, 0, 0, dir, blockingStrength);
 }
 
 /* Deblocking filter process in CU-based (the same function as conventional's)
  * param Edge the direction of the edge in block boundary 
(horizonta/vertical), which is added newly */
-void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, 
const int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, 
const int32_t dir, uint8_t blockingStrength[])
 {
 if (!cu-m_pic || cu-getPartitionSize(absZOrderIdx) == SIZE_NONE)
 return;
 
 Frame* pic = cu-m_pic;
-uint32_t

[x265] refine deblocking filter

2014-09-24 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1411549726 -32400
#  Wed Sep 24 18:08:46 2014 +0900
# Node ID 9f96fc8374d834d424190b0b1581054996985b67
# Parent  b2b7072ddbf73085d457bd6a71bca946e505dea8
refine deblocking filter

diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 11:48:15 2014 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 18:08:46 2014 +0900
@@ -155,6 +155,8 @@
 
 pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { 
return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
+int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return 
m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
+
 uint32_t getCUHeight(int rowNum);
 
 void  copyFromPicture(const x265_picture, int padx, int pady);
diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp
--- a/source/common/deblock.cpp Wed Sep 24 11:48:15 2014 +0530
+++ b/source/common/deblock.cpp Wed Sep 24 18:08:46 2014 +0900
@@ -48,7 +48,7 @@
 return;
 
 Frame* pic = cu-m_pic;
-uint32_t curNumParts = pic-getNumPartInCU()  (depth  1);
+uint32_t curNumParts = m_numPartitions  (depth * 2);
 
 if (cu-getDepth(absZOrderIdx)  depth)
 {
@@ -56,35 +56,34 @@
 uint32_t xmax = cu-m_slice-m_sps-picWidthInLumaSamples  - 
cu-getCUPelX();
 uint32_t ymax = cu-m_slice-m_sps-picHeightInLumaSamples - 
cu-getCUPelY();
 for (uint32_t partIdx = 0; partIdx  4; partIdx++, absZOrderIdx += 
qNumParts)
-{
 if (g_zscanToPelX[absZOrderIdx]  xmax  
g_zscanToPelY[absZOrderIdx]  ymax)
 deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, 
blockingStrength);
-}
 return;
 }
 
 Param params;
 setLoopfilterParam(cu, absZOrderIdx, params);
-setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, 
blockingStrength);
+setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, 
blockingStrength);
 setEdgefilterPU(cu, absZOrderIdx, dir, params, edgeFilter, 
blockingStrength);
 
 for (uint32_t partIdx = absZOrderIdx; partIdx  absZOrderIdx + 
curNumParts; partIdx++)
 {
-uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx  1) : !(partIdx  2));
+uint32_t bsCheck = !(partIdx  (1  dir));
 
-if (edgeFilter[partIdx]  bsCheck)
+if (bsCheck  edgeFilter[partIdx])
 getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
 }
 
-uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK  LOG2_UNIT_SIZE;
+const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK  LOG2_UNIT_SIZE;
 uint32_t sizeInPU = pic-getNumPartInCUSize()  depth;
 uint32_t shiftFactor = (dir == EDGE_VER) ? cu-getHorzChromaShift() : 
cu-getVertChromaShift();
-const bool alwaysDoChroma = cu-getChromaFormat() == X265_CSP_I444;
-
+uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK  shiftFactor)  
LOG2_UNIT_SIZE) - 1;
+uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : 
g_zscanToPelY[absZOrderIdx])  LOG2_UNIT_SIZE;
+
 for (uint32_t e = 0; e  sizeInPU; e += partIdxIncr)
 {
 edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
-if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK  shiftFactor) 
 LOG2_UNIT_SIZE)))
+if (!((e0 + e)  chromaMask))
 edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, 
blockingStrength);
 }
 }
@@ -115,66 +114,60 @@
 }
 }
 
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t 
absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t 
blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t 
depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
 {
 if (cu-getTransformIdx(absZOrderIdx) + cu-getDepth(absZOrderIdx)  
(uint8_t)depth)
 {
-const uint32_t curNumParts = cu-m_pic-getNumPartInCU()  (depth  
1);
+const uint32_t curNumParts = m_numPartitions  (depth * 2);
 const uint32_t qNumParts   = curNumParts  2;
 
 for (uint32_t partIdx = 0; partIdx  4; partIdx++, absZOrderIdx += 
qNumParts)
-{
-uint32_t nsAddr = absZOrderIdx;
-setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, 
edgeFilter, blockingStrength);
-}
+setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, 
blockingStrength);
 return;
 }
 
 uint32_t widthInBaseUnits  = 1  (cu-getLog2CUSize(absZOrderIdx) - 
cu-getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
-setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, 
blockingStrength, widthInBaseUnits);
+setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, 
blockingStrength, widthInBaseUnits);
 }
 
 void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t 
dir

[x265] simplify intra filter (with fix for da61cf406f16) (Re: primitives: intra_pred[4][35] = intra_pred[35][4] (avoid *35))

2014-09-22 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1411388939 -32400
#  Mon Sep 22 21:28:59 2014 +0900
# Node ID 3f229951f826e1d09dd0258721ef5a1f9fdc4392
# Parent  fd435504f15e0b13dabba9efe0aa94e7047060b5
simplify intra filter (with fix for da61cf406f16)

diff -r fd435504f15e -r 3f229951f826 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 13:14:54 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 21:28:59 2014 +0900
@@ -52,133 +52,96 @@
 void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, 
uint32_t partDepth, pixel* adiBuf,
  pixel* refAbove, pixel* refLeft, pixel* 
refAboveFlt, pixel* refLeftFlt, int dirMode)
 {
-pixel* roiOrigin;
-pixel* adiTemp;
-
-int picStride = cu-m_pic-getStride();
-
 IntraNeighbors intraNeighbors;
 
 initIntraNeighbors(cu, zOrderIdxInPart, partDepth, true, intraNeighbors);
 uint32_t tuSize = intraNeighbors.tuSize;
 uint32_t tuSize2 = tuSize  1;
 
-roiOrigin = cu-m_pic-getPicYuvRec()-getLumaAddr(cu-getAddr(), 
cu-getZorderIdxInCU() + zOrderIdxInPart);
-adiTemp   = adiBuf;
+pixel* adiOrigin = cu-m_pic-getPicYuvRec()-getLumaAddr(cu-getAddr(), 
cu-getZorderIdxInCU() + zOrderIdxInPart);
+int picStride = cu-m_pic-getStride();
 
-fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
+fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors);
 
+// initialization of ADI buffers
+const int bufOffset = tuSize - 1;
+refAbove += bufOffset;
+refLeft += bufOffset;
+
+//  ADI_BUF_STRIDE * (2 * tuSize + 1);
+memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
+for (int k = 0; k  tuSize2 + 1; k++)
+refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
+
 bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32)  
tuSize : g_intraFilterFlags[dirMode]  tuSize);
 
 if (bUseFilteredPredictions)
 {
 // generate filtered intra prediction samples
-// left and left above border + above and above right border + top 
left corner = length of 3. filter buffer
-int bufSize = tuSize2 + tuSize2 + 1;
-uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of 
elements in one buffer
+refAboveFlt += bufOffset;
+refLeftFlt += bufOffset;
 
-pixel* filterBuf  = adiBuf + wh; // buffer for 2. filtering 
(sequential)
-pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering 
(sequential)
+bool bStrongSmoothing = (tuSize == 32  
cu-m_slice-m_sps-bUseStrongIntraSmoothing);
 
-int l = 0;
-// left border from bottom to top
-for (int i = 0; i  tuSize2; i++)
+if (bStrongSmoothing)
 {
-filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (tuSize2 - i)];
-}
+const int trSize  = 32;
+const int trSize2 = 32 * 2;
+const int threshold = 1  (X265_DEPTH - 5);
+int refBL = refLeft[trSize2];
+int refTL = refAbove[0];
+int refTR = refAbove[trSize2];
+bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize])   
threshold 
+abs(refTL + refTR - 2 * refAbove[trSize])  
threshold);
 
-// top left corner
-filterBuf[l++] = adiTemp[0];
+if (bStrongSmoothing)
+{
+// bilinear interpolation
+const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1;
+int init = (refTL  shift) + tuSize;
+int delta;
 
-// above border from left to right
-memcpy(filterBuf[l], adiTemp[1], tuSize2 * sizeof(*filterBuf));
+refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
 
-if (tuSize = 32  cu-m_slice-m_sps-bUseStrongIntraSmoothing)
-{
-int bottomLeft = filterBuf[0];
-int topLeft = filterBuf[tuSize2];
-int topRight = filterBuf[bufSize - 1];
-int threshold = 1  (X265_DEPTH - 5);
-bool bilinearLeft = abs(bottomLeft + topLeft - 2 * 
filterBuf[tuSize])  threshold;
-bool bilinearAbove  = abs(topLeft + topRight - 2 * 
filterBuf[tuSize2 + tuSize])  threshold;
+//TODO: Performance Primitive???
+delta = refBL - refTL;
+for (int i = 1; i  trSize2; i++)
+refLeftFlt[i] = (init + delta * i)  shift;
+refLeftFlt[trSize2] = refLeft[trSize2];
 
-if (bilinearLeft  bilinearAbove)
-{
-int shift = intraNeighbors.log2TrSize + 1;
-filterBufN[0] = filterBuf[0];
-filterBufN[tuSize2] = filterBuf[tuSize2];
-filterBufN[bufSize - 1] = filterBuf[bufSize - 1];
-//TODO: Performance Primitive???
-for (int i = 1; i  tuSize2; i

[x265] primitives: intra_pred[4][35] = intra_pred[35][4] (avoid *35)

2014-09-19 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 142115 -32400
#  Fri Sep 19 16:35:15 2014 +0900
# Node ID 3a2c1caf0f80e4ee2c1216636a3f9d067f719d6f
# Parent  4680ab4f92b8cc809b1e8dbc927126ec70bcc5c5
primitives: intra_pred[4][35] = intra_pred[35][4] (avoid *35)

diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComPattern.cpp Fri Sep 19 16:35:15 2014 +0900
@@ -68,9 +68,9 @@
 
 fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
 
-bool bUseFilteredPredictions = (dirMode == ALL_IDX || 
(g_intraFilterFlags[dirMode] & tuSize));
+bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32) & 
tuSize : g_intraFilterFlags[dirMode] & tuSize);
 
-if (bUseFilteredPredictions && 8 <= tuSize && tuSize <= 32)
+if (bUseFilteredPredictions)
 {
 // generate filtered intra prediction samples
 // left and left above border + above and above right border + top 
left corner = length of 3. filter buffer
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 19 16:35:15 2014 +0900
@@ -497,7 +497,7 @@
 };
 
 /* g_intraFilterFlags[dir] & trSize */
-const uint8_t g_intraFilterFlags[35] =
+const uint8_t g_intraFilterFlags[NUM_INTRA_MODE] =
 {
 0x38, 0x00,
 0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 
0x30, 0x30, 0x30, 0x30,
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h   Thu Sep 18 18:16:25 2014 +0530
+++ b/source/Lib/TLibCommon/TComRom.h   Fri Sep 19 16:35:15 2014 +0900
@@ -153,7 +153,7 @@
 extern const uint8_t x265_exp2_lut[64];
 
 // Intra tables
-extern const uint8_t g_intraFilterFlags[35];
+extern const uint8_t g_intraFilterFlags[NUM_INTRA_MODE];
 
 extern const uint32_t g_depthInc[3][4];
 
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/intrapred.cpp
--- a/source/common/intrapred.cpp   Thu Sep 18 18:16:25 2014 +0530
+++ b/source/common/intrapred.cpp   Fri Sep 19 16:35:15 2014 +0900
@@ -281,22 +281,22 @@
 
 void Setup_C_IPredPrimitives(EncoderPrimitives p)
 {
-p.intra_pred[BLOCK_4x4][0] = planar_pred_c2;
-p.intra_pred[BLOCK_8x8][0] = planar_pred_c3;
-p.intra_pred[BLOCK_16x16][0] = planar_pred_c4;
-p.intra_pred[BLOCK_32x32][0] = planar_pred_c5;
+p.intra_pred[0][BLOCK_4x4] = planar_pred_c2;
+p.intra_pred[0][BLOCK_8x8] = planar_pred_c3;
+p.intra_pred[0][BLOCK_16x16] = planar_pred_c4;
+p.intra_pred[0][BLOCK_32x32] = planar_pred_c5;
 
 // Intra Prediction DC
-p.intra_pred[BLOCK_4x4][1] = intra_pred_dc_c4;
-p.intra_pred[BLOCK_8x8][1] = intra_pred_dc_c8;
-p.intra_pred[BLOCK_16x16][1] = intra_pred_dc_c16;
-p.intra_pred[BLOCK_32x32][1] = intra_pred_dc_c32;
+p.intra_pred[1][BLOCK_4x4] = intra_pred_dc_c4;
+p.intra_pred[1][BLOCK_8x8] = intra_pred_dc_c8;
+p.intra_pred[1][BLOCK_16x16] = intra_pred_dc_c16;
+p.intra_pred[1][BLOCK_32x32] = intra_pred_dc_c32;
 for (int i = 2; i < NUM_INTRA_MODE; i++)
 {
-p.intra_pred[BLOCK_4x4][i] = intra_pred_ang_c4;
-p.intra_pred[BLOCK_8x8][i] = intra_pred_ang_c8;
-p.intra_pred[BLOCK_16x16][i] = intra_pred_ang_c16;
-p.intra_pred[BLOCK_32x32][i] = intra_pred_ang_c32;
+p.intra_pred[i][BLOCK_4x4] = intra_pred_ang_c4;
+p.intra_pred[i][BLOCK_8x8] = intra_pred_ang_c8;
+p.intra_pred[i][BLOCK_16x16] = intra_pred_ang_c16;
+p.intra_pred[i][BLOCK_32x32] = intra_pred_ang_c32;
 }
 
 p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c2;
diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/primitives.h
--- a/source/common/primitives.hThu Sep 18 18:16:25 2014 +0530
+++ b/source/common/primitives.hFri Sep 19 16:35:15 2014 +0900
@@ -91,6 +91,8 @@
 NUM_SQUARE_BLOCKS
 };
 
+enum { NUM_TR_SIZE = 4 };
+
 // NOTE: Not all DCT functions support dest stride
 enum Dcts
 {
@@ -145,7 +147,6 @@
 typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, 
intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight);
 typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t val);
 
-typedef void (*intra_planar_t)(pixel* above, pixel* left, pixel* dst, intptr_t 
dstStride);
 typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel *refLeft, 
pixel *refAbove, int dirMode, int bFilter);
 typedef void (*intra_allangs_t)(pixel *dst, pixel *above0, pixel *left0, pixel 
*above1, pixel *left1, int bLuma);
 
@@ -259,8 +260,8 @@
 pixelavg_pp_t   pixelavg_pp[NUM_LUMA_PARTITIONS];
 addAvg_tluma_addAvg[NUM_LUMA_PARTITIONS];
 
-intra_pred_tintra_pred[NUM_SQUARE_BLOCKS - 1][NUM_INTRA_MODE];
-intra_allangs_t intra_pred_allangs[NUM_SQUARE_BLOCKS - 1];
+intra_pred_t

Re: [x265] inline simple functions

2014-09-18 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1411087626 -32400
#  Fri Sep 19 09:47:06 2014 +0900
# Node ID bc71daca1b43fd8aee5ffb770629a086966d4674
# Parent  25dde1ffab66bf29fa2a16945b6e3dff9e2954ec
inline simple functions

diff -r 25dde1ffab66 -r bc71daca1b43 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Thu Sep 18 18:02:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Fri Sep 19 09:47:06 2014 +0900
@@ -88,9 +88,6 @@
 m_DataCUMemPool.m_tqBypassYuvMemBlock  = NULL;
 }
 
-TComDataCU::~TComDataCU()
-{}
-
 
 bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t 
sizeC, uint32_t numBlocks, bool isLossless)
 {
@@ -1086,15 +1083,6 @@
 }
 }
 
-/** Check whether the CU is coded in lossless coding mode
- * \param   absPartIdx
- * \returns true if the CU is coded in lossless coding mode; false if otherwise
- */
-bool TComDataCU::isLosslessCoded(uint32_t absPartIdx)
-{
-return m_slice-m_pps-bTransquantBypassEnabled  
getCUTransquantBypass(absPartIdx);
-}
-
 /** Get allowed chroma intra modes
 *\param   absPartIdx
 *\param   uiModeList  pointer to chroma intra modes array
@@ -1224,11 +1212,6 @@
 return ctx;
 }
 
-uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx)
-{
-return getDepth(absPartIdx);
-}
-
 void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth)
 {
 uint32_t curPartNum = m_pic-getNumPartInCU()  (depth  1);
@@ -2111,11 +2094,6 @@
 return numMvc;
 }
 
-bool TComDataCU::isBipredRestriction()
-{
-return getLog2CUSize(0) == 3  getPartitionSize(0) != SIZE_2Nx2N;
-}
-
 void TComDataCU::clipMv(MV outMV)
 {
 int mvshift = 2;
@@ -2130,15 +2108,6 @@
 outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
 }
 
-/** Test whether the current block is skipped
- * \param partIdx Block index
- * \returns Flag indicating whether the block is skipped
- */
-bool TComDataCU::isSkipped(uint32_t partIdx)
-{
-return getSkipFlag(partIdx);
-}
-
 // 

 // Protected member functions
 // 

@@ -2438,9 +2407,4 @@
 result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
 }
 
-uint32_t TComDataCU::getSCUAddr()
-{
-return (m_cuAddr  g_maxFullDepth * 2) + m_absIdxInLCU;
-}
-
 //! \}
diff -r 25dde1ffab66 -r bc71daca1b43 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.hThu Sep 18 18:02:36 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.hFri Sep 19 09:47:06 2014 +0900
@@ -248,7 +248,7 @@
 public:
 
 TComDataCU();
-virtual ~TComDataCU();
+~TComDataCU() {}
 
 uint32_t  m_psyEnergy;
 uint64_t  m_totalPsyCost;
@@ -290,7 +290,8 @@
 
 uint32_t getZorderIdxInCU()   { return m_absIdxInLCU; }
 
-uint32_t  getSCUAddr();
+uint32_t  getSCUAddr() const   { return (m_cuAddr << 
g_maxFullDepth * 2) + m_absIdxInLCU; }
+
 
 uint32_t  getCUPelX()  { return m_cuPelX; }
 
@@ -344,7 +345,7 @@
 char  getLastCodedQP(uint32_t absPartIdx);
 void  setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, 
uint32_t depth, bool foundNonZeroCbf);
 
-bool  isLosslessCoded(uint32_t absPartIdx);
+bool  isLosslessCoded(uint32_t idx) const { return 
m_cuTransquantBypass[idx] && m_slice->m_pps->bTransquantBypassEnabled; }
 
 uint8_t*  getTransformIdx(){ return m_trIdx; }
 
@@ -488,10 +489,9 @@
 // member functions for modes
 // 
---
 
-bool  isIntra(uint32_t partIdx)  { return m_predModes[partIdx] == 
MODE_INTRA; }
-
-bool  isSkipped(uint32_t partIdx); /// SKIP (no residual)
-bool  isBipredRestriction();
+bool  isIntra(uint32_t partIdx) const { return 
m_predModes[partIdx] == MODE_INTRA; }
+bool  isSkipped(uint32_t idx) const { return m_skipFlag[idx]; }
+bool  isBipredRestriction() const { return m_log2CUSize[0] == 3 && 
m_partSizes[0] != SIZE_2Nx2N; }
 
 // 
---
 // member functions for symbol prediction (most probable / mode conversion)
@@ -506,7 +506,7 @@
 
 uint32_t  getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
 uint32_t  getCtxSkipFlag(uint32_t absPartIdx);
-uint32_t  getCtxInterDir(uint32_t absPartIdx);
+uint32_t  getCtxInterDir(uint32_t idx) const { return m_depth[idx]; }
 
 // 
---
 // member

[x265] inline simple functions

2014-09-17 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1410947343 -32400
#  Wed Sep 17 18:49:03 2014 +0900
# Node ID b00d1f46a7632572df3be47decee9be9881c511c
# Parent  199e8f2e0d54abd16657ccd0952bdc25cadf8420
inline simple functions

diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Tue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Wed Sep 17 18:49:03 2014 +0900
@@ -88,9 +88,6 @@
 m_DataCUMemPool.m_tqBypassYuvMemBlock  = NULL;
 }
 
-TComDataCU::~TComDataCU()
-{}
-
 
 bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t 
sizeC, uint32_t numBlocks, bool isLossless)
 {
@@ -1086,15 +1083,6 @@
 }
 }
 
-/** Check whether the CU is coded in lossless coding mode
- * \param   absPartIdx
- * \returns true if the CU is coded in lossless coding mode; false if otherwise
- */
-bool TComDataCU::isLosslessCoded(uint32_t absPartIdx)
-{
-return m_slice-m_pps-bTransquantBypassEnabled  
getCUTransquantBypass(absPartIdx);
-}
-
 /** Get allowed chroma intra modes
 *\param   absPartIdx
 *\param   uiModeList  pointer to chroma intra modes array
@@ -1224,11 +1212,6 @@
 return ctx;
 }
 
-uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx)
-{
-return getDepth(absPartIdx);
-}
-
 void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth)
 {
 uint32_t curPartNum = m_pic-getNumPartInCU()  (depth  1);
@@ -2111,11 +2094,6 @@
 return numMvc;
 }
 
-bool TComDataCU::isBipredRestriction()
-{
-return getLog2CUSize(0) == 3  getPartitionSize(0) != SIZE_2Nx2N;
-}
-
 void TComDataCU::clipMv(MV outMV)
 {
 int mvshift = 2;
@@ -2130,15 +2108,6 @@
 outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
 }
 
-/** Test whether the current block is skipped
- * \param partIdx Block index
- * \returns Flag indicating whether the block is skipped
- */
-bool TComDataCU::isSkipped(uint32_t partIdx)
-{
-return getSkipFlag(partIdx);
-}
-
 // 

 // Protected member functions
 // 

@@ -2438,9 +2407,4 @@
 result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
 }
 
-uint32_t TComDataCU::getSCUAddr()
-{
-return (m_cuAddr  g_maxFullDepth * 2) + m_absIdxInLCU;
-}
-
 //! \}
diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.hTue Sep 16 17:50:06 2014 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.hWed Sep 17 18:49:03 2014 +0900
@@ -248,7 +248,7 @@
 public:
 
 TComDataCU();
-virtual ~TComDataCU();
+~TComDataCU() {}
 
 uint32_t  m_psyEnergy;
 uint64_t  m_totalPsyCost;
@@ -290,7 +290,8 @@
 
 uint32_t getZorderIdxInCU()   { return m_absIdxInLCU; }
 
-uint32_t  getSCUAddr();
+uint32_t  getSCUAddr() const   { return (m_cuAddr << 
g_maxFullDepth * 2) + m_absIdxInLCU; }
+
 
 uint32_t  getCUPelX()  { return m_cuPelX; }
 
@@ -344,7 +345,7 @@
 char  getLastCodedQP(uint32_t absPartIdx);
 void  setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, 
uint32_t depth, bool foundNonZeroCbf);
 
-bool  isLosslessCoded(uint32_t absPartIdx);
+bool  isLosslessCoded(uint32_t idx) const { return 
m_cuTransquantBypass[idx] && m_slice->m_pps->bTransquantBypassEnabled; }
 
 uint8_t*  getTransformIdx(){ return m_trIdx; }
 
@@ -488,10 +489,9 @@
 // member functions for modes
 // 
---
 
-bool  isIntra(uint32_t partIdx)  { return m_predModes[partIdx] == 
MODE_INTRA; }
-
-bool  isSkipped(uint32_t partIdx); /// SKIP (no residual)
-bool  isBipredRestriction();
+bool  isIntra(uint32_t partIdx) const { return 
m_predModes[partIdx] == MODE_INTRA; }
+bool  isSkipped(uint32_t idx) const { return m_skipFlag[idx]; }
+bool  isBipredRestriction() const { return m_log2CUSize[0] == 3 && 
m_partSizes[0] != SIZE_2Nx2N; }
 
 // 
---
 // member functions for symbol prediction (most probable / mode conversion)
@@ -506,7 +506,7 @@
 
 uint32_t  getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
 uint32_t  getCtxSkipFlag(uint32_t absPartIdx);
-uint32_t  getCtxInterDir(uint32_t absPartIdx);
+uint32_t  getCtxInterDir(uint32_t idx) const { return m_depth[idx]; }
 
 // 
---
 // member

Re: [x265] [PATCH] x86inc.asm: fix vpbroadcastd bug on Mac platform

2014-09-08 Thread Satoshi Nakagawa


At changeset 27364e9, the wrong version was pushed.

The correct patch is Min's later one.



--- a/source/common/x86/x86inc.asm  Fri Sep 05 17:36:18 2014 -0700
+++ b/source/common/x86/x86inc.asm  Sun Sep 07 15:24:49 2014 +0900
@@ -1489,6 +1489,6 @@
 movd %1 %+ xmm, %2
 vpbroadcastd %1, %1 %+ xmm
   %else
-vbroadcastsd %1, %2
+vpbroadcastd %1, %2
   %endif
 %endmacro



 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Min Chen
 Sent: Saturday, September 06, 2014 8:48 AM
 To: x265-devel@videolan.org
 Subject: [x265] [PATCH] x86inc.asm: fix vpbroadcastd bug on Mac platform
 
 # HG changeset patch
 # User Min Chen chenm...@163.com
 # Date 1409960883 25200
 # Node ID 8abcfdeeea2eab2e11da59002dad42dcf16aeab8
 # Parent  e0db7914e7020a6a6454fbf1d3ce793efa2209a1
 x86inc.asm: fix vpbroadcastd bug on Mac platform
 
 diff -r e0db7914e702 -r 8abcfdeeea2e source/common/x86/x86inc.asm
 --- a/source/common/x86/x86inc.asmFri Sep 05 16:47:42 2014 -0700
 +++ b/source/common/x86/x86inc.asmFri Sep 05 16:48:03 2014 -0700
 @@ -888,6 +888,8 @@
  %define y%1   mm%1
  %define ymmxmm%1 xmm%1
  %define ymmymm%1 ymm%1
 +%define ymm%1xmm xmm%1
 +%define xmm%1ymm ymm%1
  %define xm%1 xmm %+ m%1
  %define ym%1 ymm %+ m%1
  %endmacro
 @@ -1480,3 +1482,13 @@
  %endif
  %endmacro
  %endif
 +
 +; workaround: vpbroadcastd with register, the yasm will generate wrong code
 +%macro vpbroadcastd 2
 +  %ifid %2
 +movd %1 %+ xmm, %2
 +vpbroadcastd %1, %1 %+ xmm
 +  %else
 +vpbroadcastd %1, %2
 +  %endif
 +%endmacro
 
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix CHECKED_BUILD

2014-09-06 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1410060921 -32400
#  Sun Sep 07 12:35:21 2014 +0900
# Node ID 5bdb43ad058348fbe847b5c8e63ea9712a41c6a4
# Parent  ed4c9acafc11ccdd9ea5221175a891e43e24b1cc
fix CHECKED_BUILD

diff -r ed4c9acafc11 -r 5bdb43ad0583 source/common/dct.cpp
--- a/source/common/dct.cpp Fri Sep 05 17:36:18 2014 -0700
+++ b/source/common/dct.cpp Sun Sep 07 12:35:21 2014 +0900
@@ -729,7 +729,7 @@
 X265_CHECK(num <= 32 * 32, "dequant num %d too large\n", num);
 X265_CHECK((num % 8) == 0, "dequant num %d not multiple of 8\n", num);
 X265_CHECK(shift <= 10, "shift too large %d\n", shift);
-X265_CHECK(((int)coef & 31) == 0, "dequant coef buffer not aligned\n");
+X265_CHECK(((intptr_t)coef & 31) == 0, "dequant coef buffer not aligned\n");
 
 int add, coeffQ;
 
diff -r ed4c9acafc11 -r 5bdb43ad0583 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Fri Sep 05 17:36:18 2014 -0700
+++ b/source/encoder/analysis.cpp   Sun Sep 07 12:35:21 2014 +0900
@@ -1056,21 +1056,30 @@
 copyYuv2Pic(pic, outBestCU-getAddr(), absPartIdx, depth);
 }
 
+#if CHECKED_BUILD || _DEBUG
 /* Assert if Best prediction mode is NONE
  * Selected mode's RD-cost must be not MAX_INT64 */
 if (bInsidePicture)
 {
 X265_CHECK(outBestCU-getPartitionSize(0) != SIZE_NONE, no best 
prediction size\n);
 X265_CHECK(outBestCU-getPredictionMode(0) != MODE_NONE, no best 
prediction mode\n);
-if (m_rdCost.m_psyRd)
+if (m_param-rdLevel  1)
 {
-X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best 
partition cost\n);
+if (m_rdCost.m_psyRd)
+{
+X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best 
partition cost\n);
+}
+else
+{
+X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best 
partition cost\n);
+}
 }
 else
 {
-X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best 
partition cost\n);
+X265_CHECK(outBestCU-m_sa8dCost != MAX_INT64, no best partition 
cost\n);
 }
 }
+#endif
 
 x265_emms();
 }
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix sao

2014-09-06 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1410062067 -32400
#  Sun Sep 07 12:54:27 2014 +0900
# Node ID ce0c1eb81072b4dae5253b27d5c9bb3117975066
# Parent  ed4c9acafc11ccdd9ea5221175a891e43e24b1cc
fix sao

diff -r ed4c9acafc11 -r ce0c1eb81072 source/encoder/sao.cpp
--- a/source/encoder/sao.cppFri Sep 05 17:36:18 2014 -0700
+++ b/source/encoder/sao.cppSun Sep 07 12:54:27 2014 +0900
@@ -1481,14 +1481,10 @@
 pixel* fenc;
 pixel* recon;
 int stride;
-int lcuHeight;
-int lcuWidth;
 uint32_t rPelX;
 uint32_t bPelY;
 int64_t* stats;
 int64_t* count;
-uint32_t picWidthTmp = 0;
-uint32_t picHeightTmp = 0;
 int classIdx;
 int startX;
 int startY;
@@ -1510,38 +1506,39 @@
 {
 // NOTE: Col
 {
-lcuHeight = g_maxCUSize;
-lcuWidth  = g_maxCUSize;
 addr= idxX + frameWidthInCU * idxY;
 cu  = pic-getCU(addr);
+
+uint32_t picWidthTmp  = m_param-sourceWidth;
+uint32_t picHeightTmp = m_param-sourceHeight;
+int lcuWidth  = g_maxCUSize;
+int lcuHeight = g_maxCUSize;
 lPelX   = cu-getCUPelX();
 tPelY   = cu-getCUPelY();
-
-memset(m_countPreDblk[addr], 0, 3 * MAX_NUM_SAO_TYPE * 
MAX_NUM_SAO_CLASS * sizeof(int64_t));
-memset(m_offsetOrgPreDblk[addr], 0, 3 * MAX_NUM_SAO_TYPE * 
MAX_NUM_SAO_CLASS * sizeof(int64_t));
+rPelX = lPelX + lcuWidth;
+bPelY = tPelY + lcuHeight;
+rPelX = rPelX > picWidthTmp  ? picWidthTmp  : rPelX;
+bPelY = bPelY > picHeightTmp ? picHeightTmp : bPelY;
+lcuWidth  = rPelX - lPelX;
+lcuHeight = bPelY - tPelY;
+
+memset(m_countPreDblk[addr], 0, sizeof(PerPlane));
+memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane));
+
 for (int plane = 0; plane  3; plane++)
 {
 isChroma = !!plane;
-if (plane == 0)
+if (plane == 1)
 {
-picWidthTmp  = m_param-sourceWidth;
-picHeightTmp = m_param-sourceHeight;
+picWidthTmp  >>= m_hChromaShift;
+picHeightTmp >>= m_vChromaShift;
+lcuWidth >>= m_hChromaShift;
+lcuHeight >>= m_vChromaShift;
+lPelX >>= m_hChromaShift;
+tPelY >>= m_vChromaShift;
+rPelX = lPelX + lcuWidth;
+bPelY = tPelY + lcuHeight;
 }
-else if (plane == 1)
-{
-picWidthTmp  = m_param-sourceWidth   isChroma;
-picHeightTmp = m_param-sourceHeight  isChroma;
-lcuWidth = lcuWidth isChroma;
-lcuHeight= lcuHeightisChroma;
-lPelX= lPelXisChroma;
-tPelY= tPelYisChroma;
-}
-rPelX = lPelX + lcuWidth;
-bPelY = tPelY + lcuHeight;
-rPelX = rPelX  picWidthTmp  ? picWidthTmp  : rPelX;
-bPelY = bPelY  picHeightTmp ? picHeightTmp : bPelY;
-lcuWidth  = rPelX - lPelX;
-lcuHeight = bPelY - tPelY;
 
 stride   = (plane == 0) ? pic-getStride() : pic-getCStride();
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix cbf context

2014-09-04 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409843113 -32400
#  Fri Sep 05 00:05:13 2014 +0900
# Node ID 85a4327fe163a91b4725891515234c87e1153289
# Parent  b686cb0abd713f6fefcc75d00725232d12e36089
fix cbf context

diff -r b686cb0abd71 -r 85a4327fe163 source/Lib/TLibCommon/ContextTables.h
--- a/source/Lib/TLibCommon/ContextTables.h Thu Sep 04 17:06:16 2014 +0530
+++ b/source/Lib/TLibCommon/ContextTables.h Fri Sep 05 00:05:13 2014 +0900
@@ -65,7 +65,7 @@
 
 #define NUM_REF_NO_CTX2   /// number of context models 
for reference index
 #define NUM_TRANS_SUBDIV_FLAG_CTX 3   /// number of context models 
for transform subdivision flags
-#define NUM_QT_CBF_CTX6   /// number of context models 
for QT CBF
+#define NUM_QT_CBF_CTX7   /// number of context models 
for QT CBF
 #define NUM_QT_ROOT_CBF_CTX   1   /// number of context models 
for QT ROOT CBF
 #define NUM_DELTA_QP_CTX  3   /// number of context models 
for dQP
 
@@ -144,7 +144,7 @@
 #define NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4  3
 #define NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4  1
 
-static const uint32_t ctxCbf[3][3] = { { 1, 0, 0 }, { 2, 3, 4 }, { 2, 3, 4} };
+static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 
2, 3, 4, 5, 6 } };
 static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] 
= { { 0,  9, 21 }, { 0,  9, 12 } };
 static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3]  
= { { 9, 12,  6 }, { 9,  3,  3 } };
 static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE]   
= {  6, 0  };
@@ -259,9 +259,9 @@
 static const uint8_t
 INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
 {
-{ 153,  111,  149,   92,  167,  154, },
-{ 153,  111,  149,  107,  167,  154, },
-{ 111,  141,   94,  138,  182,  154, },
+{ 153,  111,  149,   92,  167,  154,  154 },
+{ 153,  111,  149,  107,  167,  154,  154 },
+{ 111,  141,   94,  138,  182,  154,  154 },
 };
 
 static const uint8_t
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix getQuadtreeTULog2MinSizeInCU()

2014-09-04 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409892209 -32400
#  Fri Sep 05 13:43:29 2014 +0900
# Node ID e30143ac87fe32b6c20152e8fd41723cc9fdeb3c
# Parent  93db2f53fe573537bcd4eb53ca3cdb69af557eb5
fix getQuadtreeTULog2MinSizeInCU()

diff -r 93db2f53fe57 -r e30143ac87fe source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Thu Sep 04 16:42:24 2014 -0700
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Fri Sep 05 13:43:29 2014 +0900
@@ -1197,9 +1197,9 @@
 {
 uint32_t log2CUSize = getLog2CUSize(absPartIdx);
 PartSize partSize   = getPartitionSize(absPartIdx);
-uint32_t quadtreeTUMaxDepth = getPredictionMode(0) == MODE_INTRA ? 
m_slice->m_sps->quadtreeTUMaxDepthIntra : 
m_slice->m_sps->quadtreeTUMaxDepthInter;
+uint32_t quadtreeTUMaxDepth = getPredictionMode(absPartIdx) == MODE_INTRA 
? m_slice->m_sps->quadtreeTUMaxDepthIntra : 
m_slice->m_sps->quadtreeTUMaxDepthInter;
 uint32_t intraSplitFlag = (getPredictionMode(absPartIdx) == MODE_INTRA && 
partSize == SIZE_NxN) ? 1 : 0;
-uint32_t interSplitFlag = ((quadtreeTUMaxDepth == 1) && 
(getPredictionMode(0) == MODE_INTER) && (partSize != SIZE_2Nx2N));
+uint32_t interSplitFlag = ((quadtreeTUMaxDepth == 1) && 
(getPredictionMode(absPartIdx) == MODE_INTER) && (partSize != SIZE_2Nx2N));
 
 tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
 tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] asm: fix dequant_normal

2014-08-30 Thread Satoshi Nakagawa
> How about remove '#if...'?
> The asm code didn't check it.

added '%if...' to asm code :)


# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409378187 -32400
#  Sat Aug 30 14:56:27 2014 +0900
# Node ID c4f15840feb443f8c38ba58b52ef5ba6d518e626
# Parent  4e2d9ac6d489e82e70544d626c89964ee653c452
asm: fix dequant_normal

diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/dct.cpp
--- a/source/common/dct.cpp Fri Aug 29 11:12:49 2014 +0200
+++ b/source/common/dct.cpp Sat Aug 30 14:56:27 2014 +0900
@@ -720,7 +720,9 @@
 
 void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int 
scale, int shift)
 {
-#if !HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
+X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
+#else
 // NOTE: maximum of scale is (72 * 256)
 X265_CHECK(scale < 32768, "dequant invalid scale %d\n", scale);
 #endif
diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200
+++ b/source/common/x86/pixel-util8.asm Sat Aug 30 14:56:27 2014 +0900
@@ -1005,23 +1005,23 @@
 ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int 
scale, int shift)
 ;-
 INIT_XMM sse4
-cglobal dequant_normal, 4,5,5
-movdm1, r3 ; m1 = word [scale]
+cglobal dequant_normal, 5,5,5
+movdm1, r3  ; m1 = word [scale]
+movam2, [pw_1]
+%if HIGH_BIT_DEPTH
 cmp r3d, 32767
 jle .skip
-
 psrld   m1, 2
-mov r4d, r4m
+sub r4d, 2
+.skip:
+%endif
 movdm0, r4d ; m0 = shift
 xor r3d, r3d
 dec r4d
 bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
+movdm3, r3d
+punpcklwd   m1, m3
 pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-
 ; m0 = shift
 ; m1 = scale
 ; m2 = word [1]
@@ -1029,45 +1029,6 @@
 movum3, [r0]
 movum4, [r0 + 16]
 packssdwm3, m4  ; m3 = clipQCoef
-psllw   m3, 2
-punpckhwd   m4, m3, m2
-punpcklwd   m3, m2
-pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
-pmaddwd m4, m1
-psrad   m3, m0
-psrad   m4, m0
-packssdwm3, m3  ; OPT_ME: store must be 32 bits
-pmovsxwdm3, m3
-packssdwm4, m4
-pmovsxwdm4, m4
-movu[r1], m3
-movu[r1 + 16], m4
-
-add r0, 32
-add r1, 32
-
-sub r2d, 8
-jnz.loop
-jz .end
-
-.skip:
-mov r4d, r4m
-movdm0, r4d ; m0 = shift
-xor r3d, r3d
-dec r4d
-bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
-pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-; m0 = shift
-; m1 = scale
-; m2 = word [1]
-.sloop:
-movum3, [r0]
-movum4, [r0 + 16]
-packssdwm3, m4  ; m3 = clipQCoef
 punpckhwd   m4, m3, m2
 punpcklwd   m3, m2
 pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
@@ -1085,8 +1046,7 @@
 add r1, 32
 
 sub r2d, 8
-jnz.sloop
-.end:
+jnz.loop
 RET
 
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] asm: fix dequant_normal

2014-08-29 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409373356 -32400
#  Sat Aug 30 13:35:56 2014 +0900
# Node ID 9b5f0c75d052e963b0a413f341a74036141b3675
# Parent  4e2d9ac6d489e82e70544d626c89964ee653c452
asm: fix dequant_normal

diff -r 4e2d9ac6d489 -r 9b5f0c75d052 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200
+++ b/source/common/x86/pixel-util8.asm Sat Aug 30 13:35:56 2014 +0900
@@ -1005,23 +1005,21 @@
 ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int 
scale, int shift)
 ;-
 INIT_XMM sse4
-cglobal dequant_normal, 4,5,5
-movdm1, r3 ; m1 = word [scale]
+cglobal dequant_normal, 5,5,5
 cmp r3d, 32767
+movdm1, r3  ; m1 = word [scale]
+movam2, [pw_1]
 jle .skip
-
 psrld   m1, 2
-mov r4d, r4m
+sub r4d, 2
+.skip:
 movdm0, r4d ; m0 = shift
 xor r3d, r3d
 dec r4d
 bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
+movdm3, r3d
+punpcklwd   m1, m3
 pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-
 ; m0 = shift
 ; m1 = scale
 ; m2 = word [1]
@@ -1029,45 +1027,6 @@
 movum3, [r0]
 movum4, [r0 + 16]
 packssdwm3, m4  ; m3 = clipQCoef
-psllw   m3, 2
-punpckhwd   m4, m3, m2
-punpcklwd   m3, m2
-pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
-pmaddwd m4, m1
-psrad   m3, m0
-psrad   m4, m0
-packssdwm3, m3  ; OPT_ME: store must be 32 bits
-pmovsxwdm3, m3
-packssdwm4, m4
-pmovsxwdm4, m4
-movu[r1], m3
-movu[r1 + 16], m4
-
-add r0, 32
-add r1, 32
-
-sub r2d, 8
-jnz.loop
-jz .end
-
-.skip:
-mov r4d, r4m
-movdm0, r4d ; m0 = shift
-xor r3d, r3d
-dec r4d
-bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
-pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-; m0 = shift
-; m1 = scale
-; m2 = word [1]
-.sloop:
-movum3, [r0]
-movum4, [r0 + 16]
-packssdwm3, m4  ; m3 = clipQCoef
 punpckhwd   m4, m3, m2
 punpcklwd   m3, m2
 pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
@@ -1085,8 +1044,7 @@
 add r1, 32
 
 sub r2d, 8
-jnz.sloop
-.end:
+jnz.loop
 RET
 
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] asm: fix dequant_normal

2014-08-29 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409376721 -32400
#  Sat Aug 30 14:32:01 2014 +0900
# Node ID eb94972d65473fc8e6fa42ebd95a72cfc727edf5
# Parent  4e2d9ac6d489e82e70544d626c89964ee653c452
asm: fix dequant_normal

diff -r 4e2d9ac6d489 -r eb94972d6547 source/common/dct.cpp
--- a/source/common/dct.cpp Fri Aug 29 11:12:49 2014 +0200
+++ b/source/common/dct.cpp Sat Aug 30 14:32:01 2014 +0900
@@ -720,7 +720,9 @@
 
 void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int 
scale, int shift)
 {
-#if !HIGH_BIT_DEPTH
+#if HIGH_BIT_DEPTH
+X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
+#else
 // NOTE: maximum of scale is (72 * 256)
 X265_CHECK(scale < 32768, "dequant invalid scale %d\n", scale);
 #endif
diff -r 4e2d9ac6d489 -r eb94972d6547 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200
+++ b/source/common/x86/pixel-util8.asm Sat Aug 30 14:32:01 2014 +0900
@@ -1005,23 +1005,21 @@
 ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int 
scale, int shift)
 ;-
 INIT_XMM sse4
-cglobal dequant_normal, 4,5,5
-movdm1, r3 ; m1 = word [scale]
+cglobal dequant_normal, 5,5,5
 cmp r3d, 32767
+movdm1, r3  ; m1 = word [scale]
+movam2, [pw_1]
 jle .skip
-
 psrld   m1, 2
-mov r4d, r4m
+sub r4d, 2
+.skip:
 movdm0, r4d ; m0 = shift
 xor r3d, r3d
 dec r4d
 bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
+movdm3, r3d
+punpcklwd   m1, m3
 pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-
 ; m0 = shift
 ; m1 = scale
 ; m2 = word [1]
@@ -1029,45 +1027,6 @@
 movum3, [r0]
 movum4, [r0 + 16]
 packssdwm3, m4  ; m3 = clipQCoef
-psllw   m3, 2
-punpckhwd   m4, m3, m2
-punpcklwd   m3, m2
-pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
-pmaddwd m4, m1
-psrad   m3, m0
-psrad   m4, m0
-packssdwm3, m3  ; OPT_ME: store must be 32 bits
-pmovsxwdm3, m3
-packssdwm4, m4
-pmovsxwdm4, m4
-movu[r1], m3
-movu[r1 + 16], m4
-
-add r0, 32
-add r1, 32
-
-sub r2d, 8
-jnz.loop
-jz .end
-
-.skip:
-mov r4d, r4m
-movdm0, r4d ; m0 = shift
-xor r3d, r3d
-dec r4d
-bts r3d, r4d
-movdm2, r3d
-punpcklwd   m1, m2
-pshufd  m1, m1, 0   ; m1 = dword [add scale]
-movam2, [pw_1]
-mov r2d, r2m
-; m0 = shift
-; m1 = scale
-; m2 = word [1]
-.sloop:
-movum3, [r0]
-movum4, [r0 + 16]
-packssdwm3, m4  ; m3 = clipQCoef
 punpckhwd   m4, m3, m2
 punpcklwd   m3, m2
 pmaddwd m3, m1  ; m3 = dword (clipQCoef * scale + add)
@@ -1085,8 +1044,7 @@
 add r1, 32
 
 sub r2d, 8
-jnz.sloop
-.end:
+jnz.loop
 RET
 
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix m_initSliceContext (uninitialised m_sliceQp)

2014-08-26 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1409041357 -32400
#  Tue Aug 26 17:22:37 2014 +0900
# Node ID c18255467f12da1a780340ade55292c32d95bfdd
# Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
fix m_initSliceContext (uninitialised m_sliceQp)

diff -r 5acfb12ec5d1 -r c18255467f12 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Mon Aug 25 17:53:12 2014 +0900
+++ b/source/encoder/frameencoder.cpp   Tue Aug 26 17:22:37 2014 +0900
@@ -158,8 +158,6 @@
 int64_t startCompressTime = x265_mdate();
 Slice* slice = m_frame-m_picSym-m_slice;
 
-m_initSliceContext.resetEntropy(slice);
-
 /* Emit access unit delimiter unless this is the first frame and the user 
is
  * not repeating headers (since AUD is supposed to be the first NAL in the 
access
  * unit) */
@@ -225,12 +223,15 @@
 m_frameFilter.m_sao.m_refDepth = 2 + !IS_REFERENCED(slice);
 break;
 }
-m_frameFilter.start(m_frame);
 
 // Clip slice QP to 0-51 spec range before encoding
 qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
 slice-m_sliceQp = qp;
 
+m_initSliceContext.resetEntropy(slice);
+
+m_frameFilter.start(m_frame);
+
 if (m_frame-m_lowres.bKeyframe)
 {
 if (m_param-bEmitHRDSEI)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

2014-08-25 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1408956792 -32400
#  Mon Aug 25 17:53:12 2014 +0900
# Node ID 7145e57c722a94a06faec33e3041442032a1892f
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]

diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Fri Aug 22 15:53:34 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Mon Aug 25 17:53:12 2014 +0900
@@ -816,12 +816,12 @@
 
 TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t 
curPartUnitIdx)
 {
+if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + 
UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples)
+return NULL;
+
 uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
-if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples)
-return NULL;
-
 if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, 
numPartInCUSize))
 {
 if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize))
@@ -857,14 +857,11 @@
 
 TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t 
curPartUnitIdx)
 {
-uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
+if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + 
UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples)
+return NULL;
 
-if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + 
UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples)
-{
-return NULL;
-}
-
-uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
+uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
+uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
 if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, 
numPartInCUSize))
 {
@@ -895,15 +892,14 @@
 
 TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx,  uint32_t 
curPartUnitIdx, uint32_t partUnitOffset)
 {
-uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx];
-
-if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + 
(partUnitOffset  LOG2_UNIT_SIZE)) =
+if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + 
(partUnitOffset  LOG2_UNIT_SIZE)) =
 m_slice-m_sps-picHeightInLumaSamples)
 {
 return NULL;
 }
 
-uint32_t numPartInCUSize  = m_pic-getNumPartInCUSize();
+uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx];
+uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
 if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 
partUnitOffset, numPartInCUSize))
 {
@@ -938,14 +934,13 @@
 
 TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t 
curPartUnitIdx, uint32_t partUnitOffset)
 {
-uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
-
-if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
(partUnitOffset  LOG2_UNIT_SIZE)) =
+if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + 
(partUnitOffset  LOG2_UNIT_SIZE)) =
 m_slice-m_sps-picWidthInLumaSamples)
 {
 return NULL;
 }
 
+uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
 if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 
partUnitOffset, numPartInCUSize))
@@ -954,7 +949,7 @@
 {
 if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT - 
numPartInCUSize + partUnitOffset])
 {
-uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
+uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize 
+ partUnitOffset];
 if (RasterAddress::isEqualRowOrCol(absPartIdxRT, 
absZorderCUIdx, numPartInCUSize))
 {
@@ -1817,48 +1812,42 @@
 }
 // TMVP always enabled
 {
-// MTK colocated-RightBottom
+MV colmv;
 uint32_t partIdxRB;
 
 deriveRightBottomIdx(puIdx, partIdxRB);
 
-uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB];
-uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
-
-MV colmv;
-int refIdx;
 int lcuIdx = -1;
 
-if ((m_pic-getCU(m_cuAddr)-getCUPelX() + 
g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) = 
m_slice-m_sps-picWidthInLumaSamples)  // image boundary check
+// image boundary check
+if (m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[partIdxRB] + 
UNIT_SIZE  m_slice-m_sps-picWidthInLumaSamples 
+m_pic-getCU(m_cuAddr)-getCUPelY

Re: [x265] fix lossless

2014-08-24 Thread Satoshi Nakagawa
> Negative shift or bts cause unexpected behavior.

 

Negative shift is used for round value calculation.

 

int round = 1 << (shift - 1);

 

 

 

From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
Satoshi Nakagawa
Sent: Monday, August 25, 2014 11:50 AM
To: 'Development for x265'
Subject: Re: [x265] fix lossless

 

Negative shift or bts cause unexpected behavior.

 

My Core i5 4300U, (Win7 32bit, VS2013) fails following test.

 

--- a/source/test/pixelharness.cpp Fri Aug 22 15:53:34 2014 -0500

+++ b/source/test/pixelharness.cpp   Mon Aug 25 11:30:37 2014 +0900

@@ -562,7 +562,8 @@

 intptr_t stride = STRIDE;

 for (int i = 0; i  ITERS; i++)

 {

-int shift = (rand() % 7 + 1);

+//int shift = (rand() % 7 + 1);

+int shift = 0;

 

 int index = i % TEST_CASES;

 checked(opt, opt_dest, int_test_buff[index] + j, stride, shift,
(int)STRIDE);

 

 

 

 

From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of chen
Sent: Monday, August 25, 2014 11:20 AM
To: Development for x265
Subject: Re: [x265] fix lossless

 

He only modified the shift=0 path; shift-left and shift-right are equivalent in that case.

At 2014-08-25 10:06:23,Steve Borho st...@borho.org wrote:
On 08/23, Satoshi Nakagawa wrote:
 # HG changeset patch
 # User Satoshi Nakagawa nakagawa...@oki.com
 # Date 1408803114 -32400
 #  Sat Aug 23 23:11:54 2014 +0900
 # Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6
 # Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
 fix lossless
 
do you have a repro case that this fixes?  I tried Main and Main10
encodes with --lossless --hash 1 and they were both fine prior to this
patch.
 
 diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp
 --- a/source/common/quant.cpp   Fri Aug 22 15:53:34 2014 -0500
 +++ b/source/common/quant.cpp   Sat Aug 23 23:11:54 2014 +0900
 @@ -399,7 +399,7 @@
  {
  if (transQuantBypass)
  {
 -primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 
log2TrSize);
 +primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff,
stride, 0);
  return;
  }
  
 @@ -430,7 +430,7 @@
  #if X265_DEPTH = 10
  primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride,
shift, trSize);
  #else
 -if (shift = 0)
 +if (shift  0)
  primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride,
shift, trSize);
  else
  primitives.cvt32to16_shl[log2TrSize - 2](residual,
m_resiDctCoeff, stride, -shift);
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel
 
-- 
Steve Borho
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix lossless

2014-08-23 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1408803114 -32400
#  Sat Aug 23 23:11:54 2014 +0900
# Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6
# Parent  6e6756f94b27c3ef30f6159f1880112a7ff978e3
fix lossless

diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp
--- a/source/common/quant.cpp   Fri Aug 22 15:53:34 2014 -0500
+++ b/source/common/quant.cpp   Sat Aug 23 23:11:54 2014 +0900
@@ -399,7 +399,7 @@
 {
 if (transQuantBypass)
 {
-primitives.cvt32to16_shr(residual, coeff, stride, 0, 1  log2TrSize);
+primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0);
 return;
 }
 
@@ -430,7 +430,7 @@
 #if X265_DEPTH = 10
 primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, 
trSize);
 #else
-if (shift = 0)
+if (shift  0)
 primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, 
trSize);
 else
 primitives.cvt32to16_shl[log2TrSize - 2](residual, m_resiDctCoeff, 
stride, -shift);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] refine depth related.

2014-08-18 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1408347239 -32400
#  Mon Aug 18 16:33:59 2014 +0900
# Node ID 81469708804f322f6c76dfc6bb88f6d78fa983df
# Parent  9a0d242743577e0c8cc56cfac4934f8ea8cb7f6e
refine depth related.

maxCUDepth: CU depth
maxFullDepth:   CU+TU or CU+PU depth
unitSize:   always 4


diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Mon Aug 18 05:34:51 2014 +
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Mon Aug 18 16:33:59 2014 +0900
@@ -129,7 +129,7 @@
 return ok;
 }
 
-void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t 
cuSize, int unitSize, int csp, int index, bool isLossless)
+void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t 
cuSize, int csp, int index, bool isLossless)
 {
 m_hChromaShift = CHROMA_H_SHIFT(csp);
 m_vChromaShift = CHROMA_V_SHIFT(csp);
@@ -139,12 +139,6 @@
 m_slice = NULL;
 m_numPartitions = numPartition;
 
-uint32_t tmp = 4 * AMVP_DECIMATION_FACTOR / unitSize;
-tmp = tmp * tmp;
-X265_CHECK(tmp == (1  (g_log2Size[tmp])), unexpected pixel count\n);
-tmp = g_log2Size[tmp];
-m_unitMask = ~((1  tmp) - 1);
-
 uint32_t sizeL = cuSize * cuSize;
 uint32_t sizeC = sizeL  (m_hChromaShift + m_vChromaShift);
 
@@ -577,7 +571,7 @@
 
 // Copy current predicted part to a CU in picture.
 // It is used to predict for next part
-void TComDataCU::copyToPic(uint8_t depth)
+void TComDataCU::copyToPic(uint32_t depth)
 {
 TComDataCU* cu = m_pic-getCU(m_cuAddr);
 
@@ -622,7 +616,7 @@
 m_cuMvField[1].copyTo(cu-getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU);
 
 uint32_t tmpY  = 1  ((g_maxLog2CUSize - depth) * 2);
-uint32_t tmpY2 = m_absIdxInLCU  m_pic-getLog2UnitSize() * 2;
+uint32_t tmpY2 = m_absIdxInLCU  LOG2_UNIT_SIZE * 2;
 memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
 
 uint32_t tmpC  = tmpY   (m_hChromaShift + m_vChromaShift);
@@ -633,7 +627,7 @@
 if (m_slice-m_pps-bTransquantBypassEnabled)
 {
 uint32_t tmp  = 1  ((g_maxLog2CUSize - depth) * 2);
-uint32_t tmp2 = m_absIdxInLCU  m_pic-getLog2UnitSize() * 2;
+uint32_t tmp2 = m_absIdxInLCU  LOG2_UNIT_SIZE * 2;
 memcpy(cu-getLumaOrigYuv() + tmp2, m_tqBypassOrigYuv[0], 
sizeof(pixel) * tmp);
 
 memcpy(cu-getChromaOrigYuv(1) + tmpC2, m_tqBypassOrigYuv[1], 
sizeof(pixel) * tmpC);
@@ -641,7 +635,7 @@
 }
 }
 
-void TComDataCU::copyCodedToPic(uint8_t depth)
+void TComDataCU::copyCodedToPic(uint32_t depth)
 {
 TComDataCU* cu = m_pic-getCU(m_cuAddr);
 
@@ -660,7 +654,7 @@
 memcpy(cu-getCbf(TEXT_CHROMA_V) + m_absIdxInLCU, m_cbf[2], sizeInChar);
 
 uint32_t tmpY  = 1  ((g_maxLog2CUSize - depth) * 2);
-uint32_t tmpY2 = m_absIdxInLCU  m_pic-getLog2UnitSize() * 2;
+uint32_t tmpY2 = m_absIdxInLCU  LOG2_UNIT_SIZE * 2;
 memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
 tmpY  = m_hChromaShift + m_vChromaShift;
 tmpY2 = m_hChromaShift + m_vChromaShift;
@@ -668,7 +662,7 @@
 memcpy(cu-m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
 }
 
-void TComDataCU::copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth)
+void TComDataCU::copyToPic(uint32_t depth, uint32_t partIdx, uint32_t 
partDepth)
 {
 TComDataCU* cu = m_pic-getCU(m_cuAddr);
 uint32_t qNumPart  = m_numPartitions  (partDepth  1);
@@ -713,7 +707,7 @@
 m_cuMvField[1].copyTo(cu-getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU, 
partStart, qNumPart);
 
 uint32_t tmpY  = 1  ((g_maxLog2CUSize - depth - partDepth) * 2);
-uint32_t tmpY2 = partOffset  m_pic-getLog2UnitSize() * 2;
+uint32_t tmpY2 = partOffset  LOG2_UNIT_SIZE * 2;
 memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0],  sizeof(coeff_t) * tmpY);
 
 uint32_t tmpC  = tmpY  (m_hChromaShift + m_vChromaShift);
@@ -825,7 +819,7 @@
 uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx];
 uint32_t numPartInCUSize = m_pic-getNumPartInCUSize();
 
-if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
m_pic-getUnitSize()) = m_slice-m_sps-picWidthInLumaSamples)
+if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + 
UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples)
 return NULL;
 
 if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, 
numPartInCUSize))
@@ -834,7 +828,7 @@
 {
 if (curPartUnitIdx  g_rasterToZscan[absPartIdxRT - 
numPartInCUSize + 1])
 {
-uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 
 (m_log2CUSize[0] - m_pic-getLog2UnitSize())) - 1;
+uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 
 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize 
+ 1];
 if (RasterAddress::isEqualRowOrCol

[x265] replace IntraFilterType[][] to IntraFilterFlags[] (avoid *35)

2014-08-13 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1407984148 -32400
#  Thu Aug 14 11:42:28 2014 +0900
# Node ID a369fcc9c0e43812feb285004c09bf9ea9706456
# Parent  d66e257ace3210d828a399e05eeba1b05abb21af
replace IntraFilterType[][] to IntraFilterFlags[] (avoid *35)

diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp Wed Aug 13 17:29:18 2014 -0700
+++ b/source/Lib/TLibCommon/TComPattern.cpp Thu Aug 14 11:42:28 2014 +0900
@@ -38,7 +38,7 @@
 #include frame.h
 #include TComPattern.h
 #include TComDataCU.h
-#include predict.h
+#include TComRom.h
 
 using namespace x265;
 
@@ -68,7 +68,7 @@
 
 fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
 
-bool bUseFilteredPredictions = (dirMode == ALL_IDX || 
IntraFilterType[intraNeighbors.log2TrSize - 2][dirMode]);
+bool bUseFilteredPredictions = (dirMode == ALL_IDX || 
(intraFilterFlags[dirMode]  tuSize));
 
 if (bUseFilteredPredictions  8 = tuSize  tuSize = 32)
 {
diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Wed Aug 13 17:29:18 2014 -0700
+++ b/source/Lib/TLibCommon/TComRom.cpp Thu Aug 14 11:42:28 2014 +0900
@@ -491,5 +491,15 @@
 106,  110,  114,  118,  122,  126,  130,  135,  139,  143,  147,  152,  
156,  161,  165,  170,
 175,  179,  184,  189,  194,  198,  203,  208,  214,  219,  224,  229,  
234,  240,  245,  250
 };
+
+/* intraFilterFlags[dir]  trSize */
+const uint8_t intraFilterFlags[35] =
+{
+0x38, 0x00,
+0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 
0x30, 0x30, 0x30, 0x30,
+0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 
0x30, 0x30, 0x30, 0x30,
+0x38, 
+};
+
 }
 //! \}
diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h   Wed Aug 13 17:29:18 2014 -0700
+++ b/source/Lib/TLibCommon/TComRom.h   Thu Aug 14 11:42:28 2014 +0900
@@ -148,6 +148,9 @@
 extern const uint8_t g_lpsTable[64][4];
 extern const uint8_t x265_exp2_lut[64];
 
+// Intra tables
+extern const uint8_t intraFilterFlags[35];
+
 }
 
 #endif  //ifndef X265_TCOMROM_H
diff -r d66e257ace32 -r a369fcc9c0e4 source/common/intrapred.cpp
--- a/source/common/intrapred.cpp   Wed Aug 13 17:29:18 2014 -0700
+++ b/source/common/intrapred.cpp   Thu Aug 14 11:42:28 2014 +0900
@@ -21,7 +21,6 @@
  * For more information, contact us at license @ x265.com.
  */
 
-#include predict.h
 #include TLibCommon/TComRom.h
 #include primitives.h
 
@@ -249,11 +248,10 @@
 void all_angs_pred_c(pixel *dest, pixel *above0, pixel *left0, pixel *above1, 
pixel *left1, int bLuma)
 {
 const int size = 1  log2Size;
-const int sizeIdx = log2Size - 2;
 for (int mode = 2; mode = 34; mode++)
 {
-pixel *left = (IntraFilterType[sizeIdx][mode] ? left1 : left0);
-pixel *above = (IntraFilterType[sizeIdx][mode] ? above1 : above0);
+pixel *left  = (intraFilterFlags[mode]  size ? left1  : left0);
+pixel *above = (intraFilterFlags[mode]  size ? above1 : above0);
 pixel *out = dest + ((mode - 2)  (log2Size * 2));
 
 intra_pred_ang_csize(out, size, left, above, mode, bLuma);
diff -r d66e257ace32 -r a369fcc9c0e4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp  Wed Aug 13 17:29:18 2014 -0700
+++ b/source/common/x86/asm-primitives.cpp  Thu Aug 14 11:42:28 2014 +0900
@@ -27,7 +27,7 @@
 #include x265.h
 #include cpu.h
 #if HIGH_BIT_DEPTH
-#include predict.h
+#include TLibCommon/TComRom.h
 #endif
 
 extern C {
@@ -525,7 +525,7 @@
 SETUP_CHROMA_SS_FUNC_DEF_444(64, 48, cpu); \
 SETUP_CHROMA_SS_FUNC_DEF_444(48, 64, cpu); \
 SETUP_CHROMA_SS_FUNC_DEF_444(64, 16, cpu); \
-SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu)
+SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu);
 
 #if HIGH_BIT_DEPTH// temporary, until all 10bit functions are completed
 #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
@@ -1139,7 +1139,7 @@
 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 48, cpu); \
 SETUP_CHROMA_VERT_FUNC_DEF_444(48, 64, cpu); \
 SETUP_CHROMA_VERT_FUNC_DEF_444(64, 16, cpu); \
-SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu)
+SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu);
 
 #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
 p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = 
x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
@@ -1169,7 +1169,7 @@
 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 24, cpu); \
 SETUP_CHROMA_HORIZ_FUNC_DEF(24, 32, cpu); \
 SETUP_CHROMA_HORIZ_FUNC_DEF(32, 8, cpu); \
-SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu)
+SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu);
 
 #define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \
 p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = 
x265_interp_4tap_horiz_pp_ ## W ## x

Re: [x265] [PATCH] analysis: check for proper cost

2014-08-12 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1407889822 -32400
#  Wed Aug 13 09:30:22 2014 +0900
# Node ID b284a0c71bb8c69b2bb2097d8ce4d50cfed6e41e
# Parent  8a7f4bb1d1be32fe668d410450c2e320ccae6098
analysis: check for proper cost

diff -r 8a7f4bb1d1be -r b284a0c71bb8 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Tue Aug 12 01:11:39 2014 -0500
+++ b/source/encoder/analysis.cpp   Wed Aug 13 09:30:22 2014 +0900
@@ -1011,12 +1011,28 @@
 
 /* Assert if Best prediction mode is NONE
  * Selected mode's RD-cost must be not MAX_INT64 */
+#if CHECKED_BUILD || _DEBUG
 if (bInsidePicture)
 {
 X265_CHECK(outBestCU-getPartitionSize(0) != SIZE_NONE, no best 
prediction size\n);
 X265_CHECK(outBestCU-getPredictionMode(0) != MODE_NONE, no best 
prediction mode\n);
-X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best prediction 
cost\n);
+if (m_param-rdLevel  1)
+{
+if (m_rdCost.m_psyRd)
+{
+X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best 
partition cost\n);
+}
+else
+{
+X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best 
partition cost\n);
+}
+}
+else
+{
+X265_CHECK(outBestCU-m_sa8dCost != MAX_INT64, no best partition 
cost\n);
+}
 }
+#endif
 
 x265_emms();
 }
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] replace g_convertToBit[] to g_log2Size[] const table

2014-08-11 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1407814080 -32400
#  Tue Aug 12 12:28:00 2014 +0900
# Node ID c0f00c662c5a255d093f3355e1c8dff123125137
# Parent  23d58a1819c7ab394db69f19926b68bce9e85bb4
replace g_convertToBit[] to g_log2Size[] const table

diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp  Mon Aug 11 16:54:09 2014 -0700
+++ b/source/Lib/TLibCommon/TComDataCU.cpp  Tue Aug 12 12:28:00 2014 +0900
@@ -141,8 +141,8 @@
 
 uint32_t tmp = 4 * AMVP_DECIMATION_FACTOR / unitSize;
 tmp = tmp * tmp;
-X265_CHECK(tmp == (1  (g_convertToBit[tmp] + 2)), unexpected pixel 
count\n);
-tmp = g_convertToBit[tmp] + 2;
+X265_CHECK(tmp == (1  (g_log2Size[tmp])), unexpected pixel count\n);
+tmp = g_log2Size[tmp];
 m_unitMask = ~((1  tmp) - 1);
 
 uint32_t sizeL = cuSize * cuSize;
diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Mon Aug 11 16:54:09 2014 -0700
+++ b/source/Lib/TLibCommon/TComRom.cpp Tue Aug 12 12:28:00 2014 +0900
@@ -99,16 +99,6 @@
 {
 if (ATOMIC_CAS32(initialized, 0, 1) == 1)
 return;
-
-int i, c;
-
-memset(g_convertToBit, -1, sizeof(g_convertToBit));
-c = 0;
-for (i = 4; i = MAX_CU_SIZE; i *= 2)
-{
-g_convertToBit[i] = c;
-c++;
-}
 }
 
 void destroyROM()
@@ -300,7 +290,14 @@
 const uint8_t g_chroma422IntraAngleMappingTable[36] =
 { 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23, 
23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31, DM_CHROMA_IDX };
 
-uint8_t g_convertToBit[MAX_CU_SIZE + 1];
+const uint8_t g_log2Size[MAX_CU_SIZE + 1] =
+{
+0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+6
+};
 
 // 

 // Scanning order  context model mapping
diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h   Mon Aug 11 16:54:09 2014 -0700
+++ b/source/Lib/TLibCommon/TComRom.h   Tue Aug 12 12:28:00 2014 +0900
@@ -134,7 +134,7 @@
 extern const uint8_t g_minInGroup[10];
 extern const uint8_t g_goRiceRange[5]; // maximum value coded with Rice codes
 
-extern uint8_t g_convertToBit[MAX_CU_SIZE + 1]; // from width to log2(width)-2
+extern const uint8_t g_log2Size[MAX_CU_SIZE + 1]; // from size to log2(size)
 
 // Map Luma samples to chroma samples
 extern const int g_winUnitX[MAX_CHROMA_FORMAT_IDC + 1];
diff -r 23d58a1819c7 -r c0f00c662c5a source/common/param.cpp
--- a/source/common/param.cpp   Mon Aug 11 16:54:09 2014 -0700
+++ b/source/common/param.cpp   Tue Aug 12 12:28:00 2014 +0900
@@ -861,8 +861,8 @@
 if (check_failed == 1)
 return check_failed;
 
-uint32_t maxCUDepth = (uint32_t)g_convertToBit[param-maxCUSize];
-uint32_t maxLog2CUSize = maxCUDepth + 2;
+uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param-maxCUSize];
+uint32_t maxCUDepth = maxLog2CUSize - 2;
 uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1;
 uint32_t tuQTMinLog2Size = 2; //log2(4)
 
@@ -1041,7 +1041,8 @@
 
 int x265_set_globals(x265_param *param)
 {
-uint32_t maxCUDepth = (uint32_t)g_convertToBit[param-maxCUSize];
+uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param-maxCUSize];
+uint32_t maxCUDepth = maxLog2CUSize - 2;
 uint32_t tuQTMinLog2Size = 2; //log2(4)
 
 static int once /* = 0 */;
@@ -1058,7 +1059,7 @@
 {
 // set max CU width  height
 g_maxCUSize = param-maxCUSize;
-g_maxLog2CUSize = maxCUDepth + 2;
+g_maxLog2CUSize = maxLog2CUSize;
 
 // compute actual CU depth with respect to config depth and max 
transform size
 g_addCUDepth = g_maxLog2CUSize - maxCUDepth - tuQTMinLog2Size;
diff -r 23d58a1819c7 -r c0f00c662c5a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Aug 11 16:54:09 2014 -0700
+++ b/source/encoder/encoder.cppTue Aug 12 12:28:00 2014 +0900
@@ -1209,7 +1209,7 @@
 
 setThreadPool(ThreadPool::allocThreadPool(p-poolNumThreads));
 int poolThreadCount = ThreadPool::getThreadPool()-getThreadCount();
-uint32_t maxLog2CUSize = g_convertToBit[p-maxCUSize] + 2;
+uint32_t maxLog2CUSize = g_log2Size[p-maxCUSize];
 int rows = (p-sourceHeight + p-maxCUSize - 1)  maxLog2CUSize;
 
 if (p-frameNumThreads == 0)
@@ -1391,7 +1391,7 @@
 m_conformanceWindow.leftOffset = 0;
 
 // set pad size if width is not multiple of the minimum CU size 
=
-uint32_t maxCUDepth = (uint32_t)g_convertToBit[p-maxCUSize];
+uint32_t maxCUDepth = maxLog2CUSize - 2;
 uint32_t minCUDepth = (p-maxCUSize  (maxCUDepth - 1));
 if ((p-sourceWidth % minCUDepth) != 0

[x265] quant: remove scaledCoeff from nquant()

2014-08-10 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1407658928 -32400
#  Sun Aug 10 17:22:08 2014 +0900
# Node ID d1dad09266327d40b6c2372f9916f7fcf288c2f0
# Parent  6e4eb854220350cf0c980fc02cc11109c506585f
quant: remove scaledCoeff from nquant()

diff -r 6e4eb8542203 -r d1dad0926632 source/common/dct.cpp
--- a/source/common/dct.cpp Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/dct.cpp Sun Aug 10 17:22:08 2014 +0900
@@ -795,7 +795,7 @@
 return numSig;
 }
 
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, 
int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int 
qBits, int add, int numCoeff)
 {
 uint32_t numSig = 0;
 
@@ -805,7 +805,6 @@
 int sign  = (level  0 ? -1 : 1);
 
 int tmplevel = abs(level) * quantCoeff[blockpos];
-scaledCoeff[blockpos] = tmplevel;
 level = ((tmplevel + add)  qBits);
 if (level)
 ++numSig;
diff -r 6e4eb8542203 -r d1dad0926632 source/common/primitives.h
--- a/source/common/primitives.hSat Aug 09 19:43:23 2014 -0500
+++ b/source/common/primitives.hSun Aug 10 17:22:08 2014 +0900
@@ -160,7 +160,7 @@
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, 
pixel *reconipred, int stride, int strideqt, int strideipred);
 typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t 
*deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t 
*scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t 
*qCoef, int qBits, int add, int numCoeff);
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t 
*dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
 typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int 
num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
diff -r 6e4eb8542203 -r d1dad0926632 source/common/quant.cpp
--- a/source/common/quant.cpp   Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/quant.cpp   Sun Aug 10 17:22:08 2014 +0900
@@ -487,7 +487,6 @@
  * probability models like CABAC */
 uint32_t Quant::rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t 
log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
 {
-uint32_t trSize = 1  log2TrSize;
 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* 
Represents scaling through forward transform */
 int scalingListType = (cu-isIntra(absPartIdx) ? 0 : 3) + ttype;
 
@@ -500,14 +499,13 @@
 int32_t *qCoef = m_scalingList-m_quantCoef[log2TrSize - 
2][scalingListType][rem];
 
 int numCoeff = 1  log2TrSize * 2;
-int scaledCoeff[32 * 32];
-uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, scaledCoeff, 
dstCoeff, qbits, add, numCoeff);
+uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, 
qbits, add, numCoeff);
 
 X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), 
numSig differ\n);
 if (!numSig)
 return 0;
 
-x265_emms();
+uint32_t trSize = 1  log2TrSize;
 
 /* unquant constants for psy-rdoq. The dequant coefficients have a (14) 
scale applied that
  * must be removed during unquant.  This may be larger than the QP 
upshift, which would turn
diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.hSat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util.hSun Aug 10 17:22:08 2014 +0900
@@ -45,7 +45,7 @@
 void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, 
int32_t *qCoef, int qBits, int add, int numCoeff);
-uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t 
*scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, 
int qBits, int add, int numCoeff);
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int 
num, int scale, int shift);
 int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff);
 
diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Sun Aug 10 17:22:08 2014 +0900
@@ -938,72 +938,63 @@
 
 
 ;-
-; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, 
int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int 
qBits, int add, int numCoeff

Re: [x265] [PATCH] TComPattern: remove redundant functions used for CIP

2014-08-05 Thread Satoshi Nakagawa

The redundant functions were there to simplify the non-CIP case, and to check
the m_pps->bConstrainedIntraPred flag at an upper level.
The I-slice is a simplified case too.


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 santhosh...@multicorewareinc.com
 Sent: Tuesday, August 05, 2014 7:10 PM
 To: x265-devel@videolan.org
 Subject: [x265] [PATCH] TComPattern: remove redundant functions used for
 CIP
 
 # HG changeset patch
 # User Santhoshini Sekar santhosh...@multicorewareinc.com
 # Date 1407233322 -19800
 #  Tue Aug 05 15:38:42 2014 +0530
 # Node ID aadca66911c2f838e5b6dba671f65a3abeafcb38
 # Parent  0d4723a0080cff763ff20ab9c516c6e082496a0b
 TComPattern: remove redundant functions used for CIP
 
 diff -r 0d4723a0080c -r aadca66911c2
 source/Lib/TLibCommon/TComPattern.cpp
 --- a/source/Lib/TLibCommon/TComPattern.cpp   Tue Aug 05 01:05:47 2014
 -0500
 +++ b/source/Lib/TLibCommon/TComPattern.cpp   Tue Aug 05 15:38:42 2014
 +0530
 @@ -209,24 +209,13 @@
  int  partIdxStride   = cu-m_pic-getNumPartInCUSize();
  partIdxLB=
 g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) *
 partIdxStride)];
 
 -if (!cu-m_slice-m_pps-bConstrainedIntraPred)
 -{
 -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu,
 partIdxLT);
 -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
 -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
 (bNeighborFlags + leftUnits + 1));
 -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
 -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
 (bNeighborFlags + leftUnits - 1));
 -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
 partIdxLB, (bNeighborFlags + leftUnits   - 1 - tuHeightInUnits));
 -}
 -else
 -{
 -bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu,
 partIdxLT);
 -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
 -numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT,
 partIdxRT, (bNeighborFlags + leftUnits + 1));
 -numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT,
 partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
 -numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT,
 partIdxLB, (bNeighborFlags + leftUnits - 1));
 -numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT,
 partIdxLB, (bNeighborFlags + leftUnits   - 1 - tuHeightInUnits));
 -}
 +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT);
 +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]);
 +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT,
 (bNeighborFlags + leftUnits + 1));
 +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT,
 partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits));
 +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB,
 (bNeighborFlags + leftUnits - 1));
 +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB,
 (bNeighborFlags + leftUnits   - 1 - tuHeightInUnits));
 +
  intraNeighbors-numIntraNeighbor = numIntraNeighbor;
  intraNeighbors-totalUnits   = aboveUnits + leftUnits + 1;
  intraNeighbors-aboveUnits   = aboveUnits;
 @@ -421,7 +410,10 @@
  uint32_t partAboveLeft;
  TComDataCU* pcCUAboveLeft = cu-getPUAboveLeft(partAboveLeft,
 partIdxLT);
 
 -return pcCUAboveLeft ? true : false;
 +if (!cu-m_slice-m_pps-bConstrainedIntraPred)
 +return pcCUAboveLeft ? true : false;
 +else
 +return pcCUAboveLeft 
 pcCUAboveLeft-isIntra(partAboveLeft);
  }
 
  int TComPattern::isAboveAvailable(TComDataCU* cu, uint32_t partIdxLT,
 uint32_t partIdxRT, bool *bValidFlags) @@ -436,7 +428,7 @@
  {
  uint32_t uiPartAbove;
  TComDataCU* pcCUAbove = cu-getPUAbove(uiPartAbove,
 g_rasterToZscan[rasterPart]);
 -if (pcCUAbove)
 +if (pcCUAbove  (!cu-m_slice-m_pps-bConstrainedIntraPred
 ||
 + pcCUAbove-isIntra(uiPartAbove)))
  {
  numIntra++;
  *validFlagPtr = true;
 @@ -463,7 +455,7 @@
  {
  uint32_t partLeft;
  TComDataCU* pcCULeft = cu-getPULeft(partLeft,
 g_rasterToZscan[rasterPart]);
 -if (pcCULeft)
 +if (pcCULeft  (!cu-m_slice-m_pps-bConstrainedIntraPred
 ||
 + pcCULeft-isIntra(partLeft)))
  {
  numIntra++;
  *validFlagPtr = true;
 @@ -488,7 +480,7 @@
  {
  uint32_t uiPartAboveRight;
  TComDataCU* pcCUAboveRight =
 cu-getPUAboveRightAdi(uiPartAboveRight, partIdxRT, offset);
 -if (pcCUAboveRight)
 +if (pcCUAboveRight 
 + (!cu-m_slice-m_pps-bConstrainedIntraPred ||
 + pcCUAboveRight-isIntra(uiPartAboveRight)))
  {
  numIntra++;
  *validFlagPtr = true;
 @@ -513,119 +505,7 @@
  {
  uint32_t uiPartBelowLeft;
  TComDataCU* pcCUBelowLeft =
 

Re: [x265] primitives for RExt

2014-08-05 Thread Satoshi Nakagawa
-mov   byte [rsp], %2/4
+mov   dword [rsp], %2/4
Why dword? byte is enough for dynamic range
 

A partial write needs a read-modify-write.

 

 

+cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1,
srcStride0, srcStride1
 
pinsrw have 2 uops, movd to load 4 bytes and drop unused is better.

 

thanks.

this function is not used, and will be removed.

 

 

 

 

From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of chen
Sent: Wednesday, August 06, 2014 3:29 AM
To: Development for x265
Subject: Re: [x265] primitives for RExt

 

 


At 2014-08-05 20:48:50,Satoshi Nakagawa nakagawa...@oki.com wrote:
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1407242513 -32400
#  Tue Aug 05 21:41:53 2014 +0900
# Node ID 770c40d768d55e68e76c485d5dc61d014257e789
# Parent  0d4723a0080cff763ff20ab9c516c6e082496a0b
primitives for RExt
 
@@ -1494,7 +1599,7 @@

;---
--
 %macro FILTER_VER_CHROMA_SS 4
 INIT_XMM sse2
-cglobal interp_4tap_vert_%3_%1x%2, 5, 7, %4 ,0-1
+cglobal interp_4tap_vert_%3_%1x%2, 5, 7, %4 ,0-gprsize
 
 add   r1d, r1d
 add   r3d, r3d
@@ -1508,7 +1613,7 @@
 lea   r6, [tab_ChromaCoeffV + r4]
 %endif
 
-mov   byte [rsp], %2/4
+mov   dword [rsp], %2/4
Why dword? byte is enough for dynamic range
 
diff -r 0d4723a0080c -r 770c40d768d5 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm   Tue Aug 05 01:05:47 2014
-0500
+++ b/source/common/x86/pixel-util8.asm   Tue Aug 05 21:41:53 2014
+0900
@@ -2878,6 +2878,61 @@
 RET
 

;---
--
+; void pixel_sub_ps_2x%2(pixel *dest, intptr_t destride, pixel *src0,
int16_t *scr1, intptr_t srcStride0, intptr_t srcStride1)
+;-

+%macro PIXEL_SUB_PS_W2_H2 2
+%if HIGH_BIT_DEPTH
+INIT_XMM sse2
+cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1,
srcStride0, srcStride1
+add r1, r1
+add r4, r4
+add r5, r5
+mov r6d,%2/2
+.loop:
+movdm0, [r2]
+movdm1, [r3]
+movdm2, [r2 + r4]
+movdm3, [r3 + r5]
+dec r6d
+lea r2, [r2 + r4 * 2]
+lea r3, [r3 + r5 * 2]
+psubw   m0, m1
+psubw   m2, m3
+movd[r0],   m0
+movd[r0 + r1],  m2
+lea r0, [r0 + 2 * r1]
+jnz .loop
+RET
+%else
+INIT_XMM sse4
+cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1,
srcStride0, srcStride1
+add r1, r1
+mov r6d,%2/2
+.loop:
+pinsrw  m0, [r2],   0
+pinsrw  m1, [r3],   0
+pinsrw  m2, [r2 + r4],  0
+pinsrw  m3, [r3 + r5],  0
pinsrw has 2 uops; using movd to load 4 bytes and dropping the unused ones is better.
+dec r6d
+lea r2, [r2 + r4 * 2]
+lea r3, [r3 + r5 * 2]
+pmovzxbwm0, m0
+pmovzxbwm1, m1
+pmovzxbwm2, m2
+pmovzxbwm3, m3
+psubw   m0, m1
+psubw   m2, m3
+movd[r0],   m0
+movd[r0 + r1],  m2
+lea r0, [r0 + r1 * 2]
+jnz .loop
+RET
+%endif
+%endmacro
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] search: separate bSkipRes == true path

2014-07-28 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1406540991 -32400
#  Mon Jul 28 18:49:51 2014 +0900
# Node ID a4beebdb70524da737d4d5d11e6b55961b9ef988
# Parent  8bab5275baed85f8a6e183d7edfeba9a516a3669
search: separate bSkipRes == true path

diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 00:14:55 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 18:49:51 2014 +0900
@@ -2268,6 +2268,57 @@
 mvmax.y = X265_MIN(mvmax.y, m_refLagPixels);
 }
 
+void TEncSearch::encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, 
TComYuv* predYuv, TComYuv* outReconYuv)
+{
+X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
+
+uint32_t log2CUSize = cu->getLog2CUSize(0);
+uint32_t cuSize = 1 << log2CUSize;
+uint8_t  depth  = cu->getDepth(0);
+
+int hChromaShift = CHROMA_H_SHIFT(m_csp);
+int vChromaShift = CHROMA_V_SHIFT(m_csp);
+
+// No residual coding : SKIP mode
+
+cu-setSkipFlagSubParts(true, 0, depth);
+cu-setTrIdxSubParts(0, 0, depth);
+cu-clearCbf(0, depth);
+
+outReconYuv-copyFromYuv(predYuv);
+// Luma
+int part = partitionFromLog2Size(log2CUSize);
+uint32_t distortion = primitives.sse_pp[part](fencYuv-getLumaAddr(), 
fencYuv-getStride(), outReconYuv-getLumaAddr(), outReconYuv-getStride());
+// Chroma
+part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
+distortion += 
m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv-getCbAddr(), 
fencYuv-getCStride(), outReconYuv-getCbAddr(), outReconYuv-getCStride()));
+distortion += 
m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv-getCrAddr(), 
fencYuv-getCStride(), outReconYuv-getCrAddr(), outReconYuv-getCStride()));
+
+m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+m_entropyCoder-resetBits();
+if (cu-m_slice-m_pps-bTransquantBypassEnabled)
+m_entropyCoder-codeCUTransquantBypassFlag(cu, 0);
+m_entropyCoder-codeSkipFlag(cu, 0);
+m_entropyCoder-codeMergeIndex(cu, 0);
+
+uint32_t bits = m_entropyCoder-getNumberOfWrittenBits();
+cu-m_mvBits = bits;
+cu-m_coeffBits = 0;
+cu-m_totalBits   = bits;
+cu-m_totalDistortion = distortion;
+if (m_rdCost.psyRdEnabled())
+{
+int size = log2CUSize - 2;
+cu-m_psyEnergy = m_rdCost.psyCost(size, fencYuv-getLumaAddr(), 
fencYuv-getStride(),
+   outReconYuv-getLumaAddr(), 
outReconYuv-getStride());
+cu-m_totalPsyCost = m_rdCost.calcPsyRdCost(cu-m_totalDistortion, 
cu-m_totalBits, cu-m_psyEnergy);
+}
+else
+cu-m_totalRDCost = m_rdCost.calcRdCost(cu-m_totalDistortion, 
cu-m_totalBits);
+
+m_entropyCoder-store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
+}
+
 /** encode residual and calculate rate-distortion for a CU block
  * \param cu
  * \param fencYuv
@@ -2275,17 +2326,14 @@
  * \param outResiYuv
  * \param outBestResiYuv
  * \param outReconYuv
- * \param bSkipRes
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, 
TComYuv* predYuv, ShortYuv* outResiYuv,
-   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv, bool bSkipRes, bool curUseRDOQ)
+   ShortYuv* outBestResiYuv, TComYuv* 
outReconYuv, bool curUseRDOQ)
 {
-if (cu-isIntra(0))
-return;
-
-uint32_t bits = 0, bestBits = 0, bestCoeffBits = 0;
-uint32_t distortion = 0, bestDist = 0;
+X265_CHECK(!cu-isIntra(0), intra CU not expected\n);
+
+uint32_t bestBits = 0, bestCoeffBits = 0;
 
 uint32_t log2CUSize = cu-getLog2CUSize(0);
 uint32_t cuSize = 1  log2CUSize;
@@ -2294,77 +2342,33 @@
 int hChromaShift = CHROMA_H_SHIFT(m_csp);
 int vChromaShift = CHROMA_V_SHIFT(m_csp);
 
-// No residual coding : SKIP mode
-if (bSkipRes)
+m_trQuant.setQPforQuant(cu);
+
+outResiYuv-subtract(fencYuv, predYuv, log2CUSize);
+
+// Residual coding.
+bool bIsTQBypassEnable = cu-m_slice-m_pps-bTransquantBypassEnabled;
+uint32_t tqBypassMode  = 1;
+
+if (bIsTQBypassEnable)
 {
-cu-setSkipFlagSubParts(true, 0, depth);
-
-outReconYuv-copyFromYuv(predYuv);
-// Luma
-int part = partitionFromLog2Size(log2CUSize);
-distortion = primitives.sse_pp[part](fencYuv-getLumaAddr(), 
fencYuv-getStride(), outReconYuv-getLumaAddr(), outReconYuv-getStride());
-// Chroma
-part = partitionFromSizes(cuSize  hChromaShift, cuSize  
vChromaShift);
-distortion += 
m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv-getCbAddr(), 
fencYuv-getCStride(), outReconYuv-getCbAddr(), outReconYuv-getCStride()));
-distortion += 
m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv-getCrAddr(), 
fencYuv-getCStride(), outReconYuv-getCrAddr(), outReconYuv-getCStride

Re: [x265] [PATCH] analysis: setQPforQuant in checkIntraInter to fix the hash mismatch at rd=5&6

2014-07-23 Thread Satoshi Nakagawa
Thank you for fixing my bug.

My own fix is very similar.


# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1406098305 -32400
#  Wed Jul 23 15:51:45 2014 +0900
# Node ID e3b909fd6e1f5348944872c704cabff229c37f71
# Parent  e3ad03b7c4854be40730645d4fe25e56a93f3f94
fix bug in 4d2c3d09e836

diff -r e3ad03b7c485 -r e3b909fd6e1f source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Tue Jul 22 13:28:54 2014 -0500
+++ b/source/encoder/analysis.cpp   Wed Jul 23 15:51:45 2014 +0900
@@ -1219,6 +1219,8 @@
  outBestCU->getCbf(0, TEXT_CHROMA_U) != 0   ||
  outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) && doIntra)
 {
+m_trQuant.setQPforQuant(outTempCU);
+
 checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);
 outTempCU-initEstData();

 
From: g...@multicorewareinc.com
Subject: [x265] [PATCH] analysis: setQPforQuant in checkIntraInter to fix the 
hash mismatch at rd=5&6
Date: Wed, 23 Jul 2014 11:17:01 +0530

 # HG changeset patch
 # User Gopu Govindaswamy g...@multicorewareinc.com
 # Date 1406094393 -19800
 #  Wed Jul 23 11:16:33 2014 +0530
 # Node ID 1beaaabef3eb6d3e832102ed7dafcd855c1d7298
 # Parent  e3ad03b7c4854be40730645d4fe25e56a93f3f94
 analysis: setQPforQuant in checkIntraInter to fix the hash mismatch at rd=5&6
 
 diff -r e3ad03b7c485 -r 1beaaabef3eb source/encoder/analysis.cpp
 --- a/source/encoder/analysis.cpp Tue Jul 22 13:28:54 2014 -0500
 +++ b/source/encoder/analysis.cpp Wed Jul 23 11:16:33 2014 +0530
 @@ -1722,6 +1722,7 @@
  
  PPAScopeEvent(CheckRDCostIntra + depth);
  
 +m_trQuant.setQPforQuant(outTempCU);
  outTempCU-setSkipFlagSubParts(false, 0, depth);
  outTempCU-setPartSizeSubParts(partSize, 0, depth);
  outTempCU-setPredModeSubParts(MODE_INTRA, 0, depth);
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] qtLayer in reverse order

2014-07-23 Thread Satoshi Nakagawa

 Is NUM_LAYERS the same as MAX_CU_DEPTH? Could we use that here?

No, it is (MAX_LOG2_TR_SIZE - 2 + 1).

 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Thursday, July 24, 2014 12:12 PM
 To: Development for x265
 Subject: Re: [x265] qtLayer in reverse order
 
 On 07/24, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406166493 -32400
  #  Thu Jul 24 10:48:13 2014 +0900
  # Node ID 737648a21e98a7ea339e1b659175c1f229d77c8c
  # Parent  342d72f0b61322f31bec35634cc893e4e2b04be3
  qtLayer in reverse order
 
  diff -r 342d72f0b613 -r 737648a21e98
 source/Lib/TLibEncoder/TEncSearch.cpp
  --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jul 23 17:47:16 2014
 -0500
  +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 24 10:48:13 2014
 +0900
  @@ -51,9 +51,7 @@
 
   TEncSearch::TEncSearch()
   {
  -m_qtTempCoeff[0] = NULL;
  -m_qtTempCoeff[1] = NULL;
  -m_qtTempCoeff[2] = NULL;
  +memset(m_qtTempCoeff, 0, sizeof(m_qtTempCoeff));
   m_qtTempTrIdx = NULL;
   m_qtTempShortYuv = NULL;
   for (int i = 0; i  3; i++)
  @@ -80,7 +78,6 @@
   X265_FREE(m_qtTempCbf[0]);
   X265_FREE(m_qtTempTransformSkipFlag[0]);
 
  -delete[] m_qtTempCoeff[0];
   delete[] m_qtTempShortYuv;
   }
 
  @@ -94,9 +91,6 @@
* available for motion reference.  See refLagRows in
 FrameEncoder::compressCTURows() */
   m_refLagPixels = m_bFrameParallel ? m_param-searchRange :
  m_param-sourceHeight;
 
  -m_qtTempCoeff[0] = new coeff_t*[m_numLayers * 3];
  -m_qtTempCoeff[1] = m_qtTempCoeff[0] + m_numLayers;
  -m_qtTempCoeff[2] = m_qtTempCoeff[0] + m_numLayers * 2;
   m_qtTempShortYuv = new ShortYuv[m_numLayers];
   uint32_t sizeL = 1  (g_maxLog2CUSize * 2);
   uint32_t sizeC = sizeL  (CHROMA_H_SHIFT(m_csp) +
  CHROMA_V_SHIFT(m_csp)); @@ -239,7 +233,7 @@
   }
 
   uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
  -uint32_t qtLayer= cu-m_slice-m_sps-quadtreeTULog2MaxSize
 - log2TrSize;
  +uint32_t qtLayer= log2TrSize - 2;
   uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize();
   uint32_t coeffOffset = absPartIdx  (log2UnitSize * 2);
   coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; @@
  -266,8 +260,6 @@
   }
 
   uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
  -uint32_t qtLayer= cu-m_slice-m_sps-quadtreeTULog2MaxSize
 - log2TrSize;
  -uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize();
 
   uint32_t trDepthC = trDepth;
   int hChromaShift = CHROMA_H_SHIFT(m_csp); @@ -286,6 +278,9 @@
   }
   }
 
  +uint32_t qtLayer= log2TrSize - 2;
  +uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize();
  +
   if (m_csp != CHROMA_422)
   {
   uint32_t shift = (m_csp == CHROMA_420) ? 2 : 0; @@ -560,7
  +555,6 @@
   bool bCheckFull  = (log2TrSize =
 cu-m_slice-m_sps-quadtreeTULog2MaxSize);
   bool bCheckSplit = (log2TrSize 
 cu-getQuadtreeTULog2MinSizeInCU(absPartIdx));
 
  -int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize;
   int isIntraSlice = (cu-m_slice-m_sliceType == I_SLICE);
 
   // don't check split if TU size is less or equal to max TU size
  @@ -568,6 +562,7 @@
 
   if (m_param-rdPenalty  !isIntraSlice)
   {
  +int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize;
   // in addition don't check split if TU size is less or equal
 to 16x16 TU size for non-intra slice
   noSplitIntraMaxTuSize = (log2TrSize = X265_MIN(maxTuSize,
  4));
 
  @@ -613,7 +608,7 @@
 
   cu-setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
 
  -uint32_t qtLayer=
 cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize;
  +uint32_t qtLayer= log2TrSize - 2;
   uint32_t coeffOffsetY   = absPartIdx 
 cu-m_pic-getLog2UnitSize() * 2;
   coeff_t* coeffY = m_qtTempCoeff[0][qtLayer] +
 coeffOffsetY;
   int16_t* reconQt=
 m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
  @@ -793,7 +788,7 @@
   cu-setTransformSkipSubParts(bestModeId, TEXT_LUMA,
  absPartIdx, fullDepth);
 
   //--- set reconstruction for next intra prediction blocks ---
  -uint32_t qtLayer   =
 cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize;
  +uint32_t qtLayer   = log2TrSize - 2;
   uint32_t zorder= cu-getZorderIdxInCU() + absPartIdx;
   int16_t* reconQt   =
 m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
   X265_CHECK(m_qtTempShortYuv[qtLayer].m_width ==
 MAX_CU_SIZE,
  width is not max CU size\n); @@ -823,12 +818,14 @@
   bool bCheckFull  = (log2TrSize =
 cu-m_slice-m_sps-quadtreeTULog2MaxSize);
   bool bCheckSplit = (log2TrSize 
 cu-getQuadtreeTULog2MinSizeInCU(absPartIdx));
 
  -int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize;
   int isIntraSlice = (cu-m_slice

[x265] refine partition size related

2014-07-22 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1406011990 -32400
#  Tue Jul 22 15:53:10 2014 +0900
# Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73
# Parent  d303b4d860e9f06396a156726dd518d0f41fe796
refine partition size related

- reorder LumaPartitions to simplify partitionFromLog2Size()
- remove unused


diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -127,6 +127,15 @@
 primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, 
srcPicYuv-getCStride());
 }
 
+void TComYuv::copyFromYuv(TComYuv* srcYuv)
+{
+X265_CHECK(m_width = srcYuv-m_width  m_height = srcYuv-m_height, 
invalid size\n);
+
+primitives.luma_copy_pp[m_part](m_buf[0], m_width, srcYuv-m_buf[0], 
srcYuv-m_width);
+primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_cwidth, 
srcYuv-m_buf[1], srcYuv-m_cwidth);
+primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_cwidth, 
srcYuv-m_buf[2], srcYuv-m_cwidth);
+}
+
 void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx)
 {
 pixel* dstY = dstPicYuv-getLumaAddr(partIdx);
@@ -156,50 +165,9 @@
 primitives.chroma[m_csp].copy_pp[part](dstV, dstPicYuv-getCStride(), 
srcV, getCStride());
 }
 
-void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t 
width, uint32_t height, bool bLuma, bool bChroma)
+void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
 {
-int part = partitionFromSizes(width, height);
-
-X265_CHECK(width != 4 || height != 4, 4x4 partition detected\n);
-
-if (bLuma)
-{
-pixel* src = getLumaAddr(partIdx);
-pixel* dst = dstPicYuv-getLumaAddr(partIdx);
-
-uint32_t srcstride = getStride();
-uint32_t dststride = dstPicYuv-getStride();
-
-primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
-}
-if (bChroma)
-{
-pixel* srcU = getCbAddr(partIdx);
-pixel* srcV = getCrAddr(partIdx);
-pixel* dstU = dstPicYuv-getCbAddr(partIdx);
-pixel* dstV = dstPicYuv-getCrAddr(partIdx);
-
-uint32_t srcstride = getCStride();
-uint32_t dststride = dstPicYuv-getCStride();
-
-primitives.chroma[m_csp].copy_pp[part](dstU, dststride, srcU, 
srcstride);
-primitives.chroma[m_csp].copy_pp[part](dstV, dststride, srcV, 
srcstride);
-}
-}
-
-void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, 
uint32_t lumaSize)
-{
-int part = partitionFromSize(lumaSize);
-
-int16_t* dst = dstPicYuv-getLumaAddr(partIdx);
-uint32_t dststride = dstPicYuv-m_width;
-
-primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), 
getStride());
-}
-
-void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
-{
-int part = partitionFromSize(partSize);
+int part = partitionFromLog2Size(log2Size);
 
 addClipLuma(srcYuv0, srcYuv1, part);
 addClipChroma(srcYuv0, srcYuv1, part);
@@ -235,113 +203,32 @@
 primitives.chroma[m_csp].add_ps[part](dstV, dststride, srcV0, srcV1, 
src0Stride, src1Stride);
 }
 
-void TComYuv::addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, 
uint32_t width, uint32_t height, bool bLuma, bool bChroma)
-{
-int x, y;
-uint32_t src0Stride, src1Stride, dststride;
-int shiftNum, offset;
-
-pixel* srcY0 = srcYuv0-getLumaAddr(partUnitIdx);
-pixel* srcU0 = srcYuv0-getCbAddr(partUnitIdx);
-pixel* srcV0 = srcYuv0-getCrAddr(partUnitIdx);
-
-pixel* srcY1 = srcYuv1-getLumaAddr(partUnitIdx);
-pixel* srcU1 = srcYuv1-getCbAddr(partUnitIdx);
-pixel* srcV1 = srcYuv1-getCrAddr(partUnitIdx);
-
-pixel* dstY  = getLumaAddr(partUnitIdx);
-pixel* dstU  = getCbAddr(partUnitIdx);
-pixel* dstV  = getCrAddr(partUnitIdx);
-
-if (bLuma)
-{
-src0Stride = srcYuv0-getStride();
-src1Stride = srcYuv1-getStride();
-dststride  = getStride();
-shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
-for (y = 0; y  height; y++)
-{
-for (x = 0; x  width; x += 4)
-{
-dstY[x + 0] = Clip((srcY0[x + 0] + srcY1[x + 0] + offset)  
shiftNum);
-dstY[x + 1] = Clip((srcY0[x + 1] + srcY1[x + 1] + offset)  
shiftNum);
-dstY[x + 2] = Clip((srcY0[x + 2] + srcY1[x + 2] + offset)  
shiftNum);
-dstY[x + 3] = Clip((srcY0[x + 3] + srcY1[x + 3] + offset)  
shiftNum);
-}
-
-srcY0 += src0Stride;
-srcY1 += src1Stride;
-dstY  += dststride;
-}
-}
-if (bChroma)
-{
-shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-offset = (1  (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
-src0Stride = srcYuv0-getCStride();
-src1Stride = srcYuv1

Re: [x265] refine partition size related

2014-07-22 Thread Satoshi Nakagawa
To find non-optimized functions, and to see which functions can be aliased.
I think many 4:2:2 functions can be aliased.


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Tuesday, July 22, 2014 4:16 PM
 To: Development for x265
 Subject: Re: [x265] refine partition size related
 
 On 07/22, Satoshi Nakagawa wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406011990 -32400
  #  Tue Jul 22 15:53:10 2014 +0900
  # Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73
  # Parent  d303b4d860e9f06396a156726dd518d0f41fe796
  refine partition size related
 
  - reorder LumaPartitions to simplify partitionFromLog2Size()
  - remove unused
 
 Queued for testing, thanks.
 
 One question below:
 
 
  diff -r d303b4d860e9 -r b2ad081e4bfc
 source/Lib/TLibCommon/TComYuv.cpp
  --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Jul 21 22:43:38 2014
 -0500
  +++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jul 22 15:53:10 2014
 +0900
  @@ -127,6 +127,15 @@
 
 snip
 
  diff -r d303b4d860e9 -r b2ad081e4bfc source/test/testbench.cpp
  --- a/source/test/testbench.cpp Mon Jul 21 22:43:38 2014 -0500
  +++ b/source/test/testbench.cpp Tue Jul 22 15:53:10 2014 +0900
  @@ -127,6 +127,7 @@
   EncoderPrimitives cprim;
   memset(&cprim, 0, sizeof(EncoderPrimitives));
   Setup_C_Primitives(cprim);
  +Setup_Alias_Primitives(cprim);
 
   struct test_arch_t
   {
  @@ -186,6 +187,7 @@
   memset(&optprim, 0, sizeof(optprim));
   Setup_Instrinsic_Primitives(optprim, cpuid);
   Setup_Assembly_Primitives(optprim, cpuid);
  +Setup_Alias_Primitives(optprim);
 
 is there a reason to test the aliased functions, since by their nature
 they should already be being tested via another function pointer?
 
 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] trquant: store QpParam for each component

2014-07-20 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1405905842 -32400
#  Mon Jul 21 10:24:02 2014 +0900
# Node ID b40af94fd00f5f23a22854aaf498ffef32910110
# Parent  eb983d29c11acc03b91e07fe93c31503fa3a4732
trquant: store QpParam for each component

diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComLoopFilter.cpp
--- a/source/Lib/TLibCommon/TComLoopFilter.cpp  Thu Jul 17 09:29:39 2014 +0200
+++ b/source/Lib/TLibCommon/TComLoopFilter.cpp  Mon Jul 21 10:24:02 2014 +0900
@@ -48,7 +48,6 @@
 // 

 // Constants
 // 

-#define QpUV(iQpY, chFmt)  (((iQpY) < 0) ? (iQpY) : (((iQpY) > 57) ? ((iQpY) - 
6) : g_chromaScale[chFmt][(iQpY)]))
 #define DEFAULT_INTRA_TC_OFFSET 2 ///< Default intra TC offset
 
 // 

@@ -441,9 +440,6 @@
 pixel* tmpsrc = src;
 
 int stride = reconYuv-getStride();
-int qp = 0;
-int qpP = 0;
-int qpQ = 0;
 uint32_t numParts = cu-m_pic-getNumPartInCUSize()  depth;
 
 uint32_t log2UnitSize = g_log2UnitSize;
@@ -457,8 +453,8 @@
 uint32_t  partQ = 0;
 TComDataCU* cuP = cu;
 TComDataCU* cuQ = cu;
-int  betaOffsetDiv2 = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2;
-int  tcOffsetDiv2 = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2;
+int  betaOffset = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2 << 1;
+int  tcOffset = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
 
 if (dir == EDGE_VER)
 {
@@ -480,7 +476,7 @@
 bs = blockingStrength[bsAbsIdx];
 if (bs)
 {
-qpQ = cu-getQP(bsAbsIdx);
+int qpQ = cu-getQP(bsAbsIdx);
 partQ = bsAbsIdx;
 // Derive neighboring PU index
 if (dir == EDGE_VER)
@@ -492,12 +488,12 @@
 cuP = cuQ-getPUAbove(partP, partQ);
 }
 
-qpP = cuP->getQP(partP);
-qp = (qpP + qpQ + 1) >> 1;
+int qpP = cuP->getQP(partP);
+int qp = (qpP + qpQ + 1) >> 1;
 int bitdepthScale = 1 << (X265_DEPTH - 8);
 
-int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + 
DEFAULT_INTRA_TC_OFFSET * (bs - 1) + (tcOffsetDiv2  1)));
-int indexB = Clip3(0, MAX_QP, qp + (betaOffsetDiv2  1));
+int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + 
DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
+int indexB = Clip3(0, MAX_QP, qp + betaOffset);
 
 int tc =  sm_tcTable[indexTC] * bitdepthScale;
 int beta = sm_betaTable[indexB] * bitdepthScale;
@@ -544,13 +540,11 @@
 
 void TComLoopFilter::xEdgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, 
uint32_t depth, int dir, int edge, uint8_t blockingStrength[])
 {
+int chFmt = cu-getChromaFormat();
 TComPicYuv* reconYuv = cu-m_pic-getPicYuvRec();
 int stride = reconYuv-getCStride();
 pixel* srcCb = reconYuv-getCbAddr(cu-getAddr(), absZOrderIdx);
 pixel* srcCr = reconYuv-getCrAddr(cu-getAddr(), absZOrderIdx);
-int qp = 0;
-int qpP = 0;
-int qpQ = 0;
 uint32_t log2UnitSizeH = g_log2UnitSize - cu-getHorzChromaShift();
 uint32_t log2UnitSizeV = g_log2UnitSize - cu-getVertChromaShift();
 uint32_t unitSizeChromaH = 1  log2UnitSizeH;
@@ -565,7 +559,7 @@
 uint32_t  partQ;
 TComDataCU* cuP;
 TComDataCU* cuQ = cu;
-int tcOffsetDiv2 = cu-m_slice-m_pps-deblockingFilterTcOffsetDiv2;
+int tcOffset = cu-m_slice-m_pps-deblockingFilterTcOffsetDiv2  1;
 
 // Vertical Position
 uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % 
lcuWidthInBaseUnits + edge;
@@ -611,7 +605,7 @@
 
 if (bs  1)
 {
-qpQ = cu-getQP(bsAbsIdx);
+int qpQ = cu-getQP(bsAbsIdx);
 partQ = bsAbsIdx;
 // Derive neighboring PU index
 if (dir == EDGE_VER)
@@ -623,7 +617,7 @@
 cuP = cuQ-getPUAbove(partP, partQ);
 }
 
-qpP = cuP-getQP(partP);
+int qpP = cuP-getQP(partP);
 
 if (cu-m_slice-m_pps-bTransquantBypassEnabled)
 {
@@ -636,10 +630,17 @@
 {
 int chromaQPOffset  = (chromaIdx == 0) ? 
cu-m_slice-m_pps-chromaCbQpOffset : cu-m_slice-m_pps-chromaCrQpOffset;
 pixel* piTmpSrcChroma = (chromaIdx == 0) ? tmpSrcCb : tmpSrcCr;
-qp = QpUV((((qpP + qpQ + 1) >> 1) + chromaQPOffset), 
cu->getChromaFormat());
+int qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
+if (qp >= 30)
+{
+if (chFmt == CHROMA_420)
+qp = g_chromaScale[qp];
+else

[x265] scan order tables

2014-07-19 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1405768660 -32400
#  Sat Jul 19 20:17:40 2014 +0900
# Node ID 72657141a6068000bbbc8e2c20362bbbd53510bd
# Parent  eb983d29c11acc03b91e07fe93c31503fa3a4732
scan order tables

diff -r eb983d29c11a -r 72657141a606 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Thu Jul 17 09:29:39 2014 +0200
+++ b/source/Lib/TLibCommon/TComRom.cpp Sat Jul 19 20:17:40 2014 +0900
@@ -42,86 +42,6 @@
 namespace x265 {
 //! \ingroup TLibCommon
 //! \{
-// scanning order table
-uint16_t* 
g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_LOG2_TR_SIZE 
+ 1];
-
-class ScanGenerator
-{
-private:
-
-uint32_t m_line, m_column;
-uint32_t m_blockWidth, m_blockHeight;
-uint32_t m_stride;
-COEFF_SCAN_TYPE m_scanType;
-
-public:
-
-ScanGenerator(uint32_t blockWidth, uint32_t blockHeight, uint32_t stride, 
COEFF_SCAN_TYPE scanType)
-: m_line(0), m_column(0), m_blockWidth(blockWidth), 
m_blockHeight(blockHeight), m_stride(stride), m_scanType(scanType)
-{ }
-
-uint32_t GetCurrentX() const { return m_column; }
-
-uint32_t GetCurrentY() const { return m_line; }
-
-uint32_t GetNextIndex(uint32_t blockOffsetX, uint32_t blockOffsetY)
-{
-int rtn = ((m_line + blockOffsetY) * m_stride) + m_column + 
blockOffsetX;
-
-//advance line and column to the next position
-switch (m_scanType)
-{
-case SCAN_DIAG:
-{
-if ((m_column == (m_blockWidth - 1)) || (m_line == 0)) //if we 
reach the end of a rank, go diagonally down to the next one
-{
-m_line   += m_column + 1;
-m_column  = 0;
-
-if (m_line = m_blockHeight) //if that takes us outside 
the block, adjust so that we are back on the bottom row
-{
-m_column += m_line - (m_blockHeight - 1);
-m_line= m_blockHeight - 1;
-}
-}
-else
-{
-m_column++;
-m_line--;
-}
-}
-break;
-
-case SCAN_HOR:
-{
-if (m_column == (m_blockWidth - 1))
-{
-m_line++;
-m_column = 0;
-}
-else m_column++;
-}
-break;
-
-case SCAN_VER:
-{
-if (m_line == (m_blockHeight - 1))
-{
-m_column++;
-m_line = 0;
-}
-else m_line++;
-}
-break;
-
-default:
-X265_CHECK(0, ERROR: Unknown scan type %d in 
ScanGenerator::GetNextIndex, m_scanType);
-break;
-}
-
-return rtn;
-}
-};
 
 // lambda = pow(2, (double)q / 6 - 2);
 double x265_lambda_tab[MAX_MAX_QP + 1] =
@@ -190,81 +110,12 @@
 g_convertToBit[i] = c;
 c++;
 }
-
-// initialise scan orders
-for (uint32_t log2BlockSize = 0; log2BlockSize = MAX_LOG2_TR_SIZE; 
log2BlockSize++)
-{
-const uint32_t blockWidth  = 1  log2BlockSize;
-const uint32_t blockHeight = 1  log2BlockSize;
-const uint32_t totalValues = blockWidth * blockHeight;
-//non-grouped scan orders
-for (uint32_t scanTypeIndex = 0; scanTypeIndex  SCAN_NUMBER_OF_TYPES; 
scanTypeIndex++)
-{
-const COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(scanTypeIndex);
-g_scanOrder[SCAN_UNGROUPED][scanType][log2BlockSize] = 
X265_MALLOC(uint16_t, totalValues);
-ScanGenerator fullBlockScan(blockWidth, blockHeight, blockWidth, 
scanType);
-
-for (uint32_t scanPosition = 0; scanPosition  totalValues; 
scanPosition++)
-{
-
g_scanOrder[SCAN_UNGROUPED][scanType][log2BlockSize][scanPosition] = 
fullBlockScan.GetNextIndex(0, 0);
-}
-}
-
-//grouped scan orders
-const uint32_t  groupWidth   = 1  MLS_CG_LOG2_SIZE;
-const uint32_t  groupHeight  = 1  MLS_CG_LOG2_SIZE;
-const uint32_t  widthInGroups= blockWidth   MLS_CG_LOG2_SIZE;
-const uint32_t  heightInGroups   = blockHeight  MLS_CG_LOG2_SIZE;
-
-const uint32_t  groupSize= groupWidth* groupHeight;
-const uint32_t  totalGroups  = widthInGroups * heightInGroups;
-
-for (uint32_t scanTypeIndex = 0; scanTypeIndex  SCAN_NUMBER_OF_TYPES; 
scanTypeIndex++)
-{
-const COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(scanTypeIndex);
-
-g_scanOrder[SCAN_GROUPED_4x4][scanType][log2BlockSize] = 
X265_MALLOC(uint16_t, totalValues);
-
-ScanGenerator fullBlockScan(widthInGroups, heightInGroups, 
groupWidth, scanType);
-
-for (uint32_t groupIndex = 0; groupIndex  totalGroups; 
groupIndex++)
-{
-const uint32_t groupPositionY  = fullBlockScan.GetCurrentY

[x265] zero stride for zeroPel[]

2014-07-10 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1405052989 -32400
#  Fri Jul 11 13:29:49 2014 +0900
# Node ID 18a6ee92620f1f7266dfbeff3b9010aae356d796
# Parent  e3e077965c39a56a24e09189652e1de3c5a0e3ea
zero stride for zeroPel[]

diff -r e3e077965c39 -r 18a6ee92620f source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 19:29:46 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jul 11 13:29:49 2014 +0900
@@ -47,7 +47,7 @@
 
 using namespace x265;
 
-ALIGN_VAR_32(const pixel, RDCost::zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = { 0 };
+ALIGN_VAR_32(const pixel, RDCost::zeroPel[MAX_CU_SIZE]) = { 0 };
 
 TEncSearch::TEncSearch()
 {
@@ -2404,7 +2404,7 @@
 {
 int size = g_convertToBit[cuSize];
 zeroPsyEnergyY = m_rdCost-psyCost(size, fencYuv-getLumaAddr(), 
fencYuv-getStride(),
-(pixel*)RDCost::zeroPel, MAX_CU_SIZE); // need to check 
whether zero distortion is similar to psyenergy of fenc
+(pixel*)RDCost::zeroPel, 0); // need to check whether zero 
distortion is similar to psyenergy of fenc
 zeroCost = m_rdCost-calcPsyRdCost(zeroDistortion, zeroResiBits, 
zeroPsyEnergyY);
 }
 else
@@ -2813,13 +2813,13 @@
 }
 
 int partSize = partitionFromSize(trSize);
-uint32_t distY = 
primitives.sse_sp[partSize](resiYuv-getLumaAddr(absPartIdx), resiYuv-m_width, 
(pixel*)RDCost::zeroPel, trSize);
+uint32_t distY = 
primitives.sse_sp[partSize](resiYuv-getLumaAddr(absPartIdx), resiYuv-m_width, 
(pixel*)RDCost::zeroPel, 0);
 uint32_t psyEnergyY = 0;
 if (m_rdCost-psyRdEnabled())
 {
-int size = g_convertToBit[trSize];
+int size = log2TrSize - 2;
 psyEnergyY = m_rdCost-psyCost(size, 
fencYuv-getLumaAddr(absPartIdx), fencYuv-getStride(),
-(pixel*)RDCost::zeroPel, trSize); // need to check whether 
zero distortion is similar to psyenergy of fenc
+(pixel*)RDCost::zeroPel, 0); // need to check whether zero 
distortion is similar to psyenergy of fenc
 }
 int16_t *curResiY = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
 X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, width 
not full CU\n);
@@ -2845,7 +2845,7 @@
 uint32_t stride = fencYuv-getStride();
 //= reconstruction =
 primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, 
pred, curResiY, stride, strideResiY);
-int size = g_convertToBit[trSize];
+int size = log2TrSize - 2;
 nonZeroPsyEnergyY = m_rdCost-psyCost(size, 
fencYuv-getLumaAddr(absPartIdx), fencYuv-getStride(),
 cu-getPic()-getPicYuvRec()-getLumaAddr(cu-getAddr(), 
zorder), cu-getPic()-getPicYuvRec()-getStride());
 }
@@ -2922,12 +2922,10 @@
 int16_t *curResiU = 
m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
 int16_t *curResiV = 
m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
 
-distU = 
m_rdCost-scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv-getCbAddr(absPartIdxC),
 resiYuv-m_cwidth, (pixel*)RDCost::zeroPel, trSizeC));
-
+distU = 
m_rdCost-scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv-getCbAddr(absPartIdxC),
 resiYuv-m_cwidth, (pixel*)RDCost::zeroPel, 0));
 if (outZeroDist)
-{
 *outZeroDist += distU;
-}
+
 if (numSigU[tuIterator.section])
 {
 int curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCbQpOffset() + 
cu-getSlice()-getSliceQpDeltaCb();
@@ -2938,7 +2936,6 @@
  curResiU, 
strideResiC);
 const uint32_t nonZeroDistU = 
m_rdCost-scaleChromaDistCb(dist);
 uint32_t nonZeroPsyEnergyU = 0;
-
 if (m_rdCost-psyRdEnabled())
 {
 pixel*   pred = predYuv-getCbAddr(absPartIdxC);
@@ -2948,7 +2945,7 @@
 uint32_t stride = fencYuv-getCStride();
 //= reconstruction =
 primitives.luma_add_ps[partSizeC](reconIPred, 
reconIPredStride, pred, curResiU, stride, strideResiC);
-int size = g_convertToBit[trSizeC];
+int size = log2TrSizeC - 2;
 nonZeroPsyEnergyU = m_rdCost-psyCost(size, 
fencYuv-getCbAddr(absPartIdxC), fencYuv-getCStride(),
 
cu-getPic()-getPicYuvRec()-getCbAddr(cu-getAddr(), zorder), 
cu-getPic()-getPicYuvRec()-getCStride());
 }
@@ -3007,7 +3004,7 @@
 if (!numSigU[tuIterator.section])
 primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0

[x265] use std::swap() for readability

2014-07-09 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1404898046 -32400
#  Wed Jul 09 18:27:26 2014 +0900
# Node ID a3f4317f4acd89b7ef9bb8616068f9e4ff24328c
# Parent  644773b8532929a30f910fd269f521e44621f2f7
use std::swap() for readability

diff -r 644773b85329 -r a3f4317f4acd 
source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 
13:55:42 2014 +0530
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 
18:27:26 2014 +0900
@@ -535,12 +535,10 @@
 int isChroma = (yCbCr != 0) ? 1 : 0;
 int shift;
 int cuHeightTmp;
-pixel* tmpLSwap;
 pixel* tmpL;
 pixel* tmpU;
 pixel* clipTbl = NULL;
 int32_t *offsetBo = NULL;
-int32_t *tmp_swap;
 
 picWidthTmp  = (isChroma == 0) ? m_picWidth  : m_picWidth   
m_hChromaShift;
 picHeightTmp = (isChroma == 0) ? m_picHeight : m_picHeight  
m_vChromaShift;
@@ -707,9 +705,7 @@
 
 m_upBufft[startX] = signDown2;
 
-tmp_swap  = m_upBuff1;
-m_upBuff1 = m_upBufft;
-m_upBufft = tmp_swap;
+std::swap(m_upBuff1, m_upBufft);
 
 rec += stride;
 }
@@ -775,9 +771,7 @@
 
 //   if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
 {
-tmpLSwap = m_tmpL1;
-m_tmpL1  = m_tmpL2;
-m_tmpL2  = tmpLSwap;
+std::swap(m_tmpL1, m_tmpL2);
 }
 }
 
@@ -864,7 +858,6 @@
 int frameWidthInCU = m_pic-getFrameWidthInCU();
 int frameHeightInCU = m_pic-getFrameHeightInCU();
 int stride;
-pixel *tmpUSwap;
 int sChroma = (yCbCr == 0) ? 0 : 1;
 bool mergeLeftFlag;
 int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY : m_saoBitIncreaseC;
@@ -976,9 +969,7 @@
 }
 }
 
-tmpUSwap   = m_tmpU1[yCbCr];
-m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
-m_tmpU2[yCbCr] = tmpUSwap;
+std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]);
 }
 }
 
@@ -1018,7 +1009,6 @@
 int addr;
 int frameWidthInCU = m_pic-getFrameWidthInCU();
 int stride;
-pixel *tmpUSwap;
 int sChroma = (yCbCr == 0) ? 0 : 1;
 bool mergeLeftFlag;
 int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY : m_saoBitIncreaseC;
@@ -1122,9 +1112,7 @@
 }
 }
 
-tmpUSwap   = m_tmpU1[yCbCr];
-m_tmpU1[yCbCr] = m_tmpU2[yCbCr];
-m_tmpU2[yCbCr] = tmpUSwap;
+std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]);
 }
 }
 
diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 13:55:42 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 18:27:26 2014 +0900
@@ -1258,22 +1258,11 @@
 uint64_t bestCost = m_rdCost-psyRdEnabled() ? 
outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost;
 if (tempCost  bestCost)
 {
-TComDataCU* tmp = outTempCU;
-outTempCU = outBestCU;
-outBestCU = tmp;
-
-// Change Prediction data
-TComYuv* yuv = NULL;
-yuv = outBestPredYuv;
-outBestPredYuv = m_tmpPredYuv[depth];
-m_tmpPredYuv[depth] = yuv;
-
-yuv = rpcYuvReconBest;
-rpcYuvReconBest = m_tmpRecoYuv[depth];
-m_tmpRecoYuv[depth] = yuv;
-
+std::swap(outBestCU, outTempCU);
+std::swap(outBestPredYuv, m_tmpPredYuv[depth]);
+std::swap(rpcYuvReconBest, m_tmpRecoYuv[depth]);
 
m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]);
-}
+}
 outTempCU-setQPSubParts(origQP, 0, depth);
 outTempCU-setSkipFlagSubParts(false, 0, depth);
 if (!bestIsSkip)
@@ -1446,21 +1435,14 @@
 
 if (tempCost  bestCost)
 {
-TComYuv* yuv;
 // Change Information data
-TComDataCU* cu = outBestCU;
-outBestCU = outTempCU;
-outTempCU = cu;
+std::swap(outBestCU, outTempCU);
 
 // Change Prediction data
-yuv = m_bestPredYuv[depth];
-m_bestPredYuv[depth] = m_tmpPredYuv[depth];
-m_tmpPredYuv[depth] = yuv;
+std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]);
 
 // Change Reconstruction data
-yuv = m_bestRecoYuv[depth];
-m_bestRecoYuv[depth] = m_tmpRecoYuv[depth];
-m_tmpRecoYuv[depth] = yuv;
+std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]);
 
 
m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]);
 }
diff -r 644773b85329 -r a3f4317f4acd 
source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibEncoder

Re: [x265] quant: returns numSig instead of absSum and lastPos

2014-07-07 Thread Satoshi Nakagawa

> LastPos output for IDCT bypass optimize, just didn't upload it now.

DC-only detection can be replaced by (numSig == 1 && coeff[0] != 0).
The exact lastPos is not used elsewhere.

  // DC only
 -if (lastPos == 0 && !((trSize == 4) && (mode != REG_DCT)))
 +if (numSig == 1 && coeff[0] != 0 && !useDST)



 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 chen
 Sent: Tuesday, July 08, 2014 12:09 AM
 To: Development for x265
 Subject: Re: [x265] quant: returns numSig instead of absSum and lastPos
 
 At 2014-07-07 16:04:03, Satoshi Nakagawa <nakagawa...@oki.com> wrote:
 > # HG changeset patch
 > # User Satoshi Nakagawa <nakagawa...@oki.com>
 > # Date 1404720026 -32400
 > #      Mon Jul 07 17:00:26 2014 +0900
 > # Node ID dcf6f2ce907c59eedc3d488a7f047a5f094bf925
 > # Parent  11c808e562b894d84961cf00080173321e272884
 > quant: returns numSig instead of absSum and lastPos
 > -    packuswb    m7, m7
 > -    pxor        m0, m0
 > -    psadbw      m0, m7
 > +    phaddd      m7, m7
 > +    phaddd      m7, m7
 old code have 2 uops, the new one have 6 uops
 LastPos output for IDCT bypass optimize, just didn't upload it now.
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] add primitives.nquant for RDOQ

2014-07-02 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1404286661 -32400
#  Wed Jul 02 16:37:41 2014 +0900
# Node ID 3f25ca9b5addda057040a5e1a544b9ede9afc509
# Parent  a18972fd05b1d6242a881bef979b9e1ff17543d9
add primitives.nquant for RDOQ

diff -r a18972fd05b1 -r 3f25ca9b5add source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 01 14:58:35 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Jul 02 16:37:41 2014 +0900
@@ -508,23 +508,30 @@
 uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, 
coeff_t* dstCoeff, uint32_t trSize,
 TextType ttype, uint32_t absPartIdx, 
int32_t *lastPos)
 {
-x265_emms();
-selectLambda(ttype);
-
 const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
-uint32_t absSum = 0;
 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // 
Represents scaling through forward transform
-uint32_t goRiceParam = 0;
-double blockUncodedCost = 0;
 int scalingListType = (cu-isIntra(absPartIdx) ? 0 : 3) + ttype;
 
 X265_CHECK(scalingListType  6, scaling list type out of range\n);
 
 int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; // Right shift 
of non-RDOQ quantizer;  level = (coeff*Q + offset)q_bits
 int add = (1  (qbits - 1));
-double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, 
m_qpParam.m_rem);
 int32_t *qCoef = getQuantCoeff(scalingListType, m_qpParam.m_rem, 
log2TrSize - 2);
 
+int numCoeff = 1  log2TrSize * 2;
+int scaledCoeff[32 * 32];
+uint32_t numSig = primitives.nquant(srcCoeff, qCoef, scaledCoeff, 
dstCoeff, qbits, add, numCoeff);
+
+X265_CHECK(numSig == primitives.count_nonzero(dstCoeff, numCoeff), numSig 
differ\n);
+if (numSig == 0)
+return 0;
+
+x265_emms();
+selectLambda(ttype);
+
+double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, 
m_qpParam.m_rem);
+
+double blockUncodedCost = 0;
 double costCoeff[32 * 32];
 double costSig[32 * 32];
 double costCoeff0[32 * 32];
@@ -544,6 +551,7 @@
 intc2= 0;
 double baseCost  = 0;
 intlastScanPos   = -1;
+uint32_t goRiceParam = 0;
 uint32_t c1Idx   = 0;
 uint32_t c2Idx   = 0;
 int cgLastScanPos= -1;
@@ -567,16 +575,13 @@
 //= quantization =
 uint32_t blkPos = codingParameters.scan[scanPos];
 // set coeff
-int Q = qCoef[blkPos];
 double scaleFactor = errScale[blkPos];
-int levelDouble= srcCoeff[blkPos];
-levelDouble= 
(int)std::minint64_t((int64_t)abs((int)levelDouble) * Q, MAX_INT - add);
 
-uint32_t maxAbsLevel = (levelDouble + add)  qbits;
+int levelDouble  = scaledCoeff[blkPos];
+uint32_t maxAbsLevel = abs(dstCoeff[blkPos]);
 
 costCoeff0[scanPos] = ((uint64_t)levelDouble * levelDouble) * 
scaleFactor;
 blockUncodedCost   += costCoeff0[scanPos];
-dstCoeff[blkPos]= maxAbsLevel;
 
 if (maxAbsLevel  0  lastScanPos  0)
 {
@@ -776,7 +781,7 @@
 //= estimate last position =
 if (lastScanPos  0)
 {
-return absSum;
+return 0;
 }
 
 double bestCost = 0;
@@ -840,6 +845,7 @@
 } // end if (sigCoeffGroupFlag[ cgBlkPos ])
 } // end for
 
+uint32_t absSum = 0;
 for (int pos = 0; pos  bestLastIdxp1; pos++)
 {
 int blkPos = codingParameters.scan[pos];
diff -r a18972fd05b1 -r 3f25ca9b5add source/common/dct.cpp
--- a/source/common/dct.cpp Tue Jul 01 14:58:35 2014 -0500
+++ b/source/common/dct.cpp Wed Jul 02 16:37:41 2014 +0900
@@ -780,10 +780,8 @@
 
 for (int blockpos = 0; blockpos  numCoeff; blockpos++)
 {
-int level;
-int sign;
-level = coef[blockpos];
-sign  = (level  0 ? -1 : 1);
+int level = coef[blockpos];
+int sign  = (level  0 ? -1 : 1);
 
 int tmplevel = abs(level) * quantCoeff[blockpos];
 level = ((tmplevel + add)  qBits);
@@ -798,6 +796,27 @@
 return acSum;
 }
 
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, 
int32_t* qCoef, int qBits, int add, int numCoeff)
+{
+uint32_t numSig = 0;
+
+for (int blockpos = 0; blockpos  numCoeff; blockpos++)
+{
+int level = coef[blockpos];
+int sign  = (level  0 ? -1 : 1);
+
+int tmplevel = abs(level) * quantCoeff[blockpos];
+scaledCoeff[blockpos] = tmplevel;
+level = ((tmplevel + add)  qBits);
+if (level)
+++numSig;
+level *= sign;
+qCoef[blockpos] = Clip3(-32768, 32767, level);
+}
+
+return numSig;
+}
+
 int  count_nonzero_c(const int32_t *quantCoeff, int numCoeff)
 {
 X265_CHECK(((intptr_t)quantCoeff  15) == 0, quant buffer not aligned\n);
@@ -822,6 +841,7

[x265] [PATCH 1 of 2] fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it

2014-07-01 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1404197088 -32400
#  Tue Jul 01 15:44:48 2014 +0900
# Node ID 0f21455078694344f7d3ed1e69c77217b48bb031
# Parent  38da32f28481f6acd17a9ab4f2e73ad057c54972
fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it

diff -r 38da32f28481 -r 0f2145507869 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jun 30 12:42:51 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 01 15:44:48 2014 +0900
@@ -508,6 +508,9 @@
 uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, 
coeff_t* dstCoeff, uint32_t trSize,
 TextType ttype, uint32_t absPartIdx, 
int32_t *lastPos)
 {
+x265_emms();
+selectLambda(ttype);
+
 const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
 uint32_t absSum = 0;
 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // 
Represents scaling through forward transform
diff -r 38da32f28481 -r 0f2145507869 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 30 12:42:51 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jul 01 15:44:48 2014 +0900
@@ -428,8 +428,6 @@
 
 int chFmt = cu-getChromaFormat();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
TEXT_LUMA, absPartIdx, lastPos, useTransformSkip);
 
 //--- set coded block flag ---
@@ -515,8 +513,6 @@
 curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 }
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(ttype);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
ttype, absPartIdx, lastPos, useTransformSkipC);
 
 //--- set coded block flag ---
@@ -905,7 +901,6 @@
 int lastPos = -1;
 
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
TEXT_LUMA, absPartIdx, lastPos, useTransformSkip);
 
 //--- set coded block flag ---
@@ -1421,8 +1416,6 @@
 curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 }
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(ttype);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, 
tuSize, ttype, absPartIdxC, lastPos, useTransformSkipC);
 
 //--- set coded block flag ---
@@ -2702,13 +2695,11 @@
 
 cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
 
-m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 int16_t *curResiY = resiYuv-getLumaAddr(absPartIdx);
 const uint32_t strideResiY = resiYuv-m_width;
 const uint32_t strideResiC = resiYuv-m_cwidth;
 
+m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
 absSumY = m_trQuant-transformNxN(cu, curResiY, strideResiY, coeffCurY,
   trSize, TEXT_LUMA, absPartIdx, 
lastPosY, false, curuseRDOQ);
 
@@ -2746,13 +2737,11 @@
 
 int curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCbQpOffset() + 
cu-getSlice()-getSliceQpDeltaCb();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(TEXT_CHROMA_U);
 absSumU = m_trQuant-transformNxN(cu, curResiU, strideResiC, 
coeffCurU + subTUBufferOffset,
   trSizeC, TEXT_CHROMA_U, 
absPartIdxC, lastPosU, false, curuseRDOQ);
 
 curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(TEXT_CHROMA_V);
 absSumV = m_trQuant-transformNxN(cu, curResiV, strideResiC, 
coeffCurV + subTUBufferOffset,
   trSizeC, TEXT_CHROMA_V, 
absPartIdxC, lastPosV, false, curuseRDOQ);
 
@@ -2915,8 +2904,6 @@
 }
 
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 absSum[TEXT_LUMA][0] = m_trQuant-transformNxN(cu, 
resiYuv-getLumaAddr

[x265] [PATCH 2 of 2] split rate calculation functions to luma and chroma to simplify luma path

2014-07-01 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1404197154 -32400
#  Tue Jul 01 15:45:54 2014 +0900
# Node ID ad4455ed3815dd826e46d93e2585747c58a2c858
# Parent  0f21455078694344f7d3ed1e69c77217b48bb031
split rate calculation functions to luma and chroma to simplify luma path

diff -r 0f2145507869 -r ad4455ed3815 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cppTue Jul 01 15:44:48 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncEntropy.cppTue Jul 01 15:45:54 2014 +0900
@@ -325,7 +325,7 @@
 }
 else
 {
-m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, 
cu-getTransformIdx(absPartIdx), absPartIdxStep, tuSize, tuSize, (subdiv == 0));
+m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, 
cu-getTransformIdx(absPartIdx));
 }
 
 if (cbfY || cbfU || cbfV)
@@ -342,7 +342,7 @@
 }
 if (cbfY)
 {
-m_entropyCoder-codeCoeffNxN(cu, (cu-getCoeffY() + offsetLuma), 
absPartIdx, tuSize, TEXT_LUMA);
+m_entropyCoder-codeCoeffNxN(cu, (cu-getCoeffY() + offsetLuma), 
absPartIdx, log2TrSize, TEXT_LUMA);
 }
 
 int chFmt = cu-getChromaFormat();
@@ -351,7 +351,7 @@
 uint32_t partNum = cu-getPic()-getNumPartInCU()  ((depth - 1) 
 1);
 if ((absPartIdx  (partNum - 1)) == (partNum - 1))
 {
-uint32_t trSizeC   = 1  log2TrSize;
+const uint32_t log2TrSizeC = 2;
 const bool splitIntoSubTUs = (chFmt == CHROMA_422);
 
 uint32_t curPartNum = cu-getPic()-getNumPartInCU()  
((depth - 1)  1);
@@ -364,10 +364,10 @@
 do
 {
 uint32_t cbf = 
cu-getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + 
splitIntoSubTUs);
-uint32_t subTUIndex = tuIterator.m_section * trSizeC * 
trSizeC;
 if (cbf)
 {
-m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + 
m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, 
(TextType)chromaId);
+uint32_t subTUOffset = tuIterator.m_section  
(log2TrSizeC * 2);
+m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + 
m_bakChromaOffset + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, 
(TextType)chromaId);
 }
 }
 while (isNextTUSection(tuIterator));
@@ -376,7 +376,7 @@
 }
 else
 {
-uint32_t trSizeC = tuSize  hChromaShift;
+uint32_t log2TrSizeC = log2TrSize - hChromaShift;
 const bool splitIntoSubTUs = (chFmt == CHROMA_422);
 uint32_t curPartNum = cu-getPic()-getNumPartInCU()  (depth  
1);
 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId = TEXT_CHROMA_V; 
chromaId++)
@@ -387,10 +387,10 @@
 do
 {
 uint32_t cbf = cu-getCbf(tuIterator.m_absPartIdxTURelCU, 
(TextType)chromaId, trIdx + splitIntoSubTUs);
-uint32_t subTUIndex = tuIterator.m_section * trSizeC * 
trSizeC;
 if (cbf)
 {
-m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + 
offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, 
(TextType)chromaId);
+uint32_t subTUOffset = tuIterator.m_section  
(log2TrSizeC * 2);
+m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + 
offsetChroma + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, 
(TextType)chromaId);
 }
 }
 while (isNextTUSection(tuIterator));
@@ -540,11 +540,6 @@
 m_entropyCoder-codeQtRootCbf(cu, absPartIdx);
 }
 
-void TEncEntropy::encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t 
trDepth)
-{
-m_entropyCoder-codeQtCbfZero(cu, ttype, trDepth);
-}
-
 void TEncEntropy::encodeQtRootCbfZero(TComDataCU* cu)
 {
 m_entropyCoder-codeQtRootCbfZero(cu);
@@ -593,11 +588,6 @@
 xEncodeTransform(cu, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, 
depth, cuSize, 0, bCodeDQP);
 }
 
-void TEncEntropy::encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t 
absPartIdx, uint32_t trSize, TextType ttype)
-{
-m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, trSize, ttype);
-}
-
 void TEncEntropy::estimateBit(estBitsSbacStruct* estBitsSBac, int trSize, 
TextType ttype)
 {
 ttype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA;
diff -r 0f2145507869 -r ad4455ed3815 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h  Tue Jul 01 15:44:48 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncEntropy.h  Tue Jul 01 15:45:54 2014 +0900
@@ -114,14 +114,24 @@
 
 void encodeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
 void encodeQtCbf

[x265] fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it

2014-06-27 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1403877807 -32400
#  Fri Jun 27 23:03:27 2014 +0900
# Node ID 77f443fe169ca23969df5d5ee6968543bfa5e794
# Parent  32aa6cc3cf4d108ac92f5d29258b2c38ca888d29
fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it

diff -r 32aa6cc3cf4d -r 77f443fe169c source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Fri Jun 27 23:03:27 2014 +0900
@@ -508,6 +508,9 @@
 uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, 
coeff_t* dstCoeff, uint32_t trSize,
 TextType ttype, uint32_t absPartIdx, 
int32_t *lastPos)
 {
+x265_emms();
+selectLambda(ttype);
+
 const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
 uint32_t absSum = 0;
 int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // 
Represents scaling through forward transform
diff -r 32aa6cc3cf4d -r 77f443fe169c source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jun 26 17:19:08 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jun 27 23:03:27 2014 +0900
@@ -428,8 +428,6 @@
 
 int chFmt = cu-getChromaFormat();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
TEXT_LUMA, absPartIdx, lastPos, useTransformSkip);
 
 //--- set coded block flag ---
@@ -515,8 +513,6 @@
 curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 }
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(ttype);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
ttype, absPartIdx, lastPos, useTransformSkipC);
 
 //--- set coded block flag ---
@@ -905,7 +901,6 @@
 int lastPos = -1;
 
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, 
TEXT_LUMA, absPartIdx, lastPos, useTransformSkip);
 
 //--- set coded block flag ---
@@ -1421,8 +1416,6 @@
 curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 }
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(ttype);
-
 absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, 
tuSize, ttype, absPartIdxC, lastPos, useTransformSkipC);
 
 //--- set coded block flag ---
@@ -2702,13 +2695,11 @@
 
 cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
 
-m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 int16_t *curResiY = resiYuv-getLumaAddr(absPartIdx);
 const uint32_t strideResiY = resiYuv-m_width;
 const uint32_t strideResiC = resiYuv-m_cwidth;
 
+m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
 absSumY = m_trQuant-transformNxN(cu, curResiY, strideResiY, coeffCurY,
   trSize, TEXT_LUMA, absPartIdx, 
lastPosY, false, curuseRDOQ);
 
@@ -2746,13 +2737,11 @@
 
 int curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCbQpOffset() + 
cu-getSlice()-getSliceQpDeltaCb();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(TEXT_CHROMA_U);
 absSumU = m_trQuant-transformNxN(cu, curResiU, strideResiC, 
coeffCurU + subTUBufferOffset,
   trSizeC, TEXT_CHROMA_U, 
absPartIdxC, lastPosU, false, curuseRDOQ);
 
 curChromaQpOffset = 
cu-getSlice()-getPPS()-getChromaCrQpOffset() + 
cu-getSlice()-getSliceQpDeltaCr();
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, 
cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt);
-m_trQuant-selectLambda(TEXT_CHROMA_V);
 absSumV = m_trQuant-transformNxN(cu, curResiV, strideResiC, 
coeffCurV + subTUBufferOffset,
   trSizeC, TEXT_CHROMA_V, 
absPartIdxC, lastPosV, false, curuseRDOQ);
 
@@ -2915,8 +2904,6 @@
 }
 
 m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, 
chFmt);
-m_trQuant-selectLambda(TEXT_LUMA);
-
 absSum[TEXT_LUMA][0] = m_trQuant-transformNxN(cu, 
resiYuv-getLumaAddr

[x265] cli: add --ipratio and --pbratio

2014-06-26 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1403706071 -32400
#  Wed Jun 25 23:21:11 2014 +0900
# Node ID 3ca045895945f0afc6b4d1b1868feb00382796a3
# Parent  09450ac6dc7d0f495582bf327488612755df1719
cli: add --ipratio and --pbratio

diff -r 09450ac6dc7d -r 3ca045895945 source/common/param.cpp
--- a/source/common/param.cpp   Tue Jun 24 15:41:55 2014 +0900
+++ b/source/common/param.cpp   Wed Jun 25 23:21:11 2014 +0900
@@ -633,6 +633,8 @@
 OPT(aud) p-bEnableAccessUnitDelimiters = atobool(value);
 OPT(b-pyramid) p-bBPyramid = atobool(value);
 OPT(hrd) p-bEmitHRDSEI = atobool(value);
+OPT2(ipratio, ip-factor) p-rc.ipFactor = atof(value);
+OPT2(pbratio, pb-factor) p-rc.pbFactor = atof(value);
 OPT(aq-mode) p-rc.aqMode = atoi(value);
 OPT(aq-strength) p-rc.aqStrength = atof(value);
 OPT(vbv-maxrate) p-rc.vbvMaxBitrate = atoi(value);
diff -r 09450ac6dc7d -r 3ca045895945 source/x265.cpp
--- a/source/x265.cpp   Tue Jun 24 15:41:55 2014 +0900
+++ b/source/x265.cpp   Wed Jun 25 23:21:11 2014 +0900
@@ -142,6 +142,8 @@
 { qp, required_argument, NULL, 'q' },
 { aq-mode,required_argument, NULL, 0 },
 { aq-strength,required_argument, NULL, 0 },
+{ ipratio,required_argument, NULL, 0 },
+{ pbratio,required_argument, NULL, 0 },
 { cbqpoffs,   required_argument, NULL, 0 },
 { crqpoffs,   required_argument, NULL, 0 },
 { rd, required_argument, NULL, 0 },
@@ -394,6 +396,8 @@
 H0(   --aq-mode integer   Mode for Adaptive Quantization - 
0:none 1:uniform AQ 2:auto variance. Default %d\n, param-rc.aqMode);
 H0(   --aq-strength float Reduces blocking and blurring in flat 
and textured areas.(0 to 3.0). Default %f\n, param-rc.aqStrength);
 H0(   --[no-]cutree Enable cutree for Adaptive 
Quantization. Default %s\n, OPT(param-rc.cuTree));
+H0(   --ipratio float QP factor between I and P. Default 
%f\n, param-rc.ipFactor);
+H0(   --pbratio float QP factor between P and B. Default 
%f\n, param-rc.pbFactor);
 H0(   --cbqpoffs integer  Chroma Cb QP Offset. Default %d\n, 
param-cbQpOffset);
 H0(   --crqpoffs integer  Chroma Cr QP Offset. Default %d\n, 
param-crQpOffset);
 H0(   --[no-]hrdEnable HRD parameters signalling. 
Default %s\n, OPT(param-bEmitHRDSEI));
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] RDOQ enabled rdLevel changed

2014-06-26 Thread Satoshi Nakagawa

Is this change intended?

 # HG changeset patch
 # User Sumalatha Polureddy <sumala...@multicorewareinc.com>
 # Date 1403689018 -19800
 #  Wed Jun 25 15:06:58 2014 +0530
 # Node ID e2ed009d296af39926d79f1a245974f158d6861a
 # Parent  5797d6a8197c96b68752705167ced6cb63194013
 psy-rd: implement psy-rd in rdlevel=4,3 and 2

 diff -r 5797d6a8197c -r e2ed009d296a source/encoder/encoder.cpp
 --- a/source/encoder/encoder.cpp  Wed Jun 25 18:21:34 2014 +0530
 +++ b/source/encoder/encoder.cpp  Wed Jun 25 15:06:58 2014 +0530
 @@ -1267,7 +1267,7 @@
  }
  
  // disable RDOQ if psy-rd is enabled; until we make it psy-aware
 -m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4;
 +m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 1;
  
  if (p-bLossless)
  {
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 1 of 2] fix xGetIntraBitsQTChroma() for 4:2:2 [CHANGES OUTPUT 4:2:2 with tskip]

2014-06-24 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1403592115 -32400
#  Tue Jun 24 15:41:55 2014 +0900
# Node ID 3af58371c5ff95fc838db106610423f2c0ee8265
# Parent  18f936182df50cc5126d1707cd7c2b5fef289ccb
fix xGetIntraBitsQTChroma() for 4:2:2 [CHANGES OUTPUT 4:2:2 with tskip]

diff -r 18f936182df5 -r 3af58371c5ff source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp   Mon Jun 23 17:03:49 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp   Tue Jun 24 15:41:55 2014 +0900
@@ -2116,7 +2116,7 @@
 uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize);
 
 #if CHECKED_BUILD || _DEBUG
-X265_CHECK(numSig  0, cbf check fail);
+X265_CHECK(numSig  0, cbf check fail\n);
 #endif
 
 bool beValid;
diff -r 18f936182df5 -r 3af58371c5ff source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 23 17:03:49 2014 -0700
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:41:55 2014 +0900
@@ -254,7 +254,8 @@
 trDepth--;
 uint32_t qpdiv = cu-getPic()-getNumPartInCU()  ((cu-getDepth(0) + 
trDepth)  1);
 bool bFirstQ = ((absPartIdx  (qpdiv - 1)) == 0);
-if (!bFirstQ)
+bool bSecondQ = (chFmt == CHROMA_422  splitIntoSubTUs) ? 
((absPartIdx  (qpdiv - 1)) == 2) : false;
+if ((!bFirstQ)  (!bSecondQ))
 {
 return;
 }
@@ -1222,7 +1223,7 @@
 }
 else
 {
-uint32_t bitsTmp = xGetIntraBitsQTChroma(cu, 
trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
+uint32_t bitsTmp = singleCbfCTmp ? 
xGetIntraBitsQTChroma(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs) : 0;
 if (m_rdCost-psyRdEnabled())
 {
 uint32_t zorder = cu-getZorderIdxInCU() + 
absPartIdxC;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2 of 2] refine intra tskip related.

2014-06-24 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa nakagawa...@oki.com
# Date 1403592156 -32400
#  Tue Jun 24 15:42:36 2014 +0900
# Node ID ed2786407c46be823515c78cf23d7e0f32ee10fc
# Parent  3af58371c5ff95fc838db106610423f2c0ee8265
refine intra tskip related.

diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:42:36 2014 +0900
@@ -197,21 +197,6 @@
 primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), 
getStride());
 }
 
-void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, 
uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs)
-{
-X265_CHECK(chromaId == 1 || chromaId == 2, invalid chroma id);
-
-int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : 
partitionFromSize(lumaSize);
-
-pixel*   src = getChromaAddr(chromaId, partIdx);
-int16_t* dst = dstPicYuv-getChromaAddr(chromaId, partIdx);
-
-uint32_t srcstride = getCStride();
-uint32_t dststride = dstPicYuv-m_cwidth;
-
-primitives.chroma[m_csp].copy_ps[part](dst, dststride, src, srcstride);
-}
-
 void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
 {
 int part = partitionFromSize(partSize);
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h   Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibCommon/TComYuv.h   Tue Jun 24 15:42:36 2014 +0900
@@ -131,7 +131,6 @@
 voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t 
width, uint32_t height, bool bLuma, bool bChroma);
 
 voidcopyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t 
lumaSize);
-voidcopyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, 
uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs);
 
 // 
--
 //  Algebraic operation for YUV buffer
diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:41:55 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:42:36 2014 +0900
@@ -57,9 +57,6 @@
 m_qtTempCoeff[2] = NULL;
 m_qtTempTrIdx = NULL;
 m_qtTempShortYuv = NULL;
-m_qtTempTUCoeff[0] = NULL;
-m_qtTempTUCoeff[1] = NULL;
-m_qtTempTUCoeff[2] = NULL;
 for (int i = 0; i  3; i++)
 {
 m_qtTempTransformSkipFlag[i] = NULL;
@@ -83,14 +80,12 @@
 m_qtTempShortYuv[i].destroy();
 }
 
-X265_FREE(m_qtTempTUCoeff[0]);
 X265_FREE(m_qtTempTrIdx);
 X265_FREE(m_qtTempCbf[0]);
 X265_FREE(m_qtTempTransformSkipFlag[0]);
 
 delete[] m_qtTempCoeff[0];
 delete[] m_qtTempShortYuv;
-m_qtTempTransformSkipYuv.destroy();
 }
 
 bool TEncSearch::init(Encoder* top, RDCost* rdCost, TComTrQuant* trQuant)
@@ -133,11 +128,7 @@
 m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + 
numPartitions;
 m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + 
numPartitions * 2;
 
-CHECKED_MALLOC(m_qtTempTUCoeff[0], coeff_t, MAX_CU_SIZE * MAX_CU_SIZE * 3);
-m_qtTempTUCoeff[1] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE;
-m_qtTempTUCoeff[2] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE * 2;
-
-return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize, 
m_param-internalCsp);
+return true;
 
 fail:
 return false;
@@ -224,7 +215,7 @@
 }
 }
 
-void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t 
absPartIdx, TextType ttype, const bool splitIntoSubTUs)
+void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t 
absPartIdx, TextType ttype)
 {
 if (!cu-getCbf(absPartIdx, ttype, trDepth))
 return;
@@ -238,7 +229,7 @@
 uint32_t qtPartNum = cu-getPic()-getNumPartInCU()  ((fullDepth + 
1)  1);
 for (uint32_t part = 0; part  4; part++)
 {
-xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype, 
splitIntoSubTUs);
+xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
 }
 
 return;
@@ -254,8 +245,7 @@
 trDepth--;
 uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + 
trDepth) << 1);
 bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
-bool bSecondQ = (chFmt == CHROMA_422 && splitIntoSubTUs) ? 
((absPartIdx & (qpdiv - 1)) == 2) : false;
-if ((!bFirstQ) && (!bSecondQ))
+if (!bFirstQ)
 {
 return;
 }
@@ -267,7 +257,6 @@
 int cspy = chroma ? m_vChromaShift : 0;
 uint32_t width = cu->getCUSize(0) >> (trDepth + cspx);
 uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
-height = splitIntoSubTUs ? height >> 1 : height;
 uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 
- (cspx + cspy

[x265] input: fix race condition

2014-06-21 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1403343771 -32400
#  Sat Jun 21 18:42:51 2014 +0900
# Node ID 31e1104b97521bde2abe64a3f91d63e673f95c90
# Parent  fe370292c232ec9a629d191791271b71c1c6f354
input: fix race condition

diff -r fe370292c232 -r 31e1104b9752 source/input/y4m.cpp
--- a/source/input/y4m.cpp  Fri Jun 20 16:41:11 2014 -0700
+++ b/source/input/y4m.cpp  Sat Jun 21 18:42:51 2014 +0900
@@ -390,7 +390,6 @@
 /* open the throttle at the end, allow reader to consume
  * remaining valid queue entries */
 threadActive = false;
-frameStat[tail.get()] = false;
 tail.set(QUEUE_SIZE);
 }
 
@@ -406,7 +405,7 @@
 {
 curTail = tail.waitForChange(curTail);
 if (!threadActive)
-return false;
+break;
 }
 
 #else
@@ -417,6 +416,7 @@
 
 if (!frameStat[curHead])
 return false;
+frameStat[curHead] = false;
 
 pic.bitDepth = depth;
 pic.colorSpace = colorSpace;
diff -r fe370292c232 -r 31e1104b9752 source/input/yuv.cpp
--- a/source/input/yuv.cpp  Fri Jun 20 16:41:11 2014 -0700
+++ b/source/input/yuv.cpp  Sat Jun 21 18:42:51 2014 +0900
@@ -182,7 +182,6 @@
 }
 
 threadActive = false;
-frameStat[tail.get()] = false;
 tail.set(QUEUE_SIZE);
 }
 
@@ -218,7 +217,7 @@
 {
 curTail = tail.waitForChange(curTail);
 if (!threadActive)
-return false;
+break;
 }
 
 #else
@@ -229,6 +228,7 @@
 
 if (!frameStat[curHead])
 return false;
+frameStat[curHead] = false;
 
 pic.colorSpace = colorSpace;
 pic.bitDepth = depth;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] input: fix race condition

2014-06-21 Thread Satoshi Nakagawa
When EOF is detected, threadMain may set frameStat[tail.get()] = false
before the consumer has read that entry, because there is no head check here.
So I moved the frameStat[] = false assignment to the consumer.

 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Sunday, June 22, 2014 1:29 AM
 To: Development for x265
 Subject: Re: [x265] input: fix race condition
 
 On Sat, Jun 21, 2014 at 4:45 AM, Satoshi Nakagawa nakagawa...@oki.com
 wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403343771 -32400
  #  Sat Jun 21 18:42:51 2014 +0900
  # Node ID 31e1104b97521bde2abe64a3f91d63e673f95c90
  # Parent  fe370292c232ec9a629d191791271b71c1c6f354
  input: fix race condition
 
 Can you describe the bug this fixes, is there a network filesystem
 involved?
 
  diff -r fe370292c232 -r 31e1104b9752 source/input/y4m.cpp
  --- a/source/input/y4m.cpp  Fri Jun 20 16:41:11 2014 -0700
  +++ b/source/input/y4m.cpp  Sat Jun 21 18:42:51 2014 +0900
  @@ -390,7 +390,6 @@
   /* open the throttle at the end, allow reader to consume
* remaining valid queue entries */
   threadActive = false;
  -frameStat[tail.get()] = false;
   tail.set(QUEUE_SIZE);
   }
 
  @@ -406,7 +405,7 @@
   {
   curTail = tail.waitForChange(curTail);
   if (!threadActive)
  -return false;
  +break;
   }
 
   #else
  @@ -417,6 +416,7 @@
 
   if (!frameStat[curHead])
   return false;
  +frameStat[curHead] = false;
 
   pic.bitDepth = depth;
   pic.colorSpace = colorSpace;
  diff -r fe370292c232 -r 31e1104b9752 source/input/yuv.cpp
  --- a/source/input/yuv.cpp  Fri Jun 20 16:41:11 2014 -0700
  +++ b/source/input/yuv.cpp  Sat Jun 21 18:42:51 2014 +0900
  @@ -182,7 +182,6 @@
   }
 
   threadActive = false;
  -frameStat[tail.get()] = false;
   tail.set(QUEUE_SIZE);
   }
 
  @@ -218,7 +217,7 @@
   {
   curTail = tail.waitForChange(curTail);
   if (!threadActive)
  -return false;
  +break;
   }
 
   #else
  @@ -229,6 +228,7 @@
 
   if (!frameStat[curHead])
   return false;
  +frameStat[curHead] = false;
 
   pic.colorSpace = colorSpace;
   pic.bitDepth = depth;
  ___
  x265-devel mailing list
  x265-devel@videolan.org
  https://mailman.videolan.org/listinfo/x265-devel
 
 
 
 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] keep TComPic until next call when recpic is exported as pic_out

2014-06-19 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1403183616 -32400
#  Thu Jun 19 22:13:36 2014 +0900
# Node ID 17b083a024a2ed2b209cc0e8f1fbd3cf90956bd5
# Parent  d2a13e8541f45fcd4c7d554e0b7c89a82f2a0ff9
keep TComPic until next call when recpic is exported as pic_out

diff -r d2a13e8541f4 -r 17b083a024a2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppThu Jun 19 10:28:47 2014 +0530
+++ b/source/encoder/encoder.cppThu Jun 19 22:13:36 2014 +0900
@@ -59,6 +59,7 @@
 m_frameEncoder = NULL;
 m_rateControl = NULL;
 m_dpb = NULL;
+m_exportedPic = NULL;
 m_nals = NULL;
 m_packetData = NULL;
 m_outputCount = 0;
@@ -132,6 +133,12 @@
 
 void Encoder::destroy()
 {
+if (m_exportedPic)
+{
+ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);
+m_exportedPic = NULL;
+}
+
 if (m_frameEncoder)
 {
 for (int i = 0; i < m_totalFrameThreads; i++)
@@ -255,6 +262,13 @@
 if (m_aborted)
 return -1;
 
+if (m_exportedPic)
+{
+ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);
+m_exportedPic = NULL;
+m_dpb->recycleUnreferenced();
+}
+
 if (pic_in)
 {
 if (pic_in->colorSpace != m_param->internalCsp)
@@ -434,8 +448,14 @@
 finishFrameStats(out, curEncoder, bits);
 
 // Allow this frame to be recycled if no frame encoders are using it 
for reference
-ATOMIC_DEC(&out->m_countRefEncoders);
-m_dpb->recycleUnreferenced();
+if (!pic_out)
+{
+ATOMIC_DEC(&out->m_countRefEncoders);
+m_dpb->recycleUnreferenced();
+}
+else
+m_exportedPic = out;
+
 ret = 1;
 }
 
diff -r d2a13e8541f4 -r 17b083a024a2 source/encoder/encoder.h
--- a/source/encoder/encoder.h  Thu Jun 19 10:28:47 2014 +0530
+++ b/source/encoder/encoder.h  Thu Jun 19 22:13:36 2014 +0900
@@ -87,6 +87,8 @@
 FrameEncoder*  m_frameEncoder;
 DPB*   m_dpb;
 
+TComPic*   m_exportedPic;
+
 /* frame parallelism */
 intm_curEncoder;
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix: TOPSKIP refers outside of picture [OUTPUT CHANGE]

2014-06-16 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1402906984 -32400
#  Mon Jun 16 17:23:04 2014 +0900
# Node ID 2ece20b5e178bfe66da9ca6d37cb80454bc23f36
# Parent  e69a427e461f8c8944b68323a3d77295b65ec779
fix: TOPSKIP refers outside of picture [OUTPUT CHANGE]

diff -r e69a427e461f -r 2ece20b5e178 source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h   Thu Jun 12 22:53:47 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.h   Mon Jun 16 17:23:04 2014 +0900
@@ -159,7 +159,7 @@
 void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
 void xCompressCU(TComDataCU* outBestCU, TComDataCU* outTempCU, uint32_t 
depth, bool bInsidePicture, PartSize parentSize = SIZE_NONE);
 void xCompressIntraCU(TComDataCU* outBestCU, TComDataCU* outTempCU, 
uint32_t depth, bool bInsidePicture);
-void xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, 
TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t partitionIndex, 
uint8_t minDepth);
+void xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, 
TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t partitionIndex, 
uint8_t minDepth);
 void xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool 
bInsidePicture);
 void xCheckBestMode(TComDataCU* outBestCU, TComDataCU* outTempCU, 
uint32_t depth);
 
diff -r e69a427e461f -r 2ece20b5e178 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp   Thu Jun 12 22:53:47 2014 -0500
+++ b/source/encoder/compress.cpp   Mon Jun 16 17:23:04 2014 +0900
@@ -338,25 +338,30 @@
 }
 }
 
-void TEncCu::xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, 
TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, 
uint8_t minDepth)
+void TEncCu::xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, 
TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, 
uint8_t minDepth)
 {
 TComPic* pic = outTempCU->getPic();
+uint32_t absPartIdx = outTempCU->getZorderIdxInCU();
 
 if (depth == 0)
 {
 // get original YUV data from picture
-m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), 
outTempCU->getAddr(), outTempCU->getZorderIdxInCU());
+m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), 
outTempCU->getAddr(), absPartIdx);
 }
 else
 {
 // copy partition YUV from depth 0 CTU cache
-m_origYuv[0]->copyPartToYuv(m_origYuv[depth], 
outTempCU->getZorderIdxInCU());
+m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
 }
 
 // variables for fast encoder decision
 bool bSubBranch = true;
 int qp = outTempCU->getQP(0);
 
+#if TOPSKIP
+bool bInsidePictureParent = bInsidePicture;
+#endif
+
 TComSlice* slice = outTempCU->getSlice();
 if (!bInsidePicture)
 {
@@ -375,7 +380,7 @@
 // We need to split, so don't try these modes.
 TComYuv* tempYuv = NULL;
 #if TOPSKIP
-if (depth == 0)
+if (bInsidePicture && !bInsidePictureParent)
 {
 TComDataCU* colocated0 = slice->getNumRefIdx(REF_PIC_LIST_0) > 0 ? 
slice->getRefPic(REF_PIC_LIST_0, 0)->getCU(outTempCU->getAddr()) : NULL;
 TComDataCU* colocated1 = slice->getNumRefIdx(REF_PIC_LIST_1) > 0 ? 
slice->getRefPic(REF_PIC_LIST_1, 0)->getCU(outTempCU->getAddr()) : NULL;
@@ -383,19 +388,21 @@
 char previousQP = colocated0->getQP(0);
 uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4;
 uint32_t sum0 = 0, sum1 = 0;
-for (uint32_t i = 0; i < outTempCU->getTotalNumPart(); i = i + 4)
+uint32_t numPartitions = outTempCU->getTotalNumPart();
+for (uint32_t i = 0; i < numPartitions; i = i + 4)
 {
-if (colocated0 && colocated0->getDepth(i) < minDepth0)
-minDepth0 = colocated0->getDepth(i);
-if (colocated1 && colocated1->getDepth(i) < minDepth1)
-minDepth1 = colocated1->getDepth(i);
+uint32_t j = absPartIdx + i;
+if (colocated0 && colocated0->getDepth(j) < minDepth0)
+minDepth0 = colocated0->getDepth(j);
+if (colocated1 && colocated1->getDepth(j) < minDepth1)
+minDepth1 = colocated1->getDepth(j);
 if (colocated0)
-sum0 += (colocated0->getDepth(i) * 4);
+sum0 += (colocated0->getDepth(j) * 4);
 if (colocated1)
-sum1 += (colocated1->getDepth(i) * 4);
+sum1 += (colocated1->getDepth(j) * 4);
 }
 
-uint32_t avgDepth2 = (sum0 + sum1) / outTempCU->getTotalNumPart();
+uint32_t avgDepth2 = (sum0 + sum1) / numPartitions;
 minDepth = X265_MIN(minDepth0, minDepth1);
 if (((currentQP - previousQP) < 0) || (((currentQP - previousQP) >= 0) 
&& ((avgDepth2 - 2 * minDepth) > 1)))
 delta = 0;
@@ -686,7 +693,7 @@
 
 /* Copy Yuv data to picture Yuv */
 if (m_param->rdLevel != 0)
-xCopyYuv2Pic(pic, outBestCU

[x265] Encoder::encode(): don't return 0 while flushing.

2014-06-16 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1402983802 -32400
#  Tue Jun 17 14:43:22 2014 +0900
# Node ID cf222575f2032ff5824d58470d63561f79c3905e
# Parent  f25ed861850950d2bd31c51dbc9267333515af88
Encoder::encode(): don't return 0 while flushing.

diff -r f25ed8618509 -r cf222575f203 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppMon Jun 16 20:32:13 2014 -0500
+++ b/source/encoder/encoder.cppTue Jun 17 14:43:22 2014 +0900
@@ -481,6 +481,11 @@
 curEncoder->m_enable.trigger();
 }
 
+if (!ret && flush && fenc)
+{
+return encode(flush, NULL, pic_out, nalunits);
+}
+
 return ret;
 }
 
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] TComTrQuant: lambda for each Cb and Cr

2014-06-13 Thread Satoshi Nakagawa
It is not intended.
Please tell me the configuration.


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Saturday, June 14, 2014 8:56 AM
 To: Development for x265
 Subject: Re: [x265] TComTrQuant: lambda for each Cb and Cr
 
 On Tue, Jun 10, 2014 at 11:26 AM, Steve Borho st...@borho.org wrote:
  On Sat, Jun 7, 2014 at 9:01 PM, Satoshi Nakagawa nakagawa...@oki.com
 wrote:
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1402192642
  -32400
  #  Sun Jun 08 10:57:22 2014 +0900
  # Node ID 188e115f07427c759f47154a864467be21b5b6a1
  # Parent  e5656f1e190453efa84732269b259a6dee608ff9
  TComTrQuant: lambda for each Cb and Cr
 
  Queued for testing.  This commit message only describes about a third
  of what this patch does, but it does look like it shouldn't change
  outputs if the chroma weights are not changed from the default.
 
 Our testing showed this commit (or perhaps the tskip changes, since they
 were tested together) changed the encoder outputs.  Was it intentional
 to change outputs with default chroma weights?
 
 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix: emms issue

2014-06-10 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1402449456 -32400
#  Wed Jun 11 10:17:36 2014 +0900
# Node ID 40ad5bf953cd6b80d97aba803f321ba655a388f7
# Parent  d0bacf50eb951fe5f91e419072399b3dae8926d9
fix: emms issue

diff -r d0bacf50eb95 -r 40ad5bf953cd source/encoder/compress.cpp
--- a/source/encoder/compress.cpp   Tue Jun 10 18:37:26 2014 -0500
+++ b/source/encoder/compress.cpp   Wed Jun 11 10:17:36 2014 +0900
@@ -378,6 +378,8 @@
 #if TOPSKIP
 if (depth == 0)
 {
+x265_emms();
+
 TComDataCU* colocated0 = slice->getNumRefIdx(REF_PIC_LIST_0) > 0 ? 
slice->getRefPic(REF_PIC_LIST_0, 0)->getCU(outTempCU->getAddr()) : NULL;
 TComDataCU* colocated1 = slice->getNumRefIdx(REF_PIC_LIST_1) > 0 ? 
slice->getRefPic(REF_PIC_LIST_1, 0)->getCU(outTempCU->getAddr()) : NULL;
 char currentQP = outTempCU->getQP(0);
@@ -648,6 +650,8 @@
 if (outBestCU != 0)
 #endif
 {
+x265_emms();
+
 uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountNeigh = 0, 
totalCountCU = 0;
 double avgCost = 0;
 TComDataCU* above = outTempCU->getCUAbove();
@@ -836,6 +840,8 @@
 xCopyYuv2Pic(pic, outBestCU->getAddr(), 
outBestCU->getZorderIdxInCU(), depth);
 }
 
+x265_emms();
+
 if (!bInsidePicture) return;
 
 /* Assert if Best prediction mode is NONE
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] TComTrQuant: lambda for each Cb and Cr

2014-06-07 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1402192642 -32400
#  Sun Jun 08 10:57:22 2014 +0900
# Node ID 188e115f07427c759f47154a864467be21b5b6a1
# Parent  e5656f1e190453efa84732269b259a6dee608ff9
TComTrQuant: lambda for each Cb and Cr

diff -r e5656f1e1904 -r 188e115f0742 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Jun 05 22:45:25 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Sun Jun 08 10:57:22 2014 +0900
@@ -262,49 +262,35 @@
 }
 
 uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, coeff_t* qCoef, 
int trSize,
- TextType ttype, uint32_t absPartIdx, int32_t 
*lastPos, bool curUseRDOQ)
+ TextType ttype, uint32_t absPartIdx, int32_t 
*lastPos)
 {
-uint32_t acSum = 0;
-int add = 0;
-bool useRDOQ = m_useRDOQ && curUseRDOQ;
+const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
+TUEntropyCodingParameters codingParameters;
+getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, log2TrSize, 
ttype);
+int deltaU[32 * 32];
 
-if (useRDOQ)
+int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
+X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
+int32_t *quantCoeff = getQuantCoeff(scalingListType, m_qpParam.m_rem, 
log2TrSize - 2);
+
+int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // 
Represents scaling through forward transform
+
+int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift;
+int add = (cu->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (qbits 
- 9);
+
+int numCoeff = 1 << log2TrSize * 2;
+uint32_t acSum = primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, 
add, numCoeff, lastPos);
+
+if (acSum >= 2 && cu->getSlice()->getPPS()->getSignHideFlag())
 {
-acSum = xRateDistOptQuant(cu, coef, qCoef, trSize, ttype, absPartIdx, 
lastPos);
-}
-else
-{
-const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
-TUEntropyCodingParameters codingParameters;
-getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, 
log2TrSize, ttype);
-int deltaU[32 * 32];
-
-int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
-X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
-int32_t *quantCoeff = 0;
-quantCoeff = getQuantCoeff(scalingListType, m_qpParam.m_rem, 
log2TrSize - 2);
-
-int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; 
// Represents scaling through forward transform
-
-int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift;
-add = (cu->getSlice()->getSliceType() == I_SLICE ? 171 : 85) << (qbits 
- 9);
-
-int numCoeff = 1 << log2TrSize * 2;
-acSum += primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, 
numCoeff, lastPos);
-
-if (cu->getSlice()->getPPS()->getSignHideFlag() && acSum >= 2)
-{
-signBitHidingHDQ(qCoef, coef, deltaU, codingParameters);
-}
+signBitHidingHDQ(qCoef, coef, deltaU, codingParameters);
 }
 return acSum;
 }
 
-void TComTrQuant::init(uint32_t maxTrSize, bool useRDOQ, bool 
useTransformSkipFast)
+void TComTrQuant::init(bool useRDOQ)
 {
-m_maxTrSize= maxTrSize;
-m_useRDOQ  = useRDOQ;
-m_useTransformSkipFast = useTransformSkipFast;
+m_useRDOQ = useRDOQ;
 }
 
 uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
@@ -363,7 +349,12 @@
 }
 }
 }
-return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos, 
curUseRDOQ);
+
+if (m_useRDOQ && curUseRDOQ)
+{
+return xRateDistOptQuant(cu, m_tmpCoeff, coeff, trSize, ttype, 
absPartIdx, lastPos);
+}
+return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos);
 }
 
 void TComTrQuant::invtransformNxN(bool transQuantBypass, uint32_t mode, 
int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t trSize, int 
scalingListType, bool useTransformSkip, int lastPos)
@@ -525,10 +516,9 @@
 X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
 
 int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; // Right shift 
of non-RDOQ quantizer;  level = (coeff*Q + offset)>>q_bits
-double *errScaleOrg = getErrScaleCoeff(scalingListType, log2TrSize - 2, 
m_qpParam.m_rem);
-int32_t *qCoefOrg = getQuantCoeff(scalingListType, m_qpParam.m_rem, 
log2TrSize - 2);
-int32_t *qCoef = qCoefOrg;
-double *errScale = errScaleOrg;
+int add = (1 << (qbits - 1));
+double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, 
m_qpParam.m_rem);
+int32_t *qCoef = getQuantCoeff(scalingListType, m_qpParam.m_rem, 
log2TrSize - 2);
 
 double costCoeff[32 * 32];
 double costSig[32 * 32];
@@ -575,9 +565,9 @@
 int Q = qCoef[blkPos];
 double scaleFactor = errScale[blkPos];
 int

Re: [x265] fix: Bus error

2014-06-04 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1401861502 -32400
#  Wed Jun 04 14:58:22 2014 +0900
# Node ID dbddb764220b340044f25bd47a07949a77b1b827
# Parent  f2479eb454b0ef50e50a7df5ab877ced2cfe5db6
fix: uninitialized read m_totalFrameThreads

diff -r f2479eb454b0 -r dbddb764220b source/Lib/TLibEncoder/NALwrite.cpp
--- a/source/Lib/TLibEncoder/NALwrite.cpp   Tue Jun 03 15:24:22 2014 -0500
+++ b/source/Lib/TLibEncoder/NALwrite.cpp   Wed Jun 04 14:58:22 2014 +0900
@@ -108,7 +108,7 @@
 }
 
 uint32_t i = packetSize;
-out = (uint8_t*)realloc(out, nalsize + 4);
+out = (uint8_t*)realloc(out, packetSize + nalsize + 4);
 memcpy(out + packetSize, emulation, nalsize);
 packetSize += nalsize;
 
diff -r f2479eb454b0 -r dbddb764220b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppTue Jun 03 15:24:22 2014 -0500
+++ b/source/encoder/encoder.cppWed Jun 04 14:58:22 2014 +0900
@@ -174,6 +174,7 @@
 
 void Encoder::init()
 {
+m_totalFrameThreads = param->frameNumThreads;
 if (m_frameEncoder)
 {
 int numRows = (param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
@@ -189,7 +190,6 @@
 m_rateControl-init(m_frameEncoder[0].m_sps);
 m_lookahead-init();
 m_encodeStartTime = x265_mdate();
-m_totalFrameThreads = param->frameNumThreads;
 }
 
 int Encoder::getStreamHeaders(NALUnitEBSP **nalunits)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] fix: Bus error

2014-06-03 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1401859296 -32400
#  Wed Jun 04 14:21:36 2014 +0900
# Node ID f256e982b79c0c1a32810500474fa781ee814ac9
# Parent  f2479eb454b0ef50e50a7df5ab877ced2cfe5db6
fix: Bus error

diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibCommon/SEI.h
--- a/source/Lib/TLibCommon/SEI.h   Tue Jun 03 15:24:22 2014 -0500
+++ b/source/Lib/TLibCommon/SEI.h   Wed Jun 04 14:21:36 2014 +0900
@@ -185,7 +185,10 @@
 
 PayloadType payloadType() const { return PICTURE_TIMING; }
 
-SEIPictureTiming() {}
+SEIPictureTiming()
+: m_numNalusInDuMinus1(0)
+, m_duCpbRemovalDelayMinus1(0)
+{}
 
 virtual ~SEIPictureTiming()
 {
diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibCommon/TComBitStream.cpp
--- a/source/Lib/TLibCommon/TComBitStream.cpp   Tue Jun 03 15:24:22 2014 -0500
+++ b/source/Lib/TLibCommon/TComBitStream.cpp   Wed Jun 04 14:21:36 2014 +0900
@@ -205,9 +205,9 @@
 if (temp)
 {
 ::memcpy(temp, m_fifo, m_fsize);
-X265_FREE(m_fifo);
 m_fifo = temp;
 m_buffsize *= 2;
+X265_FREE(m_fifo);
 }
 else
 {
diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibEncoder/NALwrite.cpp
--- a/source/Lib/TLibEncoder/NALwrite.cpp   Tue Jun 03 15:24:22 2014 -0500
+++ b/source/Lib/TLibEncoder/NALwrite.cpp   Wed Jun 04 14:21:36 2014 +0900
@@ -108,7 +108,7 @@
 }
 
 uint32_t i = packetSize;
-out = (uint8_t*)realloc(out, nalsize + 4);
+out = (uint8_t*)realloc(out, packetSize + nalsize + 4);
 memcpy(out + packetSize, emulation, nalsize);
 packetSize += nalsize;
 
diff -r f2479eb454b0 -r f256e982b79c source/encoder/encoder.cpp
--- a/source/encoder/encoder.cppTue Jun 03 15:24:22 2014 -0500
+++ b/source/encoder/encoder.cppWed Jun 04 14:21:36 2014 +0900
@@ -174,6 +174,7 @@
 
 void Encoder::init()
 {
+m_totalFrameThreads = param->frameNumThreads;
 if (m_frameEncoder)
 {
 int numRows = (param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
@@ -189,7 +190,6 @@
 m_rateControl-init(m_frameEncoder[0].m_sps);
 m_lookahead-init();
 m_encodeStartTime = x265_mdate();
-m_totalFrameThreads = param->frameNumThreads;
 }
 
 int Encoder::getStreamHeaders(NALUnitEBSP **nalunits)
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] refine cbf==0 path: remove clearing coeff and resi

2014-06-01 Thread Satoshi Nakagawa
# HG changeset patch
# User Satoshi Nakagawa <nakagawa...@oki.com>
# Date 1401677099 -32400
#  Mon Jun 02 11:44:59 2014 +0900
# Node ID 73f86312c2e0aa5a105e84b0045478e02c8a03e7
# Parent  a5998df9b12ef81e48e7c5b89219a74276a75f27
refine cbf==0 path: remove clearing coeff and resi

diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cppMon Jun 02 07:36:20 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncEntropy.cppMon Jun 02 11:44:59 2014 +0900
@@ -202,7 +202,6 @@
 
 void TEncEntropy::initTUEntropySection(TComTURecurse *tuIterator, uint32_t 
splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU)
 {
-tuIterator->m_partOffset= 0;
 tuIterator->m_section   = 0;
 tuIterator->m_absPartIdxTURelCU = m_absPartIdxTU;
 tuIterator->m_splitMode = splitMode;
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.h
--- a/source/Lib/TLibEncoder/TEncEntropy.h  Mon Jun 02 07:36:20 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncEntropy.h  Mon Jun 02 11:44:59 2014 +0900
@@ -66,7 +66,6 @@
 uint32_t  m_splitMode;
 uint32_t  m_absPartIdxTURelCU;
 uint32_t  m_absPartIdxStep;
-uint32_t  m_partOffset;
 };
 
 // 

diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp   Mon Jun 02 07:36:20 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp   Mon Jun 02 11:44:59 2014 +0900
@@ -2120,8 +2120,9 @@
 // compute number of significant coefficients
 uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize);
 
-if (numSig == 0)
-return;
+#if CHECKED_BUILD || _DEBUG
+X265_CHECK(numSig > 0, "cbf check fail");
+#endif
 
 bool beValid;
 if (cu->getCUTransquantBypass(absPartIdx))
diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 07:36:20 2014 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 11:44:59 2014 +0900
@@ -408,8 +408,8 @@
 coeff_t* coeff  = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
 
 int16_t* reconQt= 
m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
-
 X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not 
max CU size\n");
+const uint32_t reconQtStride = MAX_CU_SIZE;
 
 uint32_t zorder   = cu->getZorderIdxInCU() + absPartIdx;
 pixel*   reconIPred   = 
cu->getPic()->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
@@ -443,25 +443,29 @@
 //--- set coded block flag ---
 cu->setCbfSubParts((absSum ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, 
fullDepth);
 
-//--- inverse transform ---
 if (absSum)
 {
+//--- inverse transform ---
 int scalingListType = 0 + TEXT_LUMA;
-X265_CHECK(scalingListType < 6, "scalingListType is too large %d\n", 
scalingListType);
+X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", 
scalingListType);
 m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), 
cu->getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, 
scalingListType, useTransformSkip, lastPos);
+X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
+//= reconstruction =
+primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, 
stride, reconQtStride, reconIPredStride);
+//= update distortion =
+outDist += primitives.sse_sp[part](reconQt, reconQtStride, fenc, 
stride);
 }
 else
 {
-int16_t* resiTmp = residual;
+#if CHECKED_BUILD || _DEBUG
 memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize);
-primitives.blockfill_s[sizeIdx](resiTmp, stride, 0);
+#endif
+//= reconstruction =
+primitives.luma_copy_ps[part](reconQt,reconQtStride,pred, 
stride);
+primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, 
stride);
+//= update distortion =
+outDist += primitives.sse_pp[part](pred, stride, fenc, stride);
 }
-
-X265_CHECK(tuSize <= 32, "tuSize is too large %d\n", tuSize);
-//= reconstruction =
-primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, 
MAX_CU_SIZE, reconIPredStride);
-//= update distortion =
-outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
 }
 
 void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu,
@@ -519,67 +523,67 @@
 primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
 
 //= transform and quantization =
+//--- init rate estimation arrays for RDOQ ---
+if (useTransformSkipChroma ? m_cfg->bEnableRDOQTS : m_cfg->bEnableRDOQ)
 {
-//--- init rate estimation arrays for RDOQ

Re: [x265] rdcost: overflow check by integer

2014-05-25 Thread Satoshi Nakagawa
Linux (CentOS) needs it.
FreeBSD also.


 -Original Message-
 From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of
 Steve Borho
 Sent: Monday, May 26, 2014 11:48 AM
 To: Development for x265
 Subject: Re: [x265] rdcost: overflow check by integer
 
 On Sat, May 24, 2014 at 10:48 PM, Satoshi Nakagawa nakagawa...@oki.com
 wrote:
 
  Aside: Most systems I've seen seem to do this in via -D... or
whatever.
 
  It's simpler. Thanks.
 
  # HG changeset patch
  # User Satoshi Nakagawa nakagawa...@oki.com # Date 1400989083 -32400
  #  Sun May 25 12:38:03 2014 +0900
  # Node ID 54a0dc2278494f7ec3b74f3f06a8521c805af79a
  # Parent  5e8cce428457f63fd9b8e18dafed2f8bed674d53
  add -D__STDC_LIMIT_MACROS=1
 
  diff -r 5e8cce428457 -r 54a0dc227849 source/CMakeLists.txt
  --- a/source/CMakeLists.txt Fri May 23 09:11:15 2014 -0500
  +++ b/source/CMakeLists.txt Sun May 25 12:38:03 2014 +0900
  @@ -103,6 +103,8 @@
   endif()
   endif(MSVC)
 
  +add_definitions(-D__STDC_LIMIT_MACROS=1)
  +
   check_include_files(inttypes.h HAVE_INT_TYPES_H)
   if(HAVE_INT_TYPES_H)
   add_definitions(-DHAVE_INT_TYPES_H=1)
 
 Is this still necessary given the way I modified compat/msvc/stdint.h?
  If so, for what compiler?
 
 --
 Steve Borho
 ___
 x265-devel mailing list
 x265-devel@videolan.org
 https://mailman.videolan.org/listinfo/x265-devel

___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


  1   2   >