[x265] constants: g_zscanToRaster, g_rasterToZscan as const table
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1467719659 -32400 # Tue Jul 05 20:54:19 2016 +0900 # Node ID cf06a60ee646199cf4f139a5c22cff7ac5626d68 # Parent a932b4366235ab6597c8d124c1569dade6ff790a constants: g_zscanToRaster, g_rasterToZscan as const table diff -r a932b4366235 -r cf06a60ee646 source/common/common.h --- a/source/common/common.hMon Jul 04 21:25:59 2016 +0530 +++ b/source/common/common.hTue Jul 05 20:54:19 2016 +0900 @@ -255,7 +255,9 @@ #define LOG2_UNIT_SIZE 2 // log2(unitSize) #define UNIT_SIZE (1 << LOG2_UNIT_SIZE) // unit size of CU partition -#define MAX_NUM_PARTITIONS 256 +#define LOG2_RASTER_SIZE(MAX_LOG2_CU_SIZE - LOG2_UNIT_SIZE) +#define RASTER_SIZE (1 << LOG2_RASTER_SIZE) +#define MAX_NUM_PARTITIONS (RASTER_SIZE * RASTER_SIZE) #define NUM_4x4_PARTITIONS (1U << (g_unitSizeDepth << 1)) // number of 4x4 units in max CU size #define MIN_PU_SIZE 4 diff -r a932b4366235 -r cf06a60ee646 source/common/constants.cpp --- a/source/common/constants.cpp Mon Jul 04 21:25:59 2016 +0530 +++ b/source/common/constants.cpp Tue Jul 05 20:54:19 2016 +0900 @@ -166,9 +166,47 @@ uint32_t g_maxCUSize = MAX_CU_SIZE; uint32_t g_unitSizeDepth = NUM_CU_DEPTH; uint32_t g_maxCUDepth= NUM_CU_DEPTH - 1; -uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, }; -uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, }; +const uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = +{ +0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x20, 0x21, 0x30, 0x31, 0x22, 0x23, 0x32, 0x33, +0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17, 0x24, 0x25, 0x34, 0x35, 0x26, 0x27, 0x36, 0x37, +0x40, 0x41, 0x50, 0x51, 0x42, 0x43, 0x52, 0x53, 0x60, 0x61, 0x70, 0x71, 0x62, 0x63, 0x72, 0x73, +0x44, 0x45, 0x54, 0x55, 0x46, 0x47, 0x56, 0x57, 0x64, 0x65, 0x74, 0x75, 0x66, 0x67, 0x76, 0x77, +0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x28, 0x29, 0x38, 0x39, 0x2A, 0x2B, 0x3A, 0x3B, +0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F, 0x2C, 0x2D, 0x3C, 0x3D, 0x2E, 0x2F, 0x3E, 0x3F, +0x48, 
0x49, 0x58, 0x59, 0x4A, 0x4B, 0x5A, 0x5B, 0x68, 0x69, 0x78, 0x79, 0x6A, 0x6B, 0x7A, 0x7B, +0x4C, 0x4D, 0x5C, 0x5D, 0x4E, 0x4F, 0x5E, 0x5F, 0x6C, 0x6D, 0x7C, 0x7D, 0x6E, 0x6F, 0x7E, 0x7F, +0x80, 0x81, 0x90, 0x91, 0x82, 0x83, 0x92, 0x93, 0xA0, 0xA1, 0xB0, 0xB1, 0xA2, 0xA3, 0xB2, 0xB3, +0x84, 0x85, 0x94, 0x95, 0x86, 0x87, 0x96, 0x97, 0xA4, 0xA5, 0xB4, 0xB5, 0xA6, 0xA7, 0xB6, 0xB7, +0xC0, 0xC1, 0xD0, 0xD1, 0xC2, 0xC3, 0xD2, 0xD3, 0xE0, 0xE1, 0xF0, 0xF1, 0xE2, 0xE3, 0xF2, 0xF3, +0xC4, 0xC5, 0xD4, 0xD5, 0xC6, 0xC7, 0xD6, 0xD7, 0xE4, 0xE5, 0xF4, 0xF5, 0xE6, 0xE7, 0xF6, 0xF7, +0x88, 0x89, 0x98, 0x99, 0x8A, 0x8B, 0x9A, 0x9B, 0xA8, 0xA9, 0xB8, 0xB9, 0xAA, 0xAB, 0xBA, 0xBB, +0x8C, 0x8D, 0x9C, 0x9D, 0x8E, 0x8F, 0x9E, 0x9F, 0xAC, 0xAD, 0xBC, 0xBD, 0xAE, 0xAF, 0xBE, 0xBF, +0xC8, 0xC9, 0xD8, 0xD9, 0xCA, 0xCB, 0xDA, 0xDB, 0xE8, 0xE9, 0xF8, 0xF9, 0xEA, 0xEB, 0xFA, 0xFB, +0xCC, 0xCD, 0xDC, 0xDD, 0xCE, 0xCF, 0xDE, 0xDF, 0xEC, 0xED, 0xFC, 0xFD, 0xEE, 0xEF, 0xFE, 0xFF +}; + +const uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = +{ +0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15, 0x40, 0x41, 0x44, 0x45, 0x50, 0x51, 0x54, 0x55, +0x02, 0x03, 0x06, 0x07, 0x12, 0x13, 0x16, 0x17, 0x42, 0x43, 0x46, 0x47, 0x52, 0x53, 0x56, 0x57, +0x08, 0x09, 0x0C, 0x0D, 0x18, 0x19, 0x1C, 0x1D, 0x48, 0x49, 0x4C, 0x4D, 0x58, 0x59, 0x5C, 0x5D, +0x0A, 0x0B, 0x0E, 0x0F, 0x1A, 0x1B, 0x1E, 0x1F, 0x4A, 0x4B, 0x4E, 0x4F, 0x5A, 0x5B, 0x5E, 0x5F, +0x20, 0x21, 0x24, 0x25, 0x30, 0x31, 0x34, 0x35, 0x60, 0x61, 0x64, 0x65, 0x70, 0x71, 0x74, 0x75, +0x22, 0x23, 0x26, 0x27, 0x32, 0x33, 0x36, 0x37, 0x62, 0x63, 0x66, 0x67, 0x72, 0x73, 0x76, 0x77, +0x28, 0x29, 0x2C, 0x2D, 0x38, 0x39, 0x3C, 0x3D, 0x68, 0x69, 0x6C, 0x6D, 0x78, 0x79, 0x7C, 0x7D, +0x2A, 0x2B, 0x2E, 0x2F, 0x3A, 0x3B, 0x3E, 0x3F, 0x6A, 0x6B, 0x6E, 0x6F, 0x7A, 0x7B, 0x7E, 0x7F, +0x80, 0x81, 0x84, 0x85, 0x90, 0x91, 0x94, 0x95, 0xC0, 0xC1, 0xC4, 0xC5, 0xD0, 0xD1, 0xD4, 0xD5, +0x82, 0x83, 0x86, 0x87, 0x92, 0x93, 0x96, 0x97, 0xC2, 0xC3, 0xC6, 0xC7, 0xD2, 0xD3, 0xD6, 0xD7, 
+0x88, 0x89, 0x8C, 0x8D, 0x98, 0x99, 0x9C, 0x9D, 0xC8, 0xC9, 0xCC, 0xCD, 0xD8, 0xD9, 0xDC, 0xDD, +0x8A, 0x8B, 0x8E, 0x8F, 0x9A, 0x9B, 0x9E, 0x9F, 0xCA, 0xCB, 0xCE, 0xCF, 0xDA, 0xDB, 0xDE, 0xDF, +0xA0, 0xA1, 0xA4, 0xA5, 0xB0, 0xB1, 0xB4, 0xB5, 0xE0, 0xE1, 0xE4, 0xE5, 0xF0, 0xF1, 0xF4, 0xF5, +0xA2, 0xA3, 0xA6, 0xA7, 0xB2, 0xB3, 0xB6, 0xB7, 0xE2, 0xE3, 0xE6, 0xE7, 0xF2, 0xF3, 0xF6, 0xF7, +0xA8, 0xA9, 0xAC, 0xAD, 0xB8, 0xB9, 0xBC, 0xBD, 0xE8, 0xE9, 0xEC, 0xED, 0xF8, 0xF9, 0xFC, 0xFD, +0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 0xFA, 0xFB, 0xFE, 0xFF +}; + const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] = { 0, 4, 0,
[x265] remove m_immedVals
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1463052561 -32400 # Thu May 12 20:29:21 2016 +0900 # Node ID 3d6c4c1fcb9923e8215aefae62bfeeb118e173c0 # Parent a5362b9533f6a5b77740b4e8f97dba2555b6f929 remove m_immedVals diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/ipfilter.cpp --- a/source/common/ipfilter.cppWed May 04 21:08:09 2016 + +++ b/source/common/ipfilter.cppThu May 12 20:29:21 2016 +0900 @@ -365,10 +365,10 @@ template void interp_hv_pp_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) { -short immedVals[(64 + 8) * (64 + 8)]; +ALIGN_VAR_32(int16_t, immed[width * (height + N - 1)]); -interp_horiz_ps_c<N, width, height>(src, srcStride, immedVals, width, idxX, 1); -filterVertical_sp_c(immedVals + 3 * width, width, dst, dstStride, width, height, idxY); +interp_horiz_ps_c<N, width, height>(src, srcStride, immed, width, idxX, 1); +filterVertical_sp_c(immed + (N / 2 - 1) * width, width, dst, dstStride, width, height, idxY); } } diff -r a5362b9533f6 -r 3d6c4c1fcb99 source/common/predict.cpp --- a/source/common/predict.cpp Wed May 04 21:08:09 2016 + +++ b/source/common/predict.cpp Thu May 12 20:29:21 2016 +0900 @@ -57,12 +57,10 @@ Predict::Predict() { -m_immedVals = NULL; } Predict::~Predict() { -X265_FREE(m_immedVals); m_predShortYuv[0].destroy(); m_predShortYuv[1].destroy(); } @@ -72,12 +70,8 @@ m_csp = csp; m_hChromaShift = CHROMA_H_SHIFT(csp); m_vChromaShift = CHROMA_V_SHIFT(csp); -CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1)); return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp); - -fail: -return false; } void Predict::motionCompensation(const CUData& cu, const PredictionUnit& pu, Yuv& predYuv, bool bLuma, bool bChroma) @@ -258,8 +252,8 @@ int partEnum = partitionFromSizes(pu.width, pu.height); const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; -int xFrac = mv.x & 0x3; -int yFrac = 
mv.y & 0x3; +int xFrac = mv.x & 3; +int yFrac = mv.y & 3; if (!(yFrac | xFrac)) primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride); @@ -280,14 +274,14 @@ intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset; -int xFrac = mv.x & 0x3; -int yFrac = mv.y & 0x3; - int partEnum = partitionFromSizes(pu.width, pu.height); X265_CHECK((pu.width % 4) + (pu.height % 4) == 0, "width or height not divisible by 4\n"); X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); +int xFrac = mv.x & 3; +int yFrac = mv.y & 3; + if (!(yFrac | xFrac)) primitives.pu[partEnum].convert_p2s(src, srcStride, dst, dstStride); else if (!yFrac) @@ -296,11 +290,12 @@ primitives.pu[partEnum].luma_vps(src, srcStride, dst, dstStride, yFrac); else { -int tmpStride = pu.width; -int filterSize = NTAPS_LUMA; -int halfFilterSize = (filterSize >> 1); -primitives.pu[partEnum].luma_hps(src, srcStride, m_immedVals, tmpStride, xFrac, 1); -primitives.pu[partEnum].luma_vss(m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac); +ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); +int immedStride = pu.width; +int halfFilterSize = NTAPS_LUMA >> 1; + +primitives.pu[partEnum].luma_hps(src, srcStride, immed, immedStride, xFrac, 1); +primitives.pu[partEnum].luma_vss(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, yFrac); } } @@ -309,10 +304,10 @@ intptr_t dstStride = dstYuv.m_csize; intptr_t refStride = refPic.m_strideC; -int shiftHor = (2 + m_hChromaShift); -int shiftVer = (2 + m_vChromaShift); +int mvx = mv.x << (1 - m_hChromaShift); +int mvy = mv.y << (1 - m_vChromaShift); -intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; +intptr_t refOffset = (mvx >> 3) + (mvy >> 3) * refStride; const pixel* refCb = refPic.getCbAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; const 
pixel* refCr = refPic.getCrAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + refOffset; @@ -320,11 +315,11 @@ pixel* dstCb = dstYuv.getCbAddr(pu.puAbsPartIdx); pixel* dstCr = dstYuv.getCrAddr(pu.puAbsPartIdx); -int xFrac = mv.x & ((1 << shiftHor) - 1); -int yFrac = mv.y & ((1 << shiftVer) - 1); +int partEnum = partitionFromSizes(pu.width, pu.height); -int partEnum = partitionF
[x265] move tables from .h to .cpp
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1459165671 -32400 # Mon Mar 28 20:47:51 2016 +0900 # Node ID 68099f6e3cd9a8911b6e559a0387c8ff485f5afe # Parent 5dbd6a0c8e17481a0c4d31243ebc8b46ad59e15d move tables from .h to .cpp diff -r 5dbd6a0c8e17 -r 68099f6e3cd9 source/common/contexts.h --- a/source/common/contexts.h Mon Mar 28 12:53:40 2016 +0530 +++ b/source/common/contexts.h Mon Mar 28 20:47:51 2016 +0900 @@ -117,196 +117,8 @@ #define sbacGetEntropyBits(S, V) (g_entropyBits[(S) ^ (V)]) #define sbacGetEntropyBitsTrm(V) (g_entropyBits[126 ^ (V)]) -#define MAX_NUM_CHANNEL_TYPE 2 +static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 2, 3, 4, 5, 6 } }; -static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 2, 3, 4, 5, 6 } }; -static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] = { { 0, 9, 21 }, { 0, 9, 12 } }; -static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3] = { { 9, 12, 6 }, { 9, 3, 3 } }; -static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE] = { 6, 0 }; -static const uint32_t notFirstGroupNeighbourhoodContextOffset[MAX_NUM_CHANNEL_TYPE] = { 3, 0 }; - -// initial probability for cu_transquant_bypass flag -static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] = -{ -{ 154 }, -{ 154 }, -{ 154 }, -}; - -// initial probability for split flag -static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] = -{ -{ 107, 139, 126, }, -{ 107, 139, 126, }, -{ 139, 141, 157, }, -}; - -static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] = -{ -{ 197, 185, 201, }, -{ 197, 185, 201, }, -{ CNU, CNU, CNU, }, -}; - -static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] = -{ -{ 154, }, -{ 110, }, -{ CNU, }, -}; - -static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] = -{ -{ 137, }, -{ 122, }, -{ CNU, }, -}; - -static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] = -{ 
-{ 154, 139, 154, 154 }, -{ 154, 139, 154, 154 }, -{ 184, CNU, CNU, CNU }, -}; - -static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] = -{ -{ 134, }, -{ 149, }, -{ CNU, }, -}; - -static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] = -{ -{ 183, }, -{ 154, }, -{ 184, }, -}; - -static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] = -{ -{ 152, 139, }, -{ 152, 139, }, -{ 63, 139, }, -}; - -static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] = -{ -{ 95, 79, 63, 31, 31, }, -{ 95, 79, 63, 31, 31, }, -{ CNU, CNU, CNU, CNU, CNU, }, -}; - -static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] = -{ -{ 169, 198, }, -{ 140, 198, }, -{ CNU, CNU, }, -}; - -static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] = -{ -{ 153, 153 }, -{ 153, 153 }, -{ CNU, CNU }, -}; - -static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] = -{ -{ 154, 154, 154, }, -{ 154, 154, 154, }, -{ 154, 154, 154, }, -}; - -static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] = -{ -{ 153, 111, 149, 92, 167, 154, 154 }, -{ 153, 111, 149, 107, 167, 154, 154 }, -{ 111, 141, 94, 138, 182, 154, 154 }, -}; - -static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] = -{ -{ 79, }, -{ 79, }, -{ CNU, }, -}; - -static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] = -{ -{ 125, 110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111, 79, - 108, 123, 93 }, -{ 125, 110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95, 94, - 108, 123, 108 }, -{ 110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111, 79, - 108, 123, 63 }, -}; - -static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] = -{ -{ 121, 140, - 61, 154, }, -{ 121, 140, - 61, 154, }, -{ 91, 171, - 134, 141, }, -}; - -static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] = -{ -{ 170, 154, 139, 153, 139, 123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138, 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, }, -{ 155, 154, 139, 
153, 139, 123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123, 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, }, -{ 111, 111, 125, 110, 110, 94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182, 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, }, -}; - -static
[x265] remove broadcast of non-leaf CBF
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1458869254 -32400 # Fri Mar 25 10:27:34 2016 +0900 # Node ID a2acf2e18b329b39f9e69d2ea818d834179b8a77 # Parent c8ec86965e546f271ef54bad508a82e8a4911008 remove broadcast of non-leaf CBF diff -r c8ec86965e54 -r a2acf2e18b32 source/common/cudata.h --- a/source/common/cudata.hFri Feb 19 14:36:52 2016 +0530 +++ b/source/common/cudata.hFri Mar 25 10:27:34 2016 +0900 @@ -247,7 +247,7 @@ void setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx); uint8_t getCbf(uint32_t absPartIdx, TextType ttype, uint32_t tuDepth) const { return (m_cbf[ttype][absPartIdx] >> tuDepth) & 0x1; } -uint8_t getQtRootCbf(uint32_t absPartIdx) const { if (m_chromaFormat == X265_CSP_I400) return m_cbf[0][absPartIdx] || false; else { return m_cbf[0][absPartIdx] || m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx];} } +bool getQtRootCbf(uint32_t absPartIdx) const { return (m_cbf[0][absPartIdx] || ((m_chromaFormat != X265_CSP_I400) && (m_cbf[1][absPartIdx] || m_cbf[2][absPartIdx]))); } int8_t getRefQP(uint32_t currAbsIdxInCTU) const; uint32_t getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField (*candMvField)[2], uint8_t* candDir) const; void clipMv(MV& outMV) const; diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppFri Feb 19 14:36:52 2016 +0530 +++ b/source/encoder/entropy.cppFri Mar 25 10:27:34 2016 +0900 @@ -721,16 +721,12 @@ bool bSmallChroma = (log2CurSize - hChromaShift) < 2; if (!curDepth || !bSmallChroma) { -if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1)) +uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2); +if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1)) codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv); -if (!curDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1)) +if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1)) codeQtCbfChroma(cu, 
absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv); } -else -{ -X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, curDepth - 1), "chroma xform size match failure\n"); -X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, curDepth - 1), "chroma xform size match failure\n"); -} if (subdiv) { @@ -753,7 +749,7 @@ X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n"); } else -codeQtCbfLuma(cu, absPartIdx, curDepth); +codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth); uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth); uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth); @@ -874,7 +870,7 @@ X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n"); } else -codeQtCbfLuma(cu, absPartIdx, curDepth); +codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth); uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth); diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/entropy.h --- a/source/encoder/entropy.h Fri Feb 19 14:36:52 2016 +0530 +++ b/source/encoder/entropy.h Fri Mar 25 10:27:34 2016 +0900 @@ -162,7 +162,6 @@ void codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth); void codePredInfo(const CUData& cu, uint32_t absPartIdx); -inline void codeQtCbfLuma(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth) { codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth), tuDepth); } void codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel); void codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2]); diff -r c8ec86965e54 -r a2acf2e18b32 source/encoder/search.cpp --- a/source/encoder/search.cpp Fri Feb 19 14:36:52 2016 +0530 +++ b/source/encoder/search.cpp Fri Mar 25 10:27:34 2016 +0900 @@ -222,9 +222,10 @@ if (!(log2TrSize - m_hChromaShift < 2)) { -if (!tuDepth || cu.getCbf(absPartIdx, 
TEXT_CHROMA_U, tuDepth - 1)) +uint32_t parentIdx = absPartIdx & (0xFF << (log2TrSize + 1 - LOG2_UNIT_SIZE) * 2); +if (!tuDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, tuDepth - 1)) m_entropyCoder.codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv); -if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1)) +if (!tuDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, tuDepth - 1))
[x265] backout implicit inter TU split condition
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1424139217 -32400 # Tue Feb 17 11:13:37 2015 +0900 # Node ID cf88e808db61a66344978bcc6b16d19825f2ade2 # Parent cbec71924b09f27e80f2c752caad9e0e7bf7878b backout implicit inter TU split condition diff -r cbec71924b09 -r cf88e808db61 source/encoder/entropy.cpp --- a/source/encoder/entropy.cpp Mon Feb 16 18:26:29 2015 +0530 +++ b/source/encoder/entropy.cpp Tue Feb 17 11:13:37 2015 +0900 @@ -694,7 +694,8 @@ { X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n"); } -else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N) +else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && + !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1) { X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] quant: add m_tqBypass
this patch looks ok except for the fact that this assignment looks like it should be done in setupQPForQuant(cu) itself. setQPForQuant() is written with intention in CTU level, but tqBypass control is CU level. # s/ctu/cu/ for setQPForQuant ? -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Thursday, January 29, 2015 1:13 AM To: Development for x265 Subject: Re: [x265] quant: add m_tqBypass On 01/28, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1422456196 -32400 # Wed Jan 28 23:43:16 2015 +0900 # Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e # Parent c1371f175178edcc0d0402a745b7478aa240c3b4 quant: add m_tqBypass diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp --- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600 +++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900 @@ -401,14 +401,22 @@ if (!bs) continue; -int32_t qpQ = cuQ-m_qp[partQ]; - // Derive neighboring PU index uint32_t partP; const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP, partQ) : cuQ-getPUAbove(partP, partQ)); +if (bCheckNoFilter) +{ +// check if each of PUs is lossless coded +maskP = cuP-m_tqBypass[partP] - 1; +maskQ = cuQ-m_tqBypass[partQ] - 1; +if (!(maskP | maskQ)) +continue; +} + +int32_t qpQ = cuQ-m_qp[partQ]; int32_t qpP = cuP-m_qp[partP]; -int32_t qp = (qpP + qpQ + 1) 1; +int32_t qp = (qpP + qpQ + 1) 1; int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset); @@ -428,13 +436,6 @@ if (d = beta) continue; -if (bCheckNoFilter) -{ -// check if each of PUs is lossless coded -maskP = (cuP-m_tqBypass[partP] ? 0 : -1); -maskQ = (cuQ-m_tqBypass[partQ] ? 
0 : -1); -} - int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); int32_t tc = s_tcTable[indexTC] bitdepthShift; @@ -506,33 +507,29 @@ if (bs = 1) continue; -int32_t qpQ = cuQ-m_qp[partQ]; - // Derive neighboring PU index uint32_t partP; const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP, partQ) : cuQ-getPUAbove(partP, partQ)); -int32_t qpP = cuP-m_qp[partP]; - if (bCheckNoFilter) { // check if each of PUs is lossless coded maskP = (cuP-m_tqBypass[partP] ? 0 : -1); maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1); +if (!(maskP | maskQ)) +continue; } +int32_t qpQ = cuQ-m_qp[partQ]; +int32_t qpP = cuP-m_qp[partP]; +int32_t qpA = (qpP + qpQ + 1) 1; + intptr_t unitOffset = idx * srcStep LOG2_UNIT_SIZE; for (uint32_t chromaIdx = 0; chromaIdx 2; chromaIdx++) { -int32_t chromaQPOffset = pps-chromaQpOffset[chromaIdx]; -int32_t qp = ((qpP + qpQ + 1) 1) + chromaQPOffset; +int32_t qp = qpA + pps-chromaQpOffset[chromaIdx]; if (qp = 30) -{ -if (chFmt == X265_CSP_I420) -qp = g_chromaScale[qp]; -else -qp = X265_MIN(qp, 51); -} +qp = chFmt == X265_CSP_I420 ? 
g_chromaScale[qp] : + X265_MIN(qp, 51); int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); const int32_t bitdepthShift = X265_DEPTH - 8; diff -r c1371f175178 -r 231f1a91eaef source/common/quant.cpp --- a/source/common/quant.cpp Mon Jan 26 15:31:42 2015 -0600 +++ b/source/common/quant.cpp Wed Jan 28 23:43:16 2015 +0900 @@ -169,6 +169,7 @@ m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2); m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE); m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE); +m_tqBypass = false; return m_resiDctCoeff m_fencShortBuf; } @@ -326,7 +327,7 @@ coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip) { const uint32_t sizeIdx = log2TrSize - 2; -if (cu.m_tqBypass[absPartIdx]) +if (m_tqBypass) { X265_CHECK(log2TrSize = 2 log2TrSize = 5, Block size mistake!\n); return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride); @@ -406,11 +407,11 @@ } } -void Quant
[x265] quant: add m_tqBypass
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1422456196 -32400 # Wed Jan 28 23:43:16 2015 +0900 # Node ID 231f1a91eaefdd5e79bc250b0c505178a89f185e # Parent c1371f175178edcc0d0402a745b7478aa240c3b4 quant: add m_tqBypass diff -r c1371f175178 -r 231f1a91eaef source/common/deblock.cpp --- a/source/common/deblock.cpp Mon Jan 26 15:31:42 2015 -0600 +++ b/source/common/deblock.cpp Wed Jan 28 23:43:16 2015 +0900 @@ -401,14 +401,22 @@ if (!bs) continue; -int32_t qpQ = cuQ-m_qp[partQ]; - // Derive neighboring PU index uint32_t partP; const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP, partQ) : cuQ-getPUAbove(partP, partQ)); +if (bCheckNoFilter) +{ +// check if each of PUs is lossless coded +maskP = cuP-m_tqBypass[partP] - 1; +maskQ = cuQ-m_tqBypass[partQ] - 1; +if (!(maskP | maskQ)) +continue; +} + +int32_t qpQ = cuQ-m_qp[partQ]; int32_t qpP = cuP-m_qp[partP]; -int32_t qp = (qpP + qpQ + 1) 1; +int32_t qp = (qpP + qpQ + 1) 1; int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset); @@ -428,13 +436,6 @@ if (d = beta) continue; -if (bCheckNoFilter) -{ -// check if each of PUs is lossless coded -maskP = (cuP-m_tqBypass[partP] ? 0 : -1); -maskQ = (cuQ-m_tqBypass[partQ] ? 0 : -1); -} - int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset)); int32_t tc = s_tcTable[indexTC] bitdepthShift; @@ -506,33 +507,29 @@ if (bs = 1) continue; -int32_t qpQ = cuQ-m_qp[partQ]; - // Derive neighboring PU index uint32_t partP; const CUData* cuP = (dir == EDGE_VER ? cuQ-getPULeft(partP, partQ) : cuQ-getPUAbove(partP, partQ)); -int32_t qpP = cuP-m_qp[partP]; - if (bCheckNoFilter) { // check if each of PUs is lossless coded maskP = (cuP-m_tqBypass[partP] ? 0 : -1); maskQ = (cuQ-m_tqBypass[partQ] ? 
0 : -1); +if (!(maskP | maskQ)) +continue; } +int32_t qpQ = cuQ-m_qp[partQ]; +int32_t qpP = cuP-m_qp[partP]; +int32_t qpA = (qpP + qpQ + 1) 1; + intptr_t unitOffset = idx * srcStep LOG2_UNIT_SIZE; for (uint32_t chromaIdx = 0; chromaIdx 2; chromaIdx++) { -int32_t chromaQPOffset = pps-chromaQpOffset[chromaIdx]; -int32_t qp = ((qpP + qpQ + 1) 1) + chromaQPOffset; +int32_t qp = qpA + pps-chromaQpOffset[chromaIdx]; if (qp = 30) -{ -if (chFmt == X265_CSP_I420) -qp = g_chromaScale[qp]; -else -qp = X265_MIN(qp, 51); -} +qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 51); int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset)); const int32_t bitdepthShift = X265_DEPTH - 8; diff -r c1371f175178 -r 231f1a91eaef source/common/quant.cpp --- a/source/common/quant.cpp Mon Jan 26 15:31:42 2015 -0600 +++ b/source/common/quant.cpp Wed Jan 28 23:43:16 2015 +0900 @@ -169,6 +169,7 @@ m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2); m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE); m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE); +m_tqBypass = false; return m_resiDctCoeff m_fencShortBuf; } @@ -326,7 +327,7 @@ coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip) { const uint32_t sizeIdx = log2TrSize - 2; -if (cu.m_tqBypass[absPartIdx]) +if (m_tqBypass) { X265_CHECK(log2TrSize = 2 log2TrSize = 5, Block size mistake!\n); return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride); @@ -406,11 +407,11 @@ } } -void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff, +void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig) { const uint32_t sizeIdx = log2TrSize - 2; -if (transQuantBypass) +if (m_tqBypass) { 
primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0); return; diff -r c1371f175178 -r 231f1a91eaef source/common/quant.h --- a/source/common/quant.h Mon Jan 26 15:31:42 2015 -0600 +++ b/source/common/quant.h Wed Jan 28 23:43:16 2015 +0900 @@ -93,6 +93,7 @@ NoiseReduction*m_nr
[x265] more use CUGeom
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1421487172 -32400 # Sat Jan 17 18:32:52 2015 +0900 # Node ID 270c9786681069d34c8eb709b74412843e37373a # Parent 65e71f08c55a0e9303d51691b3435cb5fdf6c6a1 more use CUGeom diff -r 65e71f08c55a -r 270c97866810 source/common/cudata.cpp --- a/source/common/cudata.cpp Sat Jan 17 10:12:34 2015 +0530 +++ b/source/common/cudata.cpp Sat Jan 17 18:32:52 2015 +0900 @@ -57,51 +57,51 @@ void bcast256(uint8_t* dst, uint8_t val) { memset(dst, val, 256); } /* Check whether 2 addresses point to the same column */ -inline bool isEqualCol(int addrA, int addrB, int numUnitsPerRow) +inline bool isEqualCol(int addrA, int addrB, int numUnits) { -// addrA % numUnitsPerRow == addrB % numUnitsPerRow -return ((addrA ^ addrB) (numUnitsPerRow - 1)) == 0; +// addrA % numUnits == addrB % numUnits +return ((addrA ^ addrB) (numUnits - 1)) == 0; } /* Check whether 2 addresses point to the same row */ -inline bool isEqualRow(int addrA, int addrB, int numUnitsPerRow) +inline bool isEqualRow(int addrA, int addrB, int numUnits) { -// addrA / numUnitsPerRow == addrB / numUnitsPerRow -return ((addrA ^ addrB) ~(numUnitsPerRow - 1)) == 0; +// addrA / numUnits == addrB / numUnits +return ((addrA ^ addrB) ~(numUnits - 1)) == 0; } /* Check whether 2 addresses point to the same row or column */ -inline bool isEqualRowOrCol(int addrA, int addrB, int numUnitsPerRow) +inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits) { -return isEqualCol(addrA, addrB, numUnitsPerRow) | isEqualRow(addrA, addrB, numUnitsPerRow); +return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, numUnits); } /* Check whether one address points to the first column */ -inline bool isZeroCol(int addr, int numUnitsPerRow) +inline bool isZeroCol(int addr, int numUnits) { -// addr % numUnitsPerRow == 0 -return (addr (numUnitsPerRow - 1)) == 0; +// addr % numUnits == 0 +return (addr (numUnits - 1)) == 0; } /* Check whether one address points to the first 
row */ -inline bool isZeroRow(int addr, int numUnitsPerRow) +inline bool isZeroRow(int addr, int numUnits) { -// addr / numUnitsPerRow == 0 -return (addr ~(numUnitsPerRow - 1)) == 0; +// addr / numUnits == 0 +return (addr ~(numUnits - 1)) == 0; } /* Check whether one address points to a column whose index is smaller than a given value */ -inline bool lessThanCol(int addr, int val, int numUnitsPerRow) +inline bool lessThanCol(int addr, int val, int numUnits) { -// addr % numUnitsPerRow val -return (addr (numUnitsPerRow - 1)) val; +// addr % numUnits val +return (addr (numUnits - 1)) val; } /* Check whether one address points to a row whose index is smaller than a given value */ -inline bool lessThanRow(int addr, int val, int numUnitsPerRow) +inline bool lessThanRow(int addr, int val, int numUnits) { -// addr / numUnitsPerRow val -return addr val * numUnitsPerRow; +// addr / numUnits val +return addr val * numUnits; } inline MV scaleMv(MV mv, int scale) @@ -1533,17 +1533,17 @@ m_encData-getPicCTU(m_cuAddr)-m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE m_slice-m_sps-picHeightInLumaSamples) { uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; -uint32_t numPartInCUSize = s_numPartInCUSize; -bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last column of CTU -bool bNotLastRow = lessThanRow(absPartIdxRB, numPartInCUSize - 1, numPartInCUSize); // is not at the last rowof CTU +uint32_t numUnits = s_numPartInCUSize; +bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1, numUnits); // is not at the last column of CTU +bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1, numUnits); // is not at the last rowof CTU if (bNotLastCol bNotLastRow) { -absPartAddr = g_rasterToZscan[absPartIdxRB + numPartInCUSize + 1]; +absPartAddr = g_rasterToZscan[absPartIdxRB + numUnits + 1]; ctuIdx = m_cuAddr; } else if (bNotLastCol) -absPartAddr = g_rasterToZscan[(absPartIdxRB + numPartInCUSize + 1) (numPartInCUSize - 1)]; 
+absPartAddr = g_rasterToZscan[(absPartIdxRB + numUnits + 1) (numUnits - 1)]; else if (bNotLastRow) { absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; @@ -1760,17 +1760,17 @@ m_encData-getPicCTU(m_cuAddr)-m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE m_slice-m_sps-picHeightInLumaSamples) { uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; -uint32_t numPartInCUSize = s_numPartInCUSize; -bool bNotLastCol = lessThanCol(absPartIdxRB, numPartInCUSize - 1
Re: [x265] [PATCH] slicetype: allow queue to fill past full to prevent bottlenecks
Steve, This patch causes a deadlock/freeze on short clips (shorter than the lookahead). # My test script often uses -f 17. Please check. Satoshi -----Original Message----- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Tuesday, January 06, 2015 9:23 PM To: x265-devel@videolan.org Subject: [x265] [PATCH] slicetype: allow queue to fill past full to prevent bottlenecks # HG changeset patch # User Steve Borho <st...@borho.org> # Date 1420538938 -19800 # Tue Jan 06 15:38:58 2015 +0530 # Node ID d36211d0190f5aafdf7ecf6657e8d1a5ba14657c # Parent 95f1e1f0efa4541e253125e7f564ecfbf8e647f9 slicetype: allow queue to fill past full to prevent bottlenecks Allow the lookahead to grow just past full before we begin pulling off output frames and handing them to frame encoders. This lag of about one mini-gop allows slicetypeDecide to stay ahead of the frame encoders and always have frames in the output queue when they are needed. It's a non-trivial performance boost for most presets that used b-adapt 2. 
diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Tue Jan 06 12:33:36 2015 +0530 +++ b/source/encoder/encoder.cpp Tue Jan 06 15:38:58 2015 +0530 @@ -291,10 +291,7 @@ delete [] m_threadLocalData; if (m_lookahead) -{ -m_lookahead-destroy(); -delete m_lookahead; -} +m_lookahead-stop(); delete m_dpb; if (m_rateControl) @@ -302,10 +299,17 @@ m_rateControl-destroy(); delete m_rateControl; } + // thread pool release should always happen last if (m_threadPool) m_threadPool-release(); +if (m_lookahead) +{ +m_lookahead-destroy(); +delete m_lookahead; +} + X265_FREE(m_cuOffsetY); X265_FREE(m_cuOffsetC); X265_FREE(m_buOffsetY); diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cppTue Jan 06 12:33:36 2015 +0530 +++ b/source/encoder/slicetype.cppTue Jan 06 15:38:58 2015 +0530 @@ -59,11 +59,12 @@ : JobProvider(pool) , m_est(pool) { -m_bReady = 0; +m_bReady = false; +m_bBusy = false; m_param = param; m_lastKeyframe = -m_param-keyframeMax; m_lastNonB = NULL; -m_bFilling = true; +m_bFilled = false; m_bFlushed = false; m_widthInCU = ((m_param-sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; m_heightInCU = ((m_param-sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; @@ -79,17 +80,26 @@ ((m_param-bFrameAdaptive m_param-bframes) || m_param-rc.cuTree || m_param-scenecutThreshold || (m_param-lookaheadDepth m_param-rc.vbvBufferSize))) -m_pool = m_pool; /* allow use of worker thread */ +{ +JobProvider::enqueue(); +} else m_pool = NULL; /* disable use of worker thread */ } +void Lookahead::stop() +{ +/* do not allow slicetypeDecide() to get started again */ +m_bReady = false; +m_bFlushed = false; +m_bBusy = false; + +if (m_pool) +JobProvider::flush(); // flush will dequeue, if it is necessary +} + void Lookahead::destroy() { -if (m_pool) -// flush will dequeue, if it is necessary -JobProvider::flush(); - // these two queues will be empty unless the encode was aborted 
while (!m_inputQueue.empty()) { @@ -120,47 +130,52 @@ if (m_inputQueue.size() = m_param-lookaheadDepth) { -/* when queue fills the first time, run slicetypeDecide synchronously, - * since the encoder will always be blocked here */ -if (m_pool !m_bFilling) +if (m_pool) { +m_bReady = !m_bBusy; m_inputQueueLock.release(); -m_bReady = 1; m_pool-pokeIdleThread(); } else slicetypeDecide(); - -if (m_bFilling m_pool) -JobProvider::enqueue(); -m_bFilling = false; } else m_inputQueueLock.release(); + +/* determine if the lookahead is (over) filled enough for frames to begin to + * be consumed by frame encoders */ +if (!m_bFilled) +{ +if (!m_param-bframes !m_param-lookaheadDepth) +m_bFilled = true; /* zero-latency */ +else if (curFrame-m_poc = m_param-lookaheadDepth + 2 + m_param-bframes) +m_bFilled = true; /* full capacity plus mini-gop lag */ +} } /* Called by API thread */ void Lookahead::flush() { +m_bFilled = true; + /* just in case the input queue is never allowed to fill */ -m_bFilling = false; - -/* flush synchronously */ m_inputQueueLock.acquire(); -if (!m_inputQueue.empty()) +if (m_inputQueue.empty()) { -slicetypeDecide(); +
[x265] slicetype: fix flush
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1420711444 -32400 # Thu Jan 08 19:04:04 2015 +0900 # Node ID 25fb38350e81cda31a5e4af4f2814d12b968a8d1 # Parent 6dce2b87f0fe4aa37f9c7d66ec99447919b19c64 slicetype: fix flush diff -r 6dce2b87f0fe -r 25fb38350e81 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Thu Jan 08 10:29:09 2015 +0530 +++ b/source/encoder/slicetype.cpp Thu Jan 08 19:04:04 2015 +0900 @@ -66,6 +66,7 @@ m_lastNonB = NULL; m_bFilled = false; m_bFlushed = false; +m_bFlush = false; m_widthInCU = ((m_param-sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; m_heightInCU = ((m_param-sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; m_scratch = (int*)x265_malloc(m_widthInCU * sizeof(int)); @@ -92,6 +93,7 @@ /* do not allow slicetypeDecide() to get started again */ m_bReady = false; m_bFlushed = false; +m_bFlush = false; m_bBusy = false; if (m_pool) @@ -156,6 +158,7 @@ /* Called by API thread */ void Lookahead::flush() { +m_bFlush = true; m_bFilled = true; /* just in case the input queue is never allowed to fill */ @@ -233,7 +236,7 @@ break; } while (m_inputQueue.size() = m_param-lookaheadDepth || - (m_bFlushed m_inputQueue.size())); + (m_bFlush m_inputQueue.size())); m_bBusy = false; m_inputQueueLock.release(); diff -r 6dce2b87f0fe -r 25fb38350e81 source/encoder/slicetype.h --- a/source/encoder/slicetype.hThu Jan 08 10:29:09 2015 +0530 +++ b/source/encoder/slicetype.hThu Jan 08 19:04:04 2015 +0900 @@ -163,6 +163,7 @@ bool m_bBusy;/* input lock - slicetypeDecide() is running */ bool m_bFilled; /* enough frames in lookahead for output to be available */ bool m_bFlushed; /* no more frames will be received */ +bool m_bFlush; bool findJob(int); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] sao: minimize skipped lines [CHANGES OUTPUT]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1420337650 -32400 # Sun Jan 04 11:14:10 2015 +0900 # Node ID 78cf196b3982a327cd38a5f89fcc43fdb94fe5a5 # Parent f255e8d06423231cb8c58ab5d3b10de7fb27b424 sao: minimize skipped lines [CHANGES OUTPUT] diff -r f255e8d06423 -r 78cf196b3982 source/encoder/sao.cpp --- a/source/encoder/sao.cppFri Jan 02 18:22:38 2015 +0530 +++ b/source/encoder/sao.cppSun Jan 04 11:14:10 2015 +0900 @@ -605,8 +605,8 @@ int32_t* stats; int32_t* count; -int skipB = plane ? 2 : 4; -int skipR = plane ? 3 : 5; +int skipR, skipB; +int skipD = plane ? 1 : 3; int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1; int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1; @@ -615,11 +615,9 @@ { const int boShift = X265_DEPTH - SAO_BO_BITS; -if (m_param-bSaoNonDeblocked) -{ -skipB = plane ? 1 : 3; -skipR = plane ? 2 : 4; -} +skipR = skipD; +skipB = skipD; + stats = m_offsetOrg[plane][SAO_BO]; count = m_count[plane][SAO_BO]; @@ -646,11 +644,9 @@ { // SAO_EO_0: // dir: - { -if (m_param-bSaoNonDeblocked) -{ -skipB = plane ? 1 : 3; -skipR = plane ? 3 : 5; -} +skipR = skipD + 1; +skipB = skipD; + stats = m_offsetOrg[plane][SAO_EO_0]; count = m_count[plane][SAO_EO_0]; @@ -679,11 +675,9 @@ // SAO_EO_1: // dir: | { -if (m_param-bSaoNonDeblocked) -{ -skipB = plane ? 2 : 4; -skipR = plane ? 2 : 4; -} +skipR = skipD; +skipB = skipD + 1; + stats = m_offsetOrg[plane][SAO_EO_1]; count = m_count[plane][SAO_EO_1]; @@ -726,11 +720,9 @@ // SAO_EO_2: // dir: 135 { -if (m_param-bSaoNonDeblocked) -{ -skipB = plane ? 2 : 4; -skipR = plane ? 3 : 5; -} +skipR = skipD + 1; +skipB = skipD + 1; + stats = m_offsetOrg[plane][SAO_EO_2]; count = m_count[plane][SAO_EO_2]; @@ -772,11 +764,9 @@ // SAO_EO_3: // dir: 45 { -if (m_param-bSaoNonDeblocked) -{ -skipB = plane ? 2 : 4; -skipR = plane ? 
3 : 5; -} +skipR = skipD + 1; +skipB = skipD + 1; + stats = m_offsetOrg[plane][SAO_EO_3]; count = m_count[plane][SAO_EO_3]; @@ -846,7 +836,8 @@ int32_t* stats; int32_t* count; -int skipB, skipR; +int skipR, skipB; +int skipD = 3; int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1; int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1; @@ -861,6 +852,7 @@ if (plane == 1) { stride = frame-m_reconPic-m_strideC; +skipD = 1; picWidth = m_hChromaShift; picHeight = m_vChromaShift; ctuWidth = m_hChromaShift; @@ -873,8 +865,8 @@ // SAO_BO: -skipB = plane ? 1 : 3; -skipR = plane ? 2 : 4; +skipR = skipD; +skipB = skipD; stats = m_offsetOrgPreDblk[addr][plane][SAO_BO]; count = m_countPreDblk[addr][plane][SAO_BO]; @@ -902,8 +894,8 @@ // SAO_EO_0: // dir: - { -skipB = plane ? 1 : 3; -skipR = plane ? 3 : 5; +skipR = skipD + 1; +skipB = skipD; stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_0]; count = m_countPreDblk[addr][plane][SAO_EO_0]; @@ -938,8 +930,8 @@ // SAO_EO_1: // dir: | { -skipB = plane ? 2 : 4; -skipR = plane ? 2 : 4; +skipR = skipD; +skipB = skipD + 1; stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_1]; count = m_countPreDblk[addr][plane][SAO_EO_1]; @@ -983,8 +975,8 @@ // SAO_EO_2: // dir: 135 { -skipB = plane ? 2 : 4; -skipR = plane ? 3 : 5; +skipR = skipD + 1; +skipB = skipD + 1; stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_2]; count = m_countPreDblk[addr][plane][SAO_EO_2]; @@ -1035,8 +1027,8 @@ // SAO_EO_3: // dir: 45 { -skipB = plane ? 2 : 4; -skipR = plane ? 3 : 5; +skipR = skipD + 1; +skipB = skipD + 1; stats = m_offsetOrgPreDblk[addr][plane][SAO_EO_3]; count = m_countPreDblk[addr][plane][SAO_EO_3]; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix weightCost() [CHANGES OUTPUT]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1420511389 -32400 # Tue Jan 06 11:29:49 2015 +0900 # Node ID a260403b0d21cd2948fb2546997269c102249369 # Parent f255e8d06423231cb8c58ab5d3b10de7fb27b424 fix weightCost() [CHANGES OUTPUT] diff -r f255e8d06423 -r a260403b0d21 source/encoder/weightPrediction.cpp --- a/source/encoder/weightPrediction.cpp Fri Jan 02 18:22:38 2015 +0530 +++ b/source/encoder/weightPrediction.cpp Tue Jan 06 11:29:49 2015 +0900 @@ -193,9 +193,9 @@ if (bLuma) { int cu = 0; -for (int y = 8; y height; y += 8, r += 8 * stride, f += 8 * stride) +for (int y = 0; y height; y += 8, r += 8 * stride, f += 8 * stride) { -for (int x = 8; x width; x += 8, cu++) +for (int x = 0; x width; x += 8, cu++) { int cmp = primitives.satd[LUMA_8x8](r + x, stride, f + x, stride); cost += X265_MIN(cmp, cache.intraCost[cu]); @@ -203,12 +203,12 @@ } } else if (cache.csp == X265_CSP_I444) -for (int y = 16; y height; y += 16, r += 16 * stride, f += 16 * stride) -for (int x = 16; x width; x += 16) +for (int y = 0; y height; y += 16, r += 16 * stride, f += 16 * stride) +for (int x = 0; x width; x += 16) cost += primitives.satd[LUMA_16x16](r + x, stride, f + x, stride); else -for (int y = 8; y height; y += 8, r += 8 * stride, f += 8 * stride) -for (int x = 8; x width; x += 8) +for (int y = 0; y height; y += 8, r += 8 * stride, f += 8 * stride) +for (int x = 0; x width; x += 8) cost += primitives.satd[LUMA_8x8](r + x, stride, f + x, stride); return cost; @@ -381,9 +381,9 @@ break; case 2: -fref = refFrame-m_fencPic-m_picOrg[2]; orig = fencPic-m_picOrg[2]; stride = fencPic-m_strideC; +fref = refFrame-m_fencPic-m_picOrg[2]; width = ((fencPic-m_picWidth 4) 4) cache.hshift; height = ((fencPic-m_picHeight 4) 4) cache.vshift; if (mvs) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] refine intra neighbors
code maintainability may be improved. # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1419480956 -32400 # Thu Dec 25 13:15:56 2014 +0900 # Node ID d400c836b3796e68bb08538a5c20f16f8966ee18 # Parent 5f9f7194267b76f733e9ffb0f9e8b474dfe89a71 refine intra neighbors diff -r 5f9f7194267b -r d400c836b379 source/common/common.h --- a/source/common/common.hTue Dec 23 17:40:53 2014 +0900 +++ b/source/common/common.hThu Dec 25 13:15:56 2014 +0900 @@ -163,6 +163,9 @@ templatetypename T inline T x265_max(T a, T b) { return a b ? a : b; } +templatetypename T +inline T x265_clip3(T minVal, T maxVal, T a) { return x265_min(x265_max(minVal, a), maxVal); } + typedef int16_t coeff_t; // transform coefficient #define X265_MIN(a, b) ((a) (b) ? (a) : (b)) diff -r 5f9f7194267b -r d400c836b379 source/common/cudata.cpp --- a/source/common/cudata.cpp Tue Dec 23 17:40:53 2014 +0900 +++ b/source/common/cudata.cpp Thu Dec 25 13:15:56 2014 +0900 @@ -608,7 +608,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]; if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) return m_encData-getPicCTU(m_cuAddr); @@ -689,8 +689,6 @@ return NULL; } blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1]; -if (!m_cuLeft || !m_cuLeft-m_slice) -return NULL; return m_cuLeft; } @@ -723,8 +721,6 @@ return NULL; } arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset]; -if (!m_cuAbove || !m_cuAbove-m_slice) -return NULL; return m_cuAbove; } @@ -732,8 +728,6 @@ return NULL; arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1]; -if ((m_cuAboveRight == 
NULL || m_cuAboveRight-m_slice == NULL || (m_cuAboveRight-m_cuAddr) m_cuAddr)) -return NULL; return m_cuAboveRight; } @@ -904,7 +898,7 @@ tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize; tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize; -tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1])); +tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag)); } void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const @@ -916,7 +910,7 @@ tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize; tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize; -tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1])); +tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); } uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const @@ -1363,14 +1357,6 @@ return outPartIdxRB; } -void CUData::deriveLeftRightTopIdxAdi(uint32_t outPartIdxLT, uint32_t outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const -{ -uint32_t numPartInWidth = 1 (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth); - -outPartIdxLT = m_absIdxInCTU + partOffset; -outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1]; -} - bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData candCU, uint32_t candAbsPartIdx) const { if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) diff -r 5f9f7194267b -r d400c836b379 source/common/cudata.h --- a/source/common/cudata.hTue Dec 23 17:40:53 2014 +0900 +++ b/source/common/cudata.hThu Dec 25 13:15:56 2014 +0900 @@ -212,7 +212,6 @@ void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const; int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const; -void 
deriveLeftRightTopIdxAdi(uint32_t partIdxLT, uint32_t partIdxRT, uint32_t partOffset, uint32_t partDepth) const; uint32_t getSCUAddr() const { return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInCTU; } uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const; diff -r 5f9f7194267b -r d400c836b379 source/common/predict.cpp --- a/source/common/predict.cpp Tue Dec 23 17:40:53 2014 +0900 +++ b/source/common/predict.cpp Thu Dec 25 13:15:56 2014 +0900 @@ -654,11 +654,8 @@ } } -void Predict::initAdiPattern(const CUData
[x265] rdcost: unify scaleChromaDist*()
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1419324053 -32400 # Tue Dec 23 17:40:53 2014 +0900 # Node ID 36bde0fab6510684879e6ad996ab7d5acab86a5e # Parent 9fdab427a1918939293539f07b49ce77c5104912 rdcost: unify scaleChromaDist*() diff -r 9fdab427a191 -r 36bde0fab651 source/encoder/rdcost.h --- a/source/encoder/rdcost.h Tue Dec 23 12:17:08 2014 +0530 +++ b/source/encoder/rdcost.h Tue Dec 23 17:40:53 2014 +0900 @@ -37,15 +37,12 @@ /* all weights and factors stored as FIX8 */ uint64_t m_lambda2; uint64_t m_lambda; -uint64_t m_cbDistortionWeight; -uint64_t m_crDistortionWeight; +uint32_t m_chromaDistWeight[2]; uint32_t m_psyRdBase; uint32_t m_psyRd; int m_qp; void setPsyRdScale(double scale){ m_psyRdBase = (uint32_t)floor(256.0 * scale * 0.33); } -void setCbDistortionWeight(uint16_t weightFix8) { m_cbDistortionWeight = weightFix8; } -void setCrDistortionWeight(uint16_t weightFix8) { m_crDistortionWeight = weightFix8; } void setQP(const Slice slice, int qp) { @@ -62,7 +59,7 @@ qpCb = X265_MIN(qp + slice.m_pps-chromaQpOffset[0], QP_MAX_SPEC); int chroma_offset_idx = X265_MIN(qp - qpCb + 12, MAX_CHROMA_LAMBDA_OFFSET); uint16_t lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; -setCbDistortionWeight(lambdaOffset); +m_chromaDistWeight[0] = lambdaOffset; if (slice.m_sps-chromaFormatIdc == X265_CSP_I420) qpCr = Clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + slice.m_pps-chromaQpOffset[0]]); @@ -70,7 +67,7 @@ qpCr = X265_MIN(qp + slice.m_pps-chromaQpOffset[0], QP_MAX_SPEC); chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET); lambdaOffset = m_psyRd ? 
x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; -setCrDistortionWeight(lambdaOffset); +m_chromaDistWeight[1] = lambdaOffset; } void setLambda(double lambda2, double lambda) @@ -82,7 +79,7 @@ inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const { X265_CHECK(bits = (UINT64_MAX - 128) / m_lambda2, - calcRdCost wrap detected dist: %d, bits %d, lambda: %d\n, distortion, bits, (int)m_lambda2); + calcRdCost wrap detected dist: %u, bits %u, lambda: X265_LL\n, distortion, bits, m_lambda2); return distortion + ((bits * m_lambda2 + 128) 8); } @@ -107,22 +104,15 @@ inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const { X265_CHECK(bits = (UINT64_MAX - 128) / m_lambda, - calcRdSADCost wrap detected dist: %d, bits %d, lambda: X265_LL\n, sadCost, bits, m_lambda); + calcRdSADCost wrap detected dist: %u, bits %u, lambda: X265_LL\n, sadCost, bits, m_lambda); return sadCost + ((bits * m_lambda + 128) 8); } -inline uint32_t scaleChromaDistCb(uint32_t dist) const +inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const { -X265_CHECK(dist = (UINT64_MAX - 128) / m_cbDistortionWeight, - scaleChromaDistCb wrap detected dist: %d, lambda: X265_LL\n, dist, m_cbDistortionWeight); -return (uint32_t)(((dist * m_cbDistortionWeight) + 128) 8); -} - -inline uint32_t scaleChromaDistCr(uint32_t dist) const -{ -X265_CHECK(dist = (UINT64_MAX - 128) / m_crDistortionWeight, - scaleChromaDistCr wrap detected dist: %d, lambda: X265_LL\n, dist, m_crDistortionWeight); -return (uint32_t)(((dist * m_crDistortionWeight) + 128) 8); +X265_CHECK(dist = (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1], + scaleChromaDist wrap detected dist: %u, lambda: %u\n, dist, m_chromaDistWeight[plane - 1]); +return (uint32_t)((dist * (uint64_t)m_chromaDistWeight[plane - 1] + 128) 8); } inline uint32_t getCost(uint32_t bits) const diff -r 9fdab427a191 -r 36bde0fab651 source/encoder/search.cpp --- a/source/encoder/search.cpp Tue Dec 23 12:17:08 2014 +0530 +++ 
b/source/encoder/search.cpp Tue Dec 23 17:40:53 2014 +0900 @@ -813,7 +813,6 @@ primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride); uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false); -uint32_t tmpDist; if (numSig) { m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig); @@ -827,8 +826,7 @@ cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep); } -tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride); -outDist += (ttype == TEXT_CHROMA_U) ? m_rdCost.scaleChromaDistCb(tmpDist) : m_rdCost.scaleChromaDistCr
[x265] refine intra neighbors
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1419313799 -32400 # Tue Dec 23 14:49:59 2014 +0900 # Node ID 6b59452a17d75c42c1750d47e2318c8da80c39fb # Parent 8d2f418829c894c25da79daa861f16c61e5060d7 refine intra neighbors diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/common.h --- a/source/common/common.hSat Dec 20 21:27:14 2014 +0900 +++ b/source/common/common.hTue Dec 23 14:49:59 2014 +0900 @@ -163,6 +163,9 @@ templatetypename T inline T x265_max(T a, T b) { return a b ? a : b; } +templatetypename T +inline T x265_clip3(T minVal, T maxVal, T a) { return x265_min(x265_max(minVal, a), maxVal); } + typedef int16_t coeff_t; // transform coefficient #define X265_MIN(a, b) ((a) (b) ? (a) : (b)) diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.cpp --- a/source/common/cudata.cpp Sat Dec 20 21:27:14 2014 +0900 +++ b/source/common/cudata.cpp Tue Dec 23 14:49:59 2014 +0900 @@ -608,7 +608,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - s_numPartInCUSize + 1]; if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, s_numPartInCUSize)) return m_encData-getPicCTU(m_cuAddr); @@ -689,8 +689,6 @@ return NULL; } blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + partUnitOffset) * s_numPartInCUSize - 1]; -if (!m_cuLeft || !m_cuLeft-m_slice) -return NULL; return m_cuLeft; } @@ -723,8 +721,6 @@ return NULL; } arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset]; -if (!m_cuAbove || !m_cuAbove-m_slice) -return NULL; return m_cuAbove; } @@ -732,8 +728,6 @@ return NULL; arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize + partUnitOffset - 1]; -if ((m_cuAboveRight == NULL || m_cuAboveRight-m_slice == NULL 
|| (m_cuAboveRight-m_cuAddr) m_cuAddr)) -return NULL; return m_cuAboveRight; } @@ -904,7 +898,7 @@ tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize; tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize; -tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag), tuDepthRange[1])); +tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice-m_sps-quadtreeTUMaxDepthIntra - 1 + splitFlag)); } void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const @@ -916,7 +910,7 @@ tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize; tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize; -tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1])); +tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); } uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const @@ -1363,14 +1357,6 @@ return outPartIdxRB; } -void CUData::deriveLeftRightTopIdxAdi(uint32_t outPartIdxLT, uint32_t outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const -{ -uint32_t numPartInWidth = 1 (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth); - -outPartIdxLT = m_absIdxInCTU + partOffset; -outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1]; -} - bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData candCU, uint32_t candAbsPartIdx) const { if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.h --- a/source/common/cudata.hSat Dec 20 21:27:14 2014 +0900 +++ b/source/common/cudata.hTue Dec 23 14:49:59 2014 +0900 @@ -212,7 +212,6 @@ void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const; int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const; -void deriveLeftRightTopIdxAdi(uint32_t partIdxLT, uint32_t partIdxRT, 
uint32_t partOffset, uint32_t partDepth) const; uint32_t getSCUAddr() const { return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInCTU; } uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const; diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.cpp --- a/source/common/predict.cpp Sat Dec 20 21:27:14 2014 +0900 +++ b/source/common/predict.cpp Tue Dec 23 14:49:59 2014 +0900 @@ -654,11 +654,8 @@ } } -void Predict::initAdiPattern(const CUData cu, const CUGeom cuGeom, uint32_t
[x265] (no subject)
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1419078434 -32400 # Sat Dec 20 21:27:14 2014 +0900 # Node ID 2894938c4de707ae69f8ae560bee2b3c323fd357 # Parent 78ae7996a1ceb60d24cff790cc2fa233d4c31435 fix 4:4:4 rd=1 diff -r 78ae7996a1ce -r 2894938c4de7 source/encoder/search.cpp --- a/source/encoder/search.cpp Wed Dec 17 14:31:50 2014 -0600 +++ b/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900 @@ -1591,17 +1591,19 @@ uint32_t log2TrSizeC = cu.m_log2CUSize[0] - m_hChromaShift; uint32_t tuSize = 1 log2TrSizeC; int32_t scaleTuSize = tuSize; +uint32_t tuDepth = 0; int32_t costShift = 0; if (tuSize 32) { scaleTuSize = 32; +tuDepth = 1; costShift = 2; log2TrSizeC = 5; } -Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 1); -Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 2); +Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1); +Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2); cu.getAllowedChromaDir(0, modeList); // check chroma modes ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix 4:4:4 rd=1
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1419078434 -32400 # Sat Dec 20 21:27:14 2014 +0900 # Node ID 2894938c4de707ae69f8ae560bee2b3c323fd357 # Parent 78ae7996a1ceb60d24cff790cc2fa233d4c31435 fix 4:4:4 rd=1 diff -r 78ae7996a1ce -r 2894938c4de7 source/encoder/search.cpp --- a/source/encoder/search.cpp Wed Dec 17 14:31:50 2014 -0600 +++ b/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900 @@ -1591,17 +1591,19 @@ uint32_t log2TrSizeC = cu.m_log2CUSize[0] - m_hChromaShift; uint32_t tuSize = 1 log2TrSizeC; int32_t scaleTuSize = tuSize; +uint32_t tuDepth = 0; int32_t costShift = 0; if (tuSize 32) { scaleTuSize = 32; +tuDepth = 1; costShift = 2; log2TrSizeC = 5; } -Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 1); -Predict::initAdiPatternChroma(cu, cuGeom, 0, 0, 2); +Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1); +Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2); cu.getAllowedChromaDir(0, modeList); // check chroma modes ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix for old gcc
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1416475509 -32400 # Thu Nov 20 18:25:09 2014 +0900 # Node ID c3a72e736de53af55fba25a5a5ba2da27722669f # Parent 3649fabf90d348c51d7e155989d1bf629ec27f6e fix for old gcc diff -r 3649fabf90d3 -r c3a72e736de5 source/common/pixel.cpp --- a/source/common/pixel.cpp Thu Nov 20 14:27:53 2014 +0530 +++ b/source/common/pixel.cpp Thu Nov 20 18:25:09 2014 +0900 @@ -175,7 +175,7 @@ } template<int lx, int ly, class T1, class T2> -int sse(T1* pix1, intptr_t stride_pix1, T2* pix2, intptr_t stride_pix2) +int sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2) { int sum = 0; int iTemp; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] replace char to int8_t, where it should be signed char
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1416450633 -32400 # Thu Nov 20 11:30:33 2014 +0900 # Node ID 46ae5bd20c8c317b8f71fbce0d7ad6bd6b8bba21 # Parent d059cfa88f1ac79b319bd8a05bc70704d454f0ba replace char to int8_t, where it should be signed char diff -r d059cfa88f1a -r 46ae5bd20c8c source/common/cudata.cpp --- a/source/common/cudata.cpp Tue Nov 18 14:11:12 2014 -0600 +++ b/source/common/cudata.cpp Thu Nov 20 11:30:33 2014 +0900 @@ -227,12 +227,12 @@ /* Each CU's data is layed out sequentially within the charMemBlock */ uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; -m_qp = (char*)charBuf; charBuf += m_numPartitions; +m_qp= (int8_t*)charBuf; charBuf += m_numPartitions; m_log2CUSize = charBuf; charBuf += m_numPartitions; m_lumaIntraDir = charBuf; charBuf += m_numPartitions; m_tqBypass = charBuf; charBuf += m_numPartitions; -m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions; -m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions; +m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; +m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; m_cuDepth= charBuf; charBuf += m_numPartitions; m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ m_partSize = charBuf; charBuf += m_numPartitions; @@ -772,7 +772,7 @@ } /* Get reference QP from left QpMinCu or latest coded QP */ -char CUData::getRefQP(uint32_t curAbsIdxInCTU) const +int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const { uint32_t lPartIdx = 0, aPartIdx = 0; const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); @@ -794,7 +794,7 @@ return lastValidPartIdx; } -char CUData::getLastCodedQP(uint32_t absPartIdx) const +int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const { uint32_t quPartIdxMask = 0xFF (g_maxFullDepth - m_slice-m_pps-maxCuDQPDepth) * 2; int lastValidPartIdx = getLastValidPartIdx(absPartIdx quPartIdxMask); @@ 
-808,7 +808,7 @@ else if (m_cuAddr 0 !(m_slice-m_pps-bEntropyCodingSyncEnabled !(m_cuAddr % m_slice-m_sps-numCuInWidth))) return m_encData-getPicCTU(m_cuAddr - 1)-getLastCodedQP(NUM_CU_PARTITIONS); else -return (char)m_slice-m_sliceQp; +return (int8_t)m_slice-m_sliceQp; } } @@ -936,7 +936,7 @@ return ctx; } -bool CUData::setQPSubCUs(char qp, uint32_t absPartIdx, uint32_t depth) +bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth) { uint32_t curPartNumb = NUM_CU_PARTITIONS (depth 1); uint32_t curPartNumQ = curPartNumb 2; @@ -1211,7 +1211,7 @@ setAllPU(m_mv[list], mv, absPartIdx, puIdx); } -void CUData::setPURefIdx(int list, char refIdx, int absPartIdx, int puIdx) +void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx) { setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); } diff -r d059cfa88f1a -r 46ae5bd20c8c source/common/cudata.h --- a/source/common/cudata.hTue Nov 18 14:11:12 2014 -0600 +++ b/source/common/cudata.hThu Nov 20 11:30:33 2014 +0900 @@ -127,11 +127,11 @@ int m_vChromaShift; /* Per-part data, stored contiguously */ -char* m_qp; // array of QP values +int8_t* m_qp; // array of QP values uint8_t* m_log2CUSize; // array of cu log2Size TODO: seems redundant to depth uint8_t* m_lumaIntraDir; // array of intra directions (luma) uint8_t* m_tqBypass; // array of CU lossless flags -char* m_refIdx[2];// array of motion reference indices per list +int8_t* m_refIdx[2];// array of motion reference indices per list uint8_t* m_cuDepth; // array of depths uint8_t* m_predMode; // array of prediction modes uint8_t* m_partSize; // array of partition sizes @@ -177,7 +177,7 @@ void clearCbf(){ m_partSet(m_cbf[0], 0); m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0); } /* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */ -void setQPSubParts(char qp, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); } +void 
setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); } void setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t depth) { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); } void setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth){ s_partSet
[x265] fseeko for mingw32
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1416379165 -32400 # Wed Nov 19 15:39:25 2014 +0900 # Node ID 591547ce9293eef8bfe68a8687e81c5aa1650e2a # Parent d059cfa88f1ac79b319bd8a05bc70704d454f0ba fseeko for mingw32 diff -r d059cfa88f1a -r 591547ce9293 source/common/common.h --- a/source/common/common.h Tue Nov 18 14:11:12 2014 -0600 +++ b/source/common/common.h Wed Nov 19 15:39:25 2014 +0900 @@ -56,6 +56,10 @@ #define x265_stack_align(func, ...) func(__VA_ARGS__) #endif +#if defined(__MINGW32__) +#define fseeko fseeko64 +#endif + #elif defined(_MSC_VER) #define ALIGN_VAR_8(T, var) __declspec(align(8)) T var ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] modify MV default constructor to do nothing
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1416221075 -32400 # Mon Nov 17 19:44:35 2014 +0900 # Node ID 90ec907326e25ae40b7dc38130cf81874d201ad2 # Parent 27d36c4b4a27d2872430c6a6fc538fbddcf791e6 modify MV default constructor to do nothing diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/cudata.cpp --- a/source/common/cudata.cpp Mon Nov 17 01:30:26 2014 +0530 +++ b/source/common/cudata.cpp Mon Nov 17 19:44:35 2014 +0900 @@ -1237,7 +1237,7 @@ else { // OUT OF BOUNDARY -outMvField.mv.word = 0; +outMvField.mv = 0; outMvField.refIdx = REF_NOT_VALID; } } @@ -1399,6 +1399,8 @@ for (uint32_t i = 0; i maxNumMergeCand; ++i) { +mvFieldNeighbours[i][0].mv = 0; +mvFieldNeighbours[i][1].mv = 0; mvFieldNeighbours[i][0].refIdx = REF_NOT_VALID; mvFieldNeighbours[i][1].refIdx = REF_NOT_VALID; } @@ -1646,7 +1648,7 @@ while (count maxNumMergeCand) { interDirNeighbours[count] = 1; -mvFieldNeighbours[count][0].mv.word = 0; +mvFieldNeighbours[count][0].mv = 0; mvFieldNeighbours[count][0].refIdx = r; if (isInterB) diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/lowres.h --- a/source/common/lowres.hMon Nov 17 01:30:26 2014 +0530 +++ b/source/common/lowres.hMon Nov 17 19:44:35 2014 +0900 @@ -56,11 +56,10 @@ { int hpelA = (qmv.y 2) | ((qmv.x 2) 1); pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x 2) + (qmv.y 2) * lumaStride; - -MV qmvB = qmv + MV((qmv.x 1) * 2, (qmv.y 1) * 2); -int hpelB = (qmvB.y 2) | ((qmvB.x 2) 1); - -pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x 2) + (qmvB.y 2) * lumaStride; +int qmvx = qmv.x + (qmv.x 1); +int qmvy = qmv.y + (qmv.y 1); +int hpelB = (qmvy 2) | ((qmvx 2) 1); +pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx 2) + (qmvy 2) * lumaStride; primitives.pixelavg_pp[LUMA_8x8](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32); return buf; } @@ -79,9 +78,10 @@ ALIGN_VAR_16(pixel, subpelbuf[8 * 8]); int hpelA = (qmv.y 2) | ((qmv.x 2) 1); pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x 2) + 
(qmv.y 2) * lumaStride; -MV qmvB = qmv + MV((qmv.x 1) * 2, (qmv.y 1) * 2); -int hpelB = (qmvB.y 2) | ((qmvB.x 2) 1); -pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvB.x 2) + (qmvB.y 2) * lumaStride; +int qmvx = qmv.x + (qmv.x 1); +int qmvy = qmv.y + (qmv.y 1); +int hpelB = (qmvy 2) | ((qmvx 2) 1); +pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx 2) + (qmvy 2) * lumaStride; primitives.pixelavg_pp[LUMA_8x8](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32); return comp(fenc, FENC_STRIDE, subpelbuf, 8); } diff -r 27d36c4b4a27 -r 90ec907326e2 source/common/mv.h --- a/source/common/mv.hMon Nov 17 01:30:26 2014 +0530 +++ b/source/common/mv.hMon Nov 17 19:44:35 2014 +0900 @@ -44,19 +44,19 @@ int32_t word; }; -MV() : word(0) {} - +MV() {} +MV(int32_t w) : word(w){} MV(int16_t _x, int16_t _y) : x(_x), y(_y) {} -const MV operator =(uint32_t w) { word = w; return *this; } +MV operator =(uint32_t w) { word = w; return *this; } -const MV operator +=(const MV other) { x += other.x; y += other.y; return *this; } +MV operator +=(const MV other) { x += other.x; y += other.y; return *this; } -const MV operator -=(const MV other) { x -= other.x; y -= other.y; return *this; } +MV operator -=(const MV other) { x -= other.x; y -= other.y; return *this; } -const MV operator =(int i) { x = i; y = i; return *this; } +MV operator =(int i){ x = i; y = i; return *this; } -const MV operator =(int i) { x = i; y = i; return *this; } +MV operator =(int i){ x = i; y = i; return *this; } MV operator (int i) const{ return MV(x i, y i); } @@ -64,16 +64,18 @@ MV operator *(int16_t i) const { return MV(x * i, y * i); } -const MV operator -(const MV other) const { return MV(x - other.x, y - other.y); } +MV operator -(const MV other) const { return MV(x - other.x, y - other.y); } -const MV operator +(const MV other) const { return MV(x + other.x, y + other.y); } +MV operator +(const MV other) const { return MV(x + other.x, y + other.y); } bool operator ==(const MV other) const{ 
return word == other.word; } bool operator !=(const MV
[x265] analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT]
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415871635 -32400 # Thu Nov 13 18:40:35 2014 +0900 # Node ID cc70f51c5b6dd6009c5f2b9876c9fc8108c75c62 # Parent 18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT] diff -r 18aefbde72ab -r cc70f51c5b6d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Wed Nov 12 17:17:56 2014 -0600 +++ b/source/encoder/analysis.cpp Thu Nov 13 18:40:35 2014 +0900 @@ -762,7 +762,7 @@ checkBestMode(*splitPred, depth); } -if (!depth || md.bestMode->cu.isInter(0)) +if (mightNotSplit && (!depth || md.bestMode->cu.isInter(0))) { /* early-out statistics */ FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData); @@ -1044,7 +1044,7 @@ md.bestMode = splitPred; } -if (!depth || md.bestMode->cu.isInter(0)) +if (mightNotSplit && (!depth || md.bestMode->cu.isInter(0))) { /* early-out statistics */ FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT]
-----Original Message----- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Friday, November 14, 2014 2:39 AM To: Development for x265 Subject: Re: [x265] analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT] On 11/13, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415871635 -32400 # Thu Nov 13 18:40:35 2014 +0900 # Node ID cc70f51c5b6dd6009c5f2b9876c9fc8108c75c62 # Parent 18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c analysis: don't add the cost of picture boundary CU to avgCost [CHANGES OUTPUT] it took me a while to understand what you are doing here. diff -r 18aefbde72ab -r cc70f51c5b6d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Wed Nov 12 17:17:56 2014 -0600 +++ b/source/encoder/analysis.cpp Thu Nov 13 18:40:35 2014 +0900 @@ -762,7 +762,7 @@ checkBestMode(*splitPred, depth); } -if (!depth || md.bestMode->cu.isInter(0)) +if (mightNotSplit && (!depth || md.bestMode->cu.isInter(0))) if mightNotSplit is false, then a split was forced by a picture edge (the current depth CU is too large) and so this CU is never coded. ok, it makes sense to not count the cost of this partially coded splitCU against the average cost at this depth. Although looking at this code again it's not clear why depth 0 has a special exemption. I wonder if it would be better as: if (md.bestMode != &md.pred[PRED_SPLIT] && md.bestMode->cu.isInter(0)) { .. } So it only counts costs at the level they were CU coded, and this would implicitly handle the forced splits at picture edges. In non-partial case, it seems better to take into account the split cost (sum of 4 sub-CU if better than 1 CU). Also, inter check may be not needed, (intra is better than inter). 
So, it would be better simply: if (mightNotSplit) { } { /* early-out statistics */ FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData); @@ -1044,7 +1044,7 @@ md.bestMode = splitPred; } -if (!depth || md.bestMode->cu.isInter(0)) +if (mightNotSplit && (!depth || md.bestMode->cu.isInter(0))) { /* early-out statistics */ FrameData& curEncData = const_cast<FrameData&>(*m_frame->m_encData); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] rdcost: weight chroma lambda for rdo [CHANGES OUTPUT]
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415930754 -32400 # Fri Nov 14 11:05:54 2014 +0900 # Node ID 269376f3e1d8e4942f504303909516aca9a0ba75 # Parent 17f2fb0996db9b761f13953408d810608e24397b rdcost: weight chroma lambda for rdo [CHANGES OUTPUT] Lambdas for rdoq, psy-rdo and psy-rdoq are weighted. Why is the lambda for rdo not weighted? diff -r 17f2fb0996db -r 269376f3e1d8 source/encoder/rdcost.h --- a/source/encoder/rdcost.h Thu Nov 13 17:16:07 2014 -0600 +++ b/source/encoder/rdcost.h Fri Nov 14 11:05:54 2014 +0900 @@ -59,12 +59,12 @@ int qpCb = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[0]); int chroma_offset_idx = X265_MIN(qp - qpCb + 12, MAX_CHROMA_LAMBDA_OFFSET); -uint16_t lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; +uint16_t lambdaOffset = x265_chroma_lambda2_offset_tab[chroma_offset_idx]; setCbDistortionWeight(lambdaOffset); int qpCr = Clip3(QP_MIN, QP_MAX_MAX, qp + slice.m_pps->chromaQpOffset[1]); chroma_offset_idx = X265_MIN(qp - qpCr + 12, MAX_CHROMA_LAMBDA_OFFSET); -lambdaOffset = m_psyRd ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; +lambdaOffset = x265_chroma_lambda2_offset_tab[chroma_offset_idx]; setCrDistortionWeight(lambdaOffset); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] nits
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415863775 -32400 # Thu Nov 13 16:29:35 2014 +0900 # Node ID 49141f28397dd294bb6e590dad2bb8d2b01bf97b # Parent 18aefbde72ab2dfaa0d4edeea7fd0ab4f9a09f9c nits diff -r 18aefbde72ab -r 49141f28397d source/common/cudata.cpp --- a/source/common/cudata.cpp Wed Nov 12 17:17:56 2014 -0600 +++ b/source/common/cudata.cpp Thu Nov 13 16:29:35 2014 +0900 @@ -1953,8 +1953,8 @@ bool CUData::getColMVP(MV outMV, int outRefIdx, int picList, int cuAddr, int partUnitIdx) const { -Frame *colPic = m_slice-m_refPicList[m_slice-isInterB() ? 1 - m_slice-m_colFromL0Flag : 0][m_slice-m_colRefIdx]; -CUData *colCU = colPic-m_encData-getPicCTU(cuAddr); +const Frame* colPic = m_slice-m_refPicList[m_slice-isInterB() !m_slice-m_colFromL0Flag][m_slice-m_colRefIdx]; +const CUData* colCU = colPic-m_encData-getPicCTU(cuAddr); if (colCU-m_predMode[partUnitIdx] == MODE_NONE) return false; diff -r 18aefbde72ab -r 49141f28397d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Wed Nov 12 17:17:56 2014 -0600 +++ b/source/encoder/analysis.cpp Thu Nov 13 16:29:35 2014 +0900 @@ -138,7 +138,7 @@ if (m_param-analysisMode == X265_ANALYSIS_SAVE m_frame-m_intraData) { -CUData *bestCU = m_modeDepth[0].bestMode-cu; +const CUData* bestCU = m_modeDepth[0].bestMode-cu; memcpy(m_frame-m_intraData-depth[ctu.m_cuAddr * numPartition], bestCU-m_cuDepth, sizeof(uint8_t) * numPartition); memcpy(m_frame-m_intraData-modes[ctu.m_cuAddr * numPartition], bestCU-m_lumaIntraDir, sizeof(uint8_t) * numPartition); memcpy(m_frame-m_intraData-partSizes[ctu.m_cuAddr * numPartition], bestCU-m_partSize, sizeof(uint8_t) * numPartition); @@ -268,23 +268,23 @@ for (uint32_t subPartIdx = 0; subPartIdx 4; subPartIdx++) { -const CUGeom childCuData = *(cuGeom + cuGeom.childOffset + subPartIdx); -if (childCuData.flags CUGeom::PRESENT) +const CUGeom childGeom = *(cuGeom + cuGeom.childOffset + subPartIdx); +if (childGeom.flags CUGeom::PRESENT) { 
-m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx); +m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx); m_rqt[nextDepth].cur.load(*nextContext); -compressIntraCU(parentCTU, childCuData, shared, zOrder); +compressIntraCU(parentCTU, childGeom, shared, zOrder); // Save best CU and pred data for this sub CU -splitCU-copyPartFrom(nd.bestMode-cu, childCuData, subPartIdx); +splitCU-copyPartFrom(nd.bestMode-cu, childGeom, subPartIdx); splitPred-addSubCosts(*nd.bestMode); -nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, childCuData.numPartitions * subPartIdx); +nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, childGeom.numPartitions * subPartIdx); nextContext = nd.bestMode-contexts; } else { /* record the depth of this non-present sub-CU */ -splitCU-setEmptyPart(childCuData, subPartIdx); +splitCU-setEmptyPart(childGeom, subPartIdx); zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth]; } } @@ -735,22 +735,22 @@ for (uint32_t subPartIdx = 0; subPartIdx 4; subPartIdx++) { -const CUGeom childCuData = *(cuGeom + cuGeom.childOffset + subPartIdx); -if (childCuData.flags CUGeom::PRESENT) +const CUGeom childGeom = *(cuGeom + cuGeom.childOffset + subPartIdx); +if (childGeom.flags CUGeom::PRESENT) { -m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childCuData.encodeIdx); +m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx); m_rqt[nextDepth].cur.load(*nextContext); -compressInterCU_dist(parentCTU, childCuData); +compressInterCU_dist(parentCTU, childGeom); // Save best CU and pred data for this sub CU -splitCU-copyPartFrom(nd.bestMode-cu, childCuData, subPartIdx); +splitCU-copyPartFrom(nd.bestMode-cu, childGeom, subPartIdx); splitPred-addSubCosts(*nd.bestMode); -nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, childCuData.numPartitions * subPartIdx); +nd.bestMode-reconYuv.copyToPartYuv(splitPred-reconYuv, childGeom.numPartitions * subPartIdx); nextContext = nd.bestMode-contexts; } else 
-splitCU-setEmptyPart(childCuData, subPartIdx); +splitCU-setEmptyPart(childGeom, subPartIdx
[x265] refine initializeGeoms()
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415701819 -32400 # Tue Nov 11 19:30:19 2014 +0900 # Node ID 5638df706f0833bd211c73612ba0d4403c813d9e # Parent 32513a4c3bd435757347e729dc14b5a1c1c6ceef refine initializeGeoms() diff -r 32513a4c3bd4 -r 5638df706f08 source/common/cudata.cpp --- a/source/common/cudata.cpp Mon Nov 10 12:39:54 2014 +0900 +++ b/source/common/cudata.cpp Tue Nov 11 19:30:19 2014 +0900 @@ -2078,7 +2078,7 @@ #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) (~(flag))) | ((~((value) - 1)) (flag)) -void CUData::calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const +void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) { // Initialize the coding blocks inside the CTB for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize = MIN_LOG2_CU_SIZE; log2CUSize--) @@ -2093,10 +2093,10 @@ uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; uint32_t cuIdx = rangeCUIdx + depthIdx; uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx 2); -uint32_t px = m_cuPelX + sbX * blockSize; -uint32_t py = m_cuPelY + sbY * blockSize; -int32_t presentFlag = px picWidth py picHeight; -int32_t splitMandatoryFlag = presentFlag !lastLevelFlag (px + blockSize picWidth || py + blockSize picHeight); +uint32_t px = sbX * blockSize; +uint32_t py = sbY * blockSize; +int32_t presentFlag = px ctuWidth py ctuHeight; +int32_t splitMandatoryFlag = presentFlag !lastLevelFlag (px + blockSize ctuWidth || py + blockSize ctuHeight); /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ uint32_t xOffset = (sbX * blockSize) 3; diff -r 32513a4c3bd4 -r 5638df706f08 source/common/cudata.h --- a/source/common/cudata.hMon Nov 10 12:39:54 2014 +0900 +++ b/source/common/cudata.hTue Nov 11 19:30:19 2014 +0900 @@ -158,7 +158,7 @@ CUData(); void initialize(const 
CUDataMemPool dataPool, uint32_t depth, int csp, int instance); -void calcCTUGeoms(uint32_t picWidth, uint32_t picHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) const; +static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]); void initCTU(const Frame frame, uint32_t cuAddr, int qp); void initSubCU(const CUData ctu, const CUGeom cuGeom); diff -r 32513a4c3bd4 -r 5638df706f08 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Nov 10 12:39:54 2014 +0900 +++ b/source/encoder/frameencoder.cpp Tue Nov 11 19:30:19 2014 +0900 @@ -138,11 +138,12 @@ } /* Generate a complete list of unique geom sets for the current picture dimensions */ -bool FrameEncoder::initializeGeoms(const FrameData encData) +bool FrameEncoder::initializeGeoms() { /* Geoms only vary between CTUs in the presence of picture edges */ -int heightRem = m_param-sourceHeight (m_param-maxCUSize - 1); -int widthRem = m_param-sourceWidth (m_param-maxCUSize - 1); +int maxCUSize = m_param-maxCUSize; +int heightRem = m_param-sourceHeight (maxCUSize - 1); +int widthRem = m_param-sourceWidth (maxCUSize - 1); int allocGeoms = 1; // body if (heightRem widthRem) allocGeoms = 4; // body, right, bottom, corner @@ -154,33 +155,45 @@ if (!m_cuGeoms || !m_ctuGeomMap) return false; -CUGeom cuLocalData[CUGeom::MAX_GEOMS]; -memset(cuLocalData, 0, sizeof(cuLocalData)); // temporal fix for memcmp +// body +CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, m_cuGeoms); +memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols); +if (allocGeoms == 1) +return true; -int countGeoms = 0; -for (uint32_t ctuAddr = 0; ctuAddr m_numRows * m_numCols; ctuAddr++) +int countGeoms = 1; +if (widthRem) { -/* TODO: detach this logic from TComDataCU */ -encData.m_picCTU[ctuAddr].initCTU(*m_frame, ctuAddr, 0); -encData.m_picCTU[ctuAddr].calcCTUGeoms(m_param-sourceWidth, m_param-sourceHeight, m_param-maxCUSize, cuLocalData); +// 
right +CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); +for (int i = 0; i m_numRows; i++) +{ +uint32_t ctuAddr = m_numCols * (i + 1) - 1; +m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS; +} +countGeoms++; +} +if (heightRem) +{ +// bottom +CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS
[x265] cleanup SIZE_NONE. empty CU has MODE_NONE.
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415590794 -32400 # Mon Nov 10 12:39:54 2014 +0900 # Node ID f31250e3eb5625275318bc69633e0fbc31ccdb3a # Parent 1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8 cleanup SIZE_NONE. empty CU has MODE_NONE. diff -r 1e04e178a349 -r f31250e3eb56 source/common/cudata.cpp --- a/source/common/cudata.cpp Sun Nov 09 00:30:09 2014 -0600 +++ b/source/common/cudata.cpp Mon Nov 10 12:39:54 2014 +0900 @@ -229,13 +229,13 @@ m_qp = (char*)charBuf; charBuf += m_numPartitions; m_log2CUSize = charBuf; charBuf += m_numPartitions; -m_partSize = charBuf; charBuf += m_numPartitions; m_lumaIntraDir = charBuf; charBuf += m_numPartitions; m_tqBypass = charBuf; charBuf += m_numPartitions; m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions; m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions; m_cuDepth= charBuf; charBuf += m_numPartitions; m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ +m_partSize = charBuf; charBuf += m_numPartitions; m_mergeFlag = charBuf; charBuf += m_numPartitions; m_interDir = charBuf; charBuf += m_numPartitions; m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; @@ -277,7 +277,6 @@ /* sequential memsets */ m_partSet((uint8_t*)m_qp, (uint8_t)qp); m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize); -m_partSet(m_partSize, (uint8_t)SIZE_NONE); m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); m_partSet(m_tqBypass, (uint8_t)frame.m_encData-m_param-bLossless); if (m_slice-m_sliceType != I_SLICE) @@ -289,7 +288,7 @@ X265_CHECK(!(frame.m_encData-m_param-bLossless !m_slice-m_pps-bTransquantBypassEnabled), lossless enabled without TQbypass in PPS\n); /* initialize the remaining CU data in one memset */ -memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions); +memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions); uint32_t widthInCU = m_slice-m_sps-numCuInWidth; m_cuLeft = (m_cuAddr % widthInCU) ? 
m_encData-getPicCTU(m_cuAddr - 1) : NULL; @@ -316,7 +315,6 @@ /* sequential memsets */ m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]); m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); -m_partSet(m_partSize, (uint8_t)SIZE_NONE); m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); m_partSet(m_tqBypass, (uint8_t)m_encData-m_param-bLossless); m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); @@ -324,7 +322,7 @@ m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); /* initialize the remaining CU data in one memset */ -memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions); +memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions); } /* Copy the results of a sub-part (split) CU to the parent CU */ @@ -336,13 +334,13 @@ m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); -m_subPartCopy(m_partSize + offset, subCU.m_partSize); m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); m_subPartCopy(m_predMode + offset, subCU.m_predMode); +m_subPartCopy(m_partSize + offset, subCU.m_partSize); m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); m_subPartCopy(m_interDir + offset, subCU.m_interDir); m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); @@ -423,13 +421,13 @@ m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); -m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); 
m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); +m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); @@ -472,12 +470,13 @@ /* copy out all prediction info for this part */ m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); -m_partCopy
[x265] fix typo
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415494975 -32400 # Sun Nov 09 10:02:55 2014 +0900 # Node ID cbe34d7fef367ad9603513fdae34dfee99d9a03d # Parent 3f2d6836855411597ef25b4f9786dcaa0fe7394a fix typo diff -r 3f2d68368554 -r cbe34d7fef36 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Sat Nov 08 12:30:10 2014 -0600 +++ b/source/encoder/analysis.cpp Sun Nov 09 10:02:55 2014 +0900 @@ -794,7 +794,7 @@ if (m_param->bEnableRectInter) { -md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom); +md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom); checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N); if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost) bestInter = &md.pred[PRED_Nx2N]; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix typo
Ignore the previous one. # HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415498145 -32400 # Sun Nov 09 10:55:45 2014 +0900 # Node ID 51bec6878d7bfe46f92c039a7eb2af66b5d07e09 # Parent 3f2d6836855411597ef25b4f9786dcaa0fe7394a fix typo diff -r 3f2d68368554 -r 51bec6878d7b source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Sat Nov 08 12:30:10 2014 -0600 +++ b/source/encoder/analysis.cpp Sun Nov 09 10:55:45 2014 +0900 @@ -794,12 +794,12 @@ if (m_param->bEnableRectInter) { -md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom); +md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom); checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N); if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost) bestInter = &md.pred[PRED_Nx2N]; -md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom); +md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom); checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN); if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost) bestInter = &md.pred[PRED_2NxN]; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix typo
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415348521 -32400 # Fri Nov 07 17:22:01 2014 +0900 # Node ID ddc90f87dbe7dd704e9f0b0fe15c4752f9156c16 # Parent bc4f3dab51db5fb0a164fe0667f1556e2111d3c2 fix typo diff -r bc4f3dab51db -r ddc90f87dbe7 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Nov 07 11:43:15 2014 +0900 +++ b/source/encoder/analysis.cpp Fri Nov 07 17:22:01 2014 +0900 @@ -1739,7 +1739,7 @@ } // give 60% weight to all CU's and 40% weight to neighbour CU's -if (neighCost + cuCount) +if (neighCount + cuCount) { uint64_t avgCost = ((3 * cuCost) + (2 * neighCost)) / ((3 * cuCount) + (2 * neighCount)); uint64_t curCost = m_param->rdLevel > 1 ? bestMode.rdCost : bestMode.sa8dCost; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix bug in 522baf03fbbd
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415355446 -32400 # Fri Nov 07 19:17:26 2014 +0900 # Node ID 8a0b4706d8114ac8caa6b2bcb4359c672265ef75 # Parent 522baf03fbbd17ab3844f8190f78607089ce0a8d fix bug in 522baf03fbbd diff -r 522baf03fbbd -r 8a0b4706d811 source/encoder/search.cpp --- a/source/encoder/search.cpp Wed Nov 05 16:23:42 2014 +0530 +++ b/source/encoder/search.cpp Fri Nov 07 19:17:26 2014 +0900 @@ -2879,10 +2879,10 @@ if (nullCostY < singleCostY) { cbfFlag[TEXT_LUMA][0] = 0; +primitives.blockfill_s[partSize](curResiY, strideResiY, 0); #if CHECKED_BUILD || _DEBUG uint32_t numCoeffY = 1 << (log2TrSize << 1); memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY); -primitives.blockfill_s[partSize](curResiY, strideResiY, 0); #endif if (checkTransformSkipY) minCost[TEXT_LUMA][0] = nullCostY; @@ -2955,10 +2955,10 @@ if (nullCostC < singleCostC) { cbfFlag[chromaId][tuIterator.section] = 0; +primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0); #if CHECKED_BUILD || _DEBUG uint32_t numCoeffC = 1 << (log2TrSizeC << 1); memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC); -primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0); #endif if (checkTransformSkipC) minCost[chromaId][tuIterator.section] = nullCostC; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix bug in 522baf03fbbd
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1415356167 -32400 # Fri Nov 07 19:29:27 2014 +0900 # Node ID cdb4f8e542d3d37710464ecc8279469024d24584 # Parent 4f034e3adef8d52853b88c6631a905dd96713d77 fix bug in 522baf03fbbd diff -r 4f034e3adef8 -r cdb4f8e542d3 source/encoder/search.cpp --- a/source/encoder/search.cpp Fri Nov 07 17:22:01 2014 +0900 +++ b/source/encoder/search.cpp Fri Nov 07 19:29:27 2014 +0900 @@ -2857,10 +2857,10 @@ { cbfFlag[TEXT_LUMA][0] = 0; singleBits[TEXT_LUMA][0] = 0; +primitives.blockfill_s[partSize](curResiY, strideResiY, 0); #if CHECKED_BUILD || _DEBUG uint32_t numCoeffY = 1 << (log2TrSize << 1); memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY); -primitives.blockfill_s[partSize](curResiY, strideResiY, 0); #endif if (checkTransformSkipY) minCost[TEXT_LUMA][0] = nullCostY; @@ -2956,10 +2956,10 @@ { cbfFlag[chromaId][tuIterator.section] = 0; singleBits[chromaId][tuIterator.section] = 0; +primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0); #if CHECKED_BUILD || _DEBUG uint32_t numCoeffC = 1 << (log2TrSizeC << 1); memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC); -primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0); #endif if (checkTransformSkipC) minCost[chromaId][tuIterator.section] = nullCostC; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] cudata: remove default argument
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415328195 -32400 # Fri Nov 07 11:43:15 2014 +0900 # Node ID f4853d3e81678e487b16a8c6f716a8f9418d6aad # Parent 0ebd0b00bf9bc447d89892ef935bc017b186fa9d cudata: remove default argument diff -r 0ebd0b00bf9b -r f4853d3e8167 source/common/cudata.cpp --- a/source/common/cudata.cpp Thu Nov 06 19:37:39 2014 -0600 +++ b/source/common/cudata.cpp Fri Nov 07 11:43:15 2014 +0900 @@ -546,7 +546,7 @@ return m_cuLeft; } -const CUData* CUData::getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtCTUBoundary) const +const CUData* CUData::getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx) const { uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; @@ -557,15 +557,10 @@ if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize)) return m_encData-getPicCTU(m_cuAddr); else -{ aPartUnitIdx -= m_absIdxInCTU; -return this; -} +return this; } -if (planarAtCTUBoundary) -return NULL; - aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_CU_PARTITIONS - s_numPartInCUSize]; return m_cuAbove; } @@ -853,7 +848,7 @@ leftIntraDir = (tempCU tempCU-isIntra(tempPartIdx)) ? tempCU-m_lumaIntraDir[tempPartIdx] : DC_IDX; // Get intra direction of above PU -tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx, true); +tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL; aboveIntraDir = (tempCU tempCU-isIntra(tempPartIdx)) ? 
tempCU-m_lumaIntraDir[tempPartIdx] : DC_IDX; diff -r 0ebd0b00bf9b -r f4853d3e8167 source/common/cudata.h --- a/source/common/cudata.hThu Nov 06 19:37:39 2014 -0600 +++ b/source/common/cudata.hFri Nov 07 11:43:15 2014 +0900 @@ -222,7 +222,7 @@ void getTUEntropyCodingParameters(TUEntropyCodingParameters result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const; const CUData* getPULeft(uint32_t lPartUnitIdx, uint32_t curPartUnitIdx) const; -const CUData* getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx, bool planarAtCTUBoundary = false) const; +const CUData* getPUAbove(uint32_t aPartUnitIdx, uint32_t curPartUnitIdx) const; const CUData* getPUAboveLeft(uint32_t alPartUnitIdx, uint32_t curPartUnitIdx) const; const CUData* getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) const; const CUData* getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) const; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] refine deblocking filter
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415178450 -32400 # Wed Nov 05 18:07:30 2014 +0900 # Node ID ce18e3c8e9af1633d4c8ead10197296d0542d0e1 # Parent 2a8f3d5820a6ebe0937ce73fa81154c263df2ae9 refine deblocking filter diff -r 2a8f3d5820a6 -r ce18e3c8e9af source/common/deblock.cpp --- a/source/common/deblock.cpp Tue Nov 04 09:46:14 2014 +0530 +++ b/source/common/deblock.cpp Wed Nov 05 18:07:30 2014 +0900 @@ -33,18 +33,42 @@ #define DEBLOCK_SMALLEST_BLOCK 8 #define DEFAULT_INTRA_TC_OFFSET 2 -void Deblock::deblockCTU(CUData* cu, int32_t dir) +void Deblock::deblockCTU(const CUData* ctu, int32_t dir) { -uint8_t blockingStrength[MAX_NUM_PARTITIONS]; +uint8_t blockStrength[MAX_NUM_PARTITIONS]; -memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions); +memset(blockStrength, 0, sizeof(uint8_t) * m_numPartitions); -deblockCU(cu, 0, 0, dir, blockingStrength); +deblockCU(ctu, 0, 0, dir, blockStrength); +} + +static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir) +{ +if (dir == Deblock::EDGE_VER) +{ +if (cu-m_cuPelX + g_zscanToPelX[absPartIdx] 0) +{ +uint32_ttempPartIdx; +const CUData* tempCU = cu-getPULeft(tempPartIdx, absPartIdx); +return tempCU ? 2 : 0; +} +} +else +{ +if (cu-m_cuPelY + g_zscanToPelY[absPartIdx] 0) +{ +uint32_ttempPartIdx; +const CUData* tempCU = cu-getPUAbove(tempPartIdx, absPartIdx); +return tempCU ? 
2 : 0; +} +} + +return 0; } /* Deblocking filter process in CU-based (the same function as conventional's) * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ -void Deblock::deblockCU(CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[]) +void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[]) { if (cu-m_partSize[absPartIdx] == SIZE_NONE) return; @@ -60,23 +84,21 @@ uint32_t ymax = sps.picHeightInLumaSamples - cu-m_cuPelY; for (uint32_t partIdx = 0; partIdx 4; partIdx++, absPartIdx += qNumParts) if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -deblockCU(cu, absPartIdx, depth + 1, dir, blockingStrength); +deblockCU(cu, absPartIdx, depth + 1, dir, blockStrength); return; } -const uint32_t widthInBaseUnits = sps.numPartInCUSize depth; -Param params; -setLoopfilterParam(cu, absPartIdx, params); -setEdgefilterPU(cu, absPartIdx, dir, blockingStrength, widthInBaseUnits); -setEdgefilterTU(cu, absPartIdx, depth, dir, blockingStrength); -setEdgefilterMultiple(cu, absPartIdx, dir, 0, (dir == EDGE_VER ? 
params.leftEdge : params.topEdge), blockingStrength, widthInBaseUnits); +const uint32_t numUnits = sps.numPartInCUSize depth; +setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits); +setEdgefilterTU(cu, absPartIdx, depth, dir, blockStrength); +setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits); for (uint32_t partIdx = absPartIdx; partIdx absPartIdx + curNumParts; partIdx++) { uint32_t bsCheck = !(partIdx (1 dir)); -if (bsCheck blockingStrength[partIdx]) -getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength); +if (bsCheck blockStrength[partIdx]) +blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength); } const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK LOG2_UNIT_SIZE; @@ -87,34 +109,33 @@ for (uint32_t e = 0; e sizeInPU; e += partIdxIncr) { -edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockingStrength); +edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength); if (!((e0 + e) chromaMask)) -edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockingStrength); +edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength); } } -static inline uint32_t calcBsIdx(CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) +static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx) { -uint32_t ctuWidthInBaseUnits = cu-m_slice-m_sps-numPartInCUSize; +uint32_t numPartInCUSize = cu-m_slice-m_sps-numPartInCUSize; if (dir) -return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * ctuWidthInBaseUnits + baseUnitIdx]; +return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numPartInCUSize + baseUnitIdx]; else -return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * ctuWidthInBaseUnits + edgeIdx]; +return g_rasterToZscan
[x265] cleanup CUData::m_skipFlag
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1415001734 -32400 # Mon Nov 03 17:02:14 2014 +0900 # Node ID ef411645295a51cf276e7830d9a98ffe50d85f63 # Parent eebb372eec893efc50e66806fcc19b1c1bd89683 cleanup CUData::m_skipFlag diff -r eebb372eec89 -r ef411645295a source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Oct 31 16:29:20 2014 -0500 +++ b/source/common/cudata.cpp Mon Nov 03 17:02:14 2014 +0900 @@ -230,13 +230,12 @@ m_qp = (char*)charBuf; charBuf += m_numPartitions; m_log2CUSize = charBuf; charBuf += m_numPartitions; m_partSize = charBuf; charBuf += m_numPartitions; -m_predMode = charBuf; charBuf += m_numPartitions; m_lumaIntraDir = charBuf; charBuf += m_numPartitions; m_tqBypass = charBuf; charBuf += m_numPartitions; m_refIdx[0] = (char*)charBuf; charBuf += m_numPartitions; m_refIdx[1] = (char*)charBuf; charBuf += m_numPartitions; m_cuDepth= charBuf; charBuf += m_numPartitions; -m_skipFlag = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ +m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ m_mergeFlag = charBuf; charBuf += m_numPartitions; m_interDir = charBuf; charBuf += m_numPartitions; m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; @@ -279,7 +278,6 @@ m_partSet((uint8_t*)m_qp, (uint8_t)qp); m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize); m_partSet(m_partSize, (uint8_t)SIZE_NONE); -m_partSet(m_predMode, (uint8_t)MODE_NONE); m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); m_partSet(m_tqBypass, (uint8_t)frame.m_encData-m_param-bLossless); if (m_slice-m_sliceType != I_SLICE) @@ -291,7 +289,7 @@ X265_CHECK(!(frame.m_encData-m_param-bLossless !m_slice-m_pps-bTransquantBypassEnabled), lossless enabled without TQbypass in PPS\n); /* initialize the remaining CU data in one memset */ -memset(m_cuDepth, 0, (BytesPerPartition - 8) * m_numPartitions); +memset(m_cuDepth, 0, (BytesPerPartition - 7) * 
m_numPartitions); uint32_t widthInCU = m_slice-m_sps-numCuInWidth; m_cuLeft = (m_cuAddr % widthInCU) ? m_encData-getPicCTU(m_cuAddr - 1) : NULL; @@ -319,7 +317,6 @@ m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]); m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); m_partSet(m_partSize, (uint8_t)SIZE_NONE); -m_partSet(m_predMode, (uint8_t)MODE_NONE); m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX); m_partSet(m_tqBypass, (uint8_t)m_encData-m_param-bLossless); m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); @@ -327,7 +324,7 @@ m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); /* initialize the remaining CU data in one memset */ -memset(m_skipFlag, 0, (BytesPerPartition - 9) * m_numPartitions); +memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions); } /* Copy the results of a sub-part (split) CU to the parent CU */ @@ -340,13 +337,12 @@ m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); m_subPartCopy(m_partSize + offset, subCU.m_partSize); -m_subPartCopy(m_predMode + offset, subCU.m_predMode); m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); -m_subPartCopy(m_skipFlag + offset, subCU.m_skipFlag); +m_subPartCopy(m_predMode + offset, subCU.m_predMode); m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); m_subPartCopy(m_interDir + offset, subCU.m_interDir); m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); @@ -410,7 +406,7 @@ m_partSet(m_tqBypass, true); /* clear residual coding flags */ -m_partSet(m_skipFlag, 0); +m_partSet(m_predMode, cu.m_predMode[0] (MODE_INTRA | MODE_INTER)); m_partSet(m_tuDepth, 0); m_partSet(m_transformSkip[0], 0); m_partSet(m_transformSkip[1], 0); @@ -428,13 +424,12 @@ 
m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); -m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); m_partCopy
[x265] fix uninitialized
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1414570941 -32400 # Wed Oct 29 17:22:21 2014 +0900 # Node ID 1f0b295a6dfac9457deea4efe81261edf21f5039 # Parent da5ba239bf59a48d9b586c54bf2d0a5320043044 fix uninitialized diff -r da5ba239bf59 -r 1f0b295a6dfa source/common/predict.cpp --- a/source/common/predict.cpp Wed Oct 29 09:13:25 2014 +0530 +++ b/source/common/predict.cpp Wed Oct 29 17:22:21 2014 +0900 @@ -144,12 +144,17 @@ primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0); } -void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) +void Predict::initMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) { m_predSlice = cu.m_slice; cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); m_ctuAddr = cu.m_cuAddr; m_cuAbsPartIdx = cuGeom.encodeIdx; +} + +void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) +{ +initMotionCompensation(cu, cuGeom, partIdx); m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx]; m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx]; diff -r da5ba239bf59 -r 1f0b295a6dfa source/common/predict.h --- a/source/common/predict.h Wed Oct 29 09:13:25 2014 +0530 +++ b/source/common/predict.h Wed Oct 29 17:22:21 2014 +0900 @@ -117,6 +117,7 @@ public: /* prepMotionCompensation needs to be called to prepare MC with CU-relevant data */ +void initMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx); void prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx); void motionCompensation(Yuv predYuv, bool bLuma, bool bChroma); diff -r da5ba239bf59 -r 1f0b295a6dfa source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Wed Oct 29 09:13:25 2014 +0530 +++ b/source/encoder/analysis.cpp Wed Oct 29 17:22:21 2014 +0900 @@ -882,7 +882,7 @@ if (m_bTryLossless) tryLossless(cuGeom); -if (mightSplit m_param-rdLevel 1) +if (mightSplit) addSplitFlagCost(*md.bestMode, cuGeom.depth); } @@ -934,7 +934,7 @@ 
if (mightNotSplit) addSplitFlagCost(*splitPred, cuGeom.depth); else if (m_param-rdLevel = 1) -splitPred-sa8dCost = m_rdCost.calcRdSADCost(splitPred-distortion, splitPred-totalBits); +splitPred-sa8dCost = m_rdCost.calcRdSADCost(splitPred-distortion, splitPred-sa8dBits); else updateModeCost(*splitPred); @@ -1539,6 +1539,7 @@ intraMode.totalBits = bbits; intraMode.distortion = bsad; intraMode.sa8dCost = bcost; +intraMode.sa8dBits = bbits; } void Analysis::encodeIntraInInter(Mode intraMode, const CUGeom cuGeom) diff -r da5ba239bf59 -r 1f0b295a6dfa source/encoder/search.cpp --- a/source/encoder/search.cpp Wed Oct 29 09:13:25 2014 +0530 +++ b/source/encoder/search.cpp Wed Oct 29 17:22:21 2014 +0900 @@ -1728,7 +1728,7 @@ for (int puIdx = 0; puIdx numPart; puIdx++) { /* sets m_puAbsPartIdx, m_puWidth, m_puHeight */ -prepMotionCompensation(cu, cuGeom, puIdx); +initMotionCompensation(cu, cuGeom, puIdx); pixel* pu = fencPic-getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx); m_me.setSourcePU(pu - fencPic-m_picOrg[0], m_puWidth, m_puHeight); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix rd=0,1
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1414600429 -32400 # Thu Oct 30 01:33:49 2014 +0900 # Branch stable # Node ID 2a5e13c6ee9351095e9a7aade3c52e8b4092b7f8 # Parent da5ba239bf59a48d9b586c54bf2d0a5320043044 fix rd=0,1 diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.cpp --- a/source/common/predict.cpp Wed Oct 29 09:13:25 2014 +0530 +++ b/source/common/predict.cpp Thu Oct 30 01:33:49 2014 +0900 @@ -144,12 +144,17 @@ primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0); } -void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) +void Predict::initMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) { m_predSlice = cu.m_slice; cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); m_ctuAddr = cu.m_cuAddr; m_cuAbsPartIdx = cuGeom.encodeIdx; +} + +void Predict::prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx) +{ +initMotionCompensation(cu, cuGeom, partIdx); m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx]; m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx]; diff -r da5ba239bf59 -r 2a5e13c6ee93 source/common/predict.h --- a/source/common/predict.h Wed Oct 29 09:13:25 2014 +0530 +++ b/source/common/predict.h Thu Oct 30 01:33:49 2014 +0900 @@ -117,6 +117,7 @@ public: /* prepMotionCompensation needs to be called to prepare MC with CU-relevant data */ +void initMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx); void prepMotionCompensation(const CUData cu, const CUGeom cuGeom, int partIdx); void motionCompensation(Yuv predYuv, bool bLuma, bool bChroma); diff -r da5ba239bf59 -r 2a5e13c6ee93 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Wed Oct 29 09:13:25 2014 +0530 +++ b/source/encoder/analysis.cpp Thu Oct 30 01:33:49 2014 +0900 @@ -882,7 +882,7 @@ if (m_bTryLossless) tryLossless(cuGeom); -if (mightSplit m_param-rdLevel 1) +if (mightSplit) addSplitFlagCost(*md.bestMode, cuGeom.depth); } @@ -934,7 
+934,7 @@ if (mightNotSplit) addSplitFlagCost(*splitPred, cuGeom.depth); else if (m_param-rdLevel = 1) -splitPred-sa8dCost = m_rdCost.calcRdSADCost(splitPred-distortion, splitPred-totalBits); +splitPred-sa8dCost = m_rdCost.calcRdSADCost(splitPred-distortion, splitPred-sa8dBits); else updateModeCost(*splitPred); @@ -1539,6 +1539,7 @@ intraMode.totalBits = bbits; intraMode.distortion = bsad; intraMode.sa8dCost = bcost; +intraMode.sa8dBits = bbits; } void Analysis::encodeIntraInInter(Mode intraMode, const CUGeom cuGeom) @@ -1601,8 +1602,6 @@ uint32_t absPartIdx = cuGeom.encodeIdx; int sizeIdx = cuGeom.log2CUSize - 2; -/* at RD 0, the prediction pixels are accumulated into the top depth predYuv */ -Yuv predYuv = m_modeDepth[0].bestMode-predYuv; Yuv fencYuv = m_modeDepth[0].fencYuv; /* reuse the bestMode data structures at the current depth */ @@ -1615,18 +1614,13 @@ if (cu.m_predMode[0] == MODE_INTRA) { -uint32_t initTrDepth = cu.m_partSize[0] == SIZE_2Nx2N ? 0 : 1; - uint32_t tuDepthRange[2]; cu.getIntraTUQtDepthRange(tuDepthRange, 0); +uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN; residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange); getBestIntraModeChroma(*bestMode, cuGeom); residualQTIntraChroma(*bestMode, cuGeom, 0, 0); - -/* copy the reconstructed part to the recon pic for later intra - * predictions */ -reconYuv.copyToPicYuv(*m_frame-m_reconPicYuv, cu.m_cuAddr, absPartIdx); } else { @@ -1636,16 +1630,22 @@ ShortYuv resiYuv = m_rqt[cuGeom.depth].tmpResiYuv; +/* at RD 0, the prediction pixels are accumulated into the top depth predYuv */ +Yuv predYuv = m_modeDepth[0].bestMode-predYuv; +pixel* predY = predYuv.getLumaAddr(absPartIdx); +pixel* predU = predYuv.getCbAddr(absPartIdx); +pixel* predV = predYuv.getCrAddr(absPartIdx); + primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size, -fencYuv.getLumaAddr(absPartIdx), predYuv.getLumaAddr(absPartIdx), +fencYuv.getLumaAddr(absPartIdx), predY, fencYuv.m_size, predYuv.m_size); 
primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], resiYuv.m_csize, -fencYuv.getCbAddr(absPartIdx), predYuv.getCbAddr(absPartIdx), +fencYuv.getCbAddr(absPartIdx), predU, fencYuv.m_csize, predYuv.m_csize); primitives.chroma[m_csp].sub_ps[sizeIdx
[x265] sao: refine sao merge mode
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1413341637 -32400 # Wed Oct 15 11:53:57 2014 +0900 # Node ID 8014e8d2c321148c7d68942b6f4552b8eede6e1c # Parent 02ff8eaad63232e958153e8b7cdcd5907141a7b6 sao: refine sao merge mode diff -r 02ff8eaad632 -r 8014e8d2c321 source/common/common.h --- a/source/common/common.hMon Oct 13 18:17:00 2014 +0530 +++ b/source/common/common.hWed Oct 15 11:53:57 2014 +0900 @@ -251,18 +251,23 @@ uint32_t count[MAX_NUM_TR_CATEGORIES]; }; +enum SaoMergeMode +{ +SAO_MERGE_NONE, +SAO_MERGE_LEFT, +SAO_MERGE_UP +}; + struct SaoCtuParam { -bool mergeUpFlag; -bool mergeLeftFlag; +SaoMergeMode mergeMode; int typeIdx; uint32_t bandPos;// BO band position int offset[SAO_NUM_OFFSET]; void reset() { -mergeUpFlag = false; -mergeLeftFlag = false; +mergeMode = SAO_MERGE_NONE; typeIdx = -1; bandPos = 0; offset[0] = 0; diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Oct 13 18:17:00 2014 +0530 +++ b/source/encoder/frameencoder.cpp Wed Oct 15 11:53:57 2014 +0900 @@ -413,7 +413,7 @@ const uint32_t lastCUAddr = (slice-m_endCUAddr + NUM_CU_PARTITIONS - 1) / NUM_CU_PARTITIONS; const int numSubstreams = m_param-bEnableWavefront ? m_frame-getPicSym()-getFrameHeightInCU() : 1; -SAOParam *saoParam = slice-m_pic-getPicSym()-m_saoParam; +SAOParam* saoParam = slice-m_sps-bUseSAO ? 
slice-m_pic-getPicSym()-m_saoParam : NULL; for (uint32_t cuAddr = 0; cuAddr lastCUAddr; cuAddr++) { uint32_t col = cuAddr % widthInLCUs; @@ -430,12 +430,12 @@ m_entropyCoder.loadContexts(m_rows[lin - 1].bufferedEntropy); } -if (slice-m_sps-bUseSAO) +if (saoParam) { if (saoParam-bSaoFlag[0] || saoParam-bSaoFlag[1]) { -int mergeLeft = saoParam-ctuParam[0][cuAddr].mergeLeftFlag col; -int mergeUp = saoParam-ctuParam[0][cuAddr].mergeUpFlag lin; +int mergeLeft = col saoParam-ctuParam[0][cuAddr].mergeMode == SAO_MERGE_LEFT; +int mergeUp = lin saoParam-ctuParam[0][cuAddr].mergeMode == SAO_MERGE_UP; if (col) m_entropyCoder.codeSaoMerge(mergeLeft); if (lin !mergeLeft) diff -r 02ff8eaad632 -r 8014e8d2c321 source/encoder/sao.cpp --- a/source/encoder/sao.cppMon Oct 13 18:17:00 2014 +0530 +++ b/source/encoder/sao.cppWed Oct 15 11:53:57 2014 +0900 @@ -90,7 +90,7 @@ m_depthSaoRate[1][3] = 0; } -bool SAO::create(x265_param *param) +bool SAO::create(x265_param* param) { m_param = param; m_hChromaShift = CHROMA_H_SHIFT(param-internalCsp); @@ -161,7 +161,7 @@ } /* allocate memory for SAO parameters */ -void SAO::allocSaoParam(SAOParam *saoParam) const +void SAO::allocSaoParam(SAOParam* saoParam) const { saoParam-numCuInWidth = m_numCuInWidth; @@ -170,14 +170,7 @@ saoParam-ctuParam[2] = new SaoCtuParam[m_numCuInHeight * m_numCuInWidth]; } -/* reset SAO parameters once per frame */ -void SAO::resetSAOParam(SAOParam *saoParam) -{ -saoParam-bSaoFlag[0] = false; -saoParam-bSaoFlag[1] = false; -} - -void SAO::startSlice(Frame *pic, Entropy initState, int qp) +void SAO::startSlice(Frame* pic, Entropy initState, int qp) { Slice* slice = pic-m_picSym-m_slice; @@ -213,7 +206,6 @@ pic-getPicSym()-m_saoParam = saoParam; } -resetSAOParam(saoParam); rdoSaoUnitRowInit(saoParam); // NOTE: Disable SAO automatic turn-off when frame parallelism is @@ -229,7 +221,7 @@ void SAO::processSaoCu(int addr, int typeIdx, int plane) { int x, y; -TComDataCU *cu = m_pic-getCU(addr); +const TComDataCU* cu = 
m_pic-getCU(addr); pixel* rec = m_pic-getPicYuvRec()-getPlaneAddr(plane, addr); int stride = plane ? m_pic-getCStride() : m_pic-getStride(); uint32_t picWidth = m_param-sourceWidth; @@ -454,12 +446,12 @@ if (!idxY) { -pixel *rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane) : m_pic-getPicYuvRec()-getLumaAddr(); +pixel* rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane) : m_pic-getPicYuvRec()-getLumaAddr(); memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth); } int addr = idxY * m_numCuInWidth; -pixel *rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane, addr) : m_pic-getPicYuvRec()-getLumaAddr(addr); +pixel* rec = plane ? m_pic-getPicYuvRec()-getChromaAddr(plane, addr) : m_pic-getPicYuvRec()-getLumaAddr(addr); for (int i = 0; i ctuHeight + 1; i++) { @@ -477,8 +469,8 @@ { addr = idxY * m_numCuInWidth + idxX; +bool mergeLeftFlag = ctuParam[addr].mergeMode == SAO_MERGE_LEFT; int typeIdx = ctuParam[addr].typeIdx; -bool mergeLeftFlag = ctuParam[addr].mergeLeftFlag
Re: [x265] sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1412847192 -32400 # Thu Oct 09 18:33:12 2014 +0900 # Node ID ab734f79e1d071d75e4bbea1742fd125698e9ff3 # Parent 0a18adcecd7deb4f861e6436c9f17e05da994625 sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)] diff -r 0a18adcecd7d -r ab734f79e1d0 source/common/common.h --- a/source/common/common.hWed Oct 08 14:18:20 2014 -0500 +++ b/source/common/common.hThu Oct 09 18:33:12 2014 +0900 @@ -132,6 +132,12 @@ return std::minT(std::maxT(minVal, a), maxVal); } +templatetypename T +inline T x265_min(T a, T b) { return a b ? a : b; } + +templatetypename T +inline T x265_max(T a, T b) { return a b ? a : b; } + typedef int16_t coeff_t; // transform coefficient #define X265_MIN(a, b) ((a) (b) ? (a) : (b)) @@ -224,17 +230,15 @@ bool mergeUpFlag; bool mergeLeftFlag; int typeIdx; -int subTypeIdx;// indicates EO class or BO band position +uint32_t bandPos;// BO band position int offset[SAO_NUM_OFFSET]; -int partIdx; -int partIdxTmp; void reset() { mergeUpFlag = false; mergeLeftFlag = false; typeIdx = -1; -subTypeIdx = 0; +bandPos = 0; offset[0] = 0; offset[1] = 0; offset[2] = 0; @@ -246,7 +250,6 @@ { SaoCtuParam* ctuParam[3]; bool bSaoFlag[2]; -int numCuInHeight; int numCuInWidth; SAOParam() @@ -254,6 +257,7 @@ for (int i = 0; i 3; i++) ctuParam[i] = NULL; } + ~SAOParam() { delete[] ctuParam[0]; diff -r 0a18adcecd7d -r ab734f79e1d0 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppWed Oct 08 14:18:20 2014 -0500 +++ b/source/encoder/entropy.cppThu Oct 09 18:33:12 2014 +0900 @@ -512,7 +512,7 @@ } // We need to split, so don't try these modes. 
-if (cuSplitFlag) +if (cuSplitFlag) codeSplitFlag(ctu, absPartIdx, depth); if (depth ctu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -864,74 +864,40 @@ encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange); } -void Entropy::codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx) +void Entropy::codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane) { -uint32_t symbol; -int i; +int typeIdx = saoLcuParam-typeIdx; -symbol = saoLcuParam-typeIdx + 1; -if (compIdx != 2) -codeSaoTypeIdx(symbol); +if (plane != 2) +{ +encodeBin(typeIdx = 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]); +if (typeIdx = 0) +encodeBinEP(typeIdx SAO_BO ? 1 : 0); +} -if (symbol) +if (typeIdx = 0) { -if (saoLcuParam-typeIdx SAO_BO compIdx != 2) -saoLcuParam-subTypeIdx = saoLcuParam-typeIdx; +enum { OFFSET_THRESH = 1 X265_MIN(X265_DEPTH - 5, 5) }; -int offsetTh = 1 X265_MIN(X265_DEPTH - 5, 5); -if (saoLcuParam-typeIdx == SAO_BO) +if (typeIdx == SAO_BO) { -for (i = 0; i SAO_BO_LEN; i++) -{ -uint32_t absOffset = ((saoLcuParam-offset[i] 0) ? -saoLcuParam-offset[i] : saoLcuParam-offset[i]); -codeSaoMaxUvlc(absOffset, offsetTh - 1); -} +for (int i = 0; i SAO_BO_LEN; i++) +codeSaoMaxUvlc(abs(saoLcuParam-offset[i]), OFFSET_THRESH - 1); -for (i = 0; i SAO_BO_LEN; i++) -{ +for (int i = 0; i SAO_BO_LEN; i++) if (saoLcuParam-offset[i] != 0) -{ -uint32_t sign = (saoLcuParam-offset[i] 0) ? 
1 : 0; -codeSAOSign(sign); -} -} +encodeBinEP(saoLcuParam-offset[i] 0); -symbol = (uint32_t)(saoLcuParam-subTypeIdx); -codeSaoUflc(5, symbol); +encodeBinsEP(saoLcuParam-bandPos, 5); } -else // if (saoLcuParam-typeIdx SAO_BO) +else // if (typeIdx SAO_BO) { -codeSaoMaxUvlc(saoLcuParam-offset[0], offsetTh - 1); -codeSaoMaxUvlc(saoLcuParam-offset[1], offsetTh - 1); -codeSaoMaxUvlc(-saoLcuParam-offset[2], offsetTh - 1); -codeSaoMaxUvlc(-saoLcuParam-offset[3], offsetTh - 1); -if (compIdx != 2) -{ -symbol = (uint32_t)(saoLcuParam-subTypeIdx); -codeSaoUflc(2, symbol); -} -} -} -} - -void Entropy::codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp) -{ -if (saoFlag) -{ -if (rx 0 cuAddrInSlice != 0 allowMergeLeft) -codeSaoMerge(saoLcuParam-mergeLeftFlag); -else -saoLcuParam-mergeLeftFlag = 0
[x265] (no subject)
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1412754238 -32400 # Wed Oct 08 16:43:58 2014 +0900 # Node ID 84c960cf1552f3f317690fa1d35f6536bf4b36b4 # Parent 46c4b98d92ece7ff25d790b0fc69a8185d575524 fix invalid copy source context in rdLevel==0 diff -r 46c4b98d92ec -r 84c960cf1552 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Oct 06 22:07:54 2014 -0500 +++ b/source/encoder/analysis.cpp Wed Oct 08 16:43:58 2014 +0900 @@ -1275,6 +1275,8 @@ uint32_t nextDepth = depth + 1; invalidateContexts(nextDepth); +// initialize RD with previous depth buffer +m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur); TComDataCU* subTempPartCU = m_tempCU[nextDepth]; for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) { @@ -1285,10 +1287,8 @@ if (child_cu->flags & CU::PRESENT) { -if (partUnitIdx) // initialize RD with previous depth buffer +if (partUnitIdx && m_param->rdLevel) m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next); -else -m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur); compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, nextDepth, child_cu, cu_unsplit_flag, partUnitIdx, minDepth); @@ -1372,7 +1372,8 @@ std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]); std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // copy 'next' state from last CU of next depth as next state of this CU -m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); +if (m_param->rdLevel) + m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); } } else @@ -1381,7 +1382,8 @@ std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]); std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // copy 'next' state from last CU of next depth as next state of this CU -m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); +if (m_param->rdLevel) +m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix invalid copy source context in rdLevel==0
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1412754238 -32400 # Wed Oct 08 16:43:58 2014 +0900 # Node ID 84c960cf1552f3f317690fa1d35f6536bf4b36b4 # Parent 46c4b98d92ece7ff25d790b0fc69a8185d575524 fix invalid copy source context in rdLevel==0 diff -r 46c4b98d92ec -r 84c960cf1552 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Oct 06 22:07:54 2014 -0500 +++ b/source/encoder/analysis.cpp Wed Oct 08 16:43:58 2014 +0900 @@ -1275,6 +1275,8 @@ uint32_t nextDepth = depth + 1; invalidateContexts(nextDepth); +// initialize RD with previous depth buffer +m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur); TComDataCU* subTempPartCU = m_tempCU[nextDepth]; for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++) { @@ -1285,10 +1287,8 @@ if (child_cu->flags & CU::PRESENT) { -if (partUnitIdx) // initialize RD with previous depth buffer +if (partUnitIdx && m_param->rdLevel) m_rdContexts[nextDepth].cur.load(m_rdContexts[nextDepth].next); -else -m_rdContexts[nextDepth].cur.load(m_rdContexts[depth].cur); compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, nextDepth, child_cu, cu_unsplit_flag, partUnitIdx, minDepth); @@ -1372,7 +1372,8 @@ std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]); std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // copy 'next' state from last CU of next depth as next state of this CU -m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); +if (m_param->rdLevel) + m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); } } else @@ -1381,7 +1382,8 @@ std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]); std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // copy 'next' state from last CU of next depth as next state of this CU -m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); +if (m_param->rdLevel) +m_rdContexts[nextDepth].next.store(m_rdContexts[depth].next); } } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1412500756 -32400 # Sun Oct 05 18:19:16 2014 +0900 # Node ID 64ea900398eb29ddd1c12df8126fa9866a280c81 # Parent b6d49505b179cb509aa76f3a065192f0b4926579 sao: refine, fix sao-non-deblock [CHANGES OUTPUT (RExt, sao-non-deblock)] diff -r b6d49505b179 -r 64ea900398eb source/common/common.h --- a/source/common/common.hThu Oct 02 16:47:55 2014 -0500 +++ b/source/common/common.hSun Oct 05 18:19:16 2014 +0900 @@ -132,6 +132,12 @@ return std::minT(std::maxT(minVal, a), maxVal); } +templatetypename T +inline T x265_min(T a, T b) { return a b ? a : b; } + +templatetypename T +inline T x265_max(T a, T b) { return a b ? a : b; } + typedef int16_t coeff_t; // transform coefficient #define X265_MIN(a, b) ((a) (b) ? (a) : (b)) @@ -224,17 +230,15 @@ bool mergeUpFlag; bool mergeLeftFlag; int typeIdx; -int subTypeIdx;// indicates EO class or BO band position +uint32_t bandPos;// BO band position int offset[SAO_NUM_OFFSET]; -int partIdx; -int partIdxTmp; void reset() { mergeUpFlag = false; mergeLeftFlag = false; typeIdx = -1; -subTypeIdx = 0; +bandPos = 0; offset[0] = 0; offset[1] = 0; offset[2] = 0; @@ -246,7 +250,6 @@ { SaoCtuParam* ctuParam[3]; bool bSaoFlag[2]; -int numCuInHeight; int numCuInWidth; SAOParam() @@ -254,6 +257,7 @@ for (int i = 0; i 3; i++) ctuParam[i] = NULL; } + ~SAOParam() { delete[] ctuParam[0]; diff -r b6d49505b179 -r 64ea900398eb source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Oct 02 16:47:55 2014 -0500 +++ b/source/encoder/entropy.cppSun Oct 05 18:19:16 2014 +0900 @@ -511,7 +511,7 @@ } // We need to split, so don't try these modes. 
-if (cuSplitFlag) +if (cuSplitFlag) codeSplitFlag(ctu, absPartIdx, depth); if (depth ctu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -863,74 +863,40 @@ encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange); } -void Entropy::codeSaoOffset(SaoCtuParam* saoLcuParam, uint32_t compIdx) +void Entropy::codeSaoOffset(const SaoCtuParam* saoLcuParam, int plane) { -uint32_t symbol; -int i; +int typeIdx = saoLcuParam-typeIdx; -symbol = saoLcuParam-typeIdx + 1; -if (compIdx != 2) -codeSaoTypeIdx(symbol); +if (plane != 2) +{ +encodeBin(typeIdx = 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]); +if (typeIdx = 0) +encodeBinEP(typeIdx SAO_BO ? 1 : 0); +} -if (symbol) +if (typeIdx = 0) { -if (saoLcuParam-typeIdx SAO_BO compIdx != 2) -saoLcuParam-subTypeIdx = saoLcuParam-typeIdx; +enum { OFFSET_THRESH = 1 X265_MIN(X265_DEPTH - 5, 5) }; -int offsetTh = 1 X265_MIN(X265_DEPTH - 5, 5); -if (saoLcuParam-typeIdx == SAO_BO) +if (typeIdx == SAO_BO) { -for (i = 0; i SAO_BO_LEN; i++) -{ -uint32_t absOffset = ((saoLcuParam-offset[i] 0) ? -saoLcuParam-offset[i] : saoLcuParam-offset[i]); -codeSaoMaxUvlc(absOffset, offsetTh - 1); -} +for (int i = 0; i SAO_BO_LEN; i++) +codeSaoMaxUvlc(abs(saoLcuParam-offset[i]), OFFSET_THRESH - 1); -for (i = 0; i SAO_BO_LEN; i++) -{ +for (int i = 0; i SAO_BO_LEN; i++) if (saoLcuParam-offset[i] != 0) -{ -uint32_t sign = (saoLcuParam-offset[i] 0) ? 
1 : 0; -codeSAOSign(sign); -} -} +encodeBinEP(saoLcuParam-offset[i] 0); -symbol = (uint32_t)(saoLcuParam-subTypeIdx); -codeSaoUflc(5, symbol); +encodeBinsEP(saoLcuParam-bandPos, 5); } -else // if (saoLcuParam-typeIdx SAO_BO) +else // if (typeIdx SAO_BO) { -codeSaoMaxUvlc(saoLcuParam-offset[0], offsetTh - 1); -codeSaoMaxUvlc(saoLcuParam-offset[1], offsetTh - 1); -codeSaoMaxUvlc(-saoLcuParam-offset[2], offsetTh - 1); -codeSaoMaxUvlc(-saoLcuParam-offset[3], offsetTh - 1); -if (compIdx != 2) -{ -symbol = (uint32_t)(saoLcuParam-subTypeIdx); -codeSaoUflc(2, symbol); -} -} -} -} - -void Entropy::codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoCtuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp) -{ -if (saoFlag) -{ -if (rx 0 cuAddrInSlice != 0 allowMergeLeft) -codeSaoMerge(saoLcuParam-mergeLeftFlag); -else -saoLcuParam-mergeLeftFlag = 0
[x265] fix bug in 73c6c9086577 for rdLevel=0
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1412209540 -32400 # Thu Oct 02 09:25:40 2014 +0900 # Node ID 2efc3c19dd26944506c2c5e801abc96b1c048b40 # Parent d0fa09e9cca540c6eab84308dea481f8368b1cb1 fix bug in 73c6c9086577 for rdLevel=0 diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Wed Oct 01 09:39:36 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Thu Oct 02 09:25:40 2014 +0900 @@ -454,19 +454,15 @@ m_cuAboveRight = cu-getCUAboveRight(); } -void TComDataCU::copyToSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth) +void TComDataCU::copyFromPic(TComDataCU* ctu, CU* cuData) { -X265_CHECK(partUnitIdx 4, part unit should be less than 4\n); +m_pic = ctu-m_pic; +m_slice= ctu-m_slice; +m_cuAddr = ctu-getAddr(); +m_absIdxInCTU = cuData-encodeIdx; -uint32_t partOffset = cuData-numPartitions * partUnitIdx; - -m_pic = cu-m_pic; -m_slice= cu-m_slice; -m_cuAddr = cu-getAddr(); -m_absIdxInCTU = cuData-encodeIdx + partOffset; - -m_cuPelX = cu-getCUPelX() + ((partUnitIdx 1) (g_maxLog2CUSize - depth)); -m_cuPelY = cu-getCUPelY() + ((partUnitIdx 1) (g_maxLog2CUSize - depth)); +m_cuPelX = ctu-getCUPelX() + g_zscanToPelX[m_absIdxInCTU]; +m_cuPelY = ctu-getCUPelY() + g_zscanToPelY[m_absIdxInCTU]; m_psyEnergy= 0; m_totalPsyCost = MAX_INT64; @@ -478,18 +474,17 @@ m_coeffBits= 0; m_numPartitions= cuData-numPartitions; -TComDataCU* otherCU = m_pic-getCU(m_cuAddr); int sizeInChar = sizeof(char) * m_numPartitions; -memcpy(m_skipFlag, otherCU-getSkipFlag() + m_absIdxInCTU, sizeof(*m_skipFlag) * m_numPartitions); -memcpy(m_qp, otherCU-getQP() + m_absIdxInCTU, sizeInChar); +memcpy(m_skipFlag, ctu-getSkipFlag() + m_absIdxInCTU, sizeof(*m_skipFlag) * m_numPartitions); +memcpy(m_qp, ctu-getQP() + m_absIdxInCTU, sizeInChar); -memcpy(m_partSizes, otherCU-getPartitionSize() + m_absIdxInCTU, sizeof(*m_partSizes) * m_numPartitions); -memcpy(m_predModes, otherCU-getPredictionMode() 
+ m_absIdxInCTU, sizeof(*m_predModes) * m_numPartitions); +memcpy(m_partSizes, ctu-getPartitionSize() + m_absIdxInCTU, sizeof(*m_partSizes) * m_numPartitions); +memcpy(m_predModes, ctu-getPredictionMode() + m_absIdxInCTU, sizeof(*m_predModes) * m_numPartitions); -memcpy(m_lumaIntraDir, otherCU-getLumaIntraDir() + m_absIdxInCTU, sizeInChar); -memcpy(m_depth, otherCU-getDepth() + m_absIdxInCTU, sizeInChar); -memcpy(m_log2CUSize, otherCU-getLog2CUSize() + m_absIdxInCTU, sizeInChar); +memcpy(m_lumaIntraDir, ctu-getLumaIntraDir() + m_absIdxInCTU, sizeInChar); +memcpy(m_depth, ctu-getDepth() + m_absIdxInCTU, sizeInChar); +memcpy(m_log2CUSize, ctu-getLog2CUSize() + m_absIdxInCTU, sizeInChar); } // @@ -2411,6 +2406,8 @@ void TComDataCU::loadCTUData(uint32_t maxCUSize) { // Initialize the coding blocks inside the CTB +int picWidth = m_pic-m_origPicYuv-m_picWidth; +int picHeight = m_pic-m_origPicYuv-m_picHeight; for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize = MIN_LOG2_CU_SIZE; log2CUSize--) { uint32_t blockSize = 1 log2CUSize; @@ -2425,8 +2422,8 @@ uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + (depth_idx 2); uint32_t px = m_cuPelX + sb_x * blockSize; uint32_t py = m_cuPelY + sb_y * blockSize; -int32_t present_flag = px m_pic-m_origPicYuv-m_picWidth py m_pic-m_origPicYuv-m_picHeight; -int32_t split_mandatory_flag = present_flag !last_level_flag (px + blockSize m_pic-m_origPicYuv-m_picWidth || py + blockSize m_pic-m_origPicYuv-m_picHeight); +int32_t present_flag = px picWidth py picHeight; +int32_t split_mandatory_flag = present_flag !last_level_flag (px + blockSize picWidth || py + blockSize picHeight); /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ uint32_t xOffset = (sb_x * blockSize) 3; diff -r d0fa09e9cca5 -r 2efc3c19dd26 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hWed Oct 01 09:39:36 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.hThu Oct 02 09:25:40 2014 
+0900 @@ -276,7 +276,7 @@ void initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth, int qp); void loadCTUData(uint32_t maxCUSize); -void copyToSubCU(TComDataCU* ctu, CU* cuData, uint32_t partUnitIdx, uint32_t depth); +void
Re: [x265] [PATCH 1 of 2] TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure
decoder crash (invalid stream output) for --rd=0. ./x265 --rd=0 -f 9 --b-adapt=0 --bframes=3 --input RaceHorses_416x240_30.yuv --input-res 416x240 --fps 30 --output o.bin --recon o.yuv From: santhosh...@multicorewareinc.com Subject: [x265] [PATCH 1 of 2] TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure Date: Tue, 30 Sep 2014 09:04:28 +0530 # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1412047376 -19800 # Tue Sep 30 08:52:56 2014 +0530 # Node ID 21b1e8daa7e97e3828dfd948ff776951b939f423 # Parent 5a6845566d1492d29af29ecc0cf75d644994735c TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cppMon Sep 29 17:37:47 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cppTue Sep 30 08:52:56 2014 +0530 @@ -387,16 +387,15 @@ } // initialize Sub partition -void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp) +void TComDataCU::initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth, int qp) { X265_CHECK(partUnitIdx 4, part unit should be less than 4\n); uint8_t log2CUSize = g_maxLog2CUSize - depth; -uint32_t partOffset = (cu-getTotalNumPart() 2) * partUnitIdx; m_pic = cu-m_pic; m_slice= cu-m_slice; m_cuAddr = cu-getAddr(); -m_absIdxInLCU = cu-getZorderIdxInCU() + partOffset; +m_absIdxInLCU = cuData-encodeIdx; m_cuPelX = cu-getCUPelX() + ((partUnitIdx 1) log2CUSize); m_cuPelY = cu-getCUPelY() + ((partUnitIdx 1) log2CUSize); @@ -453,7 +452,7 @@ m_cuAboveRight = cu-getCUAboveRight(); } -void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth) +void TComDataCU::copyToSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth) { X265_CHECK(partUnitIdx 4, part unit should be less than 4\n); @@ -462,7 +461,7 @@ m_pic = cu-m_pic; m_slice= cu-m_slice; m_cuAddr = cu-getAddr(); -m_absIdxInLCU = 
cu-getZorderIdxInCU() + partOffset; +m_absIdxInLCU = cuData-encodeIdx + partOffset; m_cuPelX = cu-getCUPelX() + ((partUnitIdx 1) (g_maxLog2CUSize - depth)); m_cuPelY = cu-getCUPelY() + ((partUnitIdx 1) (g_maxLog2CUSize - depth)); @@ -1067,9 +1066,9 @@ } else { -if (getZorderIdxInCU() 0) +if (m_pic-getCU(m_cuAddr)-m_CULocalData-encodeIdx 0) { -return m_pic-getCU(getAddr())-getLastCodedQP(getZorderIdxInCU()); +return m_pic-getCU(getAddr())-getLastCodedQP(m_pic-getCU(m_cuAddr)-m_CULocalData-encodeIdx); } else if (getAddr() 0 !(m_slice-m_pps-bEntropyCodingSyncEnabled getAddr() % m_pic-getFrameWidthInCU() == 0)) @@ -2434,7 +2433,7 @@ CU *cu = m_CULocalData + cuIdx; cu-log2CUSize = log2CUSize; cu-childIdx = child_idx; -cu-encodeIdx = g_depthScanIdx[yOffset][xOffset]; +cu-encodeIdx = g_depthScanIdx[yOffset][xOffset] * 4; cu-flags = 0; CU_SET_FLAG(cu-flags, CU::PRESENT, present_flag); diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.h Mon Sep 29 17:37:47 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.h Tue Sep 30 08:52:56 2014 +0530 @@ -272,10 +272,10 @@ void initCU(Frame* pic, uint32_t cuAddr); void initEstData(); -void initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp); +void initSubCU(TComDataCU* cu, CU* cuData, uint32_t partUnitIdx, uint32_t depth, int qp); void loadCTUData(uint32_t maxCUSize); -void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, uint32_t depth); +void copyToSubCU(TComDataCU* lcu, CU* cuData, uint32_t partUnitIdx, uint32_t depth); void copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis = true); void copyToPic(uint32_t depth); @@ -288,8 +288,6 @@ uint32_t getAddr(){ return m_cuAddr; } -uint32_t getZorderIdxInCU() { return m_absIdxInLCU; } - uint32_t getSCUAddr() const { return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInLCU; } diff -r 5a6845566d14 -r 21b1e8daa7e9 source/Lib/TLibCommon/TComPattern.cpp --- 
a/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 29 17:37:47 2014 -0500 +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Sep 30 08:52:56 2014 +0530 @@ -49,7
[x265] sao: remove frame-based SAO
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1412038092 -32400 # Tue Sep 30 09:48:12 2014 +0900 # Node ID 3eacdaa304400b0100dcf1d1515ae1d24cbf4305 # Parent 5a6845566d1492d29af29ecc0cf75d644994735c sao: remove frame-based SAO diff -r 5a6845566d14 -r 3eacdaa30440 source/common/common.h --- a/source/common/common.hMon Sep 29 17:37:47 2014 -0500 +++ b/source/common/common.hTue Sep 30 09:48:12 2014 +0900 @@ -212,34 +212,6 @@ uint32_t count[8]; }; -struct SAOQTPart -{ -enum { NUM_DOWN_PART = 4 }; - -int bestType; -int subTypeIdx; // indicates EO class or BO band position -int offset[SAO_NUM_OFFSET]; -int startCUX; -int startCUY; -int endCUX; -int endCUY; - -int partIdx; -int partLevel; -int partCol; -int partRow; - -int downPartsIdx[NUM_DOWN_PART]; -int upPartIdx; - -boolbSplit; - -boolbProcessed; -double minCost; -int64_t minDist; -int minRate; -}; - struct SaoLcuParam { bool mergeUpFlag; @@ -266,10 +238,7 @@ struct SAOParam { SaoLcuParam* saoLcuParam[3]; -SAOQTPart* saoPart[3]; bool bSaoFlag[2]; -bool oneUnitFlag[3]; -int maxSplitLevel; int numCuInHeight; int numCuInWidth; @@ -277,15 +246,11 @@ { for (int i = 0; i 3; i++) { -saoPart[i] = NULL; saoLcuParam[i] = NULL; } } ~SAOParam() { -delete[] saoPart[0]; -delete[] saoPart[1]; -delete[] saoPart[2]; delete[] saoLcuParam[0]; delete[] saoLcuParam[1]; delete[] saoLcuParam[2]; diff -r 5a6845566d14 -r 3eacdaa30440 source/common/param.cpp --- a/source/common/param.cpp Mon Sep 29 17:37:47 2014 -0500 +++ b/source/common/param.cpp Tue Sep 30 09:48:12 2014 +0900 @@ -169,7 +169,6 @@ /* SAO Loop Filter */ param-bEnableSAO = 1; param-saoLcuBoundary = 0; -param-saoLcuBasedOptimization = 1; /* Coding Quality */ param-cbQpOffset = 0; @@ -625,7 +624,6 @@ OPT(lft) p-bEnableLoopFilter = atobool(value); OPT(sao) p-bEnableSAO = atobool(value); OPT(sao-lcu-bounds) p-saoLcuBoundary = atoi(value); -OPT(sao-lcu-opt) p-saoLcuBasedOptimization = atoi(value); OPT(ssim) p-bEnableSsim = atobool(value); OPT(psnr) 
p-bEnablePsnr = atobool(value); OPT(hash) p-decodedPictureHashSEI = atoi(value); @@ -1165,13 +1163,7 @@ fprintf(stderr, nr=%d , param-noiseReduction); TOOLOPT(param-bEnableLoopFilter, lft); -if (param-bEnableSAO) -{ -if (param-saoLcuBasedOptimization) -fprintf(stderr, sao-lcu ); -else -fprintf(stderr, sao-frame ); -} +TOOLOPT(param-bEnableSAO, sao); TOOLOPT(param-bEnableSignHiding, signhide); TOOLOPT(param-bCULossless, cu-lossless); TOOLOPT(param-bEnableFastIntra, fast-intra); @@ -1245,7 +1237,6 @@ BOOL(p-bEnableLoopFilter, lft); BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); -s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); BOOL(p-bBPyramid, b-pyramid); BOOL(p-rc.cuTree, cutree); s += sprintf(s, rc=%s, p-rc.rateControlMode == X265_RC_ABR ? ( diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Sep 29 17:37:47 2014 -0500 +++ b/source/encoder/encoder.cppTue Sep 30 09:48:12 2014 +0900 @@ -1247,10 +1247,6 @@ x265_log(p, X265_LOG_INFO, Parallelism disabled, single thread mode\n); p-bEnableWavefront = 0; } -if (!p-saoLcuBasedOptimization p-frameNumThreads 1) -{ -x265_log(p, X265_LOG_INFO, Warning: picture-based SAO used with frame parallelism\n); -} if (p-keyframeMax 0) { diff -r 5a6845566d14 -r 3eacdaa30440 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Sep 29 17:37:47 2014 -0500 +++ b/source/encoder/frameencoder.cpp Tue Sep 30 09:48:12 2014 +0900 @@ -85,7 +85,7 @@ m_param = top-m_param; m_numRows = numRows; m_numCols = numCols; -m_filterRowDelay = (m_param-bEnableSAO m_param-saoLcuBasedOptimization m_param-saoLcuBoundary) ? +m_filterRowDelay = (m_param-bEnableSAO m_param-saoLcuBoundary) ? 2 : (m_param-bEnableSAO || m_param-bEnableLoopFilter ? 
1 : 0); m_filterRowDelayCus = m_filterRowDelay * numCols; @@ -323,17 +323,6 @@ m_frameStats.percentSkip = (double)totalSkip / totalCuCount; } -if (slice-m_sps-bUseSAO !m_param-saoLcuBasedOptimization) -{ -/* frame based SAO */ -m_frameFilter.m_sao.SAOProcess(m_frame-getPicSym()-m_saoParam); -restoreLFDisabledOrigYuv(m_frame); - -// Extend border after whole-frame SAO is finished -for (int row = 0; row m_numRows; row
[x265] refine deblocking filter
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1411727676 -32400 # Fri Sep 26 19:34:36 2014 +0900 # Node ID 06237deb460b629d6100d5b613d42033cc3477bd # Parent 7dccbbed034970de161b361cd6e17ed4efca7226 refine deblocking filter diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComPicYuv.h --- a/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 18:26:45 2014 -0500 +++ b/source/Lib/TLibCommon/TComPicYuv.hFri Sep 26 19:34:36 2014 +0900 @@ -155,6 +155,8 @@ pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; } +int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; } + uint32_t getCUHeight(int rowNum); void copyFromPicture(const x265_picture, int padx, int pady); diff -r 7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Wed Sep 24 18:26:45 2014 -0500 +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 19:34:36 2014 +0900 @@ -115,10 +115,10 @@ uint32_t g_maxCUSize = MAX_CU_SIZE; uint32_t g_maxFullDepth = NUM_FULL_DEPTH - 1; uint32_t g_maxCUDepth= NUM_CU_DEPTH - 1; -uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; -uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; +uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, }; +uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, }; -const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = +const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] = { 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12, 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28, @@ -138,7 +138,7 @@ 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60 }; -const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = +const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS] = { 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, diff -r 
7dccbbed0349 -r 06237deb460b source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Wed Sep 24 18:26:45 2014 -0500 +++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 26 19:34:36 2014 +0900 @@ -54,6 +54,8 @@ #define UNIT_SIZE (1 LOG2_UNIT_SIZE) // unit size of CU partition #define TMVP_UNIT_MASK 0xF0// mask for mapping index to CompressMV field +#define MAX_NUM_PARTITIONS 256 + #define MIN_PU_SIZE 4 #define MIN_TU_SIZE 4 #define MAX_NUM_SPU_W (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line @@ -75,15 +77,15 @@ extern const uint8_t g_chroma422IntraAngleMappingTable[36]; // flexible conversion from relative to absolute index -extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; -extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; +extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS]; +extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS]; void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t* curIdx); void initRasterToZscan(uint32_t maxFullDepth); // conversion of partition index to picture pel position -extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; -extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; +extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS]; +extern const uint8_t g_zscanToPelY[MAX_NUM_PARTITIONS]; // global variable (LCU width/height, max. 
CU depth) extern uint32_t g_maxLog2CUSize; diff -r 7dccbbed0349 -r 06237deb460b source/common/deblock.cpp --- a/source/common/deblock.cpp Wed Sep 24 18:26:45 2014 -0500 +++ b/source/common/deblock.cpp Fri Sep 26 19:34:36 2014 +0900 @@ -32,23 +32,24 @@ #define DEBLOCK_SMALLEST_BLOCK 8 #define DEFAULT_INTRA_TC_OFFSET 2 -void Deblock::deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]) +void Deblock::deblockCTU(TComDataCU* cu, int32_t dir) { +uint8_t blockingStrength[MAX_NUM_PARTITIONS]; + memset(blockingStrength, 0, sizeof(uint8_t) * m_numPartitions); -memset(edgeFilter, 0, sizeof(bool) * m_numPartitions); -deblockCU(cu, 0, 0, dir, edgeFilter, blockingStrength); +deblockCU(cu, 0, 0, dir, blockingStrength); } /* Deblocking filter process in CU-based (the same function as conventional's) * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */ -void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]) +void Deblock::deblockCU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, const int32_t dir, uint8_t blockingStrength[]) { if (!cu-m_pic || cu-getPartitionSize(absZOrderIdx) == SIZE_NONE) return; Frame* pic = cu-m_pic; -uint32_t
[x265] refine deblocking filter
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1411549726 -32400 # Wed Sep 24 18:08:46 2014 +0900 # Node ID 9f96fc8374d834d424190b0b1581054996985b67 # Parent b2b7072ddbf73085d457bd6a71bca946e505dea8 refine deblocking filter diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/Lib/TLibCommon/TComPicYuv.h --- a/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 11:48:15 2014 +0530 +++ b/source/Lib/TLibCommon/TComPicYuv.hWed Sep 24 18:08:46 2014 +0900 @@ -155,6 +155,8 @@ pixel* getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; } +int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; } + uint32_t getCUHeight(int rowNum); void copyFromPicture(const x265_picture, int padx, int pady); diff -r b2b7072ddbf7 -r 9f96fc8374d8 source/common/deblock.cpp --- a/source/common/deblock.cpp Wed Sep 24 11:48:15 2014 +0530 +++ b/source/common/deblock.cpp Wed Sep 24 18:08:46 2014 +0900 @@ -48,7 +48,7 @@ return; Frame* pic = cu-m_pic; -uint32_t curNumParts = pic-getNumPartInCU() (depth 1); +uint32_t curNumParts = m_numPartitions (depth * 2); if (cu-getDepth(absZOrderIdx) depth) { @@ -56,35 +56,34 @@ uint32_t xmax = cu-m_slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); uint32_t ymax = cu-m_slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); for (uint32_t partIdx = 0; partIdx 4; partIdx++, absZOrderIdx += qNumParts) -{ if (g_zscanToPelX[absZOrderIdx] xmax g_zscanToPelY[absZOrderIdx] ymax) deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength); -} return; } Param params; setLoopfilterParam(cu, absZOrderIdx, params); -setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength); +setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength); setEdgefilterPU(cu, absZOrderIdx, dir, params, edgeFilter, blockingStrength); for (uint32_t partIdx = absZOrderIdx; partIdx 
absZOrderIdx + curNumParts; partIdx++) { -uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx 1) : !(partIdx 2)); +uint32_t bsCheck = !(partIdx (1 dir)); -if (edgeFilter[partIdx] bsCheck) +if (bsCheck edgeFilter[partIdx]) getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength); } -uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK LOG2_UNIT_SIZE; +const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK LOG2_UNIT_SIZE; uint32_t sizeInPU = pic-getNumPartInCUSize() depth; uint32_t shiftFactor = (dir == EDGE_VER) ? cu-getHorzChromaShift() : cu-getVertChromaShift(); -const bool alwaysDoChroma = cu-getChromaFormat() == X265_CSP_I444; - +uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK shiftFactor) LOG2_UNIT_SIZE) - 1; +uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) LOG2_UNIT_SIZE; + for (uint32_t e = 0; e sizeInPU; e += partIdxIncr) { edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength); -if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK shiftFactor) LOG2_UNIT_SIZE))) +if (!((e0 + e) chromaMask)) edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength); } } @@ -115,66 +114,60 @@ } } -void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]) +void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]) { if (cu-getTransformIdx(absZOrderIdx) + cu-getDepth(absZOrderIdx) (uint8_t)depth) { -const uint32_t curNumParts = cu-m_pic-getNumPartInCU() (depth 1); +const uint32_t curNumParts = m_numPartitions (depth * 2); const uint32_t qNumParts = curNumParts 2; for (uint32_t partIdx = 0; partIdx 4; partIdx++, absZOrderIdx += qNumParts) -{ -uint32_t nsAddr = absZOrderIdx; -setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength); -} +setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, 
blockingStrength); return; } uint32_t widthInBaseUnits = 1 (cu-getLog2CUSize(absZOrderIdx) - cu-getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE); -setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits); +setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits); } void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir
[x265] simplify intra filter (with fix for da61cf406f16) (Re: primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35))
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1411388939 -32400 # Mon Sep 22 21:28:59 2014 +0900 # Node ID 3f229951f826e1d09dd0258721ef5a1f9fdc4392 # Parent fd435504f15e0b13dabba9efe0aa94e7047060b5 simplify intra filter (with fix for da61cf406f16) diff -r fd435504f15e -r 3f229951f826 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 13:14:54 2014 +0530 +++ b/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 21:28:59 2014 +0900 @@ -52,133 +52,96 @@ void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, pixel* refAbove, pixel* refLeft, pixel* refAboveFlt, pixel* refLeftFlt, int dirMode) { -pixel* roiOrigin; -pixel* adiTemp; - -int picStride = cu-m_pic-getStride(); - IntraNeighbors intraNeighbors; initIntraNeighbors(cu, zOrderIdxInPart, partDepth, true, intraNeighbors); uint32_t tuSize = intraNeighbors.tuSize; uint32_t tuSize2 = tuSize 1; -roiOrigin = cu-m_pic-getPicYuvRec()-getLumaAddr(cu-getAddr(), cu-getZorderIdxInCU() + zOrderIdxInPart); -adiTemp = adiBuf; +pixel* adiOrigin = cu-m_pic-getPicYuvRec()-getLumaAddr(cu-getAddr(), cu-getZorderIdxInCU() + zOrderIdxInPart); +int picStride = cu-m_pic-getStride(); -fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors); +fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors); +// initialization of ADI buffers +const int bufOffset = tuSize - 1; +refAbove += bufOffset; +refLeft += bufOffset; + +// ADI_BUF_STRIDE * (2 * tuSize + 1); +memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel)); +for (int k = 0; k tuSize2 + 1; k++) +refLeft[k] = adiBuf[k * ADI_BUF_STRIDE]; + bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32) tuSize : g_intraFilterFlags[dirMode] tuSize); if (bUseFilteredPredictions) { // generate filtered intra prediction samples -// left and left above border + above and above right border + top left corner = length of 3. 
filter buffer -int bufSize = tuSize2 + tuSize2 + 1; -uint32_t wh = ADI_BUF_STRIDE * (tuSize2 + 1); // number of elements in one buffer +refAboveFlt += bufOffset; +refLeftFlt += bufOffset; -pixel* filterBuf = adiBuf + wh; // buffer for 2. filtering (sequential) -pixel* filterBufN = filterBuf + bufSize; // buffer for 1. filtering (sequential) +bool bStrongSmoothing = (tuSize == 32 cu-m_slice-m_sps-bUseStrongIntraSmoothing); -int l = 0; -// left border from bottom to top -for (int i = 0; i tuSize2; i++) +if (bStrongSmoothing) { -filterBuf[l++] = adiTemp[ADI_BUF_STRIDE * (tuSize2 - i)]; -} +const int trSize = 32; +const int trSize2 = 32 * 2; +const int threshold = 1 (X265_DEPTH - 5); +int refBL = refLeft[trSize2]; +int refTL = refAbove[0]; +int refTR = refAbove[trSize2]; +bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) threshold +abs(refTL + refTR - 2 * refAbove[trSize]) threshold); -// top left corner -filterBuf[l++] = adiTemp[0]; +if (bStrongSmoothing) +{ +// bilinear interpolation +const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1; +int init = (refTL shift) + tuSize; +int delta; -// above border from left to right -memcpy(filterBuf[l], adiTemp[1], tuSize2 * sizeof(*filterBuf)); +refLeftFlt[0] = refAboveFlt[0] = refAbove[0]; -if (tuSize = 32 cu-m_slice-m_sps-bUseStrongIntraSmoothing) -{ -int bottomLeft = filterBuf[0]; -int topLeft = filterBuf[tuSize2]; -int topRight = filterBuf[bufSize - 1]; -int threshold = 1 (X265_DEPTH - 5); -bool bilinearLeft = abs(bottomLeft + topLeft - 2 * filterBuf[tuSize]) threshold; -bool bilinearAbove = abs(topLeft + topRight - 2 * filterBuf[tuSize2 + tuSize]) threshold; +//TODO: Performance Primitive??? 
+delta = refBL - refTL; +for (int i = 1; i trSize2; i++) +refLeftFlt[i] = (init + delta * i) shift; +refLeftFlt[trSize2] = refLeft[trSize2]; -if (bilinearLeft bilinearAbove) -{ -int shift = intraNeighbors.log2TrSize + 1; -filterBufN[0] = filterBuf[0]; -filterBufN[tuSize2] = filterBuf[tuSize2]; -filterBufN[bufSize - 1] = filterBuf[bufSize - 1]; -//TODO: Performance Primitive??? -for (int i = 1; i tuSize2; i
[x265] primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35)
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1411112115 -32400 # Fri Sep 19 16:35:15 2014 +0900 # Node ID 3a2c1caf0f80e4ee2c1216636a3f9d067f719d6f # Parent 4680ab4f92b8cc809b1e8dbc927126ec70bcc5c5 primitives: intra_pred[4][35] => intra_pred[35][4] (avoid *35) diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Thu Sep 18 18:16:25 2014 +0530 +++ b/source/Lib/TLibCommon/TComPattern.cpp Fri Sep 19 16:35:15 2014 +0900 @@ -68,9 +68,9 @@ fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors); -bool bUseFilteredPredictions = (dirMode == ALL_IDX || (g_intraFilterFlags[dirMode] tuSize)); +bool bUseFilteredPredictions = (dirMode == ALL_IDX ? (8 | 16 | 32) tuSize : g_intraFilterFlags[dirMode] tuSize); -if (bUseFilteredPredictions 8 = tuSize tuSize = 32) +if (bUseFilteredPredictions) { // generate filtered intra prediction samples // left and left above border + above and above right border + top left corner = length of 3. 
filter buffer diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Thu Sep 18 18:16:25 2014 +0530 +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 19 16:35:15 2014 +0900 @@ -497,7 +497,7 @@ }; /* g_intraFilterFlags[dir] trSize */ -const uint8_t g_intraFilterFlags[35] = +const uint8_t g_intraFilterFlags[NUM_INTRA_MODE] = { 0x38, 0x00, 0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Thu Sep 18 18:16:25 2014 +0530 +++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 19 16:35:15 2014 +0900 @@ -153,7 +153,7 @@ extern const uint8_t x265_exp2_lut[64]; // Intra tables -extern const uint8_t g_intraFilterFlags[35]; +extern const uint8_t g_intraFilterFlags[NUM_INTRA_MODE]; extern const uint32_t g_depthInc[3][4]; diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/intrapred.cpp --- a/source/common/intrapred.cpp Thu Sep 18 18:16:25 2014 +0530 +++ b/source/common/intrapred.cpp Fri Sep 19 16:35:15 2014 +0900 @@ -281,22 +281,22 @@ void Setup_C_IPredPrimitives(EncoderPrimitives p) { -p.intra_pred[BLOCK_4x4][0] = planar_pred_c2; -p.intra_pred[BLOCK_8x8][0] = planar_pred_c3; -p.intra_pred[BLOCK_16x16][0] = planar_pred_c4; -p.intra_pred[BLOCK_32x32][0] = planar_pred_c5; +p.intra_pred[0][BLOCK_4x4] = planar_pred_c2; +p.intra_pred[0][BLOCK_8x8] = planar_pred_c3; +p.intra_pred[0][BLOCK_16x16] = planar_pred_c4; +p.intra_pred[0][BLOCK_32x32] = planar_pred_c5; // Intra Prediction DC -p.intra_pred[BLOCK_4x4][1] = intra_pred_dc_c4; -p.intra_pred[BLOCK_8x8][1] = intra_pred_dc_c8; -p.intra_pred[BLOCK_16x16][1] = intra_pred_dc_c16; -p.intra_pred[BLOCK_32x32][1] = intra_pred_dc_c32; +p.intra_pred[1][BLOCK_4x4] = intra_pred_dc_c4; +p.intra_pred[1][BLOCK_8x8] = intra_pred_dc_c8; +p.intra_pred[1][BLOCK_16x16] = intra_pred_dc_c16; +p.intra_pred[1][BLOCK_32x32] = intra_pred_dc_c32; for (int i = 2; i 
NUM_INTRA_MODE; i++) { -p.intra_pred[BLOCK_4x4][i] = intra_pred_ang_c4; -p.intra_pred[BLOCK_8x8][i] = intra_pred_ang_c8; -p.intra_pred[BLOCK_16x16][i] = intra_pred_ang_c16; -p.intra_pred[BLOCK_32x32][i] = intra_pred_ang_c32; +p.intra_pred[i][BLOCK_4x4] = intra_pred_ang_c4; +p.intra_pred[i][BLOCK_8x8] = intra_pred_ang_c8; +p.intra_pred[i][BLOCK_16x16] = intra_pred_ang_c16; +p.intra_pred[i][BLOCK_32x32] = intra_pred_ang_c32; } p.intra_pred_allangs[BLOCK_4x4] = all_angs_pred_c2; diff -r 4680ab4f92b8 -r 3a2c1caf0f80 source/common/primitives.h --- a/source/common/primitives.hThu Sep 18 18:16:25 2014 +0530 +++ b/source/common/primitives.hFri Sep 19 16:35:15 2014 +0900 @@ -91,6 +91,8 @@ NUM_SQUARE_BLOCKS }; +enum { NUM_TR_SIZE = 4 }; + // NOTE: Not all DCT functions support dest stride enum Dcts { @@ -145,7 +147,6 @@ typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight); typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t val); -typedef void (*intra_planar_t)(pixel* above, pixel* left, pixel* dst, intptr_t dstStride); typedef void (*intra_pred_t)(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter); typedef void (*intra_allangs_t)(pixel *dst, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma); @@ -259,8 +260,8 @@ pixelavg_pp_t pixelavg_pp[NUM_LUMA_PARTITIONS]; addAvg_tluma_addAvg[NUM_LUMA_PARTITIONS]; -intra_pred_tintra_pred[NUM_SQUARE_BLOCKS - 1][NUM_INTRA_MODE]; -intra_allangs_t intra_pred_allangs[NUM_SQUARE_BLOCKS - 1]; +intra_pred_t
Re: [x265] inline simple functions
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1411087626 -32400 # Fri Sep 19 09:47:06 2014 +0900 # Node ID bc71daca1b43fd8aee5ffb770629a086966d4674 # Parent 25dde1ffab66bf29fa2a16945b6e3dff9e2954ec inline simple functions diff -r 25dde1ffab66 -r bc71daca1b43 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Thu Sep 18 18:02:36 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 19 09:47:06 2014 +0900 @@ -88,9 +88,6 @@ m_DataCUMemPool.m_tqBypassYuvMemBlock = NULL; } -TComDataCU::~TComDataCU() -{} - bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t sizeC, uint32_t numBlocks, bool isLossless) { @@ -1086,15 +1083,6 @@ } } -/** Check whether the CU is coded in lossless coding mode - * \param absPartIdx - * \returns true if the CU is coded in lossless coding mode; false if otherwise - */ -bool TComDataCU::isLosslessCoded(uint32_t absPartIdx) -{ -return m_slice-m_pps-bTransquantBypassEnabled getCUTransquantBypass(absPartIdx); -} - /** Get allowed chroma intra modes *\param absPartIdx *\param uiModeList pointer to chroma intra modes array @@ -1224,11 +1212,6 @@ return ctx; } -uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx) -{ -return getDepth(absPartIdx); -} - void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth) { uint32_t curPartNum = m_pic-getNumPartInCU() (depth 1); @@ -2111,11 +2094,6 @@ return numMvc; } -bool TComDataCU::isBipredRestriction() -{ -return getLog2CUSize(0) == 3 getPartitionSize(0) != SIZE_2Nx2N; -} - void TComDataCU::clipMv(MV outMV) { int mvshift = 2; @@ -2130,15 +2108,6 @@ outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y)); } -/** Test whether the current block is skipped - * \param partIdx Block index - * \returns Flag indicating whether the block is skipped - */ -bool TComDataCU::isSkipped(uint32_t partIdx) -{ -return getSkipFlag(partIdx); -} - // // Protected member functions // @@ -2438,9 +2407,4 @@ result.firstSignificanceMapContext = 
bIsLuma ? 21 : 12; } -uint32_t TComDataCU::getSCUAddr() -{ -return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInLCU; -} - //! \} diff -r 25dde1ffab66 -r bc71daca1b43 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hThu Sep 18 18:02:36 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.hFri Sep 19 09:47:06 2014 +0900 @@ -248,7 +248,7 @@ public: TComDataCU(); -virtual ~TComDataCU(); +~TComDataCU() {} uint32_t m_psyEnergy; uint64_t m_totalPsyCost; @@ -290,7 +290,8 @@ uint32_t getZorderIdxInCU() { return m_absIdxInLCU; } -uint32_t getSCUAddr(); +uint32_t getSCUAddr() const { return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInLCU; } + uint32_t getCUPelX() { return m_cuPelX; } @@ -344,7 +345,7 @@ char getLastCodedQP(uint32_t absPartIdx); void setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool foundNonZeroCbf); -bool isLosslessCoded(uint32_t absPartIdx); +bool isLosslessCoded(uint32_t idx) const { return m_cuTransquantBypass[idx] m_slice-m_pps-bTransquantBypassEnabled; } uint8_t* getTransformIdx(){ return m_trIdx; } @@ -488,10 +489,9 @@ // member functions for modes // --- -bool isIntra(uint32_t partIdx) { return m_predModes[partIdx] == MODE_INTRA; } - -bool isSkipped(uint32_t partIdx); /// SKIP (no residual) -bool isBipredRestriction(); +bool isIntra(uint32_t partIdx) const { return m_predModes[partIdx] == MODE_INTRA; } +bool isSkipped(uint32_t idx) const { return m_skipFlag[idx]; } +bool isBipredRestriction() const { return m_log2CUSize[0] == 3 m_partSizes[0] != SIZE_2Nx2N; } // --- // member functions for symbol prediction (most probable / mode conversion) @@ -506,7 +506,7 @@ uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth); uint32_t getCtxSkipFlag(uint32_t absPartIdx); -uint32_t getCtxInterDir(uint32_t absPartIdx); +uint32_t getCtxInterDir(uint32_t idx) const { return m_depth[idx]; } // --- // member
[x265] inline simple functions
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1410947343 -32400 # Wed Sep 17 18:49:03 2014 +0900 # Node ID b00d1f46a7632572df3be47decee9be9881c511c # Parent 199e8f2e0d54abd16657ccd0952bdc25cadf8420 inline simple functions diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Sep 16 17:50:06 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Sep 17 18:49:03 2014 +0900 @@ -88,9 +88,6 @@ m_DataCUMemPool.m_tqBypassYuvMemBlock = NULL; } -TComDataCU::~TComDataCU() -{} - bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t sizeC, uint32_t numBlocks, bool isLossless) { @@ -1086,15 +1083,6 @@ } } -/** Check whether the CU is coded in lossless coding mode - * \param absPartIdx - * \returns true if the CU is coded in lossless coding mode; false if otherwise - */ -bool TComDataCU::isLosslessCoded(uint32_t absPartIdx) -{ -return m_slice-m_pps-bTransquantBypassEnabled getCUTransquantBypass(absPartIdx); -} - /** Get allowed chroma intra modes *\param absPartIdx *\param uiModeList pointer to chroma intra modes array @@ -1224,11 +1212,6 @@ return ctx; } -uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx) -{ -return getDepth(absPartIdx); -} - void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth) { uint32_t curPartNum = m_pic-getNumPartInCU() (depth 1); @@ -2111,11 +2094,6 @@ return numMvc; } -bool TComDataCU::isBipredRestriction() -{ -return getLog2CUSize(0) == 3 getPartitionSize(0) != SIZE_2Nx2N; -} - void TComDataCU::clipMv(MV outMV) { int mvshift = 2; @@ -2130,15 +2108,6 @@ outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y)); } -/** Test whether the current block is skipped - * \param partIdx Block index - * \returns Flag indicating whether the block is skipped - */ -bool TComDataCU::isSkipped(uint32_t partIdx) -{ -return getSkipFlag(partIdx); -} - // // Protected member functions // @@ -2438,9 +2407,4 @@ result.firstSignificanceMapContext = 
bIsLuma ? 21 : 12; } -uint32_t TComDataCU::getSCUAddr() -{ -return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInLCU; -} - //! \} diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hTue Sep 16 17:50:06 2014 +0530 +++ b/source/Lib/TLibCommon/TComDataCU.hWed Sep 17 18:49:03 2014 +0900 @@ -248,7 +248,7 @@ public: TComDataCU(); -virtual ~TComDataCU(); +~TComDataCU() {} uint32_t m_psyEnergy; uint64_t m_totalPsyCost; @@ -290,7 +290,8 @@ uint32_t getZorderIdxInCU() { return m_absIdxInLCU; } -uint32_t getSCUAddr(); +uint32_t getSCUAddr() const { return (m_cuAddr g_maxFullDepth * 2) + m_absIdxInLCU; } + uint32_t getCUPelX() { return m_cuPelX; } @@ -344,7 +345,7 @@ char getLastCodedQP(uint32_t absPartIdx); void setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool foundNonZeroCbf); -bool isLosslessCoded(uint32_t absPartIdx); +bool isLosslessCoded(uint32_t idx) const { return m_cuTransquantBypass[idx] m_slice-m_pps-bTransquantBypassEnabled; } uint8_t* getTransformIdx(){ return m_trIdx; } @@ -488,10 +489,9 @@ // member functions for modes // --- -bool isIntra(uint32_t partIdx) { return m_predModes[partIdx] == MODE_INTRA; } - -bool isSkipped(uint32_t partIdx); /// SKIP (no residual) -bool isBipredRestriction(); +bool isIntra(uint32_t partIdx) const { return m_predModes[partIdx] == MODE_INTRA; } +bool isSkipped(uint32_t idx) const { return m_skipFlag[idx]; } +bool isBipredRestriction() const { return m_log2CUSize[0] == 3 m_partSizes[0] != SIZE_2Nx2N; } // --- // member functions for symbol prediction (most probable / mode conversion) @@ -506,7 +506,7 @@ uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth); uint32_t getCtxSkipFlag(uint32_t absPartIdx); -uint32_t getCtxInterDir(uint32_t absPartIdx); +uint32_t getCtxInterDir(uint32_t idx) const { return m_depth[idx]; } // --- // member
Re: [x265] [PATCH] x86inc.asm: fix vpbroadcastd bug on Mac platform
At changeset 27364e9, the wrong version was pushed. The correct patch is Min's later one. --- a/source/common/x86/x86inc.asm Fri Sep 05 17:36:18 2014 -0700 +++ b/source/common/x86/x86inc.asm Sun Sep 07 15:24:49 2014 +0900 @@ -1489,6 +1489,6 @@ movd %1 %+ xmm, %2 vpbroadcastd %1, %1 %+ xmm %else -vbroadcastsd %1, %2 +vpbroadcastd %1, %2 %endif %endmacro -----Original Message----- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Min Chen Sent: Saturday, September 06, 2014 8:48 AM To: x265-devel@videolan.org Subject: [x265] [PATCH] x86inc.asm: fix vpbroadcastd bug on Mac platform # HG changeset patch # User Min Chen chenm...@163.com # Date 1409960883 25200 # Node ID 8abcfdeeea2eab2e11da59002dad42dcf16aeab8 # Parent e0db7914e7020a6a6454fbf1d3ce793efa2209a1 x86inc.asm: fix vpbroadcastd bug on Mac platform diff -r e0db7914e702 -r 8abcfdeeea2e source/common/x86/x86inc.asm --- a/source/common/x86/x86inc.asmFri Sep 05 16:47:42 2014 -0700 +++ b/source/common/x86/x86inc.asmFri Sep 05 16:48:03 2014 -0700 @@ -888,6 +888,8 @@ %define y%1 mm%1 %define ymmxmm%1 xmm%1 %define ymmymm%1 ymm%1 +%define ymm%1xmm xmm%1 +%define xmm%1ymm ymm%1 %define xm%1 xmm %+ m%1 %define ym%1 ymm %+ m%1 %endmacro @@ -1480,3 +1482,13 @@ %endif %endmacro %endif + +; workaround: vpbroadcastd with register, the yasm will generate wrong +code %macro vpbroadcastd 2 + %ifid %2 +movd %1 %+ xmm, %2 +vpbroadcastd %1, %1 %+ xmm + %else +vpbroadcastd %1, %2 + %endif +%endmacro ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix CHECKED_BUILD
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1410060921 -32400 # Sun Sep 07 12:35:21 2014 +0900 # Node ID 5bdb43ad058348fbe847b5c8e63ea9712a41c6a4 # Parent ed4c9acafc11ccdd9ea5221175a891e43e24b1cc fix CHECKED_BUILD diff -r ed4c9acafc11 -r 5bdb43ad0583 source/common/dct.cpp --- a/source/common/dct.cpp Fri Sep 05 17:36:18 2014 -0700 +++ b/source/common/dct.cpp Sun Sep 07 12:35:21 2014 +0900 @@ -729,7 +729,7 @@ X265_CHECK(num = 32 * 32, dequant num %d too large\n, num); X265_CHECK((num % 8) == 0, dequant num %d not multiple of 8\n, num); X265_CHECK(shift = 10, shift too large %d\n, shift); -X265_CHECK(((int)coef 31) == 0, dequant coef buffer not aligned\n); +X265_CHECK(((intptr_t)coef 31) == 0, dequant coef buffer not aligned\n); int add, coeffQ; diff -r ed4c9acafc11 -r 5bdb43ad0583 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Sep 05 17:36:18 2014 -0700 +++ b/source/encoder/analysis.cpp Sun Sep 07 12:35:21 2014 +0900 @@ -1056,21 +1056,30 @@ copyYuv2Pic(pic, outBestCU-getAddr(), absPartIdx, depth); } +#if CHECKED_BUILD || _DEBUG /* Assert if Best prediction mode is NONE * Selected mode's RD-cost must be not MAX_INT64 */ if (bInsidePicture) { X265_CHECK(outBestCU-getPartitionSize(0) != SIZE_NONE, no best prediction size\n); X265_CHECK(outBestCU-getPredictionMode(0) != MODE_NONE, no best prediction mode\n); -if (m_rdCost.m_psyRd) +if (m_param-rdLevel 1) { -X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best partition cost\n); +if (m_rdCost.m_psyRd) +{ +X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best partition cost\n); +} +else +{ +X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best partition cost\n); +} } else { -X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best partition cost\n); +X265_CHECK(outBestCU-m_sa8dCost != MAX_INT64, no best partition cost\n); } } +#endif x265_emms(); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix sao
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1410062067 -32400 # Sun Sep 07 12:54:27 2014 +0900 # Node ID ce0c1eb81072b4dae5253b27d5c9bb3117975066 # Parent ed4c9acafc11ccdd9ea5221175a891e43e24b1cc fix sao diff -r ed4c9acafc11 -r ce0c1eb81072 source/encoder/sao.cpp --- a/source/encoder/sao.cppFri Sep 05 17:36:18 2014 -0700 +++ b/source/encoder/sao.cppSun Sep 07 12:54:27 2014 +0900 @@ -1481,14 +1481,10 @@ pixel* fenc; pixel* recon; int stride; -int lcuHeight; -int lcuWidth; uint32_t rPelX; uint32_t bPelY; int64_t* stats; int64_t* count; -uint32_t picWidthTmp = 0; -uint32_t picHeightTmp = 0; int classIdx; int startX; int startY; @@ -1510,38 +1506,39 @@ { // NOTE: Col { -lcuHeight = g_maxCUSize; -lcuWidth = g_maxCUSize; addr= idxX + frameWidthInCU * idxY; cu = pic-getCU(addr); + +uint32_t picWidthTmp = m_param-sourceWidth; +uint32_t picHeightTmp = m_param-sourceHeight; +int lcuWidth = g_maxCUSize; +int lcuHeight = g_maxCUSize; lPelX = cu-getCUPelX(); tPelY = cu-getCUPelY(); - -memset(m_countPreDblk[addr], 0, 3 * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS * sizeof(int64_t)); -memset(m_offsetOrgPreDblk[addr], 0, 3 * MAX_NUM_SAO_TYPE * MAX_NUM_SAO_CLASS * sizeof(int64_t)); +rPelX = lPelX + lcuWidth; +bPelY = tPelY + lcuHeight; +rPelX = rPelX picWidthTmp ? picWidthTmp : rPelX; +bPelY = bPelY picHeightTmp ? 
picHeightTmp : bPelY; +lcuWidth = rPelX - lPelX; +lcuHeight = bPelY - tPelY; + +memset(m_countPreDblk[addr], 0, sizeof(PerPlane)); +memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane)); + for (int plane = 0; plane 3; plane++) { isChroma = !!plane; -if (plane == 0) +if (plane == 1) { -picWidthTmp = m_param-sourceWidth; -picHeightTmp = m_param-sourceHeight; +picWidthTmp = m_hChromaShift; +picHeightTmp = m_vChromaShift; +lcuWidth = m_hChromaShift; +lcuHeight= m_vChromaShift; +lPelX= m_hChromaShift; +tPelY= m_vChromaShift; +rPelX = lPelX + lcuWidth; +bPelY = tPelY + lcuHeight; } -else if (plane == 1) -{ -picWidthTmp = m_param-sourceWidth isChroma; -picHeightTmp = m_param-sourceHeight isChroma; -lcuWidth = lcuWidth isChroma; -lcuHeight= lcuHeightisChroma; -lPelX= lPelXisChroma; -tPelY= tPelYisChroma; -} -rPelX = lPelX + lcuWidth; -bPelY = tPelY + lcuHeight; -rPelX = rPelX picWidthTmp ? picWidthTmp : rPelX; -bPelY = bPelY picHeightTmp ? picHeightTmp : bPelY; -lcuWidth = rPelX - lPelX; -lcuHeight = bPelY - tPelY; stride = (plane == 0) ? pic-getStride() : pic-getCStride(); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix cbf context
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409843113 -32400 # Fri Sep 05 00:05:13 2014 +0900 # Node ID 85a4327fe163a91b4725891515234c87e1153289 # Parent b686cb0abd713f6fefcc75d00725232d12e36089 fix cbf context diff -r b686cb0abd71 -r 85a4327fe163 source/Lib/TLibCommon/ContextTables.h --- a/source/Lib/TLibCommon/ContextTables.h Thu Sep 04 17:06:16 2014 +0530 +++ b/source/Lib/TLibCommon/ContextTables.h Fri Sep 05 00:05:13 2014 +0900 @@ -65,7 +65,7 @@ #define NUM_REF_NO_CTX2 /// number of context models for reference index #define NUM_TRANS_SUBDIV_FLAG_CTX 3 /// number of context models for transform subdivision flags -#define NUM_QT_CBF_CTX6 /// number of context models for QT CBF +#define NUM_QT_CBF_CTX7 /// number of context models for QT CBF #define NUM_QT_ROOT_CBF_CTX 1 /// number of context models for QT ROOT CBF #define NUM_DELTA_QP_CTX 3 /// number of context models for dQP @@ -144,7 +144,7 @@ #define NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4 3 #define NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4 1 -static const uint32_t ctxCbf[3][3] = { { 1, 0, 0 }, { 2, 3, 4 }, { 2, 3, 4} }; +static const uint32_t ctxCbf[3][5] = { { 1, 0, 0, 0, 0 }, { 2, 3, 4, 5, 6 }, { 2, 3, 4, 5, 6 } }; static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] = { { 0, 9, 21 }, { 0, 9, 12 } }; static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3] = { { 9, 12, 6 }, { 9, 3, 3 } }; static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE] = { 6, 0 }; @@ -259,9 +259,9 @@ static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] = { -{ 153, 111, 149, 92, 167, 154, }, -{ 153, 111, 149, 107, 167, 154, }, -{ 111, 141, 94, 138, 182, 154, }, +{ 153, 111, 149, 92, 167, 154, 154 }, +{ 153, 111, 149, 107, 167, 154, 154 }, +{ 111, 141, 94, 138, 182, 154, 154 }, }; static const uint8_t ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix getQuadtreeTULog2MinSizeInCU()
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409892209 -32400 # Fri Sep 05 13:43:29 2014 +0900 # Node ID e30143ac87fe32b6c20152e8fd41723cc9fdeb3c # Parent 93db2f53fe573537bcd4eb53ca3cdb69af557eb5 fix getQuadtreeTULog2MinSizeInCU() diff -r 93db2f53fe57 -r e30143ac87fe source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Thu Sep 04 16:42:24 2014 -0700 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 05 13:43:29 2014 +0900 @@ -1197,9 +1197,9 @@ { uint32_t log2CUSize = getLog2CUSize(absPartIdx); PartSize partSize = getPartitionSize(absPartIdx); -uint32_t quadtreeTUMaxDepth = getPredictionMode(0) == MODE_INTRA ? m_slice-m_sps-quadtreeTUMaxDepthIntra : m_slice-m_sps-quadtreeTUMaxDepthInter; +uint32_t quadtreeTUMaxDepth = getPredictionMode(absPartIdx) == MODE_INTRA ? m_slice-m_sps-quadtreeTUMaxDepthIntra : m_slice-m_sps-quadtreeTUMaxDepthInter; uint32_t intraSplitFlag = (getPredictionMode(absPartIdx) == MODE_INTRA partSize == SIZE_NxN) ? 1 : 0; -uint32_t interSplitFlag = ((quadtreeTUMaxDepth == 1) (getPredictionMode(0) == MODE_INTER) (partSize != SIZE_2Nx2N)); +uint32_t interSplitFlag = ((quadtreeTUMaxDepth == 1) (getPredictionMode(absPartIdx) == MODE_INTER) (partSize != SIZE_2Nx2N)); tuDepthRange[0] = m_slice-m_sps-quadtreeTULog2MinSize; tuDepthRange[1] = m_slice-m_sps-quadtreeTULog2MaxSize; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] asm: fix dequant_normal
How about remove '#if...'? The asm code didn't check it. added '%if...' to asm code :) # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409378187 -32400 # Sat Aug 30 14:56:27 2014 +0900 # Node ID c4f15840feb443f8c38ba58b52ef5ba6d518e626 # Parent 4e2d9ac6d489e82e70544d626c89964ee653c452 asm: fix dequant_normal diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/dct.cpp --- a/source/common/dct.cpp Fri Aug 29 11:12:49 2014 +0200 +++ b/source/common/dct.cpp Sat Aug 30 14:56:27 2014 +0900 @@ -720,7 +720,9 @@ void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) { -#if !HIGH_BIT_DEPTH +#if HIGH_BIT_DEPTH +X265_CHECK(scale 32768 || ((scale 3) == 0 shift 2), dequant invalid scale %d\n, scale); +#else // NOTE: maximum of scale is (72 * 256) X265_CHECK(scale 32768, dequant invalid scale %d\n, scale); #endif diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200 +++ b/source/common/x86/pixel-util8.asm Sat Aug 30 14:56:27 2014 +0900 @@ -1005,23 +1005,23 @@ ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) ;- INIT_XMM sse4 -cglobal dequant_normal, 4,5,5 -movdm1, r3 ; m1 = word [scale] +cglobal dequant_normal, 5,5,5 +movdm1, r3 ; m1 = word [scale] +movam2, [pw_1] +%if HIGH_BIT_DEPTH cmp r3d, 32767 jle .skip - psrld m1, 2 -mov r4d, r4m +sub r4d, 2 +.skip: +%endif movdm0, r4d ; m0 = shift xor r3d, r3d dec r4d bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 +movdm3, r3d +punpcklwd m1, m3 pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m - ; m0 = shift ; m1 = scale ; m2 = word [1] @@ -1029,45 +1029,6 @@ movum3, [r0] movum4, [r0 + 16] packssdwm3, m4 ; m3 = clipQCoef -psllw m3, 2 -punpckhwd m4, m3, m2 -punpcklwd m3, m2 -pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) -pmaddwd m4, m1 -psrad m3, m0 -psrad m4, m0 -packssdwm3, m3 ; OPT_ME: store must be 32 bits -pmovsxwdm3, 
m3 -packssdwm4, m4 -pmovsxwdm4, m4 -movu[r1], m3 -movu[r1 + 16], m4 - -add r0, 32 -add r1, 32 - -sub r2d, 8 -jnz.loop -jz .end - -.skip: -mov r4d, r4m -movdm0, r4d ; m0 = shift -xor r3d, r3d -dec r4d -bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 -pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m -; m0 = shift -; m1 = scale -; m2 = word [1] -.sloop: -movum3, [r0] -movum4, [r0 + 16] -packssdwm3, m4 ; m3 = clipQCoef punpckhwd m4, m3, m2 punpcklwd m3, m2 pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) @@ -1085,8 +1046,7 @@ add r1, 32 sub r2d, 8 -jnz.sloop -.end: +jnz.loop RET ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] asm: fix dequant_normal
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409373356 -32400 # Sat Aug 30 13:35:56 2014 +0900 # Node ID 9b5f0c75d052e963b0a413f341a74036141b3675 # Parent 4e2d9ac6d489e82e70544d626c89964ee653c452 asm: fix dequant_normal diff -r 4e2d9ac6d489 -r 9b5f0c75d052 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200 +++ b/source/common/x86/pixel-util8.asm Sat Aug 30 13:35:56 2014 +0900 @@ -1005,23 +1005,21 @@ ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) ;- INIT_XMM sse4 -cglobal dequant_normal, 4,5,5 -movdm1, r3 ; m1 = word [scale] +cglobal dequant_normal, 5,5,5 cmp r3d, 32767 +movdm1, r3 ; m1 = word [scale] +movam2, [pw_1] jle .skip - psrld m1, 2 -mov r4d, r4m +sub r4d, 2 +.skip: movdm0, r4d ; m0 = shift xor r3d, r3d dec r4d bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 +movdm3, r3d +punpcklwd m1, m3 pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m - ; m0 = shift ; m1 = scale ; m2 = word [1] @@ -1029,45 +1027,6 @@ movum3, [r0] movum4, [r0 + 16] packssdwm3, m4 ; m3 = clipQCoef -psllw m3, 2 -punpckhwd m4, m3, m2 -punpcklwd m3, m2 -pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) -pmaddwd m4, m1 -psrad m3, m0 -psrad m4, m0 -packssdwm3, m3 ; OPT_ME: store must be 32 bits -pmovsxwdm3, m3 -packssdwm4, m4 -pmovsxwdm4, m4 -movu[r1], m3 -movu[r1 + 16], m4 - -add r0, 32 -add r1, 32 - -sub r2d, 8 -jnz.loop -jz .end - -.skip: -mov r4d, r4m -movdm0, r4d ; m0 = shift -xor r3d, r3d -dec r4d -bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 -pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m -; m0 = shift -; m1 = scale -; m2 = word [1] -.sloop: -movum3, [r0] -movum4, [r0 + 16] -packssdwm3, m4 ; m3 = clipQCoef punpckhwd m4, m3, m2 punpcklwd m3, m2 pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) @@ -1085,8 +1044,7 @@ add r1, 32 sub r2d, 8 -jnz.sloop -.end: +jnz.loop RET ___ x265-devel mailing list 
x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] asm: fix dequant_normal
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409376721 -32400 # Sat Aug 30 14:32:01 2014 +0900 # Node ID eb94972d65473fc8e6fa42ebd95a72cfc727edf5 # Parent 4e2d9ac6d489e82e70544d626c89964ee653c452 asm: fix dequant_normal diff -r 4e2d9ac6d489 -r eb94972d6547 source/common/dct.cpp --- a/source/common/dct.cpp Fri Aug 29 11:12:49 2014 +0200 +++ b/source/common/dct.cpp Sat Aug 30 14:32:01 2014 +0900 @@ -720,7 +720,9 @@ void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) { -#if !HIGH_BIT_DEPTH +#if HIGH_BIT_DEPTH +X265_CHECK(scale 32768 || ((scale 3) == 0 shift 2), dequant invalid scale %d\n, scale); +#else // NOTE: maximum of scale is (72 * 256) X265_CHECK(scale 32768, dequant invalid scale %d\n, scale); #endif diff -r 4e2d9ac6d489 -r eb94972d6547 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Fri Aug 29 11:12:49 2014 +0200 +++ b/source/common/x86/pixel-util8.asm Sat Aug 30 14:32:01 2014 +0900 @@ -1005,23 +1005,21 @@ ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) ;- INIT_XMM sse4 -cglobal dequant_normal, 4,5,5 -movdm1, r3 ; m1 = word [scale] +cglobal dequant_normal, 5,5,5 cmp r3d, 32767 +movdm1, r3 ; m1 = word [scale] +movam2, [pw_1] jle .skip - psrld m1, 2 -mov r4d, r4m +sub r4d, 2 +.skip: movdm0, r4d ; m0 = shift xor r3d, r3d dec r4d bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 +movdm3, r3d +punpcklwd m1, m3 pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m - ; m0 = shift ; m1 = scale ; m2 = word [1] @@ -1029,45 +1027,6 @@ movum3, [r0] movum4, [r0 + 16] packssdwm3, m4 ; m3 = clipQCoef -psllw m3, 2 -punpckhwd m4, m3, m2 -punpcklwd m3, m2 -pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) -pmaddwd m4, m1 -psrad m3, m0 -psrad m4, m0 -packssdwm3, m3 ; OPT_ME: store must be 32 bits -pmovsxwdm3, m3 -packssdwm4, m4 -pmovsxwdm4, m4 -movu[r1], m3 -movu[r1 + 16], m4 - -add r0, 32 -add r1, 32 - -sub r2d, 8 
-jnz.loop -jz .end - -.skip: -mov r4d, r4m -movdm0, r4d ; m0 = shift -xor r3d, r3d -dec r4d -bts r3d, r4d -movdm2, r3d -punpcklwd m1, m2 -pshufd m1, m1, 0 ; m1 = dword [add scale] -movam2, [pw_1] -mov r2d, r2m -; m0 = shift -; m1 = scale -; m2 = word [1] -.sloop: -movum3, [r0] -movum4, [r0 + 16] -packssdwm3, m4 ; m3 = clipQCoef punpckhwd m4, m3, m2 punpcklwd m3, m2 pmaddwd m3, m1 ; m3 = dword (clipQCoef * scale + add) @@ -1085,8 +1044,7 @@ add r1, 32 sub r2d, 8 -jnz.sloop -.end: +jnz.loop RET ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix m_initSliceContext (uninitialised m_sliceQp)
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409041357 -32400 # Tue Aug 26 17:22:37 2014 +0900 # Node ID c18255467f12da1a780340ade55292c32d95bfdd # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b fix m_initSliceContext (uninitialised m_sliceQp) diff -r 5acfb12ec5d1 -r c18255467f12 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/frameencoder.cpp Tue Aug 26 17:22:37 2014 +0900 @@ -158,8 +158,6 @@ int64_t startCompressTime = x265_mdate(); Slice* slice = m_frame-m_picSym-m_slice; -m_initSliceContext.resetEntropy(slice); - /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */ @@ -225,12 +223,15 @@ m_frameFilter.m_sao.m_refDepth = 2 + !IS_REFERENCED(slice); break; } -m_frameFilter.start(m_frame); // Clip slice QP to 0-51 spec range before encoding qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp); slice-m_sliceQp = qp; +m_initSliceContext.resetEntropy(slice); + +m_frameFilter.start(m_frame); + if (m_frame-m_lowres.bKeyframe) { if (m_param-bEmitHRDSEI) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408956792 -32400 # Mon Aug 25 17:53:12 2014 +0900 # Node ID 7145e57c722a94a06faec33e3041442032a1892f # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 25 17:53:12 2014 +0900 @@ -816,12 +816,12 @@ TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) { +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) +return NULL; + uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) -return NULL; - if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) { if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) @@ -857,14 +857,11 @@ TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) +return NULL; -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) -{ -return NULL; -} - -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize)) { @@ -895,15 +892,14 @@ TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t 
partUnitOffset) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picHeightInLumaSamples) { return NULL; } -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize)) { @@ -938,14 +934,13 @@ TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picWidthInLumaSamples) { return NULL; } +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize)) @@ -954,7 +949,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]; if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize)) { @@ -1817,48 +1812,42 @@ } // TMVP always enabled { -// MTK colocated-RightBottom +MV colmv; uint32_t partIdxRB; deriveRightBottomIdx(puIdx, partIdxRB); -uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB]; -uint32_t 
numPartInCUSize = m_pic-getNumPartInCUSize(); - -MV colmv; -int refIdx; int lcuIdx = -1; -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) // image boundary check +// image boundary check +if (m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[partIdxRB] + UNIT_SIZE m_slice-m_sps-picWidthInLumaSamples +m_pic-getCU(m_cuAddr)-getCUPelY
Re: [x265] fix lossless
> Negative shift or bts causes unexpected behavior. Negative shift is used for round value calculation. int round = 1 << (shift - 1); From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Satoshi Nakagawa Sent: Monday, August 25, 2014 11:50 AM To: 'Development for x265' Subject: Re: [x265] fix lossless Negative shift or bts causes unexpected behavior. My Core i5 4300U (Win7 32bit, VS2013) fails the following test. --- a/source/test/pixelharness.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/test/pixelharness.cpp Mon Aug 25 11:30:37 2014 +0900 @@ -562,7 +562,8 @@ intptr_t stride = STRIDE; for (int i = 0; i < ITERS; i++) { -int shift = (rand() % 7 + 1); +//int shift = (rand() % 7 + 1); +int shift = 0; int index = i % TEST_CASES; checked(opt, opt_dest, int_test_buff[index] + j, stride, shift, (int)STRIDE); From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of chen Sent: Monday, August 25, 2014 11:20 AM To: Development for x265 Subject: Re: [x265] fix lossless He just modified the shift=0 path; shift-left and shift-right are equal in that case. At 2014-08-25 10:06:23, Steve Borho st...@borho.org wrote: On 08/23, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408803114 -32400 # Sat Aug 23 23:11:54 2014 +0900 # Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 fix lossless Do you have a repro case that this fixes? I tried Main and Main10 encodes with --lossless --hash 1 and they were both fine prior to this patch. 
diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp --- a/source/common/quant.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/common/quant.cpp Sat Aug 23 23:11:54 2014 +0900 @@ -399,7 +399,7 @@ { if (transQuantBypass) { -primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 log2TrSize); +primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0); return; } @@ -430,7 +430,7 @@ #if X265_DEPTH = 10 primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); #else -if (shift = 0) +if (shift 0) primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); else primitives.cvt32to16_shl[log2TrSize - 2](residual, m_resiDctCoeff, stride, -shift); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix lossless
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408803114 -32400 # Sat Aug 23 23:11:54 2014 +0900 # Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 fix lossless diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp --- a/source/common/quant.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/common/quant.cpp Sat Aug 23 23:11:54 2014 +0900 @@ -399,7 +399,7 @@ { if (transQuantBypass) { -primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 log2TrSize); +primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0); return; } @@ -430,7 +430,7 @@ #if X265_DEPTH = 10 primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); #else -if (shift = 0) +if (shift 0) primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); else primitives.cvt32to16_shl[log2TrSize - 2](residual, m_resiDctCoeff, stride, -shift); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] refine depth related.
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408347239 -32400 # Mon Aug 18 16:33:59 2014 +0900 # Node ID 81469708804f322f6c76dfc6bb88f6d78fa983df # Parent 9a0d242743577e0c8cc56cfac4934f8ea8cb7f6e refine depth related. maxCUDepth: CU depth maxFullDepth: CU+TU or CU+PU depth unitSize: always 4 diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 18 05:34:51 2014 + +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 18 16:33:59 2014 +0900 @@ -129,7 +129,7 @@ return ok; } -void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t cuSize, int unitSize, int csp, int index, bool isLossless) +void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t cuSize, int csp, int index, bool isLossless) { m_hChromaShift = CHROMA_H_SHIFT(csp); m_vChromaShift = CHROMA_V_SHIFT(csp); @@ -139,12 +139,6 @@ m_slice = NULL; m_numPartitions = numPartition; -uint32_t tmp = 4 * AMVP_DECIMATION_FACTOR / unitSize; -tmp = tmp * tmp; -X265_CHECK(tmp == (1 (g_log2Size[tmp])), unexpected pixel count\n); -tmp = g_log2Size[tmp]; -m_unitMask = ~((1 tmp) - 1); - uint32_t sizeL = cuSize * cuSize; uint32_t sizeC = sizeL (m_hChromaShift + m_vChromaShift); @@ -577,7 +571,7 @@ // Copy current predicted part to a CU in picture. 
// It is used to predict for next part -void TComDataCU::copyToPic(uint8_t depth) +void TComDataCU::copyToPic(uint32_t depth) { TComDataCU* cu = m_pic-getCU(m_cuAddr); @@ -622,7 +616,7 @@ m_cuMvField[1].copyTo(cu-getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU); uint32_t tmpY = 1 ((g_maxLog2CUSize - depth) * 2); -uint32_t tmpY2 = m_absIdxInLCU m_pic-getLog2UnitSize() * 2; +uint32_t tmpY2 = m_absIdxInLCU LOG2_UNIT_SIZE * 2; memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); uint32_t tmpC = tmpY (m_hChromaShift + m_vChromaShift); @@ -633,7 +627,7 @@ if (m_slice-m_pps-bTransquantBypassEnabled) { uint32_t tmp = 1 ((g_maxLog2CUSize - depth) * 2); -uint32_t tmp2 = m_absIdxInLCU m_pic-getLog2UnitSize() * 2; +uint32_t tmp2 = m_absIdxInLCU LOG2_UNIT_SIZE * 2; memcpy(cu-getLumaOrigYuv() + tmp2, m_tqBypassOrigYuv[0], sizeof(pixel) * tmp); memcpy(cu-getChromaOrigYuv(1) + tmpC2, m_tqBypassOrigYuv[1], sizeof(pixel) * tmpC); @@ -641,7 +635,7 @@ } } -void TComDataCU::copyCodedToPic(uint8_t depth) +void TComDataCU::copyCodedToPic(uint32_t depth) { TComDataCU* cu = m_pic-getCU(m_cuAddr); @@ -660,7 +654,7 @@ memcpy(cu-getCbf(TEXT_CHROMA_V) + m_absIdxInLCU, m_cbf[2], sizeInChar); uint32_t tmpY = 1 ((g_maxLog2CUSize - depth) * 2); -uint32_t tmpY2 = m_absIdxInLCU m_pic-getLog2UnitSize() * 2; +uint32_t tmpY2 = m_absIdxInLCU LOG2_UNIT_SIZE * 2; memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); tmpY = m_hChromaShift + m_vChromaShift; tmpY2 = m_hChromaShift + m_vChromaShift; @@ -668,7 +662,7 @@ memcpy(cu-m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); } -void TComDataCU::copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth) +void TComDataCU::copyToPic(uint32_t depth, uint32_t partIdx, uint32_t partDepth) { TComDataCU* cu = m_pic-getCU(m_cuAddr); uint32_t qNumPart = m_numPartitions (partDepth 1); @@ -713,7 +707,7 @@ m_cuMvField[1].copyTo(cu-getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU, partStart, qNumPart); uint32_t tmpY = 1 
((g_maxLog2CUSize - depth - partDepth) * 2); -uint32_t tmpY2 = partOffset m_pic-getLog2UnitSize() * 2; +uint32_t tmpY2 = partOffset LOG2_UNIT_SIZE * 2; memcpy(cu-getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY); uint32_t tmpC = tmpY (m_hChromaShift + m_vChromaShift); @@ -825,7 +819,7 @@ uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + m_pic-getUnitSize()) = m_slice-m_sps-picWidthInLumaSamples) +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) return NULL; if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) @@ -834,7 +828,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - m_pic-getLog2UnitSize())) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1]; if (RasterAddress::isEqualRowOrCol
[x265] replace IntraFilterType[][] to IntraFilterFlags[] (avoid *35)
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1407984148 -32400 # Thu Aug 14 11:42:28 2014 +0900 # Node ID a369fcc9c0e43812feb285004c09bf9ea9706456 # Parent d66e257ace3210d828a399e05eeba1b05abb21af replace IntraFilterType[][] to IntraFilterFlags[] (aboid *35) diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Wed Aug 13 17:29:18 2014 -0700 +++ b/source/Lib/TLibCommon/TComPattern.cpp Thu Aug 14 11:42:28 2014 +0900 @@ -38,7 +38,7 @@ #include frame.h #include TComPattern.h #include TComDataCU.h -#include predict.h +#include TComRom.h using namespace x265; @@ -68,7 +68,7 @@ fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors); -bool bUseFilteredPredictions = (dirMode == ALL_IDX || IntraFilterType[intraNeighbors.log2TrSize - 2][dirMode]); +bool bUseFilteredPredictions = (dirMode == ALL_IDX || (intraFilterFlags[dirMode] tuSize)); if (bUseFilteredPredictions 8 = tuSize tuSize = 32) { diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Wed Aug 13 17:29:18 2014 -0700 +++ b/source/Lib/TLibCommon/TComRom.cpp Thu Aug 14 11:42:28 2014 +0900 @@ -491,5 +491,15 @@ 106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170, 175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250 }; + +/* intraFilterFlags[dir] trSize */ +const uint8_t intraFilterFlags[35] = +{ +0x38, 0x00, +0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, +0x38, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x00, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, +0x38, +}; + } //! 
\} diff -r d66e257ace32 -r a369fcc9c0e4 source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Wed Aug 13 17:29:18 2014 -0700 +++ b/source/Lib/TLibCommon/TComRom.h Thu Aug 14 11:42:28 2014 +0900 @@ -148,6 +148,9 @@ extern const uint8_t g_lpsTable[64][4]; extern const uint8_t x265_exp2_lut[64]; +// Intra tables +extern const uint8_t intraFilterFlags[35]; + } #endif //ifndef X265_TCOMROM_H diff -r d66e257ace32 -r a369fcc9c0e4 source/common/intrapred.cpp --- a/source/common/intrapred.cpp Wed Aug 13 17:29:18 2014 -0700 +++ b/source/common/intrapred.cpp Thu Aug 14 11:42:28 2014 +0900 @@ -21,7 +21,6 @@ * For more information, contact us at license @ x265.com. */ -#include predict.h #include TLibCommon/TComRom.h #include primitives.h @@ -249,11 +248,10 @@ void all_angs_pred_c(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, int bLuma) { const int size = 1 log2Size; -const int sizeIdx = log2Size - 2; for (int mode = 2; mode = 34; mode++) { -pixel *left = (IntraFilterType[sizeIdx][mode] ? left1 : left0); -pixel *above = (IntraFilterType[sizeIdx][mode] ? above1 : above0); +pixel *left = (intraFilterFlags[mode] size ? left1 : left0); +pixel *above = (intraFilterFlags[mode] size ? 
above1 : above0); pixel *out = dest + ((mode - 2) (log2Size * 2)); intra_pred_ang_csize(out, size, left, above, mode, bLuma); diff -r d66e257ace32 -r a369fcc9c0e4 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Aug 13 17:29:18 2014 -0700 +++ b/source/common/x86/asm-primitives.cpp Thu Aug 14 11:42:28 2014 +0900 @@ -27,7 +27,7 @@ #include x265.h #include cpu.h #if HIGH_BIT_DEPTH -#include predict.h +#include TLibCommon/TComRom.h #endif extern C { @@ -525,7 +525,7 @@ SETUP_CHROMA_SS_FUNC_DEF_444(64, 48, cpu); \ SETUP_CHROMA_SS_FUNC_DEF_444(48, 64, cpu); \ SETUP_CHROMA_SS_FUNC_DEF_444(64, 16, cpu); \ -SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu) +SETUP_CHROMA_SS_FUNC_DEF_444(16, 64, cpu); #if HIGH_BIT_DEPTH// temporary, until all 10bit functions are completed #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \ @@ -1139,7 +1139,7 @@ SETUP_CHROMA_VERT_FUNC_DEF_444(64, 48, cpu); \ SETUP_CHROMA_VERT_FUNC_DEF_444(48, 64, cpu); \ SETUP_CHROMA_VERT_FUNC_DEF_444(64, 16, cpu); \ -SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu) +SETUP_CHROMA_VERT_FUNC_DEF_444(16, 64, cpu); #define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \ p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \ @@ -1169,7 +1169,7 @@ SETUP_CHROMA_HORIZ_FUNC_DEF(32, 24, cpu); \ SETUP_CHROMA_HORIZ_FUNC_DEF(24, 32, cpu); \ SETUP_CHROMA_HORIZ_FUNC_DEF(32, 8, cpu); \ -SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu) +SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu); #define SETUP_CHROMA_HORIZ_FUNC_DEF_422(W, H, cpu) \ p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x
Re: [x265] [PATCH] analysis: check for proper cost
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1407889822 -32400 # Wed Aug 13 09:30:22 2014 +0900 # Node ID b284a0c71bb8c69b2bb2097d8ce4d50cfed6e41e # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 analysis: check for proper cost diff -r 8a7f4bb1d1be -r b284a0c71bb8 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/analysis.cpp Wed Aug 13 09:30:22 2014 +0900 @@ -1011,12 +1011,28 @@ /* Assert if Best prediction mode is NONE * Selected mode's RD-cost must be not MAX_INT64 */ +#if CHECKED_BUILD || _DEBUG if (bInsidePicture) { X265_CHECK(outBestCU-getPartitionSize(0) != SIZE_NONE, no best prediction size\n); X265_CHECK(outBestCU-getPredictionMode(0) != MODE_NONE, no best prediction mode\n); -X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best prediction cost\n); +if (m_param-rdLevel 1) +{ +if (m_rdCost.m_psyRd) +{ +X265_CHECK(outBestCU-m_totalPsyCost != MAX_INT64, no best partition cost\n); +} +else +{ +X265_CHECK(outBestCU-m_totalRDCost != MAX_INT64, no best partition cost\n); +} +} +else +{ +X265_CHECK(outBestCU-m_sa8dCost != MAX_INT64, no best partition cost\n); +} } +#endif x265_emms(); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] replace g_convertToBit[] to g_log2Size[] const table
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1407814080 -32400 # Tue Aug 12 12:28:00 2014 +0900 # Node ID c0f00c662c5a255d093f3355e1c8dff123125137 # Parent 23d58a1819c7ab394db69f19926b68bce9e85bb4 replace g_convertToBit[] to g_log2Size[] const table diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 11 16:54:09 2014 -0700 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Tue Aug 12 12:28:00 2014 +0900 @@ -141,8 +141,8 @@ uint32_t tmp = 4 * AMVP_DECIMATION_FACTOR / unitSize; tmp = tmp * tmp; -X265_CHECK(tmp == (1 (g_convertToBit[tmp] + 2)), unexpected pixel count\n); -tmp = g_convertToBit[tmp] + 2; +X265_CHECK(tmp == (1 (g_log2Size[tmp])), unexpected pixel count\n); +tmp = g_log2Size[tmp]; m_unitMask = ~((1 tmp) - 1); uint32_t sizeL = cuSize * cuSize; diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Mon Aug 11 16:54:09 2014 -0700 +++ b/source/Lib/TLibCommon/TComRom.cpp Tue Aug 12 12:28:00 2014 +0900 @@ -99,16 +99,6 @@ { if (ATOMIC_CAS32(initialized, 0, 1) == 1) return; - -int i, c; - -memset(g_convertToBit, -1, sizeof(g_convertToBit)); -c = 0; -for (i = 4; i = MAX_CU_SIZE; i *= 2) -{ -g_convertToBit[i] = c; -c++; -} } void destroyROM() @@ -300,7 +290,14 @@ const uint8_t g_chroma422IntraAngleMappingTable[36] = { 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31, DM_CHROMA_IDX }; -uint8_t g_convertToBit[MAX_CU_SIZE + 1]; +const uint8_t g_log2Size[MAX_CU_SIZE + 1] = +{ +0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +6 +}; // // Scanning order context model mapping diff -r 23d58a1819c7 -r c0f00c662c5a source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Mon Aug 11 16:54:09 
2014 -0700 +++ b/source/Lib/TLibCommon/TComRom.h Tue Aug 12 12:28:00 2014 +0900 @@ -134,7 +134,7 @@ extern const uint8_t g_minInGroup[10]; extern const uint8_t g_goRiceRange[5]; // maximum value coded with Rice codes -extern uint8_t g_convertToBit[MAX_CU_SIZE + 1]; // from width to log2(width)-2 +extern const uint8_t g_log2Size[MAX_CU_SIZE + 1]; // from size to log2(size) // Map Luma samples to chroma samples extern const int g_winUnitX[MAX_CHROMA_FORMAT_IDC + 1]; diff -r 23d58a1819c7 -r c0f00c662c5a source/common/param.cpp --- a/source/common/param.cpp Mon Aug 11 16:54:09 2014 -0700 +++ b/source/common/param.cpp Tue Aug 12 12:28:00 2014 +0900 @@ -861,8 +861,8 @@ if (check_failed == 1) return check_failed; -uint32_t maxCUDepth = (uint32_t)g_convertToBit[param-maxCUSize]; -uint32_t maxLog2CUSize = maxCUDepth + 2; +uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param-maxCUSize]; +uint32_t maxCUDepth = maxLog2CUSize - 2; uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1; uint32_t tuQTMinLog2Size = 2; //log2(4) @@ -1041,7 +1041,8 @@ int x265_set_globals(x265_param *param) { -uint32_t maxCUDepth = (uint32_t)g_convertToBit[param-maxCUSize]; +uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param-maxCUSize]; +uint32_t maxCUDepth = maxLog2CUSize - 2; uint32_t tuQTMinLog2Size = 2; //log2(4) static int once /* = 0 */; @@ -1058,7 +1059,7 @@ { // set max CU width height g_maxCUSize = param-maxCUSize; -g_maxLog2CUSize = maxCUDepth + 2; +g_maxLog2CUSize = maxLog2CUSize; // compute actual CU depth with respect to config depth and max transform size g_addCUDepth = g_maxLog2CUSize - maxCUDepth - tuQTMinLog2Size; diff -r 23d58a1819c7 -r c0f00c662c5a source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Aug 11 16:54:09 2014 -0700 +++ b/source/encoder/encoder.cppTue Aug 12 12:28:00 2014 +0900 @@ -1209,7 +1209,7 @@ setThreadPool(ThreadPool::allocThreadPool(p-poolNumThreads)); int poolThreadCount = ThreadPool::getThreadPool()-getThreadCount(); -uint32_t maxLog2CUSize = 
g_convertToBit[p-maxCUSize] + 2; +uint32_t maxLog2CUSize = g_log2Size[p-maxCUSize]; int rows = (p-sourceHeight + p-maxCUSize - 1) maxLog2CUSize; if (p-frameNumThreads == 0) @@ -1391,7 +1391,7 @@ m_conformanceWindow.leftOffset = 0; // set pad size if width is not multiple of the minimum CU size = -uint32_t maxCUDepth = (uint32_t)g_convertToBit[p-maxCUSize]; +uint32_t maxCUDepth = maxLog2CUSize - 2; uint32_t minCUDepth = (p-maxCUSize (maxCUDepth - 1)); if ((p-sourceWidth % minCUDepth) != 0
[x265] quant: remove scaledCoeff from nquant()
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1407658928 -32400 # Sun Aug 10 17:22:08 2014 +0900 # Node ID d1dad09266327d40b6c2372f9916f7fcf288c2f0 # Parent 6e4eb854220350cf0c980fc02cc11109c506585f quant: remove scaledCoeff from nquant() diff -r 6e4eb8542203 -r d1dad0926632 source/common/dct.cpp --- a/source/common/dct.cpp Sat Aug 09 19:43:23 2014 -0500 +++ b/source/common/dct.cpp Sun Aug 10 17:22:08 2014 +0900 @@ -795,7 +795,7 @@ return numSig; } -uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, int32_t* qCoef, int qBits, int add, int numCoeff) +uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int qBits, int add, int numCoeff) { uint32_t numSig = 0; @@ -805,7 +805,6 @@ int sign = (level 0 ? -1 : 1); int tmplevel = abs(level) * quantCoeff[blockpos]; -scaledCoeff[blockpos] = tmplevel; level = ((tmplevel + add) qBits); if (level) ++numSig; diff -r 6e4eb8542203 -r d1dad0926632 source/common/primitives.h --- a/source/common/primitives.hSat Aug 09 19:43:23 2014 -0500 +++ b/source/common/primitives.hSun Aug 10 17:22:08 2014 +0900 @@ -160,7 +160,7 @@ typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred); typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride); typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); -typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); +typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); typedef int 
(*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff); diff -r 6e4eb8542203 -r d1dad0926632 source/common/quant.cpp --- a/source/common/quant.cpp Sat Aug 09 19:43:23 2014 -0500 +++ b/source/common/quant.cpp Sun Aug 10 17:22:08 2014 +0900 @@ -487,7 +487,6 @@ * probability models like CABAC */ uint32_t Quant::rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy) { -uint32_t trSize = 1 log2TrSize; int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */ int scalingListType = (cu-isIntra(absPartIdx) ? 0 : 3) + ttype; @@ -500,14 +499,13 @@ int32_t *qCoef = m_scalingList-m_quantCoef[log2TrSize - 2][scalingListType][rem]; int numCoeff = 1 log2TrSize * 2; -int scaledCoeff[32 * 32]; -uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, scaledCoeff, dstCoeff, qbits, add, numCoeff); +uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff); X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), numSig differ\n); if (!numSig) return 0; -x265_emms(); +uint32_t trSize = 1 log2TrSize; /* unquant constants for psy-rdoq. The dequant coefficients have a (14) scale applied that * must be removed during unquant. 
This may be larger than the QP upshift, which would turn diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util.h --- a/source/common/x86/pixel-util.hSat Aug 09 19:43:23 2014 -0500 +++ b/source/common/x86/pixel-util.hSun Aug 10 17:22:08 2014 +0900 @@ -45,7 +45,7 @@ void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride); uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff); -uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); +uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff); diff -r 6e4eb8542203 -r d1dad0926632 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Sat Aug 09 19:43:23 2014 -0500 +++ b/source/common/x86/pixel-util8.asm Sun Aug 10 17:22:08 2014 +0900 @@ -938,72 +938,63 @@ ;- -; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); +; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff
Re: [x265] [PATCH] TComPattern: remove redundant functions used for CIP
redundant functions ware to simplify non-CIP case, and to check m_pps-bConstrainedIntraPred flag upper level. I-slice is simplified case too. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of santhosh...@multicorewareinc.com Sent: Tuesday, August 05, 2014 7:10 PM To: x265-devel@videolan.org Subject: [x265] [PATCH] TComPattern: remove redundant functions used for CIP # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1407233322 -19800 # Tue Aug 05 15:38:42 2014 +0530 # Node ID aadca66911c2f838e5b6dba671f65a3abeafcb38 # Parent 0d4723a0080cff763ff20ab9c516c6e082496a0b TComPattern: remove redundant functions used for CIP diff -r 0d4723a0080c -r aadca66911c2 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Tue Aug 05 01:05:47 2014 -0500 +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Aug 05 15:38:42 2014 +0530 @@ -209,24 +209,13 @@ int partIdxStride = cu-m_pic-getNumPartInCUSize(); partIdxLB= g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)]; -if (!cu-m_slice-m_pps-bConstrainedIntraPred) -{ -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); -} -else -{ -bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailableCIP(cu, 
partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); -numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); -} +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); + intraNeighbors-numIntraNeighbor = numIntraNeighbor; intraNeighbors-totalUnits = aboveUnits + leftUnits + 1; intraNeighbors-aboveUnits = aboveUnits; @@ -421,7 +410,10 @@ uint32_t partAboveLeft; TComDataCU* pcCUAboveLeft = cu-getPUAboveLeft(partAboveLeft, partIdxLT); -return pcCUAboveLeft ? true : false; +if (!cu-m_slice-m_pps-bConstrainedIntraPred) +return pcCUAboveLeft ? 
true : false; +else +return pcCUAboveLeft pcCUAboveLeft-isIntra(partAboveLeft); } int TComPattern::isAboveAvailable(TComDataCU* cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags) @@ -436,7 +428,7 @@ { uint32_t uiPartAbove; TComDataCU* pcCUAbove = cu-getPUAbove(uiPartAbove, g_rasterToZscan[rasterPart]); -if (pcCUAbove) +if (pcCUAbove (!cu-m_slice-m_pps-bConstrainedIntraPred || + pcCUAbove-isIntra(uiPartAbove))) { numIntra++; *validFlagPtr = true; @@ -463,7 +455,7 @@ { uint32_t partLeft; TComDataCU* pcCULeft = cu-getPULeft(partLeft, g_rasterToZscan[rasterPart]); -if (pcCULeft) +if (pcCULeft (!cu-m_slice-m_pps-bConstrainedIntraPred || + pcCULeft-isIntra(partLeft))) { numIntra++; *validFlagPtr = true; @@ -488,7 +480,7 @@ { uint32_t uiPartAboveRight; TComDataCU* pcCUAboveRight = cu-getPUAboveRightAdi(uiPartAboveRight, partIdxRT, offset); -if (pcCUAboveRight) +if (pcCUAboveRight + (!cu-m_slice-m_pps-bConstrainedIntraPred || + pcCUAboveRight-isIntra(uiPartAboveRight))) { numIntra++; *validFlagPtr = true; @@ -513,119 +505,7 @@ { uint32_t uiPartBelowLeft; TComDataCU* pcCUBelowLeft =
Re: [x265] primitives for RExt
-mov byte [rsp], %2/4 +mov dword [rsp], %2/4 Why dword? byte is enough for dynamic range partial write needs read-modify-write. +cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1, srcStride0, srcStride1 pinsrw have 2 uops, movd to load 4 bytes and drop unused is better. thanks. this function is not used, and will be removed. From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of chen Sent: Wednesday, August 06, 2014 3:29 AM To: Development for x265 Subject: Re: [x265] primitives for RExt At 2014-08-05 20:48:50,Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1407242513 -32400 # Tue Aug 05 21:41:53 2014 +0900 # Node ID 770c40d768d55e68e76c485d5dc61d014257e789 # Parent 0d4723a0080cff763ff20ab9c516c6e082496a0b primitives for RExt @@ -1494,7 +1599,7 @@ ;--- -- %macro FILTER_VER_CHROMA_SS 4 INIT_XMM sse2 -cglobal interp_4tap_vert_%3_%1x%2, 5, 7, %4 ,0-1 +cglobal interp_4tap_vert_%3_%1x%2, 5, 7, %4 ,0-gprsize add r1d, r1d add r3d, r3d @@ -1508,7 +1613,7 @@ lea r6, [tab_ChromaCoeffV + r4] %endif -mov byte [rsp], %2/4 +mov dword [rsp], %2/4 Why dword? 
byte is enough for dynamic range diff -r 0d4723a0080c -r 770c40d768d5 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Tue Aug 05 01:05:47 2014 -0500 +++ b/source/common/x86/pixel-util8.asm Tue Aug 05 21:41:53 2014 +0900 @@ -2878,6 +2878,61 @@ RET ;--- -- +; void pixel_sub_ps_2x%2(pixel *dest, intptr_t destride, pixel *src0, int16_t *scr1, intptr_t srcStride0, intptr_t srcStride1) +;- +%macro PIXEL_SUB_PS_W2_H2 2 +%if HIGH_BIT_DEPTH +INIT_XMM sse2 +cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1, srcStride0, srcStride1 +add r1, r1 +add r4, r4 +add r5, r5 +mov r6d,%2/2 +.loop: +movdm0, [r2] +movdm1, [r3] +movdm2, [r2 + r4] +movdm3, [r3 + r5] +dec r6d +lea r2, [r2 + r4 * 2] +lea r3, [r3 + r5 * 2] +psubw m0, m1 +psubw m2, m3 +movd[r0], m0 +movd[r0 + r1], m2 +lea r0, [r0 + 2 * r1] +jnz .loop +RET +%else +INIT_XMM sse4 +cglobal pixel_sub_ps_2x%2, 6, 7, 4, dest, destride, src0, scr1, srcStride0, srcStride1 +add r1, r1 +mov r6d,%2/2 +.loop: +pinsrw m0, [r2], 0 +pinsrw m1, [r3], 0 +pinsrw m2, [r2 + r4], 0 +pinsrw m3, [r3 + r5], 0 pinsrw have 2 uops, movd to load 4 bytes and drop unused is better. +dec r6d +lea r2, [r2 + r4 * 2] +lea r3, [r3 + r5 * 2] +pmovzxbwm0, m0 +pmovzxbwm1, m1 +pmovzxbwm2, m2 +pmovzxbwm3, m3 +psubw m0, m1 +psubw m2, m3 +movd[r0], m0 +movd[r0 + r1], m2 +lea r0, [r0 + r1 * 2] +jnz .loop +RET +%endif +%endmacro ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] search: separate bSkipRes == true path
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406540991 -32400 # Mon Jul 28 18:49:51 2014 +0900 # Node ID a4beebdb70524da737d4d5d11e6b55961b9ef988 # Parent 8bab5275baed85f8a6e183d7edfeba9a516a3669 search: separate bSkipRes == true path diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 00:14:55 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 18:49:51 2014 +0900 @@ -2268,6 +2268,57 @@ mvmax.y = X265_MIN(mvmax.y, m_refLagPixels); } +void TEncSearch::encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* outReconYuv) +{ +X265_CHECK(!cu-isIntra(0), intra CU not expected\n); + +uint32_t log2CUSize = cu-getLog2CUSize(0); +uint32_t cuSize = 1 log2CUSize; +uint8_t depth = cu-getDepth(0); + +int hChromaShift = CHROMA_H_SHIFT(m_csp); +int vChromaShift = CHROMA_V_SHIFT(m_csp); + +// No residual coding : SKIP mode + +cu-setSkipFlagSubParts(true, 0, depth); +cu-setTrIdxSubParts(0, 0, depth); +cu-clearCbf(0, depth); + +outReconYuv-copyFromYuv(predYuv); +// Luma +int part = partitionFromLog2Size(log2CUSize); +uint32_t distortion = primitives.sse_pp[part](fencYuv-getLumaAddr(), fencYuv-getStride(), outReconYuv-getLumaAddr(), outReconYuv-getStride()); +// Chroma +part = partitionFromSizes(cuSize hChromaShift, cuSize vChromaShift); +distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv-getCbAddr(), fencYuv-getCStride(), outReconYuv-getCbAddr(), outReconYuv-getCStride())); +distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv-getCrAddr(), fencYuv-getCStride(), outReconYuv-getCrAddr(), outReconYuv-getCStride())); + +m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]); +m_entropyCoder-resetBits(); +if (cu-m_slice-m_pps-bTransquantBypassEnabled) +m_entropyCoder-codeCUTransquantBypassFlag(cu, 0); +m_entropyCoder-codeSkipFlag(cu, 0); +m_entropyCoder-codeMergeIndex(cu, 0); + +uint32_t bits = 
m_entropyCoder-getNumberOfWrittenBits(); +cu-m_mvBits = bits; +cu-m_coeffBits = 0; +cu-m_totalBits = bits; +cu-m_totalDistortion = distortion; +if (m_rdCost.psyRdEnabled()) +{ +int size = log2CUSize - 2; +cu-m_psyEnergy = m_rdCost.psyCost(size, fencYuv-getLumaAddr(), fencYuv-getStride(), + outReconYuv-getLumaAddr(), outReconYuv-getStride()); +cu-m_totalPsyCost = m_rdCost.calcPsyRdCost(cu-m_totalDistortion, cu-m_totalBits, cu-m_psyEnergy); +} +else +cu-m_totalRDCost = m_rdCost.calcRdCost(cu-m_totalDistortion, cu-m_totalBits); + +m_entropyCoder-store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); +} + /** encode residual and calculate rate-distortion for a CU block * \param cu * \param fencYuv @@ -2275,17 +2326,14 @@ * \param outResiYuv * \param outBestResiYuv * \param outReconYuv - * \param bSkipRes * \returns void */ void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, - ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool bSkipRes, bool curUseRDOQ) + ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool curUseRDOQ) { -if (cu-isIntra(0)) -return; - -uint32_t bits = 0, bestBits = 0, bestCoeffBits = 0; -uint32_t distortion = 0, bestDist = 0; +X265_CHECK(!cu-isIntra(0), intra CU not expected\n); + +uint32_t bestBits = 0, bestCoeffBits = 0; uint32_t log2CUSize = cu-getLog2CUSize(0); uint32_t cuSize = 1 log2CUSize; @@ -2294,77 +2342,33 @@ int hChromaShift = CHROMA_H_SHIFT(m_csp); int vChromaShift = CHROMA_V_SHIFT(m_csp); -// No residual coding : SKIP mode -if (bSkipRes) +m_trQuant.setQPforQuant(cu); + +outResiYuv-subtract(fencYuv, predYuv, log2CUSize); + +// Residual coding. 
+bool bIsTQBypassEnable = cu-m_slice-m_pps-bTransquantBypassEnabled; +uint32_t tqBypassMode = 1; + +if (bIsTQBypassEnable) { -cu-setSkipFlagSubParts(true, 0, depth); - -outReconYuv-copyFromYuv(predYuv); -// Luma -int part = partitionFromLog2Size(log2CUSize); -distortion = primitives.sse_pp[part](fencYuv-getLumaAddr(), fencYuv-getStride(), outReconYuv-getLumaAddr(), outReconYuv-getStride()); -// Chroma -part = partitionFromSizes(cuSize hChromaShift, cuSize vChromaShift); -distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv-getCbAddr(), fencYuv-getCStride(), outReconYuv-getCbAddr(), outReconYuv-getCStride())); -distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv-getCrAddr(), fencYuv-getCStride(), outReconYuv-getCrAddr(), outReconYuv-getCStride
Re: [x265] [PATCH] analysis: setQPforQuant in checkIntraInter to fix the hash mismatch at rd=56
Thank you for fix my bug. My own fix is very similar. # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406098305 -32400 # Wed Jul 23 15:51:45 2014 +0900 # Node ID e3b909fd6e1f5348944872c704cabff229c37f71 # Parent e3ad03b7c4854be40730645d4fe25e56a93f3f94 fix bug in 4d2c3d09e836 diff -r e3ad03b7c485 -r e3b909fd6e1f source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Tue Jul 22 13:28:54 2014 -0500 +++ b/source/encoder/analysis.cpp Wed Jul 23 15:51:45 2014 +0900 @@ -1219,6 +1219,8 @@ outBestCU-getCbf(0, TEXT_CHROMA_U) != 0 || outBestCU-getCbf(0, TEXT_CHROMA_V) != 0) doIntra) { +m_trQuant.setQPforQuant(outTempCU); + checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N); outTempCU-initEstData(); From: g...@multicorewareinc.com Subject: [x265] [PATCH] analysis: setQPforQuant in checkIntraInter to fix the hash mismatch at rd=56 Date: Wed, 23 Jul 2014 11:17:01 +0530 # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1406094393 -19800 # Wed Jul 23 11:16:33 2014 +0530 # Node ID 1beaaabef3eb6d3e832102ed7dafcd855c1d7298 # Parent e3ad03b7c4854be40730645d4fe25e56a93f3f94 analysis: setQPforQuant in checkIntraInter to fix the hash mismatch at rd=56 diff -r e3ad03b7c485 -r 1beaaabef3eb source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Tue Jul 22 13:28:54 2014 -0500 +++ b/source/encoder/analysis.cpp Wed Jul 23 11:16:33 2014 +0530 @@ -1722,6 +1722,7 @@ PPAScopeEvent(CheckRDCostIntra + depth); +m_trQuant.setQPforQuant(outTempCU); outTempCU-setSkipFlagSubParts(false, 0, depth); outTempCU-setPartSizeSubParts(partSize, 0, depth); outTempCU-setPredModeSubParts(MODE_INTRA, 0, depth); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] qtLayer in reverse order
Is NUM_LAYERS the same as MAX_CU_DEPTH? Could we use that here? No, it is (MAX_LOG2_TR_SIZE - 2 + 1). -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Thursday, July 24, 2014 12:12 PM To: Development for x265 Subject: Re: [x265] qtLayer in reverse order On 07/24, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406166493 -32400 # Thu Jul 24 10:48:13 2014 +0900 # Node ID 737648a21e98a7ea339e1b659175c1f229d77c8c # Parent 342d72f0b61322f31bec35634cc893e4e2b04be3 qtLayer in reverse order diff -r 342d72f0b613 -r 737648a21e98 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jul 23 17:47:16 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 24 10:48:13 2014 +0900 @@ -51,9 +51,7 @@ TEncSearch::TEncSearch() { -m_qtTempCoeff[0] = NULL; -m_qtTempCoeff[1] = NULL; -m_qtTempCoeff[2] = NULL; +memset(m_qtTempCoeff, 0, sizeof(m_qtTempCoeff)); m_qtTempTrIdx = NULL; m_qtTempShortYuv = NULL; for (int i = 0; i 3; i++) @@ -80,7 +78,6 @@ X265_FREE(m_qtTempCbf[0]); X265_FREE(m_qtTempTransformSkipFlag[0]); -delete[] m_qtTempCoeff[0]; delete[] m_qtTempShortYuv; } @@ -94,9 +91,6 @@ * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */ m_refLagPixels = m_bFrameParallel ? 
m_param-searchRange : m_param-sourceHeight; -m_qtTempCoeff[0] = new coeff_t*[m_numLayers * 3]; -m_qtTempCoeff[1] = m_qtTempCoeff[0] + m_numLayers; -m_qtTempCoeff[2] = m_qtTempCoeff[0] + m_numLayers * 2; m_qtTempShortYuv = new ShortYuv[m_numLayers]; uint32_t sizeL = 1 (g_maxLog2CUSize * 2); uint32_t sizeC = sizeL (CHROMA_H_SHIFT(m_csp) + CHROMA_V_SHIFT(m_csp)); @@ -239,7 +233,7 @@ } uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer= cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize; +uint32_t qtLayer= log2TrSize - 2; uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize(); uint32_t coeffOffset = absPartIdx (log2UnitSize * 2); coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; @@ -266,8 +260,6 @@ } uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer= cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize; -uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize(); uint32_t trDepthC = trDepth; int hChromaShift = CHROMA_H_SHIFT(m_csp); @@ -286,6 +278,9 @@ } } +uint32_t qtLayer= log2TrSize - 2; +uint32_t log2UnitSize = cu-m_pic-getLog2UnitSize(); + if (m_csp != CHROMA_422) { uint32_t shift = (m_csp == CHROMA_420) ? 
2 : 0; @@ -560,7 +555,6 @@ bool bCheckFull = (log2TrSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize); bool bCheckSplit = (log2TrSize cu-getQuadtreeTULog2MinSizeInCU(absPartIdx)); -int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize; int isIntraSlice = (cu-m_slice-m_sliceType == I_SLICE); // don't check split if TU size is less or equal to max TU size @@ -568,6 +562,7 @@ if (m_param-rdPenalty !isIntraSlice) { +int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize; // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice noSplitIntraMaxTuSize = (log2TrSize = X265_MIN(maxTuSize, 4)); @@ -613,7 +608,7 @@ cu-setTrIdxSubParts(trDepth, absPartIdx, fullDepth); -uint32_t qtLayer= cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize; +uint32_t qtLayer= log2TrSize - 2; uint32_t coeffOffsetY = absPartIdx cu-m_pic-getLog2UnitSize() * 2; coeff_t* coeffY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY; int16_t* reconQt= m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx); @@ -793,7 +788,7 @@ cu-setTransformSkipSubParts(bestModeId, TEXT_LUMA, absPartIdx, fullDepth); //--- set reconstruction for next intra prediction blocks --- -uint32_t qtLayer = cu-m_slice-m_sps-quadtreeTULog2MaxSize - log2TrSize; +uint32_t qtLayer = log2TrSize - 2; uint32_t zorder= cu-getZorderIdxInCU() + absPartIdx; int16_t* reconQt = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx); X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, width is not max CU size\n); @@ -823,12 +818,14 @@ bool bCheckFull = (log2TrSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize); bool bCheckSplit = (log2TrSize cu-getQuadtreeTULog2MinSizeInCU(absPartIdx)); -int maxTuSize = cu-m_slice-m_sps-quadtreeTULog2MaxSize; int isIntraSlice = (cu-m_slice
[x265] refine partition size related
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406011990 -32400 # Tue Jul 22 15:53:10 2014 +0900 # Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73 # Parent d303b4d860e9f06396a156726dd518d0f41fe796 refine partition size related - reorder LumaPartitions to simplify partitionFromLog2Size() - remove unused diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Jul 21 22:43:38 2014 -0500 +++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jul 22 15:53:10 2014 +0900 @@ -127,6 +127,15 @@ primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, srcPicYuv-getCStride()); } +void TComYuv::copyFromYuv(TComYuv* srcYuv) +{ +X265_CHECK(m_width = srcYuv-m_width m_height = srcYuv-m_height, invalid size\n); + +primitives.luma_copy_pp[m_part](m_buf[0], m_width, srcYuv-m_buf[0], srcYuv-m_width); +primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_cwidth, srcYuv-m_buf[1], srcYuv-m_cwidth); +primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_cwidth, srcYuv-m_buf[2], srcYuv-m_cwidth); +} + void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx) { pixel* dstY = dstPicYuv-getLumaAddr(partIdx); @@ -156,50 +165,9 @@ primitives.chroma[m_csp].copy_pp[part](dstV, dstPicYuv-getCStride(), srcV, getCStride()); } -void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) +void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size) { -int part = partitionFromSizes(width, height); - -X265_CHECK(width != 4 || height != 4, 4x4 partition detected\n); - -if (bLuma) -{ -pixel* src = getLumaAddr(partIdx); -pixel* dst = dstPicYuv-getLumaAddr(partIdx); - -uint32_t srcstride = getStride(); -uint32_t dststride = dstPicYuv-getStride(); - -primitives.luma_copy_pp[part](dst, dststride, src, srcstride); -} -if (bChroma) -{ -pixel* srcU = getCbAddr(partIdx); -pixel* srcV = getCrAddr(partIdx); -pixel* dstU = 
dstPicYuv-getCbAddr(partIdx); -pixel* dstV = dstPicYuv-getCrAddr(partIdx); - -uint32_t srcstride = getCStride(); -uint32_t dststride = dstPicYuv-getCStride(); - -primitives.chroma[m_csp].copy_pp[part](dstU, dststride, srcU, srcstride); -primitives.chroma[m_csp].copy_pp[part](dstV, dststride, srcV, srcstride); -} -} - -void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize) -{ -int part = partitionFromSize(lumaSize); - -int16_t* dst = dstPicYuv-getLumaAddr(partIdx); -uint32_t dststride = dstPicYuv-m_width; - -primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride()); -} - -void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize) -{ -int part = partitionFromSize(partSize); +int part = partitionFromLog2Size(log2Size); addClipLuma(srcYuv0, srcYuv1, part); addClipChroma(srcYuv0, srcYuv1, part); @@ -235,113 +203,32 @@ primitives.chroma[m_csp].add_ps[part](dstV, dststride, srcV0, srcV1, src0Stride, src1Stride); } -void TComYuv::addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) -{ -int x, y; -uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; - -pixel* srcY0 = srcYuv0-getLumaAddr(partUnitIdx); -pixel* srcU0 = srcYuv0-getCbAddr(partUnitIdx); -pixel* srcV0 = srcYuv0-getCrAddr(partUnitIdx); - -pixel* srcY1 = srcYuv1-getLumaAddr(partUnitIdx); -pixel* srcU1 = srcYuv1-getCbAddr(partUnitIdx); -pixel* srcV1 = srcYuv1-getCrAddr(partUnitIdx); - -pixel* dstY = getLumaAddr(partUnitIdx); -pixel* dstU = getCbAddr(partUnitIdx); -pixel* dstV = getCrAddr(partUnitIdx); - -if (bLuma) -{ -src0Stride = srcYuv0-getStride(); -src1Stride = srcYuv1-getStride(); -dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - -for (y = 0; y height; y++) -{ -for (x = 0; x width; x += 4) -{ -dstY[x + 0] = Clip((srcY0[x + 0] + srcY1[x + 0] + offset) shiftNum); -dstY[x + 1] 
= Clip((srcY0[x + 1] + srcY1[x + 1] + offset) shiftNum); -dstY[x + 2] = Clip((srcY0[x + 2] + srcY1[x + 2] + offset) shiftNum); -dstY[x + 3] = Clip((srcY0[x + 3] + srcY1[x + 3] + offset) shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} -} -if (bChroma) -{ -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - -src0Stride = srcYuv0-getCStride(); -src1Stride = srcYuv1
Re: [x265] refine partition size related
The reason is to find non-optimized functions, and to see which functions can be aliased. I think many 4:2:2 functions can be aliased. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Tuesday, July 22, 2014 4:16 PM To: Development for x265 Subject: Re: [x265] refine partition size related On 07/22, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1406011990 -32400 # Tue Jul 22 15:53:10 2014 +0900 # Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73 # Parent d303b4d860e9f06396a156726dd518d0f41fe796 refine partition size related - reorder LumaPartitions to simplify partitionFromLog2Size() - remove unused Queued for testing, thanks. One question below: diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Jul 21 22:43:38 2014 -0500 +++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jul 22 15:53:10 2014 +0900 @@ -127,6 +127,15 @@ snip diff -r d303b4d860e9 -r b2ad081e4bfc source/test/testbench.cpp --- a/source/test/testbench.cpp Mon Jul 21 22:43:38 2014 -0500 +++ b/source/test/testbench.cpp Tue Jul 22 15:53:10 2014 +0900 @@ -127,6 +127,7 @@ EncoderPrimitives cprim; memset(&cprim, 0, sizeof(EncoderPrimitives)); Setup_C_Primitives(cprim); +Setup_Alias_Primitives(cprim); struct test_arch_t { @@ -186,6 +187,7 @@ memset(&optprim, 0, sizeof(optprim)); Setup_Instrinsic_Primitives(optprim, cpuid); Setup_Assembly_Primitives(optprim, cpuid); +Setup_Alias_Primitives(optprim); Is there a reason to test the aliased functions, since by their nature they should already be tested via another function pointer? -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] trquant: store QpParam for each component
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1405905842 -32400 # Mon Jul 21 10:24:02 2014 +0900 # Node ID b40af94fd00f5f23a22854aaf498ffef32910110 # Parent eb983d29c11acc03b91e07fe93c31503fa3a4732 trquant: store QpParam for each component diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComLoopFilter.cpp --- a/source/Lib/TLibCommon/TComLoopFilter.cpp Thu Jul 17 09:29:39 2014 +0200 +++ b/source/Lib/TLibCommon/TComLoopFilter.cpp Mon Jul 21 10:24:02 2014 +0900 @@ -48,7 +48,6 @@ // // Constants // -#define QpUV(iQpY, chFmt) (((iQpY) 0) ? (iQpY) : (((iQpY) 57) ? ((iQpY) - 6) : g_chromaScale[chFmt][(iQpY)])) #define DEFAULT_INTRA_TC_OFFSET 2 /// Default intra TC offset // @@ -441,9 +440,6 @@ pixel* tmpsrc = src; int stride = reconYuv-getStride(); -int qp = 0; -int qpP = 0; -int qpQ = 0; uint32_t numParts = cu-m_pic-getNumPartInCUSize() depth; uint32_t log2UnitSize = g_log2UnitSize; @@ -457,8 +453,8 @@ uint32_t partQ = 0; TComDataCU* cuP = cu; TComDataCU* cuQ = cu; -int betaOffsetDiv2 = cuQ-m_slice-m_pps-deblockingFilterBetaOffsetDiv2; -int tcOffsetDiv2 = cuQ-m_slice-m_pps-deblockingFilterTcOffsetDiv2; +int betaOffset = cuQ-m_slice-m_pps-deblockingFilterBetaOffsetDiv2 1; +int tcOffset = cuQ-m_slice-m_pps-deblockingFilterTcOffsetDiv2 1; if (dir == EDGE_VER) { @@ -480,7 +476,7 @@ bs = blockingStrength[bsAbsIdx]; if (bs) { -qpQ = cu-getQP(bsAbsIdx); +int qpQ = cu-getQP(bsAbsIdx); partQ = bsAbsIdx; // Derive neighboring PU index if (dir == EDGE_VER) @@ -492,12 +488,12 @@ cuP = cuQ-getPUAbove(partP, partQ); } -qpP = cuP-getQP(partP); -qp = (qpP + qpQ + 1) 1; +int qpP = cuP-getQP(partP); +int qp = (qpP + qpQ + 1) 1; int bitdepthScale = 1 (X265_DEPTH - 8); -int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + (tcOffsetDiv2 1))); -int indexB = Clip3(0, MAX_QP, qp + (betaOffsetDiv2 1)); +int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + 
tcOffset)); +int indexB = Clip3(0, MAX_QP, qp + betaOffset); int tc = sm_tcTable[indexTC] * bitdepthScale; int beta = sm_betaTable[indexB] * bitdepthScale; @@ -544,13 +540,11 @@ void TComLoopFilter::xEdgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int dir, int edge, uint8_t blockingStrength[]) { +int chFmt = cu-getChromaFormat(); TComPicYuv* reconYuv = cu-m_pic-getPicYuvRec(); int stride = reconYuv-getCStride(); pixel* srcCb = reconYuv-getCbAddr(cu-getAddr(), absZOrderIdx); pixel* srcCr = reconYuv-getCrAddr(cu-getAddr(), absZOrderIdx); -int qp = 0; -int qpP = 0; -int qpQ = 0; uint32_t log2UnitSizeH = g_log2UnitSize - cu-getHorzChromaShift(); uint32_t log2UnitSizeV = g_log2UnitSize - cu-getVertChromaShift(); uint32_t unitSizeChromaH = 1 log2UnitSizeH; @@ -565,7 +559,7 @@ uint32_t partQ; TComDataCU* cuP; TComDataCU* cuQ = cu; -int tcOffsetDiv2 = cu-m_slice-m_pps-deblockingFilterTcOffsetDiv2; +int tcOffset = cu-m_slice-m_pps-deblockingFilterTcOffsetDiv2 1; // Vertical Position uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge; @@ -611,7 +605,7 @@ if (bs 1) { -qpQ = cu-getQP(bsAbsIdx); +int qpQ = cu-getQP(bsAbsIdx); partQ = bsAbsIdx; // Derive neighboring PU index if (dir == EDGE_VER) @@ -623,7 +617,7 @@ cuP = cuQ-getPUAbove(partP, partQ); } -qpP = cuP-getQP(partP); +int qpP = cuP-getQP(partP); if (cu-m_slice-m_pps-bTransquantBypassEnabled) { @@ -636,10 +630,17 @@ { int chromaQPOffset = (chromaIdx == 0) ? cu-m_slice-m_pps-chromaCbQpOffset : cu-m_slice-m_pps-chromaCrQpOffset; pixel* piTmpSrcChroma = (chromaIdx == 0) ? tmpSrcCb : tmpSrcCr; -qp = QpUVqpP + qpQ + 1) 1) + chromaQPOffset), cu-getChromaFormat()); +int qp = ((qpP + qpQ + 1) 1) + chromaQPOffset; +if (qp = 30) +{ +if (chFmt == CHROMA_420) +qp = g_chromaScale[qp]; +else
[x265] scan order tables
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1405768660 -32400 # Sat Jul 19 20:17:40 2014 +0900 # Node ID 72657141a6068000bbbc8e2c20362bbbd53510bd # Parent eb983d29c11acc03b91e07fe93c31503fa3a4732 scan order tables diff -r eb983d29c11a -r 72657141a606 source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Thu Jul 17 09:29:39 2014 +0200 +++ b/source/Lib/TLibCommon/TComRom.cpp Sat Jul 19 20:17:40 2014 +0900 @@ -42,86 +42,6 @@ namespace x265 { //! \ingroup TLibCommon //! \{ -// scanning order table -uint16_t* g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_LOG2_TR_SIZE + 1]; - -class ScanGenerator -{ -private: - -uint32_t m_line, m_column; -uint32_t m_blockWidth, m_blockHeight; -uint32_t m_stride; -COEFF_SCAN_TYPE m_scanType; - -public: - -ScanGenerator(uint32_t blockWidth, uint32_t blockHeight, uint32_t stride, COEFF_SCAN_TYPE scanType) -: m_line(0), m_column(0), m_blockWidth(blockWidth), m_blockHeight(blockHeight), m_stride(stride), m_scanType(scanType) -{ } - -uint32_t GetCurrentX() const { return m_column; } - -uint32_t GetCurrentY() const { return m_line; } - -uint32_t GetNextIndex(uint32_t blockOffsetX, uint32_t blockOffsetY) -{ -int rtn = ((m_line + blockOffsetY) * m_stride) + m_column + blockOffsetX; - -//advance line and column to the next position -switch (m_scanType) -{ -case SCAN_DIAG: -{ -if ((m_column == (m_blockWidth - 1)) || (m_line == 0)) //if we reach the end of a rank, go diagonally down to the next one -{ -m_line += m_column + 1; -m_column = 0; - -if (m_line = m_blockHeight) //if that takes us outside the block, adjust so that we are back on the bottom row -{ -m_column += m_line - (m_blockHeight - 1); -m_line= m_blockHeight - 1; -} -} -else -{ -m_column++; -m_line--; -} -} -break; - -case SCAN_HOR: -{ -if (m_column == (m_blockWidth - 1)) -{ -m_line++; -m_column = 0; -} -else m_column++; -} -break; - -case SCAN_VER: -{ -if (m_line == (m_blockHeight - 1)) -{ -m_column++; -m_line = 0; -} 
-else m_line++; -} -break; - -default: -X265_CHECK(0, ERROR: Unknown scan type %d in ScanGenerator::GetNextIndex, m_scanType); -break; -} - -return rtn; -} -}; // lambda = pow(2, (double)q / 6 - 2); double x265_lambda_tab[MAX_MAX_QP + 1] = @@ -190,81 +110,12 @@ g_convertToBit[i] = c; c++; } - -// initialise scan orders -for (uint32_t log2BlockSize = 0; log2BlockSize = MAX_LOG2_TR_SIZE; log2BlockSize++) -{ -const uint32_t blockWidth = 1 log2BlockSize; -const uint32_t blockHeight = 1 log2BlockSize; -const uint32_t totalValues = blockWidth * blockHeight; -//non-grouped scan orders -for (uint32_t scanTypeIndex = 0; scanTypeIndex SCAN_NUMBER_OF_TYPES; scanTypeIndex++) -{ -const COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(scanTypeIndex); -g_scanOrder[SCAN_UNGROUPED][scanType][log2BlockSize] = X265_MALLOC(uint16_t, totalValues); -ScanGenerator fullBlockScan(blockWidth, blockHeight, blockWidth, scanType); - -for (uint32_t scanPosition = 0; scanPosition totalValues; scanPosition++) -{ - g_scanOrder[SCAN_UNGROUPED][scanType][log2BlockSize][scanPosition] = fullBlockScan.GetNextIndex(0, 0); -} -} - -//grouped scan orders -const uint32_t groupWidth = 1 MLS_CG_LOG2_SIZE; -const uint32_t groupHeight = 1 MLS_CG_LOG2_SIZE; -const uint32_t widthInGroups= blockWidth MLS_CG_LOG2_SIZE; -const uint32_t heightInGroups = blockHeight MLS_CG_LOG2_SIZE; - -const uint32_t groupSize= groupWidth* groupHeight; -const uint32_t totalGroups = widthInGroups * heightInGroups; - -for (uint32_t scanTypeIndex = 0; scanTypeIndex SCAN_NUMBER_OF_TYPES; scanTypeIndex++) -{ -const COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(scanTypeIndex); - -g_scanOrder[SCAN_GROUPED_4x4][scanType][log2BlockSize] = X265_MALLOC(uint16_t, totalValues); - -ScanGenerator fullBlockScan(widthInGroups, heightInGroups, groupWidth, scanType); - -for (uint32_t groupIndex = 0; groupIndex totalGroups; groupIndex++) -{ -const uint32_t groupPositionY = fullBlockScan.GetCurrentY
[x265] zero stride for zeroPel[]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1405052989 -32400 # Fri Jul 11 13:29:49 2014 +0900 # Node ID 18a6ee92620f1f7266dfbeff3b9010aae356d796 # Parent e3e077965c39a56a24e09189652e1de3c5a0e3ea zero stride for zeroPel[] diff -r e3e077965c39 -r 18a6ee92620f source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 19:29:46 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jul 11 13:29:49 2014 +0900 @@ -47,7 +47,7 @@ using namespace x265; -ALIGN_VAR_32(const pixel, RDCost::zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = { 0 }; +ALIGN_VAR_32(const pixel, RDCost::zeroPel[MAX_CU_SIZE]) = { 0 }; TEncSearch::TEncSearch() { @@ -2404,7 +2404,7 @@ { int size = g_convertToBit[cuSize]; zeroPsyEnergyY = m_rdCost-psyCost(size, fencYuv-getLumaAddr(), fencYuv-getStride(), -(pixel*)RDCost::zeroPel, MAX_CU_SIZE); // need to check whether zero distortion is similar to psyenergy of fenc +(pixel*)RDCost::zeroPel, 0); // need to check whether zero distortion is similar to psyenergy of fenc zeroCost = m_rdCost-calcPsyRdCost(zeroDistortion, zeroResiBits, zeroPsyEnergyY); } else @@ -2813,13 +2813,13 @@ } int partSize = partitionFromSize(trSize); -uint32_t distY = primitives.sse_sp[partSize](resiYuv-getLumaAddr(absPartIdx), resiYuv-m_width, (pixel*)RDCost::zeroPel, trSize); +uint32_t distY = primitives.sse_sp[partSize](resiYuv-getLumaAddr(absPartIdx), resiYuv-m_width, (pixel*)RDCost::zeroPel, 0); uint32_t psyEnergyY = 0; if (m_rdCost-psyRdEnabled()) { -int size = g_convertToBit[trSize]; +int size = log2TrSize - 2; psyEnergyY = m_rdCost-psyCost(size, fencYuv-getLumaAddr(absPartIdx), fencYuv-getStride(), -(pixel*)RDCost::zeroPel, trSize); // need to check whether zero distortion is similar to psyenergy of fenc +(pixel*)RDCost::zeroPel, 0); // need to check whether zero distortion is similar to psyenergy of fenc } int16_t *curResiY = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx); X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == 
MAX_CU_SIZE, width not full CU\n); @@ -2845,7 +2845,7 @@ uint32_t stride = fencYuv-getStride(); //= reconstruction = primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY); -int size = g_convertToBit[trSize]; +int size = log2TrSize - 2; nonZeroPsyEnergyY = m_rdCost-psyCost(size, fencYuv-getLumaAddr(absPartIdx), fencYuv-getStride(), cu-getPic()-getPicYuvRec()-getLumaAddr(cu-getAddr(), zorder), cu-getPic()-getPicYuvRec()-getStride()); } @@ -2922,12 +2922,10 @@ int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC); int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC); -distU = m_rdCost-scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv-getCbAddr(absPartIdxC), resiYuv-m_cwidth, (pixel*)RDCost::zeroPel, trSizeC)); - +distU = m_rdCost-scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv-getCbAddr(absPartIdxC), resiYuv-m_cwidth, (pixel*)RDCost::zeroPel, 0)); if (outZeroDist) -{ *outZeroDist += distU; -} + if (numSigU[tuIterator.section]) { int curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCbQpOffset() + cu-getSlice()-getSliceQpDeltaCb(); @@ -2938,7 +2936,6 @@ curResiU, strideResiC); const uint32_t nonZeroDistU = m_rdCost-scaleChromaDistCb(dist); uint32_t nonZeroPsyEnergyU = 0; - if (m_rdCost-psyRdEnabled()) { pixel* pred = predYuv-getCbAddr(absPartIdxC); @@ -2948,7 +2945,7 @@ uint32_t stride = fencYuv-getCStride(); //= reconstruction = primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC); -int size = g_convertToBit[trSizeC]; +int size = log2TrSizeC - 2; nonZeroPsyEnergyU = m_rdCost-psyCost(size, fencYuv-getCbAddr(absPartIdxC), fencYuv-getCStride(), cu-getPic()-getPicYuvRec()-getCbAddr(cu-getAddr(), zorder), cu-getPic()-getPicYuvRec()-getCStride()); } @@ -3007,7 +3004,7 @@ if (!numSigU[tuIterator.section]) primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0
[x265] use std::swap() for readability
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404898046 -32400 # Wed Jul 09 18:27:26 2014 +0900 # Node ID a3f4317f4acd89b7ef9bb8616068f9e4ff24328c # Parent 644773b8532929a30f910fd269f521e44621f2f7 use std::swap() for readability diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp --- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 13:55:42 2014 +0530 +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 18:27:26 2014 +0900 @@ -535,12 +535,10 @@ int isChroma = (yCbCr != 0) ? 1 : 0; int shift; int cuHeightTmp; -pixel* tmpLSwap; pixel* tmpL; pixel* tmpU; pixel* clipTbl = NULL; int32_t *offsetBo = NULL; -int32_t *tmp_swap; picWidthTmp = (isChroma == 0) ? m_picWidth : m_picWidth m_hChromaShift; picHeightTmp = (isChroma == 0) ? m_picHeight : m_picHeight m_vChromaShift; @@ -707,9 +705,7 @@ m_upBufft[startX] = signDown2; -tmp_swap = m_upBuff1; -m_upBuff1 = m_upBufft; -m_upBufft = tmp_swap; +std::swap(m_upBuff1, m_upBufft); rec += stride; } @@ -775,9 +771,7 @@ // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1) { -tmpLSwap = m_tmpL1; -m_tmpL1 = m_tmpL2; -m_tmpL2 = tmpLSwap; +std::swap(m_tmpL1, m_tmpL2); } } @@ -864,7 +858,6 @@ int frameWidthInCU = m_pic-getFrameWidthInCU(); int frameHeightInCU = m_pic-getFrameHeightInCU(); int stride; -pixel *tmpUSwap; int sChroma = (yCbCr == 0) ? 0 : 1; bool mergeLeftFlag; int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY : m_saoBitIncreaseC; @@ -976,9 +969,7 @@ } } -tmpUSwap = m_tmpU1[yCbCr]; -m_tmpU1[yCbCr] = m_tmpU2[yCbCr]; -m_tmpU2[yCbCr] = tmpUSwap; +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]); } } @@ -1018,7 +1009,6 @@ int addr; int frameWidthInCU = m_pic-getFrameWidthInCU(); int stride; -pixel *tmpUSwap; int sChroma = (yCbCr == 0) ? 0 : 1; bool mergeLeftFlag; int saoBitIncrease = (yCbCr == 0) ? 
m_saoBitIncreaseY : m_saoBitIncreaseC; @@ -1122,9 +1112,7 @@ } } -tmpUSwap = m_tmpU1[yCbCr]; -m_tmpU1[yCbCr] = m_tmpU2[yCbCr]; -m_tmpU2[yCbCr] = tmpUSwap; +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]); } } diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 13:55:42 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 18:27:26 2014 +0900 @@ -1258,22 +1258,11 @@ uint64_t bestCost = m_rdCost-psyRdEnabled() ? outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost; if (tempCost bestCost) { -TComDataCU* tmp = outTempCU; -outTempCU = outBestCU; -outBestCU = tmp; - -// Change Prediction data -TComYuv* yuv = NULL; -yuv = outBestPredYuv; -outBestPredYuv = m_tmpPredYuv[depth]; -m_tmpPredYuv[depth] = yuv; - -yuv = rpcYuvReconBest; -rpcYuvReconBest = m_tmpRecoYuv[depth]; -m_tmpRecoYuv[depth] = yuv; - +std::swap(outBestCU, outTempCU); +std::swap(outBestPredYuv, m_tmpPredYuv[depth]); +std::swap(rpcYuvReconBest, m_tmpRecoYuv[depth]); m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]); -} +} outTempCU-setQPSubParts(origQP, 0, depth); outTempCU-setSkipFlagSubParts(false, 0, depth); if (!bestIsSkip) @@ -1446,21 +1435,14 @@ if (tempCost bestCost) { -TComYuv* yuv; // Change Information data -TComDataCU* cu = outBestCU; -outBestCU = outTempCU; -outTempCU = cu; +std::swap(outBestCU, outTempCU); // Change Prediction data -yuv = m_bestPredYuv[depth]; -m_bestPredYuv[depth] = m_tmpPredYuv[depth]; -m_tmpPredYuv[depth] = yuv; +std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // Change Reconstruction data -yuv = m_bestRecoYuv[depth]; -m_bestRecoYuv[depth] = m_tmpRecoYuv[depth]; -m_tmpRecoYuv[depth] = yuv; +std::swap(m_bestRecoYuv[depth], m_tmpRecoYuv[depth]); m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]); } diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp --- a/source/Lib/TLibEncoder
Re: [x265] quant: returns numSig instead of absSum and lastPos
The lastPos output is for the IDCT bypass optimization; I just haven't uploaded it yet. DC-only detection can be replaced by (numSig == 1 && coeff[0] != 0). The exact lastPos is not used elsewhere. // DC only -if (lastPos == 0 && !((trSize == 4) && (mode != REG_DCT))) +if (numSig == 1 && coeff[0] != 0 && !useDST) -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of chen Sent: Tuesday, July 08, 2014 12:09 AM To: Development for x265 Subject: Re: [x265] quant: returns numSig instead of absSum and lastPos At 2014-07-07 16:04:03, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404720026 -32400 # Mon Jul 07 17:00:26 2014 +0900 # Node ID dcf6f2ce907c59eedc3d488a7f047a5f094bf925 # Parent 11c808e562b894d84961cf00080173321e272884 quant: returns numSig instead of absSum and lastPos - packuswb m7, m7 - pxor m0, m0 - psadbw m0, m7 + phaddd m7, m7 + phaddd m7, m7 The old code has 2 uops, the new one has 6 uops. The lastPos output is for the IDCT bypass optimization; I just haven't uploaded it yet. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] add primitives.nquant for RDOQ
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404286661 -32400 # Wed Jul 02 16:37:41 2014 +0900 # Node ID 3f25ca9b5addda057040a5e1a544b9ede9afc509 # Parent a18972fd05b1d6242a881bef979b9e1ff17543d9 add primitives.nquant for RDOQ diff -r a18972fd05b1 -r 3f25ca9b5add source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 01 14:58:35 2014 -0500 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Jul 02 16:37:41 2014 +0900 @@ -508,23 +508,30 @@ uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos) { -x265_emms(); -selectLambda(ttype); - const uint32_t log2TrSize = g_convertToBit[trSize] + 2; -uint32_t absSum = 0; int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform -uint32_t goRiceParam = 0; -double blockUncodedCost = 0; int scalingListType = (cu-isIntra(absPartIdx) ? 
0 : 3) + ttype; X265_CHECK(scalingListType 6, scaling list type out of range\n); int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; // Right shift of non-RDOQ quantizer; level = (coeff*Q + offset)q_bits int add = (1 (qbits - 1)); -double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, m_qpParam.m_rem); int32_t *qCoef = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2); +int numCoeff = 1 log2TrSize * 2; +int scaledCoeff[32 * 32]; +uint32_t numSig = primitives.nquant(srcCoeff, qCoef, scaledCoeff, dstCoeff, qbits, add, numCoeff); + +X265_CHECK(numSig == primitives.count_nonzero(dstCoeff, numCoeff), numSig differ\n); +if (numSig == 0) +return 0; + +x265_emms(); +selectLambda(ttype); + +double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, m_qpParam.m_rem); + +double blockUncodedCost = 0; double costCoeff[32 * 32]; double costSig[32 * 32]; double costCoeff0[32 * 32]; @@ -544,6 +551,7 @@ intc2= 0; double baseCost = 0; intlastScanPos = -1; +uint32_t goRiceParam = 0; uint32_t c1Idx = 0; uint32_t c2Idx = 0; int cgLastScanPos= -1; @@ -567,16 +575,13 @@ //= quantization = uint32_t blkPos = codingParameters.scan[scanPos]; // set coeff -int Q = qCoef[blkPos]; double scaleFactor = errScale[blkPos]; -int levelDouble= srcCoeff[blkPos]; -levelDouble= (int)std::minint64_t((int64_t)abs((int)levelDouble) * Q, MAX_INT - add); -uint32_t maxAbsLevel = (levelDouble + add) qbits; +int levelDouble = scaledCoeff[blkPos]; +uint32_t maxAbsLevel = abs(dstCoeff[blkPos]); costCoeff0[scanPos] = ((uint64_t)levelDouble * levelDouble) * scaleFactor; blockUncodedCost += costCoeff0[scanPos]; -dstCoeff[blkPos]= maxAbsLevel; if (maxAbsLevel 0 lastScanPos 0) { @@ -776,7 +781,7 @@ //= estimate last position = if (lastScanPos 0) { -return absSum; +return 0; } double bestCost = 0; @@ -840,6 +845,7 @@ } // end if (sigCoeffGroupFlag[ cgBlkPos ]) } // end for +uint32_t absSum = 0; for (int pos = 0; pos bestLastIdxp1; pos++) { int blkPos = 
codingParameters.scan[pos]; diff -r a18972fd05b1 -r 3f25ca9b5add source/common/dct.cpp --- a/source/common/dct.cpp Tue Jul 01 14:58:35 2014 -0500 +++ b/source/common/dct.cpp Wed Jul 02 16:37:41 2014 +0900 @@ -780,10 +780,8 @@ for (int blockpos = 0; blockpos numCoeff; blockpos++) { -int level; -int sign; -level = coef[blockpos]; -sign = (level 0 ? -1 : 1); +int level = coef[blockpos]; +int sign = (level 0 ? -1 : 1); int tmplevel = abs(level) * quantCoeff[blockpos]; level = ((tmplevel + add) qBits); @@ -798,6 +796,27 @@ return acSum; } +uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, int32_t* qCoef, int qBits, int add, int numCoeff) +{ +uint32_t numSig = 0; + +for (int blockpos = 0; blockpos numCoeff; blockpos++) +{ +int level = coef[blockpos]; +int sign = (level 0 ? -1 : 1); + +int tmplevel = abs(level) * quantCoeff[blockpos]; +scaledCoeff[blockpos] = tmplevel; +level = ((tmplevel + add) qBits); +if (level) +++numSig; +level *= sign; +qCoef[blockpos] = Clip3(-32768, 32767, level); +} + +return numSig; +} + int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) { X265_CHECK(((intptr_t)quantCoeff 15) == 0, quant buffer not aligned\n); @@ -822,6 +841,7
[x265] [PATCH 1 of 2] fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404197088 -32400 # Tue Jul 01 15:44:48 2014 +0900 # Node ID 0f21455078694344f7d3ed1e69c77217b48bb031 # Parent 38da32f28481f6acd17a9ab4f2e73ad057c54972 fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it diff -r 38da32f28481 -r 0f2145507869 source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jun 30 12:42:51 2014 -0500 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 01 15:44:48 2014 +0900 @@ -508,6 +508,9 @@ uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos) { +x265_emms(); +selectLambda(ttype); + const uint32_t log2TrSize = g_convertToBit[trSize] + 2; uint32_t absSum = 0; int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform diff -r 38da32f28481 -r 0f2145507869 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 30 12:42:51 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jul 01 15:44:48 2014 +0900 @@ -428,8 +428,6 @@ int chFmt = cu-getChromaFormat(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, lastPos, useTransformSkip); //--- set coded block flag --- @@ -515,8 +513,6 @@ curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(ttype); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, lastPos, useTransformSkipC); //--- set coded block flag --- @@ -905,7 +901,6 @@ int lastPos = -1; m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, 
QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, lastPos, useTransformSkip); //--- set coded block flag --- @@ -1421,8 +1416,6 @@ curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(ttype); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, lastPos, useTransformSkipC); //--- set coded block flag --- @@ -2702,13 +2695,11 @@ cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth); -m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - int16_t *curResiY = resiYuv-getLumaAddr(absPartIdx); const uint32_t strideResiY = resiYuv-m_width; const uint32_t strideResiC = resiYuv-m_cwidth; +m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); absSumY = m_trQuant-transformNxN(cu, curResiY, strideResiY, coeffCurY, trSize, TEXT_LUMA, absPartIdx, lastPosY, false, curuseRDOQ); @@ -2746,13 +2737,11 @@ int curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCbQpOffset() + cu-getSlice()-getSliceQpDeltaCb(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(TEXT_CHROMA_U); absSumU = m_trQuant-transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUBufferOffset, trSizeC, TEXT_CHROMA_U, absPartIdxC, lastPosU, false, curuseRDOQ); curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(TEXT_CHROMA_V); absSumV = m_trQuant-transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUBufferOffset, trSizeC, 
TEXT_CHROMA_V, absPartIdxC, lastPosV, false, curuseRDOQ); @@ -2915,8 +2904,6 @@ } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - absSum[TEXT_LUMA][0] = m_trQuant-transformNxN(cu, resiYuv-getLumaAddr
[x265] [PATCH 2 of 2] split rate calculation functions to luma and chroma to simplify luma path
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404197154 -32400 # Tue Jul 01 15:45:54 2014 +0900 # Node ID ad4455ed3815dd826e46d93e2585747c58a2c858 # Parent 0f21455078694344f7d3ed1e69c77217b48bb031 split rate calculation functions to luma and chroma to simplify luma path diff -r 0f2145507869 -r ad4455ed3815 source/Lib/TLibEncoder/TEncEntropy.cpp --- a/source/Lib/TLibEncoder/TEncEntropy.cppTue Jul 01 15:44:48 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncEntropy.cppTue Jul 01 15:45:54 2014 +0900 @@ -325,7 +325,7 @@ } else { -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu-getTransformIdx(absPartIdx), absPartIdxStep, tuSize, tuSize, (subdiv == 0)); +m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu-getTransformIdx(absPartIdx)); } if (cbfY || cbfU || cbfV) @@ -342,7 +342,7 @@ } if (cbfY) { -m_entropyCoder-codeCoeffNxN(cu, (cu-getCoeffY() + offsetLuma), absPartIdx, tuSize, TEXT_LUMA); +m_entropyCoder-codeCoeffNxN(cu, (cu-getCoeffY() + offsetLuma), absPartIdx, log2TrSize, TEXT_LUMA); } int chFmt = cu-getChromaFormat(); @@ -351,7 +351,7 @@ uint32_t partNum = cu-getPic()-getNumPartInCU() ((depth - 1) 1); if ((absPartIdx (partNum - 1)) == (partNum - 1)) { -uint32_t trSizeC = 1 log2TrSize; +const uint32_t log2TrSizeC = 2; const bool splitIntoSubTUs = (chFmt == CHROMA_422); uint32_t curPartNum = cu-getPic()-getNumPartInCU() ((depth - 1) 1); @@ -364,10 +364,10 @@ do { uint32_t cbf = cu-getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs); -uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC; if (cbf) { -m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId); +uint32_t subTUOffset = tuIterator.m_section (log2TrSizeC * 2); +m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId); } } while (isNextTUSection(tuIterator)); @@ 
-376,7 +376,7 @@ } else { -uint32_t trSizeC = tuSize hChromaShift; +uint32_t log2TrSizeC = log2TrSize - hChromaShift; const bool splitIntoSubTUs = (chFmt == CHROMA_422); uint32_t curPartNum = cu-getPic()-getNumPartInCU() (depth 1); for (uint32_t chromaId = TEXT_CHROMA_U; chromaId = TEXT_CHROMA_V; chromaId++) @@ -387,10 +387,10 @@ do { uint32_t cbf = cu-getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs); -uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC; if (cbf) { -m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId); +uint32_t subTUOffset = tuIterator.m_section (log2TrSizeC * 2); +m_entropyCoder-codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId); } } while (isNextTUSection(tuIterator)); @@ -540,11 +540,6 @@ m_entropyCoder-codeQtRootCbf(cu, absPartIdx); } -void TEncEntropy::encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth) -{ -m_entropyCoder-codeQtCbfZero(cu, ttype, trDepth); -} - void TEncEntropy::encodeQtRootCbfZero(TComDataCU* cu) { m_entropyCoder-codeQtRootCbfZero(cu); @@ -593,11 +588,6 @@ xEncodeTransform(cu, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, cuSize, 0, bCodeDQP); } -void TEncEntropy::encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype) -{ -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, trSize, ttype); -} - void TEncEntropy::estimateBit(estBitsSbacStruct* estBitsSBac, int trSize, TextType ttype) { ttype = ttype == TEXT_LUMA ? 
TEXT_LUMA : TEXT_CHROMA; diff -r 0f2145507869 -r ad4455ed3815 source/Lib/TLibEncoder/TEncEntropy.h --- a/source/Lib/TLibEncoder/TEncEntropy.h Tue Jul 01 15:44:48 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncEntropy.h Tue Jul 01 15:45:54 2014 +0900 @@ -114,14 +114,24 @@ void encodeTransformSubdivFlag(uint32_t symbol, uint32_t ctx); void encodeQtCbf
[x265] fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403877807 -32400 # Fri Jun 27 23:03:27 2014 +0900 # Node ID 77f443fe169ca23969df5d5ee6968543bfa5e794 # Parent 32aa6cc3cf4d108ac92f5d29258b2c38ca888d29 fix emms: move selectLambda() into xRateDistOptQuant() and issue emms before it diff -r 32aa6cc3cf4d -r 77f443fe169c source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Jun 26 17:19:08 2014 -0700 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Fri Jun 27 23:03:27 2014 +0900 @@ -508,6 +508,9 @@ uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos) { +x265_emms(); +selectLambda(ttype); + const uint32_t log2TrSize = g_convertToBit[trSize] + 2; uint32_t absSum = 0; int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform diff -r 32aa6cc3cf4d -r 77f443fe169c source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jun 26 17:19:08 2014 -0700 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jun 27 23:03:27 2014 +0900 @@ -428,8 +428,6 @@ int chFmt = cu-getChromaFormat(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, lastPos, useTransformSkip); //--- set coded block flag --- @@ -515,8 +513,6 @@ curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(ttype); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdx, lastPos, useTransformSkipC); //--- set coded block flag --- @@ -905,7 +901,6 @@ int lastPos = -1; m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, 
QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, TEXT_LUMA, absPartIdx, lastPos, useTransformSkip); //--- set coded block flag --- @@ -1421,8 +1416,6 @@ curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(ttype); - absSum = m_trQuant-transformNxN(cu, residual, stride, coeff, tuSize, ttype, absPartIdxC, lastPos, useTransformSkipC); //--- set coded block flag --- @@ -2702,13 +2695,11 @@ cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth); -m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - int16_t *curResiY = resiYuv-getLumaAddr(absPartIdx); const uint32_t strideResiY = resiYuv-m_width; const uint32_t strideResiC = resiYuv-m_cwidth; +m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); absSumY = m_trQuant-transformNxN(cu, curResiY, strideResiY, coeffCurY, trSize, TEXT_LUMA, absPartIdx, lastPosY, false, curuseRDOQ); @@ -2746,13 +2737,11 @@ int curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCbQpOffset() + cu-getSlice()-getSliceQpDeltaCb(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(TEXT_CHROMA_U); absSumU = m_trQuant-transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUBufferOffset, trSizeC, TEXT_CHROMA_U, absPartIdxC, lastPosU, false, curuseRDOQ); curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCrQpOffset() + cu-getSlice()-getSliceQpDeltaCr(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset, chFmt); -m_trQuant-selectLambda(TEXT_CHROMA_V); absSumV = m_trQuant-transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUBufferOffset, trSizeC, 
TEXT_CHROMA_V, absPartIdxC, lastPosV, false, curuseRDOQ); @@ -2915,8 +2904,6 @@ } m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt); -m_trQuant-selectLambda(TEXT_LUMA); - absSum[TEXT_LUMA][0] = m_trQuant-transformNxN(cu, resiYuv-getLumaAddr
[x265] cli: add --ipratio and --pbratio
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403706071 -32400 # Wed Jun 25 23:21:11 2014 +0900 # Node ID 3ca045895945f0afc6b4d1b1868feb00382796a3 # Parent 09450ac6dc7d0f495582bf327488612755df1719 cli: add --ipratio and --pbratio diff -r 09450ac6dc7d -r 3ca045895945 source/common/param.cpp --- a/source/common/param.cpp Tue Jun 24 15:41:55 2014 +0900 +++ b/source/common/param.cpp Wed Jun 25 23:21:11 2014 +0900 @@ -633,6 +633,8 @@ OPT(aud) p-bEnableAccessUnitDelimiters = atobool(value); OPT(b-pyramid) p-bBPyramid = atobool(value); OPT(hrd) p-bEmitHRDSEI = atobool(value); +OPT2(ipratio, ip-factor) p-rc.ipFactor = atof(value); +OPT2(pbratio, pb-factor) p-rc.pbFactor = atof(value); OPT(aq-mode) p-rc.aqMode = atoi(value); OPT(aq-strength) p-rc.aqStrength = atof(value); OPT(vbv-maxrate) p-rc.vbvMaxBitrate = atoi(value); diff -r 09450ac6dc7d -r 3ca045895945 source/x265.cpp --- a/source/x265.cpp Tue Jun 24 15:41:55 2014 +0900 +++ b/source/x265.cpp Wed Jun 25 23:21:11 2014 +0900 @@ -142,6 +142,8 @@ { qp, required_argument, NULL, 'q' }, { aq-mode,required_argument, NULL, 0 }, { aq-strength,required_argument, NULL, 0 }, +{ ipratio,required_argument, NULL, 0 }, +{ pbratio,required_argument, NULL, 0 }, { cbqpoffs, required_argument, NULL, 0 }, { crqpoffs, required_argument, NULL, 0 }, { rd, required_argument, NULL, 0 }, @@ -394,6 +396,8 @@ H0( --aq-mode integer Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance. Default %d\n, param-rc.aqMode); H0( --aq-strength float Reduces blocking and blurring in flat and textured areas.(0 to 3.0). Default %f\n, param-rc.aqStrength); H0( --[no-]cutree Enable cutree for Adaptive Quantization. Default %s\n, OPT(param-rc.cuTree)); +H0( --ipratio float QP factor between I and P. Default %f\n, param-rc.ipFactor); +H0( --pbratio float QP factor between P and B. Default %f\n, param-rc.pbFactor); H0( --cbqpoffs integer Chroma Cb QP Offset. 
Default %d\n, param-cbQpOffset); H0( --crqpoffs integer Chroma Cr QP Offset. Default %d\n, param-crQpOffset); H0( --[no-]hrdEnable HRD parameters signalling. Default %s\n, OPT(param-bEmitHRDSEI)); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] RDOQ enabled rdLevel changed
Is this change intended? # HG changeset patch # User Sumalatha Polureddy <sumala...@multicorewareinc.com> # Date 1403689018 -19800 # Wed Jun 25 15:06:58 2014 +0530 # Node ID e2ed009d296af39926d79f1a245974f158d6861a # Parent 5797d6a8197c96b68752705167ced6cb63194013 psy-rd: implement psy-rd in rdlevel=4,3 and 2 diff -r 5797d6a8197c -r e2ed009d296a source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Wed Jun 25 18:21:34 2014 +0530 +++ b/source/encoder/encoder.cpp Wed Jun 25 15:06:58 2014 +0530 @@ -1267,7 +1267,7 @@ } // disable RDOQ if psy-rd is enabled; until we make it psy-aware -m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4; +m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 1; if (p->bLossless) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 1 of 2] fix xGetIntraBitsQTChroma() for 4:2:2 [CHANGES OUTPUT 4:2:2 with tskip]
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1403592115 -32400 # Tue Jun 24 15:41:55 2014 +0900 # Node ID 3af58371c5ff95fc838db106610423f2c0ee8265 # Parent 18f936182df50cc5126d1707cd7c2b5fef289ccb fix xGetIntraBitsQTChroma() for 4:2:2 [CHANGES OUTPUT 4:2:2 with tskip] diff -r 18f936182df5 -r 3af58371c5ff source/Lib/TLibEncoder/TEncSbac.cpp --- a/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 23 17:03:49 2014 -0700 +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Tue Jun 24 15:41:55 2014 +0900 @@ -2116,7 +2116,7 @@ uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize); #if CHECKED_BUILD || _DEBUG -X265_CHECK(numSig > 0, "cbf check fail"); +X265_CHECK(numSig > 0, "cbf check fail\n"); #endif bool beValid; diff -r 18f936182df5 -r 3af58371c5ff source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 23 17:03:49 2014 -0700 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:41:55 2014 +0900 @@ -254,7 +254,8 @@ trDepth--; uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1); bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0); -if (!bFirstQ) +bool bSecondQ = (chFmt == CHROMA_422 && splitIntoSubTUs) ? ((absPartIdx & (qpdiv - 1)) == 2) : false; +if ((!bFirstQ) && (!bSecondQ)) { return; } @@ -1222,7 +1223,7 @@ } else { -uint32_t bitsTmp = xGetIntraBitsQTChroma(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs); +uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsQTChroma(cu, trDepth, absPartIdxC, chromaId, splitIntoSubTUs) : 0; if (m_rdCost->psyRdEnabled()) { uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 2 of 2] refine intra tskip related.
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403592156 -32400 # Tue Jun 24 15:42:36 2014 +0900 # Node ID ed2786407c46be823515c78cf23d7e0f32ee10fc # Parent 3af58371c5ff95fc838db106610423f2c0ee8265 refine intra tskip related. diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:41:55 2014 +0900 +++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jun 24 15:42:36 2014 +0900 @@ -197,21 +197,6 @@ primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride()); } -void TComYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs) -{ -X265_CHECK(chromaId == 1 || chromaId == 2, invalid chroma id); - -int part = splitIntoSubTUs ? NUM_CHROMA_PARTITIONS422 : partitionFromSize(lumaSize); - -pixel* src = getChromaAddr(chromaId, partIdx); -int16_t* dst = dstPicYuv-getChromaAddr(chromaId, partIdx); - -uint32_t srcstride = getCStride(); -uint32_t dststride = dstPicYuv-m_cwidth; - -primitives.chroma[m_csp].copy_ps[part](dst, dststride, src, srcstride); -} - void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize) { int part = partitionFromSize(partSize); diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibCommon/TComYuv.h --- a/source/Lib/TLibCommon/TComYuv.h Tue Jun 24 15:41:55 2014 +0900 +++ b/source/Lib/TLibCommon/TComYuv.h Tue Jun 24 15:42:36 2014 +0900 @@ -131,7 +131,6 @@ voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma); voidcopyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize); -voidcopyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId, const bool splitIntoSubTUs); // -- // Algebraic operation for YUV buffer diff -r 3af58371c5ff -r ed2786407c46 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:41:55 
2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jun 24 15:42:36 2014 +0900 @@ -57,9 +57,6 @@ m_qtTempCoeff[2] = NULL; m_qtTempTrIdx = NULL; m_qtTempShortYuv = NULL; -m_qtTempTUCoeff[0] = NULL; -m_qtTempTUCoeff[1] = NULL; -m_qtTempTUCoeff[2] = NULL; for (int i = 0; i 3; i++) { m_qtTempTransformSkipFlag[i] = NULL; @@ -83,14 +80,12 @@ m_qtTempShortYuv[i].destroy(); } -X265_FREE(m_qtTempTUCoeff[0]); X265_FREE(m_qtTempTrIdx); X265_FREE(m_qtTempCbf[0]); X265_FREE(m_qtTempTransformSkipFlag[0]); delete[] m_qtTempCoeff[0]; delete[] m_qtTempShortYuv; -m_qtTempTransformSkipYuv.destroy(); } bool TEncSearch::init(Encoder* top, RDCost* rdCost, TComTrQuant* trQuant) @@ -133,11 +128,7 @@ m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] + numPartitions; m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] + numPartitions * 2; -CHECKED_MALLOC(m_qtTempTUCoeff[0], coeff_t, MAX_CU_SIZE * MAX_CU_SIZE * 3); -m_qtTempTUCoeff[1] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE; -m_qtTempTUCoeff[2] = m_qtTempTUCoeff[0] + MAX_CU_SIZE * MAX_CU_SIZE * 2; - -return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize, m_param-internalCsp); +return true; fail: return false; @@ -224,7 +215,7 @@ } } -void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype, const bool splitIntoSubTUs) +void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu-getCbf(absPartIdx, ttype, trDepth)) return; @@ -238,7 +229,7 @@ uint32_t qtPartNum = cu-getPic()-getNumPartInCU() ((fullDepth + 1) 1); for (uint32_t part = 0; part 4; part++) { -xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype, splitIntoSubTUs); +xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype); } return; @@ -254,8 +245,7 @@ trDepth--; uint32_t qpdiv = cu-getPic()-getNumPartInCU() ((cu-getDepth(0) + trDepth) 1); bool bFirstQ = ((absPartIdx (qpdiv - 1)) == 0); -bool bSecondQ = (chFmt == CHROMA_422 
splitIntoSubTUs) ? ((absPartIdx (qpdiv - 1)) == 2) : false; -if ((!bFirstQ) (!bSecondQ)) +if (!bFirstQ) { return; } @@ -267,7 +257,6 @@ int cspy = chroma ? m_vChromaShift : 0; uint32_t width = cu-getCUSize(0) (trDepth + cspx); uint32_t height = cu-getCUSize(0) (trDepth + cspy); -height = splitIntoSubTUs ? height 1 : height; uint32_t coeffOffset = absPartIdx (cu-getPic()-getLog2UnitSize() * 2 - (cspx + cspy
[x265] input: fix race condition
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403343771 -32400 # Sat Jun 21 18:42:51 2014 +0900 # Node ID 31e1104b97521bde2abe64a3f91d63e673f95c90 # Parent fe370292c232ec9a629d191791271b71c1c6f354 input: fix race condition diff -r fe370292c232 -r 31e1104b9752 source/input/y4m.cpp --- a/source/input/y4m.cpp Fri Jun 20 16:41:11 2014 -0700 +++ b/source/input/y4m.cpp Sat Jun 21 18:42:51 2014 +0900 @@ -390,7 +390,6 @@ /* open the throttle at the end, allow reader to consume * remaining valid queue entries */ threadActive = false; -frameStat[tail.get()] = false; tail.set(QUEUE_SIZE); } @@ -406,7 +405,7 @@ { curTail = tail.waitForChange(curTail); if (!threadActive) -return false; +break; } #else @@ -417,6 +416,7 @@ if (!frameStat[curHead]) return false; +frameStat[curHead] = false; pic.bitDepth = depth; pic.colorSpace = colorSpace; diff -r fe370292c232 -r 31e1104b9752 source/input/yuv.cpp --- a/source/input/yuv.cpp Fri Jun 20 16:41:11 2014 -0700 +++ b/source/input/yuv.cpp Sat Jun 21 18:42:51 2014 +0900 @@ -182,7 +182,6 @@ } threadActive = false; -frameStat[tail.get()] = false; tail.set(QUEUE_SIZE); } @@ -218,7 +217,7 @@ { curTail = tail.waitForChange(curTail); if (!threadActive) -return false; +break; } #else @@ -229,6 +228,7 @@ if (!frameStat[curHead]) return false; +frameStat[curHead] = false; pic.colorSpace = colorSpace; pic.bitDepth = depth; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] input: fix race condition
When eof detected, threadMain may overwrite frameStat[tail.get()] = false before read, no head check here. So, I moved frameStat[] = false to consumer. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Sunday, June 22, 2014 1:29 AM To: Development for x265 Subject: Re: [x265] input: fix race condition On Sat, Jun 21, 2014 at 4:45 AM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1403343771 -32400 # Sat Jun 21 18:42:51 2014 +0900 # Node ID 31e1104b97521bde2abe64a3f91d63e673f95c90 # Parent fe370292c232ec9a629d191791271b71c1c6f354 input: fix race condition Can you describe the bug this fixes, is there a network filesystem involved? diff -r fe370292c232 -r 31e1104b9752 source/input/y4m.cpp --- a/source/input/y4m.cpp Fri Jun 20 16:41:11 2014 -0700 +++ b/source/input/y4m.cpp Sat Jun 21 18:42:51 2014 +0900 @@ -390,7 +390,6 @@ /* open the throttle at the end, allow reader to consume * remaining valid queue entries */ threadActive = false; -frameStat[tail.get()] = false; tail.set(QUEUE_SIZE); } @@ -406,7 +405,7 @@ { curTail = tail.waitForChange(curTail); if (!threadActive) -return false; +break; } #else @@ -417,6 +416,7 @@ if (!frameStat[curHead]) return false; +frameStat[curHead] = false; pic.bitDepth = depth; pic.colorSpace = colorSpace; diff -r fe370292c232 -r 31e1104b9752 source/input/yuv.cpp --- a/source/input/yuv.cpp Fri Jun 20 16:41:11 2014 -0700 +++ b/source/input/yuv.cpp Sat Jun 21 18:42:51 2014 +0900 @@ -182,7 +182,6 @@ } threadActive = false; -frameStat[tail.get()] = false; tail.set(QUEUE_SIZE); } @@ -218,7 +217,7 @@ { curTail = tail.waitForChange(curTail); if (!threadActive) -return false; +break; } #else @@ -229,6 +228,7 @@ if (!frameStat[curHead]) return false; +frameStat[curHead] = false; pic.colorSpace = colorSpace; pic.bitDepth = depth; ___ x265-devel mailing list x265-devel@videolan.org 
https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] keep TComPic until next call when recpic is exported as pic_out
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1403183616 -32400 # Thu Jun 19 22:13:36 2014 +0900 # Node ID 17b083a024a2ed2b209cc0e8f1fbd3cf90956bd5 # Parent d2a13e8541f45fcd4c7d554e0b7c89a82f2a0ff9 keep TComPic until next call when recpic is exported as pic_out diff -r d2a13e8541f4 -r 17b083a024a2 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Thu Jun 19 10:28:47 2014 +0530 +++ b/source/encoder/encoder.cpp Thu Jun 19 22:13:36 2014 +0900 @@ -59,6 +59,7 @@ m_frameEncoder = NULL; m_rateControl = NULL; m_dpb = NULL; +m_exportedPic = NULL; m_nals = NULL; m_packetData = NULL; m_outputCount = 0; @@ -132,6 +133,12 @@ void Encoder::destroy() { +if (m_exportedPic) +{ +ATOMIC_DEC(&m_exportedPic->m_countRefEncoders); +m_exportedPic = NULL; +} + if (m_frameEncoder) { for (int i = 0; i < m_totalFrameThreads; i++) @@ -255,6 +262,13 @@ if (m_aborted) return -1; +if (m_exportedPic) +{ +ATOMIC_DEC(&m_exportedPic->m_countRefEncoders); +m_exportedPic = NULL; +m_dpb->recycleUnreferenced(); +} + if (pic_in) { if (pic_in->colorSpace != m_param->internalCsp) @@ -434,8 +448,14 @@ finishFrameStats(out, curEncoder, bits); // Allow this frame to be recycled if no frame encoders are using it for reference -ATOMIC_DEC(&out->m_countRefEncoders); -m_dpb->recycleUnreferenced(); +if (!pic_out) +{ +ATOMIC_DEC(&out->m_countRefEncoders); +m_dpb->recycleUnreferenced(); +} +else +m_exportedPic = out; + ret = 1; } diff -r d2a13e8541f4 -r 17b083a024a2 source/encoder/encoder.h --- a/source/encoder/encoder.h Thu Jun 19 10:28:47 2014 +0530 +++ b/source/encoder/encoder.h Thu Jun 19 22:13:36 2014 +0900 @@ -87,6 +87,8 @@ FrameEncoder* m_frameEncoder; DPB* m_dpb; +TComPic* m_exportedPic; + /* frame parallelism */ int m_curEncoder; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix: TOPSKIP refers outside of picture [OUTPUT CHANGE]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1402906984 -32400 # Mon Jun 16 17:23:04 2014 +0900 # Node ID 2ece20b5e178bfe66da9ca6d37cb80454bc23f36 # Parent e69a427e461f8c8944b68323a3d77295b65ec779 fix: TOPSKIP refers outside of picture [OUTPUT CHANGE] diff -r e69a427e461f -r 2ece20b5e178 source/Lib/TLibEncoder/TEncCu.h --- a/source/Lib/TLibEncoder/TEncCu.h Thu Jun 12 22:53:47 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncCu.h Mon Jun 16 17:23:04 2014 +0900 @@ -159,7 +159,7 @@ void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth); void xCompressCU(TComDataCU* outBestCU, TComDataCU* outTempCU, uint32_t depth, bool bInsidePicture, PartSize parentSize = SIZE_NONE); void xCompressIntraCU(TComDataCU* outBestCU, TComDataCU* outTempCU, uint32_t depth, bool bInsidePicture); -void xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t partitionIndex, uint8_t minDepth); +void xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t partitionIndex, uint8_t minDepth); void xEncodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture); void xCheckBestMode(TComDataCU* outBestCU, TComDataCU* outTempCU, uint32_t depth); diff -r e69a427e461f -r 2ece20b5e178 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Jun 12 22:53:47 2014 -0500 +++ b/source/encoder/compress.cpp Mon Jun 16 17:23:04 2014 +0900 @@ -338,25 +338,30 @@ } } -void TEncCu::xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, uint8_t minDepth) +void TEncCu::xCompressInterCU(TComDataCU* outBestCU, TComDataCU* outTempCU, TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, uint8_t minDepth) { TComPic* pic = outTempCU-getPic(); +uint32_t absPartIdx = outTempCU-getZorderIdxInCU(); if (depth == 0) { // 
get original YUV data from picture -m_origYuv[depth]-copyFromPicYuv(pic-getPicYuvOrg(), outTempCU-getAddr(), outTempCU-getZorderIdxInCU()); +m_origYuv[depth]-copyFromPicYuv(pic-getPicYuvOrg(), outTempCU-getAddr(), absPartIdx); } else { // copy partition YUV from depth 0 CTU cache -m_origYuv[0]-copyPartToYuv(m_origYuv[depth], outTempCU-getZorderIdxInCU()); +m_origYuv[0]-copyPartToYuv(m_origYuv[depth], absPartIdx); } // variables for fast encoder decision bool bSubBranch = true; int qp = outTempCU-getQP(0); +#if TOPSKIP +bool bInsidePictureParent = bInsidePicture; +#endif + TComSlice* slice = outTempCU-getSlice(); if (!bInsidePicture) { @@ -375,7 +380,7 @@ // We need to split, so don't try these modes. TComYuv* tempYuv = NULL; #if TOPSKIP -if (depth == 0) +if (bInsidePicture !bInsidePictureParent) { TComDataCU* colocated0 = slice-getNumRefIdx(REF_PIC_LIST_0) 0 ? slice-getRefPic(REF_PIC_LIST_0, 0)-getCU(outTempCU-getAddr()) : NULL; TComDataCU* colocated1 = slice-getNumRefIdx(REF_PIC_LIST_1) 0 ? 
slice-getRefPic(REF_PIC_LIST_1, 0)-getCU(outTempCU-getAddr()) : NULL; @@ -383,19 +388,21 @@ char previousQP = colocated0-getQP(0); uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4; uint32_t sum0 = 0, sum1 = 0; -for (uint32_t i = 0; i outTempCU-getTotalNumPart(); i = i + 4) +uint32_t numPartitions = outTempCU-getTotalNumPart(); +for (uint32_t i = 0; i numPartitions; i = i + 4) { -if (colocated0 colocated0-getDepth(i) minDepth0) -minDepth0 = colocated0-getDepth(i); -if (colocated1 colocated1-getDepth(i) minDepth1) -minDepth1 = colocated1-getDepth(i); +uint32_t j = absPartIdx + i; +if (colocated0 colocated0-getDepth(j) minDepth0) +minDepth0 = colocated0-getDepth(j); +if (colocated1 colocated1-getDepth(j) minDepth1) +minDepth1 = colocated1-getDepth(j); if (colocated0) -sum0 += (colocated0-getDepth(i) * 4); +sum0 += (colocated0-getDepth(j) * 4); if (colocated1) -sum1 += (colocated1-getDepth(i) * 4); +sum1 += (colocated1-getDepth(j) * 4); } -uint32_t avgDepth2 = (sum0 + sum1) / outTempCU-getTotalNumPart(); +uint32_t avgDepth2 = (sum0 + sum1) / numPartitions; minDepth = X265_MIN(minDepth0, minDepth1); if (((currentQP - previousQP) 0) || (((currentQP - previousQP) = 0) ((avgDepth2 - 2 * minDepth) 1))) delta = 0; @@ -686,7 +693,7 @@ /* Copy Yuv data to picture Yuv */ if (m_param-rdLevel != 0) -xCopyYuv2Pic(pic, outBestCU
[x265] Encoder::encode(): don't return 0 while flushing.
# HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1402983802 -32400 # Tue Jun 17 14:43:22 2014 +0900 # Node ID cf222575f2032ff5824d58470d63561f79c3905e # Parent f25ed861850950d2bd31c51dbc9267333515af88 Encoder::encode(): don't return 0 while flushing. diff -r f25ed8618509 -r cf222575f203 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Mon Jun 16 20:32:13 2014 -0500 +++ b/source/encoder/encoder.cpp Tue Jun 17 14:43:22 2014 +0900 @@ -481,6 +481,11 @@ curEncoder->m_enable.trigger(); } +if (!ret && flush && fenc) +{ +return encode(flush, NULL, pic_out, nalunits); +} + return ret; } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] TComTrQuant: lambda for each Cb and Cr
It is not intended. Please tell me the configuration. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Saturday, June 14, 2014 8:56 AM To: Development for x265 Subject: Re: [x265] TComTrQuant: lambda for each Cb and Cr On Tue, Jun 10, 2014 at 11:26 AM, Steve Borho st...@borho.org wrote: On Sat, Jun 7, 2014 at 9:01 PM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1402192642 -32400 # Sun Jun 08 10:57:22 2014 +0900 # Node ID 188e115f07427c759f47154a864467be21b5b6a1 # Parent e5656f1e190453efa84732269b259a6dee608ff9 TComTrQuant: lambda for each Cb and Cr Queued for testing. This commit message only describes about a third of what this patch does, but it does look like it shouldn't change outputs if the chroma weights are not changed from the default. Our testing showed this commit (or perhaps the tskip changes, since they were tested together) changed the encoder outputs. Was it intentional to change outputs with default chroma weights? -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix: emms issue
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1402449456 -32400 # Wed Jun 11 10:17:36 2014 +0900 # Node ID 40ad5bf953cd6b80d97aba803f321ba655a388f7 # Parent d0bacf50eb951fe5f91e419072399b3dae8926d9 fix: emms issue diff -r d0bacf50eb95 -r 40ad5bf953cd source/encoder/compress.cpp --- a/source/encoder/compress.cpp Tue Jun 10 18:37:26 2014 -0500 +++ b/source/encoder/compress.cpp Wed Jun 11 10:17:36 2014 +0900 @@ -378,6 +378,8 @@ #if TOPSKIP if (depth == 0) { +x265_emms(); + TComDataCU* colocated0 = slice-getNumRefIdx(REF_PIC_LIST_0) 0 ? slice-getRefPic(REF_PIC_LIST_0, 0)-getCU(outTempCU-getAddr()) : NULL; TComDataCU* colocated1 = slice-getNumRefIdx(REF_PIC_LIST_1) 0 ? slice-getRefPic(REF_PIC_LIST_1, 0)-getCU(outTempCU-getAddr()) : NULL; char currentQP = outTempCU-getQP(0); @@ -648,6 +650,8 @@ if (outBestCU != 0) #endif { +x265_emms(); + uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountNeigh = 0, totalCountCU = 0; double avgCost = 0; TComDataCU* above = outTempCU-getCUAbove(); @@ -836,6 +840,8 @@ xCopyYuv2Pic(pic, outBestCU-getAddr(), outBestCU-getZorderIdxInCU(), depth); } +x265_emms(); + if (!bInsidePicture) return; /* Assert if Best prediction mode is NONE ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] TComTrQuant: lambda for each Cb and Cr
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1402192642 -32400 # Sun Jun 08 10:57:22 2014 +0900 # Node ID 188e115f07427c759f47154a864467be21b5b6a1 # Parent e5656f1e190453efa84732269b259a6dee608ff9 TComTrQuant: lambda for each Cb and Cr diff -r e5656f1e1904 -r 188e115f0742 source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Jun 05 22:45:25 2014 -0500 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Sun Jun 08 10:57:22 2014 +0900 @@ -262,49 +262,35 @@ } uint32_t TComTrQuant::xQuant(TComDataCU* cu, int32_t* coef, coeff_t* qCoef, int trSize, - TextType ttype, uint32_t absPartIdx, int32_t *lastPos, bool curUseRDOQ) + TextType ttype, uint32_t absPartIdx, int32_t *lastPos) { -uint32_t acSum = 0; -int add = 0; -bool useRDOQ = m_useRDOQ curUseRDOQ; +const uint32_t log2TrSize = g_convertToBit[trSize] + 2; +TUEntropyCodingParameters codingParameters; +getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, log2TrSize, ttype); +int deltaU[32 * 32]; -if (useRDOQ) +int scalingListType = (cu-isIntra(absPartIdx) ? 0 : 3) + ttype; +X265_CHECK(scalingListType 6, scaling list type out of range\n); +int32_t *quantCoeff = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2); + +int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform + +int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; +int add = (cu-getSlice()-getSliceType() == I_SLICE ? 
171 : 85) (qbits - 9); + +int numCoeff = 1 log2TrSize * 2; +uint32_t acSum = primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, numCoeff, lastPos); + +if (acSum = 2 cu-getSlice()-getPPS()-getSignHideFlag()) { -acSum = xRateDistOptQuant(cu, coef, qCoef, trSize, ttype, absPartIdx, lastPos); -} -else -{ -const uint32_t log2TrSize = g_convertToBit[trSize] + 2; -TUEntropyCodingParameters codingParameters; -getTUEntropyCodingParameters(cu, codingParameters, absPartIdx, log2TrSize, ttype); -int deltaU[32 * 32]; - -int scalingListType = (cu-isIntra(absPartIdx) ? 0 : 3) + ttype; -X265_CHECK(scalingListType 6, scaling list type out of range\n); -int32_t *quantCoeff = 0; -quantCoeff = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2); - -int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform - -int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; -add = (cu-getSlice()-getSliceType() == I_SLICE ? 171 : 85) (qbits - 9); - -int numCoeff = 1 log2TrSize * 2; -acSum += primitives.quant(coef, quantCoeff, deltaU, qCoef, qbits, add, numCoeff, lastPos); - -if (cu-getSlice()-getPPS()-getSignHideFlag() acSum = 2) -{ -signBitHidingHDQ(qCoef, coef, deltaU, codingParameters); -} +signBitHidingHDQ(qCoef, coef, deltaU, codingParameters); } return acSum; } -void TComTrQuant::init(uint32_t maxTrSize, bool useRDOQ, bool useTransformSkipFast) +void TComTrQuant::init(bool useRDOQ) { -m_maxTrSize= maxTrSize; -m_useRDOQ = useRDOQ; -m_useTransformSkipFast = useTransformSkipFast; +m_useRDOQ = useRDOQ; } uint32_t TComTrQuant::transformNxN(TComDataCU* cu, @@ -363,7 +349,12 @@ } } } -return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos, curUseRDOQ); + +if (m_useRDOQ curUseRDOQ) +{ +return xRateDistOptQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos); +} +return xQuant(cu, m_tmpCoeff, coeff, trSize, ttype, absPartIdx, lastPos); } void TComTrQuant::invtransformNxN(bool 
transQuantBypass, uint32_t mode, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t trSize, int scalingListType, bool useTransformSkip, int lastPos) @@ -525,10 +516,9 @@ X265_CHECK(scalingListType 6, scaling list type out of range\n); int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; // Right shift of non-RDOQ quantizer; level = (coeff*Q + offset)q_bits -double *errScaleOrg = getErrScaleCoeff(scalingListType, log2TrSize - 2, m_qpParam.m_rem); -int32_t *qCoefOrg = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2); -int32_t *qCoef = qCoefOrg; -double *errScale = errScaleOrg; +int add = (1 (qbits - 1)); +double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, m_qpParam.m_rem); +int32_t *qCoef = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2); double costCoeff[32 * 32]; double costSig[32 * 32]; @@ -575,9 +565,9 @@ int Q = qCoef[blkPos]; double scaleFactor = errScale[blkPos]; int
Re: [x265] fix: Bus error
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1401861502 -32400 # Wed Jun 04 14:58:22 2014 +0900 # Node ID dbddb764220b340044f25bd47a07949a77b1b827 # Parent f2479eb454b0ef50e50a7df5ab877ced2cfe5db6 fix: uninitialized read m_totalFrameThreads diff -r f2479eb454b0 -r dbddb764220b source/Lib/TLibEncoder/NALwrite.cpp --- a/source/Lib/TLibEncoder/NALwrite.cpp Tue Jun 03 15:24:22 2014 -0500 +++ b/source/Lib/TLibEncoder/NALwrite.cpp Wed Jun 04 14:58:22 2014 +0900 @@ -108,7 +108,7 @@ } uint32_t i = packetSize; -out = (uint8_t*)realloc(out, nalsize + 4); +out = (uint8_t*)realloc(out, packetSize + nalsize + 4); memcpy(out + packetSize, emulation, nalsize); packetSize += nalsize; diff -r f2479eb454b0 -r dbddb764220b source/encoder/encoder.cpp --- a/source/encoder/encoder.cppTue Jun 03 15:24:22 2014 -0500 +++ b/source/encoder/encoder.cppWed Jun 04 14:58:22 2014 +0900 @@ -174,6 +174,7 @@ void Encoder::init() { +m_totalFrameThreads = param-frameNumThreads; if (m_frameEncoder) { int numRows = (param-sourceHeight + g_maxCUSize - 1) / g_maxCUSize; @@ -189,7 +190,6 @@ m_rateControl-init(m_frameEncoder[0].m_sps); m_lookahead-init(); m_encodeStartTime = x265_mdate(); -m_totalFrameThreads = param-frameNumThreads; } int Encoder::getStreamHeaders(NALUnitEBSP **nalunits) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] fix: Bus error
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1401859296 -32400 # Wed Jun 04 14:21:36 2014 +0900 # Node ID f256e982b79c0c1a32810500474fa781ee814ac9 # Parent f2479eb454b0ef50e50a7df5ab877ced2cfe5db6 fix: Bus error diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibCommon/SEI.h --- a/source/Lib/TLibCommon/SEI.h Tue Jun 03 15:24:22 2014 -0500 +++ b/source/Lib/TLibCommon/SEI.h Wed Jun 04 14:21:36 2014 +0900 @@ -185,7 +185,10 @@ PayloadType payloadType() const { return PICTURE_TIMING; } -SEIPictureTiming() {} +SEIPictureTiming() +: m_numNalusInDuMinus1(0) +, m_duCpbRemovalDelayMinus1(0) +{} virtual ~SEIPictureTiming() { diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibCommon/TComBitStream.cpp --- a/source/Lib/TLibCommon/TComBitStream.cpp Tue Jun 03 15:24:22 2014 -0500 +++ b/source/Lib/TLibCommon/TComBitStream.cpp Wed Jun 04 14:21:36 2014 +0900 @@ -205,9 +205,9 @@ if (temp) { ::memcpy(temp, m_fifo, m_fsize); -X265_FREE(m_fifo); m_fifo = temp; m_buffsize *= 2; +X265_FREE(m_fifo); } else { diff -r f2479eb454b0 -r f256e982b79c source/Lib/TLibEncoder/NALwrite.cpp --- a/source/Lib/TLibEncoder/NALwrite.cpp Tue Jun 03 15:24:22 2014 -0500 +++ b/source/Lib/TLibEncoder/NALwrite.cpp Wed Jun 04 14:21:36 2014 +0900 @@ -108,7 +108,7 @@ } uint32_t i = packetSize; -out = (uint8_t*)realloc(out, nalsize + 4); +out = (uint8_t*)realloc(out, packetSize + nalsize + 4); memcpy(out + packetSize, emulation, nalsize); packetSize += nalsize; diff -r f2479eb454b0 -r f256e982b79c source/encoder/encoder.cpp --- a/source/encoder/encoder.cppTue Jun 03 15:24:22 2014 -0500 +++ b/source/encoder/encoder.cppWed Jun 04 14:21:36 2014 +0900 @@ -174,6 +174,7 @@ void Encoder::init() { +m_totalFrameThreads = param-frameNumThreads; if (m_frameEncoder) { int numRows = (param-sourceHeight + g_maxCUSize - 1) / g_maxCUSize; @@ -189,7 +190,6 @@ m_rateControl-init(m_frameEncoder[0].m_sps); m_lookahead-init(); m_encodeStartTime = x265_mdate(); -m_totalFrameThreads = param-frameNumThreads; } 
int Encoder::getStreamHeaders(NALUnitEBSP **nalunits) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] refine cbf==0 path: remove clearing coeff and resi
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1401677099 -32400 # Mon Jun 02 11:44:59 2014 +0900 # Node ID 73f86312c2e0aa5a105e84b0045478e02c8a03e7 # Parent a5998df9b12ef81e48e7c5b89219a74276a75f27 refine cbf==0 path: remove clearing coeff and resi diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.cpp --- a/source/Lib/TLibEncoder/TEncEntropy.cppMon Jun 02 07:36:20 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncEntropy.cppMon Jun 02 11:44:59 2014 +0900 @@ -202,7 +202,6 @@ void TEncEntropy::initTUEntropySection(TComTURecurse *tuIterator, uint32_t splitMode, uint32_t absPartIdxStep, uint32_t m_absPartIdxTU) { -tuIterator-m_partOffset= 0; tuIterator-m_section = 0; tuIterator-m_absPartIdxTURelCU = m_absPartIdxTU; tuIterator-m_splitMode = splitMode; diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncEntropy.h --- a/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 07:36:20 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncEntropy.h Mon Jun 02 11:44:59 2014 +0900 @@ -66,7 +66,6 @@ uint32_t m_splitMode; uint32_t m_absPartIdxTURelCU; uint32_t m_absPartIdxStep; -uint32_t m_partOffset; }; // diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSbac.cpp --- a/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 07:36:20 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Jun 02 11:44:59 2014 +0900 @@ -2120,8 +2120,9 @@ // compute number of significant coefficients uint32_t numSig = primitives.count_nonzero(coeff, trSize * trSize); -if (numSig == 0) -return; +#if CHECKED_BUILD || _DEBUG +X265_CHECK(numSig 0, cbf check fail); +#endif bool beValid; if (cu-getCUTransquantBypass(absPartIdx)) diff -r a5998df9b12e -r 73f86312c2e0 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 07:36:20 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 02 11:44:59 2014 +0900 @@ -408,8 +408,8 @@ coeff_t* coeff = m_qtTempCoeff[0][qtLayer] + coeffOffsetY; int16_t* reconQt= 
m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx); - X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, width is not max CU size\n); +const uint32_t reconQtStride = MAX_CU_SIZE; uint32_t zorder = cu-getZorderIdxInCU() + absPartIdx; pixel* reconIPred = cu-getPic()-getPicYuvRec()-getLumaAddr(cu-getAddr(), zorder); @@ -443,25 +443,29 @@ //--- set coded block flag --- cu-setCbfSubParts((absSum ? 1 : 0) trDepth, TEXT_LUMA, absPartIdx, fullDepth); -//--- inverse transform --- if (absSum) { +//--- inverse transform --- int scalingListType = 0 + TEXT_LUMA; -X265_CHECK(scalingListType 6, scalingListType is too large %d\n, scalingListType); +X265_CHECK(scalingListType 6, scalingListType invalid %d\n, scalingListType); m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), cu-getLumaIntraDir(absPartIdx), residual, stride, coeff, tuSize, scalingListType, useTransformSkip, lastPos); +X265_CHECK(tuSize = 32, tuSize is too large %d\n, tuSize); +//= reconstruction = +primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride); +//= update distortion = +outDist += primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride); } else { -int16_t* resiTmp = residual; +#if CHECKED_BUILD || _DEBUG memset(coeff, 0, sizeof(coeff_t) * tuSize * tuSize); -primitives.blockfill_s[sizeIdx](resiTmp, stride, 0); +#endif +//= reconstruction = +primitives.luma_copy_ps[part](reconQt,reconQtStride,pred, stride); +primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride); +//= update distortion = +outDist += primitives.sse_pp[part](pred, stride, fenc, stride); } - -X265_CHECK(tuSize = 32, tuSize is too large %d\n, tuSize); -//= reconstruction = -primitives.calcrecon[sizeIdx](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride); -//= update distortion = -outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride); } void TEncSearch::xIntraCodingChromaBlk(TComDataCU* cu, @@ -519,67 
+523,67 @@ primitives.calcresidual[sizeIdx](fenc, pred, residual, stride); //= transform and quantization = +//--- init rate estimation arrays for RDOQ --- +if (useTransformSkipChroma ? m_cfg-bEnableRDOQTS : m_cfg-bEnableRDOQ) { -//--- init rate estimation arrays for RDOQ
Re: [x265] rdcost: overflow check by integer
Linux (CentOS) needs it. FreeBSD also. -----Original Message----- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Monday, May 26, 2014 11:48 AM To: Development for x265 Subject: Re: [x265] rdcost: overflow check by integer On Sat, May 24, 2014 at 10:48 PM, Satoshi Nakagawa <nakagawa...@oki.com> wrote: Aside: Most systems I've seen seem to do this via -D... or whatever. It's simpler. Thanks. # HG changeset patch # User Satoshi Nakagawa <nakagawa...@oki.com> # Date 1400989083 -32400 # Sun May 25 12:38:03 2014 +0900 # Node ID 54a0dc2278494f7ec3b74f3f06a8521c805af79a # Parent 5e8cce428457f63fd9b8e18dafed2f8bed674d53 add -D__STDC_LIMIT_MACROS=1 diff -r 5e8cce428457 -r 54a0dc227849 source/CMakeLists.txt --- a/source/CMakeLists.txt Fri May 23 09:11:15 2014 -0500 +++ b/source/CMakeLists.txt Sun May 25 12:38:03 2014 +0900 @@ -103,6 +103,8 @@ endif() endif(MSVC) +add_definitions(-D__STDC_LIMIT_MACROS=1) + check_include_files(inttypes.h HAVE_INT_TYPES_H) if(HAVE_INT_TYPES_H) add_definitions(-DHAVE_INT_TYPES_H=1) Is this still necessary given the way I modified compat/msvc/stdint.h? If so, for what compiler? -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel