x265中计算RD Cost的几种方式

x265中除了传统的RD Cost外，还有Psy-RdCost和SSIM-RdCost，这三种RD Cost的使用是通过命令行参数控制的，如果不设置，则默认使用的是Psy-RDCost,相关命令行参数如下：

--[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default 2.0
--[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default 0.0
--[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default disabled
--dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default 0.00

一、Psycho-visual Rate Distortion

x265中计算Psy-RD Cost的公式如下：

$RD = D + \lambda _{1}*psyRD*psyCost + \lambda _{2} * Rate$

其中D指的是重建块的失真，psyRD表示的是psy-RDCost的强度，psyCost指的是重建块和原始块AC能量的差，Rate指的是编码当前块所需的码率；$\lambda_{1}$和$\lambda_{2}$分别对应代码中的m_lambda和m_lambda2。x265中计算PsyRDCost的代码如下：

 /* Psycho-visual RD cost.
  *
  * Returns  distortion
  *        + ((m_lambda  * m_psyRd * psycost) >> 24)
  *        + ((bits      * m_lambda2)         >> 8)
  *
  * distortion  distortion of the block being evaluated
  * bits        number of bits needed to code the block
  * psycost     psycho-visual cost term supplied by the caller
  *
  * The right shifts rescale the two weighted terms before they are added to
  * the distortion (presumably undoing fixed-point scaling of the lambda and
  * psy-rd factors; confirm against where those members are set). */
 inline uint64_t calcPsyRdCost(sse_t distortion, uint32_t bits, uint32_t psycost) const
 {
     /* Sanity check that neither product can wrap a 64-bit value.  The two
      * variants differ only in the printf conversion used for `distortion`
      * (%u below 10-bit depth, X265_LL otherwise). */
#if X265_DEPTH < 10
     X265_CHECK((bits <= (UINT64_MAX / m_lambda2)) && (psycost <= UINT64_MAX / (m_lambda * m_psyRd)),
                "calcPsyRdCost wrap detected dist: %u, bits: %u, lambda: " X265_LL ", lambda2: " X265_LL "\n",
                distortion, bits, m_lambda, m_lambda2);
#else
     X265_CHECK((bits <= (UINT64_MAX / m_lambda2)) && (psycost <= UINT64_MAX / (m_lambda * m_psyRd)),
                "calcPsyRdCost wrap detected dist: " X265_LL ", bits: %u, lambda: " X265_LL ", lambda2: " X265_LL "\n",
                distortion, bits, m_lambda, m_lambda2);
#endif
     return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
 }

即psy-RD Cost比传统的RD Cost计算多了一项m_lambda * m_psyRd * psycost，其中m_lambda和QP有关，m_psyRd由命令行参数psy-rd控制，表示Psy-RD Cost的强度

 /* Scale PSY RD factor by a slice type factor */
/* Per-slice-type multipliers.  The product below is shifted right by 8, so an
 * entry of 256 leaves m_psyRdBase unchanged, 300 raises it and 96 lowers it. */
static const uint32_t psyScaleFix8[3] = { 300, 256, 96 }; /* B, P, I */
/* The table is indexed directly by slice.m_sliceType, in the B, P, I order
 * noted on the table itself. */
m_psyRd = (m_psyRdBase * psyScaleFix8[slice.m_sliceType]) >> 8;

/* Forward param.psyRd to m_rdCost as the psy-RD scale. */
m_rdCost.setPsyRdScale(param.psyRd);
/* Store the psy-RD strength as an integer base value:
 * floor(65536 * scale * 0.33), truncated to uint32_t. */
void setPsyRdScale(double scale)
{
    m_psyRdBase = static_cast<uint32_t>(floor(65536.0 * scale * 0.33));
}

psyCost的计算代码如下：

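这里的psyCost表示重建块的AC energy和原始块的AC energy之差的绝对值（大于8x8的块按8x8子块逐个计算后累加），其中每个块的AC能量为 $AC_{energy} = SATD(block,0) - (SAD(block,0) >> 2)$ （之所以像素块和0的SATD减去其和0的SAD可以表示AC能量，是因为：像素块与全零块做SATD，相当于对像素块本身做哈达玛变换后求所有系数（DC+AC）的绝对值之和；而像素值非负，像素块与全零块的SAD就是块内像素之和，它与哈达玛变换的DC系数成正比。因此从SATD中减去按比例缩放后的SAD（代码中为SAD >> 2），剩下的部分近似为AC系数的能量）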

template<int size>
int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride)
{static pixel zeroBuf[8] /* = { 0 } */;if (size){int dim = 1 << (size + 2);uint32_t totEnergy = 0;for (int i = 0; i < dim; i += 8){for (int j = 0; j < dim; j+= 8){/* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);totEnergy += abs(sourceEnergy - reconEnergy);}}return totEnergy;}else{/* 4x4 is too small for sa8d */int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);return abs(sourceEnergy - reconEnergy);}
}

二、SSIM Rate Distortion

x265中计算SSIM RD Cost的公式如下：

$RD = D + \lambda _{1}*ssimCost + \lambda _{2} * Rate$

x265中的计算代码如下：

    inline uint64_t calcSsimRdCost(uint64_t distortion, uint32_t bits, uint32_t ssimCost) const{
#if X265_DEPTH < 10X265_CHECK((bits <= (UINT64_MAX / m_lambda2)) && (ssimCost <= UINT64_MAX / m_lambda),"calcPsyRdCost wrap detected dist: " X265_LL " bits: %u, lambda: " X265_LL ", lambda2: " X265_LL "\n",distortion, bits, m_lambda, m_lambda2);
#elseX265_CHECK((bits <= (UINT64_MAX / m_lambda2)) && (ssimCost <= UINT64_MAX / m_lambda),"calcPsyRdCost wrap detected dist: " X265_LL ", bits: %u, lambda: " X265_LL ", lambda2: " X265_LL "\n",distortion, bits, m_lambda, m_lambda2);
#endifreturn distortion + ((m_lambda * ssimCost) >> 14) + ((bits * m_lambda2) >> 8);}

其中SSIMCost为SSIM指标下的失真，SSIMCost的计算公式推导参考论文

x265中的计算如下

uint64_t Quant::ssimDistortion(const CUData& cu, const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx)
{static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5); // 416static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5); // 235963int shift = (X265_DEPTH - 8);int trSize = 1 << log2TrSize;uint64_t ssDc = 0, ssBlock = 0, ssAc = 0;// Calculation of (X(0) - Y(0)) * (X(0) - Y(0)), DCssDc = 0; //ssDc 表示整个块内所有4x4为块的左上角像素差值的平方和for (int y = 0; y < trSize; y += 4){for (int x = 0; x < trSize; x += 4){int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // copy of residual coeffssDc += temp * temp;}}// Calculation of (X(k) - Y(k)) * (X(k) - Y(k)), ACssBlock = 0; //ssBlock 表示整个块内的原始像素-重建像素的差值的平方和uint64_t ac_k = 0; //ac_k表示整个块内原始像素的平方和primitives.cu[log2TrSize - 2].ssimDist(fenc, fStride, recon, rstride, &ssBlock, shift, &ac_k);ssAc = ssBlock - ssDc;// 1. Calculation of fdc'// Calculate numerator of dc normalization factor 计算dc归一化因子的分子uint64_t fDc_num = 0;// 2. Calculate dc componentuint64_t dc_k = 0; //表示整个块内所有4x4为块的左上角像素平方和for (int block_yy = 0; block_yy < trSize; block_yy += 4){for (int block_xx = 0; block_xx < trSize; block_xx += 4){uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift;dc_k += temp * temp;}}fDc_num = (2 * dc_k)  + (trSize * trSize * ssim_c1); // 16 pixels -> for each 4x4 blockfDc_num /= ((trSize >> 2) * (trSize >> 2));// 1. Calculation of fac'// Calculate numerator of ac normalization factoruint64_t fAc_num = 0;// 2. Calculate ac componentac_k -= dc_k;double s = 1 + 0.005 * cu.m_qp[absPartIdx];fAc_num = ac_k + uint64_t(s * ac_k) + ssim_c2;fAc_num /= ((trSize >> 2) * (trSize >> 2));// Calculate dc and ac normalization factoruint64_t ssim_distortion = ((ssDc * cu.m_fDc_den[ttype]) / fDc_num) + ((ssAc * cu.m_fAc_den[ttype]) / fAc_num);return ssim_distortion;
}

三、Rd SATD Cost

RD SATD Cost主要是用于帧内预测模式粗选的时候，为了降低复杂度，使用残差的哈达玛变换近似代替失真，这种方法省去了变换、量化、反量化、反变换等过程，可以大大降低复杂度。其计算公式如下：

$RD = SATD(s,p) + \lambda_{mode}*R_{mode}$

其中，SATD表示原始块和预测块残差的SATD，$R_{mode}$仅为编码当前模式所需的比特数。

    /* Low-complexity RD cost: the caller-supplied distortion estimate plus the
     * lambda-weighted bit count.
     *
     * sadCost  distortion estimate supplied by the caller
     * bits     number of bits being charged
     *
     * The rate term is (bits * m_lambda + 128) >> 8, i.e. a divide by 256 with
     * the +128 providing round-to-nearest. */
    inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
    {
        /* Sanity check that bits * m_lambda + 128 cannot wrap a 64-bit value. */
        X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
                   "calcRdSADCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
                   sadCost, bits, m_lambda);

        return ((bits * m_lambda + 128) >> 8) + sadCost;
    }