其中第一步的量化与普通量化相同,步骤如下:

代码理解见注释(仅个人理解,欢迎指正):

/**
 * Rate-distortion optimised quantisation (RDOQ) of one transform block.
 *
 * The function works in the stages that are visible below:
 *   1. Pre-quantise every coefficient with plain rounding and record the cost
 *      of coding it as zero.
 *   2. For every coefficient at or before the first non-zero one met while
 *      walking the scan backwards, let xGetCodedLevel() pick the level and
 *      its cost.
 *   3. Per coefficient group (CG), compare "keep the CG" against "zero the
 *      whole CG" and keep the cheaper one.
 *   4. Choose the last significant position (or an all-zero block) by cost.
 *   5. Restore signs, clear everything after the chosen last position and,
 *      when sign hiding is enabled, adjust one level per CG so that the
 *      parity of the CG matches the sign that is to be hidden.
 *
 * @param tu        Transform unit; the quantised levels are written to
 *                  tu.getCoeffs(compID).
 * @param compID    Colour component being quantised.
 * @param pSrc      Source (transformed) coefficients.
 * @param uiAbsSum  Incremented by the sum of the absolute levels that are kept.
 *                  It is only added to here, never reset.
 * @param cQP       Quantisation parameter (per()/rem() are read).
 * @param ctx       Context state from which the fractional bit costs are read.
 */
void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx)
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps            = *tu.cs->sps;
  const CompArea &rect      = tu.blocks[compID];
  const uint32_t uiWidth    = rect.width;
  const uint32_t uiHeight   = rect.height;
  const ChannelType chType  = toChannelType(compID);
  const int channelBitDepth = sps.getBitDepth( chType );

  const bool extendedPrecision     = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);

  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);

  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */

  // Represents scaling through forward transform
  int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  if (tu.mtsIdx[compID] == MTS_SKIP && extendedPrecision)
  {
    iTransformShift = std::max<int>(0, iTransformShift);
  }

  double         d64BlockUncodedCost = 0;   // cost of coding the whole block as zero
  const uint32_t uiLog2BlockWidth    = floorLog2(uiWidth);
  const uint32_t uiLog2BlockHeight   = floorLog2(uiHeight);
  const uint32_t uiMaxNumCoeff       = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  int scalingListType = getScalingListType(tu.cu->predMode, compID);

  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff *plSrcCoeff = pSrc.buf;
        TCoeff *piDstCoeff = tu.getCoeffs(compID).buf;

  // Per-coefficient work buffers owned by the object; cleared below for this block.
  double *pdCostCoeff  = m_pdCostCoeff;
  double *pdCostSig    = m_pdCostSig;
  double *pdCostCoeff0 = m_pdCostCoeff0;
  int    *rateIncUp    = m_rateIncUp;
  int    *rateIncDown  = m_rateIncDown;
  int    *sigRateDelta = m_sigRateDelta;
  TCoeff *deltaU       = m_deltaU;

  memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff);
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );

  const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
  const bool   isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
  const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
  const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
  const bool   disableSMForLFNST = tu.cs->slice->getExplicitScalingListUsed() ? tu.cs->slice->getSPS()->getDisableScalingMatrixForLfnstBlks() : false;
  const bool   isLfnstApplied = tu.cu->lfnstIdx > 0 && (tu.cu->isSepTree() ? true : isLuma(compID));
  const bool   disableSMForACT = tu.cs->slice->getSPS()->getScalingMatrixForAlternativeColourSpaceDisabledFlag() && (tu.cs->slice->getSPS()->getScalingMatrixDesignatedColourSpaceFlag() == tu.cu->colorTransform);
  const bool   enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied, disableSMForLFNST, disableSMForACT);

  // Flat quantiser scale and error scale, used when scaling lists are not enabled.
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));

  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);

  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;   // number of positions in a CG, minus one

  int     iCGLastScanPos      = -1;   // CG index holding the last non-zero level (-1: none found yet)
  double  d64BaseCost         = 0;    // running cost of the block as currently quantised
  int     iLastScanPos        = -1;   // scan position of the last non-zero level (-1: none found yet)

  // Budget tracked in remRegBins; once it drops below 4 the code below switches
  // to the Rice-parameter-only path (presumably the context-coded bin limit, per
  // the constant names).
  int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
  int remRegBins = (uiWidth * uiHeight * ctxBinSampleRatio) >> 4;
  uint32_t  goRiceParam   = 0;

  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

  int iScanPos;
  coeffGroupRDStats rdStats;

#if ENABLE_TRACING
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
#endif

  const uint32_t lfnstIdx = tu.cu->lfnstIdx;

  // With LFNST only one CG is processed; otherwise the block is limited to
  // JVET_C0024_ZERO_OUT_TH samples per dimension.
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();

  // Walk the coefficient groups from the last one to the first one.
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
  {
    cctx.initSubblock( subSetId );

    uint32_t maxNonZeroPosInCG = iCGSizeM1;
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
    {
      // For these LFNST block shapes only scan positions 0..7 may be non-zero.
      maxNonZeroPosInCG = 7;
    }

    memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    // Positions beyond maxNonZeroPosInCG are forced to zero.
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
    {
      iScanPos = cctx.minSubPos() + iScanPosinCG;
      uint32_t    blkPos = cctx.blockPos( iScanPos );
      piDstCoeff[ blkPos ] = 0;
    }

    // Walk the positions inside this CG from the last to the first scan position.
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
    {
      iScanPos = cctx.minSubPos() + iScanPosinCG;

      //===== quantization ===== (step 1: pre-quantisation)
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);

      // set coeff
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;

      // |coefficient| * quantiser scale, before the right shift by iQBits.
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;

      // Same value, clamped so that adding the rounding offset below cannot overflow.
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));

      // Round-to-nearest level, capped at the largest codable value.
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));

      const double dErr         = double( lLevelDouble );
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;        // cost of quantising this coefficient to zero
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];        // accumulates the all-zero cost of the block
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;                   // provisional level

      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
      {
        // First non-zero level met in reverse scan: it is the last one in scan order.
        iLastScanPos            = iScanPos;
        iCGLastScanPos          = cctx.subSetId();
      }

      if ( iLastScanPos >= 0 )   // a non-zero level has been seen at or after this position
      {
#if ENABLE_TRACING
        // NOTE(review): X and Y look swapped here (uiCGPosY is read from cgPosX()
        // and vice versa). It only affects trace output; confirm before changing.
        uint32_t uiCGPosY = cctx.cgPosX();
        uint32_t uiCGPosX = cctx.cgPosY();
        uint32_t uiPosY = cctx.posY( iScanPos );
        uint32_t uiPosX = cctx.posX( iScanPos );
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
#endif

        //===== coefficient level estimation ===== (step 2: pick the level by RD cost)
        unsigned ctxIdSig = 0;
        if( iScanPos != iLastScanPos )
        {
          // The significance context is only fetched for positions other than the last one.
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
        }

        uint32_t    uiLevel;
        uint8_t     ctxOffset     = cctx.ctxOffsetAbs     ();
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
        uint32_t    goRiceZero    = 0;
        if( remRegBins < 4 )
        {
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
        }

        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );

        if( iScanPos == iLastScanPos )
        {
          // Last position: called without significance bits (nullptr) and with the "last" argument set to 1.
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange );
        }
        else
        {
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange );
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
        }

        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );

        // Difference between the unrounded value and the chosen level, kept with
        // 8 fractional bits; read again by the sign-hiding stage at the end.
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));

        if( uiLevel > 0 )
        {
          // Rate change for moving the level one step up or one step down.
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
        }
        else // uiLevel == 0
        {
          if( remRegBins < 4 )
          {
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange );
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
          }
          else
          {
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
          }
        }

        piDstCoeff[ uiBlkPos ] = uiLevel;                   // store the chosen level
        d64BaseCost           += pdCostCoeff [ iScanPos ];  // accumulate the cost of the block

        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
        {
          // First position of a CG other than CG 0: reset the Rice parameter.
          goRiceParam   = 0;
        }
        else if( remRegBins >= 4 )
        {
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
        }
      }
      else
      {
        // No non-zero level seen yet: this position costs what a zero costs.
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
      }

      rdStats.d64SigCost += pdCostSig[ iScanPos ];    // significance cost of every position in this CG
      if (iScanPosinCG == 0 )
      {
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ]; // significance cost of position 0 of the CG
      }
      if (piDstCoeff[ uiBlkPos ] )
      {
        cctx.setSigGroup();
        // Level cost plus distortion of the kept level, without its significance cost.
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
        // Cost this position would have if it were zeroed instead.
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
        if ( iScanPosinCG != 0 )
        {
          rdStats.iNNZbeforePos0++;   // non-zero levels in this CG other than position 0
        }
      }
    } //end for (iScanPosinCG)

    // Step 3: decide whether this CG is kept or zeroed as a whole.
    if (iCGLastScanPos >= 0)   // a non-zero level exists in this CG or in one already processed
    {
      if( cctx.subSetId() )
      {
        if( !cctx.isSigGroup() )
        {
          // All-zero CG: replace the per-coefficient significance costs by the group flag cost.
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        }
        else
        {
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
          {
            if ( rdStats.iNNZbeforePos0 == 0 )
            {
              // Only position 0 is non-zero: drop its significance cost from both totals.
              d64BaseCost -= rdStats.d64SigCost_0;
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
            }
            // rd-cost if SigCoeffGroupFlag = 0, initialization
            double d64CostZeroCG = d64BaseCost;

            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );

            // NOTE(review): this condition repeats the enclosing test and is always true here.
            if (cctx.subSetId() < iCGLastScanPos)
            {
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);   // group flag = 1
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);  // group flag = 0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
            }

            // try to convert the current coeff group from non-zero to all-zero
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zero levels

            // if we can save cost, change this block to all-zero block
            if ( d64CostZeroCG < d64BaseCost )
            {
              cctx.resetSigGroup();
              d64BaseCost = d64CostZeroCG;
              if (cctx.subSetId() < iCGLastScanPos)
              {
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
              }
              // reset coeffs to 0 in this block
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
              {
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );

                if (piDstCoeff[ uiBlkPos ])
                {
                  piDstCoeff [ uiBlkPos ] = 0;
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
                  pdCostSig  [ iScanPos ] = 0;
                }
              }
            } // end if ( d64CostAllZeros < d64BaseCost )
          }
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
      }
      else
      {
        // CG 0 is always marked significant.
        cctx.setSigGroup();
      }
    }
  } //end for (cctx.subSetId)

  //===== estimate last position ===== (step 4)
  if ( iLastScanPos < 0 )
  {
    // Every level is zero: nothing more to do.
    return;
  }

  double  d64BestCost         = 0;
  int     iBestLastIdxP1      = 0;   // chosen last scan position + 1 (0 means all-zero block)

  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
  {
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
  }
  else
  {
    bool previousCbf       = tu.cbf[COMPONENT_Cb];
    bool lastCbfIsInferred = false;
    if( useIntraSubPartitions )
    {
      bool rootCbfSoFar       = false;
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth());
      if( isLastSubPartition )
      {
        // OR together the luma cbf of all earlier sub-partitions.
        TransformUnit* tuPointer = tu.cu->firstTU;
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
        {
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, tu.depth);
          tuPointer     = tuPointer->next;
        }
        if( !rootCbfSoFar )
        {
          // No earlier sub-partition has a cbf: no cbf rate is charged for this one below.
          lastCbfIsInferred = true;
        }
      }
      if( !lastCbfIsInferred )
      {
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
      }
    }
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );

    if( !lastCbfIsInferred )
    {
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
    }
    else
    {
      d64BestCost  = d64BlockUncodedCost;   // all-zero cost without any cbf rate
    }
  }

  // Cumulative bit costs used by xGetRateLast() for the last-position coordinates.
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
  {
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
    int bitsX = 0;
    int bitsY = 0;
    int ctxId;
    //X-coordinate
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
    {
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
      bitsX               +=         fB.intBits[ 1 ];
    }
    lastBitsX[ctxId] = bitsX;
    //Y-coordinate
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
    {
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
      bitsY               +=         fB.intBits[ 1 ];
    }
    lastBitsY[ctxId] = bitsY;
  }

  bool bFoundLast = false;
  // Walk the CGs backwards, starting from the one that holds the last non-zero level.
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
  {
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];   // remove this CG's group-flag cost
    if (cctx.isSigGroup( iCGScanPos ) )
    {
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
      {
        maxNonZeroPosInCG = 7;
      }
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
      {
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;

        if (iScanPos > iLastScanPos)
        {
          // Still beyond the last non-zero level.
          continue;
        }
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );

        if( piDstCoeff[ uiBlkPos ] )
        {
          // Try this non-zero level as the last significant position.
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );

          // Add the last-position rate and remove this position's significance cost.
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];

          if( totalCost < d64BestCost )
          {
            iBestLastIdxP1  = iScanPos + 1;
            d64BestCost     = totalCost;
          }
          if( piDstCoeff[ uiBlkPos ] > 1 )
          {
            // A level above 1 ends the search.
            bFoundLast = true;
            break;
          }
          // For the remaining candidates this level counts as zeroed.
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
        }
        else
        {
          d64BaseCost      -= pdCostSig[ iScanPos ];
        }
      } //end for

      if (bFoundLast)
      {
        break;
      }
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
  } // end for

  // Step 5a: restore the sign of every kept level and accumulate the absolute sum.
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
  {
    int blkPos = cctx.blockPos( scanPos );
    TCoeff level = piDstCoeff[ blkPos ];
    uiAbsSum += level;
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
  }

  //===== clean uncoded coefficients =====
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
  {
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
  }

  // Step 5b: sign data hiding (SDH).
  if( cctx.signHiding() && uiAbsSum>=2)
  {
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
                             + 0.5);

    int lastCG = -1;   // -1: no non-zero CG seen yet, 1: current CG is the first non-zero one met, 0: afterwards
    int absSum = 0 ;
    int n ;

    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
    {
      int  subPos         = subSet << cctx.log2CGSize();
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
      absSum = 0 ;

      // Highest scan position in this CG with a non-zero level.
      for( n = iCGSizeM1; n >= 0; --n )
      {
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
        {
          lastNZPosInCG = n;
          break;
        }
      }

      // Lowest scan position in this CG with a non-zero level.
      for( n = 0; n <= iCGSizeM1; n++ )
      {
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
        {
          firstNZPosInCG = n;
          break;
        }
      }

      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
      {
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
      }

      if(lastNZPosInCG>=0 && lastCG==-1)
      {
        lastCG = 1;
      }

      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
      {
        // Sign of the level at firstNZPosInCG: 0 for positive, 1 otherwise.
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
        if( signbit!=(absSum&0x1) )  // hide but need tune
        {
          // calculate the cost
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
          int minPos = -1, finalChange = 0, curChange = 0;

          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
          {
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
            if(piDstCoeff[ uiBlkPos ] != 0 )
            {
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);

              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
              {
                costDown -= (4<<SCALE_BITS);
              }

              if(costUp<costDown)
              {
                curCost = costUp;
                curChange =  1;
              }
              else
              {
                curChange = -1;
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
                {
                  // Lowering this level would zero the coefficient whose sign is tested above: not allowed.
                  curCost = std::numeric_limits<int64_t>::max();
                }
                else
                {
                  curCost = costDown;
                }
              }
            }
            else
            {
              // Zero level: the only option is to raise it to magnitude 1.
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
              curChange = 1 ;

              if(n<firstNZPosInCG)
              {
                // This position would become the new first non-zero one; its source sign must match signbit.
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
                if(thissignbit != signbit )
                {
                  curCost = std::numeric_limits<int64_t>::max();
                }
              }
            }

            if( curCost<minCostInc)
            {
              minCostInc = curCost;
              finalChange = curChange;
              minPos = uiBlkPos;
            }
          }

          // Never push a level beyond the codable range.
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
          {
            finalChange = -1;
          }

          // Apply the change to the magnitude, following the sign of the source coefficient.
          if(plSrcCoeff[minPos]>=0)
          {
            piDstCoeff[minPos] += finalChange ;
          }
          else
          {
            piDstCoeff[minPos] -= finalChange ;
          }
        }
      }

      if(lastCG==1)
      {
        lastCG=0 ;
      }
    }
  }
}

最后一段为SDH技术
SDH技术为:首先计算CG内所有非零系数幅值绝对值之和;然后对和值进行奇偶判断,若和值为偶数,则最后一个非零系数的符号被判为“+”,若和值为奇数,则最后一个非零系数的符号被判为“-”。使用SDH 技术,解码端直接判断CG中最后一个非零系数的符号,因此编码端可以省略它的语法元素coeff_sign_flag的熵编码。然而,若SDH 的最终结果与CG中最后一个非零系数的真实符号不一致,需要对CG中的系数进行调整以使其保持一致,可以采用以下两种方法。
一种方法是编码过程中采用率失真优化量化 (RDOQ)的方法,即编码器允许使用SDH技术,通过调整量化系数,来使SDH判决结果与CG中最后一个非零系数的真实符号保持一致。具体哪个系数修改以及怎样修改,则根据率失真代价来决定。这种方法是基于RDOQ进行的,无须增加额外的运算量,因此编码复杂度增加不多。
对于不进行RDOQ的编码器,引入下面的方法。在一个CG中,计算原始系数值和反量化系数值之间的差值,对差值最大的量化值进行修正:若差值为正,则量化值加1,若差值为负,则量化值减1。由于差值最大的系数最接近其可行量化值,因此这种量化值的调整所产生的影响较小,且复杂度很低。
是否采用SDH技术需要显式标识,图像参数集中的语法元素sign_data_hiding_enabled_flag 置为1表示允许编码器应用SDH技术。具体使用方法规定:当编码器允许使用SDH技术且当前编码的CG中第一个非零系数和最后一个非零系数之间的间隔大于等于4时,则该CG才能省略最后一个非零系数符号的熵编码。

【VTM10.0】量化之RDOQ技术相关推荐

  1. 【VTM10.0】帧内之PDPC技术

    PDPC (Position dependent intra prediction combination) 一种对预测值的修正的技术.部分帧内模式在进行帧内预测之后,进行PDPC的加权平均计算,得到 ...

  2. 语音识别学习笔记(二)【基于矢量量化的识别技术】

    语音识别学习笔记(二)[基于矢量量化的识别技术] 概述  量化分为标量量化和矢量量化(Vector Quantization,VQ).标量量化是将采样后的信号值逐个进行量化,而适量量化是将若干个采样信 ...

  3. 【量化】股市技术分析利器之TA-Lib(二)

    作者:未来大佬 来源: 恒生LIGHT云社区 上篇文章 [量化]股市技术分析利器之TA-Lib(一) ,主要介绍了Ta-lib的基础知识与绘制布林线,本文将着重介绍TA-Lib中强大的价格转换.周期指 ...

  4. VTM10.0代码学习7:decompressCtu()xReconIntraQT()

    此系列是为了记录自己学习VTM10.0的过程和锻炼表达能力,主要是从解码端进行入手.由于本人水平有限,出现的错误恳请大家指正,欢迎与大家一起交流进步. 之前的博文(VTM10.0代码学习3)提到两个重 ...

  5. SVAC1.0帧内预测技术分析

    ##Date:2017/10/21 ##Content:SVAC1.0帧内预测技术分析 帧内预测主要是利用视频图像的空域相关性来进行预测编码.通常I帧只采用帧内预测进行编码.利用宏块之间的相关性,对当 ...

  6. SVAC1.0帧间预测技术分析

    ###Date:2017/10/21 ###Content:SVAC1.0帧间预测技术分析 1.参考帧选择 P条带或B条带最多可有两个参考帧或四个参考场.,它们应为最临近当前解码图像的参考帧或参考场. ...

  7. VTM10.0代码学习5:coding_unit()cu_pred_data()

    此系列是为了记录自己学习VTM10.0的过程和锻炼表达能力,主要是从解码端进行入手.由于本人水平有限,出现的错误恳请大家指正,欢迎与大家一起交流进步. 上一篇博客(VTM10.0代码学习4)讲述了将语 ...

  8. VTM10.0代码学习10:EncGOP_compressGOP()

    此系列是为了记录自己学习VTM10.0的过程,目前正在看编码端.主要的参考文档有JVET-S2001-vH和JVET-S2002-v1.由于本人水平有限,出现的错误恳请大家指正,欢迎与大家一起交流进步 ...

  9. 【MySQL主从复制】使用MySQL8.0.17的clone技术在线搭建主从复制环境

    [MySQL主从复制]使用MySQL8.0.17的clone技术在线搭建主从复制环境 参考:https://www.xmmup.com/dbbao33shiyongmysql8-0-17declone ...

  10. VTM10.0代码学习3:DecSlice_decompressSlice()

    此系列是为了记录自己学习VTM10.0的过程和锻炼表达能力,主要是从解码端进行入手.由于本人水平有限,出现的错误恳请大家指正,欢迎与大家一起交流进步. 接着本系列的上一篇博客继续讲,上一篇博客的末尾讲 ...

最新文章

  1. Socket阻塞,非阻塞,同步,异步
  2. java uml图中的关系_UML类图中的关系
  3. 条款10 令operator=返回一个reference to *this
  4. solidworks热分析_solidworks解决管道流体问题,原来可以如此轻松
  5. C和C++中include 搜索路径的一般形式以及gcc搜索头文件的路径
  6. 虚拟机配置自定义静态ip,并能访问外网
  7. spring循环依赖及解决方法
  8. android log system
  9. 210221阶段三线程、信号量、互斥锁
  10. Warning:关于_CRT_SECURE_NO_WARNINGS
  11. Unity中使用RequireComponent,没有添加上组件
  12. 美版知乎:没有美国和其他国家的许可,中国怎么能建造空间站?
  13. 【经验分享】linux交叉编译 - openssl动态库
  14. Java学习系列(十九)Java面向对象之数据库编程
  15. .Net QQ互联教程 1
  16. golang中字符串的查找方法小结
  17. asp.net 调用echarts显示图表控件随浏览器自适应解决方案
  18. 【BZOJ 3308】 3308: 九月的咖啡店 (费用流|二分图最大权匹配)
  19. python十以内加减法_【小学】 生成10以内的加减法
  20. Linux学习1-Vmware创建虚拟机以及Xshell连接

热门文章

  1. NYOJ779 兰州烧饼
  2. Java实现Excel导入导出(附Demo)
  3. js打印去除页眉页脚
  4. java.lang.InstantiationException: can't instantiate class
  5. 51单片机堆栈深入剖析(转)
  6. 服务器 备案 文档,备案需要备案服务器
  7. 更改绩效管理流程的5个步骤
  8. 中国社科中外合作办学双证博士创新与领导力管理学博士
  9. pat basic 1082 射击比赛
  10. 微信授权 昵称显示微信用户、无头像