H.264视频中SPS/PPS缺失时实现补帧

问题说明

有些视频文件中, 关键帧的SPS/PPS缺失, 导致播放时解码失败.

比如某些mp4视频, 第一个关键帧有SPS/PPS, 其后所有关键帧都没有SPS/PPS, 播放该mp4文件本身是没问题的, 但是, 如果需要将该mp4文件转封装(不转码)到m3u8格式, 除了第一个ts片, 后续所有ts片的关键帧都没有SPS/PPS. 除非从头开始播放, 而且不拖动进度条. 否则, 从中途开始播放, 或拖动进度条, 将会黑屏无法播放.

为了m3u8文件可正常播放, 在转封装的过程中, 需要对关键帧补上SPS/PPS信息.
此文说明一种补SPS/PPS的方法, 以C语言实现.

如何使用

使用本文提供的函数, 可以很方便的实现补帧, 使用很简单, 如下:

   h264_codecpar_t  cp = { 0 };AVPacket pkt = { 0 };AVFormatContext * ic = NULL;...while(0 == av_read_frame(ic, &pkt)){if (pkt.stream_index == video_index ){// 是否关键帧if (pkt.flags & AV_PKT_FLAG_KEY){// 进行判断和补帧.h264_codecpar_update(&cp, &pkt, TRUE);}...}...}  h264_codecpar_free(&cp);

其中关键函数就是 h264_codecpar_update, 以下对该函数的实现进行说明.

实现说明

h264_codecpar_update 函数的实现逻辑不复杂:

判断Frame是AnnexB还是AVCC格式;
根据不同格式进行解析, 获取其中的SPS/PPS信息;
如果成功取到SPS/PPS, 则判断和之前保存的SPS/PPS是否相同,相同的则直接返回, 否则保存新的SPS/PPS信息到临时内存中, 然后返回
如果无法获取到SPS/PPS, 则将之前保存的SPS/PPS信息复制到本Frame的开头位置, 然后返回

实现的代码:


/*
 * Parameters extracted from the SPS/PPS of an H.264 key frame, plus a cached
 * copy of those NAL units that can be prepended to later key frames.
 */
typedef struct h264_codecpar_t h264_codecpar_t;
/* The SPS/PPS parsing helpers refer to the same structure under this name. */
typedef struct h264_codecpar_t mdf_h264_codecpar_t;

struct h264_codecpar_t
{
    uint32_t profile_idc;   // profile_idc read from the SPS
    uint32_t level_idc;     // level_idc read from the SPS
    uint32_t width;         // picture width in pixels, after cropping
    uint32_t height;        // picture height in pixels, after cropping
    int fps;                // not filled in by the parser shown here
    int deinterlace;        // 1 when the SPS has frame_mbs_only_flag == 0, else 0

    // Borrowed pointers into the packet that was parsed (not owned).
    uint8_t* sps_ptr;
    uint8_t* pps_ptr;
    uint32_t sps_size;
    uint32_t pps_size;

    // Heap buffer (malloc) holding the saved SPS and, if present, the PPS,
    // each preceded by a 4-byte start code or 4-byte length.
    char* extradata_buff;
    int extradata_size;
};
// The implementation of the main function follows.
/*
 * Replace the payload of `pkt` with "cached SPS/PPS + original payload".
 *
 * Returns 0 on success. On any failure a negative value is returned and the
 * packet is left exactly as it was.
 */
static int h264_prepend_extradata(const h264_codecpar_t* codecpar, AVPacket* pkt)
{
    // av_packet_from_data() takes an int size and assumes the buffer is
    // size + AV_INPUT_BUFFER_PADDING_SIZE bytes long, so keep the sum in range.
    const size_t limit = (size_t)INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE;

    if (codecpar->extradata_size <= 0 || pkt->size < 0) {
        return -1;
    }

    const size_t extra = (size_t)codecpar->extradata_size;
    const size_t body = (size_t)pkt->size;
    if (extra > limit || body > limit - extra) {
        return -1;
    }

    uint8_t* merged = av_malloc(extra + body + AV_INPUT_BUFFER_PADDING_SIZE);
    if (!merged) {
        return -1;
    }

    memcpy(merged, codecpar->extradata_buff, extra);   // saved SPS/PPS
    memcpy(merged + extra, pkt->data, body);           // original packet payload
    memset(merged + extra + body, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    // Hand the new buffer to the packet first and drop the old one only after
    // that succeeded; otherwise pkt->data would point at freed memory.
    AVBufferRef* old_buf = pkt->buf;
    uint8_t* old_data = pkt->data;
    const int old_size = pkt->size;

    if (av_packet_from_data(pkt, merged, (int)(extra + body)) < 0) {
        pkt->buf = old_buf;
        pkt->data = old_data;
        pkt->size = old_size;
        av_free(merged);
        return -1;
    }

    // `merged` is now owned by pkt->buf; do not free it here.
    av_buffer_unref(&old_buf);
    return 0;
}

/*
 * Inspect a key-frame packet and keep the cached SPS/PPS in sync with it.
 *
 * - If the packet carries an SPS that parses successfully and its
 *   width/height/profile/level differ from the cached values, the new SPS/PPS
 *   are saved into `codecpar` (only when `copy` is non-zero).
 * - If no SPS could be parsed and a cached SPS/PPS exists, the cached data is
 *   prepended to the packet (only when `copy` is non-zero).
 *
 * codecpar  cached parameters, updated in place
 * pkt       packet to inspect; must be a key frame
 * copy      non-zero to allow saving SPS/PPS and patching the packet
 *
 * Returns TRUE when the parsed parameters differ from previously cached,
 * non-zero ones; FALSE otherwise (including for invalid arguments).
 */
int  h264_codecpar_update(h264_codecpar_t* codecpar, AVPacket* pkt, int copy)
{
    h264_codecpar_t tmp = { 0 };
    int changed = FALSE;
    int avcc = FALSE;
    int ret = -1;

    // The format probe below looks at the first four bytes.
    if (!codecpar || !pkt || !pkt->data || pkt->size < 4) {
        return FALSE;
    }

    // Callers must only pass key frames.
    assert(pkt->flags & AV_PKT_FLAG_KEY);

    // NOTE(review): this probe is a heuristic. An AVCC packet whose first NAL
    // length is 1 or 256..511 also begins with 00 00 00 01 / 00 00 01 and
    // would be taken for Annex B.
    const uint8_t* head = pkt->data;
    const int starts_with_code =
        head[0] == 0 && head[1] == 0 &&
        ((head[2] == 0 && head[3] == 1) || head[2] == 1);

    if (starts_with_code) {
        // Annex B: NAL units separated by 00 00 00 01 or 00 00 01.
        ret = parseAnnexNalu(&tmp, pkt->data, pkt->size);
    } else {
        // AVCC / AVC1: NAL units preceded by a length field.
        avcc = TRUE;
        ret = parseAvccNalu(&tmp, pkt->data, pkt->size);
    }

    if (0 == ret) {
        // An SPS was found and parsed: see whether anything changed.
        if (tmp.width != codecpar->width ||
            tmp.height != codecpar->height ||
            tmp.profile_idc != codecpar->profile_idc ||
            tmp.level_idc != codecpar->level_idc) {
            // A non-zero previous width means parameters were cached before,
            // so this is a real change rather than the first SPS seen.
            if (codecpar->width > 0) {
                changed = TRUE;
            }
            if (copy) {
                h264_copy_codecpar(codecpar, &tmp, avcc);
            }
        }
    } else if (codecpar->extradata_buff && copy) {
        // No usable SPS in this key frame: prepend the cached SPS/PPS.
        // Best effort; on failure the packet is left untouched.
        (void)h264_prepend_extradata(codecpar, pkt);
    }

    return changed;
}

其中 h264_copy_codecpar 函数的实现:


/*
 * Write the 4-byte prefix that precedes a NAL unit: the big-endian NAL size
 * for AVCC, or the start code 00 00 00 01 for Annex B. Written byte by byte
 * so the result does not depend on host byte order or on `dst` alignment.
 */
static void h264_write_nal_prefix(uint8_t* dst, uint32_t nal_size, int avcc)
{
    if (avcc) {
        dst[0] = (uint8_t)(nal_size >> 24);
        dst[1] = (uint8_t)(nal_size >> 16);
        dst[2] = (uint8_t)(nal_size >> 8);
        dst[3] = (uint8_t)nal_size;
    } else {
        dst[0] = 0;
        dst[1] = 0;
        dst[2] = 0;
        dst[3] = 1;
    }
}

/*
 * Save the SPS (and PPS, when present) referenced by `info` into a freshly
 * allocated buffer owned by `codecpar`, replacing any previous buffer, and
 * copy the parsed parameters used later for change detection.
 *
 * codecpar  destination cache
 * info      freshly parsed parameters; sps_ptr/pps_ptr point into the packet
 * avcc      non-zero: prefix each NAL with its length; zero: with a start code
 *
 * If `info` has no SPS, the sizes are out of range, or the allocation fails,
 * `codecpar` is left unchanged.
 */
static void h264_copy_codecpar(h264_codecpar_t* codecpar, h264_codecpar_t* info, int avcc)
{
    if (!codecpar || !info || !info->sps_ptr || info->sps_size == 0) {
        return;
    }

    const int has_pps = (info->pps_ptr != NULL && info->pps_size > 0);

    // Each NAL is stored as: 4-byte prefix + payload.
    uint64_t total = 4u + (uint64_t)info->sps_size;
    if (has_pps) {
        total += 4u + (uint64_t)info->pps_size;
    }
    // extradata_size is an int.
    if (total > (uint64_t)INT32_MAX) {
        return;
    }

    uint8_t* blob = malloc((size_t)total);
    if (!blob) {
        return;
    }

    uint8_t* out = blob;
    h264_write_nal_prefix(out, info->sps_size, avcc);
    out += 4;
    memcpy(out, info->sps_ptr, (size_t)info->sps_size);
    out += info->sps_size;

    if (has_pps) {
        h264_write_nal_prefix(out, info->pps_size, avcc);
        out += 4;
        memcpy(out, info->pps_ptr, (size_t)info->pps_size);
    }

    // Replace the previous buffer only after the new one is complete.
    free(codecpar->extradata_buff);
    codecpar->extradata_buff = (char*)blob;
    codecpar->extradata_size = (int)total;

    // Remember the parameters that are compared to detect an SPS change.
    codecpar->width = info->width;
    codecpar->height = info->height;
    codecpar->profile_idc = info->profile_idc;
    codecpar->level_idc = info->level_idc;
    codecpar->fps = info->fps;
    codecpar->deinterlace = info->deinterlace;
}

以上代码完成了除SPS/PPS解析之外的判断和补帧逻辑.

SPS/PPS的解析不进行具体分析, 直接上代码:


/* NAL unit types: the low 5 bits of the first byte of a NAL unit. */
enum
{
    FRAME_UNDEFINED = 0,
    FRAME_SLICE_NON_IDR = 1,
    FRAME_SLICE_DATA1 = 2,
    FRAME_SLICE_DATA2 = 3,
    FRAME_SLICE_DATA3 = 4,
    FRAME_IDR = 5,
    FRAME_SEI = 6,
    FRAME_SPS = 7,
    FRAME_PPS = 8,
    FRAME_AUD = 9,          // access unit delimiter
    FRAME_ENDSEQ = 10,      // end of sequence
    FRAME_ENDSTREAM = 11,
    FRAME_FILLERDATA = 12,
};

/* Only this many bytes of an SPS NAL are examined by get_resolution(). */
enum { SPS_PARSE_LIMIT = 4096 };

/* Read a big-endian 32-bit value. Casts avoid shifting into the sign bit of int. */
static uint32_t get_uint32(const uint8_t* p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/*
 * Bit-reader convention used by Ue(), Se() and u_checked():
 * the cursor *nStartBit is a bit offset into the buffer. When a read would go
 * past the end, the cursor is set to "total bits + 1" and 0 is returned; every
 * later read then fails the same way, so callers can check once at the end.
 */
static uint32_t bits_total(uint32_t nLen)
{
    // Clamp so that total + 1 still fits in 32 bits.
    const uint32_t max_bytes = 0x1FFFFFFEu;
    return (nLen > max_bytes ? max_bytes : nLen) * 8u;
}

/* Unsigned Exp-Golomb code, ue(v). Bounded by nLen bytes. */
static uint32_t Ue(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    const uint32_t total = bits_total(nLen);
    uint32_t pos = *nStartBit;
    uint32_t zeros = 0;

    // Count the zero bits in front of the terminating 1 bit.
    while (pos < total && !(pBuff[pos / 8] & (0x80 >> (pos % 8)))) {
        zeros++;
        pos++;
    }

    // Need the 1 bit plus `zeros` suffix bits; more than 31 zeros cannot be
    // represented in 32 bits.
    if (pos >= total || zeros > 31 || zeros > total - pos - 1) {
        *nStartBit = total + 1;
        return 0;
    }
    pos++;

    uint32_t suffix = 0;
    for (uint32_t i = 0; i < zeros; i++, pos++) {
        suffix = (suffix << 1) | ((uint32_t)(pBuff[pos / 8] >> (7 - pos % 8)) & 1u);
    }

    *nStartBit = pos;
    return ((uint32_t)1 << zeros) - 1 + suffix;
}

/* Signed Exp-Golomb code, se(v): odd codes map to +ceil(k/2), even to -k/2. */
static int Se(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    const uint32_t code = Ue(pBuff, nLen, nStartBit);
    const int magnitude = (int)((code >> 1) + (code & 1u));
    return (code & 1u) ? magnitude : -magnitude;
}

// u returns the next nBitCount bits of buf as an unsigned value.
// e.g. nBitCount = 4, buf = 01011100, then return 5 (0101).
// No bounds check here; use u_checked() when the buffer length is known.
static uint32_t u(uint32_t nBitCount, uint8_t* buf, uint32_t* nStartBit)
{
    uint32_t value = 0;
    for (uint32_t i = 0; i < nBitCount; i++) {
        const uint32_t pos = (*nStartBit)++;
        value = (value << 1) | ((uint32_t)(buf[pos / 8] >> (7 - pos % 8)) & 1u);
    }
    return value;
}

/* Same as u(), but refuses to read past nLen bytes (see convention above). */
static uint32_t u_checked(uint32_t nBitCount, uint8_t* buf, uint32_t nLen, uint32_t* nStartBit)
{
    const uint32_t total = bits_total(nLen);
    if (*nStartBit > total || nBitCount > total - *nStartBit) {
        *nStartBit = total + 1;
        return 0;
    }
    return u(nBitCount, buf, nStartBit);
}

/*
 * Copy a NAL unit while dropping emulation-prevention bytes (the 03 in
 * 00 00 03). At most dst_cap bytes are produced. Returns the number written.
 */
static uint32_t nal_unescape(const uint8_t* src, uint32_t src_len, uint8_t* dst, uint32_t dst_cap)
{
    uint32_t out = 0;
    uint32_t zeros = 0;
    for (uint32_t i = 0; i < src_len && out < dst_cap; i++) {
        if (zeros >= 2 && src[i] == 0x03) {
            zeros = 0;
            continue;
        }
        zeros = (src[i] == 0) ? zeros + 1 : 0;
        dst[out++] = src[i];
    }
    return out;
}

/* Profiles whose SPS carries chroma_format_idc, bit depths and scaling lists. */
static int sps_has_chroma_fields(uint32_t profile_idc)
{
    switch (profile_idc) {
    case 100: case 110: case 122: case 244: case 44:
    case 83:  case 86:  case 118: case 128: case 138:
    case 139: case 134: case 135:
    case 144:   // old High 4:4:4 profile
        return 1;
    default:
        return 0;
    }
}

/* Consume one scaling_list() of `count` entries; the values are not needed. */
static void skip_scaling_list(uint8_t* buf, uint32_t len, uint32_t* pos, int count)
{
    unsigned last = 8;
    unsigned next = 8;
    for (int j = 0; j < count; j++) {
        if (next != 0) {
            const int delta = Se(buf, len, pos);
            // (last + delta + 256) % 256, done in unsigned arithmetic so that
            // a corrupt delta cannot overflow.
            next = (last + (unsigned)delta) & 0xFFu;
        }
        if (next != 0) {
            last = next;
        }
    }
}

/*
 * Parse an SPS NAL unit (starting at its NAL header byte) and fill in
 * info->width, height, profile_idc, level_idc and deinterlace.
 *
 * Every syntax element in front of the fields of interest is read and
 * discarded so that the bit position stays correct.
 *
 * Returns 0 on success; -1 if the NAL is not an SPS, is truncated, or
 * describes an impossible picture size. `info` is only written on success.
 */
static int get_resolution(mdf_h264_codecpar_t* info, uint8_t* pspsData, uint32_t nspsDataLen)
{
    uint8_t sps[SPS_PARSE_LIMIT];
    uint32_t pos = 0;

    if (!info || !pspsData || nspsDataLen == 0) {
        return -1;
    }
    const uint32_t len = nal_unescape(pspsData, nspsDataLen, sps, (uint32_t)sizeof sps);

    // NAL header
    (void)u_checked(1, sps, len, &pos);                 // forbidden_zero_bit
    (void)u_checked(2, sps, len, &pos);                 // nal_ref_idc
    if (u_checked(5, sps, len, &pos) != FRAME_SPS) {    // nal_unit_type
        return -1;
    }

    const uint32_t profile_idc = u_checked(8, sps, len, &pos);
    (void)u_checked(8, sps, len, &pos);                 // constraint_set flags + reserved bits
    const uint32_t level_idc = u_checked(8, sps, len, &pos);
    (void)Ue(sps, len, &pos);                           // seq_parameter_set_id

    uint32_t chroma_format_idc = 1;                     // default: 4:2:0
    if (sps_has_chroma_fields(profile_idc)) {
        chroma_format_idc = Ue(sps, len, &pos);
        if (chroma_format_idc > 3) {
            return -1;
        }
        if (chroma_format_idc == 3) {
            (void)u_checked(1, sps, len, &pos);         // separate_colour_plane_flag
        }
        (void)Ue(sps, len, &pos);                       // bit_depth_luma_minus8
        (void)Ue(sps, len, &pos);                       // bit_depth_chroma_minus8
        (void)u_checked(1, sps, len, &pos);             // qpprime_y_zero_transform_bypass_flag
        if (u_checked(1, sps, len, &pos)) {             // seq_scaling_matrix_present_flag
            const int lists = (chroma_format_idc != 3) ? 8 : 12;
            for (int i = 0; i < lists; i++) {
                if (u_checked(1, sps, len, &pos)) {     // seq_scaling_list_present_flag[i]
                    skip_scaling_list(sps, len, &pos, i < 6 ? 16 : 64);
                }
            }
        }
    }

    (void)Ue(sps, len, &pos);                           // log2_max_frame_num_minus4
    const uint32_t pic_order_cnt_type = Ue(sps, len, &pos);
    if (pic_order_cnt_type == 0) {
        (void)Ue(sps, len, &pos);                       // log2_max_pic_order_cnt_lsb_minus4
    } else if (pic_order_cnt_type == 1) {
        (void)u_checked(1, sps, len, &pos);             // delta_pic_order_always_zero_flag
        (void)Se(sps, len, &pos);                       // offset_for_non_ref_pic
        (void)Se(sps, len, &pos);                       // offset_for_top_to_bottom_field
        const uint32_t cycle = Ue(sps, len, &pos);      // num_ref_frames_in_pic_order_cnt_cycle
        if (cycle > 255) {
            return -1;
        }
        for (uint32_t i = 0; i < cycle; i++) {
            (void)Se(sps, len, &pos);                   // offset_for_ref_frame[i]
        }
    }

    (void)Ue(sps, len, &pos);                           // max_num_ref_frames
    (void)u_checked(1, sps, len, &pos);                 // gaps_in_frame_num_value_allowed_flag
    const uint32_t pic_width_in_mbs_minus1 = Ue(sps, len, &pos);
    const uint32_t pic_height_in_map_units_minus1 = Ue(sps, len, &pos);
    const uint32_t frame_mbs_only_flag = u_checked(1, sps, len, &pos);
    if (!frame_mbs_only_flag) {
        (void)u_checked(1, sps, len, &pos);             // mb_adaptive_frame_field_flag
    }
    (void)u_checked(1, sps, len, &pos);                 // direct_8x8_inference_flag

    const uint32_t frame_cropping_flag = u_checked(1, sps, len, &pos);
    uint32_t crop_left = 0, crop_right = 0, crop_top = 0, crop_bottom = 0;
    if (frame_cropping_flag) {
        crop_left = Ue(sps, len, &pos);
        crop_right = Ue(sps, len, &pos);
        crop_top = Ue(sps, len, &pos);
        crop_bottom = Ue(sps, len, &pos);
    }

    // Any read past the end leaves the cursor beyond the buffer.
    if (pos > bits_total(len)) {
        return -1;
    }

    // A field-coded stream stores half-height map units.
    const uint32_t field_factor = 2 - frame_mbs_only_flag;
    uint64_t width = ((uint64_t)pic_width_in_mbs_minus1 + 1) * 16;
    uint64_t height = (uint64_t)field_factor * ((uint64_t)pic_height_in_map_units_minus1 + 1) * 16;

    if (frame_cropping_flag) {
        uint32_t crop_unit_x;
        uint32_t crop_unit_y;
        if (chroma_format_idc == 1) {           // 4:2:0
            crop_unit_x = 2;
            crop_unit_y = 2 * field_factor;
        } else if (chroma_format_idc == 2) {    // 4:2:2
            crop_unit_x = 2;
            crop_unit_y = field_factor;
        } else {                                // monochrome or 4:4:4
            crop_unit_x = 1;
            crop_unit_y = field_factor;
        }
        const uint64_t crop_w = (uint64_t)crop_unit_x * ((uint64_t)crop_left + crop_right);
        const uint64_t crop_h = (uint64_t)crop_unit_y * ((uint64_t)crop_top + crop_bottom);
        if (crop_w >= width || crop_h >= height) {
            return -1;
        }
        width -= crop_w;
        height -= crop_h;
    }

    if (width > UINT32_MAX || height > UINT32_MAX) {
        return -1;
    }

    info->profile_idc = profile_idc;
    info->level_idc = level_idc;
    info->deinterlace = frame_mbs_only_flag ? 0 : 1;
    info->width = (uint32_t)width;
    info->height = (uint32_t)height;
    return 0;
}

/*
 * Scan an AVCC-style packet (each NAL preceded by a 4-byte big-endian length)
 * for SPS and PPS. Found units are recorded in info->sps_ptr/sps_size and
 * info->pps_ptr/pps_size as pointers into `buf`.
 *
 * NOTE(review): the length field is assumed to be 4 bytes wide; streams that
 * declare a 1- or 2-byte length field are not handled.
 *
 * Returns the result of get_resolution() for the last SPS seen, or -1 when
 * the packet contains no SPS.
 */
static int parseAvccNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
    int ret = -1;
    int64_t offset = 0;

    if (!info || !buf) {
        return -1;
    }

    // A NAL needs its 4-byte length plus at least one payload byte.
    while (offset + 4 < size) {
        const uint32_t len = get_uint32(buf + offset);
        offset += 4;

        if (len == 0) {
            continue;   // empty entry: nothing to classify
        }
        if (offset + (int64_t)len > size) {
            break;      // length runs past the packet: stop parsing
        }

        const int nal_type = buf[offset] & 0x1f;
        if (nal_type == FRAME_SPS) {
            ret = get_resolution(info, buf + offset, len);
            info->sps_ptr = buf + offset;
            info->sps_size = len;
            if (info->pps_ptr) {
                break;
            }
        } else if (nal_type == FRAME_PPS) {
            info->pps_ptr = buf + offset;
            info->pps_size = len;
            if (info->sps_ptr) {
                break;
            }
        }
        offset += len;
    }
    return ret;
}

/*
 * Find the first Annex B start code (00 00 01 or 00 00 00 01) in data[0..size).
 * On success *front points at the first byte of the start code, *latter at the
 * byte following it, and a non-zero value is returned. Returns 0 otherwise.
 */
static int searchStartCode(uint8_t* data, int size, uint8_t** front, uint8_t** latter)
{
    for (int i = 0; i + 2 < size; i++) {
        if (data[i] != 0 || data[i + 1] != 0) {
            continue;
        }
        if (data[i + 2] == 1) {
            *front = data + i;
            *latter = data + i + 3;
            return 1;
        }
        if (i + 3 < size && data[i + 2] == 0 && data[i + 3] == 1) {
            *front = data + i;
            *latter = data + i + 4;
            return 1;
        }
    }
    return 0;
}

/*
 * Scan an Annex B packet for SPS and PPS. Found units are recorded in
 * info->sps_ptr/sps_size and info->pps_ptr/pps_size as pointers into `buf`;
 * each unit ends at the next start code or at the end of the packet.
 * Scanning stops as soon as both an SPS and a PPS have been seen.
 *
 * Returns the result of get_resolution() for the last SPS seen, or -1 when
 * the packet contains no SPS.
 */
int parseAnnexNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
    int ret = -1;

    if (!info || !buf || size <= 0) {
        return -1;
    }

    uint8_t* const end = buf + size;
    uint8_t* nal = buf;     // start of the current NAL candidate

    while (nal < end) {
        uint8_t* code = NULL;   // start of the next start code
        uint8_t* next = NULL;   // first byte after that start code
        const int more = searchStartCode(nal, (int)(end - nal), &code, &next);
        uint8_t* const nal_end = more ? code : end;

        // Empty when the packet begins with a start code or two are adjacent.
        if (nal_end > nal) {
            const uint32_t nal_size = (uint32_t)(nal_end - nal);
            const int nal_type = nal[0] & 0x1f;

            if (nal_type == FRAME_SPS) {
                ret = get_resolution(info, nal, nal_size);
                info->sps_ptr = nal;
                info->sps_size = nal_size;
                if (info->pps_ptr) {
                    break;
                }
            } else if (nal_type == FRAME_PPS) {
                info->pps_ptr = nal;
                info->pps_size = nal_size;
                if (info->sps_ptr) {
                    break;
                }
            }
        }

        if (!more) {
            break;
        }
        nal = next;
    }
    return ret;
}

AVCC格式说明

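AVCC格式也叫AVC1格式、MPEG-4格式: 每个NALU前面是一个长度字段(通常为4字节, 大端字节序), 而不是StartCode。注意, Byte-Stream Format指的是AnnexB格式(用StartCode 00 00 01 或 00 00 00 01 分隔NALU), 并不是AVCC。AVCC用于mp4/flv/mkv, VideoToolbox。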

例如:

01 42 C0 28 ff e1 00 18 67 64 00 29 ac b4 02 80 2d d0 80 00 00 03 00 80 0f 42 40 07 8c 19 50 01 00 04 68 ef 3c b0 fd f8 f8 00 00 00 00

前4个字节：

0x01: version
　　0x42: avc profile （首个SPS的第1个字节）
　　0xc0: avc compatibility (首个SPS的第2个字节)
0x28: avc level （首个SPS的第3个字节，可以发现后面0x0989位置的3个字，和这3个是一样的）

第5个字节：
　　0xff：
　　　　6_bit: 保留位, 默认为 111111
　　　　2_bit: lengthSizeMinusOne, 即每个NALU前的长度字段所占字节数减1; 这里为3(二进制11), 表示长度字段占4字节

第6个字节：
　　0xe1: [111 00001]
　　　　3_bit: 默认 111
　　　　5_bit: 接下来的SPS的个数, 这里为1
　　　　
第7 8个字节：
　　0x00 0x18: 表示接下来sps或者pps的长度为24

第9个字节：
　　0x67: [0110 0111] nalu_type为7，表示SPS，就是说从0988到099f这24个数据为sps

第33个字节：9 (sps_pos) + 24（sps_size）
　　0x01: 接下来的PPS的个数, 这里为1

第34 35字节：
　　0x00 0x04: 表示接下来sps或者pps的长度为4

第36个字节：
　　0x68: [0110 1000] nalu_type为8，表示PPS