ffmpeg 音频重采样

什么是重采样？

所谓的重采样，就是改变⾳频的采样率、sampleformat、声道数等参数，使之按照我们期望的参数输出。

为什么要重采样?

为什么要重采样？当然是原有的⾳频参数不满⾜我们的需求，⽐如在FFmpeg解码⾳频的时候，不同的⾳源有不同的格式，采样率等，在解码后的数据中的这些参数也会不⼀致(最新FFmpeg解码⾳频后，⾳频格式为AV_SAMPLE_FMT_FLTP，这个参数应该是⼀致的)，如果我们接下来需要使⽤解码后的⾳频数据做其他操作，⽽这些参数的不⼀致导致会有很多额外⼯作，此时直接对其进⾏重采样，获取我们制定的⾳频参数，这样就会⽅便很多。再⽐如在将⾳频进⾏SDL播放时候，因为当前的SDL2.0不⽀持planar格式，也不⽀持浮点型的，⽽最新的FFMPEG16年会将⾳频解码为AV_SAMPLE_FMT_FLTP格式，因此此时就需要我们对其重采样，使之可以在SDL2.0上进⾏播放。

怎样重采样？

利用ffmpeg内部的重采样器进行重采样

注意事项

1.关于音频质量损失问题

我们在进行如44.1khz采样率向48khz采样率的音频进行转换时因为我们要求重采样后音频的时长不能发生变化而在时长相同时 48khz采样率肯定比44.1khz采样率的采样点要多的多所以必然会带来精度的损失但是44.1khz转换为48khz 就算损失了人耳还是听不出来的

2.ffmpeg中内部支持互相转换的采样格式和声道分布

enum AVSampleFormat {AV_SAMPLE_FMT_NONE = -1,AV_SAMPLE_FMT_U8,          ///< unsigned 8 bitsAV_SAMPLE_FMT_S16,         ///< signed 16 bitsAV_SAMPLE_FMT_S32,         ///< signed 32 bitsAV_SAMPLE_FMT_FLT,         ///< floatAV_SAMPLE_FMT_DBL,         ///< doubleAV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planarAV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planarAV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planarAV_SAMPLE_FMT_FLTP,        ///< float, planarAV_SAMPLE_FMT_DBLP,        ///< double, planarAV_SAMPLE_FMT_S64,         ///< signed 64 bitsAV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planarAV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

#define AV_CH_LAYOUT_MONO              (AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_STEREO            (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1           (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1               (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_SURROUND          (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER)
#define AV_CH_LAYOUT_3POINT1           (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_4POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_4POINT1           (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_2               (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_QUAD              (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT0           (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)
#define AV_CH_LAYOUT_5POINT1           (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_5POINT0_BACK      (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK      (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_6POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT0_FRONT     (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_HEXAGONAL         (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_BACK      (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER)
#define AV_CH_LAYOUT_6POINT1_FRONT     (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_7POINT0           (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT0_FRONT     (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_7POINT1_WIDE      (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
#define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
#define AV_CH_LAYOUT_HEXADECAGONAL     (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
#define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)

3.存储格式的不同交错模式和平面模式

以双声道为例，带P（plane）的数据格式在存储时，其左声道和右声道的数据是分开存储的，左声道的数据存储在data[0]，右声道的数据存储在data[1]，每个声道的所占⽤的字节数为linesize[0]和linesize[1]；不带P（packed）的⾳频数据在存储时，是按照LRLRLR...的格式交替存储在data[0]中，linesize[0]表示总的数据量。

4.linesize和nb_samples

linesize表示一个平面内总比特数(经过内存对齐) nb_samples表示一帧音频帧的采样点数

5.swr_get_delay

重采样器上下文中会缓冲一些还没有重采样的采样点通过这个函数拿到

关于重采样的代码具体实现 ffmpeg官方例子加上中文注释

extern "C"
{
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}
static int get_format_from_sample_fmt(const char** fmt,enum AVSampleFormat sample_fmt)
{int i;struct sample_fmt_entry {enum AVSampleFormat sample_fmt; const char* fmt_be, * fmt_le;} sample_fmt_entries[] = {{ AV_SAMPLE_FMT_U8,  "u8",    "u8"    },{ AV_SAMPLE_FMT_S16, "s16be", "s16le" },{ AV_SAMPLE_FMT_S32, "s32be", "s32le" },{ AV_SAMPLE_FMT_FLT, "f32be", "f32le" },{ AV_SAMPLE_FMT_DBL, "f64be", "f64le" },};*fmt = NULL;for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {struct sample_fmt_entry* entry = &sample_fmt_entries[i];if (sample_fmt == entry->sample_fmt) {*fmt = AV_NE(entry->fmt_be, entry->fmt_le);return 0;}}fprintf(stderr,"Sample format %s not supported as output format\n",av_get_sample_fmt_name(sample_fmt));return AVERROR(EINVAL);
}/*** Fill dst buffer with nb_samples, generated starting from t. 交错模式的*/
static void fill_samples(double* dst, int nb_samples, int nb_channels, int sample_rate, double* t)
{int i, j;double tincr = 1.0 / sample_rate, * dstp = dst;const double c = 2 * M_PI * 440.0;/* generate sin tone with 440Hz frequency and duplicated channels */for (i = 0; i < nb_samples; i++) {*dstp = sin(c * *t);for (j = 1; j < nb_channels; j++)dstp[j] = dstp[0];dstp += nb_channels;*t += tincr;}
}int main(int argc, char** argv)
{// 输入参数int64_t src_ch_layout = AV_CH_LAYOUT_STEREO;int src_rate = 48000;enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;int src_nb_channels = 0;uint8_t** src_data = NULL;  // 二级指针int src_linesize;int src_nb_samples = 1024;// 输出参数int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;int dst_rate = 44100;enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;int dst_nb_channels = 0;uint8_t** dst_data = NULL;  //二级指针//为什么我们这里是二级指针呢 因为存储格式的不同可能导致存储有多个平面 所以我们需要到dst_data[0]和dst_data[1]拿数据int dst_linesize;int dst_nb_samples;int max_dst_nb_samples;// 输出文件const char* dst_filename = NULL;    // 保存输出的pcm到本地，然后播放验证FILE* dst_file;int dst_bufsize;const char* fmt;// 重采样实例struct SwrContext* swr_ctx;double t;int ret;//这里随便填 我填的是我本地的一个文件dst_filename = "c://out.pcm";dst_file = fopen(dst_filename, "wb");if (!dst_file) {fprintf(stderr, "Could not open destination file %s\n", dst_filename);exit(1);}// 创建重采样器/* create resampler context */swr_ctx = swr_alloc();if (!swr_ctx) {fprintf(stderr, "Could not allocate resampler context\n");ret = AVERROR(ENOMEM);goto end;}// 设置重采样参数/* set options */// 输入参数av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);//源通道数    av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);//源采样率av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);//源采样格式// 输出参数av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);// 初始化重采样/* initialize the resampling context */if ((ret = swr_init(swr_ctx)) < 0) {fprintf(stderr, "Failed to initialize the resampling context\n");goto end;}/* allocate source and destination samples buffers */// 计算出输入源的通道数量src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);// 给输入源分配内存空间 为什么会是三级指针呢 因为需要给src[0] src[1]分配内存ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,src_nb_samples, src_sample_fmt, 0);if (ret < 0) {fprintf(stderr, "Could not allocate source samples\n");goto end;}/* compute the number of converted samples: buffering is avoided* ensuring that the output buffer will contain at least all the* converted input samples */// 计算一帧音频输出采样数量max_dst_nb_samples = dst_nb_samples =av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);/* buffer is going to be directly written to a rawaudio file, no alignment */dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);// 分配输出缓存内存ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,dst_nb_samples, dst_sample_fmt, 0);if (ret < 0) {fprintf(stderr, "Could not allocate destination samples\n");goto end;}t = 0;do {/* generate synthetic audio */// 生成输入源 这里是自定义的输入源 一个正弦波 你不用管 本来这里也可以用你读出来的AVFrame里的数据来填充fill_samples((double*)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);/* compute destination number of samples */int64_t delay = swr_get_delay(swr_ctx, src_rate);//拿到swr_ctx中缓存的采样点数量dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);//重新计算输出音频帧一帧采样点数量if (dst_nb_samples > max_dst_nb_samples) {//如果大于 释放内存后重新分配内存av_freep(&dst_data[0]);ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,dst_nb_samples, dst_sample_fmt, 1);if (ret < 0)break;max_dst_nb_samples = dst_nb_samples;}ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);//真正的转换函数if (ret < 0) {fprintf(stderr, "Error while converting\n");goto end;}//拿到输出字节数dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,ret, dst_sample_fmt, 1);if (dst_bufsize < 0) {fprintf(stderr, "Could not get sample buffer size\n");goto end;}printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);fwrite(dst_data[0], 1, dst_bufsize, dst_file);} while (t < 10);//拿到swr中剩下的采样点 上文也提及到了 可能swr内部会有缓存的ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, NULL, 0);if (ret < 0) {fprintf(stderr, "Error while converting\n");goto end;}dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,ret, dst_sample_fmt, 1);if (dst_bufsize < 0) {fprintf(stderr, "Could not get sample buffer size\n");goto end;}printf("flush in:%d out:%d\n", 0, ret);fwrite(dst_data[0], 1, dst_bufsize, dst_file);if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)goto end;end:fclose(dst_file);if (src_data)av_freep(&src_data[0]);av_freep(&src_data);if (dst_data)av_freep(&dst_data[0]);av_freep(&dst_data);swr_free(&swr_ctx);return ret < 0;
}

ffmpeg 音频重采样相关推荐

【Android FFMPEG 开发】FFMPEG 音频重采样 ( 初始化音频重采样上下文 SwrContext | 计算音频延迟 | 计算输出样本个数 | 音频重采样 swr_convert )
文章目录 I . FFMPEG 播放视频流程 II . FFMPEG 音频重采样流程 III . FFMPEG 音频重采样 IV . FFMPEG 初始化音频重采样上下文 SwrContext V . ...
ffmpeg音频重采样
音频重采样重采样就是改变音频的采样率.sample format(采样格式).声道数(channel)等参数为什么需要重采样? 1.音频重采样是为了将音频统一格式比如直播: 麦克风采集 44.1 ...
音视频从入门到精通——FFmpeg之swr_convert音频重采样函数分析
文章目录音频重采样 swr_alloc函数 swr_alloc_set_opts函数 swr_init函数 swr_convert函数音频基础音频开发主要应用有音频开发具体内容有音频应用的难 ...
FFmpeg系列（五）—— 音频重采样
文章目录为什么要重采样完整代码运行结果为什么要重采样从设备采集的音频数据与编码器要求的数据不一致扬声器要求的音频数据与要播放的音频数据不一致更方便运算(回音消除须使用单声道,需要先转换) ...
音频重采样ffmpeg(九)
前言广义的音频重采样包括: 1.采样格式转化:比如采样格式从16位整形变为浮点型 2.采样率的转换:降采样和升采样,比如44100采样率降为2000采样率 3.存放方式转化:音频数据从packet方 ...
【ffmpeg】音频重采样
[ffmpeg]音频重采样前言什么是音频重采样实现音频重采样创建重采样上下文初始化重采样进行重采样 ffplay播放参考资料个人简介
音频处理基本概念及音频重采样
音频处理基本概念及音频重采样目录重点问题重采样什么是重采样为什么要重采样可调节的参数对应参数解析采样率采样格式及量化精度(位宽) 分⽚(plane)和打包(packed) 声道分布( ...
swr_convert音频重采样介绍
在做音频处理的时候,我们有时候需要调整音频流的采样率或者采样格式,可能是喇叭不支持 48000 采样率,所以需要降低到 44100 采样了.也可能因为各种业务原因,需要调整采样率,采样格式,或者 ...
关于PCM音频重采样思路及注意事项(频率变换和通道数变换(单通道转双通道))
最近在做一个语音广播的项目,实现语音广播的过程主要是:音频采集.音频编码.音频发送.音频解码和音频播放,这个过程在这里不展开说明,本文主要讲述其中的音频重采样和音频降噪的问题和记录如何解决的思路.本文 ...

ffmpeg 音频重采样

ffmpeg 音频重采样相关推荐

最新文章

热门文章