unimrcp本身提供了一个简单的VAD算法模块,这里并不讨论这个算法的好坏,只记录一下它的用法流程。

算法实现代码:libs/mpf/src/mpf_activity_detector.c

使用时,调用mpf_activity_detector_create()创建,以demo_recog_engine为例:

    /* create demo recog channel */
    /* The channel object is allocated from the APR pool passed in as `pool`. */
    demo_recog_channel_t *recog_channel = apr_palloc(pool,sizeof(demo_recog_channel_t));
    recog_channel->demo_engine = engine->obj;
    recog_channel->recog_request = NULL;
    recog_channel->stop_response = NULL;
    /* Every channel gets its own activity detector, created from the same pool. */
    recog_channel->detector = mpf_activity_detector_create(pool);
    recog_channel->audio_out = NULL;

mpf_activity_detector_create的实现代码:

/**
 * Create activity detector.
 *
 * Allocates a detector from @a pool and loads the built-in defaults:
 * an energy threshold plus the three state-transition timeouts.
 *
 * @param pool the APR pool the detector is allocated from
 * @return the newly created detector, starting in the inactivity state
 */
MPF_DECLARE(mpf_activity_detector_t*) mpf_activity_detector_create(apr_pool_t *pool)
{
	mpf_activity_detector_t *vad = apr_palloc(pool,sizeof(mpf_activity_detector_t));

	/* state machine starts idle with no accumulated time */
	vad->state = DETECTOR_STATE_INACTIVITY;
	vad->duration = 0;

	/* energy threshold a frame must reach to count as voice */
	vad->level_threshold = 2; /* 0 .. 255 */

	/* state-transition timeouts */
	vad->noinput_timeout = 5000; /* 5 s */
	vad->speech_timeout = 300; /* 0.3 s */
	vad->silence_timeout = 300; /* 0.3 s */

	return vad;
}

就是初始化detector的参数,包括能量阈值、状态转换的时间阈值。

这些值,都提供了设置的接口:

/** Create activity detector (allocated from the given pool) */
MPF_DECLARE(mpf_activity_detector_t*) mpf_activity_detector_create(apr_pool_t *pool);

/** Reset activity detector */
MPF_DECLARE(void) mpf_activity_detector_reset(mpf_activity_detector_t *detector);

/**
 * Set threshold of voice activity (silence) level.
 * A frame whose calculated level is >= this threshold is treated as voice.
 */
MPF_DECLARE(void) mpf_activity_detector_level_set(mpf_activity_detector_t *detector, apr_size_t level_threshold);

/** Set noinput timeout (time spent in the inactive state before a noinput event is reported) */
MPF_DECLARE(void) mpf_activity_detector_noinput_timeout_set(mpf_activity_detector_t *detector, apr_size_t noinput_timeout);

/** Set timeout required to trigger speech (transition from inactive to active state) */
MPF_DECLARE(void) mpf_activity_detector_speech_timeout_set(mpf_activity_detector_t *detector, apr_size_t speech_timeout);

/** Set timeout required to trigger silence (transition from active to inactive state) */
MPF_DECLARE(void) mpf_activity_detector_silence_timeout_set(mpf_activity_detector_t *detector, apr_size_t silence_timeout);

如果使用1.7里缺省的level_threshold 2,我的实测结果是收不到打断事件。

收到媒体包时,调用mpf_activity_detector_process处理,里面维护了一个状态机:

/* Excerpt: the detector is only consulted while recog_request is set on the channel. */
if(recog_channel->recog_request) {
	/* Run the frame through the detector and dispatch on the event it reports. */
	mpf_detector_event_e det_event = mpf_activity_detector_process(recog_channel->detector,frame);
	switch(det_event) {
		case MPF_DETECTOR_EVENT_ACTIVITY:
			/* voice detected: report start of input */
			apt_log(RECOG_LOG_MARK,APT_PRIO_INFO,"Detected Voice Activity " APT_SIDRES_FMT,MRCP_MESSAGE_SIDRES(recog_channel->recog_request));
			demo_recog_start_of_input(recog_channel);
			break;
		case MPF_DETECTOR_EVENT_INACTIVITY:
			/* voice ended: complete the recognition with a success cause */
			apt_log(RECOG_LOG_MARK,APT_PRIO_INFO,"Detected Voice Inactivity " APT_SIDRES_FMT,MRCP_MESSAGE_SIDRES(recog_channel->recog_request));
			demo_recog_recognition_complete(recog_channel,RECOGNIZER_COMPLETION_CAUSE_SUCCESS);
			break;
		case MPF_DETECTOR_EVENT_NOINPUT:
			/* noinput only completes the request when timers_started is TRUE */
			apt_log(RECOG_LOG_MARK,APT_PRIO_INFO,"Detected Noinput " APT_SIDRES_FMT,MRCP_MESSAGE_SIDRES(recog_channel->recog_request));
			if(recog_channel->timers_started == TRUE) {
				demo_recog_recognition_complete(recog_channel,RECOGNIZER_COMPLETION_CAUSE_NO_INPUT_TIMEOUT);
			}
			break;
		default:
			break;
/* NOTE: the quoted excerpt ends here; the closing braces of the switch and the if are not part of it. */
/** Process current frame */
MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;apr_size_t level = 0;if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {/* first, calculate current activity level of processed frame */level = mpf_activity_detector_level_calculate(frame);
#if 0apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector [%"APR_SIZE_T_FMT"]",level);
#endif}if(detector->state == DETECTOR_STATE_INACTIVITY) {if(level >= detector->level_threshold) {/* start to detect activity */mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);}else {detector->duration += CODEC_FRAME_TIME_BASE;if(detector->duration >= detector->noinput_timeout) {/* detected noinput */det_event = MPF_DETECTOR_EVENT_NOINPUT;}}}else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {if(level >= detector->level_threshold) {detector->duration += CODEC_FRAME_TIME_BASE;if(detector->duration >= detector->speech_timeout) {/* finally detected activity */det_event = MPF_DETECTOR_EVENT_ACTIVITY;mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);}}else {/* fallback to inactivity */mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);}}else if(detector->state == DETECTOR_STATE_ACTIVITY) {if(level >= detector->level_threshold) {detector->duration += CODEC_FRAME_TIME_BASE;}else {/* start to detect inactivity */mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);}}else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {if(level >= detector->level_threshold) {/* fallback to activity */mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);}else {detector->duration += CODEC_FRAME_TIME_BASE;if(detector->duration >= detector->silence_timeout) {/* detected inactivity */det_event = MPF_DETECTOR_EVENT_INACTIVITY;mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);}}}return det_event;
}

具体算法调用就是mpf_activity_detector_level_calculate()

static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{apr_size_t sum = 0;apr_size_t count = frame->codec_frame.size/2;const apr_int16_t *cur = frame->codec_frame.buffer;const apr_int16_t *end = cur + count;for(; cur < end; cur++) {if(*cur < 0) {sum -= *cur;}else {sum += *cur;}}return sum / count;
}

对于这个算法,不需要太较真:它只是把一帧内各采样的绝对值累加后求平均值,平均值不小于阈值(level >= level_threshold)就表示有声音,小于阈值则表示静音。并没有噪音检测。如果生产需要,肯定是需要修改的。

unimrcp的voice activity detector相关推荐

  1. 建建自学VoIP之VAD(Voice Activity Detector)和CNG(Comfort Noise Generator)

    语音活动检测(Voice Activity Detection,VAD)又称语音端点检测.语音边界检测.目的是从声音信号流里识别和消除长时间的静音期,以达到在不降低业务质量的情况下节省网络资源的作用, ...

  2. 语音端点检测(Voice Activity Detection,VAD)

    1.VAD的总体步骤:https://www.bbsmax.com/A/1O5EOo73z7/ 2.基于短时能量和过零率的简单实现(实际上精确度高的VAD会提取4种或更多的特征进行判断,这里只介绍两种 ...

  3. unimrcp 实现阿里云的plugin

    1. 环境说明 unimrcp版本V1.7.0 阿里SDK版本:V3.X 操作系统:CentOS7 GCC版本:V4.8.5,注意,使用高版本的GCC可能会有SDK编译兼容问题 2. 修改config ...

  4. activity 生命周期_Activity 源码解析

    Android 应用程序启动过程 Activity启动过程可以分为两种:一种是根activity的启动过程,另一种是普通activity启动过程.根activity指的是应用程序启动的第一个activ ...

  5. 有趣的Github项目万里挑一 !(附论文、项目链接)

    来源:PaperWeekly 本文共1000字,建议阅读5分钟. 本文为你介绍9个最新机器学习开源项目. 本文带你快速 get 每个精选Github项目的亮点和痛点,时刻紧跟 AI 前沿成果. 01 ...

  6. WebRTC详解-zz

    1.WebRTC目的 WebRTC(Web Real-Time Communication)项目的最终目的主要是让Web开发者能够基于浏览器(Chrome\FireFox\...) 轻易快捷开发出丰富 ...

  7. 深度学习核心技术精讲100篇(二十七)-如何利用NLP技术对ASR的query文本进行预处理纠错?

    前言 语音系统中语音内容识别 ( ASR ) 的精准性,是影响智能语音产品发展的关键制约因素,用户query的文本,通常是由ASR系统将用户的语音命令转换而成,但由于技术上的原因,这些由ASR生成的文 ...

  8. 好看的论文千篇一律,有趣的Github项目万里挑一!

    在碎片化阅读充斥眼球的时代,越来越少的人会去关注每篇论文背后的探索和思考. 在这个栏目里,你会快速 get 每篇精选论文的亮点和痛点,时刻紧跟 AI 前沿成果. 点击本文底部的「阅读原文」即刻加入社区 ...

  9. lms自适应滤波器实现噪声干扰的语音恢复_ZLG深度解析语音识别技术

    语音识别已成为人与机器通过自然语言交互重要方式之一,本文将从语音识别的原理以及语音识别算法的角度出发为大家介绍语音识别的方案及详细设计过程. 语言作为人类的一种基本交流方式,在数千年历史中得到持续传承 ...

最新文章

  1. LAMP_apache安装_2
  2. Control.Invoke和Control.BeginInvoke
  3. numpy matplotlib 柱状图
  4. matlab优化算法案例分析与应用_最优化计算与matlab实现(18)——粒子群优化算法——权重改进的粒子群算法...
  5. 设置View单个圆角
  6. Ubuntu 10.04下更行新内核
  7. AI智能内容创作的几个方面
  8. 虚拟目录下apache点击报The requested URL* was not found on this server.
  9. 梦工厂动画CEO:不迷信大数据,只迷信耐心与好故事
  10. 信创云:打造自主可控云基础设施 | 厂商征集
  11. PHP 实现防抖功能(防重复请求)
  12. 计算机病毒大多数具有自身复制的功能,《计算机基础》第五章练习题
  13. 资深摄影师眼中 青岛值得一游的景点有哪些之4
  14. STM32F4+DP83848以太网通信指南系列知识储备
  15. IIS网站部署步骤(通过域名访问)
  16. Day 5 字典以及字典的用法
  17. Android 配置引入arr报错解决
  18. 什么是Unicode编码
  19. Stata:Logit模型评介
  20. 自定义开源 Piwigo 相册——分享生活、记录漫漫人生路中的美好时光和感动

热门文章

  1. 风云直播播放器 [封装网页播放]-在线免费看TV
  2. Java辅助之反射,序列与反序列
  3. Android实现换发型功能,换发型相机app下载 换发型相机 for Android V12.0.5 安卓手机版 下载-脚本之家...
  4. linux下提取raw镜像文件,关于Linux系统怎么选择qcow2和raw镜像格式的讲解
  5. IC设计- 浅谈各种验证 - 功能验证,形式验证,原型验证
  6. 计步器源代码实现的俩种方式
  7. (附源码)计算机毕业设计SSM基于百度AI平台的财税报销系统
  8. Python爬虫-爬取贴吧中每个帖子内的楼主图片
  9. 如何在Linux中查看mac地址
  10. 所有的分手,都是其中一个人蓄谋已久!