Alibaba Tianchi Competition: Food Sound Recognition

While taking breaks from writing my graduation thesis, I entered another Alibaba Tianchi competition. I had only worked on CV before, so this was my first attempt at audio recognition; here is a record of the process.

Strategy:
1. Mel-spectrogram features, MFCC (mel cepstrum) features, and a mix of the two (see the sketch below)
2. Multi-model testing

If you want to try this project, see the links below:
Alibaba Tianchi competition
My code:
Link: https://pan.baidu.com/s/1pX21kMX901O7QKcb-m-B6g
Extraction code: fasf
(shared via a Baidu Netdisk SVIP V5 account)
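Before the full script, here is a minimal sketch of strategy 1: the difference between a mel-spectrogram feature and an MFCC feature in librosa. It is illustrative only; 'example.wav' is a placeholder path, and the time-averaging mirrors what the script below does.

import librosa
import numpy as np

# Load one clip ('example.wav' is a placeholder path).
y, sr = librosa.load('example.wav', res_type='kaiser_fast')

# Strategy 1a: mel spectrogram, averaged over time -> one vector per clip.
mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)

# Strategy 1b: MFCC (mel-frequency cepstrum), 128 coefficients.
mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=128).T, axis=0)

print(mel.shape, mfcc.shape)  # (128,) (128,) with librosa's default n_mels=128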

# Basic libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import minmax_scale
# Libraries for building the classification models
from keras.models import Sequential, Model
from keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout, LSTM, BatchNormalization, Input, Conv1D, \
    GlobalAveragePooling1D, concatenate, Permute, MaxPool1D, \
    Reshape, Lambda, RepeatVector, Multiply
from keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import os
import librosa
import librosa.display
import glob
from tqdm import tqdm
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, CSVLogger, TensorBoard
from keras import optimizers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.ensemble import VotingClassifier
from keras import regularizers
from sklearn.utils import class_weight

# Build the label map: each class name maps to an integer id.
label_dict = {'aloe': 0, 'burger': 1, 'cabbage': 2, 'candied_fruits': 3,
              'carrots': 4, 'chips': 5, 'chocolate': 6, 'drinks': 7,
              'fries': 8, 'grapes': 9, 'gummies': 10, 'ice-cream': 11,
              'jelly': 12, 'noodles': 13, 'pickles': 14, 'pizza': 15,
              'ribs': 16, 'salmon': 17, 'soup': 18, 'wings': 19}
label_dict_inv = {v: k for k, v in label_dict.items()}


def extract_features(parent_dir, sub_dirs, max_file=10, file_ext="*.wav", flag="mix"):
    label, feature = [], []
    for sub_dir in sub_dirs:
        # Iterate over all files of this class in the dataset.
        for fn in tqdm(glob.glob(os.path.join(parent_dir, sub_dir, file_ext))[:max_file]):
            label_name = fn.split('/')[-1].split('\\')[0]
            # label_name = fn.split('/')[-2]
            label.extend([label_dict[label_name]])
            X, sample_rate = librosa.load(fn, res_type='kaiser_fast')
            if flag == "mfcc":
                # MFCC (mel cepstrum), averaged over time.
                mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T, axis=0)
                feature.append(mfcc)
            elif flag == "mix":
                # MFCC + zero crossings on a short slice + chromagram, stacked into one vector.
                n0 = 9000
                n1 = 9100
                mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T, axis=0)
                zero_crossings = librosa.zero_crossings(X[n0:n1], pad=False)
                temp = np.hstack((mfcc, zero_crossings))
                hop_length = 512
                chromagram = np.mean(librosa.feature.chroma_stft(y=X, sr=sample_rate, hop_length=hop_length).T, axis=0)
                temp = np.hstack((temp, chromagram))
                feature.append(temp)
            else:
                # Mel spectrogram, averaged over time, as the feature.
                mels = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
                feature.extend([mels])
    # Return the features and their labels.
    return [feature, label]


# Change these directories to match your setup.
# parent_dir = './train_sample/'
parent_dir = './train/'
save_dir = "./"
folds = sub_dirs = np.array(['aloe', 'burger', 'cabbage', 'candied_fruits', 'carrots', 'chips',
                             'chocolate', 'drinks', 'fries', 'grapes', 'gummies', 'ice-cream',
                             'jelly', 'noodles', 'pickles', 'pizza', 'ribs', 'salmon', 'soup', 'wings'])
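# Note (illustrative, not part of the original script): each clip is reduced to a
# single 128-dimensional vector, which the models below consume in two layouts --
# reshaped to (16, 8, 1) as a tiny "image" for the Conv2D branch, and to (128, 1)
# as a sequence for the LSTM/Conv1D branch:
#   np.zeros(128).reshape(16, 8, 1).shape  # -> (16, 8, 1)
#   np.zeros(128).reshape(128, 1).shape    # -> (128, 1)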
# Extract the features and the labels.
# temp = extract_features(parent_dir, sub_dirs, max_file=1000)
# # features, labels = extract_features(parent_dir, sub_dirs, max_file=100)
# temp = np.array(temp)
# data = temp.transpose()
# # Features
# X = np.vstack(data[:, 0])
# # Labels
# Y = np.array(data[:, 1])
# print('Shape of X:', X.shape)
# print('Shape of Y:', Y.shape)
# # In Keras, to_categorical converts a class vector to a binary (0/1) matrix.
# Y = to_categorical(Y)
# '''Final data'''
# print(X.shape)
# print(Y.shape)
# np.save('features', X)
# np.save('label', Y)


def dnn(cnn_shape=(16, 8, 1), lstm_shape=(128, 1)):
    """Dual-input model: a 2-D CNN branch plus an LSTM-gated Conv1D branch."""
    cnn_input = Input(shape=cnn_shape, name='cnn_input')
    lstm_input = Input(shape=lstm_shape, name='lstm_input')
    input_dim = (16, 8, 1)
    # LSTM branch: its softmax output acts as an attention weight over the Conv1D features.
    x = LSTM(64, return_sequences=False)(lstm_input)
    x = Dense(64, activation='softmax')(x)
    y1 = Conv1D(64, 5, padding='same', activation='relu')(lstm_input)
    y1 = MaxPool1D(pool_size=3)(y1)
    dim_num = y1.shape[1]
    x = RepeatVector(dim_num)(x)
    y1 = Multiply()([y1, x])
    # CNN branch on the (16, 8, 1) feature map.
    y = Conv2D(64, (3, 3), padding="same", activation="relu", input_shape=input_dim)(cnn_input)
    y = BatchNormalization()(y)
    y = MaxPool2D(pool_size=(2, 2))(y)
    y = Dropout(0.15)(y)
    y = Conv2D(128, (3, 3), padding="same", activation="relu")(y)
    y = BatchNormalization()(y)
    y = MaxPool2D(pool_size=(2, 2))(y)
    y = Dropout(0.3)(y)
    y = Conv2D(128, (3, 3), padding="same", activation="relu")(y)
    y = BatchNormalization()(y)
    y = Dropout(0.2)(y)
    y = Reshape((-1, 64))(y)
    # Concatenate the two branches along the time axis, then pool and classify.
    output = concatenate([y, y1], axis=1)
    output = GlobalAveragePooling1D()(output)
    # output = Flatten()(y)
    # output = Dense(1024, activation='relu')(output)
    output = Dense(20, activation="softmax")(output)
    model = Model(inputs=[cnn_input, lstm_input], outputs=output)
    model.summary()
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def cnn(input_shape=(16, 8, 1)):
    model_conv = Sequential()
    input_dim = input_shape  # input size
    model_conv.add(Conv2D(64, (5, 5), padding="same", activation="relu", input_shape=input_dim))  # convolution
    model_conv.add(MaxPool2D(pool_size=(2, 2)))  # max pooling
    model_conv.add(Conv2D(128, (3, 3), padding="same", activation="relu"))  # convolution
    model_conv.add(MaxPool2D(pool_size=(2, 2)))  # max pooling
    model_conv.add(Dropout(0.1))
    model_conv.add(Flatten())  # flatten
    model_conv.add(Dense(1024, activation="relu"))
    model_conv.add(Dense(100, activation='relu'))
    model_conv.add(Dense(20, activation="softmax"))  # output layer: 20 units, one probability per class
    optimizer = optimizers.Adam(lr=0.001)
    model_conv.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model_conv.summary()
    return model_conv


def train(model=cnn, nf=True, feature_path="features_mfcc.npy", label_path="label_mfcc.npy"):
    X = np.load(feature_path)
    Y = np.load(label_path)
    # Callbacks shared by both training modes (originally these were only defined
    # inside the k-fold loop, which broke the hold-out branch below).
    checkpoint = ModelCheckpoint("./record/weight/dnn_mfcc-ep{epoch:03d}-loss{loss:.3f}-val_acc{val_acc:.3f}.h5",
                                 monitor="val_acc", verbose=1, save_best_only=True, mode='max')
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', patience=100, mode='auto',
                                  factor=0.1, cooldown=0, min_lr=1e-5, verbose=1)
    csvlogger = CSVLogger(filename='./record/log/train.csv', append=True)
    earlystopping = EarlyStopping(monitor='val_acc', min_delta=0, patience=100, verbose=1, mode='max')
    tensorboard = TensorBoard(log_dir="./record/log/")
    callbacks = [checkpoint, reduce_lr, csvlogger, earlystopping, tensorboard]
    if nf:
        # 5-fold cross-validation.
        kf = KFold(n_splits=5, shuffle=True, random_state=2020)
        for train_index, valid_index in kf.split(X, Y):
            train_x, val_x = X[train_index], X[valid_index]
            train_y, val_y = Y[train_index], Y[valid_index]
            if model == cnn:
                # The 128-dim feature vector becomes a (16, 8, 1) "image" for the CNN.
                train_x = train_x.reshape(-1, 16, 8, 1)
                val_x = val_x.reshape(-1, 16, 8, 1)
                model.fit(train_x, train_y, epochs=500, batch_size=128,
                          validation_data=(val_x, val_y), callbacks=callbacks)
            else:
                # The dual-input model also gets the same vector as a (128, 1) sequence.
                X_train = train_x.reshape(-1, 16, 8, 1)
                X_val = val_x.reshape(-1, 16, 8, 1)
                lstm_train = train_x.reshape(-1, 128, 1)
                lstm_val = val_x.reshape(-1, 128, 1)
                model.fit({'cnn_input': X_train, 'lstm_input': lstm_train}, train_y,
                          epochs=500, batch_size=128,
                          validation_data=({'cnn_input': X_val, 'lstm_input': lstm_val}, val_y),
                          callbacks=callbacks)
    else:
        # Single stratified hold-out split.
        train_x, val_x, train_y, val_y = train_test_split(X, Y, random_state=1, stratify=Y)
        if model == cnn:
            train_x = train_x.reshape(-1, 16, 8, 1)
            val_x = val_x.reshape(-1, 16, 8, 1)
            model.fit(train_x, train_y, epochs=500, batch_size=128,
                      validation_data=(val_x, val_y), callbacks=callbacks)
        else:
            X_train = train_x.reshape(-1, 16, 8, 1)
            X_val = val_x.reshape(-1, 16, 8, 1)
            lstm_train = train_x.reshape(-1, 128, 1)
            lstm_val = val_x.reshape(-1, 128, 1)
            model.fit({'cnn_input': X_train, 'lstm_input': lstm_train}, train_y,
                      epochs=1000, batch_size=128,
                      validation_data=({'cnn_input': X_val, 'lstm_input': lstm_val}, val_y),
                      callbacks=callbacks)


def extract_test_features(test_dir, file_ext="*.wav", flag="mix"):
    # Same feature extraction as extract_features, but for the unlabeled test set
    # (renamed here so it no longer shadows the training version).
    feature = []
    for fn in tqdm(glob.glob(os.path.join(test_dir, file_ext))[:]):  # iterate over all test files
        X, sample_rate = librosa.load(fn, res_type='kaiser_fast')
        if flag == "mfcc":
            # MFCC (mel cepstrum), averaged over time.
            mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T, axis=0)
            feature.append(mfcc)
        elif flag == "mix":
            n0 = 9000
            n1 = 9100
            mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T, axis=0)
            zero_crossings = librosa.zero_crossings(X[n0:n1], pad=False)
            temp = np.hstack((mfcc, zero_crossings))
            hop_length = 512
            chromagram = np.mean(librosa.feature.chroma_stft(y=X, sr=sample_rate, hop_length=hop_length).T, axis=0)
            temp = np.hstack((temp, chromagram))
            feature.append(temp)
        else:
            # Mel spectrogram as the feature.
            mels = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            feature.extend([mels])
    return feature


def voting(preds_conv, preds_dense, preds_lstm):
    # Floor-dividing each probability row by its row maximum turns it into a hard
    # one-hot vote (1 at the argmax, 0 elsewhere); summing the votes and taking
    # the argmax gives the majority decision.
    prob_max = np.tile(np.max(preds_conv, axis=1).reshape(-1, 1), preds_conv.shape[1])
    preds_c = preds_conv // prob_max
    prob_max = np.tile(np.max(preds_dense, axis=1).reshape(-1, 1), preds_dense.shape[1])
    preds_d = preds_dense // prob_max
    prob_max = np.tile(np.max(preds_lstm, axis=1).reshape(-1, 1), preds_lstm.shape[1])
    preds_l = preds_lstm // prob_max
    result_voting = preds_c + preds_d + preds_l
    preds_voting = np.argmax(result_voting, axis=1)
    return preds_voting


def mul_test(cnn, dnn, cnn_weight, dnn_weight, test_path='./test_a/'):
    X_test = extract_test_features(test_path, flag="mfcc")
    X_test = np.vstack(X_test)
    cnn.load_weights(cnn_weight)
    preds_cnn = cnn.predict(X_test.reshape(-1, 16, 8, 1))
    dnn.load_weights(dnn_weight)
    x_test = X_test.reshape(-1, 16, 8, 1)
    lstm_test = X_test.reshape(-1, 128, 1)
    preds_dnn = dnn.predict({'cnn_input': x_test, 'lstm_input': lstm_test})
    # The dnn predictions are passed twice, so they carry double weight in the vote.
    preds = voting(preds_cnn, preds_dnn, preds_dnn)
    preds = [label_dict_inv[x] for x in preds]
    path = glob.glob('./test_a/*.wav')
    result = pd.DataFrame({'name': path, 'label': preds})
    result['name'] = result['name'].apply(lambda x: x.split('\\')[-1])
    result.to_csv('submit4.csv', index=None)


def single_test(model, weight, test_path='./test_a/'):
    X_test = extract_test_features(test_path, flag="mfcc")
    X_test = np.vstack(X_test)
    model.load_weights(weight)
    x_test = X_test.reshape(-1, 16, 8, 1)
    lstm_test = X_test.reshape(-1, 128, 1)
    predictions = model.predict({'cnn_input': x_test, 'lstm_input': lstm_test})
    # predictions = model.predict(x_test)
    preds = np.argmax(predictions, axis=1)
    preds = [label_dict_inv[x] for x in preds]
    path = glob.glob('./test_a/*.wav')
    result = pd.DataFrame({'name': path, 'label': preds})
    result['name'] = result['name'].apply(lambda x: x.split('\\')[-1])
    result.to_csv('submit3.csv', index=None)


if __name__ == "__main__":
    # Rebind the names to model instances (train() compares against these globals).
    dnn = dnn(cnn_shape=(16, 8, 1), lstm_shape=(128, 1))
    cnn = cnn(input_shape=(16, 8, 1))
    # train(model=dnn, nf=True, feature_path="features_mfcc.npy", label_path="label_mfcc.npy")
    cnn_weight = "./record/weight/cnn_mfcc-ep001-loss0.000-val_acc1.000.h5"
    dnn_weight = "./record/weight/dnn_mfcc-ep001-loss0.003-val_acc1.000.h5"
    mul_test(cnn, dnn, cnn_weight, dnn_weight)
    # single_test(dnn, dnn_weight)
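A word on the voting scheme above: floor-dividing each probability row by its row maximum produces a hard one-hot vote (1 at the argmax, 0 elsewhere; ties would all receive a 1), so summing the three vote matrices and taking the argmax implements majority voting. A toy check of that behaviour, with a made-up 2-sample, 3-class array:

import numpy as np

p = np.array([[0.2, 0.5, 0.3],
              [0.7, 0.1, 0.2]])
prob_max = np.tile(np.max(p, axis=1).reshape(-1, 1), p.shape[1])
print(p // prob_max)
# [[0. 1. 0.]
#  [1. 0. 0.]]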
