基于深度学习LSTM算法生成音乐

整套架构图

一、背景知识

1.概念（来自百度百科）:

notes(音符):用来记录不同长短的音的进行符号。全音符、二分音符、四分音符、八分音符、十六分音符是最常见的音符。是五线谱中最重要的元素

chord(和弦):和弦是乐理上的一个概念，指的是一定音程关系的一组声音。将三个和三个以上的音，按三度叠置的关系，在纵向上加以结合，就成为和弦

如果无法使用TensorFlow或者配置不够强大，可以使用Colaboratory网址，在线运行，里面已经集成了TensorFlow，pandas等包，很方便使用

二、读取MIDI文件

读取mid文件里面的音符和和弦信息，我这边使用了70首mid格式的文件作为训练样本，可以自己从网上下载。

import os

import numpy as np
import tensorflow as tf
from music21 import converter, instrument, note, chord, stream

# Read the notes used as training data
def get_notes(filepath='D:/log/music_midi/'):
    """Extract the note/chord sequence from every MIDI file in a directory.

    Each ``note.Note`` is stored as ``str(element.pitch)`` and each
    ``chord.Chord`` as its ``normalOrder`` integers joined by ``'.'``.
    A file that raises while being parsed is reported and skipped instead of
    being silently ignored. The collected list is also appended, as text, to
    a file named ``Note`` in the current working directory.

    Args:
        filepath: Directory that contains the MIDI files.

    Returns:
        A list of strings, in the order ``os.listdir`` yields the files.
    """
    Notes = []
    for file in os.listdir(filepath):
        try:
            # Named `midi` so the imported music21 `stream` module is not shadowed.
            midi = converter.parse(os.path.join(filepath, file))
            instru = instrument.partitionByInstrument(midi)
            if instru:
                # Instrument parts are present: use the first one.
                elements = instru.parts[0].recurse()
            else:
                # No instrument parts: take the notes directly.
                elements = midi.flat.notes
            for element in elements:
                if isinstance(element, note.Note):
                    Notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    # Chords are stored as integers, which are easier to process.
                    Notes.append('.'.join(str(n) for n in element.normalOrder))
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole scan.
            print('Skipping %s: %s' % (file, exc))
    with open('Note', 'a+') as f:
        f.write(str(Notes))
    return Notes

先读取第一个文件测试看看，可以看到乐器是piano，C major调，4/4拍，还有Note和Chord的信息

保存之后，大概是这个样子的数据:

三、构建神经网络

本次神经网络使用LSTM网络（Long Short-Term Memory，长短期记忆网络）。它在普通RNN的基础上，为隐藏层各神经单元增加了记忆单元，使时间序列上的记忆信息可控；信息每次在隐藏层各单元间传递时，都会经过几个可控门（遗忘门、输入门、候选门、输出门），用来控制对之前信息和当前信息的记忆与遗忘程度，从而使RNN网络具备长期记忆功能。

架构图：

def get_model(inputs, notes_len, weights_file=None):
    """Build and compile the stacked-LSTM network, optionally loading weights.

    Args:
        inputs: Array whose ``shape[1]`` and ``shape[2]`` define the input
            shape of the first LSTM layer.
        notes_len: Width of the final Dense layer (number of distinct notes).
        weights_file: Optional path handed to ``model.load_weights``.

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        # 512 units; return_sequences=True emits the whole output sequence.
        layers.LSTM(512,
                    input_shape=(inputs.shape[1], inputs.shape[2]),
                    return_sequences=True),
        layers.Dropout(0.3),  # drop 30% of the units to limit overfitting
        layers.LSTM(512, return_sequences=True),
        layers.Dropout(0.3),
        layers.LSTM(512),  # default return_sequences=False: last output only
        layers.Dense(256),  # fully connected layer with 256 units
        layers.Dropout(0.3),
        layers.Dense(notes_len),  # one output per distinct note
        layers.Activation('softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    if weights_file is not None:
        model.load_weights(weights_file)
    return model

四、训练数据

def train():
    """Train the LSTM on the notes returned by ``get_notes()``.

    Every window of 100 consecutive notes is used to predict the note that
    follows it. A checkpoint is written whenever the training loss improves.

    Returns:
        A tuple ``(network_input, normal_network_input, notes_len,
        note_name)``: the raw integer-encoded windows, the same windows
        divided by ``notes_len``, the number of distinct notes, and the
        sorted list of distinct notes.

    Raises:
        ValueError: If there are not more than 100 notes to train on.
    """
    notes = get_notes()
    note_name = sorted(set(notes))  # sorted distinct note names
    notes_len = len(note_name)
    sequence_length = 100
    if len(notes) <= sequence_length:
        raise ValueError(
            'need more than %d notes to build training sequences, got %d'
            % (sequence_length, len(notes)))

    # Map each note name to an integer so it can be fed to the network.
    note_dict = {name: idx for idx, name in enumerate(note_name)}
    network_input = []
    network_output = []
    for i in range(len(notes) - sequence_length):
        # 100 notes in, 1 note out.
        sequence_in = notes[i: i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_dict[k] for k in sequence_in])
        network_output.append(note_dict[sequence_out])

    network_input = np.reshape(
        network_input, (len(network_input), sequence_length, 1))
    # Keep the raw integer windows: generation needs them as seeds.
    normal_network_input = network_input / float(notes_len)
    # Pin num_classes so the one-hot width always matches the model output,
    # even when the highest-indexed note never occurs as a target.
    network_output = tf.keras.utils.to_categorical(
        network_output, num_classes=notes_len)

    model = get_model(normal_network_input, notes_len)
    filepath = "weights-{epoch:02d}-{loss:.2f}.hdf5"
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min',  # loss is minimised (a metric such as val_acc would use max)
    )
    callbacks_list = [checkpoint]
    # 100 epochs, mini-batches of 128.
    model.fit(normal_network_input, network_output, epochs=100,
              batch_size=128, callbacks=callbacks_list)
    return network_input, normal_network_input, notes_len, note_name

我用8G内存的Windows电脑在跑，这个等待时间已经不是“漫长”可以形容的了。具体多久呢？完整跑一个epoch大概需要2小时，我设置了跑100个epoch，实在等不了，最终跑了大概50个就暂停了……我现在的batch_size是128，如果电脑配置允许，可以调高一些，速度会比较快。

五、生成音乐

def generate_notes(model, network_input, note_name, notes_len, num_notes=1000):
    """Generate a note sequence by repeatedly predicting the next note.

    A random window of ``network_input`` seeds the generation. After each
    prediction the window slides by one: the oldest entry is dropped and the
    predicted index is appended.

    Args:
        model: Object with a ``predict(batch, verbose=0)`` method whose
            result is passed to ``np.argmax``.
        network_input: Integer-encoded windows, indexable by sequence.
        note_name: Distinct note names; position ``i`` decodes index ``i``.
        notes_len: Divisor applied to the window before prediction.
        num_notes: Number of notes to generate.

    Returns:
        A list of ``num_notes`` note names.

    Raises:
        ValueError: If ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError('network_input is empty; no seed sequence available')

    notedic = dict(enumerate(note_name))  # integer index -> note name
    # randint's upper bound is exclusive, so this can pick every window.
    randindex = np.random.randint(0, len(network_input))
    # Flatten to a plain list so appended indices keep the window rectangular.
    pattern = np.ravel(network_input[randindex]).tolist()

    predictions = []
    for _ in range(num_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(notes_len)  # normalise
        probabilities = model.predict(prediction_input, verbose=0)
        index = int(np.argmax(probabilities))
        predictions.append(notedic[index])
        # Slide the window forward by one note.
        pattern = pattern[1:] + [index]
    return predictions
#生成mid音乐
def _find_best_weights():
    """Return the checkpoint in the working directory with the lowest loss."""
    import re

    # Matches the names written by train(): weights-<epoch>-<loss>.hdf5
    name_pattern = re.compile(r'^weights-\d+-(\d+(?:\.\d+)?)\.hdf5$')
    best_file = None
    best_loss = None
    for name in os.listdir():
        match = name_pattern.match(name)
        if match is None:
            continue
        # Compare as numbers: as strings, "10.50" would sort before "4.56".
        loss = float(match.group(1))
        if best_loss is None or loss < best_loss:
            best_file, best_loss = name, loss
    if best_file is None:
        raise ValueError('no weights-<epoch>-<loss>.hdf5 checkpoint found')
    return best_file


def _build_output_notes(predictions):
    """Turn predicted strings into music21 Note/Chord objects, one per offset."""
    output_notes = []
    offset = 0
    for data in predictions:
        if ('.' in data) or data.isdigit():
            # Dot-separated integers (or a single integer) encode a chord.
            chord_notes = []
            for current_note in data.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        else:
            new_note = note.Note(data)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        offset += 1
    return output_notes


def create_music():
    """Train the model, generate notes with the best checkpoint, write a MIDI.

    Unpacks four values from ``train()``, loads the lowest-loss checkpoint
    from the current working directory, generates notes from it and writes
    them to ``output1.mid``.

    Raises:
        ValueError: If no checkpoint file is found.
    """
    network_input, normal_network_input, notes_len, note_name = train()
    best_weights = _find_best_weights()
    model = get_model(normal_network_input, notes_len, best_weights)
    prediction = generate_notes(model, network_input, note_name, notes_len)
    output_notes = _build_output_notes(prediction)
    # Assemble the music stream and write it out as a MIDI file.
    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='output1.mid')

经过漫长的等待，激动人心的时刻到了，终于创作出了第一首歌，用QQ影音打开播放，听起来还是不错的。

完整代码

import tensorflow as tf
import os
import numpy as np
from music21 import converter, instrument, note, chord, stream#读取训练数据的Notes
def get_notes(filepath='D:/log/music_midi/'):
    """Extract the note/chord sequence from every MIDI file in a directory.

    Each ``note.Note`` is stored as ``str(element.pitch)`` and each
    ``chord.Chord`` as its ``normalOrder`` integers joined by ``'.'``.
    A file that raises while being parsed is reported and skipped instead of
    being silently ignored.

    Args:
        filepath: Directory that contains the MIDI files.

    Returns:
        A list of strings, in the order ``os.listdir`` yields the files.
    """
    Notes = []
    for file in os.listdir(filepath):
        try:
            # Named `midi` so the imported music21 `stream` module is not shadowed.
            midi = converter.parse(os.path.join(filepath, file))
            instru = instrument.partitionByInstrument(midi)
            if instru:
                # Instrument parts are present: use the first one.
                elements = instru.parts[0].recurse()
            else:
                # No instrument parts: take the notes directly.
                elements = midi.flat.notes
            for element in elements:
                if isinstance(element, note.Note):
                    Notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    # Chords are stored as integers, which are easier to process.
                    Notes.append('.'.join(str(n) for n in element.normalOrder))
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole scan.
            print('Skipping %s: %s' % (file, exc))
    return Notes
#构建神经网络模型
def get_model(inputs, notes_len, weights_file=None):
    """Assemble the three-layer LSTM classifier and compile it.

    Args:
        inputs: Array whose ``shape[1]`` and ``shape[2]`` define the input
            shape of the first LSTM layer.
        notes_len: Width of the final Dense layer (number of distinct notes).
        weights_file: Optional path handed to ``model.load_weights``.

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    keras_layers = tf.keras.layers
    window_shape = (inputs.shape[1], inputs.shape[2])
    stack = (
        # 512 units; return_sequences=True emits the whole output sequence.
        keras_layers.LSTM(512, input_shape=window_shape, return_sequences=True),
        keras_layers.Dropout(0.3),  # drop 30% of the units to limit overfitting
        keras_layers.LSTM(512, return_sequences=True),
        keras_layers.Dropout(0.3),
        keras_layers.LSTM(512),  # default return_sequences=False: last output only
        keras_layers.Dense(256),  # fully connected layer with 256 units
        keras_layers.Dropout(0.3),
        keras_layers.Dense(notes_len),  # one output per distinct note
        keras_layers.Activation('softmax'),
    )
    model = tf.keras.models.Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    if weights_file is None:
        return model
    model.load_weights(weights_file)
    return model
#训练模型
def train():
    """Train the LSTM on the notes returned by ``get_notes()``.

    Every window of 100 consecutive notes is used to predict the note that
    follows it. A checkpoint is written whenever the training loss improves.

    Returns:
        A tuple ``(network_input, normal_network_input, notes_len,
        note_name)``: the raw integer-encoded windows, the same windows
        divided by ``notes_len``, the number of distinct notes, and the
        sorted list of distinct notes.

    Raises:
        ValueError: If there are not more than 100 notes to train on.
    """
    notes = get_notes()
    note_name = sorted(set(notes))  # sorted distinct note names
    notes_len = len(note_name)
    sequence_length = 100
    if len(notes) <= sequence_length:
        raise ValueError(
            'need more than %d notes to build training sequences, got %d'
            % (sequence_length, len(notes)))

    # Map each note name to an integer so it can be fed to the network.
    note_dict = {name: idx for idx, name in enumerate(note_name)}
    network_input = []
    network_output = []
    for i in range(len(notes) - sequence_length):
        # 100 notes in, 1 note out.
        sequence_in = notes[i: i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_dict[k] for k in sequence_in])
        network_output.append(note_dict[sequence_out])

    network_input = np.reshape(
        network_input, (len(network_input), sequence_length, 1))
    normal_network_input = network_input / float(notes_len)  # normalise
    # Pin num_classes so the one-hot width always matches the model output,
    # even when the highest-indexed note never occurs as a target.
    network_output = tf.keras.utils.to_categorical(
        network_output, num_classes=notes_len)

    model = get_model(normal_network_input, notes_len)
    filepath = "weights-{epoch:02d}-{loss:.2f}.hdf5"
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min',  # loss is minimised (a metric such as val_acc would use max)
    )
    callbacks_list = [checkpoint]
    # 100 epochs, mini-batches of 128.
    model.fit(normal_network_input, network_output, epochs=100,
              batch_size=128, callbacks=callbacks_list)
    return network_input, normal_network_input, notes_len, note_name
#生成音符
def generate_notes(model, network_input, note_name, notes_len, num_notes=1000):
    """Generate a note sequence by repeatedly predicting the next note.

    A random window of ``network_input`` seeds the generation. After each
    prediction the window slides by one: the oldest entry is dropped and the
    predicted index is appended.

    Args:
        model: Object with a ``predict(batch, verbose=0)`` method whose
            result is passed to ``np.argmax``.
        network_input: Integer-encoded windows, indexable by sequence.
        note_name: Distinct note names; position ``i`` decodes index ``i``.
        notes_len: Divisor applied to the window before prediction.
        num_notes: Number of notes to generate.

    Returns:
        A list of ``num_notes`` note names.

    Raises:
        ValueError: If ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError('network_input is empty; no seed sequence available')

    notedic = dict(enumerate(note_name))  # integer index -> note name
    # randint's upper bound is exclusive, so this can pick every window.
    randindex = np.random.randint(0, len(network_input))
    # Flatten to plain scalars: a list of shape-(1,) arrays mixed with the
    # appended scalar indices would no longer form a rectangular array.
    pattern = np.ravel(network_input[randindex]).tolist()

    predictions = []
    for _ in range(num_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(notes_len)  # normalise
        # verbose=0 keeps predict() from logging to standard output.
        probabilities = model.predict(prediction_input, verbose=0)
        index = int(np.argmax(probabilities))
        predictions.append(notedic[index])
        # Slide the window forward by one note.
        pattern = pattern[1:] + [index]
    return predictions
#生成mid音乐
def _find_best_weights():
    """Return the checkpoint in the working directory with the lowest loss."""
    import re

    # Matches the names written by train(): weights-<epoch>-<loss>.hdf5
    name_pattern = re.compile(r'^weights-\d+-(\d+(?:\.\d+)?)\.hdf5$')
    best_file = None
    best_loss = None
    for name in os.listdir():
        match = name_pattern.match(name)
        if match is None:
            continue
        # Compare as numbers: as strings, "10.50" would sort before "4.56".
        loss = float(match.group(1))
        if best_loss is None or loss < best_loss:
            best_file, best_loss = name, loss
    if best_file is None:
        raise ValueError('no weights-<epoch>-<loss>.hdf5 checkpoint found')
    return best_file


def _build_output_notes(predictions):
    """Turn predicted strings into music21 Note/Chord objects, one per offset."""
    output_notes = []
    offset = 0
    for data in predictions:
        if ('.' in data) or data.isdigit():
            # Dot-separated integers (or a single integer) encode a chord.
            chord_notes = []
            for current_note in data.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        else:
            new_note = note.Note(data)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        offset += 1
    return output_notes


def create_music():
    """Generate notes from the best saved checkpoint and write a MIDI file.

    Rebuilds the integer-encoded 100-note windows from ``get_notes()``
    (without retraining), loads the lowest-loss checkpoint from the current
    working directory, generates notes and writes them to ``output1.mid``.

    Raises:
        ValueError: If no checkpoint file is found.
    """
    notes = get_notes()
    note_name = sorted(set(notes))
    notes_len = len(note_name)
    sequence_length = 100
    # Map each note name to an integer, as done for training.
    note_dict = {name: idx for idx, name in enumerate(note_name)}
    network_input = [
        [note_dict[k] for k in notes[i: i + sequence_length]]
        for i in range(len(notes) - sequence_length)
    ]
    network_input = np.reshape(
        network_input, (len(network_input), sequence_length, 1))
    normal_network_input = network_input / float(notes_len)  # normalise

    best_weights = _find_best_weights()
    print('最佳模型文件为:' + best_weights)
    model = get_model(normal_network_input, notes_len, best_weights)
    predictions = generate_notes(model, network_input, note_name, notes_len)
    output_notes = _build_output_notes(predictions)
    # Assemble the music stream and write it out as a MIDI file.
    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='output1.mid')
if __name__ == '__main__':
    # train()  # run this instead when training the model
    create_music()

参考文件:慕课网TensorFlow教程

基于深度学习LSTM算法生成音乐相关推荐

DL之Yolov3：基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测
DL之Yolov3:基于深度学习Yolov3算法实现视频目标检测之对<我要打篮球>视频段进行实时目标检测目录输出结果设计思路核心代码相关文章成功解决AttributeError ...
DL之Yolov3：基于深度学习Yolov3算法实现视频目标检测之对《俄罗斯总统普京对沙特王储摊的“友好摊手”瞬间—东道主俄罗斯5-0完胜沙特》视频段实时检测
DL之Yolov3:基于深度学习Yolov3算法实现视频目标检测之对<俄罗斯总统普京对沙特王储摊的"友好摊手"瞬间-东道主俄罗斯5-0完胜沙特>视频段实时检测导读 ...
极限元语音算法专家刘斌：基于深度学习的语音生成问题
一.深度学习在语音合成中的应用语音合成主要采用波形拼接合成和统计参数合成两种方式.波形拼接语音合成需要有足够的高质量发音人录音才能够合成高质量的语音,它在工业界中得到了广泛使用.统计参数语音合成虽然 ...
基于深度学习的宋词生成
<自然语言处理>课程报告摘要宋词是一种相对于古体诗的新体诗歌之一,为宋代儒客文人智慧精华,标志宋代文学的最高成就.宋词生成属于自然语言处理领域的文本生成模块,当前文本生成领域主要包括 ...
图像重建算法_基于深度学习图像重建算法(DLIR)对CT图像质量和剂量优化的研究：体模实验...
编者按:今年Joël Greffier博士等在European Radiology (IF 4.1)上发表了题为<Image quality and dose reduction opportu ...
基于深度学习的新闻摘要生成算法实现与详解（Encoder-Decoder框架模型）
目录摘要: 文本摘要生成概述: Encoder-Decoder模式思想: 数据集描述: 模型构建与代码描述(LSTM+Attention) 总结: 参考文献: 摘要: 摘要是文本的主要内容和核心思想 ...
推荐基于深度学习实时同步生成2D动画口型算法
概述实时二维动画是一种相当新颖而强大的交流形式,它使表演者可以实时控制卡通人物,同时与其他演员或观众互动和即兴表演. 最近的例子包括史蒂芬·科尔伯特(Stephen Colbert)在<后期秀 ...
基于深度学习LSTM分类进行故障检测（Matlab代码实现）
推荐：你想用深度学习谱写自己的音乐吗？这篇指南来帮助你！（附代码）...
作者:ARAVIND PAI 翻译:吴金笛校对:和中华本文长度为6800字,建议阅读15分钟本文手把手带你使用python编写一个自动生成音乐的模型. 总览学习如何开发一个自动生成音乐 ...

基于深度学习LSTM算法生成音乐

基于深度学习LSTM算法生成音乐相关推荐

最新文章

热门文章