使用最大离散重叠小波变换MODWT和支持向量回归 SVR的金融时间序列预测

本例使用的数据链接如下：https://www.histdata.com/download-free-forex-historical-data/?/ascii/tick-data-quotes/AUDJPY，将数据从 1 分钟间隔转换为 1 天间隔

第一部分，原始时间序列SVR + 滑动窗方法

首先读取数据

# Load the quotes file; it is ';'-separated and the 'Date' column is parsed
# into datetime values.
prices = pd.read_csv(
    '../Data/AUD-JPY-2003-2014-day.csv',
    delimiter=";",
    header=0,
    encoding='utf-8',
    parse_dates=['Date'],
)
# Bare expression: shows the frame when this runs as a notebook cell.
prices

删除不使用的列

# Remove the Open/High/Low columns in place.
prices.drop(columns=["Open", "High", "Low"], inplace=True)

定义变量

# Independent copies of the two columns used by the following cells.
dates = prices['Date'].copy()
closing_prices = prices['Close'].copy()

# Plot the original time series with matplotlib.
plt.subplots(figsize=(16, 4))
plt.plot(
    dates,
    closing_prices,
    label='Original series AUD-JPY 2003-2014',
)
plt.legend(loc='best')
plt.show()

SVR + 滑动窗，定义滑动窗函数

def slideWindow(series, window_lenght = 2):
    """Turn a 1-D series into supervised (X, Y) samples with a sliding window.

    Every run of ``window_lenght + 1`` consecutive values yields one sample:
    the first ``window_lenght`` values are the input and the last value is the
    target.

    Args:
        series: 1-D sequence accepted by ``sliding_window_view``.
        window_lenght: number of past values per input sample (the spelling is
            kept because callers pass it by keyword).

    Returns:
        ``(_X, _Y)``: two lists of equal length. ``_X[i]`` is an array holding
        ``window_lenght`` consecutive values and ``_Y[i]`` is the value that
        immediately follows them.
    """
    windows = sliding_window_view(series, window_lenght + 1)
    # Column slicing replaces the per-row loop; list() keeps the original
    # return type (a list of row arrays and a list of scalars).
    _X = list(windows[:, :-1])
    _Y = list(windows[:, -1])
    return _X, _Y
window_lenght = 2
# Build the supervised samples from the closing prices.
X, Y = slideWindow(closing_prices, window_lenght)
# Y[k] is the price at position k + window_lenght, so the first target of the
# last 25% of the samples sits at this row of `prices`.
idx_test_date = window_lenght + int(0.75 * len(Y))
# Dates of the test targets, kept in a one-column frame.
df = prices['Date'].iloc[idx_test_date:].to_frame(name='test_date')

拆分并绘制测试数据：将数据拆分为训练集（75%）和测试集（25%）；shuffle = False 表示不随机打乱数据，从而保持时间序列的先后顺序（测试集即序列末尾的 25%）

# Chronological 75% / 25% split: shuffle=False keeps the samples in order, so
# the test set is the tail of the series.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=None, shuffle=False
)

fig, ax = plt.subplots(2, 1, figsize=(16, 8))
# Top panel: whole series with the test span overlaid.
ax[0].plot(dates, closing_prices, label='Original')
ax[0].plot(df['test_date'], y_test, label='Values to test the model out', color='orange')
# Bottom panel: the test span on its own.
ax[1].plot(df['test_date'], y_test, label='Values to test the model out', color='orange')
for axis in ax:
    axis.legend(loc='best')
plt.show()

定义训练函数并拟合

def evaluateSVR(_x_train,_y_train,_x_test,_y_test, kernel = 'rbf'):
    """Fit an SVR on the training samples and predict the test inputs.

    Args:
        _x_train: training inputs.
        _y_train: training targets.
        _x_test: inputs to predict.
        _y_test: unused; kept so existing positional calls keep working.
        kernel: 'rbf' or 'poly'; any other value selects the linear kernel.

    Returns:
        The predictions for ``_x_test`` as returned by ``SVR.predict``.
    """
    if kernel == 'rbf':
        clf = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
    elif kernel == 'poly':
        clf = svm.SVR(kernel='poly', C=1e3, degree=2)
    else:
        clf = svm.SVR(kernel='linear', C=1e3)
    return clf.fit(_x_train, _y_train).predict(_x_test)


y_predict = evaluateSVR(x_train, y_train, x_test, y_test)
# Keep a copy of these test targets; later cells reassign y_test.
plotValuesWt = y_test.copy()

# Plot the predicted values against the real ones.
plt.subplots(figsize=(18, 6))
plt.plot(df['test_date'], y_test, label="Real")
plt.plot(df['test_date'], y_predict, label="Predicted")
plt.legend(loc='best')
plt.show()

第二部分，使用 MODWT 将时间序列分解

使用“sym4”小波，modwt分解为4层（4 个细节系数 (dC) 和 1 个近似系数 (aC)）

def applyModwt(_data, type='sym4', _level=3):
    """Decompose ``_data`` with ``modwt``.

    Args:
        _data: series to decompose.
        type: wavelet name forwarded to ``modwt`` (the name shadows the
            builtin but is kept because callers pass it by keyword).
        _level: decomposition depth forwarded to ``modwt``.

    Returns:
        Whatever ``modwt`` returns; the code below treats it as one row per
        coefficient series with the approximation in the last row.
    """
    return modwt(_data, type, _level)


level = 4
coeff = applyModwt(closing_prices, type='sym4', _level=level)
# Inspect the coefficients: expected to be 5 rows by len(closing_prices) columns.
print(np.shape(coeff))

# Plot every coefficient series; the last row is labelled as the
# approximation (cA), all others as details (cD).
fig, ax = plt.subplots(len(coeff), 1, figsize=(16, 8))
last_row = len(coeff) - 1
for i in range(len(coeff)):
    label_format = 'cA[%.0f]' if i == last_row else 'cD[%.0f]'
    ax[i].plot(coeff[i], label=label_format % (i))
    ax[i].legend(loc='best')

重建原始时间序列

# Zero-filled buffer with the same shape as the coefficient matrix.
recwt = np.zeros((np.shape(coeff)[0], np.shape(coeff)[1]))
# Untouched copy of all approximation and detail coefficients for later cells.
aCdC = coeff.copy()
# Fill in only the approximation row; every detail row stays zero.
recwt[level:] = coeff[level]
# Reconstruct the series from the approximation coefficients alone ...
dFs = imodwt(recwt, 'sym4')
# ... and from all the coefficients.
rFs = imodwt(coeff, 'sym4')

# Compare the reconstructions.
fig, ax = plt.subplots(4, 1, figsize=(16, 8))
ax[0].plot(dates, closing_prices, label='Original')
# Reconstruction from all aC and dC coefficients.
ax[1].plot(dates, rFs, label='Re-constructed (using all coeff)', color='green')
# Reconstruction from the aC coefficients only.
ax[2].plot(dates, dFs, label='Re-constructed (using just aC)', color='orange')
# Original signal against the denoised one.
ax[3].plot(dates, closing_prices, label='Original')
ax[3].plot(dates, dFs, label='Re-constructed (using just aC)', color='orange')
for axis in ax:
    axis.legend(loc='best')
plt.show()

第三部分，使用 SVR 估计小波系数

new_coeff = []
# Predict the tail of every coefficient series with its own SVR.
for i in range(len(aCdC)):
    # Position up to which the original coefficients are kept.
    # NOTE(review): the windowed split below always yields fewer than
    # len(aCdC[i]) - index predictions (the 5-sample window consumes the start
    # of the series), so the spliced series is shorter than the input and the
    # predictions sit slightly early. `len(aCdC[i]) - len(y_predict)` may have
    # been intended - confirm together with the slicing done after imodwt.
    index = int(len(aCdC[i]) * 0.75)
    # Sliding-window samples for this coefficient series.
    X, Y = slideWindow(aCdC[i], window_lenght=5)
    # Chronological 75% / 25% split.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.25, random_state=None, shuffle=False
    )
    # Evaluate this coefficient series with the SVR helper.
    y_predict = evaluateSVR(x_train, y_train, x_test, y_test)
    # Store the kept original values followed by the predicted values.
    new_coeff.append(np.concatenate((aCdC[i][:index], y_predict)))
    # Plot the predictions for this coefficient series.
    plt.subplots(figsize=(18, 6))
    plt.plot(y_test, label="Real")
    plt.plot(y_predict, label="Predicted")
    plt.legend(loc='best')
    plt.show()

用预测值绘制新的时间序列

rpFs = imodwt(new_coeff, 'sym4')
# Take exactly as many trailing values as there are test targets, so the
# slice always lines up with df['test_date'] and plotValuesWt.
# int(len(rpFs) * 0.75) gives the same index whenever the lengths already
# agree, but can be off by one for some series lengths.
index = len(rpFs) - len(plotValuesWt)
fig, ax = plt.subplots(3, 1, figsize=(16, 8))
ax[0].plot(df['test_date'], plotValuesWt, label='Original')
# Reconstruction from all predicted dC and aC coefficients, plotted against
# sample position rather than date.
ax[1].plot(rpFs[index:], label='Re-constructed (using all coeff)', color='green')
ax[2].plot(df['test_date'], plotValuesWt, label='Original')
ax[2].plot(df['test_date'], rpFs[index:], label='Re-constructed (using all coeff)', color='green')
for axis in ax:
    axis.legend(loc='best')
# The original call used squared=False, i.e. a root-mean-squared error, so it
# is labelled as such. Taking the root explicitly also works on scikit-learn
# releases that no longer accept the `squared` argument.
print('RMSE', np.sqrt(mean_squared_error(plotValuesWt, rpFs[index:])))

第四部分，构建预测模型（使用所有系数进行预测）

def evaluateModel(svr, X, Y, prediction_days, past_days):
    """Forecast ``prediction_days`` values recursively with a fitted model.

    Starts from the last training window; at every step the window drops its
    oldest value, appends the latest known or predicted value, and is fed to
    ``svr.predict``.

    Args:
        svr: fitted regressor exposing ``predict``.
        X: training windows; only the last one is used.
        Y: training targets; only the last one is used.
        prediction_days: number of steps to forecast.
        past_days: window length the model was trained with.

    Returns:
        1-D array of ``prediction_days + 1`` values: the last known target
        followed by the forecasts.
    """
    window = np.asarray(X[-1])[-past_days:]
    values = [np.asarray(Y)[-1]]
    for _ in range(prediction_days):
        # Slicing with [1:] instead of [-past_days + 1:] also works for
        # past_days == 1, where the negative slice would keep the whole
        # window and grow it by one element.
        window = np.append(window[1:], values[-1])
        values.append(svr.predict(window.reshape(1, -1))[0])
    # Always an array, including when prediction_days == 0.
    return np.array(values)


def predictValue(past_days = 7, prediction_days = 5, file_Path = 'Data/AUD-JPY-2003-2014-day.csv', dateColName = 'Date', closingPColName = 'Close', delimiter = ';'):
    """Load the prices, decompose them and extend every coefficient series.

    Args:
        past_days: window length used to train each per-coefficient SVR.
        prediction_days: number of values to forecast.
        file_Path: CSV file to read.
        dateColName: name of the date column.
        closingPColName: name of the closing-price column.
        delimiter: CSV field separator.

    Returns:
        ``(predictedCoeff, dates, closing_prices)``.
    """
    # NOTE(review): this default path has no '../' prefix, unlike the other
    # loaders in this file - confirm which working directory is intended.
    # Read the data from the file.
    dates, closing_prices = getDatesAndPrices(file_Path, dateColName, closingPColName, delimiter)
    # Wavelet coefficients of the closing prices.
    coeff = getCoeffFromSeries(closing_prices)
    # Extend every coefficient series with SVR forecasts.
    predictedCoeff = trainModel(coeff, prediction_days, past_days)
    return predictedCoeff, dates, closing_prices


def getDatesAndPrices(filePath, dateColName, closingPColName, _delimiter):
    """Read the CSV file and return copies of its date and closing-price columns.

    ``parse_dates`` converts the date strings into datetime values.
    """
    prices = pd.read_csv(filePath, delimiter=_delimiter, header=0, encoding='utf-8', parse_dates=[dateColName])
    dates = prices[dateColName].copy()
    closing_prices = prices[closingPColName].copy()
    return dates, closing_prices


def getCoeffFromSeries(closing_prices):
    """Return the level-4 'sym4' coefficients of ``closing_prices`` via ``applyModwt``."""
    level = 4
    return applyModwt(closing_prices, type='sym4', _level=level)


def trainModel(coeff, prediction_days, past_days):
    """Fit one SVR per coefficient series and append its forecasts.

    Args:
        coeff: coefficient series, one per row.
        prediction_days: number of values to forecast per series.
        past_days: sliding-window length.

    Returns:
        List with one extended series per input row: the original values
        followed by ``prediction_days`` forecasts.
    """
    new_coeff = []
    print('coeff shape: ',np.shape(coeff))
    for series in coeff:
        X, Y = slideWindow(series, past_days)
        svr = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
        svr.fit(X, Y)
        predictCoeff = evaluateModel(svr, X, Y, prediction_days, past_days)
        # predictCoeff starts with the last known value, so drop that value
        # from the original series before joining the two.
        new_coeff.append(np.concatenate((series[:-1], predictCoeff)))
    print('NEW coeff shape: ',np.shape(new_coeff))
    return new_coeff


daysToPredict = 7
# Run the whole pipeline with its default file path and column names,
# forecasting `daysToPredict` values per coefficient series.
(predictedCoeff,
 dates,
 closing_prices) = predictValue(prediction_days=daysToPredict)

接下来准备绘图进行对比

def plotValues(dates, original, predicted, prediction_days):
    """Plot the original series, the extended reconstruction, and both overlaid.

    Args:
        dates: dates of the original series.
        original: original values, one per date.
        predicted: reconstructed values including the forecast tail.
        prediction_days: number of forecast values appended to ``predicted``.
    """
    fig, ax = plt.subplots(3, 1, figsize=(16, 8))
    ax[0].plot(dates, original, label='Original')
    # Reconstruction from the dC and aC coefficients, against sample position.
    ax[1].plot(predicted, label='Re-constructed (using all coeff)', color='green')
    # Extend the date axis so it also covers the forecast values.
    newDates = addDayToDates(dates, prediction_days)
    ax[2].plot(dates, original, label='Original')
    ax[2].plot(newDates, predicted, label='Re-constructed (using all coeff)', color='green')
    for axis in ax:
        axis.legend(loc='best')


def addDayToDates(dates, prediction_days):
    """Return ``dates`` extended by ``prediction_days`` consecutive calendar days.

    The previous implementation indexed with ``len(_dates) - 1 + i`` while the
    series was growing, which left gaps in the index labels of the appended
    dates. The result is now built in one step with a contiguous 0..n-1 index.

    Args:
        dates: series of datetime values; it is not modified.
        prediction_days: number of days to append after the last date.

    Returns:
        New Series holding the original dates followed by the appended days.
    """
    last_date = pd.to_datetime(np.array(dates)[-1])
    # date_range includes last_date itself, hence periods + 1 and the [1:].
    future = pd.date_range(last_date, periods=prediction_days + 1, freq='D')[1:]
    return pd.concat([pd.Series(dates), pd.Series(future)], ignore_index=True)
# Rebuild the extended series from the predicted coefficients and plot it
# next to the original prices.
rpFs = imodwt(predictedCoeff, 'sym4')
plotValues(dates, closing_prices, rpFs, prediction_days=daysToPredict)

然后，仅使用近似系数进行预测

def readData(past_days = 7, prediction_days = 5, file_Path = '../Data/AUD-JPY-2003-2014-day.csv', dateColName = 'Date', closingPColName = 'Close', delimiter = ';'):
    """Read the dates and closing prices from the CSV file.

    Args:
        past_days: unused; kept for callers that pass it.
        prediction_days: unused; kept for callers that pass it.
        file_Path: CSV file to read.
        dateColName: name of the date column.
        closingPColName: name of the closing-price column.
        delimiter: CSV field separator.

    Returns:
        ``(dates, closing_prices)`` as returned by ``getDatesAndPrices``.
    """
    dates, closing_prices = getDatesAndPrices(file_Path, dateColName, closingPColName, delimiter)
    return dates, closing_prices


def getApproxCoeffFromSeries(closing_prices):
    """Return the level-4 'sym4' coefficients of ``closing_prices``.

    Despite the name, the full result of ``applyModwt`` is returned; the
    caller picks the rows it needs.
    """
    level = 4
    return applyModwt(closing_prices, type='sym4', _level=level)


def trainModelApprox(X, Y, past_days):
    """Fit and return an RBF SVR on the given samples.

    Args:
        X: training windows.
        Y: training targets.
        past_days: unused; kept for callers that pass it.

    Returns:
        The fitted ``svm.SVR`` instance.
    """
    svr = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr.fit(X, Y)
    return svr


daysToPredict = 7
# Window length and decomposition depth for the approximation-based forecast.
past_days, level = 7, 4
# Reload the dates and closing prices.
dates, closing_prices = readData(
    prediction_days=daysToPredict,
    past_days=past_days,
)

然后

# Coefficient matrix; only the approximation row and the last detail row are
# used below.
approxCoeff = getApproxCoeffFromSeries(closing_prices)
# Zero-filled buffer with the same shape as the coefficient matrix.
recwt = np.zeros(np.shape(approxCoeff))
# Rows from level-1 onward first receive the last detail row ...
recwt[(level - 1):] = approxCoeff[-2]
# ... then rows from `level` onward are overwritten with the approximation row.
recwt[level:] = approxCoeff[-1]
# Reconstruct from those two rows only (all other rows stay zero), which
# amounts to denoising the financial time series.
dFs = imodwt(recwt, 'sym4')

使用近似系数训练模型

# Supervised samples from the denoised series.
X, Y = slideWindow(dFs, past_days)
# Fit the SVR on them. The third parameter of trainModelApprox is named
# past_days, so pass past_days rather than daysToPredict (both are 7 here).
svr = trainModelApprox(X, Y, past_days)

执行预测

# Recursive forecast; element 0 of the result repeats the last known value.
predictedValues = evaluateModel(
    svr,
    X,
    Y,
    past_days=past_days,
    prediction_days=daysToPredict,
)
# Append only the new values (skip the repeated first element).
rpFs = np.concatenate([dFs, predictedValues[1:]])
# Plot the extended series next to the original prices.
plotValues(dates, closing_prices, rpFs, daysToPredict)

基于最大离散重叠小波变换MODWT和支持向量回归 SVR的金融时间序列预测的步骤大致如此。前面基于滑动窗+SVR的金融序列预测还比较好理解，到小波这部分可能就不太容易理解了：实际上还是围绕小波系数做文章，即在每一个分解层的小波系数上分别进行预测，最后再把各层的预测结果综合（重构）起来。小波分析有很大的灵活性，不管是只使用近似系数进行预测，还是挑选近似系数加上几个细节系数进行预测，都没有一个明确的指导方案，还是要靠自己多试几次。

关于最大离散重叠小波,找了几个金融相关的文章，看一下吧

[1]王健.中美股市联动性——基于极大重叠离散小波变换的研究[J].世界经济文汇,2014(02):72-89.

[2]隋新,何建敏,李亮.时变视角下基于MODWT的沪深300指数现货与期货市场间波动溢出效应[J].系统工程,2015,33(01):31-38.

[3]徐梅. 金融波动分析的小波和频域方法研究[D].天津大学,2004.

详细的代码如下

使用最大离散重叠小波变换MODWT和支持向量回归 SVR的金融时间序列预测相关推荐

基于DDTBOX，使用线性支持向量回归(SVR)从ERP数据中解码连续变量
导读事件相关电位(ERP)数据的多变量分类分析是预测认知变量的强大工具.然而,分类通常仅限于分类变量,并未充分利用连续数据,如反应时间.反应力或主观评分.另一种方法是支持向量回归(SVR),它使用单 ...
❤️解决非线性回归问题的机器学习方法总结：多项式线性模型、广义线性(GAM)模型、回归树模型、支持向量回归(SVR)模型
文章目录前言多项式回归模型概念解释: sklearn实现多项式回归模型: 广义线性可加(GAM)模型概念解释: pygam实现广义线性可加模型: GAM模型的优点与不足: 回归树模型概念解释 ...
【视频】支持向量机SVM、支持向量回归SVR和R语言网格搜索超参数优化实例
最近我们被客户要求撰写关于SVM的研究报告,包括一些图形和统计输出. 什么是支持向量机 (SVM)? 我们将从简单的理解 SVM 开始. [视频]支持向量机SVM.支持向量回归SVR和R语言网格搜索超 ...
机器学习西瓜书笔记：软间隔和支持向量回归SVR
1.首先由SVM问题(最大间隔超平面模型):所有样本都可以正确分类的最优化问题,引入软间隔SVM(允许分类错误)的最优化问题,即需要添加损失函数(样本不满足约束的程度,或者说分类错误的程度),然后最优 ...
机器学习——支持向量回归(SVR)
机器学习--支持向量回归(SVR) educoder平台练习题如果博客中图片加载失败可点击链接跳转至实训详情 https://www.educoder.net/shixuns/b6yi97f2/ch ...
[翻译] 支持向量回归SVR的介绍
X. Introduction 本文先翻译一下: http://www.saedsayad.com/support_vector_machine_reg.htm Support Vector Mach ...
【机器学习系列】之支持向量回归SVR
作者:張張張張 github地址:https://github.com/zhanghekai [转载请注明出处,谢谢!] [机器学习系列]之SVM硬间隔和软间隔 [机器学习系列]之SVM核函数和SMO ...
支持向量回归 svr
from sklearn import svm X = [[0, 0], [1, 1]] y = [0, 1]#建立支持向量分类模型 clf = svm.SVC()#拟合训练数据,得到训练模型参数 c ...
机器学习之支持向量回归(SVR)——南京审计大学金审学院
第1关:线性可分支持向量机本关任务:根据本节课所学知识完成本关所设置的选择题. 第1题 B 第2题 B 第3题 C 第4题 B 第5题 D 第6题 AC 第2关:线性支持向量机本关任务:使用skl ...

使用最大离散重叠小波变换MODWT和支持向量回归 SVR的金融时间序列预测

使用最大离散重叠小波变换MODWT和支持向量回归 SVR的金融时间序列预测相关推荐

最新文章

热门文章