2018年世界杯赔率预测 -DNN

# -*- coding: utf-8 -*-
'''
Created on 2018年7月2日
@author: user
@summary:  Predicting the winner of the 2018 FIFA World Cup
'''
import numpy as np # linear algebra
import pandas as pd # data processing
import tensorflow as tf
from tensorflow.python.data import Dataset
from sklearn import metrics
from itertools import combinations

# --- Load the raw data -------------------------------------------------------
# FIFA rankings: keep only the columns that are used further down.
rankings = pd.read_csv('fifa_ranking.csv')
rankings = rankings.loc[:, ['rank', 'country_full', 'country_abrv', 'cur_year_avg_weighted', 'rank_date', 'two_year_ago_weighted', 'three_year_ago_weighted']]
# Normalise the "IR Iran" spelling to "Iran". The result is assigned back
# instead of using a chained `inplace=True` call, which is not guaranteed to
# modify the parent DataFrame.
rankings['country_full'] = rankings['country_full'].replace("^IR Iran*", "Iran", regex=True)
# Single strength score: sum of the three weighted yearly averages.
rankings['weighted_points'] = rankings['cur_year_avg_weighted'] + rankings['two_year_ago_weighted'] + rankings['three_year_ago_weighted']
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

# Historical match results; align two team names with the spelling used in
# the rankings file.
matches = pd.read_csv("results.csv")
matches = matches.replace({'Germany DR': 'Germany', 'China': 'China PR'})
matches['date'] = pd.to_datetime(matches['date'])

# World Cup 2018 group table; fix misspelled team names so they can be
# looked up in the rankings.
world_cup = pd.read_csv("World Cup 2018 Dataset.csv")
world_cup = world_cup.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup = world_cup.dropna(how='all')
world_cup = world_cup.replace({"IRAN": "Iran", "Costarica": "Costa Rica", "Porugal": "Portugal", "Columbia": "Colombia", "Korea": "Korea Republic"})
world_cup = world_cup.set_index('Team')

# Get Complete Date wise Ranking table: resample every country's rankings to
# one row per day and forward-fill the gaps, so that the exact-date merge with
# the match table below can find a ranking row for a match date.
rankings = rankings.set_index(['rank_date']).groupby(['country_full'], group_keys=False).resample('D').first().ffill().reset_index()
# Join Ranking with match: attach the ranking row of the home team and then of
# the away team for the match date. Both merges are inner joins, so matches
# without a ranking row for either side are dropped. Overlapping ranking
# columns get the suffixes `_home` / `_away` in the second merge.
matches = matches.merge(rankings, left_on=['date', 'home_team'], right_on=['rank_date', 'country_full'])
matches = matches.merge(rankings, left_on=['date', 'away_team'], right_on=['rank_date', 'country_full'], suffixes=('_home', '_away'))

# feature generation
matches['rank_difference'] = matches['rank_home'] - matches['rank_away']
matches['average_rank'] = (matches['rank_home'] + matches['rank_away']) / 2
matches['point_difference'] = matches['weighted_points_home'] - matches['weighted_points_away']
matches['score_difference'] = matches['home_score'] - matches['away_score']
matches['is_won'] = matches['score_difference'] > 0  # take draw as lost
matches['is_stake'] = matches['tournament'] != 'Friendly'

# Logging / display settings.
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

# Shuffle the rows so that the head/tail split used afterwards is random.
matches = matches.reindex(np.random.permutation(matches.index))
def preprocess_features(matches):
    """Return the model input features of `matches`.

    Args:
      matches: pandas DataFrame containing at least the columns
        "average_rank", "rank_difference", "point_difference" and "is_stake".
    Returns:
      A new DataFrame (a copy, safe to modify) holding only those four
      columns, in that order.
    """
    selected_features = matches[["average_rank", "rank_difference", "point_difference", "is_stake"]]
    return selected_features.copy()


def preprocess_targets(matches):
    """Return the classification target of `matches`.

    Args:
      matches: pandas DataFrame containing an "is_won" column.
    Returns:
      A new single-column DataFrame "is_won" sharing the index of `matches`.
    """
    # The label is used as-is; no scaling is applied.
    return matches[["is_won"]].copy()


# Choose the first 60% i.e 10900 (out of 18167) examples for training.
# Fraction of the (already shuffled) matches used for training; the rest is
# used for validation. The split point is derived from the actual number of
# rows instead of hard-coded counts, so the two sets never overlap or leave a
# gap. For 18167 rows this gives 10900 training and 7267 validation examples.
TRAIN_FRACTION = 0.6
n_training = int(len(matches) * TRAIN_FRACTION)

# Choose the first 60% of the examples for training.
training_examples = preprocess_features(matches.head(n_training))
training_targets = preprocess_targets(matches.head(n_training))

# Choose the remaining 40% of the examples for validation.
validation_examples = preprocess_features(matches.iloc[n_training:])
validation_targets = preprocess_targets(matches.iloc[n_training:])

# Features of the complete data set.
Complete_Data_training = preprocess_features(matches)
# Targets of the complete data set.
Complete_Data_Validation = preprocess_targets(matches)


def construct_feature_columns(input_features):
    """Construct the TensorFlow feature columns.

    Args:
      input_features: iterable of feature names (iterating a DataFrame yields
        its column names).
    Returns:
      A set of numeric feature columns, one per name in `input_features`.
    """
    return {tf.feature_column.numeric_column(my_feature) for my_feature in input_features}


def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Input function feeding batches of (features, labels) to an estimator.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated.
        None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified.
    # NOTE: the shuffle is applied after batching, so whole batches are
    # shuffled rather than individual examples.
    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels


def train_nn_classification_model(
        my_optimizer,
        steps,
        batch_size,
        hidden_units,
        training_examples,
        training_targets,
        validation_examples,
        validation_targets):
    """Train a DNN classifier that predicts the "is_won" target.

    Training runs in 10 periods; after each period the log loss on the
    training and validation data is computed and the training log loss is
    printed.

    Args:
      my_optimizer: optimizer instance; it is wrapped with gradient clipping
        by norm (3.0) before use.
      steps: total number of training steps, split evenly over the periods.
      batch_size: batch size used for training.
      hidden_units: list with the number of units in each hidden layer.
      training_examples: DataFrame of training features.
      training_targets: DataFrame with an "is_won" column (training labels).
      validation_examples: DataFrame of validation features.
      validation_targets: DataFrame with an "is_won" column (validation labels).
    Returns:
      The trained `tf.estimator.DNNClassifier`.
    """
    periods = 10
    steps_per_period = steps / periods

    # Create a DNNClassifier object.
    my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 3.0)
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=construct_feature_columns(training_examples),
        hidden_units=hidden_units,
        optimizer=my_optimizer)

    # Create input functions.
    training_input_fn = lambda: my_input_fn(training_examples, training_targets["is_won"], batch_size=batch_size)
    predict_training_input_fn = lambda: my_input_fn(training_examples, training_targets["is_won"], num_epochs=1, shuffle=False)
    predict_validation_input_fn = lambda: my_input_fn(validation_examples, validation_targets["is_won"], num_epochs=1, shuffle=False)

    # Train the model, but do so inside a loop so that we can periodically
    # assess loss metrics.
    print("Training model...")
    print("LogLoss (on training data):")
    training_log_losses = []
    validation_log_losses = []
    for period in range(0, periods):
        # Train the model, starting from the prior state.
        dnn_classifier.train(input_fn=training_input_fn, steps=steps_per_period)

        # Take a break and compute predictions.
        training_probabilities = dnn_classifier.predict(input_fn=predict_training_input_fn)
        training_probabilities = np.array([item['probabilities'] for item in training_probabilities])
        validation_probabilities = dnn_classifier.predict(input_fn=predict_validation_input_fn)
        validation_probabilities = np.array([item['probabilities'] for item in validation_probabilities])

        training_log_loss = metrics.log_loss(training_targets, training_probabilities)
        validation_log_loss = metrics.log_loss(validation_targets, validation_probabilities)

        # Occasionally print the current loss.
        print("  period %02d : %0.2f" % (period, training_log_loss))

        # Add the loss metrics from this period to our list.
        training_log_losses.append(training_log_loss)
        validation_log_losses.append(validation_log_loss)
    print("Model training finished.")

    return dnn_classifier


# Train the network. The variable keeps the name `linear_classifier` although
# the model is a DNN classifier, because the rest of the script refers to it.
linear_classifier = train_nn_classification_model(
    my_optimizer=tf.train.AdagradOptimizer(learning_rate=0.07),
    steps=3000,
    batch_size=2000,
    hidden_units=[5, 5, 6, 5],
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)

# Predict on the validation set (single pass, original order).
predict_validation_input_fn = lambda: my_input_fn(validation_examples, validation_targets["is_won"], num_epochs=1, shuffle=False)
validation_probabilities = linear_classifier.predict(input_fn=predict_validation_input_fn)
# Get just the probabilities for the positive class.
validation_probabilities = np.array([item['probabilities'][1] for item in validation_probabilities])

# ROC curve points on the validation set.
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(validation_targets, validation_probabilities)

# Evaluation metrics reported by the estimator itself.
evaluation_metrics = linear_classifier.evaluate(input_fn=predict_validation_input_fn)
print("AUC on the validation set: %0.2f" % evaluation_metrics['auc'])
print("Accuracy on the validation set: %0.2f" % evaluation_metrics['accuracy'])

#World Cup simulation
# Margin around a 0.5 home-win probability inside which a draw is predicted
# instead of a win for either side.
margin = 0.05

# let's define the rankings at the time of the World Cup: the rows of the most
# recent ranking date, restricted to the participating teams.
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & rankings['country_full'].isin(world_cup.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
opponents = ['First match \nagainst', 'Second match\n against', 'Third match\n against']

world_cup['points'] = 0
# Float from the start: probabilities are accumulated into this column.
world_cup['total_prob'] = 0.0

# Iterate the groups in sorted order so that the output is deterministic.
for group in sorted(world_cup['Group'].dropna().unique()):
    print('___Starting group {}:___'.format(group))
    # Every pair of teams in the group plays once.
    for home, away in combinations(world_cup.query('Group =="{}"'.format(group)).index, 2):
        print("{} vs. {}: ".format(home, away))

        # One-row feature frame with the same columns as the training
        # features; the last value (is_stake) is True.
        row = pd.DataFrame(np.array([[np.nan, np.nan, np.nan, True]]), columns=validation_examples.columns)
        home_rank = world_cup_rankings.loc[home, 'rank']
        home_points = world_cup_rankings.loc[home, 'weighted_points']
        opp_rank = world_cup_rankings.loc[away, 'rank']
        opp_points = world_cup_rankings.loc[away, 'weighted_points']
        row['average_rank'] = (home_rank + opp_rank) / 2
        row['rank_difference'] = home_rank - opp_rank
        row['point_difference'] = home_points - opp_points
        # Placeholder label: my_input_fn requires a targets argument.
        row['is_won'] = np.nan

        predict_validation_input_fn1 = lambda: my_input_fn(row, row["is_won"], num_epochs=1, shuffle=False)
        validation_probabilities1 = linear_classifier.predict(input_fn=predict_validation_input_fn1)
        # Get just the probabilities for the positive class.
        validation_probabilities1 = np.array([item['probabilities'][1] for item in validation_probabilities1])
        home_win_prob = validation_probabilities1[0]

        world_cup.loc[home, 'total_prob'] += home_win_prob
        world_cup.loc[away, 'total_prob'] += 1 - home_win_prob

        # 3 points for a win, 1 point each for a draw.
        if home_win_prob <= 0.5 - margin:
            print("{} wins with {:.2f}".format(away, 1 - home_win_prob))
            world_cup.loc[away, 'points'] += 3
        elif home_win_prob >= 0.5 + margin:
            world_cup.loc[home, 'points'] += 3
            print("{} wins with {:.2f}".format(home, home_win_prob))
        else:
            print("Draw")
            world_cup.loc[home, 'points'] += 1
            world_cup.loc[away, 'points'] += 1
# Row positions of the round-of-16 pairings in the table of qualified teams
# built below (two rows per group, groups in ascending order): consecutive
# entries play each other, e.g. 0 vs 3 is the first row of the first group
# against the second row of the second group.
pairing = [0, 3, 4, 7, 8, 11, 12, 15, 1, 2, 5, 6, 9, 10, 13, 14]

# Rank the teams inside every group by points, then by accumulated probability.
world_cup = world_cup.sort_values(by=['Group', 'points', 'total_prob'], ascending=False).reset_index()

# select the top 2 of every group. `head(2)` keeps the rows in table order;
# the stable sort then arranges the groups in ascending order without
# disturbing the order inside a group, which is what `pairing` relies on.
next_round_wc = world_cup.groupby('Group').head(2)
next_round_wc = next_round_wc.sort_values('Group', kind='mergesort').reset_index(drop=True)
next_round_wc = next_round_wc.loc[pairing]
next_round_wc = next_round_wc.set_index('Team')

finals = ['round_of_16', 'quarterfinal', 'semifinal', 'final']

labels = list()  # one "HOME(x) vs. AWAY(y)" string per match, x and y = 1 / probability
odds = list()    # one [home_win_prob, away_win_prob] pair per match

for f in finals:
    print("___Starting of the {}___".format(f))
    teams = next_round_wc.index
    winners = []
    # Consecutive teams play each other: (0, 1), (2, 3), ...
    for home, away in zip(teams[::2], teams[1::2]):
        print("{} vs. {}: ".format(home, away))

        # One-row feature frame with the same columns as the training
        # features; the last value (is_stake) is True.
        row = pd.DataFrame(np.array([[np.nan, np.nan, np.nan, True]]), columns=validation_examples.columns)
        home_rank = world_cup_rankings.loc[home, 'rank']
        home_points = world_cup_rankings.loc[home, 'weighted_points']
        opp_rank = world_cup_rankings.loc[away, 'rank']
        opp_points = world_cup_rankings.loc[away, 'weighted_points']
        row['average_rank'] = (home_rank + opp_rank) / 2
        row['rank_difference'] = home_rank - opp_rank
        row['point_difference'] = home_points - opp_points
        # Placeholder label: my_input_fn requires a targets argument.
        row['is_won'] = np.nan

        predict_validation_input_fn1 = lambda: my_input_fn(row, row["is_won"], num_epochs=1, shuffle=False)
        validation_probabilities1 = linear_classifier.predict(input_fn=predict_validation_input_fn1)
        # Get just the probabilities for the positive class.
        validation_probabilities1 = np.array([item['probabilities'][1] for item in validation_probabilities1])
        home_win_prob = validation_probabilities1[0]

        # No draws in the knockout stage: the more likely side advances
        # (the away side on an exact 0.5).
        if home_win_prob <= 0.5:
            print("{0} wins with probability {1:.2f}".format(away, 1 - home_win_prob))
            winners.append(away)
        else:
            print("{0} wins with probability {1:.2f}".format(home, home_win_prob))
            winners.append(home)

        labels.append("{}({:.2f}) vs. {}({:.2f})".format(world_cup_rankings.loc[home, 'country_abrv'], 1 / home_win_prob, world_cup_rankings.loc[away, 'country_abrv'], 1 / (1 - home_win_prob)))
        odds.append([home_win_prob, 1 - home_win_prob])

    # Only the winners take part in the next round, in bracket order.
    next_round_wc = next_round_wc.loc[winners]
    print("\n")

Model training finished.
AUC on the validation set: 0.74
Accuracy on the validation set: 0.67
___Starting group A:___
Russia vs. Saudi Arabia:
Draw
Russia vs. Egypt:
Egypt wins with 0.67
Russia vs. Uruguay:
Uruguay wins with 0.84
Saudi Arabia vs. Egypt:
Egypt wins with 0.66
Saudi Arabia vs. Uruguay:
Uruguay wins with 0.84
Egypt vs. Uruguay:
Uruguay wins with 0.84
___Starting group C:___
France vs. Australia:
France wins with 0.57
France vs. Peru:
Draw
France vs. Denmark:
Draw
Australia vs. Peru:
Peru wins with 0.84
Australia vs. Denmark:
Denmark wins with 0.84
Peru vs. Denmark:
Draw
___Starting group B:___
Portugal vs. Spain:
Draw
Portugal vs. Morocco:
Portugal wins with 0.62
Portugal vs. Iran:
Portugal wins with 0.62
Spain vs. Morocco:
Spain wins with 0.60
Spain vs. Iran:
Spain wins with 0.60
Morocco vs. Iran:
Draw
___Starting group E:___
Brazil vs. Switzerland:
Draw
Brazil vs. Costa Rica:
Draw
Brazil vs. Serbia:
Brazil wins with 0.59
Switzerland vs. Costa Rica:
Draw
Switzerland vs. Serbia:
Switzerland wins with 0.57
Costa Rica vs. Serbia:
Draw
___Starting group D:___
Argentina vs. Iceland:
Draw
Argentina vs. Croatia:
Draw
Argentina vs. Nigeria:
Argentina wins with 0.64
Iceland vs. Croatia:
Draw
Iceland vs. Nigeria:
Iceland wins with 0.60
Croatia vs. Nigeria:
Croatia wins with 0.60
___Starting group G:___
Belgium vs. Panama:
Belgium wins with 0.68
Belgium vs. Tunisia:
Draw
Belgium vs. England:
Draw
Panama vs. Tunisia:
Tunisia wins with 0.84
Panama vs. England:
England wins with 0.84
Tunisia vs. England:
England wins with 0.61
___Starting group F:___
Germany vs. Mexico:
Germany wins with 0.56
Germany vs. Sweden:
Germany wins with 0.59
Germany vs. Korea Republic:
Germany wins with 0.73
Mexico vs. Sweden:
Draw
Mexico vs. Korea Republic:
Mexico wins with 0.65
Sweden vs. Korea Republic:
Sweden wins with 0.64
___Starting group H:___
Poland vs. Senegal:
Draw
Poland vs. Colombia:
Draw
Poland vs. Japan:
Poland wins with 0.66
Senegal vs. Colombia:
Colombia wins with 0.55
Senegal vs. Japan:
Senegal wins with 0.63
Colombia vs. Japan:
Colombia wins with 0.65
___Starting of the round_of_16___
Uruguay vs. Spain:
Spain wins with probability 0.54
Denmark vs. Croatia:
Denmark wins with probability 0.55
Switzerland vs. Mexico:
Mexico wins with probability 0.51
England vs. Poland:
Poland wins with probability 0.53
Egypt vs. Portugal:
Portugal wins with probability 0.84
Peru vs. Argentina:
Argentina wins with probability 0.56
Brazil vs. Germany:
Germany wins with probability 0.84
Belgium vs. Colombia:
Belgium wins with probability 0.54

___Starting of the quarterfinal___
Spain vs. Denmark:
Denmark wins with probability 0.52
Mexico vs. Poland:
Poland wins with probability 0.59
Portugal vs. Argentina:
Argentina wins with probability 0.53
Germany vs. Belgium:
Belgium wins with probability 0.52

___Starting of the semifinal___
Denmark vs. Poland:
Poland wins with probability 0.51
Argentina vs. Belgium:
Belgium wins with probability 0.57

___Starting of the final___
Poland vs. Belgium:
Belgium wins with probability 0.84

2018年世界杯赔率预测 -DNN相关推荐

2018年世界杯赔率预测
参考:https://www.kaggle.com/martj42/international-football-results-from-1872-to-2017/kernels # -*- cod ...
PAT-A1011 World Cup Betting (世界杯赔率）
A1011 World Cup Betting (世界杯赔率) With the 2010 FIFA World Cup running, football fans the world over w ...
了解世界杯赔率，让您运气更‘好‘（个人分享）
足球世界杯买球赢面计算前言理论基础实际计算用例: 代码实现真实数据前言此文是个人关于世界杯的一些浅显的看法,实际统计结果和计算方法有出入,可能原因:1)数据量不够.2)比赛双方差距够大导致 ...
2018世界杯赛程PHP源码,PHP-ML机器学习预测2018俄罗斯世界杯比赛结果
前言: 根据2014年巴西世界杯的小组赛比赛结果和赔率数据简单预测2018世界杯比赛结果,比赛的赔率我们可以事先知道,所以可以使用赔率作为预测数据技术: PHP ML库贝叶斯分类器样本数据:20 ...
世界杯押注还得看技术流，这个预测AI把赔率也算上了
胡澎发自凹非寺量子位报道 | 公众号 QbitAI 世界杯小组赛将收官,你还依然信AI吗? 冷门频出,黑马击败豪强.不少AI模型始料未及. 到底还能不能愉快找到科学规律?或者说足球比赛乃至其 ...
ML之预测：玩转2018世界杯—采用机器学习预测小组赛、十六比赛、四决赛、半决赛、决赛以及世界杯总冠军的各个队伍
ML之预测:玩转2018世界杯-采用机器学习预测小组赛.十六比赛.四决赛.半决赛.决赛以及世界杯总冠军的各个队伍导读机器学习预测.玩转2018世界杯-采用机器学习预测小组赛.十六比赛. ...
预测2018年世界杯决赛_2018年5个电子商务预测
预测2018年世界杯决赛 2018 is with us already and it's important in the current competitive market that eComm ...
浅谈大数据之足球盘口赔率水位分析的思路与神准预测技巧（一）
足球运动是当今世界上开展最广.影响最大.最具魅力.拥有球迷数最多的体育项目之一,尤其是欧洲足球,每年赛事除了五大联赛(英超.西甲.德甲.法甲.意甲)之外,还会有欧冠(欧洲冠军联赛),精湛的球技,完美的 ...
再谈大数据之足球盘口赔率水位分析的思路与神准预测技巧
这两天看到CSDN上一篇同行写的有趣的话题,题目为: 浅谈大数据之足球盘口赔率水位分析的思路与神准预测技巧(一)_linwei_hello的专栏-CSDN博客因为算是同行文章,本人也做足球大数据分析 ...

2018年世界杯赔率预测 -DNN

2018年世界杯赔率预测 -DNN相关推荐

最新文章

热门文章