# -*- coding: utf-8 -*-
'''
Created on 2018年7月2日
@author: user
@summary:  Predicting the winner of the 2018 FIFA World Cup
'''
import numpy as np # linear algebra
import pandas as pd # data processing
import tensorflow as tf
from tensorflow.python.data import Dataset
from sklearn import metrics
from itertools import combinations

# ---------------------------------------------------------------------------
# Load the three input tables: FIFA ranking history, historical match results
# and the World Cup 2018 fixture list.
# ---------------------------------------------------------------------------
rankings = pd.read_csv('fifa_ranking.csv')
rankings = rankings.loc[:, ['rank', 'country_full', 'country_abrv',
                            'cur_year_avg_weighted', 'rank_date',
                            'two_year_ago_weighted', 'three_year_ago_weighted']]
# Assign the result back instead of calling replace(..., inplace=True) on the
# attribute: an in-place update of a column accessed this way is a chained
# operation and is not guaranteed to reach the parent frame.
rankings['country_full'] = rankings['country_full'].replace("^IR Iran*", "Iran", regex=True)
rankings['weighted_points'] = (rankings['cur_year_avg_weighted']
                               + rankings['two_year_ago_weighted']
                               + rankings['three_year_ago_weighted'])
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

matches = pd.read_csv("results.csv")
# Align team names in the results with the names used in the ranking table.
matches = matches.replace({'Germany DR': 'Germany', 'China': 'China PR'})
matches['date'] = pd.to_datetime(matches['date'])

world_cup = pd.read_csv("World Cup 2018 Dataset.csv")
world_cup = world_cup.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup = world_cup.dropna(how='all')
# Fix misspelled / differently spelled team names in the fixture list.
world_cup = world_cup.replace({"IRAN": "Iran", "Costarica": "Costa Rica", "Porugal": "Portugal", "Columbia": "Colombia", "Korea": "Korea Republic"})
world_cup = world_cup.set_index('Team')

# Get the complete date-wise ranking table: resample every country's ranking
# history to daily frequency and forward-fill, so a ranking row exists for
# each calendar day and can be joined on the match date.
rankings = (
    rankings.set_index(['rank_date'])
    .groupby(['country_full'], group_keys=False)
    .resample('D')
    .first()
    .ffill()
    .reset_index()
)

# Join the rankings with the matches, once for the home team and once for the
# away team. Both merges use the default inner join, so matches without a
# ranking row for either side are dropped.
matches = matches.merge(rankings, left_on=['date', 'home_team'], right_on=['rank_date', 'country_full'])
matches = matches.merge(rankings, left_on=['date', 'away_team'], right_on=['rank_date', 'country_full'], suffixes=('_home', '_away'))

# Feature generation
matches['rank_difference'] = matches['rank_home'] - matches['rank_away']
matches['average_rank'] = (matches['rank_home'] + matches['rank_away']) / 2
matches['point_difference'] = matches['weighted_points_home'] - matches['weighted_points_away']
matches['score_difference'] = matches['home_score'] - matches['away_score']
matches['is_won'] = matches['score_difference'] > 0  # take draw as lost
matches['is_stake'] = matches['tournament'] != 'Friendly'

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

# Shuffle the rows so that the head/tail split taken later is random.
# NOTE: the permutation is unseeded, so every run produces a different split.
matches = matches.reindex(np.random.permutation(matches.index))
def preprocess_features(matches):
    """Select the model's input features from a matches frame.

    Args:
        matches: pandas DataFrame containing at least the columns
            ``average_rank``, ``rank_difference``, ``point_difference``
            and ``is_stake``.

    Returns:
        A new DataFrame with copies of those four columns, in that order.
        Modifying it does not affect ``matches``.
    """
    feature_names = ["average_rank", "rank_difference", "point_difference", "is_stake"]
    return matches[feature_names].copy()


def preprocess_targets(matches):
    """Build the label frame for the classifier.

    Args:
        matches: pandas DataFrame containing an ``is_won`` column.

    Returns:
        A DataFrame with the single column ``is_won``, indexed like
        ``matches``.
    """
    return pd.DataFrame({"is_won": matches["is_won"]})


# Choose the first 60% i.e 10900 (out of 18167) examples for training.
# Derive the split point from the actual number of rows instead of
# hard-coding row counts, so the two subsets never overlap or skip rows when
# the merged data set changes size. For 18167 rows this yields the same
# 10900 / 7267 split as before.
TRAIN_FRACTION = 0.6
n_training = int(len(matches) * TRAIN_FRACTION)

training_examples = preprocess_features(matches.iloc[:n_training])
training_targets = preprocess_targets(matches.iloc[:n_training])

# The remaining rows (about 40%) are used for validation.
validation_examples = preprocess_features(matches.iloc[n_training:])
validation_targets = preprocess_targets(matches.iloc[n_training:])

Complete_Data_training = preprocess_features(matches)
Complete_Data_Validation = preprocess_targets(matches)


def construct_feature_columns(input_features):
    """Construct the TensorFlow feature columns.

    Args:
        input_features: Iterable of feature names; iterating a pandas
            DataFrame yields its column names.

    Returns:
        A set of numeric feature columns, one per feature name.
    """
    return {tf.feature_column.numeric_column(my_feature) for my_feature in input_features}


def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build an estimator input pipeline from pandas data.

    Args:
        features: pandas DataFrame of features
        targets: pandas DataFrame of targets
        batch_size: Size of batches to be passed to the model
        shuffle: True or False. Whether to shuffle the data.
        num_epochs: Number of epochs for which data should be repeated.
            None = repeat indefinitely

    Returns:
        Tuple of (features, labels) for next data batch
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset from the in-memory arrays.
    ds = Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit

    # Shuffle individual examples BEFORE batching. Shuffling after batch()
    # would only reorder whole batches and would buffer up to 10000 batches
    # instead of 10000 examples.
    if shuffle:
        ds = ds.shuffle(10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels


def train_nn_classification_model(my_optimizer, steps, batch_size, hidden_units,
                                  training_examples, training_targets,
                                  validation_examples, validation_targets):
    """Train a DNN classifier and report log loss after each training period.

    Training is split into 10 periods; after each one the log loss on the
    training data is printed, and the training and validation log losses are
    collected.

    Args:
        my_optimizer: Optimizer instance; it is wrapped so that gradients are
            clipped by norm (3.0).
        steps: Total number of training steps, divided evenly over the periods.
        batch_size: Batch size used for training.
        hidden_units: List with the number of units in each hidden layer.
        training_examples: DataFrame of training features.
        training_targets: DataFrame with an ``is_won`` column of training labels.
        validation_examples: DataFrame of validation features.
        validation_targets: DataFrame with an ``is_won`` column of validation labels.

    Returns:
        The trained ``tf.estimator.DNNClassifier``.
    """
    periods = 10
    # Integer division keeps the per-period step count an int.
    steps_per_period = steps // periods

    # Create a DNNClassifier object.
    my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 3.0)
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=construct_feature_columns(training_examples),
        hidden_units=hidden_units,
        optimizer=my_optimizer)

    # Create input functions.
    training_input_fn = lambda: my_input_fn(training_examples, training_targets["is_won"], batch_size=batch_size)
    predict_training_input_fn = lambda: my_input_fn(training_examples, training_targets["is_won"], num_epochs=1, shuffle=False)
    predict_validation_input_fn = lambda: my_input_fn(validation_examples, validation_targets["is_won"], num_epochs=1, shuffle=False)

    # Train the model, but do so inside a loop so that we can periodically
    # assess loss metrics.
    print("Training model...")
    print("LogLoss (on training data):")
    training_log_losses = []
    validation_log_losses = []
    for period in range(periods):
        # Train the model, starting from the prior state.
        dnn_classifier.train(input_fn=training_input_fn, steps=steps_per_period)

        # Take a break and compute predictions.
        training_probabilities = dnn_classifier.predict(input_fn=predict_training_input_fn)
        training_probabilities = np.array([item['probabilities'] for item in training_probabilities])
        validation_probabilities = dnn_classifier.predict(input_fn=predict_validation_input_fn)
        validation_probabilities = np.array([item['probabilities'] for item in validation_probabilities])

        training_log_loss = metrics.log_loss(training_targets, training_probabilities)
        validation_log_loss = metrics.log_loss(validation_targets, validation_probabilities)

        # Print the current loss.
        print("  period %02d : %0.2f" % (period, training_log_loss))

        # Add the loss metrics from this period to our lists.
        training_log_losses.append(training_log_loss)
        validation_log_losses.append(validation_log_loss)
    print("Model training finished.")

    return dnn_classifier


# NOTE: despite its name, ``linear_classifier`` holds the DNN classifier
# returned above; the name is kept because later code refers to it.
linear_classifier = train_nn_classification_model(
    my_optimizer=tf.train.AdagradOptimizer(learning_rate=0.07),
    steps=3000,
    batch_size=2000,
    hidden_units=[5, 5, 6, 5],
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)

predict_validation_input_fn = lambda: my_input_fn(validation_examples, validation_targets["is_won"], num_epochs=1, shuffle=False)
validation_probabilities = linear_classifier.predict(input_fn=predict_validation_input_fn)
# Get just the probabilities for the positive class.
validation_probabilities = np.array([item['probabilities'][1] for item in validation_probabilities])

# ROC curve points for the validation set (computed but not plotted here).
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(validation_targets, validation_probabilities)

evaluation_metrics = linear_classifier.evaluate(input_fn=predict_validation_input_fn)
print("AUC on the validation set: %0.2f" % evaluation_metrics['auc'])
print("Accuracy on the validation set: %0.2f" % evaluation_metrics['accuracy'])

# World Cup simulation
# Probability margin around 0.5 inside which a group match is scored as a
# draw rather than a win for either side.
margin = 0.05

# Rankings at the time of the World Cup: the most recent ranking date,
# restricted to the participating teams.
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & rankings['country_full'].isin(world_cup.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
opponents = ['First match \nagainst', 'Second match\n against', 'Third match\n against']


def _predict_home_win_prob(home, away):
    """Return the classifier's probability that ``home`` beats ``away``.

    Builds a one-row feature frame from the two teams' World Cup rankings
    (with ``is_stake`` set, since the match is not a friendly) and runs it
    through ``linear_classifier``.

    Args:
        home: Team name of the first side, as indexed in ``world_cup_rankings``.
        away: Team name of the second side, as indexed in ``world_cup_rankings``.

    Returns:
        The predicted probability of the positive class (home win).
    """
    row = pd.DataFrame(np.array([[np.nan, np.nan, np.nan, True]]), columns=validation_examples.columns)
    home_rank = world_cup_rankings.loc[home, 'rank']
    home_points = world_cup_rankings.loc[home, 'weighted_points']
    opp_rank = world_cup_rankings.loc[away, 'rank']
    opp_points = world_cup_rankings.loc[away, 'weighted_points']
    row['average_rank'] = (home_rank + opp_rank) / 2
    row['rank_difference'] = home_rank - opp_rank
    row['point_difference'] = home_points - opp_points
    # Placeholder label: my_input_fn requires targets, but the outcome is
    # unknown at prediction time.
    row['is_won'] = np.nan
    input_fn = lambda: my_input_fn(row, row["is_won"], num_epochs=1, shuffle=False)
    predictions = linear_classifier.predict(input_fn=input_fn)
    # Get just the probabilities for the positive class.
    positive_probabilities = np.array([item['probabilities'][1] for item in predictions])
    return positive_probabilities[0]


# ---------------------------------------------------------------------------
# Group stage: every pair of teams in a group plays once.
# ---------------------------------------------------------------------------
world_cup['points'] = 0
# Float from the start: probabilities are accumulated into this column.
world_cup['total_prob'] = 0.0

# Iterate over the groups in sorted order so the output is deterministic
# (iterating over a set gives an arbitrary order).
for group in sorted(world_cup['Group'].dropna().unique()):
    print('___Starting group {}:___'.format(group))
    group_teams = world_cup.index[world_cup['Group'] == group]
    for home, away in combinations(group_teams, 2):
        print("{} vs. {}: ".format(home, away))
        home_win_prob = _predict_home_win_prob(home, away)
        world_cup.loc[home, 'total_prob'] += home_win_prob
        world_cup.loc[away, 'total_prob'] += 1 - home_win_prob

        if home_win_prob <= 0.5 - margin:
            print("{} wins with {:.2f}".format(away, 1 - home_win_prob))
            world_cup.loc[away, 'points'] += 3
        elif home_win_prob >= 0.5 + margin:
            world_cup.loc[home, 'points'] += 3
            print("{} wins with {:.2f}".format(home, home_win_prob))
        else:
            # Too close to call: one point each.
            print("Draw")
            world_cup.loc[home, 'points'] += 1
            world_cup.loc[away, 'points'] += 1

# ---------------------------------------------------------------------------
# Knockout stage
# ---------------------------------------------------------------------------
# Positions into the 16 qualified teams (ordered A1, A2, B1, B2, ...):
# consecutive entries meet in the round of 16, e.g. 0 vs 3 is A1 vs B2.
pairing = [0, 3, 4, 7, 8, 11, 12, 15, 1, 2, 5, 6, 9, 10, 13, 14]

world_cup = world_cup.sort_values(by=['Group', 'points', 'total_prob'], ascending=False).reset_index()

# Select the top 2 of every group, ordered by group A..H. A stable sort on
# 'Group' keeps the points/total_prob ranking inside each group, and head(2)
# does not depend on the version-specific output layout of GroupBy.nth.
next_round_wc = (
    world_cup.sort_values('Group', kind='mergesort')
    .groupby('Group')
    .head(2)
    .reset_index(drop=True)
)
next_round_wc = next_round_wc.loc[pairing]
next_round_wc = next_round_wc.set_index('Team')

finals = ['round_of_16', 'quarterfinal', 'semifinal', 'final']
labels = list()
odds = list()

for f in finals:
    print("___Starting of the {}___".format(f))
    winners = []
    # Consecutive teams in the bracket play each other.
    fixtures = zip(next_round_wc.index[0::2], next_round_wc.index[1::2])
    for home, away in fixtures:
        print("{} vs. {}: ".format(home, away))
        home_win_prob = _predict_home_win_prob(home, away)
        # No draws in the knockout stage: the more likely side advances.
        if home_win_prob <= 0.5:
            print("{0} wins with probability {1:.2f}".format(away, 1 - home_win_prob))
            winners.append(away)
        else:
            print("{0} wins with probability {1:.2f}".format(home, home_win_prob))
            winners.append(home)

        # Record the fixture label (with decimal odds 1/p) and the raw
        # probabilities for both sides.
        labels.append("{}({:.2f}) vs. {}({:.2f})".format(world_cup_rankings.loc[home, 'country_abrv'],
                                                        1 / home_win_prob,
                                                        world_cup_rankings.loc[away, 'country_abrv'],
                                                        1 / (1 - home_win_prob)))
        odds.append([home_win_prob, 1 - home_win_prob])

    # Winners advance, keeping bracket order.
    next_round_wc = next_round_wc.loc[winners]
    print("\n")
Model training finished.
AUC on the validation set: 0.74
Accuracy on the validation set: 0.67
___Starting group A:___
Russia vs. Saudi Arabia:
Draw
Russia vs. Egypt:
Egypt wins with 0.67
Russia vs. Uruguay:
Uruguay wins with 0.84
Saudi Arabia vs. Egypt:
Egypt wins with 0.66
Saudi Arabia vs. Uruguay:
Uruguay wins with 0.84
Egypt vs. Uruguay:
Uruguay wins with 0.84
___Starting group C:___
France vs. Australia:
France wins with 0.57
France vs. Peru:
Draw
France vs. Denmark:
Draw
Australia vs. Peru:
Peru wins with 0.84
Australia vs. Denmark:
Denmark wins with 0.84
Peru vs. Denmark:
Draw
___Starting group B:___
Portugal vs. Spain:
Draw
Portugal vs. Morocco:
Portugal wins with 0.62
Portugal vs. Iran:
Portugal wins with 0.62
Spain vs. Morocco:
Spain wins with 0.60
Spain vs. Iran:
Spain wins with 0.60
Morocco vs. Iran:
Draw
___Starting group E:___
Brazil vs. Switzerland:
Draw
Brazil vs. Costa Rica:
Draw
Brazil vs. Serbia:
Brazil wins with 0.59
Switzerland vs. Costa Rica:
Draw
Switzerland vs. Serbia:
Switzerland wins with 0.57
Costa Rica vs. Serbia:
Draw
___Starting group D:___
Argentina vs. Iceland:
Draw
Argentina vs. Croatia:
Draw
Argentina vs. Nigeria:
Argentina wins with 0.64
Iceland vs. Croatia:
Draw
Iceland vs. Nigeria:
Iceland wins with 0.60
Croatia vs. Nigeria:
Croatia wins with 0.60
___Starting group G:___
Belgium vs. Panama:
Belgium wins with 0.68
Belgium vs. Tunisia:
Draw
Belgium vs. England:
Draw
Panama vs. Tunisia:
Tunisia wins with 0.84
Panama vs. England:
England wins with 0.84
Tunisia vs. England:
England wins with 0.61
___Starting group F:___
Germany vs. Mexico:
Germany wins with 0.56
Germany vs. Sweden:
Germany wins with 0.59
Germany vs. Korea Republic:
Germany wins with 0.73
Mexico vs. Sweden:
Draw
Mexico vs. Korea Republic:
Mexico wins with 0.65
Sweden vs. Korea Republic:
Sweden wins with 0.64
___Starting group H:___
Poland vs. Senegal:
Draw
Poland vs. Colombia:
Draw
Poland vs. Japan:
Poland wins with 0.66
Senegal vs. Colombia:
Colombia wins with 0.55
Senegal vs. Japan:
Senegal wins with 0.63
Colombia vs. Japan:
Colombia wins with 0.65
___Starting of the round_of_16___
Uruguay vs. Spain:
Spain wins with probability 0.54
Denmark vs. Croatia:
Denmark wins with probability 0.55
Switzerland vs. Mexico:
Mexico wins with probability 0.51
England vs. Poland:
Poland wins with probability 0.53
Egypt vs. Portugal:
Portugal wins with probability 0.84
Peru vs. Argentina:
Argentina wins with probability 0.56
Brazil vs. Germany:
Germany wins with probability 0.84
Belgium vs. Colombia:
Belgium wins with probability 0.54

___Starting of the quarterfinal___
Spain vs. Denmark:
Denmark wins with probability 0.52
Mexico vs. Poland:
Poland wins with probability 0.59
Portugal vs. Argentina:
Argentina wins with probability 0.53
Germany vs. Belgium:
Belgium wins with probability 0.52

___Starting of the semifinal___
Denmark vs. Poland:
Poland wins with probability 0.51
Argentina vs. Belgium:
Belgium wins with probability 0.57

___Starting of the final___
Poland vs. Belgium:
Belgium wins with probability 0.84

2018年世界杯赔率预测 -DNN相关推荐

  1. 2018年世界杯赔率预测

    参考:https://www.kaggle.com/martj42/international-football-results-from-1872-to-2017/kernels # -*- cod ...

  2. PAT-A1011 World Cup Betting (世界杯赔率)

    A1011 World Cup Betting (世界杯赔率) With the 2010 FIFA World Cup running, football fans the world over w ...

  3. 了解世界杯赔率,让您运气更‘好‘(个人分享)

    足球世界杯买球赢面计算 前言 理论基础 实际计算用例: 代码实现 真实数据 前言 此文是个人关于世界杯的一些浅显的看法,实际统计结果和计算方法有出入,可能原因:1)数据量不够.2)比赛双方差距够大导致 ...

  4. 2018世界杯赛程PHP源码,PHP-ML机器学习预测2018俄罗斯世界杯比赛结果

    前言: 根据2014年巴西世界杯的小组赛比赛结果和赔率数据简单预测2018世界杯比赛结果,比赛的赔率我们可以事先知道,所以可以使用赔率作为预测数据 技术: PHP ML库 贝叶斯分类器 样本数据:20 ...

  5. 世界杯押注还得看技术流,这个预测AI把赔率也算上了

    胡澎 发自 凹非寺  量子位 报道 | 公众号 QbitAI 世界杯小组赛将收官,你还依然信AI吗? 冷门频出,黑马击败豪强.不少AI模型始料未及. 到底还能不能愉快找到科学规律?或者说足球比赛乃至其 ...

  6. ML之预测:玩转2018世界杯—采用机器学习预测小组赛、十六比赛、四决赛、半决赛、决赛以及世界杯总冠军的各个队伍

    ML之预测:玩转2018世界杯-采用机器学习预测小组赛.十六比赛.四决赛.半决赛.决赛以及世界杯总冠军的各个队伍 导读       机器学习预测.玩转2018世界杯-采用机器学习预测小组赛.十六比赛. ...

  7. 预测2018年世界杯决赛_2018年5个电子商务预测

    预测2018年世界杯决赛 2018 is with us already and it's important in the current competitive market that eComm ...

  8. 浅谈大数据之足球盘口赔率水位分析的思路与神准预测技巧(一)

    足球运动是当今世界上开展最广.影响最大.最具魅力.拥有球迷数最多的体育项目之一,尤其是欧洲足球,每年赛事除了五大联赛(英超.西甲.德甲.法甲.意甲)之外,还会有欧冠(欧洲冠军联赛),精湛的球技,完美的 ...

  9. 再谈大数据之足球盘口赔率水位分析的思路与神准预测技巧

    这两天看到CSDN上一篇同行写的有趣的话题,题目为: 浅谈大数据之足球盘口赔率水位分析的思路与神准预测技巧(一)_linwei_hello的专栏-CSDN博客 因为算是同行文章,本人也做足球大数据分析 ...

最新文章

  1. 长见识!居然还有程序员考公指南这种东西?
  2. 自动换行的draw2d标签
  3. unity下载文件三(http异步下载)
  4. Java异常(一) Java异常简介及其架构
  5. 设计模式--单例(Singleton)模式
  6. VTK:PolyData之DownsamplePointCloud
  7. 一张图,看懂阿里云12年的“飞天日记”
  8. vivado路径最大时钟约束_Vivado使用误区与进阶系列(五)XDC约束技巧之I/O篇(下)...
  9. Java设计模式-设计模式概述
  10. 动画:一招学会TCP的三次握手和四次挥手
  11. 对话CDN巨头Akamai:携手金山云,意欲何为?
  12. 软件测试订单测试用例,测试用例 - 进销存软件测试.doc
  13. linux命令和常见的状态码
  14. python基金筛选_【量化投资工具】抓取沪深股市所有指数关联的公募基金列表(含ETF、增强、分级等)...
  15. matlab for循环与subs应用 求解
  16. 魔兽地图编辑器 简单介绍
  17. 高通平台提高核电电压
  18. Linux系统 ELK(8.3.1)单机环境搭建
  19. android edittext 美化,android EditText的美化
  20. JAVA API系统变量名一些缩写

热门文章

  1. 二次元博客php,Mokore: Wordpress二次元简约个人博客主题by江程训
  2. 在visi_Visi如何使用Weave和Docker
  3. 解密:智能化变电站中PTP时钟同步(北斗时钟服务器)
  4. 浅谈:智能化变电站在线监测系统
  5. 【阅读笔记】联邦学习实战——联邦学习在智能物联网中的应用案例
  6. 2020年中国智能物联网(AIoT)白皮书
  7. 网络舆情信息查找网站与怎么查的方法详解
  8. 视频教程-卷积神经网络CNN-深度学习
  9. 如何复制360个人图书馆中的文章
  10. 判断iOS6/iOS7, 3.5inch/4.0inch