Movie recommendation with a text convolutional neural network on the MovieLens dataset

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
import tensorflow as tf
import os
import pickle
import re
from tensorflow.python.ops import math_ops

Load the data

users_title = ['UserID', 'Gender', 'Age', 'OccupationID', 'Zip-code']
users = pd.read_table('./ml-1m/users.dat', sep='::', header=None, names=users_title, engine='python')
#users.head()

movies_title = ['MovieID', 'Title', 'Genres']
movies = pd.read_table('./ml-1m/movies.dat', sep='::', header=None, names=movies_title, engine='python')
#movies.head()

ratings_title = ['UserID', 'MovieID', 'Rating', 'timestamps']
ratings = pd.read_table('./ml-1m/ratings.dat', sep='::', header=None, names=ratings_title, engine='python')
#ratings.head()

Data preprocessing

def load_data():
    """Load Dataset from File"""
    # Read the user data
    users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
    users = pd.read_table('./ml-1m/users.dat', sep='::', header=None, names=users_title, engine='python')
    users = users.filter(regex='UserID|Gender|Age|JobID')
    users_orig = users.values
    # Convert gender and age in the user data to integers
    gender_map = {'F': 0, 'M': 1}
    users['Gender'] = users['Gender'].map(gender_map)
    age_map = {val: ii for ii, val in enumerate(set(users['Age']))}
    users['Age'] = users['Age'].map(age_map)

    # Read the movie data
    movies_title = ['MovieID', 'Title', 'Genres']
    movies = pd.read_table('./ml-1m/movies.dat', sep='::', header=None, names=movies_title, engine='python')
    movies_orig = movies.values
    # Strip the year from the title
    pattern = re.compile(r'^(.*)\((\d+)\)$')
    title_map = {val: pattern.match(val).group(1) for ii, val in enumerate(set(movies['Title']))}
    movies['Title'] = movies['Title'].map(title_map)

    # Build a genre-to-integer dictionary
    genres_set = set()
    for val in movies['Genres'].str.split('|'):
        genres_set.update(val)
    genres_set.add('<PAD>')
    genres2int = {val: ii for ii, val in enumerate(genres_set)}
    # Convert each genre string to a fixed-length list of integers (length 18)
    genres_map = {val: [genres2int[row] for row in val.split('|')] for ii, val in enumerate(set(movies['Genres']))}
    for key in genres_map:
        for cnt in range(max(genres2int.values()) - len(genres_map[key])):
            genres_map[key].insert(len(genres_map[key]) + cnt, genres2int['<PAD>'])
    movies['Genres'] = movies['Genres'].map(genres_map)

    # Build a title-word-to-integer dictionary
    title_set = set()
    for val in movies['Title'].str.split():
        title_set.update(val)
    title_set.add('<PAD>')
    title2int = {val: ii for ii, val in enumerate(title_set)}
    # Convert each title to a fixed-length list of integers (length 15)
    title_count = 15
    title_map = {val: [title2int[row] for row in val.split()] for ii, val in enumerate(set(movies['Title']))}
    for key in title_map:
        for cnt in range(title_count - len(title_map[key])):
            title_map[key].insert(len(title_map[key]) + cnt, title2int['<PAD>'])
    movies['Title'] = movies['Title'].map(title_map)

    # Read the ratings data
    ratings_title = ['UserID', 'MovieID', 'ratings', 'timestamps']
    ratings = pd.read_table('./ml-1m/ratings.dat', sep='::', header=None, names=ratings_title, engine='python')
    ratings = ratings.filter(regex='UserID|MovieID|ratings')

    # Merge the three tables
    data = pd.merge(pd.merge(ratings, users), movies)

    # Split the data into features X and targets y
    target_fields = ['ratings']
    features_pd, targets_pd = data.drop(target_fields, axis=1), data[target_fields]
    features = features_pd.values
    targets_values = targets_pd.values

    return title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig
'''
Load the data and save it locally.
title_count: length of the Title field (15)
title_set: set of words appearing in titles
genres2int: dictionary mapping genres to integers
features: the input X
targets_values: the learning target y
ratings: the ratings dataset as a pandas object
users: the user dataset as a pandas object
movies: the movie dataset as a pandas object
data: the three datasets merged into one pandas object
movies_orig: the raw movie data before preprocessing
users_orig: the raw user data before preprocessing
'''
#title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = load_data()
#pickle.dump((title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig), open('preprocess.p', 'wb'))
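For intuition, here is a hedged sketch of what the preprocessing does to a single movie record. The concrete integer IDs below are hypothetical, since they depend on Python set iteration order:

# Hypothetical illustration of one preprocessed movie row (IDs are made up):
# before: [1, 'Toy Story (1995)', "Animation|Children's|Comedy"]
# after the year is stripped and both fields are mapped to padded integer lists:
#   Title  -> [311, 1705, pad, pad, ...]  # padded to 15 ints with title2int['<PAD>']
#   Genres -> [4, 9, 12, pad, pad, ...]   # padded to 18 ints with genres2int['<PAD>']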

Read the preprocessed data back from disk

title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = pickle.load(open('preprocess.p', mode='rb'))

Model design

# Helper functions
def save_params(params):
    """Save parameters to file"""
    pickle.dump(params, open('params.p', 'wb'))

def load_params():
    """Load parameters from file"""
    return pickle.load(open('params.p', mode='rb'))

# Implementation
# Dimension of the embedding matrices
embed_dim = 32
# Number of user IDs
uid_max = max(features.take(0, 1)) + 1  # 6040 + 1 = 6041
# Number of genders
gender_max = max(features.take(2, 1)) + 1  # 1 + 1 = 2
# Number of age categories
age_max = max(features.take(3, 1)) + 1  # 6 + 1 = 7
# Number of occupations
job_max = max(features.take(4, 1)) + 1  # 20 + 1 = 21
# Number of movie IDs
movie_id_max = max(features.take(1, 1)) + 1  # 3952 + 1 = 3953
# Number of movie genres
movie_categories_max = max(genres2int.values()) + 1  # 18 + 1 = 19
# Number of distinct words in movie titles
movie_title_max = len(title_set)  # 5216

# How to combine the multiple genre embedding vectors of a movie: sum them.
# Taking the mean was considered, but is not implemented.
combiner = "sum"

# Length of a movie title
sentences_size = title_count  # = 15
# Sliding windows of the text convolution: 2, 3, 4 and 5 words
window_sizes = {2, 3, 4, 5}
# Number of convolution filters per window size
filter_num = 8

# Dictionary mapping movie ID to row index; IDs and row indices do not line up
# (e.g. the movie in row 5 does not necessarily have ID 5)
movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}

# Hyperparameters
# Number of Epochs
num_epochs = 5
# Batch Size
batch_size = 256
dropout_keep = 0.5
# Learning Rate
learning_rate = 0.0001
# Show stats for every n number of batches
show_every_n_batches = 20

save_dir = './save'

# Inputs
# Define the input placeholders
def get_inputs():
    uid = tf.placeholder(tf.int32, [None, 1], name="uid")
    user_gender = tf.placeholder(tf.int32, [None, 1], name="user_gender")
    user_age = tf.placeholder(tf.int32, [None, 1], name="user_age")
    user_job = tf.placeholder(tf.int32, [None, 1], name="user_job")

    movie_id = tf.placeholder(tf.int32, [None, 1], name="movie_id")
    movie_categories = tf.placeholder(tf.int32, [None, 18], name="movie_categories")
    movie_titles = tf.placeholder(tf.int32, [None, 15], name="movie_titles")
    targets = tf.placeholder(tf.int32, [None, 1], name="targets")
    LearningRate = tf.placeholder(tf.float32, name="LearningRate")
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    return uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, LearningRate, dropout_keep_prob

Build the neural network

# Define the user embedding matrices
def get_user_embedding(uid, user_gender, user_age, user_job):
    with tf.name_scope("user_embedding"):
        uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1), name="uid_embed_matrix")
        uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid, name="uid_embed_layer")

        gender_embed_matrix = tf.Variable(tf.random_uniform([gender_max, embed_dim // 2], -1, 1), name="gender_embed_matrix")
        gender_embed_layer = tf.nn.embedding_lookup(gender_embed_matrix, user_gender, name="gender_embed_layer")

        age_embed_matrix = tf.Variable(tf.random_uniform([age_max, embed_dim // 2], -1, 1), name="age_embed_matrix")
        age_embed_layer = tf.nn.embedding_lookup(age_embed_matrix, user_age, name="age_embed_layer")

        job_embed_matrix = tf.Variable(tf.random_uniform([job_max, embed_dim // 2], -1, 1), name="job_embed_matrix")
        job_embed_layer = tf.nn.embedding_lookup(job_embed_matrix, user_job, name="job_embed_layer")
    return uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer

# Fully connect the user embeddings together to produce the user features
def get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer):
    with tf.name_scope("user_fc"):
        # First fully connected layer
        uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim, name="uid_fc_layer", activation=tf.nn.relu)
        gender_fc_layer = tf.layers.dense(gender_embed_layer, embed_dim, name="gender_fc_layer", activation=tf.nn.relu)
        age_fc_layer = tf.layers.dense(age_embed_layer, embed_dim, name="age_fc_layer", activation=tf.nn.relu)
        job_fc_layer = tf.layers.dense(job_embed_layer, embed_dim, name="job_fc_layer", activation=tf.nn.relu)

        # Second fully connected layer
        user_combine_layer = tf.concat([uid_fc_layer, gender_fc_layer, age_fc_layer, job_fc_layer], 2)  # (?, 1, 128)
        user_combine_layer = tf.contrib.layers.fully_connected(user_combine_layer, 200, tf.tanh)  # (?, 1, 200)

        user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
    return user_combine_layer, user_combine_layer_flat

# Define the movie ID embedding matrix
def get_movie_id_embed_layer(movie_id):
    with tf.name_scope("movie_embedding"):
        movie_id_embed_matrix = tf.Variable(tf.random_uniform([movie_id_max, embed_dim], -1, 1), name="movie_id_embed_matrix")
        movie_id_embed_layer = tf.nn.embedding_lookup(movie_id_embed_matrix, movie_id, name="movie_id_embed_layer")
    return movie_id_embed_layer

# Sum the embedding vectors of a movie's multiple genres
def get_movie_categories_layers(movie_categories):
    with tf.name_scope("movie_categories_layers"):
        movie_categories_embed_matrix = tf.Variable(tf.random_uniform([movie_categories_max, embed_dim], -1, 1), name="movie_categories_embed_matrix")
        movie_categories_embed_layer = tf.nn.embedding_lookup(movie_categories_embed_matrix, movie_categories, name="movie_categories_embed_layer")
        if combiner == "sum":
            movie_categories_embed_layer = tf.reduce_sum(movie_categories_embed_layer, axis=1, keep_dims=True)
    #     elif combiner == "mean":
    return movie_categories_embed_layer

# Text convolutional network over the movie title
def get_movie_cnn_layer(movie_titles):
    # Look up the embedding vector of each word of the title in the embedding matrix
    with tf.name_scope("movie_embedding"):
        movie_title_embed_matrix = tf.Variable(tf.random_uniform([movie_title_max, embed_dim], -1, 1), name="movie_title_embed_matrix")
        movie_title_embed_layer = tf.nn.embedding_lookup(movie_title_embed_matrix, movie_titles, name="movie_title_embed_layer")
        movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)

    # Convolve and max-pool the text embedding layer with filters of different sizes
    pool_layer_lst = []
    for window_size in window_sizes:
        with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):
            filter_weights = tf.Variable(tf.truncated_normal([window_size, embed_dim, 1, filter_num], stddev=0.1), name="filter_weights")
            filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")

            conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand, filter_weights, [1, 1, 1, 1], padding="VALID", name="conv_layer")
            relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name="relu_layer")

            maxpool_layer = tf.nn.max_pool(relu_layer, [1, sentences_size - window_size + 1, 1, 1], [1, 1, 1, 1], padding="VALID", name="maxpool_layer")
            pool_layer_lst.append(maxpool_layer)

    # Dropout layer
    with tf.name_scope("pool_dropout"):
        pool_layer = tf.concat(pool_layer_lst, 3, name="pool_layer")
        max_num = len(window_sizes) * filter_num
        pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num], name="pool_layer_flat")

        dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob, name="dropout_layer")
    return pool_layer_flat, dropout_layer

# Fully connect the movie layers together
def get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer):
    with tf.name_scope("movie_fc"):
        # First fully connected layer
        movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim, name="movie_id_fc_layer", activation=tf.nn.relu)
        movie_categories_fc_layer = tf.layers.dense(movie_categories_embed_layer, embed_dim, name="movie_categories_fc_layer", activation=tf.nn.relu)

        # Second fully connected layer
        movie_combine_layer = tf.concat([movie_id_fc_layer, movie_categories_fc_layer, dropout_layer], 2)  # (?, 1, 96)
        movie_combine_layer = tf.contrib.layers.fully_connected(movie_combine_layer, 200, tf.tanh)  # (?, 1, 200)

        movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])
    return movie_combine_layer, movie_combine_layer_flat

Build the computation graph

tf.reset_default_graph()
train_graph = tf.Graph()
with train_graph.as_default():
    # Get the input placeholders
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob = get_inputs()
    # Get the 4 user embedding vectors
    uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer = get_user_embedding(uid, user_gender, user_age, user_job)
    # Get the user features
    user_combine_layer, user_combine_layer_flat = get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer)
    # Get the movie ID embedding vector
    movie_id_embed_layer = get_movie_id_embed_layer(movie_id)
    # Get the movie genre embedding vector
    movie_categories_embed_layer = get_movie_categories_layers(movie_categories)
    # Get the feature vector of the movie title
    pool_layer_flat, dropout_layer = get_movie_cnn_layer(movie_titles)
    # Get the movie features
    movie_combine_layer, movie_combine_layer_flat = get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer)

    # Compute the rating. Note that the two alternative schemes give the inference
    # op different names; the recommendation code later fetches the tensor by name.
    with tf.name_scope("inference"):
        # Scheme 1: feed the concatenated user and movie features through a fully
        # connected layer that outputs a single value
#         inference_layer = tf.concat([user_combine_layer_flat, movie_combine_layer_flat], 1)  #(?, 200)
#         inference = tf.layers.dense(inference_layer, 1,
#                                     kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
#                                     kernel_regularizer=tf.nn.l2_loss, name="inference")
        # Scheme 2: simply multiply the user and movie features to get a predicted rating
#        inference = tf.matmul(user_combine_layer_flat, tf.transpose(movie_combine_layer_flat))
        inference = tf.reduce_sum(user_combine_layer_flat * movie_combine_layer_flat, axis=1)
        inference = tf.expand_dims(inference, axis=1)

    with tf.name_scope("loss"):
        # MSE loss: regress the computed value onto the rating
        cost = tf.losses.mean_squared_error(targets, inference)
        loss = tf.reduce_mean(cost)

    # Optimize the loss
#     train_op = tf.train.AdamOptimizer(lr).minimize(loss)  #cost
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(lr)
    gradients = optimizer.compute_gradients(loss)  #cost
    train_op = optimizer.apply_gradients(gradients, global_step=global_step)

Get batches

def get_batches(Xs, ys, batch_size):
    for start in range(0, len(Xs), batch_size):
        end = min(start + batch_size, len(Xs))
        yield Xs[start:end], ys[start:end]

Train the network

#%matplotlib inline
#%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import time
import datetime

losses = {'train': [], 'test': []}

with tf.Session(graph=train_graph) as sess:
    # Collect data for TensorBoard
    # Keep track of gradient values and sparsity
    grad_summaries = []
    for g, v in gradients:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name.replace(':', '_')), g)
            sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name.replace(':', '_')), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.summary.merge(grad_summaries)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", loss)

    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Inference summaries
    inference_summary_op = tf.summary.merge([loss_summary])
    inference_summary_dir = os.path.join(out_dir, "summaries", "inference")
    inference_summary_writer = tf.summary.FileWriter(inference_summary_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for epoch_i in range(num_epochs):
        # Split the data into training and test sets; random_state is fixed at 0,
        # so the split is identical in every epoch
        train_X, test_X, train_y, test_y = train_test_split(features, targets_values, test_size=0.2, random_state=0)

        train_batches = get_batches(train_X, train_y, batch_size)
        test_batches = get_batches(test_X, test_y, batch_size)

        # Training iterations; record the training loss
        for batch_i in range(len(train_X) // batch_size):
            x, y = next(train_batches)

            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(6, 1)[i]

            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(5, 1)[i]

            feed = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_categories: categories,  #x.take(6,1)
                movie_titles: titles,  #x.take(5,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: dropout_keep,  #dropout_keep
                lr: learning_rate}

            step, train_loss, summaries, _ = sess.run([global_step, loss, train_summary_op, train_op], feed)  #cost
            losses['train'].append(train_loss)
            train_summary_writer.add_summary(summaries, step)

            # Show every <show_every_n_batches> batches
            if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                time_str = datetime.datetime.now().isoformat()
                print('{}: Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(train_X) // batch_size),
                    train_loss))

        # Test iterations
        for batch_i in range(len(test_X) // batch_size):
            x, y = next(test_batches)

            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(6, 1)[i]

            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(5, 1)[i]

            feed = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_categories: categories,  #x.take(6,1)
                movie_titles: titles,  #x.take(5,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: 1,
                lr: learning_rate}

            step, test_loss, summaries = sess.run([global_step, loss, inference_summary_op], feed)  #cost

            # Record the test loss
            losses['test'].append(test_loss)
            inference_summary_writer.add_summary(summaries, step)

            time_str = datetime.datetime.now().isoformat()
            if (epoch_i * (len(test_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                print('{}: Epoch {:>3} Batch {:>4}/{}   test_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(test_X) // batch_size),
                    test_loss))

    # Save Model
    saver.save(sess, save_dir)  #, global_step=epoch_i
    print('Model Trained and Saved')

Save parameters

save_params(save_dir)
load_dir = load_params()
plt.plot(losses['train'], label='Training loss')
plt.legend()
_ = plt.ylim()

plt.plot(losses['test'], label='Test loss')
plt.legend()
_ = plt.ylim()

def get_tensors(loaded_graph):
    uid = loaded_graph.get_tensor_by_name("uid:0")
    user_gender = loaded_graph.get_tensor_by_name("user_gender:0")
    user_age = loaded_graph.get_tensor_by_name("user_age:0")
    user_job = loaded_graph.get_tensor_by_name("user_job:0")
    movie_id = loaded_graph.get_tensor_by_name("movie_id:0")
    movie_categories = loaded_graph.get_tensor_by_name("movie_categories:0")
    movie_titles = loaded_graph.get_tensor_by_name("movie_titles:0")
    targets = loaded_graph.get_tensor_by_name("targets:0")
    dropout_keep_prob = loaded_graph.get_tensor_by_name("dropout_keep_prob:0")
    lr = loaded_graph.get_tensor_by_name("LearningRate:0")
    # The two rating-prediction schemes require different names to fetch the inference tensor
#     inference = loaded_graph.get_tensor_by_name("inference/inference/BiasAdd:0")
    inference = loaded_graph.get_tensor_by_name("inference/ExpandDims:0")  # formerly "MatMul:0"; updated to match the revised inference code (thanks to reader @清歌 for pointing this out)
    movie_combine_layer_flat = loaded_graph.get_tensor_by_name("movie_fc/Reshape:0")
    user_combine_layer_flat = loaded_graph.get_tensor_by_name("user_fc/Reshape:0")
    return uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference, movie_combine_layer_flat, user_combine_layer_flat

def rating_movie(user_id_val, movie_id_val):
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Get Tensors from loaded model
        uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference, _, __ = get_tensors(loaded_graph)  #loaded_graph

        categories = np.zeros([1, 18])
        categories[0] = movies.values[movieid2idx[movie_id_val]][2]

        titles = np.zeros([1, sentences_size])
        titles[0] = movies.values[movieid2idx[movie_id_val]][1]

        feed = {
            uid: np.reshape(users.values[user_id_val - 1][0], [1, 1]),
            user_gender: np.reshape(users.values[user_id_val - 1][1], [1, 1]),
            user_age: np.reshape(users.values[user_id_val - 1][2], [1, 1]),
            user_job: np.reshape(users.values[user_id_val - 1][3], [1, 1]),
            movie_id: np.reshape(movies.values[movieid2idx[movie_id_val]][0], [1, 1]),
            movie_categories: categories,  #x.take(6,1)
            movie_titles: titles,  #x.take(5,1)
            dropout_keep_prob: 1}

        # Get Prediction
        inference_val = sess.run([inference], feed)

        return inference_val
rating_movie(234, 1401)
#output : [array([[3.0878916]], dtype=float32)]

Making movie recommendations

Use the generated user feature matrix and movie feature matrix to make movie recommendations.
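The recommendation functions below rely on movie_matrics and users_matrics, the feature matrices obtained by running every movie and every user through the trained network. The generation code is not shown in this excerpt; here is a minimal sketch of how the movie matrix could be built (the user matrix is analogous, feeding the user placeholders and reading the "user_fc/Reshape:0" tensor). This is a sketch under the assumptions above, not the verbatim original code:

# A minimal sketch: build the (n_movies, 200) movie feature matrix
loaded_graph = tf.Graph()
movie_matrics = []
with tf.Session(graph=loaded_graph) as sess:
    # Load the trained model saved above
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)
    _, _, _, _, movie_id, movie_categories, movie_titles, _, _, dropout_keep_prob, _, movie_combine_layer_flat, __ = get_tensors(loaded_graph)
    for item in movies.values:
        categories = np.zeros([1, 18])
        categories[0] = item.take(2)
        titles = np.zeros([1, sentences_size])
        titles[0] = item.take(1)
        feed = {
            movie_id: np.reshape(item.take(0), [1, 1]),
            movie_categories: categories,
            movie_titles: titles,
            dropout_keep_prob: 1}
        # 200-dimensional feature vector of this movie
        movie_representation = sess.run([movie_combine_layer_flat], feed)
        movie_matrics.append(movie_representation)
movie_matrics = np.array(movie_matrics).reshape(-1, 200)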

Recommend movies of the same type

The idea is to compute the cosine similarity between the feature vector of the movie currently being watched and the whole movie feature matrix, and take the top_k most similar movies. Some randomness is mixed in so that the recommendations vary slightly from run to run.
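That randomness comes from a small weighted-sampling trick used by all three recommenders below: zero out everything except the top_k scores, renormalize what remains into a probability distribution, and sample from it. A toy illustration with made-up scores:

# Toy illustration of the top-k weighted sampling (values are hypothetical):
p = np.array([0.1, 0.9, 0.3, 0.8, 0.5])
top_k = 2
p[np.argsort(p)[:-top_k]] = 0   # keep only the two largest scores -> [0, 0.9, 0, 0.8, 0]
p = p / np.sum(p)               # renormalize -> [0, 0.53, 0, 0.47, 0]
pick = np.random.choice(len(p), 1, p=p)[0]  # sample an index, biased toward higher scores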

def recommend_same_type_movie(movie_id_val, top_k=20):
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        norm_movie_matrics = tf.sqrt(tf.reduce_sum(tf.square(movie_matrics), 1, keep_dims=True))
        normalized_movie_matrics = movie_matrics / norm_movie_matrics

        # Recommend movies of the same type
        probs_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalized_movie_matrics))
        sim = (probs_similarity.eval())
    #     results = (-sim[0]).argsort()[0:top_k]
    #     print(results)

        print("The movie you watched is: {}".format(movies_orig[movieid2idx[movie_id_val]]))
        print("Here are the recommendations for you:")
        p = np.squeeze(sim)
        p[np.argsort(p)[:-top_k]] = 0
        p = p / np.sum(p)
        results = set()
        while len(results) != 5:
            c = np.random.choice(3883, 1, p=p)[0]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])

        return results
recommend_same_type_movie(1401, 20)

Recommend movies you might like

The idea is to use the user feature vector and the movie feature matrix to compute a score for every movie, and take the top_k highest-scoring movies; the same random-selection step is applied.
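Since both feature vectors are 200-dimensional, scoring every movie for one user is a single matrix product. A sketch, assuming users_matrics and movie_matrics are the NumPy arrays built above and user_id_val is a 1-based user ID:

# Score every movie for one user in one matrix product (a sketch):
# users_matrics[user_id_val - 1] has shape (200,); movie_matrics has shape (n_movies, 200)
scores = np.dot(users_matrics[user_id_val - 1], movie_matrics.T)  # shape: (n_movies,)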

def recommend_your_favorite_movie(user_id_val, top_k=10):
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Recommend movies you might like
        probs_embeddings = (users_matrics[user_id_val - 1]).reshape([1, 200])
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(movie_matrics))
        sim = (probs_similarity.eval())
    #     print(sim.shape)
    #     results = (-sim[0]).argsort()[0:top_k]
    #     print(results)
    #     sim_norm = probs_norm_similarity.eval()
    #     print((-sim_norm[0]).argsort()[0:top_k])

        print("Here are the recommendations for you:")
        p = np.squeeze(sim)
        p[np.argsort(p)[:-top_k]] = 0
        p = p / np.sum(p)
        results = set()
        while len(results) != 5:
            c = np.random.choice(3883, 1, p=p)[0]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])

        return results
recommend_your_favorite_movie(234, 10)

What other movies did people who watched (liked) this movie also watch (like)?

  • First select the top_k users who like the given movie, and take their user feature vectors.
  • Then compute those users' scores for all movies.
  • Take each user's highest-scoring movie as a recommendation.
  • The same random selection is applied (see the sketch after this list).
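In matrix terms, the first two steps are just two products. A sketch, assuming movie_matrics and users_matrics as above and hypothetical inputs movie_id_val and top_k:

# Find the top-k fans of the movie, then score all movies for those fans (a sketch):
fav_scores = np.dot(movie_matrics[movieid2idx[movie_id_val]], users_matrics.T)  # shape: (n_users,)
favorite_user_id = np.argsort(fav_scores)[-top_k:]                              # row indices of the top-k fans
sim = np.dot(users_matrics[favorite_user_id], movie_matrics.T)                  # shape: (top_k, n_movies)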
import random

def recommend_other_favorite_movie(movie_id_val, top_k=20):
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        probs_movie_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        probs_user_favorite_similarity = tf.matmul(probs_movie_embeddings, tf.transpose(users_matrics))
        favorite_user_id = np.argsort(probs_user_favorite_similarity.eval())[0][-top_k:]
    #     print(normalized_users_matrics.eval().shape)
    #     print(probs_user_favorite_similarity.eval()[0][favorite_user_id])
    #     print(favorite_user_id.shape)

        print("The movie you watched is: {}".format(movies_orig[movieid2idx[movie_id_val]]))
        # favorite_user_id holds 0-based row indices, so no extra -1 offset is needed
        print("People who like this movie are: {}".format(users_orig[favorite_user_id]))

        probs_users_embeddings = (users_matrics[favorite_user_id]).reshape([-1, 200])
        probs_similarity = tf.matmul(probs_users_embeddings, tf.transpose(movie_matrics))
        sim = (probs_similarity.eval())
    #     results = (-sim[0]).argsort()[0:top_k]
    #     print(results)
    #     print(sim.shape)
    #     print(np.argmax(sim, 1))

        p = np.argmax(sim, 1)
        print("People who like this movie also like:")
        results = set()
        while len(results) != 5:
            c = p[random.randrange(top_k)]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])

        return results
recommend_other_favorite_movie(1401, 20)

Conclusion

The above implements the common recommendation functions: the network is trained as a regression problem, and the resulting user feature matrix and movie feature matrix are then used to make recommendations.
