实现MovieLens电影推荐
import java.util.Random

import org.apache.log4j.Level
import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.rdd._

import scala.annotation.tailrec
import scala.io.Source
import scala.util.Try

/**
 * MovieLens recommender built on Spark MLlib's ALS matrix factorization.
 *
 * The program loads ratings and movie names, asks the user on the command line
 * to rate a random sample of the 50 most-rated movies, trains ALS models over a
 * small hyper-parameter grid, keeps the model with the lowest validation RMSE,
 * reports its RMSE on the test split and prints up to 50 recommendations.
 *
 * The interactive user is represented by user id 0.
 */
object MovieLensALS {

  /** Input paths read through `SparkContext.textFile`. */
  private val RatingsPath = "/movielens/medium/ratings.dat"
  private val MoviesPath = "/movielens/medium/movies.dat"

  /**
   * Program entry point.
   *
   * Everything that touches Spark lives here (via [[run]]) rather than in the
   * object body, so the log levels are configured before any Spark work starts
   * and no job runs as part of the object's static initialization.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Settings that were commented out in the original, kept for local runs:
    //   .setMaster("local")
    //   .setSparkHome("/zzti/libs/spark")
    val conf = new SparkConf()
      .setAppName("MovieLensALS")
      .set("spark.executor.memory", "2g")
    // The original also carried a commented-out Windows workaround:
    //   System.setProperty("hadoop.home.dir", "<path to winutils>")

    val sc = new SparkContext(conf)
    try run(sc)
    finally sc.stop() // always release the context, even when the job fails
  }

  /** Runs the whole load / elicit / train / evaluate / recommend pipeline. */
  private def run(sc: SparkContext): Unit = {
    // ratings.dat lines are "user::movie::rating::<fourth field>". The fourth
    // field (presumably a timestamp) modulo 10 is used as the split key below.
    val ratings = sc.textFile(RatingsPath).map { line =>
      val fields = line.split("::")
      (fields(3).toInt % 10, Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble))
    }

    // movies.dat lines start with "id::name"; collected to the driver as a Map.
    val movies = sc.textFile(MoviesPath).map { line =>
      val fields = line.split("::")
      (fields(0).toInt, fields(1))
    }.collect().toMap

    val numRatings = ratings.count()
    val numUsers = ratings.map(_._2.user).distinct.count()
    val numMovies = ratings.map(_._2.product).distinct.count()
    println("Got " + numRatings + " ratings from " + numUsers + " users on " + numMovies + " movies.")

    // Ids of the 50 movies with the most ratings.
    val mostRatedMovieIds = ratings.map(_._2.product)
      .countByValue()
      .toSeq
      .sortBy(-_._2)
      .take(50)
      .map(_._1)

    // Fixed seed, so the same ~20% sample is offered on every run.
    val random = new Random(0)
    val selectedMovies = mostRatedMovieIds
      .filter(_ => random.nextDouble() < 0.2)
      .map(id => (id, movies(id)))

    val myRatings = elicitateRatings(selectedMovies)
    val myRatingsRDD = sc.parallelize(myRatings)

    // Split by key: 0-5 training (plus the user's own ratings), 6-7 validation, 8-9 test.
    val numPartitions = 20
    val training = ratings.filter(_._1 < 6).values
      .union(myRatingsRDD)
      .repartition(numPartitions)
      .persist()
    val validation = ratings.filter(x => x._1 >= 6 && x._1 < 8).values
      .repartition(numPartitions)
      .persist()
    val test = ratings.filter(_._1 >= 8).values.persist()

    val numTraining = training.count()
    val numValidation = validation.count()
    val numTest = test.count()
    println("Training:" + numTraining + ",validation: " + numValidation + ", test:" + numTest)

    // Grid search. Only the best model so far is retained, hence the local vars.
    val ranks = List(8, 12)
    val lambdas = List(0.1, 10.0)
    val numIters = List(10, 20)
    var bestModel: Option[MatrixFactorizationModel] = None
    var bestValidationRmse = Double.MaxValue
    var bestRank = 0
    var bestLambda = -1.0
    var bestNumIter = -1
    for (rank <- ranks; lambda <- lambdas; numIter <- numIters) {
      val model = ALS.train(training, rank, numIter, lambda)
      val validationRmse = computeRmse(model, validation, numValidation)
      println("RMSE (validation)=" + validationRmse + " for the model trained with rank =" + rank +
        ", lambda=" + lambda + ", and numIter= " + numIter + ".")
      if (validationRmse < bestValidationRmse) {
        bestModel = Some(model)
        bestValidationRmse = validationRmse
        bestRank = rank
        bestLambda = lambda
        bestNumIter = numIter
      }
    }

    val chosenModel = bestModel.getOrElse(
      sys.error("No model achieved a finite validation RMSE; nothing to evaluate."))

    val testRmse = computeRmse(chosenModel, test, numTest)
    println("The best model was trained with rank=" + bestRank + " and lambda =" + bestLambda +
      ", and numIter =" + bestNumIter + ", and its RMSE on the test set is " + testRmse + ".")

    // Recommend only movies the user has not rated themselves.
    val myRatedMovieIds = myRatings.map(_.product).toSet
    val candidates = sc.parallelize(movies.keys.filter(!myRatedMovieIds.contains(_)).toSeq)
    val recommendations = chosenModel
      .predict(candidates.map(movieId => (0, movieId)))
      .collect()
      .sortBy(-_.rating)
      .take(50)

    println("movies recommended for you:")
    recommendations.zipWithIndex.foreach { case (r, idx) =>
      println("%2d".format(idx + 1) + ":" + movies(r.product))
    }
  }

  /**
   * Compute RMSE (Root Mean Squared Error).
   *
   * Predictions are joined with the actual ratings on (user, product). The sum
   * of squared errors over the joined pairs is divided by `n`, not by the size
   * of the join: a pair absent from the predictions contributes no error but is
   * still counted in `n`.
   *
   * @param model the factorization model to evaluate
   * @param data  the ratings to evaluate against
   * @param n     divisor for the mean; callers pass the number of ratings in `data`
   * @return the square root of (sum of squared errors / n)
   */
  def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], n: Long): Double = {
    val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))
    val predictionsAndRatings = predictions
      .map(x => ((x.user, x.product), x.rating))
      .join(data.map(x => ((x.user, x.product), x.rating)))
      .values
    val squaredErrorSum = predictionsAndRatings
      .map { case (predicted, actual) => (predicted - actual) * (predicted - actual) }
      .reduce(_ + _)
    math.sqrt(squaredErrorSum / n)
  }

  /**
   * Elicitate ratings from command-line.
   *
   * Each movie is prompted for until an integer in 0..5 is entered. A rating of
   * 0 means "not seen" and produces no `Rating`. All ratings are attributed to
   * user id 0.
   *
   * @param movies (movie id, display name) pairs to ask about
   * @return the non-zero ratings the user entered
   * @throws RuntimeException (via `sys.error`) if no rating was provided at all
   */
  def elicitateRatings(movies: Seq[(Int, String)]): Seq[Rating] = {
    val prompt = "Please rate the following movie (1-5 (best), or 0 if not seen):"
    println(prompt)
    val ratings = movies.flatMap { case (movieId, title) =>
      val score = askRating(title, prompt)
      if (score > 0) Some(Rating(0, movieId, score.toDouble)) else None
    }
    if (ratings.isEmpty) sys.error("No rating provided!") else ratings
  }

  /**
   * Prompts for one movie until a valid score in 0..5 is read.
   *
   * End-of-file on standard input is treated as 0 ("not seen") so that a closed
   * or exhausted input stream terminates the loop instead of re-prompting
   * forever.
   */
  @tailrec
  private def askRating(title: String, prompt: String): Int = {
    print(title + ": ")
    Console.out.flush()
    Option(Console.in.readLine()) match {
      case None => 0 // EOF: no more answers will ever arrive
      case Some(line) =>
        Try(line.trim.toInt).toOption match {
          case Some(score) if score >= 0 && score <= 5 => score
          case _ =>
            println(prompt)
            askRating(title, prompt)
        }
    }
  }
}
实现MovieLens电影推荐 — 相关推荐
- 第四课.KNN电影推荐
目录 基于近邻用户的协同过滤 基于近邻物品的协同过滤 相似度计算-Jaccard相似度 实验:基于KNN的电影推荐系统 简介 movielens 数据集 模型实现 基于近邻用户的协同过滤 基于近邻用户 ...
- python亲和性分析法推荐电影论文_数据挖掘-MovieLens数据集_电影推荐_亲和性分析_Aprioro算法...
#!/usr/bin/env python2 # -*- coding: utf-8 -*- """ Created on Tue Feb 7 14:38:33 201 ...
- ML之RS:基于用户的CF+LFM实现的推荐系统(基于相关度较高的用户实现电影推荐)
ML之RS:基于用户的CF+LFM实现的推荐系统(基于相关度较高的用户实现电影推荐) 目录 输出结果 实现代码 输出结果 实现代码 #ML之RS:基于CF和LFM实现的推荐系统 import nump ...
- 电影推荐之《哈利·波特与火焰杯》 隐私策略(Privacy policy)
1.隐私政策涵盖您对本应用的使用. 2.电影推荐之<哈利·波特与火焰杯>不会收集.存储.分享您的任何个人信息或者与您的设备相关的信息.我们不会收集任何统计数据和分析数据,也不会跟踪用户的行 ...
- 基于混合云存储系统的电影推荐引擎小结
基于混合云存储系统的电影推荐引擎 推荐算法部分是Mahout下的Taste实现的, 数据集采用GroupLens 的数据集合,将这些数据集转换到mysql数据库中 其中Taste:http://mah ...
- Python基于用户协同过滤算法电影推荐的一个小改进
之前曾经推送过这个问题的一个实现,详见:Python基于用户协同过滤算法的电影推荐代码demo 在当时的代码中没有考虑一种情况,如果选出来的最相似用户和待测用户完全一样,就没法推荐电影了.所以,在实际 ...
- 1.3 基于协同过滤的电影推荐案例
1.3 案例–基于协同过滤的电影推荐 学习目标 应用基于用户的协同过滤实现电影评分预测 应用基于物品的协同过滤实现电影评分预测 1 User-Based CF 预测电影评分 数据集下载 下载地址:Mo ...
- 基于协同过滤的电影推荐
日萌社 人工智能AI:Keras PyTorch MXNet TensorFlow PaddlePaddle 深度学习实战(不定时更新) 1.4 案例--基于协同过滤的电影推荐 学习目标 应用基于用户 ...
- 计算机毕业设计JAVA电影推荐网站mybatis+源码+调试部署+系统+数据库+lw
计算机毕业设计JAVA电影推荐网站mybatis+源码+调试部署+系统+数据库+lw 计算机毕业设计JAVA电影推荐网站mybatis+源码+调试部署+系统+数据库+lw 本源码技术栈: 项目架构:B ...
最新文章
- select poll使用
- Matlab实用程序--图形应用-轮廓图
- 【学术相关】热议:寒门硕士要不要继续读博士?
- logback配置(与log4j对比)
- Cocoapods的安装和使用
- 我的世界服务器怎么修复锁链甲,我的世界手机版锁链甲怎么做 怎么获得
- 隐瞒中国iPhone需求下滑实情:库克和苹果惹上事了
- 对象新增方法 object.is() object.assign()
- 中国移动全球通寻宝第四期攻略
- 杨强教授领衔撰写,国内首本联邦学习实战的权威著作
- LoadRunner 常用C语言函数使用举例说明
- 控制理论基础(1)--控制工程概述
- UCOS操作系统——软件定时器(八)
- WinISO5.3的注册码吧
- 误删除文件怎么才能恢复
- 华三防火墙透明模式典型组网配置实例
- Hololens开发学习笔记-4
- SpringBoot集成rabbitmq错误:org.springframework.amqp.AmqpConnectException: java.net.ConnectException的解决办法
- linux忘记root密码怎么办——重置root密码的四种方法
- CCF-CSP 202012-5 星际旅行 80分