豆瓣电影TOP250爬取，并获得相关类型的推荐

import requests
import random
from bs4 import BeautifulSoup
import lxml
'''
https://movie.douban.com/top250
https://movie.douban.com/top250?start=25
https://movie.douban.com/top250?start=50&filter=
'''
# Pool of browser-like request headers. One entry is chosen per request by
# the scraping loop, so successive requests do not all carry the same
# User-Agent. Every entry targets the same Host.
header_list = [
    {   # Chrome
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
        ),
        'Host': "movie.douban.com",
    },
    {   # Microsoft Edge (the User-Agent ends in "Edge/18.18362")
        'User-Agent': (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            " (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362"
        ),
        'Host': "movie.douban.com",
    },
    {   # Firefox
        'User-Agent': (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) "
            "Gecko/20100101 Firefox/71.0"
        ),
        'Host': "movie.douban.com",
    },
]

# Individual names kept for any code that refers to a specific header set.
header1, header2, header3 = header_list
# Result containers. They are filled by the scraping loop below and read by
# the report sections that follow it. Each maps film title -> record dict.
datas = {}      # every scraped film
comedy = {}     # comedy
love = {}       # romance
sci_fi = {}     # science fiction
thriller = {}   # thriller
crime = {}      # crime
animation = {}  # animation

# A film must score at least this much to enter one of the genre buckets.
_MIN_GENRE_SCORE = 9.0

# Genre keyword as it appears in the page text -> bucket collecting its films.
_GENRE_BUCKETS = (
    ('喜剧', comedy),
    ('爱情', love),
    ('科幻', sci_fi),
    ('惊悚', thriller),
    ('犯罪', crime),
    ('动画', animation),
)

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10


def _page_url(page):
    """Return the list URL for 1-based *page*; the start offset grows by 25 per page."""
    if page == 1:
        return "https://movie.douban.com/top250"
    return 'https://movie.douban.com/top250?start=%d&filter=' % ((page - 1) * 25)


def _squash(text):
    """Return *text* with every whitespace run (str.split() semantics) removed."""
    return ''.join(text.split())


def _parse_info(markup):
    """Extract ``(director, year, genres)`` from the serialized info paragraph.

    *markup* is ``str()`` of the ``<p>`` tag found under ``div.bd``.
    NOTE(review): the layout assumed here is
    ``<p class="">导演: ... 主演: ...<br/>year / country / genres</p>``,
    inferred from the string handling this code replaces -- confirm against
    the live page.

    The credits (before ``<br/>``) and the facts (after it) are parsed
    separately, so a missing starring credit can no longer shift the year
    and genre fields. All three results have internal whitespace removed;
    a missing part is returned as ``''``.
    """
    markup = markup.replace('</p>', '')
    credits, _, facts = markup.partition('<br/>')

    # Remove plain spaces first so the opening tag collapses to one token
    # that can be stripped; the director credit is then the first
    # whitespace-delimited token that remains.
    credit_tokens = credits.replace(' ', '').replace('<pclass="">', '').split()
    director = credit_tokens[0] if credit_tokens else ''
    if director.startswith('导演:'):
        director = director[3:]

    # The first '/'-separated field is taken as the year and the last one as
    # the genre list.
    fields = [_squash(field) for field in facts.split('/')]
    return director, fields[0], fields[-1]


def _parse_movie(item):
    """Build the record dict for one ``div.info`` element of the list page."""
    movie_name = item.find('a').find('span').string
    score_str = item.find('div', class_='star').find('span', class_='rating_num').string
    info = item.find('div', class_='bd').find('p')
    director, year, genres = _parse_info(str(info))
    return {
        'name': movie_name,
        'director': director,
        'type': genres,
        'time': year,
        'score': float(score_str),
    }


# Walk the ten list pages, recording every film and filing the highly rated
# ones under each genre keyword found in their genre string.
for page in range(1, 11):
    header = random.choice(header_list)
    req = requests.get(_page_url(page), headers=header, timeout=_REQUEST_TIMEOUT)
    bf = BeautifulSoup(req.text, 'lxml')
    for item in bf.find_all('div', class_='info'):
        data = _parse_movie(item)
        datas[data['name']] = data
        if data['score'] < _MIN_GENRE_SCORE:
            continue
        for keyword, bucket in _GENRE_BUCKETS:
            if keyword in data['type']:
                bucket[data['name']] = data
# Report sections: the overall ranking first, then one ranking per genre
# bucket. Each section prints a heading, a column header and at most ten
# films in descending score order, followed by a blank line.

# Row layout: title centred in 10 columns, score left-aligned in 10 columns.
# The third format argument is the fill character.
tplt = "{0:{2}^10}\t\t\t{1:{2}<10}"

# U+3000 IDEOGRAPHIC SPACE used as the fill character -- presumably so that
# padded CJK titles line up in the terminal.
_FILL = chr(12288)

# Maximum number of films listed per section.
_TOP_N = 10


def _print_ranking(title, movies, limit=_TOP_N):
    """Print one ranking section and return the films sorted by score.

    *movies* maps film title -> record dict with ``'name'`` and ``'score'``
    keys. The return value is the list of ``(title, record)`` pairs in
    descending score order (ties keep their insertion order), of which the
    first *limit* are printed.
    """
    ranked = sorted(movies.items(), key=lambda x: x[1]['score'], reverse=True)
    print(title + '>' * 10)
    print(tplt.format("电影名称", "评分", _FILL))
    for _, movie in ranked[:limit]:
        print(tplt.format(movie["name"], movie["score"], _FILL))
    print()
    return ranked


# Each container name is rebound to its sorted list of (title, record) pairs.
datas = _print_ranking("豆瓣评分最高", datas)       # overall top picks
comedy = _print_ranking("喜剧电影推荐", comedy)      # comedy
love = _print_ranking("爱情电影推荐", love)          # romance
sci_fi = _print_ranking("科幻电影推荐", sci_fi)      # science fiction
thriller = _print_ranking("惊悚电影推荐", thriller)  # thriller
crime = _print_ranking("犯罪电影推荐", crime)        # crime
animation = _print_ranking("动画电影推荐", animation)  # animation

《豆瓣电影TOP250爬取，并获得相关类型的推荐》相关推荐

Colly实现豆瓣电影Top250爬取
使用 Colly 实现豆瓣电影Top250爬取 package mainimport ("encoding/csv""github.com/PuerkitoBio/go ...
豆瓣电影TOP250爬取
import requests from pyquery import PyQuery as pq url = 'https://movie.douban.com/top250?start=' def ...
爬虫入门——电影top250爬取
爬虫入门(自用) 第一篇 Python 爬虫入门之电影top250爬取文章目录爬虫入门(自用) 前言一.前置知识 requests库正则表达式(re库) 二.使用步骤 1.引入库 2.小试牛 ...
JAVA爬虫（一）：豆瓣电影排行榜爬取
JAVA爬虫(一):豆瓣电影排行榜爬取前言流程图步骤一.爬取豆瓣电影榜单网页源代码二.网页源码解析三.爬取单个电影网页源码四.源代码解析及关键信息获取前言最近和大创队友一起给大创做的 ...
豆瓣电影影评爬取---最受欢迎的影评[xpath语法]
豆瓣电影影评爬取---最受欢迎的影评[xpath语法] 1.基础环境配置: requests-->版本:2.12.4 lxml-->版本:3.7.2 2.爬取网址:https://movi ...
easyui datalist 不显示数据_爬虫练习——豆瓣电影信息爬取及数据可视化
最近自学了简单的爬虫项目,简单记录下自己的小白学习路径. 本次爬取的是豆瓣电影TOP250数据,主要用到beautifulsoup.re.urllib库.SQLite包,数据可视化方面主要用到flas ...
豆瓣电影TOP250抓取
全部代码以及分析见GitHub:https://github.com/dta0502/douban-top250 本文是Python爬取豆瓣的top250电影的分析和实现,具体是将电影的标题.电影描述 ...
python爬取豆瓣电影top250_Python 爬取豆瓣电影Top250排行榜，爬虫初试
from bs4 import BeautifulSoup import openpyxl import re import urllib.request import urllib.error # ...
python3豆瓣电影排行榜爬取
项目目标: 爬取豆瓣电影排行榜top250 项目分析: 打开豆瓣电影排行榜网址(以下分别是前三页的网址),由此我们判断只需更改"start=**"的数值既可遍历整个排行榜. htt ...

豆瓣电影TOP250爬取，并获得相关类型的推荐

《豆瓣电影TOP250爬取，并获得相关类型的推荐》相关推荐

最新文章

热门文章