python爬取交通违法记录_python爬虫爬取汽车页面信息，并附带分析（静态爬虫）...

import random
import re
import time

import requests
from bs4 import BeautifulSoup

# Crawler: main entry point `mm` plus its private parsing helpers.

_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
}
# Without a timeout requests.get() may block forever on a stalled server.
_REQUEST_TIMEOUT = 10
# Value stored for any field that cannot be extracted from the page.
_PLACEHOLDER = "sunny"
_OUTPUT_PATH = "aika_qc_gn_1_1_1.txt"
_PAGE_URL_TEMPLATE = "http://newcar.xcar.com.cn/257/review/0/0_%d.htm"


def _squash(text):
    """Strip *text* and remove embedded newlines, tabs and carriage returns."""
    return text.strip().replace("\n", "").replace("\t", "").replace("\r", "")


def _try_field(getter):
    """Return ``getter()``, or the placeholder if extraction fails.

    Extraction is best-effort: a missing tag surfaces as AttributeError,
    IndexError, ValueError, etc.  ``Exception`` (not a bare ``except``) is
    caught so that Ctrl-C and interpreter exit still propagate.
    """
    try:
        return getter()
    except Exception:
        return _PLACEHOLDER


def _fetch_soup(url, encoding=None):
    """Download *url* and parse it with html.parser.

    When *encoding* is given it overrides the encoding requests would
    otherwise pick for decoding the response body.
    """
    response = requests.get(url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
    if encoding is not None:
        response.encoding = encoding
    return BeautifulSoup(response.text, "html.parser")


def _review_type(content):
    """Return the text between the first pair of 【 】 brackets in the <dt>."""
    label = _squash(content.find("dt").get_text())
    return label.split("【")[1].split("】")[0]


def _review_comment(content):
    """Return the review text for one <dl> review node.

    The last link of the <dt> is followed to a detail page and the text is
    read from there; if that page cannot be fetched or does not have the
    expected layout, the last line of the listing's <dd> is used instead.
    """
    try:
        detail_url = content.find("dt").findAll("a")[-1]["href"]
        detail = _fetch_soup(detail_url)
        row = (
            detail.find("div", id="mainNew")
            .find("div", class_="maintable")
            .findAll("form")[1]
            .find("table", class_="t_msg")
            .findAll("tr")[1]
        )
    except Exception:
        # Detail page unavailable or laid out differently: fall back to the
        # text shown on the listing page itself.
        return _try_field(
            lambda: _squash(
                content.find("dd").get_text().split("\n")[-1].split("\r")[-1]
            ).split("：")[-1]
        )
    # Detail row located: a missing <font> yields the placeholder, not the
    # listing fallback.
    return _try_field(
        lambda: row.find("font").get_text().strip().replace("\n", "").replace("\t", "")
    )


def mm(url):
    """Crawl every review page and append one line per review to the output file.

    The page count is read from the pager on *url* (second-to-last link of
    the ``div.pagers`` element).  Each listing page is then fetched and, for
    every ``<dl>`` under ``div.review_comments``, a space-separated line
    ``user region useful type comment`` is appended to
    ``aika_qc_gn_1_1_1.txt`` (gb18030).  Fields that cannot be extracted are
    written as ``"sunny"``.

    Args:
        url: URL of the first review page, used only to read the page count.

    Raises:
        Whatever ``requests`` raises if a listing page cannot be fetched
        (including a timeout), and AttributeError / IndexError / ValueError
        if a listing page lacks the expected pager or review container.
        Failures while handling a single review are printed and skipped.
    """
    first_page = _fetch_soup(url, encoding="gb18030")  # gb18030 avoids mojibake
    total_page = int(
        first_page.find("div", class_="pagers").findAll("a")[-2].get_text()
    )

    # `with` guarantees the file is closed even if a later page fetch raises.
    with open(_OUTPUT_PATH, "a", encoding="gb18030", errors="ignore") as myfile:
        print("user", "来源", "认为有用人数", "类型", "comment")
        # NOTE(review): the header is written without separators, unlike the
        # data rows; kept byte-identical so existing output files stay
        # consistent.
        myfile.write("user来源认为有用人数类型comment" + "\n")

        for page in range(1, total_page + 1):
            # One random pause length per page, applied after every review.
            pause = random.uniform(1, 3)
            page_url = _PAGE_URL_TEMPLATE % page
            soup = _fetch_soup(page_url, encoding="gb18030")
            reviews = soup.find("div", class_="review_comments").findAll("dl")

            for position, content in enumerate(reviews, start=1):
                try:
                    print("正在爬取第%d页的第%d的评论，网址为%s" % (page, position, page_url))

                    # Review type (text inside 【 】).
                    comment_type = _try_field(lambda: _review_type(content))
                    # Number of people who found the review useful.
                    useful = _try_field(
                        lambda: int(
                            _squash(
                                content.find("dd")
                                .find("div", class_="useful")
                                .find("i")
                                .find("span")
                                .get_text()
                            )
                        )
                    )
                    # Review source (link text inside the <dd>'s <p>).
                    comment_region = _try_field(
                        lambda: _squash(content.find("dd").find("p").find("a").get_text())
                    )
                    # Reviewer name: text after the last full-width colon.
                    user = _try_field(
                        lambda: _squash(content.find("dd").find("p").get_text()).split("：")[-1]
                    )
                    # Review text.
                    comment = _review_comment(content)

                    time.sleep(pause)
                    print(user, comment_region, useful, comment_type, comment)
                    row = " ".join([user, comment_region, str(useful), comment_type, comment])
                    myfile.write(row + "\n")
                except Exception as e:
                    print(e)
                    print("爬取第%d页的第%d的评论失败，网址为%s" % (page, position, page_url))


# Rating-distribution statistics.

def fenxi(path="aika_qc_gn_1_1_1.txt"):
    """Print the share of good / neutral / bad reviews found in *path*.

    Each data line is expected to be space-separated with the rating label
    (``好评`` / ``中评`` / ``差评``) in the fourth field.  Blank lines and the
    header row are skipped; any other line whose fourth field is missing or
    is not one of the three labels is counted as "unrated".

    All four percentages use the number of *rated* reviews as denominator,
    so the "unrated" figure is relative to the rated count.  When there are
    no rated reviews every figure is reported as 0.0 instead of dividing by
    zero.

    NOTE(review): fields are split on single spaces, so a user name or
    source containing a space shifts the rating out of the fourth field.

    Args:
        path: File to analyse; defaults to the crawler's output file.
    """
    header = "user来源认为有用人数类型comment"
    tally = {"好评": 0, "中评": 0, "差评": 0}
    unrated = 0

    # The file is written as gb18030, so it must be read the same way
    # rather than with the platform default encoding.
    with open(path, "r", encoding="gb18030", errors="ignore") as myfile:
        for line in myfile:
            # Skip blank lines and the header (with or without separators).
            squeezed = "".join(line.split())
            if not squeezed or squeezed == header:
                continue
            fields = line.rstrip("\r\n").split(" ")
            rating = fields[3] if len(fields) > 3 else None
            if rating in tally:
                tally[rating] += 1
            else:
                unrated += 1

    rated = sum(tally.values())

    def percent(part):
        # Guard against a file with no rated reviews.
        return round(part / rated * 100, 2) if rated else 0.0

    print("好评占比：", percent(tally["好评"]))
    print("中评占比：", percent(tally["中评"]))
    print("差评占比：", percent(tally["差评"]))
    print("未评论：", percent(unrated))


# First page of the review listing; used to discover the page count.
url = "http://newcar.xcar.com.cn/257/review/0.htm"

# Run the crawl and the analysis only when executed as a script, so that
# importing this module does not trigger network traffic or file writes.
if __name__ == "__main__":
    mm(url)
    fenxi()

python爬取交通违法记录_python爬虫爬取汽车页面信息，并附带分析（静态爬虫）...相关推荐

python爬取交通违法记录查询_如何查询已交罚款的交通违法记录
展开全部 1.到当地的交管所进行查询 2.使用微信进行查询,微信钱包里的微信城市服务里有机动车查询,可以 ...
【数据安全案例】交警计算机系统再遭***，交通违法记录随意删除
根据新浪报道,据新华社10月14日电利用当网管的机会,破解密码,非法进入交警计算机系统为他人删除车辆交通违法记录牟利.辽宁省鞍山市铁西区检察院透露,当地某公司员工程尚军因涉嫌破坏计算机信息系统罪,已 ...
黑客入侵交警计算机系统删交通违法记录牟利
据新华社10月14日电利用当网管的机会,破解密码,非法进入交警计算机系统为他人删除车辆交通违法记录牟利.辽宁省鞍山市铁西区检察院透露,当地某公司员工程尚军因涉嫌破坏计算机信息系统罪,已于10月初被依 ...
使用python requests+re库+curl.trillworks.com神器实现淘宝页面信息爬取
慕课[Python网络爬虫与信息提取]课程随手练习~! 和嵩天老师课程中的示范不同的是,淘宝页面现在不能直接爬取,要修改下访问请求的headers表头信息. 目标:使用python的requests+ ...
python中怎么取整数案例题_python中如何取整数
首先,不得不提醒大家一个容易被忽视或者搞混的问题--一般的,0.5这种末尾是5的小数,四舍五入取整应进位.这个进位的意思是:-0.5 → -1:0.5 → 1.即正负情况不同,都向着远离0,使得绝对值 ...
python多线程爬虫爬取多个网页_Python 多线程抓取网页
最近,一直在做网络爬虫相关的东西. 看了一下开源C++写的larbin爬虫,仔细阅读了里面的设计思想和一些关键技术的实现. 1.larbin的URL去重用的很高效的bloom filter算法: 2. ...
python爬取网站数据步骤_python怎么爬取数据
在学习python的过程中,学会获取网站的内容是我们必须要掌握的知识和技能,今天就分享一下爬虫的基本流程,只有了解了过程,我们再慢慢一步步的去掌握它所包含的知识 Python网络爬虫大概需要以下几个步 ...
python数据分析图表展示_NBA数据分析_python数据爬取_可视化图形_python数据可视化案例-帆软...
之前手痒做了一次NBA可视化分析,发个微头条,好多人追着我问教程,这两天终于闲下来了,花时间整理这篇NBA可视化分析教程,手把手教大家做一次炫酷的数据可视化分析! 先部分展示本次教程的作品: 数据获取 ...
python爬去百度文库资料_python urllib爬取百度云连接的实例代码
翻看自己以前写的程序,发现写过一个爬取盘多多百度云资源的东西,完全是当时想看变形金刚才自己写的,而且当时第一次接触python大概写了有2天才搞出来这个程序,学习python语言,可以看得出来那时候的 ...

python爬取交通违法记录_python爬虫爬取汽车页面信息，并附带分析（静态爬虫）...

python爬取交通违法记录_python爬虫爬取汽车页面信息，并附带分析（静态爬虫）...相关推荐

最新文章

热门文章