mysql汽车品牌系列_爬取汽车之家汽车品牌型号系列数据

需要安装 Python 3，并安装代码开头导入的几个第三方库（requests、lxml、urllib3、pymysql）。代码仅供学习和参考。如嫌麻烦，可直接下载：https://download.csdn.net/download/weixin_36691991/11032522

import re

import json

import requests

from lxml import etree

import os

import urllib3.exceptions

import pymysql

import time

# Crawler for autohome.com.cn: walks the brand index, collects every series
# and spec name per brand, downloads the brand logo and stores the result in
# the MySQL tables `brand`, `type` and `slis`.

# JavaScript file whose single assignment holds the brand/series index.
main_url = 'https://car.autohome.com.cn/javascript/NewSpecCompare.js'

# Per-initial-letter HTML page that lists brand logos.
photo_url = 'https://www.autohome.com.cn/grade/carhtml/'

# JSONP endpoint returning the specs of one series (series id is appended).
type_type_url = "https://car.autohome.com.cn/duibi/ashx/specComparehandler.ashx?callback=jsonpCallback&type=1&seriesid="

# Seconds to wait for a single HTTP response before giving up.
REQUEST_TIMEOUT = 30

# Headers sent with every logo download.
IMAGE_HEADERS = {
    "User-Agent": "Mozilla / 5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 71.0.3578.98Safari / 537.36"
}


def extract_js_payload(js_text):
    """Return the text between the first ``=`` and the following ``;``.

    Raises:
        ValueError: if *js_text* contains no ``=...;`` assignment.
    """
    found = re.search(r'=(.*?);', js_text, re.S)
    if found is None:
        raise ValueError("no '=...;' assignment found in the brand index script")
    return found.group(1)


def jsonp_to_json(jsonp_text):
    """Return the JSON document wrapped by a JSONP call such as ``cb({...})``.

    The slice runs from the first ``(`` to the last ``)`` so that a ``})``
    occurring inside a string value does not truncate the document.

    Raises:
        ValueError: if *jsonp_text* has no parenthesised payload.
    """
    start = jsonp_text.find('(')
    end = jsonp_text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError("response is not a JSONP call")
    return jsonp_text[start + 1:end].strip()


def collect_spec_names(series_payload):
    """Return the ``'N'`` value of every item found in a list-valued field
    of each entry of ``series_payload['List']``, in document order."""
    names = []
    for group in series_payload['List']:
        for value in group.values():
            if isinstance(value, list):
                names.extend(item['N'] for item in value)
    return names


def normalise_image_url(src):
    """Turn a protocol-relative ``//host/path`` URL into an ``https`` URL.

    URLs that already carry a scheme are returned unchanged (apart from
    surrounding whitespace being removed).
    """
    src = src.strip()
    if src.startswith('//'):
        return 'https:' + src
    return src


def safe_filename(name):
    """Remove ``/`` from *name* so it can be used as a single file name."""
    return name.replace('/', '')


def fetch_text(url):
    """GET *url* with ``requests`` and return the response body as text."""
    return requests.get(url, timeout=REQUEST_TIMEOUT).text


def fetch_brand_logo_url(brand_name, initial):
    """Return the logo URL listed for *brand_name* on the page for *initial*.

    Returns ``None`` when no title on the page equals *brand_name*. When
    several titles match, the last one wins.
    """
    page = fetch_text(photo_url + initial + "_photo.html")
    tree = etree.HTML(page)
    imgs = tree.xpath('//dl/dt/a/img/@src')
    titles = tree.xpath('//dl/dt/div/a/text()')
    logo_url = None
    for title, img in zip(titles, imgs):
        if title == brand_name:
            logo_url = normalise_image_url(img)
    return logo_url


def fetch_series(brand_entry, folder):
    """Return one dict per series of *brand_entry*.

    Each dict has the keys ``name``, ``seriesid`` and ``sl`` (list of spec
    names). The raw JSON of every series is also written to
    ``<folder><series name>.json``.
    """
    series_list = []
    for manufacturer in brand_entry['List']:
        for series in manufacturer['List']:
            print(series['N'])
            # Fetch the specs that belong to this series.
            raw_json = jsonp_to_json(fetch_text(type_type_url + str(series['I'])))
            json_path = folder + safe_filename(series['N']) + ".json"
            with open(json_path, 'w', encoding='utf-8') as f:
                f.write(raw_json)
            series_list.append({
                'name': series['N'],
                'seriesid': series['I'],
                'sl': collect_spec_names(json.loads(raw_json)),
            })
    return series_list


def download_logo(pool, logo_url, brand_name, target_dir):
    """Download *logo_url* to ``<target_dir>/<brand_name>.png``.

    Returns the local file path, or ``''`` when there is no logo URL, the
    request fails, or the server does not answer with status 200. A failed
    download is reported and skipped rather than aborting the crawl.
    """
    if not logo_url:
        return ''
    try:
        response = pool.request('GET', logo_url, headers=IMAGE_HEADERS)
    except urllib3.exceptions.HTTPError as e:
        print(e)
        return ''
    if response.status != 200:
        # Do not save an error page under a .png name.
        print((logo_url, response.status))
        return ''
    file_name = target_dir + "/" + safe_filename(brand_name) + ".png"
    with open(file_name, 'wb') as f:
        f.write(response.data)
    # Pause between successful downloads to throttle requests to the site.
    time.sleep(1)
    return file_name


def store_brand(brands):
    """Insert one brand with its series and specs, committing once at the end.

    The connection is always closed; if any insert raises, nothing has been
    committed for this brand.
    """
    # NOTE(review): host and credentials are hard-coded, as in the original.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='weiqing', charset='utf8')
    try:
        with conn.cursor() as cursor:
            print((brands['name'], brands['ini'], brands['img']))
            cursor.execute("insert into brand(name,ini,img)values(%s,%s,%s)", (brands['name'], brands['ini'], brands['img']))
            b_pid = cursor.lastrowid
            for series in brands['type']:
                print((b_pid, series['name']))
                cursor.execute("insert into type(b_id,name)values(%s,%s)", (b_pid, series['name']))
                t_pid = cursor.lastrowid
                for spec_name in series['sl']:
                    print((t_pid, spec_name))
                    cursor.execute("insert into slis(t_id,name)values(%s,%s)", (t_pid, spec_name))
        conn.commit()
    finally:
        conn.close()


def main():
    """Crawl every brand in the index and store it in the database."""
    pool = urllib3.PoolManager()

    # Output folders: raw JSON goes to ./file/, logos to ./file/brand.
    folder = os.getcwd() + '/file/'
    os.makedirs(folder, mode=0o777, exist_ok=True)
    folder1 = os.getcwd() + '/file/brand'
    os.makedirs(folder1, mode=0o777, exist_ok=True)

    raw_index = extract_js_payload(fetch_text(main_url))
    with open(folder + "data.json", 'w', encoding='utf-8') as f:
        f.write(raw_index)

    for entry in json.loads(raw_index):
        brands = {'name': entry['N'], 'ini': entry['L']}
        # Look up the logo link first, then the series, as the original did.
        logo_url = fetch_brand_logo_url(brands['name'], brands['ini'])
        brands['type'] = fetch_series(entry, folder)
        brands['img'] = download_logo(pool, logo_url, brands['name'], folder1)
        store_brand(brands)
        print(brands['name'] + "====" + brands['ini'] + "======" + brands['img'])
    # NOTE(review): the original ended with a bare exit(); its indentation was
    # lost, so it is assumed to have followed the loop - confirm.


if __name__ == "__main__":
    main()

mysql汽车品牌系列_爬取汽车之家汽车品牌型号系列数据相关推荐

Python爬虫系列之爬取某奢侈品小程序店铺商品数据
Python爬虫系列之爬取某奢侈品小程序店铺商品数据小程序爬虫接单.app爬虫接单.网页爬虫接单.接口定制.网站开发.小程序开发> 点击这里联系我们 < 微信请扫描下方二维码代码仅供学 ...
mysql存储爬虫图片_爬取微博图片数据存到Mysql中遇到的各种坑\爬取微博图片\Mysql存储图片\微博爬虫...
本人长期出售超大量微博数据.旅游网站评论数据,并提供各种指定数据爬取服务,Message to YuboonaZhang@Yahoo.com.同时欢迎加入社交媒体数据交流群:99918768 前言由 ...
爬取了 31502 条北京自如租房数据，看看是否居者有其屋？
作者 | 小狮子是LEO 责编 | 郭芮自如友家作为北京租房的主要途径之一,租房数据都展示在官方网站之上,价格等房屋信息与网站数据一致,数据可信度较高.格式规整.因而选取自如友家官方网站作为租房数据 ...
爬取全国各地区汽车销量情况并用中国地图可视化展示
爬取全国各地区汽车销量情况并用中国地图可视化展示项目介绍网页详情代码爬取数据代码将爬取的数据保存到文档中中国地图可视化运行效果项目介绍爬取2017年全国各省份的汽车销量情况(由于数据 ...
爬取腾讯新闻中省份疫情数据到Mysql数据库
爬取腾讯新闻中省份疫情数据到Mysql数据库本人是一个中职学生,第一次发表自己所学到技术-- 本篇文章所用到的语言及工具等: python 3.8 pycharm Mysql Navicat Pre ...
爬虫爬取京东商品详细数据（品牌、售价、各类评论量（精确数量）、热评词及数量等）json解析部分数据
文章目录前言一.数据保存格式设置及数据库准备(CentOS云mysql数据库) 1.分析数据需求(单一商品为例) 2.数据库保存格式 3.用到的数据库操作及指令二.网页分析 1.分析网页源码,确 ...
Python爬虫系列之爬取某社区团微信小程序店铺商品数据
Python爬虫系列之爬取某社区团微信小程序店铺商品数据如有问题QQ请> 点击这里联系我们 < 微信请扫描下方二维码代码仅供学习交流,请勿用于非法用途数据库仅用于去重使用,数据主要存 ...
Python爬虫系列之爬取微信公众号新闻数据
Python爬虫系列之爬取微信公众号新闻数据小程序爬虫接单.app爬虫接单.网页爬虫接单.接口定制.网站开发.小程序开发 > 点击这里联系我们 < 微信请扫描下方二维码代码仅供学习交流 ...
爬虫系列-jsoup爬取网页你需要了解的一切
爬虫系列-jsoup爬取网页概述解析和遍历文档文档的对象模型加载HTML数据从String解析文档从String中加载解析片段从URL加载文档描述从文件加载文档描述提取数据使用 ...
爬取3w条『各种品牌』笔记本电脑数据，统计分析并进行可视化展示！真好看~...
本文代码讲解已录成视频,欢迎扫码学习! 本文手撕代码过程 01 前言在上一篇文章[教你用python爬取『京东』商品数据,原来这么简单!]教大家如何学会爬取『京东』商城商品数据. 今天教大家如何爬取 ...

mysql汽车品牌系列_爬取汽车之家汽车品牌型号系列数据

mysql汽车品牌系列_爬取汽车之家汽车品牌型号系列数据相关推荐

最新文章

热门文章