python爬取北京政务公开惠民地图信息
爬取过程分析:
1、分析网页可知上面的数据为json格式
2、找到json数据的请求地址https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId=
3、设计数据库的表
4、将爬取的数据存到mysql数据库中
具体代码如下:
# ---- v1 scraper: module-level setup ----------------------------------------
import pprint  # kept for ad-hoc debugging of the JSON responses
import requests
from DBcm import UseDatabase
from bs4 import BeautifulSoup  # unused by v1 itself; the v2 detail scraper needs it

# Every category listing comes from this one JSON endpoint; a categoryId
# string is appended per request.
base_url = 'https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId='

# Desktop-browser User-Agent sent with every request — presumably so the API
# does not reject the default `requests` UA (NOTE(review): confirm it is needed).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}

# MySQL connection settings consumed by DBcm.UseDatabase.
dbconfig = {'host': '127.0.0.1',
            'user': 'vsearch',
            'password': 'vsearchpasswd',
            'database': 'vsearchlogdb', }

# BUG FIX: the original executed `resp = requests.get(url_bbyey, headers=headers).json()`
# here, but `url_bbyey` is never defined anywhere, so the module crashed with
# NameError before any scraping ran (the v2 script below has the same line
# commented out, confirming it was leftover debugging). The stray call is removed.
# pprint.pprint(resp)def get_jxzy():# idList = ['3','2','15','9','12','8','28','16','30','31','29','32','33','34','17','35']idList = ['gbyey','zyjyxx','tsjyxx','gdyx','xx','zx']for id in idList:print(id)url = base_url+idprint(url)resp = requests.get(url,headers=headers).json()for i in resp:print(i['categoryId'])with UseDatabase(dbconfig) as cursor:_SQL = """insert into JXZX(placename, addr, postcode, tel,categoryid,reginid)values(%s, %s, %s, %s,%s,%s)"""cursor.execute(_SQL, (i['placeName'],i['addr'],i['postcode'],i['tel'],i['categoryId'],i['regionId']))def get_ylws():idList = ['ekzlfwjg','zybzdjg','myghyfjzmz','kqymjzmz','cxd','ejyljg','sjyljg','zcjg']for id in idList:print(id)url = base_url+idresp = requests.get(url,headers=headers).json()# pprint.pprint(resp)for i in resp:print(i['categoryId'])with UseDatabase(dbconfig) as cursor:_SQL = """insert into ylwsjg(placename, addr,officehours, postcode, tel,categoryid,regionid)values(%s, %s, %s, %s,%s,%s,%s)"""cursor.execute(_SQL, (i['placeName'],i['addr'],i['officeHours'],i['postcode'],i['tel'],i['categoryId'],i['regionId']))get_jxzy()get_ylws()
数据库代码如下:
使用 import mysql.connector 前需要先安装 MySQL 数据库驱动引擎（mysql-connector-python）
import mysql.connectorclass UseDatabase:def __init__(self, config: dict):self.configuration = configdef __enter__(self) -> 'cursor':self.conn = mysql.connector.connect(**self.configuration)self.cursor = self.conn.cursor()return self.cursordef __exit__(self, exc_type, exc_value, exc_traceback):self.conn.commit()self.cursor.close()self.conn.close()
8.7 更新：爬取的信息更为详细
# ---- v2 scraper: module-level setup (same values as v1) --------------------
import pprint

import requests
from bs4 import BeautifulSoup
from DBcm import UseDatabase

# JSON listing endpoint; append a categoryId to fetch that category's places.
base_url = 'https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId='

# Browser-style User-Agent attached to every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
}

# Credentials handed to DBcm.UseDatabase.
dbconfig = {
    'host': '127.0.0.1',
    'user': 'vsearch',
    'password': 'vsearchpasswd',
    'database': 'vsearchlogdb',
}

# resp = requests.get(url_bbyey,headers=headers).json()
# pprint.pprint(resp)


def switch_case(value):
    """Map a numeric regionId to its Beijing district name ('null' if unknown)."""
    switcher = {
        3: "东城区", 2: "西城区", 15: "朝阳区", 9: '海淀区', 12: '丰台区',
        8: '石景山区', 28: '门头沟区', 16: '房山区', 30: '通州区',
        31: '顺义区', 29: '大兴区', 32: '昌平区', 33: '平谷区',
        34: '怀柔区', 17: '密云区', 35: '延庆区',
    }
    return switcher.get(value, 'null')
# print(switch_case(3))


def get_jxzy():
    """Scrape kindergartens / vocational / special-ed schools / universities.

    For each place the detail page is fetched and the introduction text (简介,
    4th <td> of table.nxq_ctab) is extracted; places whose page lacks that cell
    are stored with introduce='null'.

    BUG FIX: the original's except-branch inserted into `JXZX` while the
    try-branch used `jxzx` — on a case-sensitive MySQL server those are two
    different tables. The broad try also wrapped the DB insert, so a database
    error was misread as "no introduction" and triggered a second insert.
    Parsing and inserting are now separated; both paths insert into `jxzx`.
    """
    # idList = ['3','2','15','9','12','8','28','16','30','31','29','32','33','34','17','35']
    idList = ['gbyey', 'zyjyxx', 'tsjyxx', 'gdyx']
    for category_id in idList:  # renamed from `id` (shadowed the builtin)
        print(category_id)
        url = base_url + category_id
        print(url)
        resp = requests.get(url, headers=headers).json()
        # print(resp)
        for i in resp:
            print(i['categoryId'])
            placeid = i['placeId']
            url_info = 'https://map.beijing.gov.cn/place?placeId=' + placeid + '&categoryId=' + category_id
            # print(url_info)
            cont = requests.get(url_info, headers=headers).text
            # print(cont)
            soup = BeautifulSoup(cont, 'lxml')
            try:
                # 获取简介 — introduction text, 4th cell of the detail table.
                infos = soup.find('table', {'class': 'nxq_ctab'}).find_all('td')[3].get_text()
            except Exception as e:
                print(e)
                infos = 'null'  # 有的简介为空 — some places have no introduction
            with UseDatabase(dbconfig) as cursor:
                _SQL = """insert into jxzx(placename, addr, postcode, tel,introduce,categoryid,reginid)values(%s, %s, %s, %s,%s,%s,%s)"""
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      i['postcode'],
                                      i['tel'],
                                      infos,
                                      i['categoryId'],
                                      switch_case(int(i['regionId']))))


def get_jxzyxxzx():
    """Scrape primary (xx) and middle (zx) schools with extended detail fields.

    The detail table's normal layout is td[1]=曾用名称 (former name),
    td[2]=创办日期 (founding date), td[5]=是否有寄宿 (boarding),
    td[6]=学校种类 (school type), td[7]=办学规模和主要特色 (description).
    Pages without the former-name cell are parsed from the table's end with
    negative indices, storing former name as 'null'.

    FIX: `find_all('td')` is now run once per page instead of once per field,
    and the DB insert sits outside the try so a database error is no longer
    misdiagnosed as the short-table page variant.
    """
    idList = ['xx', 'zx']
    for category_id in idList:
        print(category_id)
        url = base_url + category_id
        print(url)
        resp = requests.get(url, headers=headers).json()
        # print(resp)
        for i in resp:
            print(i['categoryId'])
            placeid = i['placeId']
            url_info = 'https://map.beijing.gov.cn/place?placeId=' + placeid + '&categoryId=' + category_id
            print(url_info)
            cont = requests.get(url_info, headers=headers).text
            # print(cont)
            soup = BeautifulSoup(cont, 'lxml')
            infos = soup.find('table', {'class': 'nxq_ctab'})  # 获取简介
            cells = infos.find_all('td')
            try:
                ever_name = cells[1].get_text()  # 曾用名称
                cbrq = cells[2].get_text()       # 本校址创办日期
                sfjs = cells[5].get_text()       # 是否有寄宿
                xxlb = cells[6].get_text()       # 学校种类
                desc = cells[7].get_text()       # 办学规模和主要特色
            except Exception as e:
                print(e)
                # Shorter table variant (no former-name cell): index from the end.
                ever_name = 'null'
                cbrq = cells[-6].get_text()      # 本校址创办日期
                sfjs = cells[-3].get_text()      # 是否有寄宿
                xxlb = cells[-2].get_text()      # 学校种类
                desc = cells[-1].get_text()      # 办学规模和主要特色
            with UseDatabase(dbconfig) as cursor:
                _SQL = """insert into jxzy2(学校名称,办公地址,曾用名称,校址创办日期,邮编,电话,类型,区域,是否有寄宿,学校类别,办学规模主要特色)values(%s, %s, %s, %s,%s,%s,%s,%s,%s,%s,%s)"""
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      ever_name,
                                      cbrq,
                                      i['postcode'],
                                      i['tel'],
                                      i['categoryId'],
                                      switch_case(int(i['regionId'])),
                                      sfjs,
                                      xxlb,
                                      desc))


def get_ylws():
    """Scrape medical/health facilities into ylwsjg (listing data only)."""
    idList = ['ekzlfwjg', 'zybzdjg', 'myghyfjzmz', 'kqymjzmz', 'cxd',
              'ejyljg', 'sjyljg', 'zcjg']
    for category_id in idList:
        print(category_id)
        url = base_url + category_id
        resp = requests.get(url, headers=headers).json()
        # pprint.pprint(resp)
        with UseDatabase(dbconfig) as cursor:  # hoisted: one connection per category
            _SQL = """insert into ylwsjg(placename, addr,officehours, postcode, tel,categoryid,regionid)values(%s, %s, %s, %s,%s,%s,%s)"""
            for i in resp:
                print(i['categoryId'])
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      i['officeHours'],
                                      i['postcode'],
                                      i['tel'],
                                      i['categoryId'],
                                      i['regionId']))


# get_jxzy()
#
# get_ylws()
get_jxzyxxzx()
python爬取北京政务公开惠民地图信息相关推荐
- Python 爬取北京二手房数据,分析北漂族买得起房吗?(附完整源码)
来源:CSDN 本文约3500字,建议阅读9分钟. 本文根据Python爬取了赶集网北京二手房数据,R对爬取的二手房房价做线性回归分析,适合刚刚接触Python&R的同学们学习参考. 房价高是 ...
- Python爬取北京2.3万条租房信息,发现快租不起房子了!
1.概述 北上广深作为打工人最多的超一线城市,大部分都是租房生活着.自如作为目前第三方租房平台,应该算是该行业的龙头.但是最近蛋壳的暴雷,我们不得不更加警觉.那么自如都有多少open状态的房源呢,这些 ...
- Python爬取北京地区短租房信息
本文利用Requests和BeautifulSoup第三方库,爬取小猪短租网北京地区短租房的信息.代码参考<从零开始学Python网络爬虫>. 完整代码如下: from bs4 impor ...
- 爬取深圳市政府政务公开所有文件
文章目录 一.前言 二.获取文件URL列表 1.获取各类文件的URL 2.获取每类文件的总页数 3.获取每个网页上的文件URL 三.爬取文件内容 1.爬取文件的基本信息和内容 2.下载相应的附件 四. ...
- python爬取北京租房信息
租房助手 发现官网的筛选方式不能满足自己的需求,所以爬取相关网站制作出现在的东西来 效果预览-> 在线预览 下面进行详细分析 一.首先爬取起始地和终点地的路线及沿途地铁站名称 1.爬取8684. ...
- python爬取电子书_python爬取计算机电子书(源码移步github)
摘要:今年第一个项目,python爬取网络上公开的计算机电子书近8000本,在此基础上简要分析计算机专业的发展变迁.部分整理好的书籍下载链接见文末.代码链接见文末. 计算机诞生以来不到100年,学术的 ...
- python xpath循环_Python爬虫 爬取北京二手房数据
点击蓝字"python教程"关注我们哟! 前言 Python现在非常火,语法简单而且功能强大,很多同学都想学Python!所以小的给各位看官们准备了高价值Python学习视频教程及 ...
- 爬取北京二手房数据信息(python)
数据爬取 爬取北京二手房数据信息python代码: # coding : utf-8from requests import get from bs4 import BeautifulSoup as ...
- python爬取贝壳找房之北京二手房源信息
所用库 requests xpath解析库 multiprocessing多进程 pandas库用于保存csv文件 实战背景 本文首发于:python爬取贝壳找房之北京二手房源信息 主要是为了做北京二 ...
- 疫情过去女朋友想去重庆玩,python批量爬取小猪短租重庆民宿信息
疫情过去女朋友想去重庆玩,python批量爬取小猪短租重庆民宿信息 随着时间的流逝,在中国共产党的领导,全国人民的共同努力下,疫情逐渐受到了控制,逐渐好转,复工,开学有望.最近在和女朋友的闲聊当中得知 ...
最新文章
- 连接时会提示oracle initialization or shutdown in progress
- html给文字加动态效果,20种配合场景的CSS3鼠标滑过文字动画特效
- Scala集合:ListBuffer可变集合的head/tail/last/init方法
- 每日一linux命令
- Repeater控件绑定数据、分页、数据操作,最佳代码
- python之gevent模块实现协程
- Jedis连接Redis读写基本操作
- tomcat、netty以及nodejs的helloworld性能对比
- 天翼云搭建socks5和搭建http
- matlab非线性规划
- 车子刹车油管ABS油管被剪了好几刀,我还有救吗?
- Hbase、elasticsearch整合中jar包冲突
- 爱尔兰房产泡沫破灭带给我们…
- BZOJ 4316: 小C的独立集 仙人掌 + 树形DP
- 云监控介绍 - Amazon CloudWatch
- 哪里有kitti数据集的百度云资源
- flappy+bird+android源代码,Flappy Bird(安卓版)逆向分析(一)
- 如何快速更改电脑ip地址【图文教程】?
- opencv实战——图像矫正算法深入探讨
- 医学图像分割 (MICCAI 2019)
热门文章
- 云计算与大数据” 研讨会:迎来新的科学价值
- 通用稳定DNS,国际DNS,国内DNS,公共DNS
- 微信开发------微信公众号新老账户粉丝迁移问题
- Android 项目上线流程总结
- win10怎么设置无线网连接到服务器,win10wifi自动连接在哪里设置_win10设置自动连接wifi的方法...
- Netd 服务的 netd 套接字创建
- mis是商科还是计算机专业,MIS是什么?管理信息系统MIS和计算机科学CS有什么区别?...
- MySQL和Navicat怎么连接
- iOS用代码判断设备是否越狱
- 前路钉板系统在重建胸腰段稳定性中应用 [已发表]