Crawl process analysis:
1. Inspecting the page shows that the data it displays is served as JSON (see the probe sketch after this list).
2. The JSON is requested from https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId=
3. Design the database tables (a possible DDL sketch follows the first script below).
4. Store the crawled data in a MySQL database.
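
As a quick check of steps 1 and 2, the snippet below fetches one category list and pretty-prints the first record. The category id 'gbyey' and the field names in the comment are taken from the crawler code further down; treat this as a sketch, since the API's response shape may change.

import pprint
import requests

base_url = 'https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId='
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}

# Fetch one category and inspect the JSON shape; the response is a list of place dicts.
resp = requests.get(base_url + 'gbyey', headers=headers).json()
pprint.pprint(resp[0])  # expect keys such as placeName, addr, postcode, tel, categoryId, regionId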

The complete code is as follows:

import pprint
import requests
from DBcm import UseDatabase
from bs4 import BeautifulSoup

base_url = 'https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId='
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
dbconfig = {'host': '127.0.0.1',
            'user': 'vsearch',
            'password': 'vsearchpasswd',
            'database': 'vsearchlogdb', }

# resp = requests.get(url_bbyey, headers=headers).json()  # debugging leftover: url_bbyey is never defined
# pprint.pprint(resp)


def get_jxzy():
    # Education facility categories.
    # idList = ['3','2','15','9','12','8','28','16','30','31','29','32','33','34','17','35']
    idList = ['gbyey', 'zyjyxx', 'tsjyxx', 'gdyx', 'xx', 'zx']
    for id in idList:
        print(id)
        url = base_url + id
        print(url)
        resp = requests.get(url, headers=headers).json()
        for i in resp:
            print(i['categoryId'])
            with UseDatabase(dbconfig) as cursor:
                _SQL = """insert into JXZX(placename, addr, postcode, tel, categoryid, reginid)
                          values(%s, %s, %s, %s, %s, %s)"""
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      i['postcode'],
                                      i['tel'],
                                      i['categoryId'],
                                      i['regionId']))


def get_ylws():
    # Medical and health facility categories.
    idList = ['ekzlfwjg', 'zybzdjg', 'myghyfjzmz', 'kqymjzmz', 'cxd', 'ejyljg', 'sjyljg', 'zcjg']
    for id in idList:
        print(id)
        url = base_url + id
        resp = requests.get(url, headers=headers).json()
        # pprint.pprint(resp)
        for i in resp:
            print(i['categoryId'])
            with UseDatabase(dbconfig) as cursor:
                _SQL = """insert into ylwsjg(placename, addr, officehours, postcode, tel, categoryid, regionid)
                          values(%s, %s, %s, %s, %s, %s, %s)"""
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      i['officeHours'],
                                      i['postcode'],
                                      i['tel'],
                                      i['categoryId'],
                                      i['regionId']))


get_jxzy()
get_ylws()
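
The INSERT statements above assume that the JXZX and ylwsjg tables already exist; the original post does not show their schema. A minimal sketch of step 3, with all column types and sizes assumed, might look like this:

from DBcm import UseDatabase

dbconfig = {'host': '127.0.0.1', 'user': 'vsearch',
            'password': 'vsearchpasswd', 'database': 'vsearchlogdb'}

# Column names follow the INSERT statements above; the varchar sizes are guesses.
with UseDatabase(dbconfig) as cursor:
    cursor.execute("""create table if not exists JXZX (
                          id int auto_increment primary key,
                          placename varchar(128),
                          addr varchar(256),
                          postcode varchar(16),
                          tel varchar(64),
                          categoryid varchar(32),
                          reginid varchar(32))""")
    cursor.execute("""create table if not exists ylwsjg (
                          id int auto_increment primary key,
                          placename varchar(128),
                          addr varchar(256),
                          officehours varchar(128),
                          postcode varchar(16),
                          tel varchar(64),
                          categoryid varchar(32),
                          regionid varchar(32))""")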

The database helper code is as follows. Note that before `import mysql.connector` will work, the MySQL driver engine must be installed (for example, via pip install mysql-connector-python):

import mysql.connector


class UseDatabase:
    """Context manager that opens a MySQL connection and yields a cursor."""

    def __init__(self, config: dict):
        self.configuration = config

    def __enter__(self) -> 'cursor':
        self.conn = mysql.connector.connect(**self.configuration)
        self.cursor = self.conn.cursor()
        return self.cursor

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Commit, then release the cursor and the connection.
        self.conn.commit()
        self.cursor.close()
        self.conn.close()
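
A small usage sketch of UseDatabase, assuming the JXZX table from the first script, to sanity-check what the crawl stored:

from DBcm import UseDatabase

dbconfig = {'host': '127.0.0.1', 'user': 'vsearch',
            'password': 'vsearchpasswd', 'database': 'vsearchlogdb'}

# Count stored rows per category to verify the crawl reached the database.
with UseDatabase(dbconfig) as cursor:
    cursor.execute("select categoryid, count(*) from JXZX group by categoryid")
    for row in cursor.fetchall():
        print(row)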

Update (Aug 7): the crawl now captures more detailed information.

import pprint
import requests
from DBcm import UseDatabase
from bs4 import BeautifulSoup

base_url = 'https://map.beijing.gov.cn/api/place_list_for_category.json?categoryId='
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
dbconfig = {'host': '127.0.0.1',
            'user': 'vsearch',
            'password': 'vsearchpasswd',
            'database': 'vsearchlogdb', }

# resp = requests.get(url_bbyey, headers=headers).json()
# pprint.pprint(resp)


def switch_case(value):
    # Map a regionId to the corresponding Beijing district name.
    switcher = {
        3: '东城区', 2: '西城区', 15: '朝阳区', 9: '海淀区',
        12: '丰台区', 8: '石景山区', 28: '门头沟区', 16: '房山区',
        30: '通州区', 31: '顺义区', 29: '大兴区', 32: '昌平区',
        33: '平谷区', 34: '怀柔区', 17: '密云区', 35: '延庆区',
    }
    return switcher.get(value, 'null')


# print(switch_case(3))


def get_jxzy():
    # idList = ['3','2','15','9','12','8','28','16','30','31','29','32','33','34','17','35']
    idList = ['gbyey', 'zyjyxx', 'tsjyxx', 'gdyx']
    for id in idList:
        print(id)
        url = base_url + id
        print(url)
        resp = requests.get(url, headers=headers).json()
        # print(resp)
        for i in resp:
            print(i['categoryId'])
            placeid = i['placeId']
            # Each place has a detail page that carries its introduction text.
            url_info = 'https://map.beijing.gov.cn/place?placeId=' + placeid + '&categoryId=' + id
            # print(url_info)
            cont = requests.get(url_info, headers=headers).text
            # print(cont)
            soup = BeautifulSoup(cont, 'lxml')
            try:
                # Get the introduction from the detail table.
                infos = soup.find('table', {'class': 'nxq_ctab'}).find_all('td')[3].get_text()
                # print(infos)
                with UseDatabase(dbconfig) as cursor:
                    _SQL = """insert into jxzx(placename, addr, postcode, tel, introduce, categoryid, reginid)
                              values(%s, %s, %s, %s, %s, %s, %s)"""
                    cursor.execute(_SQL, (i['placeName'],
                                          i['addr'],
                                          i['postcode'],
                                          i['tel'],
                                          infos,
                                          i['categoryId'],
                                          switch_case(int(i['regionId']))))
            except Exception as e:
                print(e)
                with UseDatabase(dbconfig) as cursor:
                    _SQL = """insert into JXZX(placename, addr, postcode, tel, introduce, categoryid, reginid)
                              values(%s, %s, %s, %s, %s, %s, %s)"""
                    cursor.execute(_SQL, (i['placeName'],
                                          i['addr'],
                                          i['postcode'],
                                          i['tel'],
                                          'null',  # some places have an empty introduction
                                          i['categoryId'],
                                          switch_case(int(i['regionId']))))


def get_jxzyxxzx():
    # Fetch primary-school ('xx') and secondary-school ('zx') details.
    idList = ['xx', 'zx']
    for id in idList:
        print(id)
        url = base_url + id
        print(url)
        resp = requests.get(url, headers=headers).json()
        # print(resp)
        for i in resp:
            print(i['categoryId'])
            placeid = i['placeId']
            url_info = 'https://map.beijing.gov.cn/place?placeId=' + placeid + '&categoryId=' + id
            print(url_info)
            cont = requests.get(url_info, headers=headers).text
            # print(cont)
            soup = BeautifulSoup(cont, 'lxml')
            try:
                infos = soup.find('table', {'class': 'nxq_ctab'})  # detail table
                ever_name = infos.find_all('td')[1].get_text()  # former name
                cbrq = infos.find_all('td')[2].get_text()       # founding date of this campus
                sfjs = infos.find_all('td')[5].get_text()       # whether boarding is offered
                xxlb = infos.find_all('td')[6].get_text()       # school category
                desc = infos.find_all('td')[7].get_text()       # school scale and main features
                with UseDatabase(dbconfig) as cursor:
                    _SQL = """insert into jxzy2(学校名称, 办公地址, 曾用名称, 校址创办日期, 邮编, 电话, 类型, 区域, 是否有寄宿, 学校类别, 办学规模主要特色)
                              values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
                    cursor.execute(_SQL, (i['placeName'],
                                          i['addr'],
                                          ever_name,
                                          cbrq,
                                          i['postcode'],
                                          i['tel'],
                                          i['categoryId'],
                                          switch_case(int(i['regionId'])),
                                          sfjs,
                                          xxlb,
                                          desc))
            except Exception as e:
                print(e)
                # Fall back to indexing from the end of the table
                # (e.g. when the former-name cell is absent and every cell shifts).
                infos = soup.find('table', {'class': 'nxq_ctab'})
                cbrq = infos.find_all('td')[-6].get_text()  # founding date of this campus
                sfjs = infos.find_all('td')[-3].get_text()  # whether boarding is offered
                xxlb = infos.find_all('td')[-2].get_text()  # school category
                desc = infos.find_all('td')[-1].get_text()  # school scale and main features
                with UseDatabase(dbconfig) as cursor:
                    _SQL = """insert into jxzy2(学校名称, 办公地址, 曾用名称, 校址创办日期, 邮编, 电话, 类型, 区域, 是否有寄宿, 学校类别, 办学规模主要特色)
                              values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
                    cursor.execute(_SQL, (i['placeName'],
                                          i['addr'],
                                          'null',
                                          cbrq,
                                          i['postcode'],
                                          i['tel'],
                                          i['categoryId'],
                                          switch_case(int(i['regionId'])),
                                          sfjs,
                                          xxlb,
                                          desc))


def get_ylws():
    idList = ['ekzlfwjg', 'zybzdjg', 'myghyfjzmz', 'kqymjzmz', 'cxd', 'ejyljg', 'sjyljg', 'zcjg']
    for id in idList:
        print(id)
        url = base_url + id
        resp = requests.get(url, headers=headers).json()
        # pprint.pprint(resp)
        for i in resp:
            print(i['categoryId'])
            with UseDatabase(dbconfig) as cursor:
                _SQL = """insert into ylwsjg(placename, addr, officehours, postcode, tel, categoryid, regionid)
                          values(%s, %s, %s, %s, %s, %s, %s)"""
                cursor.execute(_SQL, (i['placeName'],
                                      i['addr'],
                                      i['officeHours'],
                                      i['postcode'],
                                      i['tel'],
                                      i['categoryId'],
                                      i['regionId']))


# get_jxzy()
# get_ylws()
get_jxzyxxzx()
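
The hard-coded cell indices in get_jxzy and get_jxzyxxzx ([3], [-6], and so on) depend entirely on the layout of the nxq_ctab detail table. A quick diagnostic sketch like the one below dumps every cell of one detail page so those indices can be verified; the placeId value here is a placeholder, not a real id:

import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}

# PLACE_ID is a placeholder; substitute a placeId printed by the crawler.
url_info = 'https://map.beijing.gov.cn/place?placeId=PLACE_ID&categoryId=xx'
cont = requests.get(url_info, headers=headers).text
soup = BeautifulSoup(cont, 'lxml')

# Print each cell of the detail table with its index.
table = soup.find('table', {'class': 'nxq_ctab'})
for idx, td in enumerate(table.find_all('td')):
    print(idx, td.get_text(strip=True))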
