墨迹天气接口html,moji_weather

# -*- coding: utf-8 -*-

'''

程序名：墨迹天气爬虫

编写人：bxgj

运行环境：win7x64 Python3.6.4

修改日志：2018.09.09 新建

2018.09.10 完成爬取主要天气信息功能

2018.09.12 完成爬取风力等信息的功能

2018.09.14 完成爬取空气质量信息的功能

2018.09.15 重构代码

2018.09.16 完成数据保存功能

2018.09.21 功能基本完成，修复部分细节问题

版本：V1.2

备注：由于最近没有极端天气，所以爬取高温预警、雷电预警之类的功能暂未完成

空气质量评定标准，AQI分级

优 0-50

良 51-100

轻度污染 101-150

中度污染 151-200

重度污染 201-300

严重污染 301-500

500以上爆表了

'''

# ---------------- 导入模块 ----------------

from bs4 import BeautifulSoup

import requests

import time

import datetime

import os

import re

import csv

# ---------------- 全局变量、初始化等 ----------------

# URLs to crawl: the weather page and the air-quality (AQI) page of the same location.

wea_url = "https://tianqi.moji.com/weather/china/shaanxi/yanta-district"

aqi_url = "https://tianqi.moji.com/aqi/china/shaanxi/yanta-district"

'''

墨迹天气官方网站

https://tianqi.moji.com/

链接格式

天气预报 https://tianqi.moji.com/weather/china/省份/市、区、县等

空气质量 https://tianqi.moji.com/aqi/china/省份/市、区、县等

地级市，直接用拼音，如

https://tianqi.moji.com/weather/china/shaanxi/xian

市区，区名的拼音加-district，如

https://tianqi.moji.com/weather/china/shaanxi/yanta-district

县，县名的拼音加-county

镇，镇名的拼音加-town

其他的地名，如XX山、XX风景区、XX湖基本都是拼音加英文

特殊地名特殊处理，如

秦始皇陵

mausoleum-of-the-first-qin-emperor

陕西历史博物馆

shanxi-history-museum

'''

# Debug switch. When True the spiders parse local HTML files ("yanta.html" and
# "air.html") instead of requesting the pages, and main() writes to the
# "weather_debug.*" output files.

# DEBUG_MODE = True

DEBUG_MODE = False

# ---------------- 类定义 ----------------

# 实时天气信息及预报天气信息

class weather:
    """Real-time weather and air-quality record for one location.

    Every field has a neutral default (``None`` for numbers and times, ``""``
    for text) so that an empty record can be created first and filled in by
    the spiders afterwards.

    Attributes:
        crawltime: local time at which the record was created, truncated to
            whole seconds.
        uptime: update time shown on the website (not the crawl time).
        city, weather, temp: basic weather information.
        humi, wind_dir, wind_min, wind_max, tips, description: detailed
            weather information; ``description`` is the overall summary.
        aqi, aqi_level, aqi_PM10, aqi_PM2P5, aqi_NO2, aqi_SO2, aqi_O3, aqi_CO:
            air-quality index, its level text and the per-pollutant values.
        other_info: additional information such as heat or lightning warnings.
    """

    def __init__(self, uptime=None, city="", weather="", temp=None,
                 humi=None, wind_dir="", wind_min=None, wind_max=None, tips="", description="",
                 aqi=None, aqi_level="", aqi_PM10=None, aqi_PM2P5=None, aqi_NO2=None, aqi_SO2=None,
                 aqi_O3=None, aqi_CO=None,
                 other_info=""):
        # Crawl time, truncated to whole seconds.
        self.crawltime = datetime.datetime.now().replace(microsecond=0)

        # Basic weather information.
        self.uptime = uptime  # update time shown on the website, not the crawl time
        self.city = city
        self.weather = weather
        self.temp = temp

        # Detailed weather information.
        self.humi = humi
        self.wind_dir = wind_dir
        self.wind_min = wind_min
        self.wind_max = wind_max
        self.tips = tips
        self.description = description  # overall weather description

        # Air quality.
        self.aqi = aqi
        self.aqi_level = aqi_level
        self.aqi_PM10 = aqi_PM10
        self.aqi_PM2P5 = aqi_PM2P5
        self.aqi_NO2 = aqi_NO2
        self.aqi_SO2 = aqi_SO2
        self.aqi_O3 = aqi_O3
        self.aqi_CO = aqi_CO

        # Other information, e.g. heat or lightning warnings.
        self.other_info = other_info

    def _raw_fields(self):
        """Return all field values, unconverted, in output column order."""
        return [self.crawltime, self.uptime, self.city, self.weather, self.temp,
                self.humi, self.wind_dir, self.wind_min, self.wind_max, self.tips, self.description,
                self.aqi, self.aqi_level, self.aqi_PM10, self.aqi_PM2P5, self.aqi_NO2, self.aqi_SO2,
                self.aqi_O3, self.aqi_CO, self.other_info]

    def get_weather_info_list(self):
        """Return the record as a list suitable for writing to a file.

        The two time fields are converted with ``str``; every other field is
        returned with its original type.
        """
        fields = self._raw_fields()
        return [str(fields[0]), str(fields[1])] + fields[2:]

    def get_weather_info_str(self):
        """Return the record as a list in which every field is a string.

        ``None`` fields become the text ``"None"``.
        """
        return [str(value) for value in self._raw_fields()]

    def __str__(self):
        """Return all fields joined by ``|``, with ``None`` fields left empty.

        Emptiness is decided per field, so the text "None" occurring inside a
        real value (for example in the description) is preserved.
        """
        return "|".join("" if value is None else str(value) for value in self._raw_fields())

# 天气预报信息，比较简略

class weather_forecast:
    """Forecast record for one day; a reduced version of the real-time record.

    Attributes:
        crawltime: local time at which the record was created, truncated to
            whole seconds.
        uptime: update time shown on the website (not the crawl time).
        city, weather, temp_min, temp_max: basic weather information.
        wind_dir, wind_min, wind_max: wind direction and force range.
        aqi, aqi_level: air-quality index and its level text.
        other_info: additional information such as heat or lightning warnings.
    """

    def __init__(self, uptime=None, city="", weather="", temp_min=None, temp_max=None,
                 wind_dir="", wind_min=None, wind_max=None, aqi=None, aqi_level=None, other_info=""):
        # Crawl time, truncated to whole seconds.
        self.crawltime = datetime.datetime.now().replace(microsecond=0)

        # Basic weather information.
        self.uptime = uptime  # update time shown on the website, not the crawl time
        self.city = city
        self.weather = weather
        self.temp_min = temp_min
        self.temp_max = temp_max

        # Detailed weather information.
        self.wind_dir = wind_dir
        self.wind_min = wind_min
        self.wind_max = wind_max

        # Air quality.
        self.aqi = aqi
        self.aqi_level = aqi_level

        # Other information, e.g. heat or lightning warnings.
        self.other_info = other_info

    def _raw_fields(self):
        """Return all field values, unconverted, in output column order."""
        return [self.crawltime, self.uptime, self.city, self.weather, self.temp_min, self.temp_max,
                self.wind_dir, self.wind_min, self.wind_max, self.aqi, self.aqi_level, self.other_info]

    def get_weather_info_list(self):
        """Return the record as a list suitable for writing to a file.

        The two time fields are converted with ``str``; every other field is
        returned with its original type.
        """
        fields = self._raw_fields()
        return [str(fields[0]), str(fields[1])] + fields[2:]

    def get_weather_info_str(self):
        """Return the record as a list in which every field is a string.

        ``None`` fields become the text ``"None"``.
        """
        return [str(value) for value in self._raw_fields()]

    def __str__(self):
        """Return all fields joined by ``|``, with ``None`` fields left empty.

        Emptiness is decided per field, so the text "None" occurring inside a
        real value is preserved.
        """
        return "|".join("" if value is None else str(value) for value in self._raw_fields())

# ---------------- 函数定义 ----------------

# 天气爬虫

def weather_spider(weather_info, weather_forecast_info, url):
    """Crawl the weather page and fill in the real-time and forecast records.

    Args:
        weather_info: ``weather`` record that receives city, update time,
            description, temperature, weather, humidity, wind and tips.
        weather_forecast_info: sequence of ``weather_forecast`` records, one
            per forecast day, filled in page order.
        url: address of the weather page; ignored in debug mode, where the
            local file "yanta.html" is parsed instead.

    Returns:
        None. When the request fails or the server does not answer with an
        OK status, a message is printed and the records are left untouched.
    """
    if DEBUG_MODE:
        # Local test file.
        with open("yanta.html", 'r', encoding='utf-8') as html_file:
            html_text = html_file.read()
        soup = BeautifulSoup(html_text, "lxml")
    else:
        try:
            # Without a timeout a stalled connection would block the crawler forever.
            web_data = requests.get(url, timeout=30)
        except requests.exceptions.ConnectionError:
            print("网络连接异常")
            return
        except Exception:
            print("其他异常")
            return
        if web_data.status_code != requests.codes.ok:
            print("服务器响应异常", web_data.status_code)
            return
        soup = BeautifulSoup(web_data.text, "lxml")

    # City name.
    city_tag = soup.select(".search_default > em")
    weather_info.city = city_tag[0].get_text().replace(' ', '')

    # Update time: the page only shows hour and minute, so today's date is used.
    uptime_tag = soup.find("strong", class_="info_uptime")
    hour_str, minute_str = re.search(r"(\d+):(\d+)", uptime_tag.get_text()).groups()
    weather_info.uptime = datetime.datetime.now().replace(
        hour=int(hour_str), minute=int(minute_str), second=0, microsecond=0)

    # Overall description, taken from the "content" attribute of the meta tag.
    description_tag = soup.select('meta[name="description"]')
    weather_description = description_tag[0].get("content")
    # str.replace returns a new string, so the result has to be kept:
    # remove spaces and turn ASCII commas into full-width ones.
    weather_description = weather_description.replace(" ", "").replace(",", "，")
    weather_info.description = weather_description

    # Real-time weather: <em> holds the temperature, <b> the weather text.
    tmp_tag = soup.find("div", class_="wea_weather clearfix")
    weather_info.temp = float(tmp_tag.em.string)
    weather_info.weather = tmp_tag.b.string

    # Humidity (<span>) and wind (<em>) share one container.
    tmp_tag = soup.find("div", class_="wea_about clearfix")
    humi_str = tmp_tag.span.string
    weather_info.humi = int(re.search(r"(\d+)", humi_str).group())

    wind_str = tmp_tag.em.string
    wind_dir_match = re.search(r"([东西南北微无台]+风)", wind_str)
    if wind_dir_match is None:
        print("可能是风力等级有新的汉字，原始数据：", wind_str)
    else:
        weather_info.wind_dir = wind_dir_match.group()
        # Wind force is stored as int, like in the forecast records; a second
        # number, if present, is taken as the upper bound of a range.
        wind_levels = re.findall(r"\d+", wind_str)
        if wind_levels:
            weather_info.wind_min = int(wind_levels[0])
        if len(wind_levels) > 1:
            weather_info.wind_max = int(wind_levels[1])

    tmp_tag = soup.find("div", class_="wea_tips clearfix")
    weather_info.tips = tmp_tag.em.string

    # Forecast: one <ul class="days clearfix"> per day. zip() stops at the
    # shorter sequence, so extra day lists on the page cannot overrun the
    # forecast records.
    day_tags = soup.find_all("ul", class_="days clearfix")
    for forecast, oneday in zip(weather_forecast_info, day_tags):
        items_tag = oneday.find_all("li")

        # City and update time were already crawled above.
        forecast.uptime = weather_info.uptime
        forecast.city = weather_info.city

        # Item 0 (today / tomorrow / the day after) is not crawled.

        # Item 1: weather text.
        forecast.weather = re.sub(r"\s", "", items_tag[1].get_text())

        # Item 2: temperatures. The optional minus sign keeps sub-zero values
        # negative. NOTE(review): assumes the two values are not separated by
        # a bare hyphen - confirm against the page.
        temps = re.findall(r"(-?\d+)", items_tag[2].get_text())
        forecast.temp_min = int(temps[0])
        forecast.temp_max = int(temps[1])

        # Item 3: wind direction (<em>) and wind force (<b>).
        forecast.wind_dir = items_tag[3].em.get_text()
        wind_values = re.findall(r"(\d+)", items_tag[3].b.get_text())
        if len(wind_values) == 1:
            forecast.wind_min = int(wind_values[0])
        elif len(wind_values) == 2:
            forecast.wind_min = int(wind_values[0])
            forecast.wind_max = int(wind_values[1])
        else:
            print("解析风力级数出错")

        # Item 4: air quality, a number followed by the level text.
        aqi_str = re.sub(r"\s", "", items_tag[4].get_text())
        forecast.aqi = int(re.search(r"(\d+)", aqi_str).group())
        forecast.aqi_level = re.search(r"(\D+)", aqi_str).group()

# 空气污染爬虫

def aqi_spider(weather_info, url):
    """Crawl the air-quality page and fill in the AQI fields of a record.

    Args:
        weather_info: ``weather`` record that receives ``aqi``, ``aqi_level``
            and the per-pollutant values.
        url: address of the air-quality page; ignored in debug mode, where
            the local file "air.html" is parsed instead.

    Returns:
        None. When the request fails or the server does not answer with an
        OK status, a message is printed and the record is left untouched.
    """
    if DEBUG_MODE:
        # Local test file.
        with open("air.html", 'r', encoding='utf-8') as html_file:
            html_text = html_file.read()
        soup = BeautifulSoup(html_text, "lxml")
    else:
        try:
            # Without a timeout a stalled connection would block the crawler forever.
            web_data = requests.get(url, timeout=30)
        except requests.exceptions.ConnectionError:
            print("网络连接异常")
            return
        except Exception:
            print("其他异常")
            return
        if web_data.status_code != requests.codes.ok:
            print("服务器响应异常", web_data.status_code)
            return
        soup = BeautifulSoup(web_data.text, "lxml")

    # Overall index and level, looked up by element id.
    aqi_value_tag = soup.select("#aqi_value")
    weather_info.aqi = int(aqi_value_tag[0].get_text())
    aqi_desc_tag = soup.select("#aqi_desc")
    weather_info.aqi_level = aqi_desc_tag[0].get_text()

    # Detail list: <em> tags carry the entry names, <span> tags the values.
    aqi_info_item = soup.find("ul", class_="clearfix")
    aqi_info_name_list = ["".join(name.strings) for name in aqi_info_item.find_all("em")]
    aqi_info_value_list = [int(value.string) for value in aqi_info_item.find_all("span")]
    aqi_info_list = dict(zip(aqi_info_name_list, aqi_info_value_list))

    # Store each pollutant; an entry missing from the page is reported and the
    # attribute keeps its previous value instead of raising KeyError.
    attr_to_key = (
        ("aqi_PM10", "PM10"),
        ("aqi_PM2P5", "PM2.5"),
        ("aqi_NO2", "NO2"),
        ("aqi_SO2", "SO2"),
        ("aqi_O3", "O3"),
        ("aqi_CO", "CO"),
    )
    for attr_name, key in attr_to_key:
        if key in aqi_info_list:
            setattr(weather_info, attr_name, aqi_info_list[key])
        else:
            print("空气质量页面缺少条目：", key)

    # The release time shown on this page (element class "aqi_info_time") is
    # deliberately not crawled: the original author found it to lag badly.

# 主函数

def main():
    """Crawl the weather and air-quality pages forever and save the results.

    Output goes to a "weather_data" directory next to this script:

    * a CSV file to which one row of real-time data is appended per round
      (the header row is written when the file is first created);
    * a text file that is overwritten each round with the latest real-time
      record followed by the forecast records, one per line.

    In debug mode the files are named "weather_debug.*". The function sleeps
    2700 seconds between rounds and does not return.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    save_path = os.path.join(dir_path, "weather_data")

    if DEBUG_MODE:
        save_csv_filename = os.path.join(save_path, "weather_debug.csv")
        save_txt_filename = os.path.join(save_path, "weather_debug.txt")
    else:
        save_csv_filename = os.path.join(save_path, "weather.csv")
        save_txt_filename = os.path.join(save_path, "weather.txt")

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_path, exist_ok=True)

    # errors="replace" keeps a single character that GBK cannot encode from
    # aborting the whole crawler with UnicodeEncodeError.
    if not os.path.exists(save_csv_filename):
        with open(save_csv_filename, "w", newline="", encoding="GBK", errors="replace") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(["爬取时间", "更新时间", "地区", "天气", "温度",
                                 "湿度", "风向", "最小风力", "最大风力", "小贴士", "描述",
                                 "空气质量指数", "空气质量等级", "PM10", "PM2.5", "NO2", "SO2", "O3", "CO", "其他信息"])

    count = 0
    while True:
        # Fresh records for the real-time data and the 3-day forecast.
        all_weather_info = weather()
        all_weather_forecast_info = [weather_forecast(), weather_forecast(), weather_forecast()]

        # Each spider runs on its own so that a page-layout change in one
        # page neither skips the other page nor ends the endless loop; the
        # record is saved with whatever fields were filled in, as it already
        # is when a request fails.
        spider_calls = (
            (weather_spider, (all_weather_info, all_weather_forecast_info, wea_url)),
            (aqi_spider, (all_weather_info, aqi_url)),
        )
        for spider, spider_args in spider_calls:
            try:
                spider(*spider_args)
            except Exception as err:
                print("爬取或解析出错：", repr(err))

        print(all_weather_info.get_weather_info_list())
        for info in all_weather_forecast_info:
            print(info.get_weather_info_list())

        # Append to the CSV so that the weather history can be looked up.
        with open(save_csv_filename, "a", newline="", encoding="GBK", errors="replace") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(all_weather_info.get_weather_info_list())

        # Overwrite the latest-weather file, which other programs can read.
        with open(save_txt_filename, "w", encoding="GBK", errors="replace") as txt_file:
            txt_file.write(str(all_weather_info) + "\n")
            for item in all_weather_forecast_info:
                txt_file.write(str(item) + "\n")

        count += 1
        print("第%d次爬取完成，待机中……\n" % (count))
        time.sleep(2700)

# ---------------- 程序入口 ----------------

# Start the crawler only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

一键复制

编辑

Web IDE

原始数据

按行查看

历史

墨迹天气接口html,moji_weather_spider.py相关推荐

Python 爬虫篇-利用BeautifulSoup库爬取墨迹天气网的天气信息实例演示，调用墨迹天气api接口获取空气质量
安装方法: pip install BeautifulSoup4 BeautifulSoup 详细使用文档墨迹天气抓取演示墨迹天气没有提供专门的天气接口 api,但我们可以用 BeautifulS ...
Py：利用pickle模块和API天气接口实现输入城市得到该城市的天气预报
Py:利用pickle模块和API天气接口实现输入城市得到该城市的天气预报目录输出结果实现代码输出结果实现代码 # -*- coding: utf-8 -*- ''' Created on ...
Android使用SurfaceView实现墨迹天气的风车效果
SurfaceView也是继承自View,它和我们以前接触到的View(Button.TextView等)最大的不同是,SurfaceView可以有一个单独的线程进行绘制,这个线程区别于UI线程(主线 ...
android 和风图标字体移植显示墨迹天气图标
android studio版本:21.2.1 例程:newareaautov1 和风天气字体图标使用方法见: android 显示和风天气字体图标_kim5659的博客-CSDN博客_qweathe ...
Android自定义view--SurfaceView实现墨迹天气的风车效果
Android自定义view--SurfaceView实现墨迹天气的风车效果 SurfaceView也是继承自View,它和我们以前接触到的View(Button.TextView等)最大的不同是,S ...
爬虫笔记1--爬取墨迹天气
爬虫笔记1--爬取墨迹天气最近由于需要写了一个简单的墨迹天气爬取脚本,主要功能为爬取墨迹天气,然后将其存到MySQL数据库中. 1.功能本代码主要功能为:爬取墨迹天气数据,将数据保存到MySQL数 ...
墨迹天气：更重视商业数据的公共价值
[在中国,墨迹天气所涉及的天气数据细化到了城市行政区域.县级.村.街道的级别,也就是说,在同一个城市的不同位置不同时间,用户也可以看到跟自己更贴近的天气情况,准确率超过80%.] 当你推着满满一车物品 ...
云场景实践研究第85期：墨迹天气
更多云场景实践研究案例,点击这里:[云场景实践研究合集]联合不是简单的加法,而是无限的生态,谁会是下一个独角兽墨迹运营团队每天最关心的是用户正在如何使用墨迹,在他们操作中透露了哪些个性化需求.这些数 ...
Android 打造自己的个性化应用(四)：仿墨迹天气实现--自定义扩展名的zip格式的皮肤...
在这里谈一下墨迹天气的换肤实现方式,不过首先声明我只是通过反编译以及参考了一些网上其他资料的方式推测出的换肤原理, 在这里只供参考. 若大家有更好的方式, 欢迎交流. 墨迹天气下载的皮肤就是一个zip ...

墨迹天气接口html,moji_weather_spider.py

墨迹天气接口html,moji_weather_spider.py相关推荐

最新文章

热门文章