爬取cloudmusic歌单

代码

# -*- coding: utf-8 -*-
import re
import csv
import json
import time
import pymysql
import requests
from bs4 import BeautifulSoup
from multiprocessing import Pool# 请求头
# HTTP headers sent with every request: a desktop-browser User-Agent string
# is supplied instead of the HTTP library's default one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

# Playlist discovery page; the playlist category names are scraped from it.
type_url = "https://music.163.com/discover/playlist"

# Module-level MySQL connection and cursor used by save_to_playlists().
# `database=` replaces the deprecated `db=` alias, and the charset is set
# explicitly so non-ASCII text does not depend on the driver's default.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
db = pymysql.connect(
    host="localhost",
    user="root",
    password="123456",
    port=3306,
    database="cloudmusic1",
    charset="utf8mb4",
)
cursor = db.cursor()


# Fetch the playlist categories.
def get_playlist_type(url):
    """Return the playlist category names found on the page at ``url``.

    The page is requested with the module-level ``headers`` and parsed with
    BeautifulSoup. The text of every ``a.s-fc1`` anchor is collected and the
    first match is discarded.

    Args:
        url: Address of the playlist discovery page.

    Returns:
        list[str]: Anchor texts, without the first matched anchor.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps a stalled connection from hanging the crawler forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    # NOTE(review): the first a.s-fc1 anchor is skipped; presumably it is a
    # catch-all entry rather than a real category -- confirm against the page.
    return [anchor.text for anchor in soup.select("a.s-fc1")][1:]


# Fetch the playlist ids.
def get_playlist_id(url):
    """Collect the playlist ids on one listing page and process them.

    The category is recovered from the ``cat=...&limit`` part of ``url``.
    Every ``a.msk`` anchor's ``href`` is reduced to its digits to obtain a
    playlist id, and the ids are handed to ``get_playlist_info`` together
    with the category.

    Args:
        url: Listing-page URL containing ``cat=<category>&limit``.

    Raises:
        ValueError: If ``url`` carries no ``cat=...&limit`` segment.
        requests.RequestException: If the request fails or times out.
    """
    # Validate the URL before spending a network round trip on it; a missing
    # match would otherwise surface as an opaque AttributeError on None.
    category_match = re.search(r'https.*cat=(.*)&limit', url)
    if category_match is None:
        raise ValueError("no 'cat=...&limit' segment in URL: {!r}".format(url))
    category = category_match.group(1)

    # A timeout keeps a stalled connection from blocking a pool worker forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    # Strip every non-digit from the href, leaving only the numeric id.
    ids = [re.sub(r"\D+", "", anchor['href']) for anchor in soup.select("a.msk")]
    get_playlist_info(ids, category)


# Fetch the playlist details.
def _load_region_names(path="country.csv"):
    """Read the region table into a ``{code: name}`` dict.

    Column 0 of each CSV row is used as the code and column 1 as the name.
    The first occurrence of a code wins, and rows with fewer than two
    columns (e.g. blank lines) are ignored.
    """
    names = {}
    with open(path, encoding="utf-8") as f:
        for row in csv.reader(f):
            if len(row) >= 2:
                names.setdefault(row[0], row[1])
    return names


def _ms_to_date(timestamp_ms):
    """Format a millisecond timestamp as a local-time ``YYYY-MM-DD`` string."""
    # Integer division replaces slicing off the last three digits, which
    # raised ValueError for values shorter than four digits.
    return time.strftime("%Y-%m-%d", time.localtime(int(timestamp_ms) // 1000))


def _resolve_region(province_code, city_code, region_names):
    """Translate province/city codes into the names stored in the database."""
    province = region_names.get(province_code, province_code)
    city = region_names.get(city_code, city_code)
    # These three regions are stored with the same value for province and city.
    if province in ('香港特别行政区', '澳门特别行政区', '台湾省'):
        city = province
    # A province code missing from the table is recorded as overseas.
    if province == province_code:
        province = '海外'
        city = '海外'
    # A city code missing from the table falls back to the province name.
    if city == city_code:
        city = province
    return province, city


def _build_playlist_row(json_playlist, playlist_type, region_names):
    """Flatten one playlist payload into the 17-column row that is saved."""
    creator = json_playlist['creator']
    province, city = _resolve_region(
        str(creator['province']), str(creator['city']), region_names)
    # NOTE(review): every gender code other than '1' is stored as '女';
    # confirm whether the API also emits an "unknown" code.
    gender = '男' if str(creator['gender']) == '1' else '女'
    return [
        str(json_playlist["id"]),
        json_playlist["name"],
        playlist_type,
        "、".join(json_playlist["tags"]),
        _ms_to_date(json_playlist["createTime"]),
        _ms_to_date(json_playlist["updateTime"]),
        len(json_playlist["trackIds"]),
        json_playlist["playCount"],
        json_playlist["subscribedCount"],
        json_playlist["shareCount"],
        json_playlist["commentCount"],
        creator['nickname'],
        gender,
        str(creator['userType']),
        str(creator['vipType']),
        province,
        city,
    ]


def get_playlist_info(ids, t):
    """Fetch details for each playlist id, print them and store them.

    Each saved row holds: playlist id, name, category, tags, creation date,
    last-update date, track count, play count, subscriber count, share
    count, comment count, then the creator's nickname, gender, user type,
    VIP type, province and city.

    Args:
        ids: Iterable of playlist ids.
        t: Category name recorded with every playlist in this batch.

    Playlists that cannot be fetched or parsed are skipped. Playlists whose
    payload lacks an expected field are skipped with a printed notice, so
    the remaining ids are still processed.
    """
    playlist_url = "https://api.imjad.cn/cloudmusic/?type=playlist&id={}"
    # Loaded on first use and then reused, so the CSV is not reopened and
    # rescanned for every playlist.
    region_names = None
    for playlist_id in ids:
        url = playlist_url.format(playlist_id)
        try:
            response = requests.get(url=url, headers=headers, timeout=10)
            json_playlist = json.loads(response.text)["playlist"]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, invalid JSON, or no "playlist" entry.
            continue

        if region_names is None:
            region_names = _load_region_names()

        try:
            playlist = _build_playlist_row(json_playlist, t, region_names)
        except (KeyError, TypeError, ValueError, OverflowError, OSError) as exc:
            print("skipping {}: {!r}".format(url, exc))
            continue

        print(playlist)
        save_to_playlists(playlist)


# Save to the database.
def save_to_playlists(l):
    """Insert one playlist row into the ``playlists`` table.

    Args:
        l: Sequence whose first 17 items match the column order of the
            INSERT statement below.

    The insert is best-effort: on any failure the transaction is rolled
    back and the error is printed instead of being silently discarded.
    """
    sql = """insert into playlists(id, name, type, tags, create_time, update_time, tracks_num, play_count, subscribed_count, share_count, comment_count, nickname, gender, user_type, vip_type, province, city) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    try:
        cursor.execute(sql, tuple(l[:17]))
        db.commit()
    except Exception as exc:
        db.rollback()
        print("insert failed, rolled back: {!r}".format(exc))


def _report_worker_error(exc):
    """Print an exception raised inside a pool worker."""
    print("worker failed: {!r}".format(exc))


def main():
    """Crawl every category's listing pages with a pool of 10 processes.

    For each category returned by ``get_playlist_type`` 37 listing pages of
    35 playlists each are requested (offsets 0, 35, ..., 1260), and each
    page is handled by ``get_playlist_id`` in a worker process.
    """
    types = get_playlist_type(type_url)
    # NOTE(review): the category name is interpolated without URL-encoding,
    # so a name containing '&' would end the `cat` parameter early. Encoding
    # it here would also require decoding it in get_playlist_id.
    urls = [
        "https://music.163.com/discover/playlist/?order=hot&cat={0}&limit=35&offset={1}".format(t, i * 35)
        for t in types
        for i in range(37)
    ]
    # NOTE(review): the module-level `db` connection exists before the pool
    # is created; with the fork start method the workers would inherit and
    # share it -- confirm this is safe or open one connection per worker.
    pool = Pool(10)
    for url in urls:
        # Without error_callback an exception in a worker is dropped silently.
        pool.apply_async(get_playlist_id, args=(url,),
                         error_callback=_report_worker_error)
    pool.close()
    pool.join()


if __name__ == "__main__":
    main()

爬取cloudmusic歌单相关推荐

网易云爬取首页歌单里的所有歌曲
网易云爬取首页歌单里的所有歌曲前言:本文章仅供个人参考使用,非商用用途,其中参考了其他的文献资料,如有不妥之处,请联系本人邮箱:wurenjie8@163.com 思路:通过首页URL获取所有首页的 ...
java实现爬虫，爬取网易歌单信息
之前一直对爬虫很好奇,觉得它很神秘,而我有个朋友是做爬虫的,最近有空就向他学习了一下,并试着写了个小程序. 首先是获得httpclient对象及httpresponse对象,此两者是用于发送请求及接受 ...
Crawler：基于requests库+json库+40行代码实现爬取猫眼榜单TOP100榜电影名称主要信息
Crawler:基于requests库+json库+40行代码实现爬取猫眼榜单TOP100榜电影名称主要信息目录输出结果实现代码输出结果实现代码 # -*- coding: utf-8 -* ...
抓取spotify歌单_如何使用python抓取Spotify上最受欢迎的歌曲
抓取spotify歌单 Spotify is one of the most popular music streaming services in the world, with nearly 30 ...
Python爬虫实战，Request+urllib模块，批量下载爬取飙歌榜所有音乐文件
前言今天给大家介绍的是Python爬取飙歌榜所有音频数据并保存本地,在这里给需要的小伙伴们代码,并且给出一点小心得. 首先是爬取之前应该尽可能伪装成浏览器而不被识别出来是爬虫,基本的是加请求头,但是 ...
python爬取股票大单历史记录_利用bs4爬取股票的历史交易数据
听起来,爬取股票的所有历史交易数据很高大上,有木有? 不过写这个爬虫的时候,发现基于网易财经的股票历史数据的爬取其实挺简单,最后再写到txt文档里(暂时写txt,以后会写csv的.可以在用机器学习干一 ...
【Python】爬取理想论坛单帖爬虫
代码: # 单帖爬虫,用于爬取理想论坛帖子得到发帖人,发帖时间和回帖时间,url例子见main函数 from bs4 import BeautifulSoup import requests impo ...
Python-scrapy爬取起点榜单信息
spider import scrapyfrom qiDianTop.items import QidiantopItem""" 初步逻辑 1.获取所有榜单类型列表遍历 ...
python爬取股票大单历史记录_定向爬取股票数据——记录一次爬虫实战
今天是八月2号,自学python爬虫已经一整个月了.不学不理解,真正学了才发现,python不愧是一门博大精深的高级编程语言,我学到现在也才只见识到它冰山一角,python应用的范围即便相比于c.c+ ...
从入门到入土：Python爬虫学习|实例练手|爬取猫眼榜单|Xpath定位标签爬取|代码
此博客仅用于记录个人学习进度,学识浅薄,若有错误观点欢迎评论区指出.欢迎各位前来交流.(部分材料来源网络,若有侵权,立即删除) 本人博客所有文章纯属学习之用,不涉及商业利益.不合适引用,自当删除! 若 ...

爬取cloudmusic歌单

爬取cloudmusic歌单相关推荐

最新文章

热门文章