python刷网易云_利用Python获取网易云音乐数据,python

#!/usr/bin/env python

# -*- coding: utf-8 -*-

import json

import re

import urllib.request

import pandas as pd

import requests

from selenium import webdriver

from selenium.webdriver.support.ui import WebDriverWait

import os

# Shared Chrome session used by MusicInfo.get_music_info.
# NOTE: the browser is started as soon as this module is executed/imported.

driver = webdriver.Chrome()

# Explicit wait bound to the shared driver. The second argument (8) is the
# timeout, i.e. the maximum time to wait; any implicit wait configured on the
# driver has to be taken into account as well.
# NOTE(review): `wait` is not referenced anywhere else in the visible code.

wait = WebDriverWait(driver, 8)

class MusicInfo(object):
    """Collect the hot-song list shown on a NetEase Cloud Music artist page.

    The constructor stores two attributes:
    ``id`` is the artist id inserted into the artist page URL and into the
    CSV file name; ``name`` is the artist name used for the output directory.
    """

    def __init__(self, id, name):
        self.id = id
        self.name = name

    def get_music_info(self):
        """Open the artist page and read the title and link of every hot song.

        Uses the module-level ``driver`` browser session. As a side effect the
        directory ``<cwd>/data/<name>`` is created when it does not exist yet.

        Returns:
            tuple: ``(music_info, path)`` where ``music_info`` is a list of
            ``(title, href)`` tuples and ``path`` is the artist directory.
        """
        # Local import keeps this block self-contained; the By locator API
        # replaces the find_element_by_* helpers that Selenium 4 removed.
        from selenium.webdriver.common.by import By

        url = "https://music.163.com/#/artist?id={0}".format(self.id)
        driver.get(url)
        # The elements read below are looked up inside the 'contentFrame' frame.
        driver.switch_to.frame('contentFrame')

        print(self.name)
        path = os.getcwd() + "/data/{0}".format(self.name)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(path, exist_ok=True)
        print(path)

        rows = driver.find_element(By.ID, "hotsong-list").find_elements(By.TAG_NAME, "tr")
        music_info = []
        for row in rows:
            content = row.find_element(By.CLASS_NAME, 'txt')
            href = content.find_element(By.TAG_NAME, 'a').get_attribute('href')
            title = content.find_element(By.TAG_NAME, 'b').get_attribute('title')
            music_info.append((title, href))
        return music_info, path

    def save_csv(self, music_info, path, head=None):
        """Write ``music_info`` to ``<path>/singer<id>.csv``.

        Args:
            music_info: Rows to store, e.g. the ``(title, href)`` tuples
                returned by ``get_music_info``.
            path: Directory that receives the CSV file.
            head: Optional list of column names passed to the DataFrame.
        """
        data = pd.DataFrame(music_info, columns=head)
        # index=False drops the DataFrame's default index column from the file.
        data.to_csv("{0}/singer{1}.csv".format(path, str(self.id)), encoding="utf-8", index=False)

class DownloadMusic(object):
    """Fetch the lyric text and the mp3 file of one NetEase Cloud Music song.

    Output is written below ``path``: lyrics go to ``<path>/lyric`` and audio
    files go to ``<path>/music``.
    """

    # Bracketed tags such as "[00:12.34]". '.' does not cross line breaks, so
    # each line is handled separately; the match is greedy, i.e. it runs from
    # the first '[' to the last ']' of a line.
    _TAG_PATTERN = re.compile(r'\[.*\]')

    def __init__(self, music_name, music_id, path):
        self.music_name = music_name
        self.music_id = music_id
        self.path = path

    @staticmethod
    def _extract_lyric(payload):
        """Return the lyric text of a decoded lyric response, tags removed.

        An empty string is returned when the response has no ``lrc`` entry or
        its ``lyric`` value is missing/empty, instead of raising ``KeyError``
        or ``TypeError``.
        """
        raw_lyric = (payload.get('lrc') or {}).get('lyric') or ''
        return DownloadMusic._TAG_PATTERN.sub('', raw_lyric)

    def get_lyric(self):
        """Request the song's lyric from the lyric API and return it as text.

        Returns:
            str: Lyric with bracketed tags stripped; ``''`` when the response
            carries no lyric text.
        """
        url = 'http://music.163.com/api/song/lyric?' + 'id=' + str(self.music_id) + '&lv=1&kv=1&tv=-1'
        # Without a timeout a stalled connection would block the run forever.
        r = requests.get(url, timeout=10)
        return self._extract_lyric(json.loads(r.text))

    def download_mp3(self):
        """Download the song to ``<path>/music/<music_name>.mp3``.

        Best effort: any failure is reported on stdout and not re-raised, so
        one failing song does not abort the caller's loop.
        """
        url = 'http://music.163.com/song/media/outer/url?id=' + str(self.music_id) + '.mp3'
        try:
            print("正在下载：{0}".format(self.music_name))
            path = self.path + "/music"
            os.makedirs(path, exist_ok=True)
            urllib.request.urlretrieve(url, '{0}/{1}.mp3'.format(path, self.music_name))
            print("Finish...")
        except Exception as exc:
            # Exception (not a bare except) lets Ctrl-C / SystemExit propagate;
            # the reason is printed so failures are diagnosable.
            print("Failed...", exc)

    def save_txt(self):
        """Fetch the lyric and write it to a UTF-8 text file in ``<path>/lyric``.

        The file name is the song name with '.', '?' and all whitespace removed.
        """
        lyric = self.get_lyric()
        print(lyric)
        print("正在写入歌曲:{0}".format(self.music_name))
        path = self.path + "/lyric"
        os.makedirs(path, exist_ok=True)
        file_name = "".join(self.music_name.replace('.', '').replace('?', '').split())
        with open("{0}/{1}.txt".format(path, file_name), 'w', encoding='utf-8') as f:
            f.write(lyric)

def main(id, name):
    """Scrape one artist's hot songs, then save each song's lyric and mp3.

    Args:
        id: Artist id (the name shadows the builtin; kept so existing callers
            that pass it by keyword keep working).
        name: Artist name, used for the output directory.
    """
    artist = MusicInfo(id, name)
    # Collect the (title, link) rows and the artist's output directory.
    music_info, path = artist.get_music_info()
    # Store the song titles and links in a CSV file.
    artist.save_csv(music_info, path, head=['music', 'link'])

    # pandas' read_csv() uses the C parser engine by default, which can fail
    # when the file path contains Chinese characters, hence engine='python'.
    songs = pd.read_csv('{0}/singer{1}.csv'.format(path, str(id)), engine='python', encoding='utf-8')

    # Compiled once outside the loop; group(1) is everything after "id=".
    id_pattern = re.compile(r'id=(.*)')

    # iterrows() yields (index, row) tuples; the index is not needed here.
    for _, row in songs.iterrows():
        link = row['link']
        # A missing link comes back from the CSV as NaN (not a str), and a
        # link without "id=" has no match; skip both instead of crashing.
        match = id_pattern.search(link) if isinstance(link, str) else None
        if match is None:
            print("Skipped, no song id in link: {0}".format(link))
            continue
        song_id = match.group(1)
        print(song_id)
        song = DownloadMusic(row['music'], song_id, path)
        song.save_txt()
        song.download_mp3()

if __name__ == '__main__':
    # Artist id -> artist name for every artist to scrape.
    dict_data = {
        '5781': '薛之谦',
        '2116': '陈奕迅',
        '3684': '林俊杰',
        '44266': 'Taylor Swift',
        '72724': 'Rihanna',
    }
    try:
        # Loop names avoid shadowing the builtin `id`.
        for artist_id, artist_name in dict_data.items():
            print(artist_id, artist_name)
            main(artist_id, artist_name)
        # NOTE(review): the original indentation was lost; this call is assumed
        # to run once after the loop rather than on every iteration.
        main(12138269, '毛不易')
    finally:
        # Close the Chrome session opened at module level, even on failure.
        driver.quit()

python刷网易云_利用Python获取网易云音乐数据,python相关推荐

python生成文章标题_利用简书首页文章标题数据生成词云
原标题:利用简书首页文章标题数据生成词云感谢关注天善智能,走好数据之路↑↑↑ 欢迎关注天善智能,我们是专注于商业智能BI,人工智能AI,大数据分析与挖掘领域的垂直社区,学习,问答.求职一站式搞定! ...
python刷步数程序设计_利用python+云函数搭建自己的修改步数api接口
[Asm] 纯文本查看复制代码[{"summary":"{\"slp\":{\"ss\":73,\"lt\" ...
破解网易云js加密,爬虫获取网易云评论
破解网易云js加密,爬虫获取网易云评论抓包这里是对网页版的网易云音乐进行抓包,分析网络请求,url https://music.163.com/#/song?id=36229055 然后可以发现 ...
Python如何在从循环之外不断获取循环内的数据但不退出循环
Python如何在从循环之外不断获取循环内的数据但不退出循环 1.配置环境 2.问题描述 3.问题解决 4.结束语 1.配置环境使用环境:python3.7 平台:Windows10 IDE:PyC ...
python爬取网易云_利用python爬取网易云音乐，并把数据存入mysql
作者:sergiojune Python爱好者社区--专栏作者个人公众号:日常学python 专注python爬虫,数据可视化,数据分析,python前端技术公众号:Python爱好者社区获取本 ...
python爬取歌词_利用Python网络爬虫抓取网易云音乐歌词
今天小编给大家分享网易云音乐歌词爬取方法. 本文的总体思路如下: 找到正确的URL,获取源码: 利用bs4解析源码,获取歌曲名和歌曲ID: 调用网易云歌曲API,获取歌词: 将歌词写入文件,并存入本地 ...
python京东商品采集_利用Python正则表达式抓取京东网商品信息
京东(JD.com)是中国最大的自营式电商企业,2015年第一季度在中国自营式B2C电商市场的占有率为56.3%.如此庞大的一个电商网站,上面的商品信息是海量的,小编今天就带小伙伴利用正则表达式,并且 ...
python读取sas数据集_利用Python获取SAS和R自带数据集
图:北京-奥森公园-2018年4月无论是SAS、R还是Python,本身都自带一些数据集,对于初学者来说,可以通过这些自带的小数据集进行编程练习,无疑是非常方便的。SAS、R作为统计分析软件,本身自 ...
如何利用python刷微博粉丝_使用python进行新浪微博粉丝爬虫
由于最近没事在学python,正好最近也想爬下新浪微博上边的一些数据,在这里主要爬取的是一个人的粉丝具体信息(微博昵称,个人介绍,地址,通过什么方式进行关注),所以就学以致用,通过python来爬取微 ...
python文字验证码识别_利用python进行验证码识别（预处理部分）
# -*- coding: utf-8 -*- """Created on Thu Feb 1 15:52:05 2018@author: Administrator&q ...

python刷网易云_利用Python获取网易云音乐数据,python

python刷网易云_利用Python获取网易云音乐数据,python相关推荐

最新文章

热门文章