目录

1.酷6

2.快手

3.A站

4.B站(音,视频未合成)

5.抖音

6.虎牙


import json
import requests


def ku6_spider(pages=11, save_dir='video'):
    """Download the videos listed in the ku6.com feed, page by page.

    Args:
        pages: Number of feed pages to fetch, starting at page 0.
        save_dir: Directory the ``.mp4`` files are written to. It is
            created when missing.
    """
    # Imported locally because this script's import section only brings in
    # json and requests.
    import os
    import re

    # 1. Request URL and headers (identical for every page).
    base_url = 'https://www.ku6.com/video/feed'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
    }

    # open() does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    for page in range(pages):
        print('正在抓取第{}页数据'.format(page + 1))
        params = {
            'pageNo': str(page),
            'pageSize': '40',
            'subjectId': '76',
        }
        # 2. Request the feed page.
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
        # 3. Parse the JSON body; the entries are under the 'data' key.
        data_list = json.loads(response.text)['data']
        for data in data_list:
            # Titles come from the site, so replace characters that are not
            # allowed in file names.
            video_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', data['title']) + ".mp4"
            video_url = data['playUrl']
            print('下载中:', video_name)
            video_data = requests.get(video_url, headers=headers, timeout=30).content
            # 4. Save the video bytes.
            with open(os.path.join(save_dir, video_name), 'wb') as f:
                f.write(video_data)
            print('下载完成...')


if __name__ == '__main__':
    ku6_spider()

2.快手

import os
import time
import requests
import json
import pprint
import re# 在当前目录创建一个保存视频的目录
# Directory (relative to the current working directory) that receives the
# downloaded videos; it is created on first run.
dir_name = 'video'
if not os.path.exists(dir_name):
    os.mkdir(dir_name)

# Tip for the headers below: copy them whole from the browser, then use
# Ctrl+R with regex enabled, searching for `(.*?): (.*)` (note the space
# after the colon) and replacing with `'$1': '$2',` (space after the colon,
# trailing comma as the separator).
# Request headers sent with the Kuaishou GraphQL search request.
# NOTE(review): the Cookie value embeds a user id and session tokens; it is
# hard-coded here and will presumably expire - consider loading it from the
# environment instead.
# NOTE(review): Content-Length is a fixed value; presumably the HTTP client
# recomputes it from the actual body - confirm.
headers = {
    'accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '1380',
    'content-type': 'application/json',
    'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_acb408fff3a5f7cd020782d58bb9caa9; ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjI4ODYxOTgxLjE2MzczNzIwMzc5NTkuMTQ1NDUxNA==|MS43NjQ1ODM2OTgyODY2OTgyLjI3NzMzOTY1LjE2MzczNzIwMzc5NTkuMTQ1NDUxNQ==|0|graphql-server|webservice|false|NA; client_key=65890b29; userId=1232368006; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABXhLnnN974NXDx7wxD7EXA0gUwiENGncAU1PMNvGRI8hgQVPES30K2a6e8FZ9L3yv89WVXIZ5I1HsDjjWJlzDijZgHPj64KgQ8dkTm8-Aq5monZejiGHAuenrIuDovugsUnncYRtFHLY_bmEtKpBDoaswti5UnDOkiVHAuhMMPlqdPBKYwV_LZ3SGFMeznHUrJv5Wg4o4C45yi-1iuOPyDRoSsmhEcimAl3NtJGybSc8y6sdlIiCHg_pUdXqAoXPplQJ-iHcM2h_MTI_3Wkdnw9ucUMR5UCgFMAE; kuaishou.server.web_ph=b3651a369fb9eb9f33d30ccc2cc691a5ecbf',
    'Host': 'www.kuaishou.com',
    'Origin': 'https://www.kuaishou.com',
    'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E6%85%A2%E6%91%87',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
}
# The request body below was copied from the "Payload" tab next to the
# headers tab in the browser's network panel (quote whatever raises an error;
# double quotes go inside single quotes).
keyword = input("请输入你想要查询的关键词:")

# Endpoint found by searching on the site and inspecting the captured request.
baseurl = "https://www.kuaishou.com/graphql"

# GraphQL document sent with every search request; it is the same for each
# page, so it is defined once outside the loop.
search_query = "query visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      type\n      author {\n        id\n        name\n        following\n        headerUrl\n        headerUrls {\n          cdn\n          url\n          __typename\n        }\n        __typename\n      }\n      tags {\n        type\n        name\n        __typename\n      }\n      photo {\n        id\n        duration\n        caption\n        likeCount\n        realLikeCount\n        coverUrl\n        photoUrl\n        liked\n        timestamp\n        expTag\n        coverUrls {\n          cdn\n          url\n          __typename\n        }\n        photoUrls {\n          cdn\n          url\n          __typename\n        }\n        animatedCoverUrl\n        stereoType\n        videoRatio\n        __typename\n      }\n      canAddComment\n      currentPcursor\n      llsid\n      status\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n"

for pcursor in range(0, 2):
    # "keyword" selects the search term; "pcursor" selects the result page
    # (scrolling down in the browser produces one request per page).
    payload = {
        'operationName': "visionSearchPhoto",
        'query': search_query,
        'variables': {'keyword': keyword, 'pcursor': str(pcursor), 'page': "search"},
    }

    time.sleep(2)
    # The headers declare 'content-type': 'application/json', so the body has
    # to be sent as a JSON string rather than a dict.
    search_response = requests.post(
        url=baseurl, headers=headers, data=json.dumps(payload), timeout=30
    )
    # Dicts are indexed by key, lists by position.
    feeds_list = search_response.json()['data']['visionSearchPhoto']['feeds']
    search_response.close()

    # Each entry of feeds_list is a dict describing one video.
    for feeds in feeds_list:
        title = feeds['photo']['caption']
        print(title)
        video_url = feeds['photo']['photoUrl']
        print(video_url)

        # File names must not contain path separators or other characters
        # that Windows rejects. Very long titles could additionally be
        # shortened with a slice.
        new_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title)
        # Request the video address and read the binary video data.
        mp4_data = requests.get(video_url, timeout=30).content
        with open(dir_name + "/" + new_title + '.mp4', mode='wb') as f:
            f.write(mp4_data)
        print(new_title, "下载完成")

3.A站

import requests
import re
import os
import zipfile

# Ask for the AcFun video page to download.
url = input('请输入视频网址:')
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
}
response = requests.get(url=url, headers=headers, timeout=30)
#print(response.text)

# Take the first "backupUrl" match and strip the escaping around it
# (the leading \":[\" sequence and every remaining backslash).
# Raw strings keep the regex escapes from being read as string escapes.
m3u8_url = re.findall(r'"backupUrl(.*?)"]', response.text)[0].replace('\\":[\\"', '').replace('\\', '')
# The title is whatever precedes the site-wide suffix inside <title>.
title = re.findall(r'<title >(.*?)- AcFun弹幕视频网 - 认真你就输啦 \(\?\ω\?\)ノ- \( ゜- ゜\)つロ</title>', response.text)[0]
#print(m3u8_url)
# The output folder is created automatically with os.
# The title comes from the page, so replace characters that are not valid in
# paths; fall back to a fixed name if nothing usable is left.
title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title).strip() or 'video'
# Joining with '' keeps a trailing separator, because later code builds
# paths as `filename + name`.
filename = os.path.join(title, '')
os.makedirs(filename, exist_ok=True)

playlist_text = requests.get(url=m3u8_url, headers=headers, timeout=30).text
# In an m3u8 playlist every line starting with '#' is a tag or comment; the
# remaining non-empty lines are the segment URIs. Filtering by that rule
# also handles multi-digit tag values and tags not listed explicitly.
m3u8_data = [
    line.strip()
    for line in playlist_text.splitlines()
    if line.strip() and not line.strip().startswith('#')
]
#print(m3u8_data)
from urllib.parse import urljoin

# Download every segment listed in the playlist into the output folder.
for seq, index in enumerate(m3u8_data):
    # Segment URIs are resolved against the playlist URL, which also works
    # when an entry is already an absolute URL.
    ts_url = urljoin(m3u8_url, index)
    # Zero-padded sequence numbers keep the files sortable in playlist order.
    ts_name = f'{seq:05d}'
    # Binary content of the segment.
    ts_content = requests.get(url=ts_url, headers=headers, timeout=30).content
    with open(filename + ts_name + '.ts', mode='wb') as f:
        f.write(ts_content)
    print(ts_name)
print('视频片段下载完成')
print('开始合并......')
# os.listdir() returns names in arbitrary order, so sort before joining.
# NOTE(review): assumes the segment file names sort in playback order
# (e.g. zero-padded numbers) - confirm against the download step.
files = sorted(name for name in os.listdir(filename) if name.endswith('.ts'))
print(files)
# Join the segments by appending their bytes one after another. Writing them
# through zipfile.ZipFile would produce a ZIP archive, not a video stream.
# NOTE(review): the result keeps the .mp4 name but presumably still holds
# transport-stream data - confirm whether a remux step is wanted.
with open(filename + title + '.mp4', mode='wb') as merged:
    for i in files:
        file = filename + i
        with open(file, mode='rb') as part:
            merged.write(part.read())
        # The segment is no longer needed once it has been appended.
        os.remove(file)
print('爬取完成')

4.B站(音,视频未合成)

import json
import re
import subprocess
import requests# 访问网站
def get_response(html_url, timeout=30):
    """Send a GET request with browser-like headers and return the response.

    Args:
        html_url: URL to request (a page or a media file).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` for ``html_url``.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error body is never mistaken for page or media content.
    """
    headers = {
        # A referer (anti-hotlinking) is sent because requests without one
        # are answered with 403.
        'referer': 'https://www.bilibili.com/video/BV1TF411w7vv?spm_id_from=333.337.search-card.all.click&vd_source=415a9fdfbb14115b672b4063903571a0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
    }
    response = requests.get(url=html_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response


# Extract the video information
def get_video_info(html_url):
    """Scrape the title and the stream URLs from a video page.

    Args:
        html_url: URL of the video page.

    Returns:
        A list ``[title, audio_url, video_url]`` holding the page title and
        the ``baseUrl`` of the first audio and first video entry.
    """
    page_html = get_response(html_url=html_url).text

    # Video title, taken from the <h1> element.
    title = re.findall('<h1 title="(.*?)" class="video-title tit">', page_html)[0]
    # JSON blob assigned to window.__playinfo__ inside a <script> element.
    playinfo_raw = re.findall('<script>window.__playinfo__=(.*?)</script>', page_html)[0]

    # Walk the parsed JSON by key to reach the stream lists.
    dash = json.loads(playinfo_raw)['data']['dash']
    return [title, dash['audio'][0]['baseUrl'], dash['video'][0]['baseUrl']]


# Save the data
def save(title, audio_url, video_url):
    """Download the audio and video streams and write them next to the script.

    The audio is written to ``<title>.mp3`` and the video to ``<title>.mp4``;
    the two files are not merged.

    Args:
        title: Video title, used as the file name stem.
        audio_url: URL of the audio stream.
        video_url: URL of the video stream.
    """
    # The title is scraped from the page, so replace characters that are not
    # allowed in file names.
    safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title)
    audio_content = get_response(html_url=audio_url).content
    video_content = get_response(html_url=video_url).content
    with open(safe_title + '.mp3', mode='wb') as f:
        f.write(audio_content)
    with open(safe_title + '.mp4', mode='wb') as f:
        f.write(video_content)
    print(title, '保存成功')


# Merging audio and video is disabled; the draft below shells out to ffmpeg.
# NOTE(review): if re-enabled, pass the arguments as a list without
# shell=True, and use the same sanitized file name stem that save() writes.
# def merge_data(vide_name):
#     print('视频开始合成', vide_name)
#     cmd = f"ffmpeg -i {vide_name}.mp4 -i {vide_name}.mp3 -c:a aac -strict experimental {vide_name}output.mp4"
#     #print(cmd)
#     subprocess.run(cmd, shell=True)
#     print('视频合成完毕', vide_name)


def main(bv_id):
    """Download the audio and video of the Bilibili video with id ``bv_id``."""
    url = f'https://www.bilibili.com/video/{bv_id}'
    video_info = get_video_info(url)
    save(video_info[0], video_info[1], video_info[2])
    #merge_data(video_info[0])


keyword = input('请输入要下载的视频BV号:')
main(keyword)

# url = 'https://www.bilibili.com/video/BV1TF411w7vv'
# video_info = get_video_info(url)
# print(video_info)

5.抖音

import requests
import re

# Page of the video to download.
url = 'https://www.douyin.com/video/7114220525978668303'

# Browser-like headers for the page request.
# NOTE(review): the cookie is hard-coded and will presumably expire - confirm
# whether it should be loaded from configuration instead.
headers = {
    'cookie': 'douyin.com; ttcid=444dfe8e89ff4d99b0662076ad171c8775; ttwid=1%7CTnFKlrGi3lHjKf5bshFdP9Nwu_Vsiwo-TxvX9NISgj8%7C1642083887%7Cfbfa904ea2900763eb6ac090bdd09014d80840da1ca485bbfea193d5401b330e; MONITOR_WEB_ID=c27b9f4a-4917-4256-be93-e948308467e3; odin_tt=0510c3c4196f54b541a96ac64e8b585b3a755be85057da8a1f3fa068e3f7b75ca2de4345e2b856f1e7b3f9455d86079731fe7d07a9f10890f26855d3674858e1; passport_csrf_token=e0b90cb756903c370592bd558c2b0cf5; passport_csrf_token_default=e0b90cb756903c370592bd558c2b0cf5; s_v_web_id=verify_l268jj46_kc7yYkD6_YHWW_4x4v_9snI_EDE0zro77uRn; AVATAR_FULL_LOGIN_GUIDE_COUNT=1; AVATAR_FULL_LOGIN_GUIDE_TIMESTAMP=1650982839652; AVATAR_FULL_LOGIN_GUIDE_ITA_COUNT=1; AVATAR_FULL_LOGIN_GUIDE_ITA_TIMESTAMP=1650982839652; __ac_nonce=0627ba36600d465d72261; __ac_signature=_02B4Z6wo00f01zrB8EAAAIDCWcswKSh.eLM65fTAAKzW8srQpmSjmL6YX9IsdmMSL4a9EBuyJvIwNMROqFQktniG-Ur-UDPK6wHInC8QKqRYUmyGnflwUXLpKzPgVt2FtREyprGmCDAZLrIpcc; douyin.com; strategyABtestKey=1652269927.635; AB_LOGIN_GUIDE_TIMESTAMP=1652269927510; AVATAR_LOGIN_GUIDE_COUNT=1; _tea_utm_cache_2285=undefined; _tea_utm_cache_6383=undefined; _tea_utm_cache_1300=undefined; pwa_guide_count=3; IS_HIDE_THEME_CHANGE=1; THEME_STAY_TIME=299808; msToken=XGPVAVUHDi9iTEQRjdXuQ0YyetxhHq0c9EH1dLLpttanbCXsNSD0DRxwk9oUB0vZ7LB9vKd-ABi2kAkzj2lCn1x98lJ4iTFbf260RcLav-G4QkhNyq8qV9i3oEJRyc8t; home_can_add_dy_2_desktop=1; msToken=3ALqenaebbJHw7kQDiDG6aRAgVYm5WM1pVGqmyyidbGgYpWRWKn-wQ9tcjoxWrHvwcqoYAx3tQ4IGE1qixdq2ei_fPrirMeeI6HeooU3sGR2wyWQ2OAAh2RejVJOrmpA; tt_scid=Gp0q0JW0LDreTqplgpajIZNHCB0.p1NcVv0hhZBgaGDw4SFxkXGlXfKafiCVmWAWc537',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
}

response = requests.get(url=url, headers=headers)
import os

# Decode the page with the encoding detected from its content.
response.encoding = response.apparent_encoding
#print(response.text)

title = re.findall('<title data-react-helmet="true">(.*?)</title>', response.text)[0]
# NOTE(review): 'vRLmmJ' looks like a marker taken from the page this script
# was written against - confirm it is present on other video pages.
href = re.findall('src(.*?)vRLmmJ', response.text)[0]
# The captured text is URL-encoded and starts with '":"'; decoding it and
# replacing that prefix with the scheme yields the video address.
video_url = requests.utils.unquote(href).replace('":"', 'https:')
#print(video_url)

video_content = requests.get(url=video_url, timeout=30).content

# open() does not create missing directories, and the page title may contain
# characters that are not allowed in file names.
os.makedirs('video', exist_ok=True)
safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title)
with open(os.path.join('video', safe_title + '.mp4'), mode='wb') as f:
    f.write(video_content)
print(title, video_url)

6.虎牙

import requests
import re
import os

# Browser-like headers, identical for every request below.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
}

# open() does not create missing directories.
os.makedirs('video', exist_ok=True)

for page in range(1, 5):
    print(f'正在采集第{page}页的数据内容')
    link = f'https://v.huya.com/g/all?set_id=51&order=hot&page={page}'
    response = requests.get(url=link, headers=headers, timeout=30)
    # print(response.text)
    # Collect the video ids from the listing page; (\d+) captures one or more
    # digits from each <li data-vid="..."> element.
    video_id_list = re.findall(r'<li data-vid="(\d+)">', response.text)
    print(video_id_list)

    for video_id in video_id_list:
        url = f'https://liveapi.huya.com/moment/getMomentContent?videoId={video_id}&uid=&_=1652789442223'
        # Parse the JSON body once and read both fields from it.
        moment = requests.get(url=url, headers=headers, timeout=30).json()['data']['moment']
        title = moment['title']
        video_url = moment['videoInfo']['definitions'][0]['url']

        # Saving also needs a request: fetch the binary video data.
        video_content = requests.get(url=video_url, headers=headers, timeout=30).content
        # The title comes from the API, so replace characters that are not
        # allowed in file names.
        safe_title = re.sub(r'[\\/:*?"<>|\r\n]', '_', title)
        with open(os.path.join('video', safe_title + '.mp4'), mode='wb') as f:
            f.write(video_content)
        print(title, '保存成功')

文章存在借鉴,如有侵权请联系修改删除!

python小程序——视频篇相关推荐

  1. python编程可视化小程序_人人都可以写的可视化Python小程序第二篇:旋转的烟花...

    兴趣是最好的老师 枯燥的编程容易让人放弃,兴趣才是最好的老师.无论孩子还是大人,只有发现这件事情真的有趣,我们才会非常执着的去做这件事,比如打游戏.如果编程能像玩游戏一样变得有趣,我相信很多人就特别愿 ...

  2. 使用python完成的一个烟花小程序-人人都可以写的可视化Python小程序第二篇:旋转的烟花...

    兴趣是最好的老师 枯燥的编程容易让人放弃,兴趣才是最好的老师.无论孩子还是大人,只有发现这件事情真的有趣,我们才会非常执着的去做这件事,比如打游戏.如果编程能像玩游戏一样变得有趣,我相信很多人就特别愿 ...

  3. 人人都可以写的可视化Python小程序第二篇:旋转的烟花

    兴趣是最好的老师 枯燥的编程容易让人放弃,兴趣才是最好的老师.无论孩子还是大人,只有发现这件事情真的有趣,我们才会非常执着的去做这件事,比如打游戏.如果编程能像玩游戏一样变得有趣,我相信很多人就特别愿 ...

  4. python画静态烟花_人人都可以写的可视化Python小程序第二篇:旋转的烟花

    兴趣是最好的老师 枯燥的编程容易让人放弃,兴趣才是最好的老师.无论孩子还是大人,只有发现这件事情真的有趣,我们才会非常执着的去做这件事,比如打游戏.如果编程能像玩游戏一样变得有趣,我相信很多人就特别愿 ...

  5. 【有趣的Python小程序】Python多个简单上手的库制作WalkLattice 走格子游戏 (思路篇)上

    篇写上一个思路篇,那么今天我们就来完成这一项工作 源代码和配套文件 链接: https://caiyun.139.com/m/i?135ClY1yWrSKX 提取码:e4pq 复制内容打开中国移动云盘 ...

  6. 详细解析黑马微信小程序视频--【思维导图知识范围】

    语言视频选择 收录专辑链接 C 张雪峰推荐选择了计算机专业之后-在大学期间卷起来-[大学生活篇] JAVA 黑马B站视频JAVA部分的知识范围.学习步骤详解 JAVAWEB 黑马B站视频JAVAWEB ...

  7. 小程序视频旋转的相关问题

    背景 最近在开发小程序时遇到个需求,就是在小程序页面中嵌入一个广告视频,客户给的视频时横屏播放的,但是ui显示却要求是竖屏播放,在这里记录一下实现这个效果的踩坑全过程 css transform旋转v ...

  8. python降低图片分辨率_手把手:扫描图片又大又不清晰?这个Python小程序帮你搞定!...

    原标题:手把手:扫描图片又大又不清晰?这个Python小程序帮你搞定! 大数据文摘作品 编译:HAPPEN.于乐源.小鱼 一位乐于分享学生精彩笔记的大学教授对于扫描版的文件非常不满意--颜色不清晰并且 ...

  9. 微信小程序 视频列表 封面图 禁止多个视频同时播放

    微信小程序视频列表用到的组件是  video 链接  https://developers.weixin.qq.com/miniprogram/dev/component/video.html 先附上 ...

最新文章

  1. 怎么使用CorelDRAW 中的默认调色板
  2. C++设计模式(转)
  3. GsonBuilder
  4. 写博客必备的复制黏贴
  5. java中打开指定的文件夹
  6. mysql-表完整性约束
  7. 我国高性能计算机发展,中国高性能计算机发展水平与趋势
  8. 【JS】逻辑运算符 非! 与 或||(处理对象时注意)
  9. mysql主从延迟时间是多少_MySQL主从延迟
  10. html输出json对象属性值,用javascript中的HTML元素值构建JSON对象
  11. oracle目录解析,Oracle目录分析与比较
  12. linux:账号管理
  13. web测试和app测试的区别你知道吗?
  14. 活动回顾 I 《传奇动物园》项目团队沙盘演练圆满结束!
  15. 2月全球搜索引擎市场份额之争:百度战胜Google夺冠
  16. Redis之EXPIRE
  17. 计算机拨打比赛策划书,电脑打字擂台赛活动策划书
  18. 第006话 皮皮和月亮石!
  19. matplotlib之属性组合包(cycler)
  20. PDF文件打开密码的消除办法

热门文章

  1. 网络推广文案八大步,让你的朋友圈引人注目!
  2. 证券行业信息化17_我的香港游记3_交易系统技术支持部_会计与结算技术支持部都干些什么?
  3. java什么时候触发gc_什么时候触发 GC
  4. JACK——TeamsMaual6 Team Formation
  5. 穆利堂推荐 新周刊,当下中国的12中孤单
  6. 为啥不用美元作为IPFS体系的激励?
  7. 创建数据库 mysql 1044_mysql ERROR 1044 (42000): Access denied for user ''@'localhost' to database...
  8. bash: /home/xxx/anaconda3/bin/conda: No such file or directory
  9. python中各种序列/容器的索引、切片小结;如何取得可迭代对象中的element?如何取元素?
  10. 树莓派软路由Openwrt