spiders文件夹下的爬虫文件(自己在spiders下创建)
# -*- coding: utf-8 -*-
import scrapy
import json
from Douyu.items import DouyuItem


class DouyuspiderSpider(scrapy.Spider):
    """Page through Douyu's mobile room API, yielding one item per streamer.

    Each item carries the streamer's nickname and vertical cover image URL;
    after a full page is emitted, the next page is requested by bumping the
    ``offset`` query parameter by the page size (20).
    """

    name = 'douyuspider'
    allowed_domains = ['douyucdn.cn']
    basicUrl = "http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset="
    offset = 0
    start_urls = [basicUrl + str(offset)]

    def parse(self, response):
        """Parse one API page; stop paginating when ``data`` comes back empty."""
        data_list = json.loads(response.body)["data"]
        if not data_list:
            # An empty page means we ran past the last room — end the crawl.
            return
        for entry in data_list:
            item = DouyuItem()
            item["nickname"] = entry["nickname"]
            item["imagelink"] = entry["vertical_src"]
            yield item
        # Advance to the next page and chain back into this same callback.
        self.offset += 20
        yield scrapy.Request(self.basicUrl + str(self.offset), callback=self.parse)

items文件

# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class DouyuItem(scrapy.Item):
    """One Douyu streamer: display name plus vertical cover-image URL."""

    # Streamer's display name (used later by the pipeline to rename the file).
    nickname = scrapy.Field()
    # URL of the vertical cover image to download.
    imagelink = scrapy.Field()
pipelines文件
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os  # used to rename the downloaded file
import json
import scrapy
# Import via the package path so this works from any working directory
# (the bare `from settings import ...` form only worked when CWD was Douyu/).
from Douyu.settings import IMAGES_STORE as images_store
from scrapy.pipelines.images import ImagesPipeline  # built-in image downloader


class DouyuPipeline(ImagesPipeline):
    """Download each item's image, then rename it to ``<nickname>.jpg``."""

    def get_media_requests(self, item, info):
        """Schedule the item's image URL for download."""
        image_link = item["imagelink"]
        yield scrapy.Request(image_link)

    def item_completed(self, results, item, info):
        """Rename the stored file after the nickname once the download finishes.

        ``results`` is a list of ``(ok, info_dict)`` pairs; ``info_dict["path"]``
        is the file path relative to IMAGES_STORE.
        """
        image_paths = [x["path"] for ok, x in results if ok]
        if not image_paths:
            # Download failed — nothing to rename; pass the item through
            # instead of crashing with IndexError on an empty list.
            return item
        # os.path.join inserts the separator that plain string concatenation
        # was missing (IMAGES_STORE has no trailing slash).
        os.rename(os.path.join(images_store, image_paths[0]),
                  os.path.join(images_store, item["nickname"] + ".jpg"))
        return item

settings文件

# -*- coding: utf-8 -*-

# Scrapy settings for the Douyu project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'Douyu'

SPIDER_MODULES = ['Douyu.spiders']
NEWSPIDER_MODULE = 'Douyu.spiders'

# Where ImagesPipeline stores downloaded images — set this to your own path.
# Raw string: a plain "D:\P..." only worked because \P, \D, \i happen not to
# be escape sequences; that emits DeprecationWarning on Python 3.6+ and will
# become a SyntaxError, so spell it as a raw string (same runtime value).
IMAGES_STORE = r"D:\PycharmProjects\Douyu\images"

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'Douyu (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'Douyu.middlewares.DouyuSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'Douyu.middlewares.DouyuDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'Douyu.pipelines.DouyuPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

scrapy下载斗鱼主播图片相关推荐

  1. python爬虫(五)---斗鱼主播图片下载并重命名

    目的:爬取照片用主播名进行重命名 url:http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=0 (一)基本步骤 步骤 ...

  2. scrapy抓斗鱼主播的图片

    1.该项目通过此网站获取信息 http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=0 打开是这样子的,(如果显示乱码, ...

  3. python爬取斗鱼主播图片

    今天闲来无事,爬取一下斗鱼女主播的图片,之前学习scrapy的时候写过一个找不到了,今天使用requests和bs4重新写了一份,闲话不多说,直奔主题. 首先用Chrome浏览器访问斗鱼官网: 当然是 ...

  4. python爬取斗鱼主播图片_F_hawk189_新浪博客

    今天闲来无事,爬取一下斗鱼女主播的图片,之前学习scrapy的时候写过一个找不到了,今天使用requests和bs4重新写了一份,闲话不多说,直奔主题. 首先用Chrome浏览器访问斗鱼官网: 当然是 ...

  5. 使用scrapy爬取手机版斗鱼主播的房间图片及昵称

    目的:通过fiddler在电脑上对手机版斗鱼主播进行抓包,爬取所有主播的昵称和图片链接 关于使用fiddler抓取手机包的设置: 把手机和装有fiddler的电脑处在同一个网段(同一个wifi),手机 ...

  6. scrapy 斗鱼 主播信息爬取

    原文链接: scrapy 斗鱼 主播信息爬取 上一篇: scrapy 妹子图网站 全站图片爬取 下一篇: TensorFlow models 的slim 模块 使用预训练模型进行识别 api http ...

  7. python怎么爬虎牙_Python爬虫:爬取虎牙星秀主播图片

    动态爬取思路讲解 1.简单的爬虫只需要访问网站搜索栏处的url,就可以在开发者工具(F12)处,利用正则表达式.Xpath.css等进行定位并抓取数据: 2.虎牙星秀页面不同于简单的网页,随时都在更新 ...

  8. Python爬取美女主播图片适合初学者

    Python爬取虎牙女主播图片,非常适合初学者,代码少,思路清晰 开发环境Pycharm import time import requests from lxml import etree from ...

  9. 斗鱼直播画面怎么弄到自己网页上_“集战!创界山勇者”斗鱼主播招募活动开始啦!...

    关注微信公众号:梦幻模拟战手游 Langrisser传说,由你书写! <梦幻模拟战>x<魔神英雄传>联动活动火热来袭!"小救星"战部渡与伙伴剑部武一郎.忍部 ...

最新文章

  1. 常见java相关问题
  2. java聊天室小程序论文_在Java项目中利用continue与break制作一个聊天室小程序
  3. 008_JsonConfig对象
  4. python 递归 分叉_浅谈Python 递归算法指归
  5. 7-37 图形卡片排序游戏 (40 分)
  6. ios地图小例子和手势的使用 供大家参考一下呦
  7. Cloud一分钟 | 电商月将至,腾讯云DCDB助力电商企业应对支付洪峰
  8. MySQL无法启动服务器(1067)
  9. GLSurfaceView源码分析以及简单使用
  10. 【Redis】redis-3.0.0安装以及集群的搭建
  11. 输入某二叉树的前序遍历和中序遍历的结果,请重建出该二叉树。
  12. 霍尼韦尔门禁说明书_霍尼韦尔指纹锁说明书
  13. [青海、甘南之行散记] 当风吹过高原,一颗心在说话
  14. 自然语言处理NLP简介
  15. Excel中ISEVEN函数用法之判断数值奇偶性
  16. 【第163期】游戏策划做游戏:用UnityBolt实现游泳功能
  17. caffe常用层:Reduction层
  18. Flink大数据实时计算系列-Flink的Operator Chains的优化机制
  19. 计算机管理损坏的图像,win7系统提示损坏的图像的解决方法
  20. [cocos2d-x] -- Cocos2d-x简介

热门文章

  1. 输入一串字符,将其中的大写变成小写,若不为大写则原样输出
  2. Cadence(virtuoso)集成电路设计软件基本操作——库和库文件
  3. matlab中频域信号IFFT,MATLAB中ifft函数用法、性质、特性-以及与fft的组合应用全面深入解析(含程序)...
  4. C++标准库分析总结(一)——<标准库简介>
  5. ₣Y2XAEfuV1₳ 这缎,登陸块守,友爱
  6. redisson + CacheManager缓存管理
  7. Go语言开发学习笔记(持续更新中)
  8. duilib 关于wke 控件焦点问题
  9. 抖音素材哪里收集_抖音素材哪里找?最全攻略来了
  10. Json对象和string之间的转换