完整代码:

# coding:utf-8
# 引入requests包和正则表达式包re
import requests
import re
from datetime import datetime
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium import webdriver
import time
from bs4 import BeautifulSoup
# # htp='http://pic.weather.com.cn/images/cn/photo/2021/09/16/20210916100549696B7FD68719D9208B6E945E3B1E260B_xm.jpg'
# htp='http://pic.weather.com.cn/images/cn/photo/2021/09/16/20210916100549696B7FD68719D9208B6E945E3B1E260B_xm.jpg'
# htp='https://bing.ioliu.cn/photo/FreshSalt_ZH-CN12818759319?force=ranking_1'
# rs=re.findall('.+?=ranking_1',htp)
# print(rs)'''
# 自定义下载页面函数
# Download helper for the whole script.
def load_page(url):
    """Download *url* with requests and return the raw response body as bytes."""
    # Fix: requests.get() has no default timeout and can hang forever on a
    # stalled connection — bound it explicitly.
    response = requests.get(url, timeout=15)
    data = response.content
    return data
def get_image(html):
    """Find every http://...png image URL in *html* and download each one
    into ./picture/ as '第N张.jpg'.

    html: raw page bytes as returned by load_page(); repr() is applied so the
    bytes can be regex-scanned without picking a text encoding (kept from the
    original design).
    """
    import os  # local import so this reconstructed block stays self-contained
    # Fix: the original pattern r'http://[\S]+.png' left the '.' unescaped,
    # so any character could precede 'png'; escape it and drop the redundant
    # single-element character class.
    pattern = re.compile(r'http://\S+\.png')
    print(pattern)
    get_images = re.findall(pattern, repr(html))  # image links found in the page
    print(get_images)
    # Fix: open() below crashed with FileNotFoundError when ./picture was missing.
    os.makedirs('./picture', exist_ok=True)
    # enumerate(..., 1) replaces the hand-maintained 'num' counter.
    for num, img in enumerate(get_images, 1):
        image = load_page(img)  # download the image bytes
        with open('./picture/第%s张.jpg' % num, 'wb') as fb:
            fb.write(image)
        print("正在下载第%s张图片" % num)
    print("下载完成!")


if __name__ == '__main__':
    # Crawl the China Weather picture portal and save its .png images.
    url = 'http://p.weather.com.cn/'
    html = load_page(url)  # download the listing page
    get_image(html)        # extract and download every image on it
'''
'''
import re
import urllib.request  # Python2中使用的是urllib2
import urllib
import osdef load_page(url):headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \Chrome/74.0.3729.131 Safari/537.36'}response = requests.get(url,headers=headers)data = response.content# print(data)return datadef get_image(html):'图片地址注意要从浏览器中查看网页源代码找出图片路径'# 要加括号,作为元组返回# regx = r'src="(.+?\.jpg)" pic_ext'  # 某个贴吧的图片#bing网页图片获取regx= r'data-progressive.*?src="(.*?)"'  # Bing壁纸合集抓取地址# regx= r'http://[\S]*.jpg\?imageslim' # Bing壁纸合集抓取地址# regx =r'<h3>(.*?)</h3>'# regx = r'src="(.*?\.jpg)"'# regx= r'http://[\S]*.jpg'# https: // bing.ioliu.cn / photo / FreshSalt_ZH - CN12818759319?force = ranking_1# http://h2.ioliu.cn/bing/FalklandRockhoppers_ZH-CN5370686595_640x480.jpg?imageslim# reg = r'src="(.+?\.jpg)" '  # 我的网站图片地址# reg = r'zoomfile="(.+?\.jpg)" '  # 威锋网手机壁纸#正则表达式过滤pattern = re.compile(regx)  # 编译表达式构造匹配模式get_images = re.findall(pattern, repr(html))  # 在页面中匹配图片链接try:#保存图片path = 'E:\\Temporary\\new'  # 输入保存文件的目录地址if not os.path.isdir(path):os.makedirs(path)  # 检查是否存在地址,如果不存在将自动创建文件夹目录paths = path + '\\'  # 保存在test路径下x = 0for imgurl in get_images:#imglist:# urllib.request.urlretrieve(imgurl, '{}{}.jpg'.format(paths, x))# print("正在下载第%s张图片" % x)# # print("下载完成!")image = load_page(imgurl)  # 根据图片链接,下载图片链接# # 将下载的图片保存到对应的文件夹中with open( path +'\.第%s张.jpg' % x ,'wb') as fb:  # /spider_picturefb.write(image)print("正在下载第%s张图片" % x)x = x +1print("下载完成!")except:# continueprint("下载错误!")if __name__ == '__main__':# html = getHtml("http://bbs.feng.com/read-htm-tid-10616371.html")  # 威锋网手机壁纸# html = getHtml("http://www.omegaxyz.com/")  # 我的网站图片地址url = "https://bing.ioliu.cn/ranking"  # Bing壁纸合集抓取地址# url="http://tieba.baidu.com/p/2460150866"  # 某个贴吧的图片html = load_page(url)get_image(html)# print(get_image(html))
''''''
import re,urllib.request# request = urllib.request.urlopen('http://www.imooc.com/course/list')
# buf = request.read().decode('utf-8')
# # print(buf)
# # listurl = re.findall(r'src=.+?\.jpg',buf)
# listurl = re.findall(r'src="(.*?\.jpg)"',buf)
# print (listurl)def load_page(url):headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \Chrome/74.0.3729.131 Safari/537.36'}response = requests.get(url,headers=headers)data = response.content# print(data)return dataurl='https://bing.ioliu.cn/ranking'
html = load_page(url)
# print(html)
# regx =r'class="mark".*?href=.*?=ranking_1'#r'src="(.*?\.jpg)"'
regx= r'http://[\S]*.jpg\?imageslim' # Bing壁纸合集抓取地址
pattern = re.compile(regx)  # 编译表达式构造匹配模式
print(pattern)
listurl  = re.findall(pattern, repr(html))  # 在页面中匹配图片链接
print(listurl)
# https://bing.ioliu.cn/photo/FreshSalt_ZH-CN12818759319?force=ranking_1
res=[]
index = 0
for url in  listurl:# print(url)a= re.findall(r'\/photo\/.*?=ranking_1',url)# print(a[0])# a = re.findall(r'com(\/.+\.jpg)', url)res.append('http://h2.ioliu.cn/bing' + a[0])
print(res[2])
# http://h2.ioliu.cn/bing/FalklandRockhoppers_ZH-CN5370686595_1920x1080.jpg?imageslim
# http://h2.ioliu.cn/bing/FalklandRockhoppers_ZH-CN5370686595_1920x1080.jpg?imageslim
# http://h2.ioliu.cn/bing/FalklandRockhoppers_ZH-CN5370686595_640x480.jpg?imageslimfor url in res:print(url)image = load_page(url)  # 根据图片链接,下载图片链接with open('./picture/第%s张.jpg' % index, 'wb') as fb:  # /spider_picturefb.write(image)print("正在下载第%s张图片" % index)index = index + 1# #关闭文件fb.close()'''
'''
def load_page(url):# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) \# Chrome/74.0.3729.131 Safari/537.36'}# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}cookie = 'Cookie: winWH=%5E6_1446x763; BDIMGISLOGIN=0; BDqhfp=%E5%A3%81%E7%BA%B8%26%260-10-1undefined%26%260%26%261; BAIDUID=26E387397DECC40EF0CEC97E91622564:FG=1; __yjs_duid=1_58da01e99cf28c2cfbbb855096fa3e471635314939309; BIDUPSID=26E387397DECC40EF0CEC97E91622564; PSTM=1635315243; BDUSS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; BDUSS_BFESS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; MCITY=-%3A; BDSFRCVID_BFESS=rFuOJeC62uoQSpJHF-lJb5s04mqTv45TH6aoQGj2DDyBWL6TD1tDEG0PMM8g0KuMV-nyogKK3gOTH4PF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF_BFESS=tb4qoC82fCD3fP36q4TjMbteblOf5Rj-HD7yWCv2JPbcOR5Jj65hyP-B2hj70Duf-DutQKt22RTSMDTF3MA--tRLbUcr2lTdMncuLlTdWRLWsq0x0hjte-bQyNOa--rzBIOMahkb5h7xOKbMQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3tjjTyjaD8q6tfJb3BQ5re54OEenurKxjhXUI8LNDH-4RIL5RZapLEapcDMt8xj-ca3b53jJO7ttoyLmvBoqbgMloR8KQ2Q5QMQML1Db3uW6vMtg3t2xLEatToepvoDPJc3Mv30-jdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKtILM3D; firstShowTip=1; 
indexPageSugList=%5B%22%E5%A3%81%E7%BA%B8%22%2C%22%E8%B4%A7%E8%BD%A6%E6%8E%92%E9%98%9F%E5%8F%AB%E5%8F%B7%E7%9C%8B%E6%9D%BF%22%2C%22%E8%B4%A7%E8%BD%A6%E6%8E%92%E9%98%9F%E5%8F%AB%E5%8F%B7%22%2C%22%E8%B4%A7%E8%BD%A6%E6%8E%92%E9%98%9F%E7%9C%8B%E6%9D%BF%22%2C%22%E8%BD%A6%E8%BE%86%E6%8E%92%E9%98%9F%E7%9C%8B%E6%9D%BF%22%2C%22%E8%BD%A6%E8%BE%86%E6%8E%92%E9%98%9F%E5%8F%AB%E5%8F%B7%22%2C%22%E8%BD%A6%E8%BE%86%E5%8F%AB%E5%8F%B7%E7%89%8C%22%2C%22%E7%9C%8B%E6%9D%BF%E7%AE%A1%E7%90%86%22%2C%22%E8%AE%BE%E5%A4%87%E8%B4%A7%E6%9E%B6%E5%B7%A5%E5%85%B7%22%5D; cleanHistoryStatus=0; ab_sr=1.0.1_MjVkMmNlODQ4Yzc0ZDRlMDk5Y2IyZGNmYjk0NTJiNTljY2E5MjgxOTBiNTM0YmM2ZTY5OWIxZGRkNDhkM2U5ZTg4ZmVmNmYzM2I1MzZiMWRlZDc1MzliNzM4MWE4NDIx; H_PS_PSSID=35784_35106_31254_35734_35488_35774_34584_35490_35245_35796_35316_26350_22157; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; PSINO=5; BA_HECTOR=802l81a1250k8000nk1gv21o30q; BAIDUID_BFESS=26E387397DECC40EF0CEC97E91622564:FG=1; BDRCVFR[Q5XHKaSBNfR]=mk3SLVN4HKm; userFrom=null'# cookie ='Cookie: BAIDUID=26E387397DECC40EF0CEC97E91622564:FG=1; __yjs_duid=1_58da01e99cf28c2cfbbb855096fa3e471635314939309; BIDUPSID=26E387397DECC40EF0CEC97E91622564; PSTM=1635315243; BDUSS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; BDUSS_BFESS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; MCITY=-%3A; BDSFRCVID_BFESS=rFuOJeC62uoQSpJHF-lJb5s04mqTv45TH6aoQGj2DDyBWL6TD1tDEG0PMM8g0KuMV-nyogKK3gOTH4PF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF_BFESS=tb4qoC82fCD3fP36q4TjMbteblOf5Rj-HD7yWCv2JPbcOR5Jj65hyP-B2hj70Duf-DutQKt22RTSMDTF3MA--tRLbUcr2lTdMncuLlTdWRLWsq0x0hjte-bQyNOa--rzBIOMahkb5h7xOKbMQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3tjjTyjaD8q6tfJb3BQ5re54OEenurKxjhXUI8LNDH-4RIL5RZapLEapcDMt8xj-ca3b53jJO7ttoyLmvBoqbgMloR8KQ2Q5QMQML1Db3uW6vMtg3t2xLEatToepvoDPJc3Mv30-jdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKtILM3D; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BAIDUID_BFESS=26E387397DECC40EF0CEC97E91622564:FG=1; delPer=0; PSINO=5; ZD_ENTRY=empty; BDRCVFR[gQU9D8KBoX6]=IdAnGome-nsnWnYPi4WUvY; H_PS_PSSID=35784_35106_31254_35734_35488_35774_34584_35490_35245_35796_35316_26350_22157; BA_HECTOR=8h8h84ak8ha5250kk91gv20jh0q; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; userFrom=www.baidu.com; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; BDRCVFR[CLK3Lyfkr9D]=mk3SLVN4HKm; ab_sr=1.0.1_NGZiYjhiM2E1YWMwMzdjMDhhNzQ0ODYyODhmMTZkMjlmYjBlZDNlNWQwYmM1MThkMTZlNDU1YjQ3ZjBmOWJjNDRhZDQ4ZTIxNzk0ZDc2MzkxMGE3NjM1ZWI4ZjQ3Y2I1'headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36','Cookie': cookie}response = requests.get(url,headers=headers)data = response.contentreturn data
#网址
# url='https://bing.ioliu.cn/'#'https://bing.ioliu.cn/ranking'
url='https://image.baidu.com/search/albumsdetail?tn=albumsdetail&word=%E8%88%AA%E6%8B%8D%E5%9C%B0%E7%90%83%E7%B3%BB%E5%88%97&fr=albumslist&album_tab=%E8%AE%BE%E8%AE%A1%E7%B4%A0%E6%9D%90&album_id=312&rn=30'
html = load_page(url)
print(html)
# 和图片地址
# regx= r'http://[\S]*1920x1080.jpg\?imageslim' # Bing壁纸合集抓取地址#'http://h2.ioliu.cn/bing/StDwynwensDay_ZH-CN3187096355_640x480.jpg?imageslim'
# regx= r'https://[\S]*.jpg|https://[\S]*.png'#(\S*?jpg|\S*?JPG)
# regx=r'https://[\s]*.jpeg'#'https://pics3.baidu.com/feed/d8f9d72a6059252d7121de6ec645c7325ab5b98b.jpeg?token=6f062ca0182058a1405782417a1ee980'
#r'src[=\'\"\s]+[^\"\']+\.jpg[\"\']?token=[\s]'
regx='https://[\S]*.jpeg|https://[\S]*.jpg|https://[\S]*.png'
'https://pics6.baidu.com/feed/3b87e950352ac65ca4b6ab1b1633751892138a77.jpeg?token=46bcb0c0a40b6dd6f8f57420e76b3f14'
'https://pics4.baidu.com/feed/5bafa40f4bfbfbed7b46ef700926313faec31f86.png?token=4b3d3cbdea69f32693f8c3b5d8b4e933'
pattern = re.compile(regx)  # 编译表达式构造匹配模式
print(pattern)
listurl  = re.findall(pattern, repr(html))  # 在页面中匹配图片链接
print(listurl)
index = 0
for url in  listurl:image = load_page(url)  # 根据图片链接,下载图片链接with open('./picture/第%s张' % index+ datetime.strftime(datetime.now(),'%H-%M-%S')+'.jpg','wb') as fb:  # /spider_picture#+ datetime.strftime(datetime.now(),'%H-%M-%S')+fb.write(image)print("正在下载第%s张图片" % index  )#%Y-%m-%d  %p :%Sindex = index + 1# #关闭文件fb.close()'''
'''
# page = input('请输入要爬取多少页:')
page = 3#int(page) + 1
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}
pn = 1
keyword="ppt目录"
# pn是从第几张图片获取 百度图片下滑时默认一次性显示30张
for m in range(1, page):url = 'https://image.baidu.com/search/acjson?'param = {'tn': 'resultjson_com','logid': '8846269338939606587','ipn': 'rj','ct': '201326592','is': '','fp': 'result','queryWord': keyword,'cl': '2','lm': '-1','ie': 'utf-8','oe': 'utf-8','adpicid': '','st': '-1','z': '','ic': '','hd': '','latest': '','copyright': '','word': keyword,'s': '','se': '','tab': '','width': '','height': '','face': '0','istype': '2','qc': '','nc': '1','fr': '','expermode': '','force': '','cg': 'girl','pn': pn,  # 从第几张图片开始'rn': '30','gsm': '1e',}page_text = requests.get(url=url, headers=header, params=param)# print(page_text.content)page_text.encoding = 'utf-8'page_text = page_text.json()info_list = page_text['data']del info_list[-1]img_path_list = []for i in info_list:img_path_list.append(i['thumbURL'])index=0for img_path in img_path_list:img_data = requests.get(url=img_path, headers=header).contentprint(img_path)# img_path = './' + str(n) + '.jpg'# with open(img_path, 'wb') as fp:#     fp.write(img_data)# n = n + 1with open('./picture/第%s张' % index + datetime.strftime(datetime.now(), '%Y-%m-%d %H-%M') + '.jpg','wb') as fb:  # /spider_picture#+ datetime.strftime(datetime.now(),'%H-%M-%S')+fb.write(img_data)print("正在下载第%s张图片" % index)  # %Y-%m-%d  %p :%Sindex = index + 1# #关闭文件fb.close()pn += 29
# '''
# (the stray quote marker above terminates the disabled draft kept in the
# original file; everything below is the final, active version of the script)


def web(url):
    """Render *url* in a real Firefox via selenium and return the parsed DOM.

    Needed for pages that assemble their image list with JavaScript, where a
    plain requests.get() never sees the final HTML.
    """
    opt = FirefoxOptions()
    opt.headless = False  # show the browser window while the page loads
    driver = webdriver.Firefox(options=opt)
    driver.set_window_size(400, 900)
    driver.get(url)                 # load the page
    source = driver.page_source     # fully rendered HTML
    data = BeautifulSoup(source, 'html.parser')
    driver.quit()  # Fix: the original never closed Firefox and leaked the process.
    return data


def load_page(url):
    """Fetch *url* with a desktop User-Agent plus a Baidu session cookie and
    return the raw body bytes.

    NOTE(review): the cookie below is a hard-coded personal Baidu session; it
    expires and should not live in source control — move it to configuration.
    """
    # Fix: the original value began with the literal text 'Cookie: ', so the
    # request actually sent a header value of 'Cookie: BAIDUID=...'; the header
    # name must not be repeated inside the header value.
    cookie = ('BAIDUID=26E387397DECC40EF0CEC97E91622564:FG=1; __yjs_duid=1_58da01e99cf28c2cfbbb855096fa3e471635314939309; BIDUPSID=26E387397DECC40EF0CEC97E91622564; PSTM=1635315243; '
              'BDUSS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; '
              'BDUSS_BFESS=TRwaXc1WHNmRnVPY355VGVJMUJ6OU5MOTV-eEJ4c3p6OHVxNDFaUTdRTHkzYk5oRVFBQUFBJCQAAAAAAAAAAAEAAADB320FNTMzNTg5NDkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPJQjGHyUIxhc; MCITY=-%3A; '
              'BDSFRCVID_BFESS=rFuOJeC62uoQSpJHF-lJb5s04mqTv45TH6aoQGj2DDyBWL6TD1tDEG0PMM8g0KuMV-nyogKK3gOTH4PF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; '
              'H_BDCLCKID_SF_BFESS=tb4qoC82fCD3fP36q4TjMbteblOf5Rj-HD7yWCv2JPbcOR5Jj65hyP-B2hj70Duf-DutQKt22RTSMDTF3MA--tRLbUcr2lTdMncuLlTdWRLWsq0x0hjte-bQyNOa--rzBIOMahkb5h7xOKbMQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3tjjTyjaD8q6tfJb3BQ5re54OEenurKxjhXUI8LNDH-4RIL5RZapLEapcDMt8xj-ca3b53jJO7ttoyLmvBoqbgMloR8KQ2Q5QMQML1Db3uW6vMtg3t2xLEatToepvoDPJc3Mv30-jdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKtILM3D; '
              'BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BAIDUID_BFESS=26E387397DECC40EF0CEC97E91622564:FG=1; delPer=0; PSINO=5; ZD_ENTRY=empty; BDRCVFR[gQU9D8KBoX6]=IdAnGome-nsnWnYPi4WUvY; '
              'H_PS_PSSID=35784_35106_31254_35734_35488_35774_34584_35490_35245_35796_35316_26350_22157; BA_HECTOR=8h8h84ak8ha5250kk91gv20jh0q; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; userFrom=www.baidu.com; '
              'BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; BDRCVFR[CLK3Lyfkr9D]=mk3SLVN4HKm; '
              'ab_sr=1.0.1_NGZiYjhiM2E1YWMwMzdjMDhhNzQ0ODYyODhmMTZkMjlmYjBlZDNlNWQwYmM1MThkMTZlNDU1YjQ3ZjBmOWJjNDRhZDQ4ZTIxNzk0ZDc2MzkxMGE3NjM1ZWI4ZjQ3Y2I1')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
        'Cookie': cookie,
    }
    # Fix: bound the request — requests.get() has no default timeout.
    response = requests.get(url, headers=headers, timeout=15)
    data = response.content
    return data


if __name__ == "__main__":
    import os  # local import so this reconstructed block stays self-contained

    url = 'https://mbd.baidu.com/newspage/data/landingsuper?context=%7B%22nid%22%3A%22news_10665880783857073972%22%7D&n_type=-1&p_from=-1'
    html = web(url)  # selenium render — this page builds its images with JS
    print(html)
    # Fix: the active original pattern left every '.' unescaped
    # ('https://[\S]*.jpeg|https://[\S]*.jpg|https://[\S]*.png'), which matched
    # any character before the extension; the dead first assignment to regx and
    # the two bare sample-URL string statements were removed. Example matches:
    #   https://pics6.baidu.com/feed/3b87e950352ac65ca4b6ab1b1633751892138a77.jpeg?token=...
    #   https://pics4.baidu.com/feed/5bafa40f4bfbfbed7b46ef700926313faec31f86.png?token=...
    regx = r'https://\S*\.jpeg|https://\S*\.jpg|https://\S*\.png'
    pattern = re.compile(regx)
    print(pattern)
    listurl = re.findall(pattern, repr(html))  # scan the soup's repr for image URLs
    print(listurl)
    # Fix: open() below crashed with FileNotFoundError when ./picture was missing.
    os.makedirs('./picture', exist_ok=True)
    # enumerate replaces the manual 'index' counter; the loop variable no longer
    # shadows the listing-page 'url' above.
    for index, img_url in enumerate(listurl):
        print(img_url)
        image = load_page(img_url)  # download the image bytes
        name = './picture/第%s张' % index + datetime.strftime(datetime.now(), '%H-%M-%S') + '.jpg'
        with open(name, 'wb') as fb:
            fb.write(image)
        print("正在下载第%s张图片" % index)
        # the original's trailing fb.close() was redundant — 'with' already closes

其他举例:

# 爬取豆瓣top 500
# 电影名,评分,评论
# 难易:⭐⭐
# 一.导入
# import re 导入re包
# import requests 导入正则表达式
#
# def aa():
#     rest = requests.get('https://movie.douban.com/top250') 二.访问链接#     s = rest.content.decode() 三.获取该网页源代码# 四.编写正则表达式,取到所需内容#     ss = re.findall(r'<span class="title">(.*)</span>',s) 爬取片名
#     ss1 =re.findall(r'<span class="rating_num" property="v:average">(.*)</span>',s)   爬取评分
#     ss2=re.findall(r'<span>(\d*)人评价</span>',s)    爬取评论# 五.去除无关信息
#     b = []
#     for i in range(len(ss)):
#         aa = re.findall(r'&nbsp.*', ss[i])
#         if aa == []:
#             b.append(ss[i]) 得到所需信息 b=ss#六.写入文件
#     for i in range(len(b)):
#         print(b[i], ss1[i], ss2[i])
#         with open(r"C:\Users\\陈嘉玉\Desktop\ex.txt",'a+') as ff:
#             ff.writelines(b[i]+' '+ss1[i]+' '+ss2[i]+'\n')#七.抛出异常
# try:
#     aa()
#     print("已爬取")
# # except Exception as c:##
# #     print("爬取失败,错误提示:"+c)##
# else:
#     print("爬取失败,错误提示:"+c)

python中爬取网页图片相关推荐

  1. 上手快!!福利局!新手如何使用python爬虫爬取网页图片(使用正则进行数据解析)当然这个新手是我自己

    作为一个python新入门小白,突然就想发个博客,一方面为了记录学习历程,一方面能分享给新入门的同学经验,更多的是想和大家一起学习和交流.以下是我以小白的角度分享的爬虫的相关知识和一个简单的爬取网页图 ...

  2. python爬虫爬取网页图片_Python爬虫:爬取网页图片

    先分析查找要爬取图片的路径 在浏览器F12 审查元素 整体实现代码 # -- coding:UTF-8 -- import requests from bs4 import BeautifulSoup ...

  3. python爬虫爬取网页图片_Python爬虫实现抓取网页图片

    在逛贴吧的时候看见贴吧里面漂亮的图片,或有漂亮妹纸的图片,是不是想保存下来? 但是有的网页的图片比较多,一个个保存下来比较麻烦. 最近在学Python,所以用Python来抓取网页内容还是比较方便的: ...

  4. 利用python批量爬取网页图片_使用python来批量抓取网站图片

    今天"无意"看美女无意溜达到一个网站,发现妹子多多,但是可恨一个page只显示一张或两张图片,家里WiFi也难用,于是发挥"程序猿"的本色,写个小脚本,把图片扒 ...

  5. 使用Python爬取网页图片

    使用Python爬取网页图片 李晓文 21 天前 近一段时间在学习如何使用Python进行网络爬虫,越来越觉得Python在处理爬虫问题是非常便捷的,那么接下来我就陆陆续续的将自己学习的爬虫知识分享给 ...

  6. Python学习笔记:爬取网页图片

    Python学习笔记:爬取网页图片 上次我们利用requests与BeautifulSoup爬取了豆瓣<下町火箭>短评,这次我们来学习爬取网页图片. 比如想爬取下面这张网页的所有图片.网址 ...

  7. 用python爬虫爬取网页壁纸图片(彼岸桌面网唯美图片)

    参考文章:https://www.cnblogs.com/franklv/p/6829387.html 今天想给我的电脑里面多加点壁纸,但是嫌弃一个个保存太慢,于是想着写个爬虫直接批量爬取,因为爬虫只 ...

  8. 利用python爬取网页图片

    学习python爬取网页图片的时候,可以通过这个工具去批量下载你想要的图片 开始正题: 我从尤物网去爬取我喜欢的女神的写真照,我们这里主要用到的就两个模块 re和urllib模块,有的时候可能会用到t ...

  9. python唯美壁纸_用python爬虫爬取网页壁纸图片(彼岸桌面网唯美图片)

    参考文章:https://www.cnblogs.com/franklv/p/6829387.html 今天想给我的电脑里面多加点壁纸,但是嫌弃一个个保存太慢,于是想着写个爬虫直接批量爬取,因为爬虫只是很久之前学过 ...

最新文章

  1. Springsecurity之AuthenticationEntryPoint
  2. 未解决ora-01034、ora-03113、oracle使用RMAN删除归档日志
  3. n 个整数的无序数组,找到每个元素后面比它大的第一个数,要求时间复杂度为 O(N)
  4. Qt版本中国象棋开发(二)
  5. php辅助框架,【PHP开发框架】Laravel框架中辅助函数:optional ()函数的介绍
  6. 悉尼一船只引擎爆炸男子被迫跳水:严重烧伤被送医
  7. 关于用Java写的贪吃蛇游戏的一些感想
  8. 【Xamarin.iOS】使用iOS 11进行大型游戏
  9. 罗斯蒙特电磁流量计8723说明书_罗斯蒙特电磁流量计8732EM变送器信号处理算法说明...
  10. keeplive+haproxy+nginx
  11. 德标螺纹规格对照表_德标、欧标、国际、国标对照表
  12. 开源免费etl作业批量调度必备软件 Taskctl Free应用版
  13. STM32-DMA控制器
  14. Google Chrome商店开发者认证支付$5【图解认证支付成功】
  15. 离散实验一 油管铺设 (求最小生成树的Prim算法的实际应用)
  16. 使用 BEV 投影的高效城市规模点云分割
  17. Git - 学习/实践 - 以及相关操作
  18. BSP板机支持包、linux启动分析、ARM裸机编程
  19. 爬虫眼中的“周庄”长什么样?
  20. 让apache支持pathinfo

热门文章

  1. 忙忙碌碌缓慢进度 项目经理管理的4大误区
  2. SpringBoot应用篇之FactoryBean及代理实现SPI机制示例
  3. Exercise 1.9
  4. IKBC-DC87无线连接方法
  5. Diva无法运行LVS问题(virtuoso,layout)
  6. 国庆荐书 | 2020年3季度我读过的十本好书!
  7. Oracle查询某个日期的周一到周日SQL语句
  8. LittleFS移植实践
  9. CMU 15-213 CSAPP (Ch1~Ch3)
  10. python计算机语言排行榜_TIOBE 1月编程语言排行榜:C语言再度「C 位」出道,Python惜败...