day26

总结

  • 多线程.py

    """
    !./env python
    -*- coding: utf-8 -*-
    @Time:  2021/6/1 17:09
    @Author:    三玖天下第一
    @File: 多线程.py
    @Software: PyCharm
    """
    # 一个进程默认有一个线程,该线程叫主线程。其他线程都叫子线程(需要手动创建)
    # 如果一个Python程序需要子线程,需要手动创建线程类Thread的对象
    import time
    import threading
    from threading import Thread
    from datetime import datetime
    from random import randint
    from mine_thread import MyThread

    # One lock shared by every thread so console reads and writes never interleave.
    print_lock = threading.Lock()


    def my_print(*args, out=True, **kwargs):
        """Print, or prompt for input, while holding the shared console lock.

        Args:
            *args: Positional arguments forwarded to ``print``.
            out: When true (the default) the arguments are printed; when false
                the user is prompted for one line of input instead.
            **kwargs: Keyword arguments forwarded to ``print``.

        Returns:
            The text typed by the user when ``out`` is false, otherwise ``None``.

        Raises:
            EOFError: If ``out`` is false and standard input is exhausted.
        """
        with print_lock:
            if out:
                print(*args, **kwargs)
                return None
            # The lock stays held while waiting for input, so messages from the
            # other threads are delayed until the user presses Enter.
            return input('请输入数据:')


    def download(name):
        """Simulate downloading *name*: log the start, sleep 3-7 seconds, log the end."""
        my_print(f'"{name}"开始下载:{datetime.now()}')
        time.sleep(randint(3, 7))
        my_print(f'"{name}"下载结束:{datetime.now()}')


    if __name__ == '__main__':
        # Alternative using the custom thread class from mine_thread:
        # new_thread = MyThread(download, '小薇', thread_name='子线程1')
        # new_thread.start()
        # new_thread.join()
        t1 = Thread(target=download, args=('小薇',))
        t2 = Thread(target=download, args=('猪猪侠',))
        t3 = Thread(target=download, args=('你好,世界',))

        # Variant 1 - the main thread continues only after every download ended:
        #   start t1, t2, t3 and then join t1, t2, t3.
        # Variant 2 - the main thread continues once t1 alone has ended:
        #   start and join t1 first, then start t2 and t3 and join both.
        # Variant 3 (used below) - a helper thread waits for all downloads and
        #   reports completion, leaving the main thread free to read input.
        # Sequential baseline: call download(...) three times without threads.

        def wait_download():
            """Run the three downloads concurrently and announce when all are done."""
            workers = (t1, t2, t3)
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            my_print('下载完成...')

        t4 = Thread(target=wait_download)
        t4.start()

        try:
            while True:
                my_print(out=False)
                time.sleep(0.1)
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: stop prompting instead
            # of dying with a traceback. The worker threads are not daemons, so
            # the interpreter still waits for them before exiting.
            pass
  • 多进程.py

    """
    !./env python
    -*- coding: utf-8 -*-
    @Time:  2021/6/2 11:48
    @Author:    三玖天下第一
    @File: 多进程.py
    @Software: PyCharm
    """
    import time
    from datetime import datetime
    from multiprocessing import Process
    from random import randint
    from threading import Thread


    def download(name):
        """Simulate downloading *name*: log the start, sleep 3-7 seconds, log the end."""
        print(f'"{name}"开始下载:{datetime.now()}')
        time.sleep(randint(3, 7))
        print(f'"{name}"下载结束:{datetime.now()}')


    def wait(*args):
        """Start every given process, wait for all of them, then print a notice.

        Args:
            *args: Objects exposing ``start()`` and ``join()`` (here: ``Process``
                instances). All are started before any is joined so they run
                concurrently.
        """
        for proc in args:
            proc.start()
        for proc in args:
            proc.join()
        print('哈哈哈')


    if __name__ == '__main__':
        p1 = Process(target=download, args=('小薇',))
        p2 = Process(target=download, args=('触不可及',))
        p3 = Process(target=download, args=('很爱很爱你',))
        # Waiting happens in a helper thread so the main thread can keep
        # reading user input while the child processes run.
        t1 = Thread(target=wait, args=(p1, p2, p3))
        t1.start()
        try:
            while True:
                time.sleep(0.1)
                input('请输入数据:')
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: leave the prompt loop
            # cleanly. The helper thread is not a daemon, so the interpreter
            # still waits for it (and therefore the downloads) before exiting.
            pass
    
  • 多进程中创建多线程.py

    """
    !./env python
    -*- coding: utf-8 -*-
    @Time:  2021/6/2 14:09
    @Author:    三玖天下第一
    @File: 多进程中创建多线程.py
    @Software: PyCharm
    """
    import random
    import time
    from multiprocessing import Process, current_process
    from threading import Thread, current_thread


    def download(name):
        """Simulate downloading *name*, reporting the process and thread doing it.

        Each message is emitted with a single ``print`` call so that output from
        concurrent threads cannot be interleaved in the middle of a line.
        """
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:开始下载...')
        time.sleep(random.randint(3, 6))
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:下载结束...')


    def load(*names):
        """Download every name in *names* concurrently, one thread per name.

        Returns only after all of the threads have finished.
        """
        all_thread = [Thread(target=download, args=(name,)) for name in names]
        for worker in all_thread:
            worker.start()
        # The original collected the threads but never waited for them.
        for worker in all_thread:
            worker.join()


    if __name__ == '__main__':
        # 1. Run directly in the main process:
        #    download('小薇')
        # 2. Two threads inside the main process:
        #    t1 = Thread(target=download, args=('雄纠', ))
        #    t2 = Thread(target=download, args=('阿甘正传', ))
        #    t1.start(); t2.start()
        # 3. One process per download:
        #    p1 = Process(target=download, args=('阿甘正传',))
        #    p2 = Process(target=download, args=('天堂',))
        #    p3 = Process(target=download, args=('Python',))
        #    p1.start(); p2.start(); p3.start()
        # 4. (used below) Three processes, each running three download threads.
        p1 = Process(target=load, args=('阿甘正传', '肖申克的救赎', '喜羊羊与灰太狼'))
        p2 = Process(target=load, args=('天堂', '我的世界', '天下第一'))
        p3 = Process(target=load, args=('Python', 'Java', 'JavaScript'))
        p1.start()
        p2.start()
        p3.start()
  • 进程通信

    """
    !./env python
    -*- coding: utf-8 -*-
    @Time:  2021/6/2 15:29
    @Author:    三玖天下第一
    @File: 进程通信.py
    @Software: PyCharm
    """
    import random
    import time
    from multiprocessing import Process, current_process, Queue
    from threading import current_thread


    def download(name, q: Queue):
        """Simulate downloading *name*, then publish the name on queue *q*.

        Args:
            name: Label of the item being "downloaded".
            q: Queue shared with the consumer; receives *name* once the
                simulated download (a 3-6 second sleep) has finished.
        """
        # One print per message so concurrent output cannot split a line.
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:开始下载...')
        time.sleep(random.randint(3, 6))
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:下载结束...')
        q.put(name)


    def get_data(q: Queue):
        """Print every item taken from *q* until the sentinel ``'end'`` arrives.

        The sentinel itself is consumed but not printed; anything queued after
        it is left untouched.
        """
        while True:
            result = q.get()
            if result == 'end':
                break
            print(result)


    if __name__ == '__main__':
        # Create the empty queue once here and hand it to every process
        # (the original note says the queue must be global).
        q = Queue(maxsize=20)
        p1 = Process(target=download, args=('小薇', q))
        p2 = Process(target=download, args=('触不可及', q))
        p3 = Process(target=download, args=('很爱很爱你', q))
        p4 = Process(target=get_data, args=(q,))
        for proc in (p1, p2, p3, p4):
            proc.start()
        for proc in (p1, p2, p3):
            proc.join()
        # All producers are done: tell the consumer to stop, then wait for it
        # so the program ends only after every result has been printed.
        q.put('end')
        p4.join()
  • 线程通信

    """
    !./env python
    -*- coding: utf-8 -*-
    @Time:  2021/6/2 14:47
    @Author:    三玖天下第一
    @File: 线程间通信.py
    @Software: PyCharm
    """
    import random
    import time
    from multiprocessing import current_process
    from threading import Thread, current_thread

    # Module-level container that every thread of this process can append to.
    all_datas = []


    def download(name):
        """Simulate downloading *name* and record the finishing thread in ``all_datas``."""
        # One print per message so concurrent output cannot split a line.
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:开始下载...')
        time.sleep(random.randint(3, 6))
        print(f'当前进程{current_process()},当前线程{current_thread()}{name}:下载结束...')
        all_datas.append(current_thread())


    if __name__ == '__main__':
        t1 = Thread(target=download, args=('雄纠',))
        t2 = Thread(target=download, args=('阿甘正传',))
        t1.start()
        t2.start()
        # Wait for both workers, then read what they stored; without this the
        # shared list was filled but never looked at.
        t1.join()
        t2.join()
        print(all_datas)
        # Data sharing between threads: threads of one process share data
        # directly (a global variable of the process can receive or store data
        # produced by any of its threads).
        # To collect data from several threads in one thread, define a global
        # mutable container such as a list - preferably a thread-safe queue.

作业

  • 使用多进程和多线程爬取豆瓣图书
"""
@Time:  2021/6/1 9:34
@Author:    三玖天下第一
"""
import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Process, Queue

import openpyxl
import requests

# Serialises console output coming from the many worker threads.
print_lock = threading.Lock()

# NOTE(review): the proxy-API URL below and the Cookie header in add_get_page
# embed account/session specific values; consider moving them to configuration.
PROXY_API = 'http://api.kuainiaoip.com/index.php?fetch_type=2021060217064947339&pool_id=&qty=5&time=101&province=%E5%9B%9B%E5%B7%9D%E7%9C%81&city=%E6%88%90%E9%83%BD%E5%B8%82&protocol=1&format=txt-normal&dt=1'

# GraphQL document sent with every page request (kept verbatim).
GRAPHQL_QUERY = "    query getFilterWorksList($works_ids: [ID!]) {      worksList(worksIds: $works_ids) {                title    cover    url    isBundle          url    title          author {      name      url    }    origAuthor {      name      url    }    translator {      name      url    }          abstract    editorHighlight          isOrigin    kinds {          name @skip(if: true)    shortName @include(if: true)    id      }    ... on WorksBase @include(if: true) {      wordCount      wordCountUnit    }    ... on WorksBase @include(if: false) {          isEssay        ... on EssayWorks {      favorCount    }          isNew        averageRating    ratingCount    url          }    ... on WorksBase @include(if: true) {      isColumn      isEssay      onSaleTime      ... on ColumnWorks {        updateTime      }    }    ... on WorksBase @include(if: true) {      isColumn      ... on ColumnWorks {        isFinished      }    }    ... on EssayWorks {      essayActivityData {            title    uri    tag {      name      color      background      icon2x      icon3x      iconSize {        height      }      iconPosition {        x y      }    }        }    }    highlightTags {      name    }      ... on WorksBase @include(if: false) {          fixedPrice    salesPrice    isRebate      }    ... on EbookWorks {          fixedPrice    salesPrice    isRebate      }    ... on WorksBase @include(if: true) {      ... on EbookWorks {        id        isPurchased        isInWishlist      }    }          id        isOrigin      }    }  "

# Column titles of the output sheet, in the order analysis_data builds rows.
COLUMNS = ['title', 'img', 'url', 'author_name', 'author_url', 'abstract', 'kinds', 'wordCount', 'isFinished']

# Value placed on a queue to tell its consumer that no more items will come.
END = 'end'


def my_print(*args, **kwargs):
    """Forward to ``print`` while holding the console lock."""
    with print_lock:
        print(*args, **kwargs)


def get_proxy_ips(ip_queue: Queue):
    """Poll the proxy API forever, putting each returned address on *ip_queue*.

    One request is made every three seconds. Network errors and non-200
    replies are reported and the loop simply tries again on the next round.
    """
    while True:
        try:
            response = requests.get(PROXY_API, timeout=10)
        except requests.RequestException as exc:
            print('获取代理失败!', exc)
        else:
            if response.status_code != 200:
                print('获取代理失败!')
            elif response.text == '10404:没有找到相关记录':
                print('提取频繁请按照规定频率提取')
            else:
                # splitlines + strip keeps the last address even without a
                # trailing newline and drops any '\r' left by CRLF endings.
                for line in response.text.splitlines():
                    ip = line.strip()
                    if ip:
                        ip_queue.put(ip)
        time.sleep(3)


def proxies(ip):
    """Return the ``requests`` proxy mapping that routes http and https via *ip*."""
    return {'http': ip, 'https': ip}


def _post_page(ip, url, header, data):
    """POST one page request through proxy *ip*; return ``(status, payload)``.

    *payload* is the decoded JSON body for a 200 reply and ``None`` otherwise.
    """
    res = requests.post(url, data=json.dumps(data), headers=header,
                        proxies=proxies(ip), timeout=5)
    payload = res.json() if res.status_code == 200 else None
    return res.status_code, payload


def get_content(q: Queue, ip_obj, url, header, data, max_retries=50):
    """Fetch one page, rotating the proxy and retrying until it succeeds.

    Args:
        q: Queue receiving the decoded JSON of a successful reply.
        ip_obj: Proxy holder exposing ``ip`` and ``update(old_ip)``.
        url: Endpoint to POST to.
        header: HTTP headers for the request.
        data: JSON-serialisable request body.
        max_retries: Upper bound on attempts; replaces the original unbounded
            recursion, which could exhaust the interpreter's recursion limit.

    Returns:
        ``True`` if a page was queued, ``False`` if every attempt failed.
    """
    for _ in range(max_retries):
        ip = ip_obj.ip
        try:
            status, payload = _post_page(ip, url, header, data)
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a 200 reply whose body is not valid JSON.
            my_print(exc)
        else:
            if status == 200:
                q.put(payload)
                return True
        ip_obj.update(ip)
        time.sleep(0.1)
    my_print('重试次数用尽,放弃请求:', data.get('page') if isinstance(data, dict) else data)
    return False


def get_content2(q: Queue, ip_obj, url, header, data):
    """Fetch one page once; fall back to ``get_content`` on 403 or on errors.

    A 200 reply is queued on *q*. Any other outcome rotates the proxy (the
    holder ignores the request if another thread already rotated it). A 403
    reply or a raised exception is retried through ``get_content``; other
    status codes are reported and the page is given up, as before.
    """
    ip = ip_obj.ip
    try:
        status, payload = _post_page(ip, url, header, data)
    except (requests.RequestException, ValueError) as exc:
        my_print(exc)
        ip_obj.update(ip)
        get_content(q, ip_obj, url, header, data)
        return
    if status == 200:
        q.put(payload)
        return
    ip_obj.update(ip)
    if status == 403:
        get_content(q, ip_obj, url, header, data)
    else:
        my_print('请求失败:', status)


class _ProxyHolder:
    """The proxy address currently shared by all workers, refilled from a queue."""

    def __init__(self, source):
        self._source = source
        self.lock = threading.RLock()
        # Blocks until the fetcher thread has delivered the first address.
        self.ip = source.get()

    def update(self, old):
        """Replace the current proxy, but only if it is still *old*.

        The comparison under the lock ensures that many threads failing on the
        same address trigger a single rotation.
        """
        with self.lock:
            if old == self.ip:
                self.ip = self._source.get()
                print('update', self.ip)

    def is_update(self, old):
        """Return whether *old* is still the current proxy."""
        return old == self.ip


def add_get_page(q: Queue, first_page=60000, last_page=64853):
    """Producer process: request every page and queue the raw JSON on *q*.

    Args:
        q: Queue receiving one decoded JSON payload per successfully fetched page.
        first_page: First page number requested (inclusive).
        last_page: Page number at which to stop (exclusive).
    """
    print('第一个子进程执行...')
    proxy_ip = Queue()
    threading.Thread(target=get_proxy_ips, args=(proxy_ip,), daemon=True).start()
    holder = _ProxyHolder(proxy_ip)

    # The listing is fetched with POST requests; the body selects the page.
    url = "https://read.douban.com/j/kind/"
    # Browser-like request headers.
    header = {
        "accept": "application/json",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "content-type": "application/json",
        "Host": "read.douban.com",
        "Cookie": "bid=jXNUTLsP_28; gr_user_id=e52067be-9219-484a-9f84-a1129fa1acbf; __utmz=30149280.1622524612.1.1.utmcsr=sogou.com|utmccn=(referral)|utmcmd=referral|utmcct=/link; __utma=30149280.2030887735.1622524612.1622524612.1622541364.2; _ga=GA1.3.2030887735.1622524612; _gid=GA1.3.231733992.1622705350; _pk_ses.100001.a7dd=*; _gat=1; _pk_id.100001.a7dd=f10116d5e1b94476.1622705350.1.1622705395.1622705350.",
        "Origin": "https://read.douban.com",
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
        "x-csrf-token": "null",
        "x-requested-with": "XMLHttpRequest",
    }

    # Leaving the with-block waits for every submitted request to finish.
    with ThreadPoolExecutor(max_workers=256) as thread_pool:
        for page in range(first_page, last_page):
            data = {
                "sort": "new",
                "page": page,
                "kind": 0,
                "query": GRAPHQL_QUERY,
                "variables": {},
                "tags": [],
            }
            thread_pool.submit(get_content2, q, holder, url, header, data)


def _book_row(book, root):
    """Flatten one book record into a list ordered like ``COLUMNS``."""
    author = book['author']
    author_name = author[0]['name'] if author else ''
    author_url = root + author[0]['url'] if author else ''
    kinds = '|'.join([kind['shortName'] for kind in book['kinds']])
    return [
        book['title'],
        book['cover'],
        root + book['url'],
        author_name,
        author_url,
        book['abstract'],
        kinds,
        book['wordCount'],
        book.get('isFinished', 'TRUE'),
    ]


def analysis_data(pending_data: Queue, data: Queue):
    """Parser process: turn raw page payloads into rows until ``'end'`` arrives.

    Args:
        pending_data: Queue of decoded page payloads (each with a ``'list'`` of
            book records) terminated by the sentinel ``'end'``.
        data: Queue receiving, per page, a list of rows ordered like ``COLUMNS``.
    """
    print('第二个子进程执行...')
    root = 'https://read.douban.com'
    while True:
        content = pending_data.get()
        if content == END:
            print('子进程二结束')
            return
        try:
            rows = [_book_row(book, root) for book in content['list']]
        except (KeyError, IndexError, TypeError) as exc:
            # A payload with an unexpected shape must not kill the parser and
            # stall the whole pipeline; report it and move on to the next page.
            my_print('解析失败:', repr(exc))
            continue
        data.put(rows)


def save_data(data: Queue):
    """Writer process: append parsed rows to a workbook and save it to disk.

    The workbook is saved about every three seconds by a helper thread and once
    more, deterministically, after the sentinel ``'end'`` has been received.
    """
    import os

    print('第三个子进程执行...')
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = '豆瓣图书免费'
    sheet.append(COLUMNS)
    file = r'./files/scrawp3.xlsx'
    # Saving fails if the target directory does not exist yet.
    os.makedirs(os.path.dirname(file), exist_ok=True)

    wb_lock = threading.Lock()    # keeps appends and saves from overlapping
    stop = threading.Event()      # tells the periodic saver to finish

    def save(wb, file):
        # Event.wait returns False on timeout, i.e. "keep going".
        while not stop.wait(3):
            with wb_lock:
                wb.save(file)

    saver = threading.Thread(target=save, args=(wb, file), daemon=True)
    saver.start()

    while True:
        content = data.get()
        if content == END:
            break
        with wb_lock:
            for row in content:
                sheet.append(row)

    # Stop the periodic saver first so it cannot be cut off mid-write at
    # process exit, then write the final state exactly once.
    stop.set()
    saver.join()
    wb.save(file)
    print('子进程三结束')


if __name__ == '__main__':
    pending_data = Queue(maxsize=4096)
    data = Queue(maxsize=4096)
    p1 = Process(target=add_get_page, args=(pending_data,))
    p2 = Process(target=analysis_data, args=(pending_data, data))
    p3 = Process(target=save_data, args=(data,))
    p1.start()
    p2.start()
    p3.start()

    # Each queue has exactly one consumer, so one sentinel per stage suffices.
    p1.join()
    pending_data.put(END)
    print('网页爬取完成...')
    p2.join()
    data.put(END)
    print('数据解析完成...')
    p3.join()

day26-多进程多线程相关推荐

  1. 利用进化算法+多进程/多线程来优化SVM中的两个参数:C和Gamma

    该案例展示了如何利用进化算法+多进程/多线程来优化SVM中的两个参数:C和Gamma. 在执行本案例前,需要确保正确安装sklearn,以保证SVM部分的代码能够正常执行. 本函数需要用到一个外部数据 ...

  2. linux 多进程 多线程的选择

    关于多进程和多线程,教科书上最经典的一句话是"进程是资源分配的最小单位,线程是CPU调度的最小单位",这句话应付考试基本上够了,但如果在工作中遇到类似的选择问题,那就没有这么简单了 ...

  3. 多进程多线程GDB调试 (转)

    多进程多线程GDB调试   一.线程调试指南:   1. gdb attach pid 挂载到调试进程  2. gdb$ set scheduler-locking on 只执行当前选定线程的开关 3 ...

  4. 浅谈多进程多线程的选择(转)

    关于多进程和多线程,教科书上最经典的一句话是"进程是资源分配的最小单位,线程是CPU调度的最小单位",这句话应付考试基本上够了,但如果在工作中遇到类似的选择问题,那就没有这么简单了 ...

  5. linux c多进程多线程,linux下的C\C++多进程多线程编程实例详解

    linux下的C\C++多进程多线程编程实例详解 1.多进程编程 #include #include #include int main() { pid_t child_pid; /* 创建一个子进程 ...

  6. 进程 线程 多进程 多线程 父进程 子进程

    1. 进程定义: 进程就是一个程序在一个数据集上的一次动态执行过程.进程一般由程序.数据集.进程控制块三部分组成.我们编写的程序用来描述进程要完成哪些功能以及如何完成:数据集则是程序在执行过程中所需要 ...

  7. Python实现多进程/多线程同时下载单个文件

    功能描述: 使用多进程/多线程同时下载单个文件,可以自定义文件地址.进程/线程数量. 主要思路: 获取文件大小,使用多个进程/线程分别下载一部分,最后再把这些文件拼接起来. 参考代码: 运行结果: - ...

  8. linux线程并不真正并行,多核时代:并行程序设计探讨(3)——Windows和Linux对决(多进程多线程)...

    并行程序设计探讨(3)--Windows和Linux对决(多进程多线程) 前面的博文经过分析总结,最后得出两种并行技术:多进程多线程.多机协作.对于多进程和多线程来说,最有代表性且最常见的的莫过于Wi ...

  9. 多CPU 多核CPU | 多进程 多线程 | 并行 并发

    文章目录 区分 多CPU && 多核CPU CPU缓存 并行 && 并发 多CPU && 多核CPU | 多进程 && 多线程 | 并行 ...

  10. python多进程多线程,多个程序同时运行

    python 多线程 多进程同时运行 多任务要求 python 基础语法 python 文件目录操作 python 模块应用 开发工具 pycharm 实现方法 多任务的实现可以用进程和线程来实现 进 ...

最新文章

  1. QT5生成可执行文件总结
  2. 两张趣图助你理解状态码的含义~
  3. Linux Android 常用shell命令汇总 不断更新
  4. Cocos2dx实现象棋之布局
  5. linux常用命令100个(转)
  6. python编程语言-python编程语言基础知识总结
  7. 【渝粤题库】陕西师范大学200561 英语写作(一) 作业
  8. 查找表_leetcode454
  9. 数学手册|赋范空间概念
  10. 少儿编程scratch系统介绍(附网站源码)
  11. sqlServer相关
  12. 王者荣耀背景html,《王者荣耀》登录界面背景怎么修改 登录背景图片更换方法...
  13. kafka+fluentd+heka了解资料
  14. 牛津词典 2018 年度词汇 ——「有毒」! 1
  15. vsftpd安装以及配置FTP虚拟用户实践
  16. nginx 配置网站通用的伪静态代码
  17. Android视频流解码
  18. 医学图像——医学坐标体系
  19. mathml_MathML简介–数学标记语言
  20. Shell -----grep

热门文章

  1. 【python】简单使用selenium编写无界面谷歌浏览器的网页登录和签到功能
  2. Ubuntu下使用opera的坑
  3. MySQL 架构与 SQL 执行流程
  4. SQL Server附加数据库失败,823错误。
  5. Gdk-CRITICAL **: IA__gdk_drawable_get_size: assertion `GDK_IS_DRAWABLE (drawable)‘ failed
  6. 3.27下午 口语练习P41
  7. MySQL数据库CPU飙升到100%解决方案
  8. python发送邮件封装
  9. 微信支付:商户订单号重复
  10. c语言中字符加上48是,【2017年整理】C语言字符型数据(4、5).doc