Python 爬取智联招聘信息

import random
import re
from time import sleep
import requests
from tqdm import tqdm
import user_agents
import csv


def get_page(city, keyword, page, timeout=10):
    """Fetch one page of Zhaopin job-search results.

    :param city: city to search in (sent as the ``jl`` query parameter)
    :param keyword: search keyword (sent as the ``kw`` query parameter)
    :param page: result page number (sent as the ``p`` query parameter)
    :param timeout: seconds to wait for the server before giving up
    :return: the response body as text when the server answers with
        HTTP 200, otherwise ``None`` (also ``None`` on any request error)
    """
    # Query-string parameters of the search request.
    paras = {
        'jl': city,       # city to search in
        'kw': keyword,    # search keyword
        'isadv': 0,
        'isfilter': 1,
        'p': page,        # result page number
    }
    # Base address; requests appends the encoded parameters to it.
    url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?'
    # Request headers; the User-Agent is picked at random for every call.
    headers = {
        'User-Agent': random.choice(user_agents.agents),
        'Host': 'sou.zhaopin.com',
        'Referer': 'https://zhaopin.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    }
    try:
        # A timeout keeps a stalled connection from blocking the crawl forever.
        response = requests.get(url, params=paras, headers=headers,
                                timeout=timeout)
    except requests.RequestException:
        # Network-level failure (connection error, timeout, ...): no page.
        return None
    # The status code tells whether the fetch succeeded.
    if response.status_code == 200:
        return response.text
    return None

# Matches one job row of the search-result table; eight capture groups per row.
_JOB_ROW_PATTERN = re.compile(
    '<td class="zwmc".*? href="(.*?)" target="_blank">(.*?)</a>.*?'   # job link and job title
    '<td.*? class="fk_lv".*?<span>(.*?)</span>.*?'                    # response rate
    '<td class="gsmc".*? href="(.*?)" target="_blank">(.*?)</a>.*?'   # company link and company name
    '<td class="zwyx">(.*?)</td>.*?'                                  # monthly salary
    '<td class="gzdd">(.*?)</td>.*?'                                  # location
    '<td class="gxsj".*?<span>(.*?)</span>.*?',                       # publication time
    re.S,
)


def parse_page(html, skip_top=4):
    """Extract job postings from a search-result page.

    :param html: page source as a string; ``None`` or an empty string
        yields nothing
    :param skip_top: number of leading matches to discard (the pinned
        entries at the top of the result list); adjust it when the search
        changes
    :return: generator of dicts with the keys ``zhiweilianjie``,
        ``jobname``, ``Response Rate``, ``gongshilianjie``, ``company``,
        ``salary``, ``address`` and ``time``
    """
    if not html:
        # Nothing to parse, e.g. the page download failed.
        return
    matches = _JOB_ROW_PATTERN.findall(html)
    # Slicing (instead of tuple unpacking) also copes with pages that
    # contain fewer than ``skip_top`` matches.
    for item in matches[skip_top:]:
        # The searched keyword is wrapped in <b>...</b> inside the title.
        job_name = item[1].replace('<b>', '').replace('</b>', '')
        yield {
            'zhiweilianjie': item[0],
            'jobname': job_name,
            'Response Rate': item[2],
            'gongshilianjie': item[3],
            'company': item[4],
            'salary': item[5],
            'address': item[6],
            'time': item[7],
        }


def write_file_header(file_name, headers):
    """Write the CSV header (first row) to *file_name*.

    The file is opened in append mode, so existing content is kept. The
    header is written only when the file is still empty, which prevents a
    second header row from appearing when the script is run again.

    :param file_name: path of the CSV file
    :param headers: column names, in output order
    :return: None
    """
    with open(file_name, 'a', encoding='utf-8', newline='') as f:
        # In append mode the position starts at the end of the file, so a
        # position of 0 means the file has no content yet.
        if f.tell() == 0:
            f_csv = csv.DictWriter(f, headers)
            f_csv.writeheader()


def write_file_rows(file_name, headers, rows):
    """Append data rows to the CSV file *file_name*.

    :param file_name: path of the CSV file
    :param headers: column names, in output order
    :param rows: iterable of dicts keyed by the names in *headers*
    :return: None
    """
    with open(file_name, 'a', encoding='utf-8', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writerows(rows)


def main(city, keyword, page, output_dir='/Users/xiongxing/Desktop/'):
    """Crawl search-result pages and store the postings in a CSV file.

    The output file is named ``智联<city><keyword>.csv`` and is created
    inside *output_dir*.

    :param city: city to search in
    :param keyword: search keyword
    :param page: number of result pages to fetch
    :param output_dir: directory the CSV file is written to
    :return: None
    """
    import os  # local import: only needed to build the output path

    file_name = os.path.join(output_dir, '智联' + city + keyword + '.csv')
    headers = ['zhiweilianjie', 'jobname', 'Response Rate', 'gongshilianjie',
               'company', 'salary', 'address', 'time']
    write_file_header(file_name, headers)
    # NOTE(review): the page index handed to get_page starts at 0 -- confirm
    # whether the site's ``p`` parameter is 0-based or 1-based.
    for i in tqdm(range(page)):
        html = get_page(city, keyword, i)
        # Short pause between consecutive requests.
        sleep(0.1)
        if html is None:
            # get_page returns None when the download failed; skip that
            # page instead of aborting the whole run.
            continue
        job = list(parse_page(html))
        write_file_rows(file_name, headers, job)

if __name__ == '__main__':
    # The search criteria (city, keyword, number of pages) can be changed here.
    main('成都', 'python', 1)

python爬取智联招聘信息相关推荐

python+selenium爬取智联招聘信息
Python + Selenium 爬取智联招聘信息。目录:需求、准备、代码、结果。需求:老板给了我一份公司名单(大概几百家,如下图),让我到网上看看这些公司分别在招聘哪些岗位,通过分析他们的招聘需求大致能推断出我 ...
(转)python爬虫实例——爬取智联招聘信息
受友人所托,写了一个爬取智联招聘信息的爬虫,与大家分享。本文将介绍如何实现该爬虫。目录:网页分析、实现代码、分析结果、总结、GitHub 代码地址。网页分析:以 https://xiaoyuan.zh ...
python爬虫实例——爬取智联招聘信息
受友人所托,写了一个爬取智联招聘信息的爬虫,与大家分享. 本文将介绍如何实现该爬虫. 目录网页分析实现代码分析结果总结 github代码地址网页分析以https://xiaoyuan.zh ...
用python抓取智联招聘信息并存入excel
用python抓取智联招聘信息并存入excel tags:python 智联招聘导出excel 引言:前一阵子是人们俗称的金三银四,跳槽的小朋友很多,我觉得每个人都应该给自己做一下规划,根据自己的进步 ...
python爬取智联招聘网_python爬取智联招聘工作岗位信息
1 # coding:utf-8 2 # auth:xiaomozi 3 #date:2018.4.19 4 #爬取智联招聘职位信息 5 6 7 import urllib 8 from lxml i ...
爬取智联招聘信息并存储
#-*- coding: utf-8 -*- import urllib.request import os,time from bs4 import BeautifulSoup #爬取智联招聘网站的 ...
爬取智联招聘信息并且存入数据库
任务爬取智联页面的招聘信息并且存入数据库. 由于是初次尝试这里选择了固定的页面存入数据库. 首先确定需要爬取的页面 http://sou.zhaopin.com/jobs/searchresult. ...
python 爬取智联招聘
一个爬取智联的一个小爬虫 python版本:python3.7 依赖模块:selenium.pyquery 废话少说,上代码 from selenium import webdriver from s ...
使用python动手爬取智联招聘信息并简单分析
根据使用场景,网络爬虫可分为通用爬虫和聚焦爬虫两种通用网络爬虫,是捜索引擎抓取系统(Baidu.Google.Yahoo等)的重要组成部分.主要目的是将互联网上的网页下载到本地,形成一个互联 ...

python爬取智联招聘信息

python爬取智联招聘信息相关推荐

最新文章

热门文章