Boss直聘招聘信息爬取

利用 selenium 进行爬取，爬取结果保存为 CSV 文件
编写时间：2020年03月16日（若爬取失败，应该是网站更新造成的。）


from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait                    # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC           # available since 2.26.0
import csv
import re
import sys
import os
import random


class Boss:
    """Selenium-driven scraper for job listings on zhipin.com (Boss Zhipin).

    For every keyword in ``KEYWORDS`` the scraper walks the search result
    pages, opens each listing's detail page in turn and hands one CSV row per
    listing to the module-level ``wr`` writer (a ``WriteDataToCSV`` instance
    that the ``__main__`` guard creates).
    """

    # Job keywords searched by get_url(); one CSV file is produced per keyword.
    KEYWORDS = ("Java", "测试", ".net", "安卓", "UI", "前端")
    # Education labels used to split the combined "experience + education" text.
    EDUCATION_LEVELS = ('硕士', '大专', '本科', '博士', '专科')
    PAGE_COUNT = 10
    ITEMS_PER_PAGE = 30
    SEARCH_URL = ('https://www.zhipin.com/c101090100/'
                  '?query={keyword}&page={page}&ka=page-{page}')
    # Root node of the index-th (1-based) listing on a result page.
    ITEM_XPATH = '//*[@id="main"]/div/div[2]/ul/li[{index}]/div'
    NO_RESULT_XPATH = "//*[text()='没有找到相关职位，修改筛选条件试一下']"
    CAPTCHA_HINT = "当前IP地址可能存在异常访问行为，完成验证后即可正常使用"
    DESCRIPTION_XPATH = '//*[@id="main"]/div[3]/div/div[2]/div[2]/div[1]/div'

    def __init__(self):
        """Start a Chrome session.

        Headless mode is left disabled; add the '--headless' and
        '--disable-gpu' arguments to ``chrome_options`` here to enable it.
        """
        self.chrome_options = Options()
        # `options=` replaces the deprecated `chrome_options=` keyword.
        self.driver = webdriver.Chrome(options=self.chrome_options)

    def get_url(self, search='python'):
        """Crawl every keyword in ``KEYWORDS`` and write the rows to CSV.

        :param search: unused; kept only so existing callers keep working.
            The keywords actually crawled come from ``KEYWORDS``.
        :return: None
        """
        for keyword in self.KEYWORDS:
            # Create (or reopen) the CSV file for this keyword.
            wr.csv_init(keyword)
            self._crawl_keyword(keyword)

    def _crawl_keyword(self, keyword):
        """Walk the result pages of one keyword until a page runs out of listings."""
        serial = 1
        search_window = None
        for page in range(1, self.PAGE_COUNT + 1):
            if search_window is not None:
                # Make sure the search tab has focus before navigating.
                self.driver.switch_to.window(search_window)
            self.driver.get(self.SEARCH_URL.format(keyword=keyword, page=page))
            search_window = self.driver.current_window_handle

            for index in range(1, self.ITEMS_PER_PAGE + 1):
                if self._element_text(By.XPATH, self.NO_RESULT_XPATH,
                                      default=None) is not None:
                    # The "no matching jobs" notice is showing: reload and retry.
                    self.driver.refresh()
                    continue

                self.driver.switch_to.window(search_window)
                self._wait_for_captcha()

                row = self._scrape_item(index, serial, search_window)
                if row is None:
                    # No listing at this position: the results are exhausted.
                    print('爬取完成！')
                    return

                wr.write(row)
                print("已完成" + str(serial) + "条")
                time.sleep(random.randint(1, 5))
                serial += 1

    def _element_text(self, by, value, timeout=None, default=""):
        """Return the text of the located element, or ``default`` on failure.

        With ``timeout=None`` the element is looked up once; otherwise the
        lookup polls every 0.5 s for up to ``timeout`` seconds.
        """
        # Imported lazily so the module's import block stays untouched.
        from selenium.common.exceptions import WebDriverException

        try:
            if timeout is None:
                element = self.driver.find_element(by, value)
            else:
                element = WebDriverWait(self.driver, timeout, 0.5).until(
                    EC.presence_of_element_located((by, value)))
            return element.text
        except WebDriverException:
            return default

    def _wait_for_captcha(self):
        """Block until the user has solved the captcha, if one is shown."""
        message = self._element_text(By.ID, "verifyMessage", default=None)
        if message is None:
            return
        print(message)
        if self.CAPTCHA_HINT in message:
            print('输入验证码验证')
            try:
                # Portable replacement for the Windows-only `pause` command.
                input()
            except EOFError:
                pass

    def _scrape_item(self, index, serial, search_window):
        """Collect one listing and return its CSV row, or None if it is absent."""
        item = self.ITEM_XPATH.format(index=index)

        company = self._element_text(
            By.XPATH, item + '/div[1]/div[2]/div/h3/a', timeout=60, default=None)
        if company is None:
            return None

        salary = self._element_text(
            By.XPATH, item + '/div[1]/div[1]/a/div[2]/span', timeout=3)
        title = self._element_text(
            By.XPATH, item + '/div[1]/div[1]/a/div[1]/span[1]', timeout=3)
        size, company_type = self._split_company_info(self._element_text(
            By.XPATH, item + '/div[1]/div[2]/div/p', timeout=3))
        welfare = self._element_text(
            By.XPATH, item + '/div[2]/div[2]', timeout=1)
        experience, education = self._split_experience_education(
            self._element_text(
                By.XPATH, item + '/div[1]/div[1]/a/div[2]/p', timeout=1))
        description, address = self._scrape_detail(item + '/div[1]', search_window)

        # Column order follows the header written by WriteDataToCSV.csv_init;
        # the empty strings are the age, working-hours and remark columns.
        return [serial, company, size, company_type, title, education,
                experience, description, "", "", welfare, salary, address, ""]

    def _scrape_detail(self, link_xpath, search_window):
        """Open a listing's detail tab and return ``(description, address)``.

        Fields that cannot be read are returned as empty strings. Focus is
        always handed back to the search window afterwards.
        """
        from selenium.common.exceptions import WebDriverException

        description = address = ""
        time.sleep(random.randint(1, 4))
        try:
            link = WebDriverWait(self.driver, 1, 0.5).until(
                EC.presence_of_element_located((By.XPATH, link_xpath)))
            link.click()
            for handle in self.driver.window_handles:
                if handle == search_window:
                    continue
                self.driver.switch_to.window(handle)
                description = self._element_text(
                    By.XPATH, self.DESCRIPTION_XPATH, timeout=5)
                address = self._element_text(By.CLASS_NAME, "location-address")
                self.driver.close()  # close the detail tab
        except WebDriverException:
            # Best effort: keep whatever was collected before the failure.
            pass
        finally:
            # The closed detail tab leaves the driver without a valid focus.
            self.driver.switch_to.window(search_window)
        return description, address

    @staticmethod
    def _split_company_info(type_size):
        """Split e.g. '互联网不需要融资100-499人' into ``(size, type)``.

        The size is the first 'N-M人' range, or failing that the first run of
        digits; the type is everything before it. Returns ``("", "")`` when
        the text contains no size at all.
        """
        match = (re.search(r'\d+-\d+人', type_size)
                 or re.search(r'\d+', type_size))
        if match is None:
            return "", ""
        return match.group(), type_size[:match.start()]

    @staticmethod
    def _split_experience_education(text):
        """Split e.g. '3-5年本科' into ``(experience, education)``.

        The text is cut at the first label from ``EDUCATION_LEVELS`` (tried in
        list order) that it contains. Without any label the whole text is
        returned as the experience and the education is empty.
        """
        for level in Boss.EDUCATION_LEVELS:
            if level in text:
                experience, label, rest = text.partition(level)
                return experience, label + rest
        return text, ""


class WriteDataToCSV:
    """Append scraped rows to ``./result/<name>.csv``."""

    HEADER = ["序号", "企业名称", "企业规模", "性质/行业", "岗位名称", "学历要求",
              "工作经验", "专业要求", "年龄要求", "工作时间", "社保福利", "薪酬范围",
              "工作地点", "备注"]

    def csv_init(self, path):
        """Select the output file for ``path`` and make sure it has a header.

        :param path: base name of the CSV file (without directory or suffix).
        """
        self.path = "./result/" + str(path) + ".csv"
        # The result directory is not guaranteed to exist on a fresh checkout.
        os.makedirs(os.path.dirname(self.path), exist_ok=True)
        # Write the header once, so that re-runs do not inject header rows
        # into the middle of the data.
        is_empty = (not os.path.exists(self.path)
                    or os.path.getsize(self.path) == 0)
        with open(self.path, 'a+', encoding='utf-8', newline="") as f:
            if is_empty:
                csv.writer(f).writerow(self.HEADER)

    def write(self, data):
        """Append one row to the file selected by :meth:`csv_init`."""
        with open(self.path, 'a+', encoding='utf-8', newline="") as f:
            csv.writer(f).writerow(data)


if __name__ == '__main__':
    wr = WriteDataToCSV()
    boss = Boss()
    try:
        boss.get_url()
    finally:
        # Always shut the browser down, even after Ctrl-C or a crash.
        boss.driver.quit()

实际效果：

Boss直聘招聘信息爬取相关推荐

02-01 boss直聘招聘信息爬取（xpath解析）
1. 目标内容: ①公司名称 ②岗位名称 ③薪资 ④岗位描述 2.分析 ①该网页不是动态加载的 ②岗位介绍在岗位详情页面中 3. 代码 import requests from lxml import ...
Boss直聘职位信息爬取+分析
BOSS直聘职位信息爬取分析先上结果,本次抓取了732条职位的信息入库: 代码实现: import requests import json from lxml import etree from ...
实战-selenium实现BOSS直聘网信息爬取
实现了mongodb和csv的文件写入 mongodb如下: 这里学历要求获取错了,取错列表下标了...,代码已经改正 Excel如下: 这里学历要求获取错了,取错列表下标了...,代码已经改正待解决 ...
python爬取招聘信息_python 爬取boss直聘招聘信息实现
原标题:python 爬取boss直聘招聘信息实现 1.一些公共方法的准备获取数据库链接: importpymysql ''' 遇到不懂的问题?Python学习交流群:821460695满足你的需求 ...
python爬取boss直聘招聘信息_Python 爬取boss直聘招聘信息！
原标题:Python 爬取boss直聘招聘信息! 1.一些公共方法的准备获取数据库链接: importpymysql ''' 遇到不懂的问题?Python学习交流群:821460695满足你的需求, ...
python爬取boss直聘招聘信息_Python爬虫实战-抓取boss直聘招聘信息
Python Python开发 Python语言 Python爬虫实战-抓取boss直聘招聘信息实战内容:爬取boss直聘的岗位信息,存储在数据库,最后通过可视化展示出来 PS注意:很多人学Pyth ...
基于‘BOSS直聘招聘信息’分析企业到底需要什么样的PHPer
基于'BOSS直聘招聘信息'分析企业到底需要什么样的PHPer 前两篇文章都没看,好意思直接看结果? Python爬虫框架Scrapy实战 - 抓取BOSS直聘招聘信息 Python爬虫实战 - 抓取 ...
【2020-10-27】 scrapy爬虫之猎聘招聘信息爬取
声明:本文只作学习研究,禁止用于非法用途,否则后果自负,如有侵权,请告知删除,谢谢! scrapy爬虫之猎聘招聘信息爬取 1.项目场景目标网址:https://www.liepin.com/zhao ...
python爬取boss直聘招聘信息_Python笔记-爬取Boss直聘的招聘信息
2018-05-09笔记: 最近迷上了爬虫,爬取招聘信息,存在MongoDB上代码思路和上一篇爬取酷狗TOP500差不多,但是在使用CSS选择器的时候,有两组信息是连在一起,所以使用正则来获取本来 ...

Boss直聘招聘信息爬取

Boss直聘招聘信息爬取相关推荐

最新文章

热门文章