利用python如何在前程無憂高效投遞簡歷
前言
在前程無憂上投遞簡歷時發現有競爭力分析,免費能看到匹配度評價和綜合競爭力分數,可以作為投遞參考。

計(jì)算方式

綜合競爭力得分應(yīng)該越高越好,匹配度評(píng)語也應(yīng)該評(píng)價(jià)越高越好
抓取所有職位關(guān)鍵字搜索結(jié)果并獲取綜合競爭力得分和匹配度評(píng)語,最后篩選得分評(píng)語自動(dòng)投遞合適的簡歷
登陸獲取cookie
# Imports and WebDriver setup for the 51job auto-apply script.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
import re
from lxml import etree
import requests
import os
import json

chrome_options = Options()
# Headless stays disabled: the login flow sometimes shows a slider captcha
# that must be solved by hand.
# chrome_options.add_argument('--headless')

# Raw string avoids any accidental escape sequences in the Windows path.
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=r'D:\python\chromedriver.exe')
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"}
# Bug fix: the URL must be a quoted string literal (it was a bare token, a
# syntax error), and "°reefrom" was a mis-decoded "&degreefrom" (HTML &deg;).
# Visiting the search page first lets us later write cookies for this domain.
driver.get("https://search.51job.com/list/020000,000000,0000,00,9,99,%2520,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=")
webdriver需要在相應(yīng)域名寫入cookie,所以轉(zhuǎn)到職位搜索頁面

def get_cookie():
    """Perform an SMS-code login on 51job and persist the browser cookies.

    Prompts for the phone number and the SMS code on stdin, drives the login
    form via the shared webdriver, then dumps ``driver.get_cookies()`` to
    ``cookie.json`` for reuse (51job limits SMS codes to three per day).
    """
    driver.get("https://login.51job.com/login.php?loginway=1&lang=c&url=")
    sleep(2)
    mobile = input("輸入手機號:")
    driver.find_element_by_id("loginname").send_keys(mobile)
    driver.find_element_by_id("btn7").click()  # request the SMS code
    sleep(1)
    sms_code = input("輸入短信:")
    driver.find_element_by_id("phonecode").send_keys(sms_code)
    driver.find_element_by_id("login_btn").click()
    sleep(2)
    with open("cookie.json", "w") as f:
        json.dump(driver.get_cookies(), f)
檢查cookie文件是否存在,如果不存在則執行get_cookie把cookie寫入文件。登入時最好不要用無頭模式,因為偶爾會出現滑動驗證碼。
前程無憂手機(jī)短信一天只能發(fā)送三條,保存cookie下次登陸用
def get_job():
    """Search 51job for a keyword typed by the user.

    Opens the Shanghai job-search page, submits the keyword from stdin, and
    returns ``(url, page)`` — the first result page's URL and HTML source.

    Bug fix: "°reefrom" in the URL was a mis-decoded "&degreefrom"
    (HTML entity &deg;), which corrupted the query string.
    """
    driver.get("https://search.51job.com/list/020000,000000,0000,00,9,99,%2520,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=")
    sleep(2)
    job = input("輸入職位:")
    driver.find_element_by_id("kwdselectid").send_keys(job)
    driver.find_element_by_xpath('//button[@class="p_but"]').click()
    url = driver.current_url
    page = driver.page_source
    return url, page
在職位搜索獲取職位搜索結(jié)果,需要返回頁面源碼和地址



分析頁碼結(jié)構(gòu)html前的是頁碼,全部頁碼數(shù)量通過共XX頁得到
def get_pages(url, page):
    """Build the URL of every result page from the first page.

    url:  URL of the first search-result page (its path ends with
          ",<n>.html", where <n> is the page number).
    page: HTML source of that page; the total page count is read from the
          "共XX頁" text inside <span class="td">.
    Returns a list with one URL per result page (pages 1..total).

    Bug fixes: regexes are now raw strings and the dot in ".html" is
    escaped — the old pattern "\\d.html" matched any character after the
    digit and only handled a single-digit page number.
    """
    tree = etree.HTML(page)
    counter_text = tree.xpath('//span[@class="td"]/text()')[0]
    total_page = int(re.findall(r"(\d+)", counter_text)[0])
    return [re.sub(r"\d+\.html", f"{i}.html", url) for i in range(1, total_page + 1)]
獲取全部頁碼

def get_job_code(url):
    """Fetch one search-result page and return the numeric job ids on it.

    Uses the module-level logged-in ``session``. Each result row links to
    "https://jobs.51job.com/.../<id>.html"; the id is extracted per href.

    Bug fix: the original ran the regex over ``str(list_of_hrefs)`` — the
    textual repr of the list — which worked only by accident; we now scan
    each href directly, with a raw pattern and an escaped dot.
    """
    r = session.get(url, headers=headers)
    tree = etree.HTML(r.text)
    hrefs = tree.xpath('//div[@class="el"]/p/span/a/@href')
    job_id = []
    for href in hrefs:
        job_id.extend(re.findall(r"/(\d+)\.html", href))
    return job_id
獲取職位id

修改id請(qǐng)求網(wǎng)址到競爭力分析頁面
def get_info(job_id):
    """Scrape the competitiveness-analysis popup for one job id.

    Requests the "bounce window" page for the job, pulls out the match-level
    verdict, the company/position text, and the score table, and returns a
    record dict keyed for MongoDB (``_id`` is the job id).
    NOTE(review): the detail URL hard-codes the "shanghai" city segment —
    confirm this holds for searches outside Shanghai.
    """
    href = f"https://i.51job.com/userset/bounce_window_redirect.php?jobid={job_id}&redirect_type=2"
    r = session.get(href, headers=headers)
    r.encoding = r.apparent_encoding  # page charset is not declared reliably
    tree = etree.HTML(r.text)
    rating = tree.xpath('//div[@class="warn w1"]//text()')[0].strip()
    # Non-empty, stripped text fragments from the left (company) column
    # and the right (score) column.
    company = [t.strip() for t in tree.xpath('//div[@class="lf"]//text()') if t.strip()]
    scores = [t.strip() for t in tree.xpath('//ul[@class="rt"]//text()') if t.strip()]
    url = f"https://jobs.51job.com/shanghai/{job_id}.html?s=03&t=0"
    return {"公司": company[1], "職位": company[0], "匹配度": rating, scores[3]: scores[2], "鏈接": url, "_id": job_id}

抓取競爭力分析頁面,返回一個(gè)字典
主程序
# Load saved cookies; run the interactive SMS login first if no cookie
# file exists yet. A context manager replaces the manual open/close pair.
if not os.path.exists("cookie.json"):
    get_cookie()
with open("cookie.json", "r") as f:
    cookies = json.loads(f.read())
檢查cookie文件並載入cookie;若文件不存在則執行get_cookie()把cookie保存到文件。
# Replay the saved cookies into both the requests session (for scraping)
# and the webdriver (for the search UI), then grab the first result page.
# Bug fix: these statements were mangled onto a single line, which is not
# valid Python; restored as separate statements.
session = requests.Session()
for cookie in cookies:
    driver.add_cookie(cookie)
    session.cookies.set(cookie['name'], cookie['value'])
url, page = get_job()
driver.close()  # the browser is no longer needed once we have url + page
在session和webdriver寫入cookie登陸
獲取第一頁和url后webdriver就可以關(guān)掉了
# Collect every job id from every result page.
# Bug fix: originally mangled onto one line (invalid Python).
code = []
for page_url in get_pages(url, page):
    code = code + get_job_code(page_url)
獲取的職位id添加到列表
import pymongo

client = pymongo.MongoClient("localhost", 27017)
db = client["job_he"]
job_info = db["job_info"]
for job_id in code:
    try:
        # _id is the job id, so find_one() is a cheap dedup check that
        # skips the network round-trip for jobs already stored.
        if not job_info.find_one({"_id": job_id}):
            info = get_info(job_id)
            sleep(1)  # throttle requests to the site
            job_info.insert_one(info)
            print(info, "插入成功")
    except Exception as e:
        # Bug fix: narrow the bare ``except:`` and report the failing id —
        # the original printed the entire id list on every error.
        print(job_id, e)
龜速爬取,用MongoDB保存結果,職位id作為索引_id;插入之前檢查id是否存在,簡單去重以減少訪問。

吃完飯已經(jīng)抓到8000個(gè)職位了,篩選找到127個(gè)匹配度好的,開始批量投遞

在登入狀態下點擊「申請職位」按鈕,用webdriver完成。
# Query the stored analyses for well-matched, high-scoring jobs and click
# "apply" on each posting.
# Bug fixes: Mongo operator keys must be quoted strings in Python source
# ({$regex: ...} is JavaScript shell syntax and a SyntaxError here), and
# driver.get() needs the job URL, not the whole document.
# NOTE(review): $gte on "80" compares strings, and driver.close() was called
# earlier — a fresh logged-in driver is needed before this loop; confirm
# against the full script.
for doc in job_info.find({"匹配度": {"$regex": "排名很好"}, "綜合競爭力得分": {"$gte": "80"}}):
    print(doc)
    try:
        driver.get(doc["鏈接"])
        driver.find_element_by_id("app_ck").click()  # the "apply" checkbox/button
        sleep(2)
    except Exception:
        pass  # best-effort: skip postings whose page lacks the apply control
用cookie登陸簡單for循環(huán)投遞,在Mongodb里查表,正則篩選匹配度和競爭力得分獲取所有匹配結(jié)果

投遞成功
代碼
# Imports and WebDriver setup for the 51job auto-apply script.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
import re
from lxml import etree
import requests
import os
import json

chrome_options = Options()
# Headless stays disabled: the login flow sometimes shows a slider captcha.
# chrome_options.add_argument('--headless')

# Raw string avoids any accidental escape sequences in the Windows path.
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=r'D:\python\chromedriver.exe')
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"}
# Visit the search page first so cookies can later be written for this domain.
# Bug fix: "°reefrom" was a mis-decoded "&degreefrom" (HTML entity &deg;).
driver.get("https://search.51job.com/list/020000,000000,0000,00,9,99,%2520,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=")
def get_cookie():
    """Log in to 51job with an SMS code and save the cookies to cookie.json.

    Interactive: reads the phone number and SMS code from stdin and drives
    the login form through the shared webdriver. The cookies are saved so
    later runs can skip login (51job allows only three SMS codes per day).
    """
    driver.get("https://login.51job.com/login.php?loginway=1&lang=c&url=")
    sleep(2)
    mobile = input("輸入手機號:")
    driver.find_element_by_id("loginname").send_keys(mobile)
    driver.find_element_by_id("btn7").click()  # triggers sending the SMS
    sleep(1)
    sms_code = input("輸入短信:")
    driver.find_element_by_id("phonecode").send_keys(sms_code)
    driver.find_element_by_id("login_btn").click()
    sleep(2)
    with open("cookie.json", "w") as f:
        json.dump(driver.get_cookies(), f)
def get_job():
    """Search 51job for a keyword typed by the user.

    Returns ``(url, page)`` — the first result page's URL and HTML source.

    Bug fix: "°reefrom" in the URL was a mis-decoded "&degreefrom"
    (HTML entity &deg;), which corrupted the query string.
    """
    driver.get("https://search.51job.com/list/020000,000000,0000,00,9,99,%2520,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=")
    sleep(2)
    job = input("輸入職位:")
    driver.find_element_by_id("kwdselectid").send_keys(job)
    driver.find_element_by_xpath('//button[@class="p_but"]').click()
    url = driver.current_url
    page = driver.page_source
    return url, page
def close_driver():
    """Close the shared WebDriver window once scraping no longer needs it."""
    driver.close()
def get_pages(url, page):
    """Return the URL of every result page derived from the first page.

    url:  URL of page 1 of the search results (path ends ",<n>.html").
    page: its HTML; the total count comes from the "共XX頁" text in
          <span class="td">.

    Bug fixes: raw-string regexes and an escaped dot — the old "\\d.html"
    matched any character after the digit and assumed a one-digit page no.
    """
    tree = etree.HTML(page)
    counter_text = tree.xpath('//span[@class="td"]/text()')[0]
    total_page = int(re.findall(r"(\d+)", counter_text)[0])
    return [re.sub(r"\d+\.html", f"{i}.html", url) for i in range(1, total_page + 1)]
def get_job_code(url):
    """Fetch one result page via the logged-in session; return its job ids.

    Bug fix: the original regex scanned ``str(list_of_hrefs)`` — the repr of
    the list — which only worked by accident; each href is now scanned
    directly with a raw pattern and an escaped dot.
    """
    r = session.get(url, headers=headers)
    tree = etree.HTML(r.text)
    hrefs = tree.xpath('//div[@class="el"]/p/span/a/@href')
    job_id = []
    for href in hrefs:
        job_id.extend(re.findall(r"/(\d+)\.html", href))
    return job_id
def get_info(job_id):
    """Fetch the competitiveness-analysis popup for a job id; return a dict.

    The returned record carries the company, position, match verdict, the
    score row from the table, the job-detail URL, and ``_id`` = job id for
    MongoDB deduplication.
    NOTE(review): the detail URL hard-codes "shanghai" — verify for other
    cities.
    """
    href = f"https://i.51job.com/userset/bounce_window_redirect.php?jobid={job_id}&redirect_type=2"
    r = session.get(href, headers=headers)
    r.encoding = r.apparent_encoding  # charset detection; page header is unreliable
    tree = etree.HTML(r.text)
    rating = tree.xpath('//div[@class="warn w1"]//text()')[0].strip()
    company = [t.strip() for t in tree.xpath('//div[@class="lf"]//text()') if t.strip()]
    scores = [t.strip() for t in tree.xpath('//ul[@class="rt"]//text()') if t.strip()]
    url = f"https://jobs.51job.com/shanghai/{job_id}.html?s=03&t=0"
    return {"公司": company[1], "職位": company[0], "匹配度": rating, scores[3]: scores[2], "鏈接": url, "_id": job_id}
# ---- main script ----
# Log in (or reuse saved cookies), collect all job ids for a keyword, and
# store the competitiveness analysis of each new job in MongoDB.
if not os.path.exists("cookie.json"):
    get_cookie()
with open("cookie.json", "r") as f:  # context manager replaces open/close pair
    cookies = json.loads(f.read())

# Replay cookies into both the requests session and the webdriver.
session = requests.Session()
for cookie in cookies:
    driver.add_cookie(cookie)
    session.cookies.set(cookie['name'], cookie['value'])
url, page = get_job()
driver.close()  # browser no longer needed once we have url + page

code = []
for page_url in get_pages(url, page):
    code = code + get_job_code(page_url)

import pymongo

client = pymongo.MongoClient("localhost", 27017)
db = client["job_he"]
job_info = db["job_info"]
for job_id in code:
    try:
        # _id is the job id: find_one() is a cheap dedup check.
        if not job_info.find_one({"_id": job_id}):
            info = get_info(job_id)
            sleep(1)  # throttle requests to the site
            job_info.insert_one(info)
            print(info)
            print("插入成功")
    except Exception as e:
        # Bug fix: narrow the bare ``except:`` and report the failing id —
        # the original printed the entire id list on every error.
        print(job_id, e)
總結(jié)
以上就是這篇文章的全部內(nèi)容了,希望本文的內(nèi)容對(duì)大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價(jià)值,謝謝大家對(duì)腳本之家的支持。
相關(guān)文章
導(dǎo)入pytorch時(shí)libmkl_intel_lp64.so找不到問題解決
這篇文章主要為大家介紹了導(dǎo)入pytorch時(shí)libmkl_intel_lp64.so找不到問題解決示例,有需要的朋友可以借鑒參考下,希望能夠有所幫助,祝大家多多進(jìn)步,早日升職加薪2022-06-06
python簡單幾步獲取各種DOS命令顯示的內(nèi)容詳解流程
你會(huì)用python獲取各種DOS命令顯示的內(nèi)容核心嗎?說的可不是返回值,是用system()函數(shù)調(diào)用windows操作系統(tǒng)的DOS命令來做點(diǎn)事情,需要的朋友可以參考下2021-10-10
關(guān)于numpy和torch.tensor的張量的操作
這篇文章主要介紹了關(guān)于numpy和torch.tensor的張量的操作,具有很好的參考價(jià)值,希望對(duì)大家有所幫助。如有錯(cuò)誤或未考慮完全的地方,望不吝賜教2023-02-02
Python內(nèi)置函數(shù)之filter map reduce介紹
Python內(nèi)置了一些非常有趣、有用的函數(shù),如:filter、map、reduce,都是對(duì)一個(gè)集合進(jìn)行處理,filter很容易理解用于過濾,map用于映射,reduce用于歸并. 是Python列表方法的三架馬車2014-11-11
基于Python實(shí)現(xiàn)簡單的學(xué)生點(diǎn)名系統(tǒng)
現(xiàn)在的學(xué)生大部分都很積極,會(huì)主動(dòng)舉手回答問題。但是,也會(huì)遇到一些不好的情況,比如年級(jí)越高主動(dòng)舉手的人越少,所以本文做了一個(gè)隨機(jī)的學(xué)生點(diǎn)名系統(tǒng)可以幫老師解決這些問題2022-09-09
Pytorch實(shí)現(xiàn)將label變成one hot編碼的兩種方式
這篇文章主要介紹了Pytorch實(shí)現(xiàn)將label變成one hot編碼的兩種方式,具有很好的參考價(jià)值,希望對(duì)大家有所幫助。如有錯(cuò)誤或未考慮完全的地方,望不吝賜教2023-02-02
python機(jī)器學(xué)習(xí)之隨機(jī)森林(七)
這篇文章主要為大家詳細(xì)介紹了python機(jī)器學(xué)習(xí)之隨機(jī)森林,具有一定的參考價(jià)值,感興趣的小伙伴們可以參考一下2018-03-03

