#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
@Project :all_daily_tasks_code @File :job_another.py
@Author :木子
@Date :2024/2/22 10:10 """
import json
import re

from DrissionPage import ChromiumOptions, ChromiumPage
# Browser launch options used for the whole scraping session.
co = ChromiumOptions()
# Run Chromium without its sandbox.
co.set_argument('--no-sandbox')
# Block all pop-up windows.
co.set_pref('profile.default_content_settings.popups', '0')
# Suppress the "save password?" prompt.
co.set_pref(arg='credentials_enable_service', value=False)
# Route browser traffic through the proxy.
# NOTE(review): the proxy credentials are hard-coded in source — consider
# loading them from the environment. Also confirm that the browser actually
# honours a user:password@ proxy URL; TODO verify against DrissionPage docs.
co.set_proxy('http://1077764809707376640:pMkeLLTz@http-dynamic.xiaoxiangdaili.com:10030')
# Headless mode is intentionally left off; call co.headless(True) to enable it.
page = ChromiumPage(co)
# Captures the JSON payload embedded in the page's __NEXT_DATA__ script tag.
# DOTALL (not MULTILINE) is what allows the capture to span line breaks.
_NEXT_DATA_RE = re.compile(
    r"<script id=\"__NEXT_DATA__\" type=\"application/json\">(.*?)</script>",
    re.DOTALL,
)


def _find_next_data(html):
    """Return the raw ``__NEXT_DATA__`` JSON text found in *html*.

    Args:
        html: Full HTML source of a rendered page.

    Returns:
        The text between the opening and closing ``__NEXT_DATA__`` script
        tags (first occurrence), or ``None`` when the tag is not present.
    """
    match = _NEXT_DATA_RE.search(html)
    return None if match is None else match.group(1)


# Open the site's home page before requesting any search result pages
# (presumably to establish a session first — TODO confirm it is required).
page.get("https://www.lagou.com")
key_words = ["爬虫", "数据分析", "python"]
city_list = ["北京", "上海", "武汉", "郑州", "广州", "深圳"]
for city in city_list:
    for key in key_words:
        # Result pages 1 through 29 for each city/keyword combination.
        for i in range(1, 30):
            url = f"https://www.lagou.com/wn/jobs?pn={i}&kd={key}&city={city}"
            page.get(url)
            raw = _find_next_data(page.html)
            if raw is None:
                # No embedded payload on this page; nothing to save.
                continue
            try:
                # Parse as JSON instead of eval(): the text comes from a remote
                # page, and rewriting null/true/false by string replacement
                # would also corrupt those substrings inside string values.
                data = json.loads(raw)
            except json.JSONDecodeError as exc:
                # One malformed page should not abort the whole crawl.
                print(f"skipping {url}: invalid __NEXT_DATA__ JSON ({exc})")
                continue
            print(data)
            # One output file per city / keyword / page number.
            with open(f"{city}_{key}_{i}.json", "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=4)