# 导入所需的包
import requests
import time,random
from openpyxl import Workbook
import pymysql.cursors
#@ 连接数据库;
# 这是在本地运行的代理池程序,用于获取代理服务器。
def get_proxy():
    """Fetch one proxy address from the proxy-pool service on localhost.

    Returns:
        The raw response body when the pool answers with HTTP 200
        (presumably a ``host:port`` string -- confirm against the pool
        service), otherwise ``None``. ``None`` is also returned when the
        request itself fails (pool not running, timeout, ...).
    """
    PROXY_POOL_URL = 'http://localhost:5555/random'
    try:
        # A timeout keeps a hung pool service from blocking the scraper forever.
        response = requests.get(PROXY_POOL_URL, timeout=5)
    except requests.exceptions.RequestException:
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError does not cover requests.exceptions.ConnectionError.
        return None
    print(response.text)
    if response.status_code == 200:
        return response.text
    return None
# 用于连接本地 MySQL;也可以不连接数据库,直接写入 Excel
def get_conn():
    """Open a connection to the MySQL server on localhost.

    Returns:
        The connection object produced by ``pymysql.connect``; cursors
        created from it are ``DictCursor`` instances.
    """
    # Server address, account, password, target database and charset.
    settings = {
        'host': 'localhost',
        'user': 'root',
        'password': '123456',
        'db': 'python',
        'charset': 'utf8mb4',
        'cursorclass': pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)
# 将数据写入数据库
def insert(conn, info):
    """Insert one record into the ``python`` table and commit it.

    Args:
        conn: Open database connection providing ``cursor()`` and
            ``commit()``.
        info: Sequence of seven values, bound in order to
            companyShortName, companyFullName, industryField, companySize,
            salary, city and education.
    """
    # Adjacent literals are joined at compile time into a single statement.
    statement = (
        "INSERT INTO `python` "
        "(`companyShortName`, `companyFullName`, `industryField`, "
        "`companySize`, `salary`, `city`, `education`) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)"
    )
    with conn.cursor() as cur:
        cur.execute(statement, info)
    conn.commit()
# 获取当前页面的职位信息
def get_json(url, page, lang_name):
    """Fetch one page of search results and return it as a list of rows.

    The request is sent through the module-level session ``ses`` (created
    by the script entry point), via a proxy from ``get_proxy()`` when one
    is available.

    Args:
        url: Endpoint the search form is POSTed to.
        page: Value sent as the ``pn`` form field.
        lang_name: Value sent as the ``kd`` form field.

    Returns:
        One inner list per entry of ``content.positionResult.result`` in
        the decoded JSON, holding companyShortName, companyFullName,
        industryField, companySize, salary, city and education in that
        order. A key missing from an entry is replaced by ``'无'``.

    Raises:
        KeyError: If the decoded JSON lacks ``content``,
            ``positionResult`` or ``result``.
    """
    fields = (
        'companyShortName',  # company name
        'companyFullName',
        'industryField',
        'companySize',
        'salary',
        'city',
        'education',
    )
    form = {'first': 'false', 'pn': page, 'kd': lang_name}

    proxy = get_proxy()
    if proxy:
        proxy_url = 'http://' + proxy
        # requests selects the proxy by the scheme of the target URL, so the
        # address must be registered for https as well or it is never used
        # for an https:// endpoint.
        proxies = {'http': proxy_url, 'https': proxy_url}
    else:
        # No proxy available: fall back to a direct connection instead of
        # failing on 'http://' + None.
        proxies = None

    payload = ses.post(url, form, proxies=proxies, timeout=10).json()
    postings = payload['content']['positionResult']['result']
    return [[job.get(name, '无') for name in fields] for job in postings]

def main():
    """Scrape postings for each city and store them in MySQL and Excel.

    Every row returned by ``get_json`` is inserted into the database and
    appended to the active worksheet. The database connection is closed
    even when scraping fails part-way; the workbook is saved as
    ``<lang_name>岗位信息.xlsx`` only after all cities completed.
    """
    lang_name = 'python'
    cities = ['北京市', '上海市', '广州市', '深圳市', '杭州市']  # five cities
    # Last page fetched per city. The original loop condition was
    # ``page < 2``, i.e. page 1 only; raise this to fetch more pages.
    last_page = 1

    wb = Workbook()  # open the Excel workbook
    # The sheet is the same for every city, so set it up once.
    wsl = wb.active
    wsl.title = lang_name

    # Database connection; drop this and the insert() call below if the
    # data should only go to Excel.
    conn = get_conn()
    try:
        for city in cities:
            url = 'https://www.lagou.com/jobs/positionAjax.json?city={}&needAddtionalResult=false'.format(city)
            # Advance the page number on each pass; the original reset it
            # to 1 every time and therefore never terminated.
            for page in range(1, last_page + 1):
                info = get_json(url, page, lang_name)
                # time.sleep(random.randint(10,20))
                for row in info:
                    insert(conn, tuple(row))
                    wsl.append(row)
    finally:
        # Release the connection even if a request or insert raised.
        conn.close()
    wb.save('{}岗位信息.xlsx'.format(lang_name))

if __name__ == "__main__":
    # Listing page requested once before scraping starts (presumably so the
    # session picks up the site's cookies -- confirm).
    landing_url = "https://www.lagou.com/jobs/list_python?city=全国性&cl=false&fromSearch=true&labelWords=&suginput="

    # Headers attached to every request made through the shared session.
    my_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
        "Referer": "https://www.lagou.com/jobs/list_Python?city=全国性&cl=false&fromSearch=true&labelWords=&suginput=",
        "Content-Type": "application/x-www-form-urlencoded;charset = UTF-8"
    }

    # time.sleep(5)
    # Module-level session; get_json() posts through this object.
    ses = requests.session()
    ses.headers.update(my_headers)
    ses.get(landing_url)

    main()












# 文章来源于网络,如有侵权请联系站长 QQ 61910465 删除。
# 本文版权归趣营销 www.SEOgUrublog.com 所有,如需转载请注明出处;竞价开户托管、SEO 优化请联系 QQ 61910465。