Commit 1538ad5f authored by jimmy

提交boss数据

parent 3e7836fa
......@@ -14,6 +14,7 @@ def GetCompany(types):
if response.status_code == 200:
response_data = response.json()
# if response_data.get('name') is not None:
return "上海临方股权投资管理有限公司","https://www.zhipin.com/web/geek/job?query=%E5%A4%A7%E6%97%8F%E6%BF%80%E5%85%89&city=100010000"
return response_data["data"]["company_name"],response_data["data"]["url"]
return "", ""
......
......@@ -21,9 +21,10 @@ def GetBaiduCompany(playwright: Playwright) -> int:
page.goto(url)
all = page.locator(".lemma-summary").all_text_contents()
intro = re.sub(r'\[[\d-]+\]', '', str(all))
crawler.Log(name+"-获取到百度数据:"+intro)
intro_new = intro[2:len(intro) - 2]
crawler.Log(name+"-获取到百度数据:"+intro_new)
if name != "" and intro != '[]':
name2 = crawler.SaveCompanyData(name, 1, intro)
name2 = crawler.SaveCompanyData(name, 1, intro_new)
if name2 != "":
crawler.Log(name2+":百度数据,写入成功")
return 100
......
......@@ -27,9 +27,9 @@ def spider_company(page):
if company_nameO in company_name:
my_obj['intro'] = company_intro
crawler.Log(company_name+"-获取到boss数据:"+company_intro)
name2 = crawler.SaveCompanyData(company_nameO, 1, company_intro)
crawler.Log(company_name+"-获取到boss数据:"+str(company_intro))
name2 = crawler.SaveCompanyData(company_nameO, 2, str(company_intro))
if name2 != "":
crawler.Log(name2 + ":boss数据,写入成功")
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment