Commit 85e1821f authored by jimmy

Commit

parent 1538ad5f
@@ -14,7 +14,8 @@ def GetCompany(types):
     if response.status_code == 200:
         response_data = response.json()
         # if response_data.get('name') is not None:
-        return "上海临方股权投资管理有限公司","https://www.zhipin.com/web/geek/job?query=%E5%A4%A7%E6%97%8F%E6%BF%80%E5%85%89&city=100010000"
+        #return "上海临方股权投资管理有限公司","https://www.zhipin.com/web/geek/job?query=%E5%A4%A7%E6%97%8F%E6%BF%80%E5%85%89&city=100010000"
+        #return "上海临方股权投资管理有限公司","https://baike.baidu.com/item/%E4%B8%8A%E6%B5%B7%E4%B8%B4%E6%96%B9%E8%82%A1%E6%9D%83%E6%8A%95%E8%B5%84%E7%AE%A1%E7%90%86%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8?fromModule=lemma_search-box"
         return response_data["data"]["company_name"],response_data["data"]["url"]
     return "", ""
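This hunk comments out the hard-coded fallback return, so GetCompany now always hands back the name/url pair from the API payload, or two empty strings. A minimal sketch of that control flow follows, assuming the request is made with requests; API_URL and its query parameter are placeholders, not the project's real endpoint, and the surrounding crawler module is not shown in this diff.

import requests

API_URL = "http://example.invalid/api/company"  # hypothetical endpoint, not taken from the diff

def get_company_sketch(types):
    # Mirrors the new GetCompany behaviour: no hard-coded fallback URL anymore.
    response = requests.get(API_URL, params={"type": types}, timeout=10)
    if response.status_code == 200:
        response_data = response.json()
        # The commented-out returns (zhipin.com search page, baike.baidu.com entry)
        # are dead code after this commit; name and url come from the payload.
        return response_data["data"]["company_name"], response_data["data"]["url"]
    # Empty strings signal "nothing to crawl" to the caller (see the second hunk).
    return "", ""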
@@ -6,34 +6,37 @@ import cyaml
 def spider_company(page):
     company_nameO, url = crawler.GetCompany(2)
-    page.goto(url)
-    page.wait_for_timeout(3000)
-    page.wait_for_selector(".input-wrap-text")
-    company_detail_el = page.locator('div.company-card-wrapper a')
-    company_detail_el_count = company_detail_el.count()
-    crawler.Log("company_detail_el.count():"+str(company_detail_el_count))
-
-    my_obj = {'intro': ""}
-    if company_detail_el_count > 0:
-        company_detail_el.first.click()
-        page.wait_for_timeout(1000)
-        page.wait_for_selector("div.info h1.name")
-        company_name = page.locator("div.info h1.name").first.inner_text()
-        company_intro_el = page.locator("div.job-sec > div.fold-text")
-        if company_intro_el.count() > 0:
-            company_intro = company_intro_el.first.inner_text()
-            if company_name in company_nameO:
-                my_obj['intro'] = company_intro
-
-            if company_nameO in company_name:
-                my_obj['intro'] = company_intro
-
-            crawler.Log(company_name+"-获取到boss数据:"+str(company_intro))
-            name2 = crawler.SaveCompanyData(company_nameO, 2, str(company_intro))
-            if name2 != "":
-                crawler.Log(name2 + ":boss数据,写入成功")
-            else:
-                crawler.Log(company_nameO + ":boss数据,写入失败")
+    if url != "":
+        page.goto(url)
+        page.wait_for_timeout(3000)
+        page.wait_for_selector(".input-wrap-text")
+        company_detail_el = page.locator('div.company-card-wrapper a')
+        company_detail_el_count = company_detail_el.count()
+        crawler.Log("company_detail_el.count():"+str(company_detail_el_count))
+
+        my_obj = {'intro': ""}
+        if company_detail_el_count > 0:
+            company_detail_el.first.click()
+            page.wait_for_timeout(1000)
+            page.wait_for_selector("div.info h1.name")
+            company_name = page.locator("div.info h1.name").first.inner_text()
+            company_intro_el = page.locator("div.job-sec > div.fold-text")
+            if company_intro_el.count() > 0:
+                company_intro = company_intro_el.first.inner_text()
+                if company_name in company_nameO:
+                    my_obj['intro'] = company_intro
+
+                if company_nameO in company_name:
+                    my_obj['intro'] = company_intro
+
+                crawler.Log(company_name+"-获取到boss数据:"+str(company_intro))
+                name2 = crawler.SaveCompanyData(company_nameO, 2, str(company_intro))
+                if name2 != "":
+                    crawler.Log(name2 + ":boss数据,写入成功")
+                else:
+                    crawler.Log(company_nameO + ":boss数据,写入失败")
+    else:
+        crawler.SaveCompanyData(company_nameO, 2, "")
 
 def GetBossCompany(p: Playwright) -> None:
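This hunk wraps the whole scraping block in an if url != "": guard and adds an else branch that still records the company with an empty intro, presumably so that an empty URL from GetCompany no longer sends page.goto an invalid argument. A minimal, self-contained sketch of that guard with Playwright's sync API is below; save_company_data is a stand-in for crawler.SaveCompanyData, whose implementation is not part of this diff, and only the selectors are copied from the hunk.

from playwright.sync_api import Page, sync_playwright

def save_company_data(name, source, intro):
    # Stand-in for crawler.SaveCompanyData: just log what would be persisted.
    print(f"save: name={name!r} source={source} intro={intro!r}")
    return name

def spider_company_sketch(page: Page, company_name: str, url: str) -> None:
    if url != "":
        page.goto(url)
        page.wait_for_timeout(3000)
        page.wait_for_selector(".input-wrap-text")
        detail_links = page.locator("div.company-card-wrapper a")
        if detail_links.count() > 0:
            detail_links.first.click()
            page.wait_for_selector("div.info h1.name")
            intro_el = page.locator("div.job-sec > div.fold-text")
            intro = intro_el.first.inner_text() if intro_el.count() > 0 else ""
            save_company_data(company_name, 2, intro)
    else:
        # The new else branch: record the company with an empty intro
        # instead of silently skipping it.
        save_company_data(company_name, 2, "")

if __name__ == "__main__":
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        # An empty url exercises the new fallback path without touching the network.
        spider_company_sketch(page, "some company", "")
        browser.close()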