Commit 31996e88 authored by jimmy

启动

parent 46d59467
......@@ -21,13 +21,12 @@ def GetBaiduCompany(playwright: Playwright) -> int:
page.goto(url)
all = page.locator(".lemma-summary").all_text_contents()
intro = re.sub(r'\[[\d-]+\]', '', str(all))
intro_new =str(intro[2:len(intro) - 2])
new_string1 = re.sub(r'\\n', "", intro_new)
new_string2 = re.sub(r'\\xa0', "", new_string1)
crawler.Log(name+"-获取到百度数据:"+new_string2)
crawler.Log(name+"-获取到百度数据:"+intro)
if name != "" and intro != '[]':
intro_new = str(intro[2:len(intro) - 2])
new_string1 = re.sub(r'\\n', "", intro_new)
new_string2 = re.sub(r'\\xa0', "", new_string1)
name2 = crawler.SaveCompanyData(name, 1, new_string2)
if name2 != "":
crawler.Log(name2+":百度数据,写入成功")
......@@ -36,6 +35,7 @@ def GetBaiduCompany(playwright: Playwright) -> int:
crawler.Log(name + ":百度数据,写入失败")
return 100
else:
name2 = crawler.SaveCompanyData(name, 1, "")
crawler.Log(name +":百度数据,写入失败")
return 100
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment