Commit 46d59467 authored by jimmy

提交

parent 85e1821f
......@@ -21,10 +21,14 @@ def GetBaiduCompany(playwright: Playwright) -> int:
page.goto(url)
all = page.locator(".lemma-summary").all_text_contents()
intro = re.sub(r'\[[\d-]+\]', '', str(all))
intro_new = intro[2:len(intro) - 2]
crawler.Log(name+"-获取到百度数据:"+intro_new)
intro_new =str(intro[2:len(intro) - 2])
new_string1 = re.sub(r'\\n', "", intro_new)
new_string2 = re.sub(r'\\xa0', "", new_string1)
crawler.Log(name+"-获取到百度数据:"+new_string2)
if name != "" and intro != '[]':
name2 = crawler.SaveCompanyData(name, 1, intro_new)
name2 = crawler.SaveCompanyData(name, 1, new_string2)
if name2 != "":
crawler.Log(name2+":百度数据,写入成功")
return 100
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment