Commit 44f58805 authored by jimmy蒋政彪

Merge branch 'dev-charles-private' into 'master'

fixed:百科元素class名称改动拉取异常修复

See merge request !3
parents aa3783b6 7dd787f6
......@@ -32,7 +32,8 @@ def GetCompany(types):
# 重新放入采集队列
def DoSpiderCompany(company, crawler_website):
post_data = {
"company_name": company
"company_name": company,
"crawler_website": crawler_website
}
json_data = json.dumps(post_data)
response = requests.post(cyaml.data[cyaml.data["env"]]["url"] + cyaml.data["php-api"]["respidercompany"],
......
......@@ -24,7 +24,7 @@ def GetBaiduCompany(playwright: Playwright) -> int:
crawler.Log(name + "-百度开始请求数据:" + url)
page.goto(url)
all_summary = page.locator(".lemma-summary").all_text_contents()
all_summary = page.locator('div[class^="lemmaSummary"]').all_text_contents()
intro = re.sub(r'\[[\d-]+\]', '', str(all_summary))
crawler.Log(name + "-获取到百度数据:" + intro)
try:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment