Commit adcf2d90 authored by jimmy

提交

parent 7d4d8b4c
久久王食品国际有限公司:写入成功
...@@ -4,7 +4,7 @@ import time ...@@ -4,7 +4,7 @@ import time
import json import json
import crawler_baidu import crawler_baidu
import crawler_boss import crawler_boss
import random
import requests import requests
#获取需要爬数据的企业 百度1 #获取需要爬数据的企业 百度1
...@@ -47,17 +47,20 @@ def SaveCompanyData(name,types,intro): ...@@ -47,17 +47,20 @@ def SaveCompanyData(name,types,intro):
#启动爬虫 #启动爬虫
def CrawlerLaunch(): def CrawlerLaunch():
while True: while True:
time.sleep(3) randomtime = random.randint(3, 100)
time.sleep(randomtime)
now = datetime.datetime.now() now = datetime.datetime.now()
print(str(now)+":启动") print(str(now)+":启动")
# crawler_baidu.CrawlerBaidu() crawler_baidu.CrawlerBaidu()
crawler_boss.CrawlerBoss() #crawler_boss.CrawlerBoss()
# 在进程中执行的任务 # 在进程中执行的任务
def Log():
file = open(str(datetime.date.today()) + ".txt", "a")
def Log(dataS):
file = open(str(datetime.date.today()) + ".txt", "a+")
file.write(dataS+"\n")
file.close()
if __name__ == "__main__": if __name__ == "__main__":
p = multiprocessing.Process(target=CrawlerLaunch) p = multiprocessing.Process(target=CrawlerLaunch)
......
...@@ -5,8 +5,6 @@ import json ...@@ -5,8 +5,6 @@ import json
import datetime import datetime
import crawler import crawler
#爬虫启动 #爬虫启动
def CrawlerBaidu(): def CrawlerBaidu():
with sync_playwright() as playwright: with sync_playwright() as playwright:
...@@ -25,14 +23,9 @@ def GetBaiduCompany(playwright: Playwright) -> None: ...@@ -25,14 +23,9 @@ def GetBaiduCompany(playwright: Playwright) -> None:
intro = re.sub(r'\[[\d-]+\]', '', str(all)) intro = re.sub(r'\[[\d-]+\]', '', str(all))
name2 = crawler.SaveCompanyData(name,1,intro) name2 = crawler.SaveCompanyData(name,1,intro)
if name2 != "" and intro != None: if name2 != "" and intro != None:
file = open(str(datetime.date.today()) + ".txt", "a") crawler.Log(name2+":写入成功")
file.write(name2+":写入成功\n")
file.close()
else: else:
file = open("example.txt", "a") crawler.Log(name2 + ":写入失败")
file.write(name2+":写入失败\n")
file.close()
# --------------------- # ---------------------
context.close() context.close()
......
...@@ -37,7 +37,7 @@ def spider_company(page): ...@@ -37,7 +37,7 @@ def spider_company(page):
name2 = crawler.SaveCompanyData(company_nameO, 1, json.dumps(my_obj)) name2 = crawler.SaveCompanyData(company_nameO, 1, json.dumps(my_obj))
def GetBossCompany(p: Playwright) -> None: def GetBossCompany(p: Playwright) -> None:
browser = p.chromium.launch(headless=False) browser = p.chromium.launch(headless=True)
context = browser.new_context() context = browser.new_context()
js = """ js = """
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}}); Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment