Commit 72e607c7 authored by jimmy

提交任意时间

parent adcf2d90
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
...@@ -47,20 +47,20 @@ def SaveCompanyData(name,types,intro): ...@@ -47,20 +47,20 @@ def SaveCompanyData(name,types,intro):
#启动爬虫 #启动爬虫
def CrawlerLaunch(): def CrawlerLaunch():
while True: while True:
randomtime = random.randint(3, 100)
time.sleep(randomtime)
now = datetime.datetime.now() now = datetime.datetime.now()
randomtime = random.randint(3, 120)
print(str(now)+":启动") print(str(now) + ":启动等待时间"+str(randomtime))
time.sleep(randomtime)
crawler_baidu.CrawlerBaidu() crawler_baidu.CrawlerBaidu()
#crawler_boss.CrawlerBoss() #crawler_boss.CrawlerBoss()
# 在进程中执行的任务 # 在进程中执行的任务
def Log(dataS): def Log(dataS):
file = open(str(datetime.date.today()) + ".txt", "a+") with open(str(datetime.date.today()) + ".txt", "a+") as f:
file.write(dataS+"\n") f.write(dataS + "\n")
file.close() f.close()
if __name__ == "__main__": if __name__ == "__main__":
p = multiprocessing.Process(target=CrawlerLaunch) p = multiprocessing.Process(target=CrawlerLaunch)
......
...@@ -12,7 +12,7 @@ def CrawlerBaidu(): ...@@ -12,7 +12,7 @@ def CrawlerBaidu():
#爬取百度的数据 #爬取百度的数据
def GetBaiduCompany(playwright: Playwright) -> None: def GetBaiduCompany(playwright: Playwright) -> None:
browser = playwright.chromium.launch(headless=True) browser = playwright.chromium.launch(headless=False)
context = browser.new_context(viewport={"width": 800, "height": 600}) context = browser.new_context(viewport={"width": 800, "height": 600})
page = context.new_page() page = context.new_page()
#获取需要爬取数据的公司 #获取需要爬取数据的公司
......
...@@ -13,11 +13,6 @@ def spider_company(page): ...@@ -13,11 +13,6 @@ def spider_company(page):
company_detail_el_count = company_detail_el.count() company_detail_el_count = company_detail_el.count()
print("company_detail_el.count():", company_detail_el_count) print("company_detail_el.count():", company_detail_el_count)
my_obj = {'intro': ""} my_obj = {'intro': ""}
post_data = {
"name": company_nameO,
"content": json.dumps(my_obj),
"type": 2
}
if company_detail_el_count > 0: if company_detail_el_count > 0:
company_detail_el.first.click() company_detail_el.first.click()
page.wait_for_timeout(1000) page.wait_for_timeout(1000)
...@@ -38,14 +33,12 @@ def spider_company(page): ...@@ -38,14 +33,12 @@ def spider_company(page):
def GetBossCompany(p: Playwright) -> None: def GetBossCompany(p: Playwright) -> None:
browser = p.chromium.launch(headless=True) browser = p.chromium.launch(headless=True)
context = browser.new_context() context = browser.new_context(viewport={"width": 800, "height": 600})
js = """ js = """
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}}); Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
""" """
page = context.new_page() page = context.new_page()
page.add_init_script(js) page.add_init_script(js)
# spider = BossSpider()
# spider.spider_company(page)
spider_company(page) spider_company(page)
context.close() context.close()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment