Commit b2cfca39 authored by jimmy

提交

parent cd864674
...@@ -11,8 +11,6 @@ import cyaml ...@@ -11,8 +11,6 @@ import cyaml
#获取需要爬数据的企业 百度1 #获取需要爬数据的企业 百度1
def GetCompany(types): def GetCompany(types):
response = requests.get(cyaml.data[cyaml.data["env"]]["url"]+cyaml.data["php-api"]["getcompany"]+"?type="+str(types), headers={"Content-Type": "application/json"}) response = requests.get(cyaml.data[cyaml.data["env"]]["url"]+cyaml.data["php-api"]["getcompany"]+"?type="+str(types), headers={"Content-Type": "application/json"})
print(response)
print(cyaml.data[cyaml.data["env"]]["url"]+cyaml.data["php-api"]["getcompany"]+"?type="+str(types))
if response.status_code == 200: if response.status_code == 200:
response_data = response.json() response_data = response.json()
# if response_data.get('name') is not None: # if response_data.get('name') is not None:
......
...@@ -2,7 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, expect ...@@ -2,7 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, expect
import re import re
import json import json
import datetime import cyaml
import crawler import crawler
#爬虫启动 #爬虫启动
...@@ -12,7 +12,7 @@ def CrawlerBaidu(): ...@@ -12,7 +12,7 @@ def CrawlerBaidu():
#爬取百度的数据 #爬取百度的数据
def GetBaiduCompany(playwright: Playwright) -> int: def GetBaiduCompany(playwright: Playwright) -> int:
browser = playwright.chromium.launch(headless=False) browser = playwright.chromium.launch(headless=cyaml.data[cyaml.data["env"]]["headless"])
context = browser.new_context(viewport={"width": 800, "height": 600}) context = browser.new_context(viewport={"width": 800, "height": 600})
page = context.new_page() page = context.new_page()
#获取需要爬取数据的公司 #获取需要爬取数据的公司
......
...@@ -2,7 +2,7 @@ import crawler ...@@ -2,7 +2,7 @@ import crawler
import requests import requests
from playwright.sync_api import Playwright, sync_playwright from playwright.sync_api import Playwright, sync_playwright
import json import json
import cyaml
def spider_company(page): def spider_company(page):
company_nameO, url = crawler.GetCompany(2) company_nameO, url = crawler.GetCompany(2)
...@@ -37,7 +37,7 @@ def spider_company(page): ...@@ -37,7 +37,7 @@ def spider_company(page):
def GetBossCompany(p: Playwright) -> None: def GetBossCompany(p: Playwright) -> None:
browser = p.chromium.launch(headless=False) browser = p.chromium.launch(headless=cyaml.data[cyaml.data["env"]]["headless"])
context = browser.new_context(viewport={"width": 800, "height": 600}) context = browser.new_context(viewport={"width": 800, "height": 600})
js = """ js = """
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}}); Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment