Commit b2cfca39 authored by jimmy

提交

parent cd864674
......@@ -11,8 +11,6 @@ import cyaml
#获取需要爬数据的企业 百度1
def GetCompany(types):
response = requests.get(cyaml.data[cyaml.data["env"]]["url"]+cyaml.data["php-api"]["getcompany"]+"?type="+str(types), headers={"Content-Type": "application/json"})
print(response)
print(cyaml.data[cyaml.data["env"]]["url"]+cyaml.data["php-api"]["getcompany"]+"?type="+str(types))
if response.status_code == 200:
response_data = response.json()
# if response_data.get('name') is not None:
......
......@@ -2,7 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, expect
import re
import json
import datetime
import cyaml
import crawler
#爬虫启动
......@@ -12,7 +12,7 @@ def CrawlerBaidu():
#爬取百度的数据
def GetBaiduCompany(playwright: Playwright) -> int:
browser = playwright.chromium.launch(headless=False)
browser = playwright.chromium.launch(headless=cyaml.data[cyaml.data["env"]]["headless"])
context = browser.new_context(viewport={"width": 800, "height": 600})
page = context.new_page()
#获取需要爬取数据的公司
......
......@@ -2,7 +2,7 @@ import crawler
import requests
from playwright.sync_api import Playwright, sync_playwright
import json
import cyaml
def spider_company(page):
company_nameO, url = crawler.GetCompany(2)
......@@ -37,7 +37,7 @@ def spider_company(page):
def GetBossCompany(p: Playwright) -> None:
browser = p.chromium.launch(headless=False)
browser = p.chromium.launch(headless=cyaml.data[cyaml.data["env"]]["headless"])
context = browser.new_context(viewport={"width": 800, "height": 600})
js = """
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment