Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
crawler-py
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
jimmy蒋政彪
crawler-py
Commits
e401b9b8
Commit
e401b9b8
authored
Aug 29, 2023
by
jimmy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提交
parent
101fb02d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
20 additions
and
4 deletions
+20
-4
crawler.cpython-311.pyc
__pycache__/crawler.cpython-311.pyc
+0
-0
crawler_baidu.cpython-311.pyc
__pycache__/crawler_baidu.cpython-311.pyc
+0
-0
crawler_boss.cpython-311.pyc
__pycache__/crawler_boss.cpython-311.pyc
+0
-0
crawler.py
crawler.py
+1
-1
crawler_baidu.py
crawler_baidu.py
+1
-1
crawler_boss.py
crawler_boss.py
+2
-2
2023-08-29.txt
log/2023-08-29.txt
+16
-0
No files found.
__pycache__/crawler.cpython-311.pyc
View file @
e401b9b8
No preview for this file type
__pycache__/crawler_baidu.cpython-311.pyc
View file @
e401b9b8
No preview for this file type
__pycache__/crawler_boss.cpython-311.pyc
View file @
e401b9b8
No preview for this file type
crawler.py
View file @
e401b9b8
...
@@ -54,7 +54,7 @@ def CrawlerLaunch():
...
@@ -54,7 +54,7 @@ def CrawlerLaunch():
crawler_boss
.
CrawlerBoss
()
crawler_boss
.
CrawlerBoss
()
def
Log
(
dataS
):
def
Log
(
dataS
):
with
open
(
str
(
datetime
.
date
.
today
())
+
".txt"
,
"a+"
)
as
f
:
with
open
(
"log/"
+
str
(
datetime
.
date
.
today
())
+
".txt"
,
"a+"
)
as
f
:
f
.
write
(
dataS
+
"\n"
)
f
.
write
(
dataS
+
"\n"
)
f
.
close
()
f
.
close
()
...
...
crawler_baidu.py
View file @
e401b9b8
...
@@ -8,7 +8,7 @@ import crawler
...
@@ -8,7 +8,7 @@ import crawler
#爬虫启动
#爬虫启动
def
CrawlerBaidu
():
def
CrawlerBaidu
():
with
sync_playwright
()
as
playwright
:
with
sync_playwright
()
as
playwright
:
GetBaiduCompany
(
playwright
)
return
GetBaiduCompany
(
playwright
)
#爬取百度的数据
#爬取百度的数据
def
GetBaiduCompany
(
playwright
:
Playwright
)
->
int
:
def
GetBaiduCompany
(
playwright
:
Playwright
)
->
int
:
...
...
crawler_boss.py
View file @
e401b9b8
...
@@ -11,7 +11,7 @@ def spider_company(page):
...
@@ -11,7 +11,7 @@ def spider_company(page):
page
.
wait_for_selector
(
".input-wrap-text"
)
page
.
wait_for_selector
(
".input-wrap-text"
)
company_detail_el
=
page
.
locator
(
'div.company-card-wrapper a'
)
company_detail_el
=
page
.
locator
(
'div.company-card-wrapper a'
)
company_detail_el_count
=
company_detail_el
.
count
()
company_detail_el_count
=
company_detail_el
.
count
()
crawler
.
Log
(
"company_detail_el.count():"
+
company_detail_el_count
)
crawler
.
Log
(
"company_detail_el.count():"
+
str
(
company_detail_el_count
)
)
my_obj
=
{
'intro'
:
""
}
my_obj
=
{
'intro'
:
""
}
if
company_detail_el_count
>
0
:
if
company_detail_el_count
>
0
:
...
@@ -37,7 +37,7 @@ def spider_company(page):
...
@@ -37,7 +37,7 @@ def spider_company(page):
def
GetBossCompany
(
p
:
Playwright
)
->
None
:
def
GetBossCompany
(
p
:
Playwright
)
->
None
:
browser
=
p
.
chromium
.
launch
(
headless
=
True
)
browser
=
p
.
chromium
.
launch
(
headless
=
False
)
context
=
browser
.
new_context
(
viewport
=
{
"width"
:
800
,
"height"
:
600
})
context
=
browser
.
new_context
(
viewport
=
{
"width"
:
800
,
"height"
:
600
})
js
=
"""
js
=
"""
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
...
...
log/2023-08-29.txt
0 → 100644
View file @
e401b9b8
大成生化科技集团有限公司-获取到百度数据:[]
大成生化科技集团有限公司:百度数据,写入失败
威诚国际控股有限公司-获取到百度数据:[]
威诚国际控股有限公司:百度数据,写入失败
吉林九台农村商业银行股份有限公司-获取到百度数据:['\n吉林九台农村商业银行股份有限公司于2008年12月16日成立。法定代表人高兵,公司经营范围包括:吸收人民币公众存款;发放人民币短期、中期和长期贷款;办理国内结算;办理票据承兑与贴现;代理发行、代理兑付、承销政府债券;买卖政府债券、金融债券,参与货币市场;从事同业拆借;代理收付款项及代理保险业务;提供保险箱服务;代理买卖基金、信托产品及其他理财产品;基金销售;从事银行卡业务;外汇借款、外汇票据的承兑和贴现、外汇担保、自营及代客外汇买卖、外汇存款、外汇贷款、外汇汇款、外币兑换、国际结算、同业外汇拆借和资信调查、咨询、见证,外汇借款、外汇票据的承兑和贴现、外汇担保、即期结售汇、自营及代客外汇买卖;经中国银行业监督管理委员会批准的其他业务;信息服务业务(不含固定网信息服务业务项目)等。\n\xa0\n\n']
吉林九台农村商业银行股份有限公司:百度数据,写入成功
中国枫叶教育集团有限公司-获取到百度数据:[]
中国枫叶教育集团有限公司:百度数据,写入失败
company_detail_el.count():0
亿和精密工业控股有限公司:boss数据,写入成功
美皓医疗集团有限公司-获取到百度数据:[]
美皓医疗集团有限公司:百度数据,写入失败
company_detail_el.count():3
绿景物业-获取到boss数据:深圳市绿景物业管理有限公司成立于1997年,是深圳市绿景地产(绿景中国HK00095)全资子公司,具有国家物业管理企业一级资质。目前管理项目遍布深圳各区,在珠海、惠阳、化州、苏州、益阳等地设有子分公司,涵盖住宅、写字楼、商场、产业园等业态,管理总面积超过400万平方米。未来,绿景物业将以绿景地产的发展为依托,继续秉承“客户至上、精诚服务”的服务宗旨,遵循“专为本、和致远”的价值主张,逐步打造具有自身特色的核心竞争力,成为一个具有行业影响力的物业管理品牌。
人才是绿景物业宝贵的资产。我们提供具有社会竞争力的薪酬福利待遇,以及广阔的发展空间和培训机会,热忱欢迎物业管理精英的加盟,共同开创属于我们的事业。
绿景(中国)地产投资有限公司:boss数据,写入成功
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment