Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
update_lie_category
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
0fc3ae3e
authored
Mar 26, 2019
by
lichenggang
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
init
parent
c920f231
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
151 additions
and
0 deletions
platform/szlc_cate.py
utils/mysqlopera.py
platform/szlc_cate.py
0 → 100644
View file @
0fc3ae3e
#!encoding:utf-8
import
requests
from
lxml
import
etree
import
re
from
utils.mysqlopera
import
MySqlOperator
# Module-wide database helper; every query in this script obtains its cursor
# from operator.db and commits through operator.db.
# NOTE(review): 'szlc' is presumably the name of a connection/config entry
# understood by MySqlOperator -- confirm in utils/mysqlopera.py.
operator = MySqlOperator('szlc')
# Request headers sent when downloading the catalog page.
#
# Compared with a raw browser capture, the following are left out on purpose:
#   * "authority" / "method" / "path" / "scheme": these are HTTP/2
#     pseudo-headers (":authority", ...) describing the request line; sent
#     as ordinary header fields they are meaningless.
#   * "br" in accept-encoding: the HTTP client can only decode Brotli when an
#     optional package is installed, so advertising it risks an undecodable
#     response body.
#   * "if-modified-since": a conditional request allows the server to answer
#     "304 Not Modified" with an empty body, leaving nothing to parse.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "accept-encoding": "gzip, deflate",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
def get_szlc_all():
    """Scrape all first- and second-level categories from the catalog page.

    Returns:
        A list with one dict per first-level category. Each dict holds
        ``name``, ``num`` (the integer found in parentheses after the name),
        ``url``, ``level`` (1), ``islast`` (0), ``parent_id`` (0) and
        ``2_list``. ``2_list`` is a list of dicts describing the
        second-level categories, each holding ``name``, ``num``, ``url``,
        ``level`` (2) and ``islast`` (1).

    Raises:
        requests.RequestException: the page could not be fetched in time or
            the server answered with an HTTP error status.
        IndexError: a category node does not have the expected link/text
            layout or its label is not of the form ``name (count)``.
    """
    level_name_pattern = re.compile(r'([^\(]*) \(')
    level_num_pattern = re.compile(r'[^\(]*\((\d+)\)')
    level2_name_pattern = re.compile(r'([^\(]*)\(')

    url = 'https://www.szlcsc.com/catalog.html'
    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into nothing.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    dom_html = etree.HTML(resp.text)

    categories = []
    for dom in dom_html.xpath('//div[@class="item"]'):
        # The label sits in the third text node of the item's direct links.
        label = dom.xpath('./a/text()')[2]
        per_1_cate = {
            'name': level_name_pattern.findall(label)[0].strip(),
            'num': int(level_num_pattern.findall(label)[0]),
            'url': dom.xpath('./a/@href')[0],
            'level': 1,
            'islast': 0,
            'parent_id': 0,
            '2_list': [],
        }

        for dom_cate_2 in dom.xpath('.//div[@class="child-item"]'):
            label_2 = dom_cate_2.xpath('.//a/text()')[0]
            per_2_cate = {
                'name': level2_name_pattern.findall(label_2)[0].strip(),
                'num': int(level_num_pattern.findall(label_2)[0]),
                'url': dom_cate_2.xpath('.//a/@href')[0],
                # These two describe the child itself; they must not
                # overwrite the parent's level/islast.
                'level': 2,
                'islast': 1,
            }
            per_1_cate['2_list'].append(per_2_cate)

        categories.append(per_1_cate)

    return categories
def clean_db():
    """Set is_show to 0 for every row of lie_category and commit."""
    hide_everything = "update lie_category set is_show = 0"
    with operator.db.cursor() as cur:
        cur.execute(hide_everything)
    operator.db.commit()
def get_old_allname_and_cat_id():
    """Fetch the cat_id and cat_name columns of every row in lie_category.

    Returns:
        Whatever ``cursor.fetchall()`` yields for that query.
    """
    query = "select cat_id,cat_name from lie_category "
    with operator.db.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
    return rows
def add_cat_id(all, old):
    """Attach existing IDs to scraped categories and store the first level.

    Matching phase: a row of ``old`` matches a category when the category's
    ``name`` is an element of the row; the row's first element becomes the
    category's ``cat_id``. For first-level categories the first matching row
    wins; for second-level categories every row is examined, so the last
    matching row wins.

    Storage phase: each first-level category that has a truthy ``cat_id`` is
    updated in lie_category (parent 0, shown, level 1, not last); every other
    first-level category is inserted as a new row. The transaction is then
    committed.

    Args:
        all: list of first-level category dicts, each with a ``2_list`` of
            second-level dicts. Modified in place.
        old: iterable of rows whose first element is the category ID.

    Returns:
        The same ``all`` list.
    """
    update_sql = (
        'UPDATE lie_category SET parent_id=%s,is_show=%s, url=%s,'
        'islast=%s,level=%s WHERE cat_id=%s'
    )
    insert_sql = (
        'INSERT into lie_category(cat_name,parent_id,sort_order,is_show,url,islast,level,page_count) values'
        '(%s,%s,%s,%s,%s,%s,%s,%s)'
    )

    def assign_id(entry, stop_at_first):
        # Copy the ID of each matching row onto the entry; optionally stop
        # as soon as one row matched.
        for row in old:
            if entry['name'] in row:
                entry['cat_id'] = row[0]
                if stop_at_first:
                    return

    for top in all:
        assign_id(top, True)
        for child in top['2_list']:
            assign_id(child, False)

    with operator.db.cursor() as cur:
        for top in all:
            if top.get('cat_id'):
                statement = update_sql
                params = (0, 1, top['url'], 0, 1, top['cat_id'])
            else:
                statement = insert_sql
                params = (top["name"], 0, 50, 1, top["url"], 0, 1, 0)
            cur.execute(statement, params)
    operator.db.commit()
    return all
def get_new_allname_and_cat_id():
    """Re-read the cat_id and cat_name columns of every lie_category row.

    Intended to be called after new first-level categories were inserted so
    that their generated IDs can be picked up.

    Returns:
        Whatever ``cursor.fetchall()`` yields for that query.
    """
    with operator.db.cursor() as cur:
        cur.execute("select cat_id,cat_name from lie_category ")
        return cur.fetchall()
def add_more_cat_id(all_, new):
    """Give each first-level category the ID of its first matching row.

    A row matches when the category's ``name`` is one of the row's elements;
    the row's first element is then stored as ``cat_id``. Categories without
    a matching row are left untouched.

    Args:
        all_: list of category dicts, modified in place.
        new: iterable of rows whose first element is the category ID.

    Returns:
        The same ``all_`` list.
    """
    not_found = object()
    for category in all_:
        wanted = category['name']
        cat_id = next((row[0] for row in new if wanted in row), not_found)
        if cat_id is not not_found:
            category['cat_id'] = cat_id
    return all_
def over(all_):
    """Store every second-level category under its first-level parent.

    A second-level category with a truthy ``cat_id`` is updated in
    lie_category (parent set to the first-level ``cat_id``, shown, level 2,
    last); one without is inserted as a new row with that parent. The
    transaction is committed once all categories are processed.

    Args:
        all_: list of first-level category dicts; each must carry ``cat_id``
            (when it has children) and a ``2_list`` of second-level dicts.
    """
    update_sql = (
        'UPDATE lie_category SET parent_id=%s,is_show=%s, url=%s,'
        'islast=%s,level=%s WHERE cat_id=%s'
    )
    insert_sql = (
        'INSERT into lie_category(cat_name,parent_id,sort_order,is_show,url,islast,level,page_count) values'
        '(%s,%s,%s,%s,%s,%s,%s,%s)'
    )

    with operator.db.cursor() as cur:
        for parent in all_:
            for child in parent['2_list']:
                if child.get('cat_id'):
                    statement = update_sql
                    params = (parent['cat_id'], 1, child['url'], 1, 2, child['cat_id'])
                else:
                    statement = insert_sql
                    params = (child["name"], parent['cat_id'], 50, 1, child["url"], 1, 2, 0)
                cur.execute(statement, params)
    operator.db.commit()
if __name__ == '__main__':
    # Scrape before touching the database: if the site is unreachable or its
    # markup changed, the script stops here and no category has been hidden.
    scraped = get_szlc_all()
    if not scraped:
        raise SystemExit('no categories scraped; database left untouched')

    # Snapshot the existing names/IDs, then hide everything; the steps below
    # re-show exactly the categories that are still present on the site.
    old = get_old_allname_and_cat_id()
    clean_db()

    # First level: reuse known IDs or insert new rows, then read back the
    # IDs generated for the newly inserted rows.
    all_ = add_cat_id(scraped, old)
    new = get_new_allname_and_cat_id()
    all_ = add_more_cat_id(all_, new)

    # Second level: attach each child to its parent's ID.
    over(all_)
utils/mysqlopera.py
0 → 100644
View file @
0fc3ae3e
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment