Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
update_cate
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
834ae550
authored
Aug 28, 2019
by
lichenggang
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
update szlc
parent
757a2f0a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
18 deletions
plat/szlc/cate_stock.py
utils/mysqlopera.py
plat/szlc/cate_stock.py
View file @
834ae550
#!encoding:utf-8
#!encoding:utf-8
import
pymysql
import
requests
import
requests
from
lxml
import
etree
from
lxml
import
etree
import
re
import
re
...
@@ -22,8 +23,7 @@ headers = {
...
@@ -22,8 +23,7 @@ headers = {
# 拿到所有一级二级分类数据
# 拿到所有一级二级分类数据
def
get_szlc_all
():
def
get_szlc_all
():
# level_name_pattern = re.compile(r'\d+\.(.*)\(.*\)')
level_name_pattern
=
re
.
compile
(
r'\d+\.([^\(]*) \('
)
level_name_pattern
=
re
.
compile
(
r'([^\(]*) \('
)
level_num_pattern
=
re
.
compile
(
r'[^\(]*\((\d+)\)'
)
level_num_pattern
=
re
.
compile
(
r'[^\(]*\((\d+)\)'
)
level2_name_pattern
=
re
.
compile
(
r'([^\(]*)\('
)
level2_name_pattern
=
re
.
compile
(
r'([^\(]*)\('
)
url
=
'https://www.szlcsc.com/catalog.html'
url
=
'https://www.szlcsc.com/catalog.html'
...
@@ -32,30 +32,30 @@ def get_szlc_all():
...
@@ -32,30 +32,30 @@ def get_szlc_all():
dom_html
=
etree
.
HTML
(
resp
.
text
)
dom_html
=
etree
.
HTML
(
resp
.
text
)
# cates=dom_html.xpath('//div[@class="catalog_a"]//dt/a/text()')
# cates=dom_html.xpath('//div[@class="catalog_a"]//dt/a/text()')
doms_cate
=
dom_html
.
xpath
(
'//div[@class="
item"]
'
)
doms_cate
=
dom_html
.
xpath
(
'//div[@class="
catalog_a"]//dl
'
)
all
=
[]
all
=
[]
for
dom
in
doms_cate
:
for
dom
in
doms_cate
:
per_1_cate
=
{}
per_1_cate
=
{}
print
(
dom
.
xpath
(
'./a/text()'
)[
2
])
per_1_cate
[
'name'
]
=
level_name_pattern
.
findall
(
dom
.
xpath
(
'./
a/text()'
)[
2
])[
0
]
.
strip
()
per_1_cate
[
'name'
]
=
level_name_pattern
.
findall
(
dom
.
xpath
(
'./
dt/a/text()'
)[
0
])[
0
]
.
strip
()
per_1_cate
[
'num'
]
=
int
(
level_num_pattern
.
findall
(
dom
.
xpath
(
'./
a/text()'
)[
2
])[
0
])
per_1_cate
[
'num'
]
=
int
(
level_num_pattern
.
findall
(
dom
.
xpath
(
'./
dt/a/text()'
)[
0
])[
0
])
per_1_cate
[
'url'
]
=
dom
.
xpath
(
'./a/@href'
)[
0
]
per_1_cate
[
'url'
]
=
dom
.
xpath
(
'./
dt/
a/@href'
)[
0
]
per_1_cate
[
'level'
]
=
1
per_1_cate
[
'level'
]
=
1
per_1_cate
[
'islast'
]
=
0
per_1_cate
[
'islast'
]
=
0
per_1_cate
[
'parent_id'
]
=
0
per_1_cate
[
'parent_id'
]
=
0
doms_2cate
=
dom
.
xpath
(
'.//d
iv[@class="child-item"]
'
)
doms_2cate
=
dom
.
xpath
(
'.//d
d
'
)
per_1_cate
[
'2_list'
]
=
[]
per_1_cate
[
'2_list'
]
=
[]
for
dom_cate_2
in
doms_2cate
:
for
dom_cate_2
in
doms_2cate
:
per_2_cate
=
{}
per_2_cate
=
{}
per_2_cate
[
'name'
]
=
level2_name_pattern
.
findall
(
dom_cate_2
.
xpath
(
'./
/
a/text()'
)[
0
])[
0
]
.
strip
()
per_2_cate
[
'name'
]
=
level2_name_pattern
.
findall
(
dom_cate_2
.
xpath
(
'./a/text()'
)[
0
])[
0
]
.
strip
()
per_2_cate
[
'num'
]
=
int
(
level_num_pattern
.
findall
(
dom_cate_2
.
xpath
(
'./
/
a/text()'
)[
0
])[
0
])
per_2_cate
[
'num'
]
=
int
(
level_num_pattern
.
findall
(
dom_cate_2
.
xpath
(
'./a/text()'
)[
0
])[
0
])
per_2_cate
[
'url'
]
=
dom_cate_2
.
xpath
(
'./
/
a/@href'
)[
0
]
per_2_cate
[
'url'
]
=
dom_cate_2
.
xpath
(
'./a/@href'
)[
0
]
per_
1
_cate
[
'level'
]
=
2
per_
2
_cate
[
'level'
]
=
2
per_
1
_cate
[
'islast'
]
=
1
per_
2
_cate
[
'islast'
]
=
1
per_1_cate
[
'2_list'
]
.
append
(
per_2_cate
)
per_1_cate
[
'2_list'
]
.
append
(
per_2_cate
)
print
(
per_2_cate
)
print
(
per_1_cate
)
all
.
append
(
per_1_cate
)
all
.
append
(
per_1_cate
)
#
#
...
...
utils/mysqlopera.py
View file @
834ae550
...
@@ -7,7 +7,7 @@ import pymysql
...
@@ -7,7 +7,7 @@ import pymysql
COMPANY_PN_LIST
=
[
'hc360'
,
'huangye88'
,
'pangwo'
,
'coovee'
,
'liebiao'
,
'china'
,
'ic114'
,
'cecb2b'
,
'trustexporter'
,
COMPANY_PN_LIST
=
[
'hc360'
,
'huangye88'
,
'pangwo'
,
'coovee'
,
'liebiao'
,
'china'
,
'ic114'
,
'cecb2b'
,
'trustexporter'
,
'x11467'
,
'x912688'
,
'zgtghccl'
,
'herostart'
,
'testmart'
,
'cc'
,
'eastsoo'
,
'x51sole'
,
'eb80'
,
'x258'
,
'x11467'
,
'x912688'
,
'zgtghccl'
,
'herostart'
,
'testmart'
,
'cc'
,
'eastsoo'
,
'x51sole'
,
'eb80'
,
'x258'
,
'taojindi'
,
'd17'
,
'etlong'
,
'mfqyw'
,
'pe168'
,
'qymgc'
,
'atobo'
,
'czvv'
,
'qic'
,
'mmic'
]
'taojindi'
,
'd17'
,
'etlong'
,
'mfqyw'
,
'pe168'
,
'qymgc'
,
'atobo'
,
'czvv'
,
'qic'
,
'mmic'
,
'x51dzw'
]
def
get_env
():
def
get_env
():
...
@@ -22,21 +22,25 @@ ENV = get_env()
...
@@ -22,21 +22,25 @@ ENV = get_env()
HOST_SET
=
{
HOST_SET
=
{
'test'
:
'192.168.2.232'
,
'test'
:
'192.168.2.232'
,
'test234'
:
'192.168.1.234'
,
'test234'
:
'192.168.1.234'
,
'produce'
:
'172.18.137.37'
'produce'
:
'172.18.137.37'
,
# 'test_szlc': ''
}
}
UP_SET
=
{
UP_SET
=
{
'test'
:
(
'ichunt'
,
'ichunt'
),
'test'
:
(
'ichunt'
,
'ichunt'
),
'produce'
:
(
'bigdata'
,
'bdYm2yy2mmyzlmlly'
)
'produce'
:
(
'bigdata'
,
'bdYm2yy2mmyzlmlly'
),
'test_szlc'
:
(
'szlc'
,
'szlc#zsyM'
)
}
}
def
get_mysql_conf
(
db
):
def
get_mysql_conf
(
db
):
host
=
HOST_SET
[
ENV
]
host
=
HOST_SET
[
ENV
]
up
=
UP_SET
[
ENV
]
if
db
in
COMPANY_PN_LIST
:
if
db
in
COMPANY_PN_LIST
:
db
=
'bigdata'
db
=
'bigdata'
if
ENV
==
'test'
:
if
ENV
==
'test'
:
host
=
HOST_SET
[
'test234'
]
host
=
HOST_SET
[
'test234'
]
up
=
UP_SET
[
ENV
]
if
db
in
(
'szlc'
,)
and
ENV
==
'test'
:
up
=
UP_SET
[
'test_szlc'
]
conf
=
{
conf
=
{
'host'
:
host
,
'host'
:
host
,
'port'
:
3306
,
'port'
:
3306
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment