Commit 834ae550 by lichenggang

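Update szlc: revise the category-scraping XPath selectors and name regex, set level/islast on the level-2 category dict, add 'x51dzw' to COMPANY_PN_LIST, and add szlc test DB credentials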

parent 757a2f0a
Showing with 22 additions and 18 deletions
#!encoding:utf-8 #!encoding:utf-8
import pymysql
import requests import requests
from lxml import etree from lxml import etree
import re import re
...@@ -22,8 +23,7 @@ headers = { ...@@ -22,8 +23,7 @@ headers = {
# 拿到所有一级二级分类数据 # 拿到所有一级二级分类数据
def get_szlc_all(): def get_szlc_all():
# level_name_pattern = re.compile(r'\d+\.(.*)\(.*\)') level_name_pattern = re.compile(r'\d+\.([^\(]*) \(')
level_name_pattern = re.compile(r'([^\(]*) \(')
level_num_pattern = re.compile(r'[^\(]*\((\d+)\)') level_num_pattern = re.compile(r'[^\(]*\((\d+)\)')
level2_name_pattern = re.compile(r'([^\(]*)\(') level2_name_pattern = re.compile(r'([^\(]*)\(')
url = 'https://www.szlcsc.com/catalog.html' url = 'https://www.szlcsc.com/catalog.html'
...@@ -32,30 +32,30 @@ def get_szlc_all(): ...@@ -32,30 +32,30 @@ def get_szlc_all():
dom_html = etree.HTML(resp.text) dom_html = etree.HTML(resp.text)
# cates=dom_html.xpath('//div[@class="catalog_a"]//dt/a/text()') # cates=dom_html.xpath('//div[@class="catalog_a"]//dt/a/text()')
doms_cate = dom_html.xpath('//div[@class="item"]') doms_cate = dom_html.xpath('//div[@class="catalog_a"]//dl')
all = [] all = []
for dom in doms_cate: for dom in doms_cate:
per_1_cate = {} per_1_cate = {}
print(dom.xpath('./a/text()')[2])
per_1_cate['name'] = level_name_pattern.findall(dom.xpath('./a/text()')[2])[0].strip() per_1_cate['name'] = level_name_pattern.findall(dom.xpath('./dt/a/text()')[0])[0].strip()
per_1_cate['num'] = int(level_num_pattern.findall(dom.xpath('./a/text()')[2])[0]) per_1_cate['num'] = int(level_num_pattern.findall(dom.xpath('./dt/a/text()')[0])[0])
per_1_cate['url'] = dom.xpath('./a/@href')[0] per_1_cate['url'] = dom.xpath('./dt/a/@href')[0]
per_1_cate['level'] = 1 per_1_cate['level'] = 1
per_1_cate['islast'] = 0 per_1_cate['islast'] = 0
per_1_cate['parent_id'] = 0 per_1_cate['parent_id'] = 0
doms_2cate = dom.xpath('.//div[@class="child-item"]') doms_2cate = dom.xpath('.//dd')
per_1_cate['2_list'] = [] per_1_cate['2_list'] = []
for dom_cate_2 in doms_2cate: for dom_cate_2 in doms_2cate:
per_2_cate = {} per_2_cate = {}
per_2_cate['name'] = level2_name_pattern.findall(dom_cate_2.xpath('.//a/text()')[0])[0].strip() per_2_cate['name'] = level2_name_pattern.findall(dom_cate_2.xpath('./a/text()')[0])[0].strip()
per_2_cate['num'] = int(level_num_pattern.findall(dom_cate_2.xpath('.//a/text()')[0])[0]) per_2_cate['num'] = int(level_num_pattern.findall(dom_cate_2.xpath('./a/text()')[0])[0])
per_2_cate['url'] = dom_cate_2.xpath('.//a/@href')[0] per_2_cate['url'] = dom_cate_2.xpath('./a/@href')[0]
per_1_cate['level'] = 2 per_2_cate['level'] = 2
per_1_cate['islast'] = 1 per_2_cate['islast'] = 1
per_1_cate['2_list'].append(per_2_cate) per_1_cate['2_list'].append(per_2_cate)
print(per_2_cate)
print(per_1_cate)
all.append(per_1_cate) all.append(per_1_cate)
# #
......
...@@ -7,7 +7,7 @@ import pymysql ...@@ -7,7 +7,7 @@ import pymysql
COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter', COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter',
'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80', 'x258', 'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80', 'x258',
'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic', 'mmic'] 'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic', 'mmic','x51dzw']
def get_env(): def get_env():
...@@ -22,21 +22,25 @@ ENV = get_env() ...@@ -22,21 +22,25 @@ ENV = get_env()
HOST_SET = { HOST_SET = {
'test': '192.168.2.232', 'test': '192.168.2.232',
'test234': '192.168.1.234', 'test234': '192.168.1.234',
'produce': '172.18.137.37' 'produce': '172.18.137.37',
# 'test_szlc': ''
} }
UP_SET = { UP_SET = {
'test': ('ichunt', 'ichunt'), 'test': ('ichunt', 'ichunt'),
'produce': ('bigdata', 'bdYm2yy2mmyzlmlly') 'produce': ('bigdata', 'bdYm2yy2mmyzlmlly'),
'test_szlc': ('szlc', 'szlc#zsyM')
} }
def get_mysql_conf(db): def get_mysql_conf(db):
host = HOST_SET[ENV] host = HOST_SET[ENV]
up = UP_SET[ENV]
if db in COMPANY_PN_LIST: if db in COMPANY_PN_LIST:
db = 'bigdata' db = 'bigdata'
if ENV == 'test': if ENV == 'test':
host = HOST_SET['test234'] host = HOST_SET['test234']
up = UP_SET[ENV] if db in ('szlc',) and ENV=='test':
up=UP_SET['test_szlc']
conf = { conf = {
'host': host, 'host': host,
'port': 3306, 'port': 3306,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment