Commit 834ae550 by lichenggang

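Update szlc: switch category scraping to the catalog_a dl/dt/dd selectors, set level/islast on second-level categories, and add szlc test DB credentials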

parent 757a2f0a
Showing with 22 additions and 18 deletions
#!encoding:utf-8
import pymysql
import requests
from lxml import etree
import re
......@@ -22,8 +23,7 @@ headers = {
# Fetch all first-level and second-level category data
def get_szlc_all():
# level_name_pattern = re.compile(r'\d+\.(.*)\(.*\)')
level_name_pattern = re.compile(r'([^\(]*) \(')
level_name_pattern = re.compile(r'\d+\.([^\(]*) \(')
level_num_pattern = re.compile(r'[^\(]*\((\d+)\)')
level2_name_pattern = re.compile(r'([^\(]*)\(')
url = 'https://www.szlcsc.com/catalog.html'
......@@ -32,30 +32,30 @@ def get_szlc_all():
dom_html = etree.HTML(resp.text)
# cates=dom_html.xpath('//div[@class="catalog_a"]//dt/a/text()')
doms_cate = dom_html.xpath('//div[@class="item"]')
doms_cate = dom_html.xpath('//div[@class="catalog_a"]//dl')
all = []
for dom in doms_cate:
per_1_cate = {}
print(dom.xpath('./a/text()')[2])
per_1_cate['name'] = level_name_pattern.findall(dom.xpath('./a/text()')[2])[0].strip()
per_1_cate['num'] = int(level_num_pattern.findall(dom.xpath('./a/text()')[2])[0])
per_1_cate['url'] = dom.xpath('./a/@href')[0]
per_1_cate['name'] = level_name_pattern.findall(dom.xpath('./dt/a/text()')[0])[0].strip()
per_1_cate['num'] = int(level_num_pattern.findall(dom.xpath('./dt/a/text()')[0])[0])
per_1_cate['url'] = dom.xpath('./dt/a/@href')[0]
per_1_cate['level'] = 1
per_1_cate['islast'] = 0
per_1_cate['parent_id'] = 0
doms_2cate = dom.xpath('.//div[@class="child-item"]')
doms_2cate = dom.xpath('.//dd')
per_1_cate['2_list'] = []
for dom_cate_2 in doms_2cate:
per_2_cate = {}
per_2_cate['name'] = level2_name_pattern.findall(dom_cate_2.xpath('.//a/text()')[0])[0].strip()
per_2_cate['num'] = int(level_num_pattern.findall(dom_cate_2.xpath('.//a/text()')[0])[0])
per_2_cate['url'] = dom_cate_2.xpath('.//a/@href')[0]
per_1_cate['level'] = 2
per_1_cate['islast'] = 1
per_2_cate['name'] = level2_name_pattern.findall(dom_cate_2.xpath('./a/text()')[0])[0].strip()
per_2_cate['num'] = int(level_num_pattern.findall(dom_cate_2.xpath('./a/text()')[0])[0])
per_2_cate['url'] = dom_cate_2.xpath('./a/@href')[0]
per_2_cate['level'] = 2
per_2_cate['islast'] = 1
per_1_cate['2_list'].append(per_2_cate)
print(per_2_cate)
print(per_1_cate)
all.append(per_1_cate)
#
......
......@@ -7,7 +7,7 @@ import pymysql
COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter',
'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80', 'x258',
'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic', 'mmic']
'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic', 'mmic','x51dzw']
def get_env():
......@@ -22,21 +22,25 @@ ENV = get_env()
HOST_SET = {
'test': '192.168.2.232',
'test234': '192.168.1.234',
'produce': '172.18.137.37'
'produce': '172.18.137.37',
# 'test_szlc': ''
}
UP_SET = {
'test': ('ichunt', 'ichunt'),
'produce': ('bigdata', 'bdYm2yy2mmyzlmlly')
'produce': ('bigdata', 'bdYm2yy2mmyzlmlly'),
'test_szlc': ('szlc', 'szlc#zsyM')
}
def get_mysql_conf(db):
host = HOST_SET[ENV]
up = UP_SET[ENV]
if db in COMPANY_PN_LIST:
db = 'bigdata'
if ENV == 'test':
host = HOST_SET['test234']
up = UP_SET[ENV]
if db in ('szlc',) and ENV=='test':
up=UP_SET['test_szlc']
conf = {
'host': host,
'port': 3306,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment