Commit 9157c712 by lichenggang

add eb80

parent 19bf894c
Showing changes with 60 additions and 2 deletions
from utils.base import Module_Base
import re
# Matches text of the form <script>document.write("...") and captures the
# string argument handed to document.write.
dom_pat = re.compile(r'\<script\>document\.write\(\"(.+)\"\)')
# NOTE(review): `domain` is not referenced in the visible part of this file.
domain = 'http:'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
# NOTE(review): Module_Base presumably exposes the `requests` and `lxml.etree`
# modules as attributes -- confirm against utils.base.
a = Module_Base()
url = 'http://dianzi.eb80.com/gongying/'
# Fetch the listing page once; `resp` holds the response body text.
resp = a.requests.get(url, headers=headers).text
dom = a.etree.HTML(resp)
# <li> nodes under div.trade whose position() is below 25; these become the
# first-level categories.
level1_doms1 = dom.xpath('//div[@class="trade"]//li[position()<25]')
# Every document.write(...) payload found in the raw page text; the first 24
# are parsed as HTML fragments that hold the second-level links.
s = dom_pat.findall(resp)
level1_doms2 = [a.etree.HTML(i) for i in s[:24]]
# Materialise the pairs: a bare zip() is a one-shot iterator on Python 3, so a
# second pass over level1_doms would otherwise see no entries at all.
level1_doms = list(zip(level1_doms1, level1_doms2))
# Remaining <li> nodes (position() >= 25), grouped later under a catch-all
# first-level category.
other_doms = dom.xpath('//div[@class="trade"]//li[position()>=25]')
def get_levels():
    """Assemble the two-level category tree from the parsed listing page.

    Reads the module-level ``level1_doms`` (pairs of a first-level ``<li>``
    node and the parsed fragment holding its child links) and ``other_doms``
    (the remaining ``<li>`` nodes).

    Returns:
        A list of first-level category dicts with the keys ``cat_name``,
        ``url``, ``level``, ``islast``, ``parent_id`` and ``level2_list``.
        Each ``level2_list`` entry is a dict with ``cat_name``, ``url``,
        ``level`` and ``islast``. The nodes in ``other_doms`` are collected
        as children of one extra first-level entry named '其他' that is
        appended last.

    Raises:
        IndexError: if one of the XPath lookups indexed with ``[0]`` yields
            no result.
    """
    heading_text = './/strong//a[1]/text()'
    heading_href = './/strong//a[1]/@href'

    def top_category(name, link):
        # Key order matters for the printed output, so keep it fixed here.
        return {
            'cat_name': name,
            'url': link,
            'level': 1,
            'islast': 0,
            'parent_id': 0,
            'level2_list': [],
        }

    def sub_category(node, text_path, href_path):
        # The name is resolved before the link.
        name = node.xpath(text_path)[0].strip()
        link = node.xpath(href_path)[0]
        return {'cat_name': name, 'url': link, 'level': 2, 'islast': 1}

    tree = []
    for pair in level1_doms:
        heading_node, children_node = pair[0], pair[1]
        name = heading_node.xpath(heading_text)[0].strip()
        link = heading_node.xpath(heading_href)[0]
        parent = top_category(name, link)
        parent['level2_list'].extend(
            sub_category(anchor, './text()', './@href')
            for anchor in children_node.xpath('.//a')
        )
        tree.append(parent)

    # Everything past the paired entries is filed under a catch-all parent.
    leftovers = top_category('其他', '')
    leftovers['level2_list'].extend(
        sub_category(node, heading_text, heading_href) for node in other_doms
    )
    tree.append(leftovers)
    return tree
# Build the category tree once when the script runs and print it for inspection.
levels = get_levels()
print(levels)
@@ -5,7 +5,7 @@ import time
 import pymysql
-COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter', 'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole']
+COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter', 'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80']
 def get_env():
@@ -18,7 +18,7 @@ def get_env():
 ENV = get_env()
 HOST_SET = {
-    'test': '192.168.1.232',
+    'test': '192.168.1.234',
     'test234': '192.168.1.234',
     'produce': '172.18.137.37'
 }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment