Commit ca8b590a by lichenggang

add x912688

parent 2f3c1618
Showing with 29 additions and 16 deletions
from utils.base import Module_Base from utils.base import Module_Base
# Browser-like request headers passed to the page fetches in this script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
...@@ -6,27 +7,38 @@ a = Module_Base() ...@@ -6,27 +7,38 @@ a = Module_Base()
# Download the site's landing page and parse it with a.etree.HTML.
url = 'http://www.912688.com/'
resp = a.requests.get(url, headers=headers)
resp.encoding = 'utf-8'  # force UTF-8 decoding before .text is read
resp = resp.text
dom = a.etree.HTML(resp)
# Every anchor matched here is treated as one level-1 category link.
level1_doms = dom.xpath('//div[@class="fore7"]//dl[@class="no-border"]//dd//a')
def get_resps(level1):
    """Fetch a level-1 category page and attach its level-2 categories.

    Args:
        level1: Category dict. Its ``'url'`` entry is the page to download
            and its ``'level2_list'`` entry is a list extended in place.

    Returns:
        The same ``level1`` dict that was passed in.

    Raises:
        IndexError: If a matched anchor yields no ``title`` or ``href``.
    """
    # NOTE(review): unlike the landing-page fetch, no encoding is forced on
    # this response -- confirm the category pages decode correctly.
    page = a.requests.get(level1['url'], headers=headers)
    tree = a.etree.HTML(page.text)
    for anchor in tree.xpath('//div[@class="list_layout_filter"]//li/a'):
        child = {
            'cat_name': anchor.xpath('./@title')[0],
            'url': anchor.xpath('./@href')[0],
            'level': 2,
            'islast': 1,  # level-2 entries are flagged as the last level
        }
        level1['level2_list'].append(child)
    return level1
def get_levels():
    """Build the category tree from the anchors in ``level1_doms``.

    Returns:
        A list with one dict per anchor, in document order. Each dict is
        created here as a level-1 record and then handed to ``get_resps``,
        whose return value is what ends up in the list.

    Raises:
        IndexError: If an anchor yields no text node or no ``href``.
    """
    def _level1_record(anchor):
        # Skeleton record; 'level2_list' starts empty and is filled by
        # get_resps.
        return {
            'cat_name': anchor.xpath('./text()')[0],
            'url': anchor.xpath('./@href')[0],
            'level': 1,
            'islast': 0,
            'parent_id': 0,
            'level2_list': [],
        }

    return [get_resps(_level1_record(anchor)) for anchor in level1_doms]
# Run the crawl when the module is executed and print the resulting list.
levels = get_levels()
print(levels)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment