Commit 263fcee5 by lichenggang

add testmart

parent 9157c712
from utils.base import Module_Base
import re
# Regex capturing the string argument of an inline
# `<script>document.write("...")` call. Not referenced in the visible code.
dom_pat = re.compile(r'\<script\>document\.write\(\"(.+)\"\)')
# Scheme prefix. Not referenced in the visible code.
domain = 'http:'
# Browser-like User-Agent header sent with both page requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
# Project helper exposing `requests` and `etree` as attributes
# (presumably the requests and lxml libraries -- confirm in utils.base).
a = Module_Base()

# The two category listing pages that are read.
url1 = 'https://www.testmart.cn/productlist/sortpage/cat/111.html'
url2 = 'https://www.testmart.cn/productlist/sortpage/cat/112.html'

# Download both pages (url1 first, then url2) and keep the response text.
resp1, resp2 = (
    a.requests.get(url1, headers=headers).text,
    a.requests.get(url2, headers=headers).text,
)
# Parse each response into a document tree.
dom1, dom2 = a.etree.HTML(resp1), a.etree.HTML(resp2)

# Inside the "left-content" container, every <p> is treated as a top-level
# category heading and every <ul class="clearfix mb"> as its list of children.
_heading_xpath = '//div[@class="left-content"]//p'
_sublist_xpath = '//div[@class="left-content"]//ul[@class="clearfix mb"]'
level1_doms_1_1 = dom1.xpath(_heading_xpath)
level1_doms_1_2 = dom1.xpath(_sublist_xpath)
level1_doms_2_1 = dom2.xpath(_heading_xpath)
level1_doms_2_2 = dom2.xpath(_sublist_xpath)

# Pair headings with sub-lists by position. zip() stops at the shorter
# sequence, and on Python 3 the result is an iterator that can be consumed
# only once.
level1_doms_1 = zip(level1_doms_1_1, level1_doms_1_2)
level1_doms_2 = zip(level1_doms_2_1, level1_doms_2_2)
def get_levels(level1_doms):
    """Convert paired heading/sub-list nodes into nested category dicts.

    Args:
        level1_doms: Iterable of indexable pairs. Item ``[0]`` is the node
            holding the top-level category link and item ``[1]`` is the node
            holding that category's child links. Both must provide an
            ``xpath(expr)`` method returning a list (presumably lxml
            elements -- confirm against the caller).

    Returns:
        A list with one dict per pair, with keys ``cat_name``, ``url``,
        ``level`` (always 1), ``islast`` (always 0), ``parent_id`` (always 0)
        and ``level2_list``. ``level2_list`` holds one dict per ``<a>`` found
        under item ``[1]``, with keys ``cat_name``, ``url``, ``level``
        (always 2) and ``islast`` (always 1).

    Raises:
        IndexError: If a link yields no text node or no ``href`` value.
    """

    def first_match(node, expression):
        # Take the first XPath hit; an empty result raises IndexError.
        return node.xpath(expression)[0]

    categories = []
    for pair in level1_doms:
        heading_node = pair[0]
        top_name = first_match(heading_node, './/a[1]/text()').strip()
        top_url = first_match(heading_node, './/a[1]/@href')

        # Every link under the sub-list node becomes a leaf category.
        sub_categories = [
            {
                'cat_name': first_match(anchor, './text()').strip(),
                'url': first_match(anchor, './@href'),
                'level': 2,
                'islast': 1,
            }
            for anchor in pair[1].xpath('.//a')
        ]

        categories.append({
            'cat_name': top_name,
            'url': top_url,
            'level': 1,
            'islast': 0,
            'parent_id': 0,
            'level2_list': sub_categories,
        })
    return categories
# Build the category tree for each listing page separately.
levels1 = get_levels(level1_doms_1)
levels2 = get_levels(level1_doms_2)
# Merge both trees into one list (page 1 entries first) and dump it to stdout.
levels = [*levels1, *levels2]
print(levels)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment