Commit 52b0ecd0 by lichenggang

add china

parent 44d8054e
File mode changed
from utils.base import Module_Base
# Script: download the category page at https://cn.china.cn/dianzi/ and build
# `levels`, a list of level-1 category dicts, each carrying a `level2_list`
# of level-2 category dicts (name + url).
#
# NOTE(review): indentation was lost in this copy of the file, so the exact
# loop nesting below cannot be read off the text — confirm against the
# repository before relying on any statement about which lines belong to
# which loop body.

# Request headers sent with the page fetch (a desktop Chrome User-Agent string).
headers={
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
# Project helper object; this script only uses its `requests` and `etree`
# attributes.
a = Module_Base()
url = 'https://cn.china.cn/dianzi/'
resp = a.requests.get(url,headers=headers)
# Force GBK decoding before reading `resp.text`
# (presumably the page is served in GBK — TODO confirm).
resp.encoding='gbk'
text=resp.text
dom = a.etree.HTML(text)
# Anchors inside <dt> under div.item of the first seven <li> of ul.cate-list;
# their text and href become the level-1 name and url further down.
level1_name_doms=dom.xpath('//ul[@class="cate-list"]/li[position()<8]/div[@class="item"]//dt//a')
level1_content_doms=[]
# Visit the same seven <li> one at a time (XPath positions are 1-based).
for i in range(1,8):
# <dd> nodes under div.item of the i-th <li>.
level1_content1_doms1=dom.xpath('//ul[@class="cate-list"]/li[position()={}]/div[@class="item"]//dd'.format(i))
# <dd> nodes under div.cate-extra, restricted to <dl> elements without a <dt>.
level1_content1_doms2=dom.xpath('//ul[@class="cate-list"]/li[position()={}]/div[@class="cate-extra"]//dl[not(dt)]//dd'.format(i))
level1_content1_doms = level1_content1_doms1+level1_content1_doms2
# <dd> nodes under div.cate-extra, restricted to <dl> elements that have a <dt>.
level1_content2_doms = dom.xpath('//ul[@class="cate-list"]/li[position()={}]/div[@class="cate-extra"]//dl[dt]//dd'.format(i))
# The combined <dd> list is appended as ONE entry (a list) ...
level1_content_doms.append(level1_content1_doms)
# ... while `+=` extends the same list with the individual <dd> elements.
# NOTE(review): this mixes list entries and bare elements in one list; it
# looks like that can misalign or dilute the zip() pairing below — confirm
# the intended nesting and intent of this line.
level1_content_doms += level1_content2_doms
# Pair each level-1 anchor with the entry at the same index; zip() stops at
# the shorter of the two inputs.
level1_doms=zip(level1_name_doms,level1_content_doms)
levels=[]
for level1_dom in level1_doms:
level1=dict()
# `[0]` takes the first direct text node / href of the anchor and raises
# IndexError when the XPath result is empty.
level1['cat_name'] = level1_dom[0].xpath('./text()')[0]
level1['url'] = level1_dom[0].xpath('./@href')[0]
level1['level'] = 1
level1['islast'] = 0
level1['parent_id'] = 0
level1['level2_list'] = []
# debug: print('level-1 category',level1)
# Gather every descendant <a> of each item in the paired entry.
level2_doms=[]
for i in level1_dom[1]:
level2_doms+=i.xpath('.//a')
# One level-2 dict per collected anchor, stored on the current level-1 dict.
for level2_dom in level2_doms:
level2 = {}
level2['cat_name'] = level2_dom.xpath('./text()')[0]
level2['url'] = level2_dom.xpath('./@href')[0]
level2['level'] = 2
level2['islast'] = 1
level1['level2_list'].append(level2)
# debug: print('level-2 category',level2)
levels.append(level1)
# debug: print(levels)
\ No newline at end of file
from utils.base import Module_Base
import asyncio
from aiohttp import ClientSession
headers={
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
......
......@@ -5,7 +5,7 @@ import time
import pymysql
# List of short site identifiers (presumably the sites the company crawlers
# cover — verify against the code that reads this constant).
# NOTE(review): two consecutive assignments appear here; this looks like the
# removed and added side of a diff hunk. As written, the second assignment
# (the one that includes 'china') is the value that remains bound.
COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao']
COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china']
def get_env():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment