Commit 757a2f0a by lichenggang

add mmic

parent 6b3e43b5
#!/usr/bin/env python
# -*- coding:utf-8 -*-
\ No newline at end of file
import os
from utils.base import Module_Base
# Prefix joined onto the href of every second-level category link.
# NOTE(review): this host differs from the mmic.net.cn URL in the disabled
# fetch code below -- confirm it is the intended site root.
domain = 'http://www.c-c.com'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}
a = Module_Base()

# Disabled alternative: fetch the page over HTTP instead of reading the
# saved copy from disk.
# url = 'http://www.mmic.net.cn/'
# resp = a.requests.get(url,headers=headers).text
# dom = a.etree.HTML(resp)
from lxml import etree

# The HTML path below is relative, so show which directory it resolves from.
print(os.getcwd())
with open('plat/mmic/mmic.html', encoding='utf-8') as f:
    text = f.read()
dom_content = etree.HTML(text)

# Every <h2> and every <ul> in the document, in document order.
level1_doms1 = dom_content.xpath('//h2')
level1_doms2 = dom_content.xpath('//ul')
print(len(level1_doms1))
print(len(level1_doms2))

# Pair the i-th <h2> with the i-th <ul>; zip stops at the shorter sequence.
# Materialized as a list so the pairs can be iterated more than once -- a bare
# zip object is exhausted after the first pass, which would make any later
# get_levels() call return an empty result.
level1_doms = list(zip(level1_doms1, level1_doms2))
def get_levels(level_doms=None):
    """Build the two-level category tree from paired heading / list nodes.

    Args:
        level_doms: Iterable of ``(heading, sub_list)`` pairs whose members
            expose an lxml-style ``xpath()`` method. When omitted, the
            module-level ``level1_doms`` is used.

    Returns:
        A list of first-level category dicts with the keys ``cat_name``,
        ``url``, ``level``, ``islast``, ``parent_id`` and ``level2_list``.
        Each ``level2_list`` entry is a dict with the keys ``cat_name``,
        ``url`` (prefixed with the module-level ``domain``), ``level`` and
        ``islast``.
    """
    if level_doms is None:
        # Fall back to the module-level pairs. If that object is a one-shot
        # iterator, a repeated call will find it already consumed.
        level_doms = level1_doms

    levels = []
    for heading, sub_list in level_doms:
        names = heading.xpath('.//a/text()')
        hrefs = heading.xpath('.//a/@href')
        if not names or not hrefs:
            # A heading without a linked title cannot form a category;
            # skip it instead of failing with IndexError.
            continue

        # Evaluate the sub-link query once and reuse it for both the
        # ``islast`` flag and the second-level loop.
        sub_links = sub_list.xpath('.//li/a')

        level1 = {
            'cat_name': names[0],
            'url': hrefs[0],
            'level': 1,
            'islast': 0 if sub_links else 1,
            'parent_id': 0,
            'level2_list': [],
        }
        print('一级分类', level1)

        for link in sub_links:
            link_names = link.xpath('./text()')
            link_hrefs = link.xpath('./@href')
            if not link_names or not link_hrefs:
                # Anchor has no direct text or no href: nothing usable.
                continue
            level1['level2_list'].append({
                'cat_name': link_names[0],
                'url': domain + link_hrefs[0],
                'level': 2,
                'islast': 1,
            })

        levels.append(level1)
    return levels
# Build the category tree when the module runs and dump it to stdout.
levels = get_levels()
print(levels)
\ No newline at end of file
This diff could not be displayed because it is too large.
...@@ -7,7 +7,7 @@ import pymysql ...@@ -7,7 +7,7 @@ import pymysql
COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter', COMPANY_PN_LIST = ['hc360', 'huangye88', 'pangwo', 'coovee', 'liebiao', 'china', 'ic114', 'cecb2b', 'trustexporter',
'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80', 'x258', 'x11467', 'x912688', 'zgtghccl', 'herostart', 'testmart', 'cc', 'eastsoo', 'x51sole', 'eb80', 'x258',
'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic'] 'taojindi', 'd17', 'etlong', 'mfqyw', 'pe168', 'qymgc', 'atobo', 'czvv', 'qic', 'mmic']
def get_env(): def get_env():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment