Commit d77d64f4 by lichenggang

Initial commit

parents
No preview for this file type
No preview for this file type
No preview for this file type
import re, requests
from lxml import etree
from utils.mysqlopera import MySqlOperator
# Project-local MySQL helper constructed with the name 'chip1stop'; the crawler
# below uses its ``db`` attribute to open cursors and commit.
operator = MySqlOperator('chip1stop')
# Browser-like headers sent with the category-page request.
# NOTE(review): "br" is advertised in Accept-Encoding; requests only decodes
# Brotli bodies when a brotli package is installed - confirm the runtime has one.
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=0",
"Connection": "keep-alive",
"Host": "www.chip1stop.com",
"Referer": "https://www.chip1stop.com/CHN/zh",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
# Site root, prepended to the hrefs scraped from the page.
domain = 'https://www.chip1stop.com'
# Category index page downloaded by get_level_1().
catelist_url = 'https://www.chip1stop.com/CHN/zh/category'
# Level-3 link texts are expected to look like "<name>(<count>)".
# The opening parenthesis has to be matched literally: written bare it opens a
# regex group, which made the original name pattern fail to compile and made the
# count pattern capture nested groups.  Both the ASCII and the full-width form of
# the parentheses are accepted (assumption: the site may use either - confirm).
item_name_pattern = re.compile(r'([^(（]*)[(（]')
item_num_pattern = re.compile(r'[^(（]*[(（](\d*,*\d*,*\d+)[)）]')

# Template of the columns written to lie_category, with their default values.
var = {
    'cat_name': None,
    'parent_id': None,
    'sort_order': 50,
    "is_show": 1,
    "url": None,
    "islast": None,
    "level": None,
    "page_count": 0
}
def _insert_category(cat_name, parent_id, url, islast, level):
    """Insert one row into ``lie_category`` and return the id generated for it.

    ``sort_order`` (50), ``is_show`` (1) and ``page_count`` (0) are written with
    the same fixed values for every category.
    """
    sql = ('INSERT into lie_category(cat_name,parent_id,sort_order,is_show,url,islast,level,page_count) values'
           '(%s,%s,%s,%s,%s,%s,%s,%s)')
    with operator.db.cursor() as cursor:
        cursor.execute(sql, (cat_name, parent_id, 50, 1, url, islast, level, 0))
        # lastrowid is the id produced by this very INSERT; "select max(cat_id)"
        # could return a row written by another session in the meantime.
        new_id = cursor.lastrowid
    operator.db.commit()
    return new_id


def get_level_1():
    """Download the category index page and store its three-level category tree.

    Every ``section`` of the category container becomes a level-1 row, every
    ``categoryListDl`` block inside it a level-2 row, and every ``li`` link
    inside that block a level-3 (leaf, ``islast=1``) row.  Children are linked
    to their parent through ``parent_id``.

    Raises:
        requests.RequestException: the page could not be fetched.
        IndexError: an expected title/link node is missing from the page.
    """
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and a non-2xx answer is reported instead of being parsed as a category page.
    resp = requests.get(catelist_url, headers=headers, timeout=30)
    resp.raise_for_status()
    dom = etree.HTML(resp.text)

    level_doms = dom.xpath('//div[@class="clear box_t10 categoryanchorTop categoryanchorHeight"]//section')
    for se in level_doms:
        level_1_name = se.xpath('.//h3/a/text()')[0].strip()
        level_1_url = domain + se.xpath('.//h3/a/@href')[0]
        level_1_incby_id = _insert_category(level_1_name, 0, level_1_url, islast=0, level=1)
        print('一级分类' + str(level_1_incby_id))

        for level_2dom in se.xpath('.//div[@class="categoryListDl clearfix"]'):
            # string(.) flattens the anchor so text split over child tags is kept.
            level_2_name = level_2dom.xpath('.//h4//a')[0].xpath('string(.)').strip()
            level_2_url = domain + level_2dom.xpath('.//h4//a/@href')[0]
            level_2_incby_id = _insert_category(level_2_name, level_1_incby_id, level_2_url,
                                                islast=0, level=2)
            print('二级分类' + str(level_2_incby_id))

            for level_3dom in level_2dom.xpath('.//ul//li'):
                raw_name = level_3dom.xpath('.//a/text()')[0].strip()
                # The pattern keeps the part before the item count; a link text
                # without a count is stored unchanged instead of raising.
                names = item_name_pattern.findall(raw_name)
                level_3_name = names[0] if names else raw_name
                level_3_url = domain + level_3dom.xpath('.//a/@href')[0]
                _insert_category(level_3_name, level_2_incby_id, level_3_url, islast=1, level=3)


if __name__ == '__main__':
    get_level_1()
import re, requests
from lxml import etree
from utils.mysqlopera import MySqlOperator
# Project-local MySQL helper constructed with the name 'chip1stop'; the crawler
# below uses its ``db`` attribute to open cursors and commit.
operator = MySqlOperator('chip1stop')
# Browser-like headers sent with the stock-page request.
# NOTE(review): "br" is advertised in Accept-Encoding; requests only decodes
# Brotli bodies when a brotli package is installed - confirm the runtime has one.
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=0",
"Connection": "keep-alive",
"Host": "www.chip1stop.com",
"Referer": "https://www.chip1stop.com/CHN/zh",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
# Site root, prepended to the hrefs scraped from the page.
domain = 'https://www.chip1stop.com'
# Stock overview page downloaded by get_level_1().
catelist_url = 'https://www.chip1stop.com/CHN/zh/stock'
# First match of item_name_pattern is the text in front of the first "[";
# item_num_pattern captures the digits/commas between "[" and "]".
item_name_pattern = re.compile(r'([^\[]*)')
item_num_pattern = re.compile(r'\[([0-9,]*)\]')

# Template of the columns written to lie_category, with their default values.
var = dict(
    cat_name=None,
    parent_id=None,
    sort_order=50,
    is_show=1,
    url=None,
    islast=None,
    level=None,
    page_count=0,
)
def _insert_category(cat_name, parent_id, url, islast, level):
    """Insert one row into ``lie_category`` and return the id generated for it.

    ``sort_order`` (50), ``is_show`` (1) and ``page_count`` (0) are written with
    the same fixed values for every category.
    """
    sql = ('INSERT into lie_category(cat_name,parent_id,sort_order,is_show,url,islast,level,page_count) values'
           '(%s,%s,%s,%s,%s,%s,%s,%s)')
    with operator.db.cursor() as cursor:
        cursor.execute(sql, (cat_name, parent_id, 50, 1, url, islast, level, 0))
        # lastrowid is the id produced by this very INSERT; "select max(cat_id)"
        # could return a row written by another session in the meantime.
        new_id = cursor.lastrowid
    operator.db.commit()
    return new_id


def get_level_1():
    """Download the stock overview page and store its three-level category tree.

    Every ``section`` of the active tab (except ``a-product-01``) becomes a
    level-1 row, every ``h4`` heading inside it a level-2 row, and every link in
    the first ``ul`` following that heading a level-3 (leaf, ``islast=1``) row.
    Children are linked to their parent through ``parent_id``.

    Raises:
        requests.RequestException: the page could not be fetched.
        IndexError: an expected title/link node is missing from the page.
    """
    # A timeout keeps the script from hanging forever on a stalled connection,
    # and a non-2xx answer is reported instead of being parsed as a category page.
    resp = requests.get(catelist_url, headers=headers, timeout=30)
    resp.raise_for_status()
    dom = etree.HTML(resp.text)

    level_doms = dom.xpath('//div[@class="m-news-tab-content js-tab-target is-active"]//section[not(@id="a-product-01")]')
    for se in level_doms:
        level_1_name = se.xpath('.//h3/a/text()')[0].strip()
        level_1_url = domain + se.xpath('.//h3/a/@href')[0]
        level_1_incby_id = _insert_category(level_1_name, 0, level_1_url, islast=0, level=1)
        print('一级分类:' + str(level_1_name))

        for level_2dom in se.xpath('.//h4[@class="m-bor-title m-text-16 m-mt"]'):
            # string(.) flattens the anchor so text split over child tags is kept.
            level_2_name = level_2dom.xpath('.//a')[0].xpath('string(.)').strip()
            level_2_url = domain + level_2dom.xpath('.//a/@href')[0]
            level_2_incby_id = _insert_category(level_2_name, level_1_incby_id, level_2_url,
                                                islast=0, level=2)
            print('二级分类:' + str(level_2_name))

            # The leaf links live in the first <ul> that follows the heading.
            for level_3dom in level_2dom.xpath('./following-sibling::ul[1]//a'):
                # First findall hit is the link text in front of the "[count]" suffix.
                level_3_name = item_name_pattern.findall(level_3dom.xpath('./text()')[0].strip())[0]
                level_3_url = domain + level_3dom.xpath('./@href')[0]
                _insert_category(level_3_name, level_2_incby_id, level_3_url, islast=1, level=3)
                print('三级分类:' + level_3_name)


if __name__ == '__main__':
    get_level_1()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
def get_env():
    """Return 'test' when running on macOS or Windows, 'produce' on any other platform."""
    on_desktop_os = sys.platform in ('darwin', 'win32')
    return 'test' if on_desktop_os else 'produce'
# Environment name ('test' or 'produce') resolved once at import time; it is the
# key used to index every *_SET table below.
ENV = get_env()
# Short platform codes mapped to supplier names.
PLATFORM = {
'ar': 'arrow',
'dg': 'digikey'
}
# Host read by get_mysql_conf().
HOST_SET = {
'test': '192.168.1.232',
'produce': '172.18.137.37'
}
# Host read by get_mq_conf().
MQ_HOST_SET = {
'test': '192.168.1.237',
'produce': '119.23.79.136'
}
# Host read by get_mongo_conf().
MG_HOST_SET = {
'test': '192.168.1.237',
'produce': '172.18.137.23'
}
# Host read by get_er_redis_conf().
ER_HOST_SET = {
'test': '192.168.1.235',
'produce': '172.18.137.38'
}
# (user, password) pairs read by get_mysql_conf().
# NOTE(review): credentials are committed in plain text here and in the conf
# functions of this module - consider loading them from the environment or a
# secret store and rotating the exposed values.
UP_SET = {
'test': ('ichunt', 'ichunt'),
'produce': ('bigdata', 'bdYm2yy2mmyzlmlly')
}
def get_mysql_conf(db):
    """Return the MySQL connection settings for database *db* in the current ENV.

    The result carries the keys host, port, user, password, db and charset.
    """
    user, password = UP_SET[ENV]
    return {
        'host': HOST_SET[ENV],
        'port': 3306,
        'user': user,
        'password': password,
        'db': db,
        'charset': 'utf8'
    }
def get_redis_conf():
    """Return settings for the Redis on localhost; a password is added only in 'produce'."""
    settings = dict(host='localhost', port=6379, db=0)
    if ENV != 'produce':
        return settings
    settings['password'] = 'icDb29mLy2s'
    return settings
def get_er_redis_conf():
    """Return settings for the Redis instance on the ER host of the current ENV."""
    return dict(
        host=ER_HOST_SET[ENV],
        port=6379,
        db=0,
        password='icDb29mLy2s',
    )
def get_mq_conf():
    """Return host, user and password for the message queue of the current ENV."""
    return {
        'host': MQ_HOST_SET[ENV],
        'user': 'huntadmin',
        'password': 'jy2y2900'
    }
def get_mongo_conf():
    """Return host, port, database, user and password for MongoDB in the current ENV."""
    return dict(
        host=MG_HOST_SET[ENV],
        port=27017,
        database='ichunt',
        user='ichunt',
        password='huntmon66499',
    )
# IP address strings grouped under four keys.  Every entry ends with ':' -
# presumably the consumer appends a port; no consumer is visible in this module
# (TODO confirm), and the meaning of the key names is not documented here.
all_hosts = {'ali_ml': ('60.205.217.219:', '47.100.98.66:', '119.23.204.20:', '119.23.230.246:'),
'ali_hk': ('47.244.26.18:',),
'ukd_ml': ('106.75.177.159:', '106.75.136.234:', '106.75.222.94:', '106.75.73.84:'),
'ukd_hk': ('103.210.21.90:',)}
strli=[
'运算放大器[20,876]',
'音频放大器[2,159]',
'GPS放大器[5]',
'有线电视放大器[9]',
'射频放大器芯片[7]',
'射频放大器模块[1]',
'视频放大器[12]',
'特殊用途放大器[10]',
'比测器[4,306]',
'开关式稳压器[10,436]',
'交直流变换器[29]',
'线性稳压器[31,034]',
'电流调节器[39]',
'基准电压[6,321]',
'电源MOSFET驱动器[6,172]',
'电源管理[2,160]',
'电压监控器[1,074]',
'监督器[18,487]',
'以太网供电控制器[5]',
'直流至直流控制器[18]',
'热插拔控制器[1]',
'运动电机控制[1]',
'RMS到DC转换器[3]',
'门和电源驱动器[31]',
'PMIC解决方案[3]',
'限流开关[1]',
'USB电源开关[6]',
'电源开关[211]',
'热插拔/配电[2,157]',
'电信林格[1]',
'无线充电[1]',
'其他电源管理IC[14,216]',
'数模转换器[6,640]',
'模数转换器[8,918]',
'其他数据转换器[2,304]',
'时钟缓冲器[1,782]',
'合成器/PLL[2,665]',
'时钟和数据恢复[4]',
'锁相环[30]',
'计时器[9]',
'零延迟缓冲器[35]',
'其他时钟・计时器IC[3,116]',
'实时时钟[1,796]',
'LIN收发器[76]',
'总线收发器[3]',
'DTMF收发器[3]',
'USB收发器[29]',
'GPIO扩展器[3]',
'接口控制器[32]',
'专用接口[3]',
'录音和播放[3]',
'线路接口单元[44]',
'LVDS[245]',
'接口样品样品套件[4]',
'数字隔离器[1,055]',
'传感器和检测器接口[2]',
'触摸屏控制器[7]',
'个UART[14]',
'USB接口IC[3]',
'接口[23,882]',
'其他收发器[2]',
'延时线[394]',
'模拟开关/多路转换器[10,876]',
'总线交换机[2]',
'总线开关[33]',
'数字交叉点[1]',
'模拟前端[4]',
'数字电位器[3,139]',
'I2C[1,202]',
'电动机控制器[3,037]',
'其他模拟IC[8,978]',
'TTL逻辑[1,825]',
'CMOS逻辑[15,071]',
'BiCMOS逻辑[2,021]',
'ECL逻辑[547]',
'专用可编程逻辑[1]',
'逻辑样品套件[32]',
'其他标准逻辑[8,390]',
'计数器移位寄存器[7]',
'解码器和解复用器[28]',
'缓冲区和线路驱动器[158]',
'总线收发器[44]',
'编码器[6]',
'人字拖[4]',
'盖茨[146]',
'逆变器施密特触发器[29]',
'CPLD[1,771]',
'电平转换器[36]',
'多路复用器[31]',
'FPGA[3,322]',
'SPLD[248]',
'微控制器(MCU)[48,756]',
'芯片组[3]',
'DRAM[1,157]',
'SRAM[3,073]',
'FRAM(铁电存储器)[156]',
'MRAM[105]',
'NVRAM[521]',
'EEPROM[9,627]',
'EPROM[303]',
'闪存[3,219]',
'闪速存储器[189]',
'存储器模组[117]',
'配置器件[112]',
'SD卡[36]',
'CF卡[4]',
'FIFO(先进先出)[13]',
'其他存储器[429]',
'达林顿晶体管[253]',
'其他三极管[2,496]',
'GP BJT[18,938]',
'RF BJT[103]',
'达林顿晶体管[1,429]',
'数字晶体管[6,701]',
'IGBT[4,689]',
'IGBT模块[46]',
'JFET(型场效应晶体管)[88]',
'MOSFET[45,706]',
'微处理器(MPU)[2,792]',
'应用处理器[914]',
'数字信号处理机(DSP)[1,463]',
'视频处理器[2]',
'整流二极管[29,046]',
'定电流二极管[83]',
'可变容量二极管[390]',
'肖特基势垒二极管[5,892]',
'开关二极管[2,628]',
'快恢复二极管[74]',
'PIN型二极管[691]',
'射频二极管[1]',
'变容二极管[150]',
'TVS[22,927]',
'静电抑制器[294]',
'齐纳二极管[25,032]',
'其他二极管[2,061]',
'DIAC(双向触发二极管)[2,819]',
'晶闸管浪涌保护器件[3]',
'可控硅[177]',
'三端双向可控硅开关[2,927]',
'无线局域网模块[664]',
'蓝牙[637]',
'802.15.4/Zigbee[313]',
'射频开关[9]',
'天线[1,234]',
'数字衰减器[236]',
'不平衡变压器[226]',
'绝缘体[366]',
'循环器[3]',
'双信号耦合器[4]',
'RF样品套件[2]',
'直流模块[1]',
'射频耦合器[2]',
'读写器[142]',
'RFID标签[257]',
'结合器[11]',
'高频模组[95]',
'其他高频零件[69]',
'终端器[6]',
'RF[3,626]',
'其他射频模块[23]',
'放大器评估和开发工具[364]',
'应用软件开发工具[19]',
'时钟/定时器评估和开发工具[185]',
'数据转换开发板和套件[34]',
'开发套件和工具[56]',
'数据转换器评估和开发工具[510]',
'DSP评估和开发工具[4]',
'嵌入式系统开发板和套件[3]',
'界面评估和开发工具[128]',
'微机评估工具[1,482]',
'其他评估开发工具[6,303]',
'传感器开发板和套件[74]',
'软件开发工具[5]',
'电源管理IC评估和开发工具[2,131]',
'可编程逻辑评估开发工具[133]',
'LCD显示屏开发工具[1]',
'启动器套件[6]',
'开发套件[1,017]',
'传感器IC评估和开发工具[663]',
'加速度传感器IC[361]',
'倾斜传感器IC[232]',
'近接センサIC[1]',
'近接传感器IC[60]',
'压力传感器IC[834]',
'工業用圧力センサIC[4]',
'磁性传感器IC[1,317]',
'磁気センサIC[23]',
'温度传感器IC[154]',
'温度传感器IC[2,875]',
'温湿度センサIC[104]',
'流量传感器IC[10]',
'霍尔效应传感器IC[10]',
'光电传感器IC[19]',
'图形传感器IC[317]',
'産業用カメラIC[6]',
'颜色传感器IC[11]',
'光敏微型传感器IC[5]',
'空气质量传感器IC[1]',
'电流传感器IC[259]',
'环境光传感器IC[63]',
'陀螺仪传感器IC[62]',
'循环编码器IC[137]',
'三维运动传感器IC[2]',
'その他センサIC[16]',
'其他传感器IC[725]',
'其他通信产品[4]',
'其他外设[1]',
'其他半导体[11,322]',
'SLIC[2]',
'ATM UNI[1]',
'音频编解码器[2]',
'编解码器[6]',
'视频编解码器[1]',
'以太网控制器[11]',
'以太网收发器[14]',
'调制解调器芯片[2]',
'控制器区域网络[2]',
'ISDN控制器[3]',
'ISDN ST U接口[4]',
'PHY[1]',
'射频收发器[13]',
'ADSL驱动器和接收器[1]',
'I/O控制器[42]',
'桥式整流器[14]',
'整流器[8]',
'CCFL逆变器[6]',
'PCI桥接芯片[6]',
'应用处理器和SOC[11]',
'GPS接收器[2]',
'IrDA收发器[8]',
'IGBT和MOSFET栅极驱动光电耦合器[749]',
'光电探测器[3]',
'激光二极管[1]',
'光电耦合器[10,548]',
'光电二极管[479]',
'光电断路器[1,134]',
'光电三极管[914]',
'其他光半导体[1,768]',
'LED驱动IC[3,648]',
'LED透镜/反光[1,432]',
'LED模块[18,245]',
'LED用附件[3,161]',
'LED[10,343]',
'红外发射器[1]',
'光学镜头[22]',
'灯[363]',
'照明器材[1]',
'显示驱动[1]',
'LCD字符模块[334]',
'LED显示器分段[1]',
'OLED模块[3]',
'STN图形模块[3]',
'显示屏用附件[14]',
'TFT模块[9]',
'LCD驱动[709]',
'液晶显示器[422]',
'触摸屏[28]',
'LED显示屏[1,781]',
'有机EL显示屏[15]',
'控制面板[336]',
'触摸面板[3]',
'光纤收发器[1]',
'陶瓷电容器[107,874]',
'馈通式电容器[424]',
'薄膜电容器[24,782]',
'铝电解电容器[71,842]',
'电容阵列[353]',
'双电层电容器[1,122]',
'云母电容器[1,391]',
'氧化铌电容器[282]',
'硅电容器[562]',
'钽电解电容器[24,915]',
'电容器RC网络[7]',
'微调电容器[188]',
'电容器铝聚合物[7]',
'电容器陶瓷单层[1]',
'电容器钽聚合物[286]',
'噪声抑制电容器[29]',
'电容样本套件[142]',
'电容器配件[11]',
'其他电容[1,396]',
'Lithium Ion Capacitor[1]',
'电阻器固定单表面安装[164,591]',
'电阻固定单通孔[5,890]',
'其他电阻器[11,175]',
'网络电阻器[10,410]',
'金属皮膜电阻器[35,022]',
'薄膜网络电阻器[45]',
'金属氧化皮膜电阻器[2,878]',
'旁路电阻/电流监测电阻器[3,296]',
'保险丝电阻器[28]',
'薄膜电阻器[9]',
'碳素皮膜电阻器[2,880]',
'跳线电阻[34]',
'铂电阻[675]',
'金属陶瓷电阻[12,236]',
'卷线电阻[4,788]',
'可变电阻器(容量)[2,396]',
'热敏电阻[1]',
'电阻配件[1]',
'电阻器样本套件[165]',
'感应器[59,736]',
'电感器引线[1,017]',
'电源扼流线圈[5,732]',
'音频变压器[21]',
'开关变压器[98]',
'电信变压器[1]',
'脉冲变压器[29]',
'射频变压器[2]',
'变压器[5,596]',
'变形金刚其他[112]',
'电流变压器[6]',
'感应器样本套件[85]',
'无线充电线圈[125]',
'共模扼流器[5,298]',
'非晶磁芯[19]',
'非晶扼流圈[15]',
'NTC热敏电阻[4,083]',
'PTC热敏电阻[1,211]',
'3端子电容[23]',
'3端子滤波器[468]',
'EMC对策表[175]',
'EMI滤波器[2,316]',
'静电保护器件[4,505]',
'浪涌保护器[380]',
'芯片保险丝[4,494]',
'无保险丝断路器 [2]',
'常模滤波线圈[65]',
'保险丝支架[656]',
'夹扣式磁环[136]',
'电磁开闭器[508]',
'电磁接触器[59]',
'漏电断路器[38]',
'滤波器样本套件[1]',
'其他保护器件[64]',
'其他滤波器[984]',
'铁氧体磁头[6,384]',
'铁氧体配件[8]',
'铁素体[1,943]',
'变阻器[7,748]',
'其他回路保护产品[7,127]',
'PTC自恢复保险丝[106]',
'继电器[2,770]',
'保险丝[23,681]',
'石英滤波器[12]',
'SAW滤波器[380]',
'射频滤波器[5]',
'影音/通信用过滤[460]',
'水晶振子[10,193]',
'陶瓷振子[440]',
'弹性表面波(SAW)振子[8]',
'其他振子/振子[457]',
'MEMS/晶体振荡器[99,082]',
'MEMS振荡器[10,559]',
'可编程振动器[138]',
'温度补偿型水晶振动器(TCXO)[143]',
'电压控制水晶振动器(VCXO)[705]',
'其他水晶振动器[398]',
'实时时钟模块[144]',
'其他样品套件[2]',
'其他电子元器件[12,426]',
'线性开关电源[3,296]',
'DC-DC转换器[26,591]',
'交直流变换器[3,732]',
'变频器[105]',
'电源线路滤波器[4,761]',
'可编程电源[54]',
'高电压电源[51]',
'电源模块[50]',
'LED驱动电源[2,710]',
'UPS·不停电电源装置[134]',
'UPS配件[2]',
'太阳能电池板[35]',
'电池座/压铁/触点[1,150]',
'交流适配器[2,604]',
'多输出半定制电源[12]',
'电源分接头/电源板[2]',
'电源管理器[11]',
'电源用附件[204]',
'其他电源[166]',
'触动开关[5,036]',
'按钮开关[12,225]',
'滑动开关[2,673]',
'拨动开关[12,498]',
'旋转开关[2,879]',
'摇臂开关[5,346]',
'双列开关[4,529]',
'编码器[342]',
'键盘开关[74]',
'键锁式开关[3,906]',
'多方向操作开关[41]',
'微动开关[3,882]',
'开关选择器[1]',
'触发式开关[5]',
'簧片开关[362]',
'限位开关[3,154]',
'杠杆开关[15]',
'检测开关[269]',
'电源开关[229]',
'显示灯开关[654]',
'开关样品盒[2]',
'其他开关[4,180]',
'开关附件[4,956]',
'电源用继电器[4,682]',
'其他继电器[5,122]',
'MOSFET继电器[335]',
'固态继电器(SSR)[7,136]',
'终端继电器[9]',
'功率继电器[8,452]',
'光电继电器[184]',
'振簧继电器[1,065]',
'继电器附件[981]',
'信号用继电器[8,183]',
'直流伺服马达[33]',
'无刷电机[1]',
'交流伺服马达[20]',
'步进马达[107]',
'伺服电机[4]',
'其他马达[101]',
'接头[472]',
'电磁阀[12]',
'稳压器[114]',
'其他空压机器[62]',
'电涌放电器 [1,603]',
'过载保护器[550]',
'电火花消灭器[23]',
'音频指示器和警报[1]',
'扩音器[523]',
'麦克风[1]',
'话筒[262]',
'压电制动器[2]',
'压电发声器[504]',
'压电振动板[24]',
'其他压电设备[3]',
'蜂鸣器[456]',
'PLC[187]',
'计时器[19]',
'计时器/定时开关[189]',
'可编程继电器[4]',
'温度调节器[465]',
'指示计[16]',
'指示调节仪[3]',
'信号转换器[1]',
'加速度传感器[157]',
'加速度传感器IC[1]',
'板上压力传感器[412]',
'倾斜传感器[716]',
'近接传感器[1,660]',
'压力传感器[2,781]',
'压力传感器IC[95]',
'振动传感器[18]',
'视觉传感器[2]',
'磁性传感器[451]',
'磁性传感器IC[18]',
'温度传感器[440]',
'温度传感器[631]',
'流量传感器[187]',
'霍尔效应传感器[30]',
'光电传感器[784]',
'图形传感器[50]',
'颜色传感器[1]',
'RF传感器[5]',
'陀螺仪传感器[2]',
'光敏微型传感器[433]',
'超声波传感器[9]',
'电流传感器[442]',
'专用传感器[2]',
'温控器[93]',
'陀螺仪传感器[17]',
'应变计[26]',
'循环编码器[754]',
'太阳能电池[4]',
'三维运动传感器[2]',
'气体传感器[7]',
'其他传感器[1,311]',
'其他传感器IC[118]',
'开关集线器[65]',
'交流风扇马达[1,000]',
'直流风扇马达[3,361]',
'吹风机[3,409]',
'风扇马达附属品[403]',
'吸热器[4,674]',
'热对策薄片・带子・粘着剂[415]',
'热管理配件[4]',
'LED显示灯[3,396]',
'霓虹显示灯[94]',
'信号灯[1,587]',
'支架[49]',
'橡胶垫[61]',
'操作箱[7]',
'保险杠和调平元件[1]',
'塑料箱[9]',
'金属箱[275]',
'树脂箱[377]',
'其他箱子・盒子[7,062]',
'卡式边缘连接器[1,265]',
'光纤接插件[1,252]',
'FFC/FPC接插件[8,911]',
'HDMI连接器[61]',
'USB连接器[1,721]',
'LED阵列支架[11]',
'底板连接器[4,469]',
'DIN接插件[551]',
'D-SUB接插件[32,591]',
'方形接插件[13,222]',
'FPGA/CPLD评价插座[1,749]',
'电线对电线/电路板接插件[14,812]',
'卡座连接器[2,476]',
'其他接插件[131,558]',
'‎模块化连接器[5,892]',
'Inter连接器[8]',
'线对板连接器[61,174]',
'电路板对电路板接插件[4,709]',
'圆形接插件[1,712,549]',
'安全标准电源连接器[11]',
'电力用连接器[9,276]',
'连接器存储卡[45]',
'连接器RF[9]',
'连接器电源[4]',
'连接器SCSI[37]',
'连接器离散电线外壳[1,295]',
'连接器头和PCB插座[4,782]',
'IC插座[3,602]',
'IC插座附件[12]',
'音频/录像连接器[4,137]',
'接插件用工具[1,660]',
'接插件附件[36,178]',
'中转箱[1,853]',
'压接端子[861]',
'同轴接插件[9,651]',
'后壳[1]',
'连接器插座[27]',
'连接器接口[45]',
'端子台/终端[38,821]',
'其他端子[12,411]',
'衬套[3]',
'轴衬[44]',
'管道[1]',
'线束配件[2,059]',
'插座[79]',
'带标识配件[269]',
'插头[214]',
'电缆衬垫[279]',
'保护软管[1,285]',
'电缆线套管[108]',
'分接头[11]',
'乙烯导线[1]',
'夹子/插座/终端[937]',
'其他配线机材[15,297]',
'同轴电缆[887]',
'电缆多芯电缆[3]',
'耐热电缆[18]',
'网络缆线[6]',
'机械手电缆[2,760]',
'乙烯电缆[254]',
'扁平电缆[1,861]',
'单芯电缆[4,610]',
'其他电线/电缆[25,083]',
'同轴连接器线束[11]',
'光纤线束[14]',
'电缆组件其他[102]',
'电缆标记[5]',
'电缆附件[42]',
'仪表电缆[1]',
'圆形连接线束[83]',
'D-sub连接线束[3]',
'MDR连接线束[22]',
'热收缩软管[4,901]',
'电源线[50]',
'AC inlet线束[2,288]',
'电路板制造用品[29]',
'电路板制造用治具工具[1]',
'转换电路板[81]',
'通用电路板[442]',
'电路板附件[1,556]',
'超硬铣刀[170]',
'高速钢切刀[81]',
'滚花[95]',
'高速钢铣刀[343]',
'铣床用螺攻・倒角工具[74]',
'修补刀具[91]',
'镶刃刀具[377]',
'钻头盒[2]',
'超硬钻头[234]',
'电钻用钻头[234]',
'高速钢锥柄麻花钻[314]',
'超硬涂层钻[533]',
'高速钢涂层钻[368]',
'钻头组件[27]',
'混凝土钻头[597]',
'无心钻头[338]',
'高速钢直柄钻头[944]',
'电磁钻床切刀[108]',
'钻孔刀[497]',
'阶梯钻头[42]',
'木工用钻头・切刀[421]',
'中心钻[55]',
'倒角工具[388]',
'埋头钻[71]',
'拉丝模[50]',
'手用丝锥[90]',
'嵌入螺丝[113]',
'管用丝攻[12]',
'螺尖丝锥[142]',
'钻孔器[56]',
'无槽丝攻[7]',
'螺旋槽丝锥[93]',
'轴承[1,054]',
'钢珠螺栓[2]',
'链条[411]',
'轴承组件[72]',
'控制机器[4]',
'凸轮随动件・辊轴随动件[63]',
'机器按键[220]',
'轴环[284]',
'链轮齿[379]',
'显示灯[127]',
'连接器[231]',
'马达[19]',
'调节器[119]',
'铝框架[103]',
'把手[14]',
'把手・手柄[178]',
'门把[232]',
'油位表[180]',
'O环·油封[402]',
'活塞[4]',
'防振材[68]',
'冷却装置[120]',
'治具工具[1,071]',
'机用平口钳[2]',
'弹簧[135]',
'规格板[5]',
'钻孔机用工具[26]',
'车床・铣床用工具[1]',
'切削加工工具[39]',
'磁块[11]',
'电磁夹盘・电磁支架[106]',
'磁选机[5]',
'磁力机械手[28]',
'磁性托架[42]',
'磁性起重机[4]',
'着磁器・脱磁器[3]',
'磁性表座[4]',
'切断机[3]',
'气动砂带机[1]',
'气枪[1]',
'气动冲击扳手[2]',
'气动棘轮扳手[1]',
'气动锯[8]',
'空气软管[17]',
'空气干燥机・排水排出器[3]',
'空气过滤器・稳压器[17]',
'空气管[58]',
'空气阀门[77]',
'管子接头[54]',
'注油器[6]',
'微型阀门[115]',
'空气组件[131]',
'单触接头[34]',
'消音器[24]',
'速度控制器[53]',
'软管接头[517]',
'环境改善用品用零件[1]',
'手工作业工具用零件[6]',
'空压工具用零件[2]',
'保护具用零件[1]',
'发电机[2]',
'园艺用品[8]',
'焊接用品[1]',
'薄片(涂装・装潢)[43]',
'抹刀・刮刀[37]',
'刷毛[8]',
'缝褶机[2]',
'化学泵[4]',
'压接工具[13]',
'切断磨石[1]',
'金刚石切刀[1]',
'手工锯条[18]',
'曲线锯[16]',
'替换锯刀片[78]',
'研磨布纸[10]',
'带轴刷子[2]',
'金刚石砂纸[10]',
'手刷[160]',
'砂纸[33]',
'磨石[11]',
'薄片研磨材[9]',
'电动钻机[24]',
'卷线盘[1]',
'电动钻孔机[36]',
'电动螺丝起子[4]',
'磨削工具[1]',
'冲击起子[103]',
'冲击扳手[37]',
'电动工具・用品 附件[13]',
'往复锯・锯管机[3]',
'管螺纹机[64]',
'超声波切刀[1]',
'作业灯[32]',
'压接工具[158]',
'线槽剪[1]',
'扩口工具[7]',
'管内检査用品[52]',
'切管机[54]',
'管绞刀[6]',
'管道扳手[58]',
'弯管器[29]',
'截管器[56]',
'水泵手钳[6]',
'电线脱皮钳[69]',
'通线工具[3]',
'防爆构造接头[26]',
'绝缘工具[237]',
'棘轮扳手[80]',
'双头梅花扳手[289]',
'六角棒扳手[760]',
'活动扳手[14]',
'组合扳手[164]',
'扭矩扳手[154]',
'内六角扳手[74]',
'Ť型扳手[29]',
'夹钳[32]',
'尖嘴钳[62]',
'扳手[251]',
'剪线钳[250]',
'老虎钳[114]',
'拔具[3]',
'打孔机[121]',
'卡环钳[15]',
'切刀・剪刀[281]',
'螺丝刀[1,509]',
'螺丝刀刀头[776]',
'拔出工具[38]',
'手摇钻[18]',
'扁嘴钳[2]',
'手钳[457]',
'榔头[132]',
'注油器[1]',
'断线钳[3]',
'雕刻刀[79]',
'夹具[270]',
'套筒扳手[1,937]',
'刻线工具[13]',
'金属片用工具[1]',
'刻印[3]',
'车辆整备用工具[6]',
'铝盒・箱[7]',
'零件盒[40]',
'树脂制工具箱[8]',
'钢制工具箱[2]',
'工具固定器・袋[74]',
'工具套件[76]',
'管子[78]',
'配管支持固定件[1]',
'旋入管接头[2]',
'小径配管接头[7]',
'阀门[114]',
'电线[3]',
'电线保护资材[159]',
'断路器[1]',
'光缆资材[19]',
'配电板・框体[47]',
'锚栓[312]',
'盲孔螺母[111]',
'铆钉[797]',
'螺丝・螺母[682]',
'垫圈[1]',
'紧固件[167]',
'开口销[2]',
'弹簧[9]',
'铰链[48]',
'滑轨[70]',
'链锁[1]',
'把手[411]',
'支索[3]',
'钢索[437]',
'焊接器[450]',
'烙鐵頭[387]',
'焊锡[340]',
'焊锡用品[587]',
'除尘器・急冷剂・去垢剂[22]',
'器件・焊锡去除用品[141]',
'电路板・制作工具[9]',
'热加工机[5]',
'镊子[418]',
'消毒・清洗剂[74]',
'干燥机器[47]',
'食品检査用品[7]',
'捕虫器・杀虫器[1]',
'厨房器具[90]',
'硬毛刷・刷子[11]',
'食品用棉纱头[1]',
'药品保管库[8]',
'大口杯ー・容器[17]',
'绵棒[19]',
'蒸馏・精制装置[26]',
'冷冻・冷藏机器[2]',
'加热机器[16]',
'恒温器・干燥器[8]',
'实验用器具[1,004]',
'显微鏡[226]',
'吸液管[122]',
'泵[30]',
'研究用设备[9]',
'搅拌・振荡机器[31]',
'搬送用软管・接头[8]',
'超声波清洗机[13]',
'刀伤防止手套[15]',
'防灾面具[1]',
'防尘口罩[54]',
'防尘眼睛[9]',
'防毒口罩[47]',
'耐热・耐寒手套[37]',
'长靴[30]',
'保护服[54]',
'雨具[1]',
'安全鞋[63]',
'作业用手套[65]',
'作业鞋用品[2]',
'标示板[23]',
'保护薄片[2]',
'安全镜[2]',
'保安用品[1]',
'送风机[2]',
'空气净化机[2]',
'工厂扇[6]',
'防中暑对策用品[31]',
'警报器[2]',
'紧急用标识[2]',
'倾倒防止用品[23]',
'防盗用照明灯[3]',
'指示胶带[239]',
'反光带[1]',
'安全指示胶带[2]',
'双面胶带[26]',
'绝缘带[38]',
'胶纸带[10]',
'打包用带[2]',
'特殊用途用带[1]',
'标签机・打印机[4]',
'袋[4]',
'带子切刀[5]',
'扎带[33]',
'橡胶带[148]',
'按照用途分类的轮脚[1]',
'开关盒四轮货车[6]',
'零件管理(拉出类型)[1]',
'零件管理(架子类型)[17]',
'小型作业台[1]',
'其他产品[96]',
'模拟万用表[17]',
'数字万用表[116]',
'钳型电表[60]',
'数据记录器[44]',
'接地电阻计[1]',
'检电器[20]',
'绝缘电阻计[50]',
'LCR测试仪[7]',
'逻辑分析仪[7]',
'示波器[35]',
'检相器[8]',
'信号发生器[12]',
'光谱分析[14]',
'直流稳定化电源[5,852]',
'校正器[3]',
'其他电气计测器[79]',
'附属品[510]',
'导电率计[11]',
'照度计[9]',
'水分计[37]',
'噪音计[1]',
'流量计[18]',
'气体测量器・检测器[10]',
'硬度计[6]',
'PH计[61]',
'压力计[265]',
'折射计[3]',
'振动计・旋转计[16]',
'风速计[7]',
'其他环境测量器[439]',
'数码温度计[5]',
'双金属式温度计[4]',
'放射温度计[25]',
'记录计[22]',
'热电对[48]',
'温湿度指示器[102]',
'温湿度计[45]',
'温度计[298]',
'附属品[555]',
'其他温度计[61]',
'测距仪[1]',
'量规[92]',
'凸面[9]',
'深度规[20]',
'千分表・杠杆测试[15]',
'高度规[11]',
'测量用台[10]',
'内径测量器[16]',
'水平器[6]',
'千分尺[83]',
'直尺[42]',
'直角尺[79]',
'测厚仪[9]',
'张力计[21]',
'游标卡尺[96]',
'其他测量工具[231]',
'测量仪器配件[1]',
'秤[150]',
'称重机[1]',
'检查内装[1]',
'通信用测量器[2]',
'光学用品[134]',
'计时器/秒表[156]',
'其他检査・计测机器[474]',
'附属品[1]',
'静电防止手套[56]',
'静电防止粘着带子・橡胶带[55]',
'静电防止鞋・靴・凉鞋[103]',
'静电防止袋/薄片/托盘[272]',
'除电刷[45]',
'导电性垫子[72]',
'离子发生器(除电器)[16]',
'静电防止手环・脚带・接地线[105]',
'静电气测量器[11]',
'防静电零件开关盒[12]',
'火花制止器[1]',
'防静电零件盒[12]',
'静电气对策椅子[6]',
'静电气对策搬运机器[6]',
'电路板固定器[5]',
'其他静电气对策品[149]',
'洁净棚[35]',
'洁净室用器具[309]',
'防尘粘着带子・标签/洁净纸[45]',
'防尘衣[188]',
'防尘手套[164]',
'防尘罩子・盖子・口罩[43]',
'防尘帽[8]',
'防尘鞋[72]',
'防尘织布(抹布)/粘着清洁器/绵棒[84]',
'洁净室用用具[148]',
'洁净室用事务用品[493]',
'其他洁净产品[161]',
'导电胶带/薄片[1]',
'衬垫[128]',
'接地线噪声滤波器[8]',
'其他EMC对策品[34]',
'防止静电・带电剂[1]',
'粘着剂[86]',
'被膜剂[5]',
'清洗剂[30]',
'探伤・检査剂[5]',
'切削油剂[1]',
'润滑油・胶[15]',
'润滑剂[28]',
'剥离剂[3]',
'修补剂[11]',
'防锈剂[4]',
'密封剂[10]',
'其他药剂/化学产品[70]',
'通信机器/扩音器[3]',
'缓冲材/屏风[3]',
'标识/标签[1,817]',
'安全帽[2]',
'其他防灾/防盗/安全用品[269]',
'清洗机器[79]',
'动植物实验关连用品[138]',
'粉碎・粉体机器[462]',
'培养机器[254]',
'容器[1,669]',
'玻璃吸管/吸液管[965]',
'烧瓶/大口杯[723]',
'药品库/除湿器[370]',
'接头・接插件[364]',
'加热・冷却机器[230]',
'恒温器/干燥器[109]',
'泵/软管[201]',
'减菌・消毒机器[163]',
'各种素材・薄片・垫圈[121]',
'搅拌机器[442]',
'纯水机器[91]',
'其他研究机器[719]',
'手推车/搬运车[138]',
'天线[157]',
'开关盒[32]',
'轮脚[2]',
'工具车[71]',
'实验台[9]',
'其他收纳・保管・搬运用品[630]',
'清扫用品[615]',
'土木建筑用品[5]',
'PC外围设备[1,109]',
'办公用品[112]',
'喷雾器[24]',
'手套[502]',
'带子/纸产品[680]',
'口罩/护目镜[141]',
'打包机器/打包用品[1]',
'今治毛巾[6]',
'泵/空压机器[232]',
'螺丝/固定件[4,998]',
'鞋子/拖鞋/凉鞋[26]',
'防振资材[1]',
'晶片载体・特殊精密工具[288]',
'其他作业用品[2,212]',
'作业服[180]',
'作业台/梯子/活梯[11]',
'垫圈和孔眼[4]',
'单片机[412]',
'系统模块 - SOM[8]',
'CPU解决方案平台[34]',
'载卡和载板[7]',
'扩展板[315]',
'外壳[94]',
'相机模块[47]',
'显示模块[201]',
'单片机配件[168]',
'开发套件[162]',
'集线器和交换机[1]',
'台式电脑[11]',
'笔记本电脑[7]',
'操纵杆[3]',
'打印机[30]',
'投影机[1]',
'电脑显示器[269]',
'转换适配器[30]',
'固态硬盘[15]',
'USB闪存驱动器[2]',
]
import re
# Add up the bracketed item count of every entry in strli and print the total.
p = re.compile(r'\[([0-9,]*)\]')
# Thousands separators are dropped before the count is converted to int.
a = sum(int(p.findall(entry)[0].replace(',', '')) for entry in strli)
print(a)
https://www.chip1stop.com/CHN/zh/view/searchResult/SearchResultTop?classCd=010234&classLv=3&dispAllFlg=true
https://www.chip1stop.com/CHN/zh/view/searchResult/SearchResultWithClassCd?classCd=010101&classLv=3&subWinSearchFlg=false&searchType=2&dispAllFlg=true&searchFlg=false&direct_Flg=true
\ No newline at end of file
import json
import re
import time
from lxml import etree
from selenium import webdriver
from multiprocessing import Pool
import redis, os, traceback
from selenium.webdriver import DesiredCapabilities
from chip1stop.config import get_redis_conf, ENV
from chip1stop.mylogger import Mylogger
from chip1stop.rabbit_conn import producer
# Options for the Chrome driver that gen_table_doms() starts when ENV == 'test';
# image loading is switched off.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('blink-settings=imagesEnabled=false')
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-images')
# Site root, prepended to relative links found in result pages.
domain = 'https://www.chip1stop.com'
# Captures everything that follows "partId=" in a product URL.
partid_pattern = re.compile('partId=(.*)')
# WebDriver instances keyed by process id, filled by gen_table_doms().
pid_driver_map = dict()
# Logger named 'error.log' (console handler plus rotating file handler).
logger = Mylogger('error.log').logger
# Capabilities for the PhantomJS driver used when ENV is not 'test'.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0") # set the user-agent request header
dcap["phantomjs.page.settings.loadImages"] = False # disable image loading
def _get_driver():
    """Return the WebDriver of the current process, creating and caching it on first use."""
    pid = os.getpid()
    if pid not in pid_driver_map:
        if ENV == 'test':
            driver = webdriver.Chrome(options=chrome_options, executable_path='C:/Users/ICHUNT/Desktop/chromedriver.exe')
        else:
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
        pid_driver_map[pid] = driver
    return pid_driver_map[pid]


def _log_page_error(page, url):
    """Log the last line of the active traceback together with the page number and URL."""
    err = traceback.format_exc()
    logger.error(err[err.rfind('\n', 0, -2):].strip() + ' - page: ' + str(page) + ' - ' + 'url: ' + url)


def gen_table_doms(url, cat_id):
    """Walk the result pages of one category URL and publish every product found.

    Page 1 is loaded with ``driver.get``; later pages are reached by clicking the
    "next page" link until it is no longer present.  A page that contains
    ``m-product-detail-spec`` is parsed as a single product (``parse_text1``),
    any other page as a result table (``parse_text``).  Each product is tagged
    with *cat_id*, sent through ``producer.send_task`` as JSON and printed.

    Errors are logged and never raised, except for a failure to create the
    driver itself.

    Args:
        url: category/search URL to crawl.
        cat_id: category id attached to every product found under *url*.
    """
    driver = _get_driver()
    for page in range(1, 10000):
        # --- navigation -----------------------------------------------------
        try:
            if page == 1:
                driver.get(url)
            else:
                next_btn = driver.find_elements_by_xpath('//a[@id="searchFormForDisp:j_idt1211:nextPage"]')
                if not next_btn:
                    break
                next_btn[0].click()
            # Fixed wait for the page to finish rendering.
            time.sleep(5)
        except Exception:
            _log_page_error(page, url)
            if page == 1:
                # The driver is reused between calls, so after a failed first
                # load it still shows the previous category's page.  Paging on
                # from there would publish those goods under this cat_id.
                return
            continue

        # --- parsing and publishing -----------------------------------------
        try:
            text = driver.page_source
            if 'm-product-detail-spec' in text:
                prod = parse_text1(text)
                prod['site_url'] = driver.current_url
                prod['goods_name_style'] = partid_pattern.findall(driver.current_url)[0]
                prod['goods_sn'] = prod['goods_name'] + '€€' + prod['goods_name_style']
                prod['cat_id'] = cat_id
                producer.send_task(json.dumps(prod))
                print(prod)
            else:
                for good in parse_text(text):
                    good['cat_id'] = cat_id
                    producer.send_task(json.dumps(good))
                    print(good)
        except Exception:
            # Best effort: a page that cannot be parsed is logged and skipped.
            _log_page_error(page, url)
def _first_int(values, default=1):
    """Return the first item of *values* parsed as int, or *default* if absent or not numeric."""
    if not values:
        return default
    try:
        return int(values[0].replace(',', '').strip())
    except ValueError:
        return default


def parse_text(text):
    """Parse a search-result page into a list of product dicts.

    One dict is produced per table row.  ``min_buynum`` and ``increment`` are
    always ints (default 1), matching what ``parse_text1`` emits; previously a
    value found on the page was passed on as a string while the fallback was an
    int.

    Args:
        text: HTML source of the result page.

    Returns:
        list of dicts with the keys goods_name, site_url, goods_name_style,
        goods_sn, provider_name, provider_url, attributes, goods_img,
        goods_desc, pdf_url, goods_thumb, goods_brief, from, goods_number,
        min_buynum, increment and prices.

    Raises:
        IndexError: a row lacks one of the mandatory name/link/provider nodes.
    """
    html_dom = etree.HTML(text)
    table_doms = html_dom.xpath('//span[contains(@id,"searchFormForDisp:searchRepeat:")]/table/tbody/tr')
    product_list = []
    for tr in table_doms:
        obj = {}
        obj['goods_name'] = tr.xpath('.//span[@class="ellipsis-title"]/text()')[0]
        obj['site_url'] = domain + tr.xpath('.//h3/a/@href')[0]
        obj['goods_name_style'] = partid_pattern.findall(obj['site_url'])[0]
        obj['goods_sn'] = obj['goods_name'] + '€€' + obj['goods_name_style']
        obj['provider_name'] = tr.xpath('./td[1]//ul/li[1]/a/span/text()')[0]
        provider_url = tr.xpath('./td[1]//ul/li[1]/a/@href')[0]
        # Relative provider links are made absolute; absolute ones are kept.
        obj['provider_url'] = domain + provider_url if provider_url.startswith('/') else provider_url
        obj['attributes'] = []
        obj['goods_img'] = obj['goods_desc'] = obj['pdf_url'] = obj['goods_thumb'] = ''

        brief = tr.xpath('.//div[@class="m-product-tbl-main-col__caption"]/p/span')
        obj['goods_brief'] = brief[0].text if brief else ''
        fm = tr.xpath('./td[2]/div/div/p[2]')
        obj['from'] = fm[0].text.strip() if fm else ''
        stock = tr.xpath('.//p[@class="m-font-b"][2]')
        obj['goods_number'] = int(stock[0].text.replace('库存数:', '').replace(',', '')) if stock else 0

        # Each xpath is evaluated once; the values come from hidden form inputs.
        obj['min_buynum'] = _first_int(tr.xpath('.//td[3]//input[2]/@value'))
        obj['increment'] = _first_int(tr.xpath('.//td[3]//input[3]/@value'))

        # Price ladder: one {purchases, price} entry per row of the nested table.
        obj['prices'] = []
        for price_dom in tr.xpath('.//td[4]//tbody//tr'):
            per_price = price_dom.xpath('.//td')
            obj['prices'].append({
                'purchases': int(per_price[0].xpath('./text()')[0].replace('+', '').replace(',', '')),
                'price': float(per_price[1].xpath('./text()')[0].replace('¥', '').replace(',', '').strip()),
            })
        product_list.append(obj)
    return product_list
def parse_text1(text):
    """Parse a single-product detail page into one product dict.

    Args:
        text: HTML source of the detail page.

    Returns:
        dict with the keys goods_name, provider_name, provider_url, attributes,
        goods_img, goods_desc, pdf_url, goods_thumb, goods_brief, from,
        goods_number, min_buynum, increment and prices.
    """
    page = etree.HTML(text)
    obj = dict()

    obj['goods_name'] = page.xpath('//h3[@class="m-text-24 m-font-b"]/text()')[0].strip()
    obj['provider_name'] = page.xpath('//ul[@class="m-text-16"]//li[1]//a[1]/text()')[0]
    href = page.xpath('//ul[@class="m-text-16"]//li[1]//a[1]/@href')[0]
    # Relative provider links are made absolute; absolute ones are kept.
    if href.startswith('/'):
        obj['provider_url'] = domain + href
    else:
        obj['provider_url'] = href
    obj['attributes'] = []
    obj['goods_img'] = obj['goods_desc'] = obj['pdf_url'] = obj['goods_thumb'] = ''

    summary = page.xpath('//div[@class="m-product-detail-spec__r"]//ul[@class="m-text-14"][1]//li[1]/text()')
    obj['goods_brief'] = summary[0].replace('产品概述:', '').strip() if summary else ''
    origin = page.xpath('//div[@class="m-flex__item"]//p[@class="m-text-13"]/text()')
    obj['from'] = origin[0].strip() if origin else ''
    stock_values = page.xpath('//input[@name="zaikoSuu"]/@value')
    obj['goods_number'] = int(stock_values[0].replace(",", "")) if stock_values else 0

    # "MOQ/SPQ:<min>/<step>" - the two numbers are split on "/".
    moq_spq = page.xpath(
        '//div[@class="m-mt-xs m-flex m-flex--c2 m-flex--g20 m-text-14"]//div[@class="m-flex__item"][1]//ul//li[1]/text()')
    if not moq_spq:
        obj['min_buynum'] = 1
        obj['increment'] = 1
    else:
        parts = moq_spq[0].replace('MOQ/SPQ:', '').split('/')
        obj['min_buynum'] = int(parts[0])
        obj['increment'] = int(parts[1])

    # Price ladder: one {purchases, price} entry per row of the price table.
    obj['prices'] = []
    for row in page.xpath('//table[@class="m-m-product-tbl-cart-tbl"]//tbody//tr'):
        cells = row.xpath('.//td')
        tier = dict()
        tier['purchases'] = int(cells[0].xpath('./text()')[0].replace('+', '').replace(',', ''))
        tier['price'] = float(cells[1].xpath('./text()')[0].replace('¥', '').replace(',', '').strip())
        obj['prices'].append(tier)
    return obj
def get_url():
    """Yield ``(req_key, cat_id)`` pairs popped from the Redis list 'csc_elec_chip1stop_1'.

    At most 350 entries are taken per call; iteration ends early as soon as the
    list yields nothing.
    """
    client = redis.Redis(connection_pool=redis.ConnectionPool(**get_redis_conf()))
    for _ in range(350):
        raw = client.lpop('csc_elec_chip1stop_1')
        if not raw:
            return
        task = json.loads(raw.decode())
        yield (task['req_key'], task['extra']['cat_id'])
if __name__ == '__main__':
    # Three worker processes; each one keeps its own browser in pid_driver_map.
    p = Pool(3)
    for url, cat_id in get_url():
        # The AsyncResult is never fetched, so an exception that escapes the
        # worker (e.g. the browser could not be started) would vanish silently;
        # error_callback routes it to the error log instead.
        p.apply_async(gen_table_doms, args=(url, cat_id), error_callback=logger.error)
    p.close()
    p.join()
import logging
from logging import handlers
class Mylogger(object):
    """Named logger that writes to the console and to a time-rotated file.

    The configured ``logging.Logger`` is exposed as the ``logger`` attribute.
    """

    # Accepted values of the ``level`` argument and the logging level they select.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Configure the logger named *filename*.

        Args:
            filename: log file path; also used as the logger name.
            level: one of the keys of ``level_relations``.
            when: rotation interval unit passed to ``TimedRotatingFileHandler``.
            backCount: number of rotated files to keep.
            fmt: record format string.
        """
        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations.get(level))
        # getLogger() returns the same object for the same name, so a second
        # Mylogger(filename) would attach a second pair of handlers and every
        # record would be written twice.  Handlers are attached only once.
        if self.logger.handlers:
            return
        format_str = logging.Formatter(fmt)
        sh = logging.StreamHandler()
        sh.setFormatter(format_str)
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount,
                                               encoding='utf-8')
        th.setFormatter(format_str)
        self.logger.addHandler(sh)
        self.logger.addHandler(th)
import json
import pika
from chip1stop.config import get_mq_conf
class Base:
    """Blocking RabbitMQ connection with one channel and an optional exchange."""

    def __init__(self, user, pwd, host, exchange=None, exchange_type=None):
        """Connect to *host* with plain credentials and open a channel.

        The exchange is declared only when both *exchange* and *exchange_type*
        are given.
        """
        params = pika.ConnectionParameters(host, credentials=pika.PlainCredentials(user, pwd))
        self.conn = pika.BlockingConnection(params)  # connection
        self.ch = self.conn.channel()  # channel
        self.exchange = exchange
        if not (exchange and exchange_type):
            return
        self.ch.exchange_declare(exchange=exchange, exchange_type=exchange_type)

    def send_task_fanout(self, body):
        """Publish *body* to the exchange with an empty routing key; no-op without an exchange."""
        if not self.exchange:
            return
        self.ch.basic_publish(exchange=self.exchange, routing_key='',
                              properties=pika.BasicProperties(delivery_mode=2),
                              body=body)

    def send_task_direct(self, routing_key, body):
        """Publish *body* to the exchange under *routing_key*; no-op without an exchange."""
        if not self.exchange:
            return
        self.ch.basic_publish(exchange=self.exchange, routing_key=routing_key,
                              properties=pika.BasicProperties(delivery_mode=2),
                              body=body)

    def __del__(self):
        """Close channel and connection, ignoring any error raised while doing so."""
        try:
            self.ch.close()
            self.conn.close()
        except Exception:
            pass
class Producer(Base):
    """Publisher bound to the broker from ``get_mq_conf()`` and, optionally, one queue."""

    def __init__(self, data_queue=None, durable=True, exchange=None, exchange_type=None):
        """Connect using ``get_mq_conf()`` and declare *data_queue* when one is given.

        Args:
            data_queue: queue that ``send_task`` publishes to.
            durable: whether the queue is declared durable.
            exchange: optional exchange name, handled by ``Base``.
            exchange_type: optional exchange type, handled by ``Base``.
        """
        config = get_mq_conf()
        super().__init__(config['user'], config['password'],
                         config['host'], exchange=exchange, exchange_type=exchange_type)
        self.data_queue = data_queue
        self.durable = durable  # persistence
        if self.data_queue is not None:
            self.ch.queue_declare(queue=self.data_queue, durable=self.durable)

    def send_task(self, body):
        """Publish *body* to ``data_queue`` through the default exchange as a persistent message."""
        self.ch.basic_publish(exchange='', routing_key=self.data_queue,
                              properties=pika.BasicProperties(delivery_mode=2),  # make message persistent
                              body=body)

    def re_conn_channel(self):
        """Replace the channel with a fresh one on the existing connection."""
        # pika raises when close() is called on a channel that is no longer open
        # (ChannelWrongStateError in pika 1.x), which is the very situation this
        # method is meant to recover from, so only an open channel is closed.
        if self.ch.is_open:
            self.ch.close()
        self.ch = self.conn.channel()
        # Same guard as __init__: there is nothing to declare without a queue.
        if self.data_queue is not None:
            self.ch.queue_declare(queue=self.data_queue, durable=self.durable)


# Module-level publisher shared by importers; connects to the broker at import time.
producer = Producer('chip1stop_new_goods_store')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment