Commit 2e0478ed by lzzzzl

添加value task

parents
# Created by .ignore support plugin (hsz.mobi)
### Example user template template
### Example user template
# IntelliJ project files
.idea
*.iml
out
log
gen
/config/db_config.py
# _*_ coding:utf-8 _*_
import sys
import pymysql
from elasticsearch import Elasticsearch
# --- MySQL / Elasticsearch connection settings -------------------------------
# NOTE(review): all credentials below are committed in plaintext; consider
# loading them from the environment or a secrets store instead.

# Read-only account on the production liexin_data database.
zy_conn = {
    'host': '172.18.137.33',
    'port': 3306,
    'user': 'LxDDUsedRead',
    'password': '0o9u0U2oixoYmosflmxXtZmyt',
    'db': 'liexin_data',
    'charset': 'utf8'
}
# Local development database.
local_conn = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'qaz',
    'db': 'bigdata',
    'charset': 'utf8'
}
# SKU
# db-user :LxiCSpuR
# db-pwd :Mysx3Tyzlo00oxlmllyR
# db-host : spu-slave.ichunt.db
# Local scratch/test database.
local_test = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'qaz',
    'db': 'test',
    'charset': 'utf8'
}
# Local mapping database (used as the dashboard stand-in in test env).
local_mapping_conn = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'qaz',
    'db': 'mapping',
    'charset': 'utf8'
}
# Dashboard database (local account).
dashboard_conn = {
    'host': 'localhost',
    'port': 3306,
    'user': 'dashboard',
    'password': 'ichunt5Dashboard@',
    'db': 'dashboard',
    'charset': 'utf8'
}
# Production order database (read slave).
order_conn = {
    'host': '172.18.137.22',
    'port': 3306,
    'user': 'huntdbslave',
    'password': 'mLssy2@@!!@$#yy',
    'db': 'hunt2016',
    'charset': 'utf8'
}
# Learning-data database (production account, localhost host — verify).
learning_conn = {
    'host': 'localhost',
    'port': 3306,
    'user': 'lEaUx',
    'password': 'Xp#psOlT@!@#lfs^%wl',
    'db': 'learning_data',
    'charset': 'utf8'
}
# Digikey scrape database ("test" flavour).
bigdata_conn = {
    'host': '192.168.2.232',
    'port': 3306,
    'user': 'bigdata',
    'password': 'bigdata2019',
    'db': 'digikey',
    'charset': 'utf8'
}
# NOTE(review): identical to bigdata_conn — the test/online split is currently
# a no-op for the digikey database.
online_bigdata_conn = {
    'host': '192.168.2.232',
    'port': 3306,
    'user': 'bigdata',
    'password': 'bigdata2019',
    'db': 'digikey',
    'charset': 'utf8'
}
# Classification database ("test" flavour).
class_conn = {
    'host': '192.168.2.250',
    'port': 3306,
    'user': 'root',
    'password': 'root',
    'db': 'liexin_pool_class',
    'charset': 'utf8'
}
# NOTE(review): identical to class_conn — test/online split is a no-op here too.
class_conn_online = {
    'host': '192.168.2.250',
    'port': 3306,
    'user': 'root',
    'password': 'root',
    'db': 'liexin_pool_class',
    'charset': 'utf8'
}
# db-user :LxiCSpuR
# db-pwd :Mysx3Tyzlo00oxlmllyR
# db-host : spu-slave.ichunt.db
# SPU database ("test" flavour).
spu_conn = {
    'host': '192.168.1.235',
    'port': 3306,
    'user': 'spu',
    'password': 'spu',
    'db': 'liexin_spu',
    'charset': 'utf8'
}
# NOTE(review): identical to spu_conn — test/online split is a no-op here too.
spu_conn_online = {
    'host': '192.168.1.235',
    'port': 3306,
    'user': 'spu',
    'password': 'spu',
    'db': 'liexin_spu',
    'charset': 'utf8'
}
# Learning-data database on the LAN test host.
local_learning_conn = {
    'host': '192.168.1.235',
    'port': 3306,
    'user': 'learning_data',
    'password': 'learning_data#zsyM',
    'db': 'learning_data',
    'charset': 'utf8'
}
# SPU credentials without a fixed 'db' — callers supply the database name.
local_spu = {
    'host': '192.168.1.235',
    'port': 3306,
    'user': 'spu',
    'password': 'spu',
    'charset': 'utf8'
}
# Elasticsearch hosts (list form expected by the Elasticsearch client).
local_es = [{
    'host': '192.168.2.232',
    'port': 9200
}]
# NOTE(review): identical to local_es — the test/online ES split is a no-op.
online_es = [{
    'host': '192.168.2.232',
    'port': 9200
}]
def get_env():
    """Return the runtime environment name.

    Desktop platforms (macOS / Windows) are treated as developer machines
    and map to 'test'; anything else (i.e. the Linux servers) is 'produce'.
    """
    return 'test' if sys.platform in ('darwin', 'win32') else 'produce'
class DBConn:
    """Factory for MySQL and Elasticsearch connections.

    Every method returns a *fresh* connection built from one of the module
    level config dicts; callers are responsible for closing it.  The db_*
    methods select a test or production config via get_env().
    """

    @staticmethod
    def _connect(cfg):
        """Open a pymysql connection from a config dict.

        Single place for the previously ten duplicated connect calls.
        BUGFIX: the 'port' entry of the configs was silently ignored before
        (every config happens to use MySQL's default 3306, so behavior is
        unchanged); it is now passed explicitly.
        """
        return pymysql.connect(cfg['host'],
                               cfg['user'],
                               cfg['password'],
                               cfg['db'],
                               port=cfg['port'],
                               charset=cfg['charset'])

    @staticmethod
    def local_conn():
        return DBConn._connect(local_conn)

    @staticmethod
    def local_test():
        return DBConn._connect(local_test)

    @staticmethod
    def local_mapping_conn():
        return DBConn._connect(local_mapping_conn)

    @staticmethod
    def zy_conn():
        return DBConn._connect(zy_conn)

    @staticmethod
    def order_conn():
        return DBConn._connect(order_conn)

    @staticmethod
    def dashboard_conn():
        return DBConn._connect(dashboard_conn)

    @staticmethod
    def learning_conn():
        return DBConn._connect(learning_conn)

    @staticmethod
    def local_es():
        return Elasticsearch(local_es)

    @staticmethod
    def online_es():
        return Elasticsearch(online_es)

    @staticmethod
    def es():
        """Environment-aware Elasticsearch client."""
        if get_env() == 'test':
            return Elasticsearch(local_es)
        return Elasticsearch(online_es)

    @staticmethod
    def db_learning():
        """learning_data DB: LAN host in test, production account otherwise."""
        cfg = local_learning_conn if get_env() == 'test' else learning_conn
        return DBConn._connect(cfg)

    @staticmethod
    def db_dashboard():
        # NOTE(review): the test branch points at the local *mapping*
        # database rather than a local dashboard one — confirm this is
        # intentional (kept as in the original).
        cfg = local_mapping_conn if get_env() == 'test' else dashboard_conn
        return DBConn._connect(cfg)

    @staticmethod
    def db_bigdata():
        # Test and online configs are currently identical (192.168.2.232).
        cfg = bigdata_conn if get_env() == 'test' else online_bigdata_conn
        return DBConn._connect(cfg)

    @staticmethod
    def db_class():
        cfg = class_conn if get_env() == 'test' else class_conn_online
        return DBConn._connect(cfg)

    @staticmethod
    def db_spu():
        cfg = spu_conn if get_env() == 'test' else spu_conn_online
        return DBConn._connect(cfg)
# Unit multipliers: value * factor converts a reading in that unit into the
# table's base unit (Ω for resistance, pF for capacitance, pH for inductance,
# mA for current, V for voltage, W for power).
res_map = {
    'μΩ': 0.000001,
    'mΩ': 0.001,
    'Ω': 1,
    'kΩ': 1000,
    'MΩ': 1000000,
    'GΩ': 1000000000,
    'TΩ': 1000000000000
}
# Capacitance multipliers, base unit pF.
cap_map = {
    'pF': 1,
    'nF': 1000,
    'µF': 1000000,
    'mF': 1000000000,
    'F': 1000000000000
}
# Inductance multipliers, base unit pH.
ind_map = {
    'pH': 1,
    'nH': 1000,
    'µH': 1000000,
    'mH': 1000000000,
    'H' : 1000000000000,
}
# Current multipliers, base unit mA.
cur_map = {
    'mA': 1,
    'A': 1000
}
# Voltage multipliers, base unit V.
vol_map = {
    'V': 1,
    'kV': 1000
}
# Power multipliers, base unit W.
pow_map = {
    'W': 1,
    'kW': 1000
}
# Accuracy: percent sign -> fractional multiplier.
acc_map = {
    '%': 0.01
}
# Known chip package size codes (imperial), matched as substrings of values.
encap_list = ["0030", "008004", "01005", "015008","0201","02016","0202","0204","03015","03019","0302","0303","0306","0402","0404","0405","0406","0502","0503","0504","0505","0508",
"0602","0603","0604","0605","0606","0612","0704","0707","0709","0803","0804","0805","0806","0815","0830","0905","0909","1003","1005","1006","1007","1008","1010","1012",
"1020","1106","1111","1113","1204","1205","1206","1207","1208","1209","1210","1211","1212","1213","1218","1225","1230","1246","1305","1410","1411","1412","1414","1505","1507",
"1510","1512","1515","1530","1606","1608","1611","1612","1616","1625","1708","1715","1805","1806","1807","1808","1810","1812","1816","1825","1835","1913","1916","1919","2005",
"2008","2010","2015","2016","2018","2020","2023","2043","2211","2214","2215","2218","2220","2225","2304","2312","2323","2325","2410","2414","2416","2420","2423","2424","2512",
"2520","2525","2615","2616","2706","2709","2711","2721","2727","2728","2812","2820","2824","2825","2828","2830","2910","2913","2915","2917","2920","2924","3010","3015","3017",
"3022","3024","3025","3040","3131","3225","3226","3312","3318","3333","3640","3838","3920","3925","3931","4030","4032","4122","4520","4823","4850","5040","5329","5829","5929",
"6028","6030","6031","6039","6054","6560"]
# Chinese resistance words -> unit symbols.
# NOTE(review): '千欧' maps to 'KΩ' (capital K) but res_map above keys on
# 'kΩ' — a value cast through this table will then miss the multiplier
# lookup; confirm which spelling is intended.
cast_map = {
    '毫欧': 'mΩ',
    '欧姆': 'Ω',
    '千欧': 'KΩ',
    '兆欧': 'MΩ',
    '最大': ''
}
# Brand name -> sort order.
# NOTE(review): this dict is overwritten by the second brand_map assignment
# further down, so it is effectively dead data.
brand_map = {
    'Yageo': 1,
    'Murata': 2,
    'Samsung': 3,
    'TDK': 4,
    'Taiyo Yuden': 5,
    'Rohm': 6,
    'KEMET': 7,
    'Vishay': 8,
    'AVX': 9,
    'Panasonic': 10,
    'KOA': 11,
    'Bourns': 12,
    'TE': 13,
    'Wurth': 14,
    'Pulse': 15
}
# Temperature-coefficient / dielectric codes recognised in attribute values.
temp_map = ["C0G","NP0","COG","NPO","X7R","X5R","Y5V","X6S","X7S","X7T","SL","U2J","UJ","X7U","X8R","Z5U","C0H","COH","U2K","X6T","X8G","X8L","Y5R","Y5U","ZLM"]
# Codes that are all normalised to "C0G".
sep_temp_map = ["C0G","NP0","COG","NPO"]
# brand_id -> sort, brand mapping
# NOTE(review): this assignment replaces the brand_map defined above.
brand_map = {
    # '5148': {'sort': 1, 'brand': 'YAGEO(国巨)'},
    # '3249': {'sort': 2, 'brand': 'muRata(村田)'},
    # '4076': {'sort': 3, 'brand': 'SAMSUNG(三星)'},
    # '3302': {'sort': 4, 'brand': 'TDK(东电化)'},
    # '4554': {'sort': 5, 'brand': 'TAIYO YUDEN(太诱)'}, # Taiyo Yuden
    # '4555': {'sort': 5, 'brand': 'TAIYO YUDEN(太诱)'},
    # '1749': {'sort': 6, 'brand': 'ROHM(罗姆)'},
    # '2554': {'sort': 7, 'brand': 'KEMET(基美)'}, # Kemet
    # '5347': {'sort': 7, 'brand': 'KEMET(基美)'}, # Kemet
    # '25': {'sort': 8, 'brand': 'VISHAY(威世)'},
    # '539': {'sort': 9, 'brand': 'AVX(安施)'},
    # '418': {'sort': 10, 'brand': 'Panasonic(松下)'},
    # '2621': {'sort': 11, 'brand': 'KOA'},
    # '703': {'sort': 12, 'brand': 'BOURNS(伯恩斯)'},
    # '161': {'sort': 13, 'brand': 'TE(泰科)'},
    # '3100': {'sort': 14, 'brand': 'Wurth(伍尔特)'},
    # '3810': {'sort': 15, 'brand': 'PULSE(普思)'}
    'YAGEO(国巨)': 1,
    'muRata(村田)': 2,
    'SAMSUNG(三星)': 3,
    'TDK(东电化)': 4,
    'Taiyo Yuden': 5,
    'TAIYO YUDEN(太诱)': 5,
    'ROHM(罗姆)': 6,
    'Kemet': 7,
    'KEMET(基美)': 7,
    'VISHAY(威世)': 8,
    'AVX(安施)': 9,
    'Panasonic(松下)': 10,
    'KOA': 11,
    'BOURNS(伯恩斯)': 12,
    'TE(泰科)': 13,
    'Wurth(伍尔特)': 14,
    'PULSE(普思)': 15
}
# Parameter display name -> DB column name.
param_map = {
    '阻值(欧姆)': 'resistance',
    '容值': 'capacity',
    '电感值': 'inductance',
    '额定电压': 'rated_voltage',
    '额定电流': 'rated_current',
    '功率': 'power',
    '直流电阻(内阻)': 'dc_resistance',
    '精度': 'accuracy',
    '封装': 'encap',
    '温漂系数(介质材料)': 'temperature'
}
# Parameter display name -> PARAM_TRANS conversion method name.
param_ids = {
    '阻值(欧姆)': 'trans_resistance',
    '容值': 'trans_capacity',
    '电感值': 'trans_inductance',
    '额定电压': 'trans_rated_voltage',
    '额定电流': 'trans_rated_current',
    '功率': 'trans_power',
    '直流电阻(内阻)': 'trans_dc_resistance',
    '精度': 'trans_accuracy',
    '封装': 'trans_encap',
    '温漂系数(介质材料)': 'trans_temp'
}
# Parameter display name -> attr_unit_id in lie_class_attr_unit.
param_unit = {
    '阻值(欧姆)': 3,
    '容值': 8,
    '电感值': 13,
    '额定电压': 20,
    '额定电流': 18,
    '功率': 22,
    '直流电阻(内阻)': 3,
    '精度': 24,
    '封装': 26,
    '温漂系数(介质材料)': 26
}
# Parameters handled by the extraction pipeline, in processing order.
param_list = ['阻值(欧姆)','容值','电感值','额定电压','额定电流','功率','直流电阻(内阻)','精度','封装','温漂系数(介质材料)']
File mode changed
# _*_ coding:utf-8 _*_
import pymysql
import config.db_config as config
class PARAM_CONN:
    """Connection factory for the sharded SKU databases (liexin_sku_0..9)."""
    @staticmethod
    def online_sku_conn(index):
        # Open a connection to shard liexin_sku_<index> with the online SKU
        # credentials from config.db_config.
        return pymysql.connect(config.online_sku['host'],
                               config.online_sku['user'],
                               config.online_sku['password'],
                               "liexin_sku_%d" % index,
                               charset="utf8")
    @staticmethod
    def online_spu_conn(index):
        # NOTE(review): byte-for-byte identical to online_sku_conn — it also
        # connects to "liexin_sku_%d" with the SKU credentials.  Looks like a
        # copy-paste remnant; confirm whether this should target the SPU
        # databases instead.
        return pymysql.connect(config.online_sku['host'],
                               config.online_sku['user'],
                               config.online_sku['password'],
                               "liexin_sku_%d" % index,
                               charset="utf8")
\ No newline at end of file
# _*_ coding:utf-8 _*_
from utils.utilClass import LazyProperty
from utils.excel_handler import ExcelHandler
from utils.db_handler import DBHandler
from utils.log_handler import LogHandler
import config.zy_config as config
from utils.date_handler import DateHandler
# Timestamp captured once at import time; all rows inserted during one run
# share this add/update time.
now_ts = DateHandler.now_datetime()
class PARAM_DATA(object):
    """Data-access helpers for the parameter-extraction pipeline.

    NOTE(review): all SQL in this class is built by string interpolation —
    acceptable only while the inputs are trusted internal data.
    """

    def __init__(self):
        self.log = LogHandler('param_extract')

    def class_mapping(self, conn):
        """Build {cat_id: {attr_name: attr_id}} for every class with attrs."""
        def get_attr_mapping(class_id):
            # attr_name -> attr_id for one class
            sql = "SELECT attr_id,attr_name FROM lie_class_attr WHERE class_id = %d" % class_id
            return {row[1]: row[0] for row in DBHandler.read(conn, sql)}
        sql = "SELECT cat_id,class_id FROM lie_class"
        # NOTE(review): get_attr_mapping runs twice per row (filter + value);
        # cache the result if this becomes a hotspot.
        attr_dict = {row[0]: get_attr_mapping(row[1]) for row in DBHandler.read(conn, sql) if get_attr_mapping(row[1])}
        return attr_dict

    def get_unit_value(self, conn):
        """Return {attr_unit_name: attr_unit_id}."""
        sql = "SELECT attr_unit_id,attr_unit_name FROM lie_class_attr_unit"
        res = DBHandler.read(conn, sql)
        return {row[1]: row[0] for row in res}

    def get_class_attr(self, conn):
        """Return all (attr_id, class_id, attr_unit_id) rows."""
        sql = "SELECT attr_id,class_id,attr_unit_id FROM lie_class_attr"
        return DBHandler.read(conn, sql)

    def get_class_attr_value(self, attr_id, value, attr_unit_id, conn):
        """Look up attr_value_id rows for (attr_id, value, attr_unit_id).

        NOTE(review): the missing space in `'{value}'AND` is tolerated by
        MySQL's tokenizer but fragile; kept byte-identical here.
        """
        sql = "SELECT attr_value_id FROM lie_class_attr_value WHERE attr_id={attr_id} AND value='{value}'AND attr_unit_id={attr_unit_id}".format(
            attr_id=attr_id,
            value=value,
            attr_unit_id=attr_unit_id)
        return DBHandler.read(conn, sql)

    def get_dgk_goods(self, i, ids, pams, goods_id, bg_conn):
        """Fetch attribute dicts for one goods_id from shard table _fields<i>."""
        sql = "SELECT attr_name,attr_value,attr_id,cat_id FROM lie_goods_attr_fields%d WHERE cat_id IN (%s) AND attr_id IN (%s) AND attr_value != '' AND goods_id = %d" \
              % (i, ids, pams, goods_id)
        data = [{'attr_name': row[0], 'attr_value': row[1], 'attr_id': row[2], 'cat_id': row[3]} for row in
                DBHandler.read(bg_conn, sql)]
        return data

    def insert_dgk_goods(self, attr_id, value, attr_unit_id, conn):
        """Insert a new attr value row and return its id.

        NOTE(review): SELECT max(attr_value_id) after the INSERT is racy
        under concurrent writers; cursor.lastrowid would be safer.
        """
        sql = "INSERT INTO lie_class_attr_value (attr_id,value,attr_unit_id,status,remark,add_time,update_time) VALUES ('%s','%s','%s','%s','%s','%s','%s')" % \
              (attr_id, value, attr_unit_id, 1, "", now_ts, now_ts)
        DBHandler.insert(conn, sql)
        sql2 = "SELECT max(attr_value_id) FROM lie_class_attr_value"
        max_data = DBHandler.read(conn, sql2)
        return max_data[0][0]

    def update_spu(self, spu_id, attr_values, spu_conn, log):
        """Write attr_values onto the spu row; shard chosen by last digit of spu_id."""
        index = int(str(spu_id)[-1])
        sql = "UPDATE lie_spu_%d SET attr_values='%s' WHERE spu_id=%d" % (index, attr_values, spu_id)
        DBHandler.update(spu_conn, sql)
        log.info("spu_id: {spu_id}, attr_values: {attr_values}".format(spu_id=spu_id,
                                                                       attr_values=attr_values))

    def load_excel(self, fileName):
        """Load the mapping sheet; return (set of class ids, {param_id: method name})."""
        data = ExcelHandler.read_to_excel(fileName, 'Sheet1', 1)
        data.pop(0)  # drop header row
        class_list = list()
        param_dict = dict()
        for row in data:
            param_id = row[0]
            second_classify_id = row[1]
            lx_attr_name = row[3]
            class_list.append(str(int(second_classify_id)))
            unit_id = config.param_ids[lx_attr_name]
            param_dict[str(int(param_id))] = unit_id
        return set(class_list), param_dict

    @staticmethod
    def class_data(class_conn):
        """Classification data: {cat_id: {'class_id': ..., 'parent_id': ...}}."""
        sql = "SELECT cat_id,class_id,parent_id FROM lie_class"
        return {cat_id: {'class_id': class_id, 'parent_id': parent_id}
                for cat_id, class_id, parent_id in DBHandler.read(class_conn, sql)}

    @staticmethod
    def search_cat_id(old_goods_id, bd_conn):
        """Look up (cat_id, sku_id) for a goods id; (0, 0) when not found."""
        sql = "SELECT cat_id,sku_id FROM lie_goods WHERE goods_id = %d" % old_goods_id
        result = DBHandler.read(bd_conn, sql)
        if len(result) > 0:
            return result[0][0], result[0][1]
        else:
            return 0, 0

    @staticmethod
    def update_spu_classId(spu_id, class_id, parent_id, spu_conn, log):
        """Update the spu row's classification ids; shard by last digit of spu_id."""
        db = int(str(spu_id)[-1])
        sql = "UPDATE lie_spu_%d SET class_id1=%d,class_id2=%d,update_time=%d WHERE spu_id=%d" % \
              (db, parent_id, class_id, DateHandler.now_datetime(), spu_id)
        DBHandler.update(spu_conn, sql)
        log.info("spu_id: {spu_id}, class_id1: {class_id1}, class_id2: {class_id2}".format(spu_id=spu_id,
                                                                                           class_id1=parent_id,
                                                                                           class_id2=class_id))

    @staticmethod
    def get_class_id(cat_id, class_conn):
        """Return class_id for a cat_id, or 0 when no row matches.

        BUGFIX: the no-match branch used to return the tuple (0, 0)
        (copy-paste from search_cat_id) while the hit branch returns a
        scalar; callers now always receive a scalar.
        """
        sql = "SELECT class_id FROM lie_class WHERE cat_id = %d" % cat_id
        result = DBHandler.read(class_conn, sql)
        if len(result) > 0:
            return result[0][0]
        return 0

    @staticmethod
    def write_class_attr(class_id, basic_attr_name, lx_attr_name, unit_id, class_db):
        """Insert one class attr row (status 1, remark = liexin attr name)."""
        sql = "INSERT INTO lie_class_attr (attr_name,class_id,attr_unit_id,remark,status,add_time,update_time) \
    VALUES ('%s','%s','%s','%s','%s','%s','%s')" % (basic_attr_name, class_id, unit_id, lx_attr_name, "1", DateHandler.now_datetime(), DateHandler.now_datetime())
        DBHandler.insert(class_db, sql)
# _*_ coding:utf-8 _*_
import threading
class PARAM_THREAD:
    """Helper for fanning work out over a fixed pool of ten threads."""

    @staticmethod
    def thread_func_ten(func, data):
        """Run func(data[i]) for i in 0..9 on ten daemon threads, wait for all.

        :param func: callable taking one positional argument
        :param data: indexable with at least ten elements; data[0]..data[9]
                     are dispatched one per thread
        """
        # Replaces the ten hand-written Thread/append statements with a loop.
        thread_list = [threading.Thread(target=func, args=(data[i],))
                       for i in range(10)]
        for t in thread_list:
            # Daemon so workers never block interpreter exit; the join()
            # below is what actually waits for them.  (.daemon replaces the
            # deprecated setDaemon().)
            t.daemon = True
            t.start()
        for t in thread_list:
            t.join()  # wait until every worker has finished
        print('Mainthread %s ended.' % threading.current_thread().name)
\ No newline at end of file
# _*_ coding:utf-8 _*_
import re
"""
参数转换
"""
cast_map = {
'毫欧': 'mΩ',
'欧姆': 'Ω',
'千欧': 'kΩ',
'兆欧': 'MΩ',
'最大': ''
}
encap_list = ["0030", "008004", "01005", "015008", "0201", "02016", "0202", "0204", "03015", "03019", "0302", "0303",
"0306", "0402", "0404", "0405", "0406", "0502", "0503", "0504", "0505", "0508",
"0602", "0603", "0604", "0605", "0606", "0612", "0704", "0707", "0709", "0803", "0804", "0805", "0806",
"0815", "0830", "0905", "0909", "1003", "1005", "1006", "1007", "1008", "1010", "1012",
"1020", "1106", "1111", "1113", "1204", "1205", "1206", "1207", "1208", "1209", "1210", "1211", "1212",
"1213", "1218", "1225", "1230", "1246", "1305", "1410", "1411", "1412", "1414", "1505", "1507",
"1510", "1512", "1515", "1530", "1606", "1608", "1611", "1612", "1616", "1625", "1708", "1715", "1805",
"1806", "1807", "1808", "1810", "1812", "1816", "1825", "1835", "1913", "1916", "1919", "2005",
"2008", "2010", "2015", "2016", "2018", "2020", "2023", "2043", "2211", "2214", "2215", "2218", "2220",
"2225", "2304", "2312", "2323", "2325", "2410", "2414", "2416", "2420", "2423", "2424", "2512",
"2520", "2525", "2615", "2616", "2706", "2709", "2711", "2721", "2727", "2728", "2812", "2820", "2824",
"2825", "2828", "2830", "2910", "2913", "2915", "2917", "2920", "2924", "3010", "3015", "3017",
"3022", "3024", "3025", "3040", "3131", "3225", "3226", "3312", "3318", "3333", "3640", "3838", "3920",
"3925", "3931", "4030", "4032", "4122", "4520", "4823", "4850", "5040", "5329", "5829", "5929",
"6028", "6030", "6031", "6039", "6054", "6560"]
param_func = {
'阻值(欧姆)': 'trans_resistance',
'容值': 'trans_capacity',
'电感值': 'trans_inductance',
'额定电压': 'trans_rated_voltage',
'额定电流': 'trans_rated_current',
'功率': 'trans_power',
'直流电阻(内阻)': 'trans_dc_resistance',
'精度': 'trans_accuracy',
'封装': 'trans_encap',
'温漂系数(介质材料)': 'trans_temp'
}
class PARAM_TRANS:
    """Split raw attribute strings into (value, unit) pairs via regexes.

    All converters return the two regex groups as strings on a match and
    (0, 0) when the input is empty ('-'/'') or does not match.
    """

    def __init__(self):
        # Expose the name->method table for dynamic dispatch by callers.
        self.param_func = param_func

    @classmethod
    def base_trans(cls, trans_rule, key):
        """Match key (whitespace removed) against trans_rule.

        Returns (group1, group2) on success, (0, 0) otherwise.
        """
        if key in ('-', ''):
            return 0, 0
        if str(key).startswith("."):
            key = "0" + str(key)  # e.g. ".5uF" -> "0.5uF"
        matched = re.match(trans_rule, key.replace(" ", ""))
        if matched:
            return matched.group(1), matched.group(2)
        return 0, 0

    @classmethod
    def trans_resistance(cls, key):
        """Resistance: e.g. '10 kOhms' -> ('10', 'kΩ')."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]Ω|µΩ|Ω)$'
        normalized = str(key).replace("Ohms", 'Ω')
        return PARAM_TRANS.base_trans(rule, normalized)

    @classmethod
    def trans_capacity(cls, key):
        """Capacitance: text after the first '(' is dropped before matching."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]F|µF|F)$'
        head = str(key).split("(")[0]
        return PARAM_TRANS.base_trans(rule, head)

    @classmethod
    def trans_inductance(cls, key):
        """Inductance: e.g. '4.7nH' -> ('4.7', 'nH')."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]H|µH|H)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_rated_voltage(cls, key):
        """Rated voltage: e.g. '25V' -> ('25', 'V')."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]V|V)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_rated_current(cls, key):
        """Rated current: e.g. '1.5A' -> ('1.5', 'A')."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]A|A)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_power(cls, key):
        """Power: e.g. '0.25W' -> ('0.25', 'W')."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]W|W)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_dc_resistance(cls, key):
        """DC resistance: translate Chinese unit words, then parse as resistance."""
        if key in ('-', ''):
            return 0, 0
        text = str(key)
        for word, symbol in cast_map.items():
            if word in text:
                text = text.replace(word, symbol)
        return PARAM_TRANS.trans_resistance(text)

    @classmethod
    def trans_accuracy(cls, key):
        """Tolerance: e.g. '±5%' -> ('±5', '%')."""
        rule = r'(±\d+|±\d+.\d+|\d+|\d+\.\d+)([a-zA-Z]+|%)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_encap(cls, key):
        """Package: return the first known package code contained in key.

        Unlike the other converters, a hit returns (code, 0) and a miss
        returns (head, '') where head is the text before the first comma.
        """
        if key in ('-', ''):
            return 0, 0
        head = str(key).split(",")[0]
        for code in encap_list:
            if code in head:
                return code, 0
        return head, ''

    @classmethod
    def trans_temp(cls, key):
        """Temperature coefficient.

        NOTE(review): the rule matches watt-style units (apparently copied
        from trans_power) — kept byte-identical; confirm the intended rule.
        """
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]W|W)$'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_time(cls, key):
        """Time values: seconds down to picoseconds (rule is unanchored)."""
        rule = r'(\d+|\d+\.\d+)([a-zA-Z]s|cs|µs|ms|ns|ps)'
        return PARAM_TRANS.base_trans(rule, key)

    @classmethod
    def trans_frequency(cls, key):
        """Frequency tolerance in ppm.

        NOTE(review): the matched value is a *string* group, so value * 10
        repeats the text ten times rather than multiplying — kept identical;
        confirm the intended semantics.
        """
        rule = r'(±\d+|±\d+.\d+|\d+|\d+\.\d+)([a-zA-Z]ppm|ppm)$'
        value, unit = PARAM_TRANS.base_trans(rule, key)
        return value * 10, unit
from decimal import Decimal
import re
import config.trans_config as trans_config
from decimal import getcontext
# Re-export the shared conversion tables so the class below can reference
# them by short name.
res_map = trans_config.res_map
cap_map = trans_config.cap_map
ind_map = trans_config.ind_map
vol_map = trans_config.vol_map
cur_map = trans_config.cur_map
pow_map = trans_config.pow_map
cast_map = trans_config.cast_map
encap_list = trans_config.encap_list
sep_temp_map = trans_config.sep_temp_map
temp_map = trans_config.temp_map
# 15 significant digits for all Decimal arithmetic in this module.
getcontext().prec = 15
class UnitTrans:
    """Normalize raw parameter strings into base-unit magnitudes.

    Every trans_* method returns (original_key, normalized_value_str); the
    normalized value is '' when the input is empty ('-'/'') or the unit is
    not found in the corresponding multiplier table.
    """

    def remove_exponent(self, num):
        # Render a Decimal without scientific notation / trailing zeros.
        return str(num.to_integral()) if num == num.to_integral() else str(num.normalize())

    # Resistance (ohms)
    def trans_resistance(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split("(")[0]          # drop any parenthesised suffix
        key = str(key).replace("Ohms", 'Ω')   # normalize English unit spelling
        obj = re.match(r'(\d+|\d+\.\d+)([a-zA-Z]Ω|µΩ|Ω)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1     # 1 is a deliberate non-key sentinel
        if res_map.get(unit):
            ts_value = self.remove_exponent(Decimal(Decimal(value) * Decimal(res_map[unit])))
        else:
            ts_value = ''
        return key, ts_value

    # Capacitance (base pF)
    def trans_capacity(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split("(")[0]
        obj = re.match(r'(\d+|\d+\.\d+)([a-zA-Z]F|µF|F)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1
        return key, self.remove_exponent(Decimal(Decimal(value) * Decimal(cap_map[unit]))) if cap_map.get(unit) else ''

    # Inductance (base pH)
    def trans_inductance(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split("(")[0]
        obj = re.match(r'(\d+|\d+\.\d+)([a-zA-Z]H|µH|H)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1
        return key, self.remove_exponent(Decimal(Decimal(value) * Decimal(ind_map[unit]))) if ind_map.get(unit) else ''

    # Rated voltage (base V)
    def trans_rated_voltage(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split("(")[0]
        obj = re.match(r'(\d+|\d+\.\d+)([a-zA-Z]V|V)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1
        return key, self.remove_exponent(Decimal(Decimal(value) * Decimal(vol_map[unit]))) if vol_map.get(unit) else ''

    # Rated current (base mA)
    def trans_rated_current(self, key):
        if key == '-' or key == '':
            return key, ''
        obj = re.match(r'(\d+|\d+\.\d+)([a-zA-Z]A|A)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1
        return key, self.remove_exponent(Decimal(Decimal(value) * Decimal(cur_map[unit]))) if cur_map.get(unit) else ''

    # Power (base W); also accepts fractional values such as '1/4W'.
    # NOTE(review): the regex accepts a 'WS' unit that is absent from
    # pow_map, so such values normalize to '' — confirm intent.
    def trans_power(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split(",")[0]
        obj = re.match(r'(\d+|\d+\.\d+|\d+/\d+)([a-zA-Z]W|W|WS)$', key.replace(" ", ""))
        value = obj.group(1) if obj else '0'
        unit = obj.group(2) if obj else 1
        if value.find('/') != -1:
            # fractional form, e.g. '1/4' -> 0.250
            val_spl = value.split('/')
            value = Decimal((int(val_spl[0]) / int(val_spl[1]))).quantize(Decimal("0.000"))
        value = Decimal(value).quantize(Decimal("0.000"))
        return key, self.remove_exponent(Decimal(value * Decimal(pow_map[unit]))) if pow_map.get(unit) else ''

    # DC resistance: translate Chinese unit words, then parse as resistance.
    def trans_dc_resistance(self, key):
        if key == '-' or key == '':
            return key, ''
        for ct in cast_map:
            if str(key).find(ct) != -1:
                key = str(key).replace(ct, cast_map[ct])
        return self.trans_resistance(key)

    # Tolerance: strips an optional '±' and scales percent to a fraction.
    # NOTE(review): the matched unit group is computed but ignored — every
    # match is multiplied by 0.01 even for non-'%' units; confirm intent.
    def trans_accuracy(self, key):
        if key == '-' or key == '':
            return key, ''
        obj = re.match(r'(±|)(\d+|\d+\.\d+)([a-zA-Z]+|%)$', key.replace(" ", ""))
        value = obj.group(2) if obj else '0'
        unit = obj.group(3) if obj else 1
        return key, self.remove_exponent(Decimal(Decimal(value) * Decimal("0.01")))

    # Package: first known package code found in the text before the comma.
    def trans_encap(self, key):
        if key == '-' or key == '':
            return key, ''
        key = str(key).split(",")[0]
        for encap in encap_list:
            if str(key).find(encap) != -1:
                return key, encap
        return key, ''

    # Temperature coefficient: C0G-family codes collapse to "C0G", other
    # known codes pass through, everything else normalizes to ''.
    def trans_temp(self, key):
        if key == '-' or key == '':
            return key, ''
        for sep_temp in sep_temp_map:
            if str(key).find(sep_temp) != -1:
                return key, "C0G"
        for temp in temp_map:
            if str(key).find(temp) != -1:
                return key, temp
        return key, ''
# _*_ coding:utf-8 _*_
from job.stock_job import STOCK_JOB
# _*_ coding:utf-8 _*_
from config.db_config import DBConn
from utils.log_handler import LogHandler
from utils.db_handler import DBHandler
import threading
import redis
import json
class SKU_RAW_JOB(object):
    """One-off maintenance job for the redis hash "sku_raw_map".

    For every goods row with a sku_id it reads the '包装' (packing)
    attribute; when that attribute is empty/'-' the redis entry is rewritten
    to keep only raw_goods_id / raw_brand_name.  Work is sharded over ten
    threads by the last digit of goods_id.
    """
    def __init__(self):
        self.db = DBConn
        self.log = LogHandler('stock_job')
        # NOTE(review): redis credentials are hard-coded; the commented line
        # is the LAN test host, the live line below is production.
        # self.redis = redis.StrictRedis(host="192.168.1.235", port=6379, db=0, password="icDb29mLy2s", charset="UTF-8", encoding="UTF-8")
        self.redis = redis.StrictRedis(host="172.18.137.38", port=6379, db=0, password="icDb29mLy2s", charset="UTF-8", encoding="UTF-8")
    def goods_data(self):
        """Load all (goods_id, sku_id) pairs and bucket them by the last
        digit of goods_id into ten lists (one per worker thread)."""
        sql = "SELECT goods_id,sku_id FROM lie_goods WHERE sku_id != 0"
        data = DBHandler.read(DBConn.db_bigdata(), sql)
        index = 0
        field_dict = {
            0: [],
            1: [],
            2: [],
            3: [],
            4: [],
            5: [],
            6: [],
            7: [],
            8: [],
            9: [],
        }
        for row in data:
            goods_id = str(row[0])
            sku_id = row[1]
            field = goods_id[-1:]  # shard key: last digit of goods_id
            field_dict[int(field)].append({'goods_id': goods_id, 'sku_id': sku_id})
            index += 1
            if index % 10000 == 0:
                print(index)  # progress marker
        return field_dict
    def goods_attr(self, goods_id, conn):
        """Read the '包装' (packing) attribute rows for one goods_id from its
        shard table lie_goods_attr_fields<last digit>."""
        field = str(goods_id)[-1]
        sql = "SELECT attr_value FROM lie_goods_attr_fields%s WHERE goods_id = %s AND attr_name = '包装'" % (field, goods_id)
        return DBHandler.read(conn, sql)
    def write_redis(self, data, field):
        """Worker body: rewrite redis entries for one shard's goods list.

        NOTE(review): eval() on redis payloads executes arbitrary
        expressions — prefer json.loads if the stored data is JSON.  The
        bare except below also swallows all errors except for a print.
        """
        conn = self.db.db_bigdata()
        index = 0
        for row in data:
            goods_id = row['goods_id']
            sku_id = row['sku_id']
            attrs = self.goods_attr(goods_id, conn)
            index += 1
            if len(attrs) == 1:
                attr_value = attrs[0][0]
                res = self.redis.hget("sku_raw_map", str(sku_id))
                res = eval(res)
                try:
                    if res:
                        # Empty packing attribute: strip the entry down to
                        # raw_goods_id / raw_brand_name.
                        if attr_value == '-' or attr_value == '':
                            res_json = {'raw_goods_id': res['raw_goods_id'],'raw_brand_name': res['raw_brand_name']}
                            self.redis.hset("sku_raw_map", sku_id, json.dumps(res_json))
                        # else:
                        #     res_json = {'raw_goods_id': res['raw_goods_id'],'raw_brand_name': res['raw_brand_name'],
                        #                 'pack': attr_value}
                        #     self.redis.hset("sku_raw_map", sku_id, json.dumps(res_json))
                except:
                    # print(new_res['raw_goods_id'], new_res['raw_brand_name'])
                    # res_json = {'raw_goods_id': new_res['raw_goods_id'], 'raw_brand_name': new_res['raw_brand_name']}
                    print("wrong: ", sku_id, type(res))
            if index % 10000 == 0:
                print(str(field) + " : " + str(index))
    def run(self):
        """Shard the goods list and process each shard on its own thread."""
        field_dict = self.goods_data()
        print(len(field_dict))
        thread_list = []
        t0 = threading.Thread(target=self.write_redis, args=(field_dict[0], 0,))
        t1 = threading.Thread(target=self.write_redis, args=(field_dict[1], 1,))
        t2 = threading.Thread(target=self.write_redis, args=(field_dict[2], 2,))
        t3 = threading.Thread(target=self.write_redis, args=(field_dict[3], 3,))
        t4 = threading.Thread(target=self.write_redis, args=(field_dict[4], 4,))
        t5 = threading.Thread(target=self.write_redis, args=(field_dict[5], 5,))
        t6 = threading.Thread(target=self.write_redis, args=(field_dict[6], 6,))
        t7 = threading.Thread(target=self.write_redis, args=(field_dict[7], 7,))
        t8 = threading.Thread(target=self.write_redis, args=(field_dict[8], 8,))
        t9 = threading.Thread(target=self.write_redis, args=(field_dict[9], 9,))
        thread_list.append(t0)
        thread_list.append(t1)
        thread_list.append(t2)
        thread_list.append(t3)
        thread_list.append(t4)
        thread_list.append(t5)
        thread_list.append(t6)
        thread_list.append(t7)
        thread_list.append(t8)
        thread_list.append(t9)
        for t in thread_list:
            t.setDaemon(True)  # daemon threads; the join() below does the waiting (setDaemon is deprecated in favour of .daemon)
            t.start()
        for t in thread_list:
            t.join()  # wait until every worker shard has finished
        print('Mainthread %s ended.' % threading.current_thread().name)
        # for row in data:
        #     attrs = self.goods_attr(row[0])
        #     if len(attrs) > 2:
        #         self.write_redis(row[1], attrs[0], attrs[1])
if __name__ == '__main__':
    # Entry point: rewrite the "sku_raw_map" redis hash (run() returns None).
    raw_job = SKU_RAW_JOB()
    data = raw_job.run()
    # print(len(data))
# _*_ coding:utf-8 _*_
import time
import requests
from config.db_config import DBConn
from utils.log_handler import LogHandler
from utils.db_handler import DBHandler
# Search-service endpoint used to fetch live stock counts.
request_url = "http://so12.ichunt.com/search/other/hasStock"
# Elasticsearch index holding the goods documents.
index = "goods_map"
# NOTE(review): shadows the builtin `type` and is unused here (es_update
# hard-codes doc_type="goods"); kept for backward compatibility.
type = "goods"
class STOCK_JOB(object):
    """
    Stock refresh job: for each row of lie_goods_attrs, query the search
    service for live stock and write it onto the goods document in ES.
    """
    def __init__(self):
        self.db = DBConn
        self.log = LogHandler('stock_job')
    def attrs_data(self):
        """
        Load the (id, goods_name) rows to refresh.
        :return: list of rows, or None when the query fails (error is logged)
        """
        self.log.info("STOCK_JOB : start")
        try:
            self.log.info("STOCK_JOB - {func}: start".format(func="attrs_data"))
            # Resume point: ids <= 1442447 were handled by earlier runs.
            sql = "SELECT id,goods_name FROM lie_goods_attrs WHERE id > 1442447"
            return DBHandler.read(DBConn.db_learning(), sql)
        except Exception as e:
            self.log.error("STOCK_JOB - {func}: error".format(func="attrs_data"))
            self.log.error(str(e))
    def request_stock(self, kw):
        """
        Query the stock endpoint for one goods name.
        :param kw: goods name keyword
        :return: stock payload, or None on request/parse failure
        """
        try:
            # self.log.info("STOCK_JOB - {func}: start - kw: {kw}".format(func="request_stock", kw=kw))
            body = {'goods_name': kw, 'no_rule': 1122}
            return requests.post(url=request_url, data=body).json()['data']
        except Exception as e:
            self.log.error("STOCK_JOB - {func}: error".format(func="request_stock"))
            self.log.error(str(e))
            return None
    def es_update(self, goods_id, stock):
        """
        Partial-update the stock field of one goods document in ES.
        :param goods_id: ES document id
        :param stock: stock payload from request_stock (may be None)
        """
        try:
            self.log.info("STOCK_JOB - {func}: start - goods_id: {goods_id} - stock: {stock}".
                          format(func="es_update", goods_id=goods_id, stock=stock))
            time.sleep(0.01)  # light throttle so ES is not hammered
            es = self.db.es()
            body = {"doc": {"stock": stock}}
            es.update(index=index, doc_type="goods", id=goods_id, body=body)
        except Exception as e:
            self.log.error("STOCK_JOB - {func}: error".format(func="es_update"))
            self.log.error(str(e))
    def run(self):
        """
        Drive the job.

        BUGFIX: attrs_data() returns None after a query error, and the old
        side-effect list comprehension then raised TypeError; the comprehension
        is also replaced by a plain loop since only side effects matter.
        """
        data = self.attrs_data() or []
        for row in data:
            self.es_update(row[0], self.request_stock(row[1]))
if __name__ == '__main__':
    # Entry point: run one full stock refresh pass.
    STOCK_JOB().run()
\ No newline at end of file
import chardet
from sklearn.externals import joblib
import pandas as pd
from utils.excel_handler import ExcelHandler
model_path = ""
class BaseHandler:
    """Common helpers: excel loading, file-encoding detection, prediction."""
    def __init__(self, name):
        self.name = name
        # Real model loading is disabled; empty-string stubs keep the
        # attribute shape.  NOTE(review): sklearn.externals.joblib is removed
        # in modern scikit-learn — use the standalone joblib when re-enabling.
        # self.model = joblib.load("E:\\data\\model")
        # self.extractor = joblib.load("E:\\data\\extractor")
        self.model = ''
        self.extractor = ''
    def load_excel(self, filename, sheet, del_head=False):
        """
        Read one sheet of an excel file.
        :param filename: path of the workbook
        :param sheet: sheet name
        :param del_head: when True, drop the header row
        :return: list of data rows (header removed when del_head)

        BUGFIX: the old `return data.pop(0) if del_head else data` returned
        the popped *header row* instead of the remaining data (compare the
        analogous PARAM_DATA.load_excel, which pops then uses the rest).
        """
        data = ExcelHandler.read_to_excel(filename, sheet, 1)
        if del_head:
            data.pop(0)
        return data
    def get_encoding(self, file):
        """
        Detect a file's character encoding with chardet.
        :param file: path to inspect
        :return: encoding name string
        """
        with open(file, 'rb') as f:
            return chardet.detect(f.read())['encoding']
    def get_predict(self, kw) -> tuple:
        """
        Predict a label for one keyword with the loaded extractor + model.
        :param kw: keyword to classify
        :return: (predicted_label, kw)
        """
        series = pd.Series(kw)
        feature = self.extractor.transform(series)
        predictions = self.model.predict(feature)
        return predictions[0], kw
# _*_ coding:utf-8 _*_
import redis
import pymysql
import json
from config.db_config import DBConn
from utils.log_handler import LogHandler
from utils.db_handler import DBHandler
import config.db_config as config
class PARAM_CLASS_REDIS(object):
    """
    Parameter extraction support job: copy class_id1/class_id2 from the
    sharded lie_spu_* tables into the "spu" redis hash.
    """
    def __init__(self):
        self.db = DBConn
        self.log = LogHandler('param_extract')
        # NOTE(review): redis credentials hard-coded; commented line is the
        # LAN test host, the live line is production.
        # self.redis = redis.StrictRedis(host="192.168.1.235", port=6379, db=0, password="icDb29mLy2s", charset="UTF-8", encoding="UTF-8")
        self.redis = redis.StrictRedis(host="172.18.137.38", port=6379, db=0, password="icDb29mLy2s", charset="UTF-8", encoding="UTF-8")
    def search_sku(self, spu_id):
        """Scan all 10 sku databases x 10 tables for rows of this spu_id and
        print any hit.

        NOTE(review): opens a fresh connection per database on every call and
        never closes it; credentials are also hard-coded here.
        """
        for i in range(0, 10):
            db = "liexin_sku_%d" % i
            # conn = pymysql.connect(config.local_spu['host'], config.local_spu['user'], config.local_spu['password'], db,
            #                        charset=config.local_spu['charset'])
            conn = pymysql.connect("spu-slave.ichunt.db", "LxiCSpuR", "Mysx3Tyzlo00oxlmllyR", "liexin_sku_%d" % i,
                                   charset="utf8")
            for j in range(0, 10):
                table = "lie_sku_%d" % j
                sql = "SELECT spu_id,goods_id FROM %s WHERE supplier_id=7 AND spu_id=\'%d\'" % (table, spu_id)
                res = DBHandler.read(conn, sql)
                if len(res) > 0:
                    print("spu_id: {spu_id}, goods_id: {goods_id}".format(spu_id=spu_id, goods_id=res[0][1]))
    def spu_data(self):
        """Push class ids of classified spus into the "spu" redis hash.

        NOTE(review): eval() on redis values executes arbitrary expressions —
        prefer json.loads if the stored payload is JSON.
        """
        for i in range(0, 10):
            # TODO temporary change: LIMIT 100 per shard for now
            # sql = "SELECT spu_id,class_id1,class_id2 FROM lie_spu_%d WHERE class_id1 != 0 AND class_id2 != 0 AND attr_values != ''" % i
            sql = "SELECT spu_id,class_id1,class_id2 FROM lie_spu_%d WHERE class_id1 != 0 AND class_id2 != 0 \
    AND attr_values != '' ORDER BY spu_id LIMIT 100 " % i
            res = DBHandler.read(self.db.db_spu(), sql)
            print(len(res))
            for row in res:
                spu_id = row[0]
                class_id1 = row[1]
                class_id2 = row[2]
                spu_res = self.redis.hget("spu", str(spu_id))
                if spu_res:
                    spu_res = eval(spu_res)
                    spu_res['class_id1'] = class_id1
                    spu_res['class_id2'] = class_id2
                    # print(spu_res)
                    self.redis.hset("spu", str(spu_id), json.dumps(spu_res))
                    # self.log.info("spu_id: {spu_id}".format(spu_id=spu_id))
                    self.search_sku(spu_id)
                else:
                    continue
    def run(self):
        # Thin wrapper kept for interface parity with the other jobs.
        self.spu_data()
if __name__ == "__main__":
param = PARAM_CLASS_REDIS()
param.spu_data()
\ No newline at end of file
# _*_ coding:utf-8 _*_
from config.db_config import DBConn
from utils.log_handler import LogHandler
from utils.db_handler import DBHandler
from fun.param_conn import PARAM_CONN
from fun.param_data import PARAM_DATA
import time
class PARAM_EXTRACT(object):
    """
    Parameter extraction: back-fill class_id1/class_id2 on spu rows by
    resolving each digikey (supplier_id = 7) sku's category.
    """
    def __init__(self):
        self.db = DBConn
        # Console-only and file-only loggers sharing one logger name.
        self.log1 = LogHandler('param_extract', stream=True, file=False)
        self.log2 = LogHandler('param_extract', stream=False, file=True)
        # Ten buckets keyed by the last digit of old_goods_id.
        self.dgk_dict = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}
    """
    获取dgk数据
    """
    def dgk_data(self):
        # Collect (spu_id, old_goods_id) for supplier 7 from every sharded
        # sku database (liexin_sku_0..9) and table (lie_sku_0..9).
        self.log1.info("dgk_data : start")
        # iterate SKU shard databases
        for i in range(0, 10):
            db = "liexin_sku_%d" % i
            # per-shard connection
            conn = PARAM_CONN.online_sku_conn(i)
            for j in range(0, 10):
                table = "lie_sku_%d" % j
                sql = "SELECT spu_id,old_goods_id FROM %s WHERE supplier_id = 7" % table
                # read this table's slice
                db_data = DBHandler.read(conn, sql)
                # progress log
                self.log1.info("database: {db} - table: {table} - data_len: {size}".format(db=db,
                                                                                          table=table,
                                                                                          size=len(db_data)))
                for spu_id, old_goods_id in db_data:
                    # bucket by the trailing digit of the legacy goods id
                    param_index = int(str(old_goods_id)[-1])
                    self.dgk_dict[param_index].append((spu_id, old_goods_id))
    """
    更新class id
    """
    def update_classId(self):
        # DB connections
        spu_conn = DBConn.db_spu()
        class_conn = DBConn.db_class()
        bd_conn = DBConn.db_bigdata()
        # cat_id -> {'parent_id', 'class_id'} lookup — presumably; verify in PARAM_DATA.class_data
        class_data = PARAM_DATA.class_data(class_conn)
        # walk the buckets
        index = 0
        for i in self.dgk_dict:
            dgk_data = self.dgk_dict[i]
            # NOTE(review): only bucket 5 is processed — looks like a partial /
            # temporary run; confirm before assuming full coverage.
            if i == 5:
                for spu_id, old_goods_id in dgk_data:
                    cat_id, sku_id = PARAM_DATA.search_cat_id(old_goods_id, bd_conn)
                    if cat_id != 0:
                        parent_id = class_data[cat_id]['parent_id']
                        class_id = class_data[cat_id]['class_id']
                        # throttle writes
                        time.sleep(0.01)
                        PARAM_DATA.update_spu_classId(int(spu_id), int(class_id), int(parent_id), spu_conn, self.log2)
                    index += 1
                    if index % 10000 == 0:
                        self.log1.info("run - index: {index}".format(index=index))
    """
    运行
    """
    def run(self):
        # full pipeline: bucket the dgk rows, then write the class ids
        self.dgk_data()
        self.update_classId()
        # PARAM_THREAD.thread_func_ten(self.update_classId, self.dgk_dict)
\ No newline at end of file
# _*_ coding:utf-8 _*_
import pymysql
from param.class_extract import CLASS_EXTRACT
from utils.db_handler import DBHandler
from utils.log_handler import LogHandler
from fun.param_data import PARAM_DATA
from fun.param_conn import PARAM_CONN
from config.db_config import DBConn
import threading
import json
import time
# Module-level PARAM_DATA helper shared by SPU_EXTRACT's methods below.
param_data = PARAM_DATA()
class SPU_EXTRACT(object):
    """
    SPU data extraction: pull digikey attributes for every supplier-7 sku and
    write normalized attr value-id lists back onto the spu shards.
    """
    def __init__(self):
        # Console-only and file-only loggers.
        self.log1 = LogHandler('param_extract', stream=True, file=False)
        self.log2 = LogHandler('param_extract', stream=False, file=True)
        # Ten buckets keyed by the last digit of old_goods_id.
        self.dgk_dict = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: []}
    def get_spu_data(self):
        """
        Bucket every supplier-7 sku (old_goods_id / spu_id / goods_id) from the
        ten sharded sku databases into self.dgk_dict.
        :return:
        """
        self.log1.info("GET_SPU_DATA : start")
        # iterate SKU shard databases
        for i in range(0, 10):
            self.log1.info("GET_SPU_DATA I-{i} : start".format(i=i))
            # per-shard connection
            conn = PARAM_CONN.online_sku_conn(i)
            for j in range(0, 10):
                self.log1.info("GET_SPU_DATA J-{j} : start".format(j=j))
                sql = "SELECT old_goods_id,spu_id,goods_id FROM lie_sku_%d WHERE supplier_id = 7" % j
                data = DBHandler.read(conn, sql)
                self.log1.info("DATA length-{length} : start".format(length=len(data)))
                # bucket the rows
                for row in data:
                    old_goods_id = row[0]
                    spu_id = row[1]
                    goods_id = row[2]
                    # trailing digit of the legacy id selects the bucket
                    param_index = int(str(old_goods_id)[-1])
                    self.dgk_dict[param_index].append({'spu_id': spu_id, 'old_goods_id': old_goods_id, 'goods_id': goods_id})
    def get_dgk_data(self):
        """
        For each bucketed sku: fetch its digikey attributes, run every raw
        value through the converter mapped in the Excel sheet, make sure an
        attr-value row exists (insert on first sighting), and store the
        resulting id list on the spu row.
        :return:
        """
        self.log1.info("GET_DGK_DATA : start")
        # DB connections
        spu_conn = DBConn.db_spu()
        class_conn = DBConn.db_class()
        bg_conn = DBConn.db_bigdata()
        # value extraction helpers
        class_extract = CLASS_EXTRACT()
        # cat_id -> {attr_name -> attr_id} mapping — presumably; verify in PARAM_DATA.class_mapping
        cls_map = param_data.class_mapping(class_conn)
        # unit name -> unit id
        unit_value = param_data.get_unit_value(class_conn)
        # class attribute values
        class_value = param_data.get_class_attr(class_conn)
        class_list, param_dict = param_data.load_excel("/data2/param_data/DGK参数.xlsx")
        # comma-joined id and parameter sets used by the goods query
        ids = ",".join(class_list)
        pams = ",".join([_ for _ in param_dict])
        count = 0
        for i in self.dgk_dict:
            dgk_data = self.dgk_dict[i]
            for row in dgk_data:
                count += 1
                old_goods_id = row['old_goods_id']
                spu_id = row['spu_id']
                goods_id = row['goods_id']
                dgk_goods = param_data.get_dgk_goods(int(str(old_goods_id)[-1]), ids, pams, old_goods_id, bg_conn)
                if dgk_goods:
                    dgk_list = list()
                    for goods in dgk_goods:
                        field_cat_id = goods['cat_id']
                        field_attr_id = goods['attr_id']
                        field_attr_name = goods['attr_name']
                        field_attr_value = goods['attr_value']
                        # target attr id in our own class system
                        write_attr_id = cls_map[field_cat_id][field_attr_name]
                        # dispatch the raw value to the CLASS_EXTRACT method named in the sheet
                        value, unit = getattr(CLASS_EXTRACT, param_dict[str(field_attr_id)])(class_extract, field_attr_value)
                        # keep only parsable values with a known unit
                        if value != 0 and unit != 0 and unit_value.get(unit):
                            attr_unit_id = unit_value[unit]
                            value_exist = param_data.get_class_attr_value(write_attr_id, value, attr_unit_id, class_conn)
                            if len(value_exist) > 0:
                                attr_value_id = value_exist[0][0]
                            else:
                                # first sighting: create the attr-value row
                                attr_value_id = param_data.insert_dgk_goods(write_attr_id,value,attr_unit_id, class_conn)
                                print('insert', attr_value_id, write_attr_id)
                            dgk_list.append({'attr_id': write_attr_id, 'attr_value_id': attr_value_id, 'attr_unit_id': attr_unit_id})
                    if len(dgk_list) > 0:
                        # throttle writes
                        time.sleep(0.01)
                        param_data.update_spu(int(spu_id), json.dumps(dgk_list), spu_conn, self.log2)
                if count % 10000 == 0:
                    self.log1.info("COUNT: {COUNT}".format(COUNT=count))
    """
    [{'attr_name': '封装/外壳', 'attr_value': '0603(1608 公制)', 'attr_id': 4257, 'cat_id': 418}, {'attr_name': '温度系数', 'attr_value': 'X6S', 'attr_id': 4251, 'cat_id': 418}, {'attr_name': '容差', 'attr_value': '±10%', 'attr_id': 4249, 'cat_id': 418}, {'attr_name': '电压 - 额定', 'attr_value': '10V', 'attr_id': 4250, 'cat_id': 418}, {'attr_name': '电容', 'attr_value': '3.3µF', 'attr_id': 4248, 'cat_id': 418}]
    """
    def run(self):
        # full pipeline
        self.get_spu_data()
        self.get_dgk_data()
if __name__ == '__main__':
    # Run the full SPU extraction pipeline.
    SPU_EXTRACT().run()
    # cls_map = PARAM_DATA().class_mapping()
    # print(cls_map)
# _*_ coding:utf-8 _*_
from param.base_handler import BaseHandler
from utils.db_handler import DBHandler
from utils.excel_handler import ExcelHandler
from fun.unit_trans import UnitTrans
from elasticsearch.helpers import bulk
import config.zy_config as zy_config
from config.db_config import DBConn
from utils.date_handler import DateHandler
import pymysql
import json
import time
index = "goods_map"
type = "goods"
conn = DBConn.db_bigdata()
class ZY_HANDLER(BaseHandler):
    """Self-operated ("ZY") goods pipeline: export attrs to Excel, import them
    into a local table, and push normalized attributes into the goods_map ES
    index."""
    def collect_data(self):
        """
        Export self-operated goods (id / name / attrs / brand) to result.xls.
        :return:
        """
        rd = []
        sql = "SELECT g.goods_id,g.goods_name,g.attrs,b.brand_name FROM lie_goods g LEFT JOIN lie_brand b ON g.brand_id = b.brand_id WHERE g.status = 1 AND b.status = 1 AND g.attrs != '' AND g.brand_id != 0"
        result = DBHandler.read(conn, sql)
        for row in result:
            rd.append({'goods_id': str(row[0]), 'goods_name': str(row[1]), 'attrs': str(row[2]), 'brand_name': str(row[3])})
        title = ['goods_id', 'goods_name', 'attrs', 'brand_name']
        content = ['goods_id', 'goods_name', 'attrs', 'brand_name']
        ExcelHandler.write_to_excel(title, content, rd, "result", result_type=2)
    def write_db(self):
        """
        Load the exported parameter workbook into the local
        lie_goods_attrs_zy table.
        :return:
        """
        def load_excel():
            # Read the workbook rows, dropping the header.
            data = ExcelHandler.read_to_excel('E:\\doc\\GDB参数.xls', 'Sheet', 1)
            data.pop(0)
            return [{'goods_id': row[0], 'goods_name': row[1], 'attrs': row[2], 'brand_name': row[3]} for row in data]
        def write_local(cols, goods_id, goods_name, brand_name, brand_sort, values):
            # NOTE(review): SQL assembled by string interpolation — acceptable
            # for this trusted offline import, injection-prone otherwise.
            sql = "INSERT INTO lie_goods_attrs_zy (goods_id,goods_name,brand_name,brand_sort,%s) VALUES ('%s','%s','%s',%d,%s)" % (cols, goods_id, goods_name, brand_name, brand_sort, values)
            DBHandler.insert(DBConn.local_conn(), sql)
        data = load_excel()
        for row in data:
            goods_id = row['goods_id']
            goods_name = row['goods_name']
            attrs = json.loads(row['attrs'])
            brand_name = row['brand_name']
            # unmapped brands sink to the bottom of the sort order
            brand_sort = 999
            if brand_name in zy_config.brand_map:
                brand_sort = zy_config.brand_map[brand_name]
            cols = []
            values = []
            for attr in attrs:
                # persist only attributes with a configured column mapping
                if attr['attr_name'] in zy_config.param_map:
                    cols.append(zy_config.param_map[attr['attr_name']])
                    values.append("'" + attr['attr_value'] + "'")
            if len(cols) > 0:
                write_local(",".join(cols), goods_id, goods_name, brand_name, brand_sort, ",".join(values))
    def uplod_es(self):
        # (sic: method-name typo kept — the __main__ block calls uplod_es.)
        """Normalize attribute units per row and bulk-index into ES."""
        def load_db():
            sql = "SELECT resistance,capacity,inductance,rated_voltage,rated_current,power,dc_resistance,accuracy,encap,temperature,goods_name,brand_name,brand_sort,id FROM lie_goods_attrs"
            return DBHandler.read(DBConn.db_learning(), sql)
        def brand_mapping():
            # ly brand name -> zy brand name lookup
            sql = "SELECT zy_brand,ly_brand FROM `lie_zy_ly_mapping` GROUP BY zy_brand,ly_brand"
            return {row[1]: row[0] for row in DBHandler.read(DBConn.db_dashboard(), sql)}
        ACTIONS = []
        brand_mapping = brand_mapping()
        data = load_db()
        trans = UnitTrans()
        for row in data:
            resistance = row[0]
            capacity = row[1]
            inductance = row[2]
            rated_voltage = row[3]
            rated_current = row[4]
            power = row[5]
            dc_resistance = row[6]
            acc_accuracy = row[7]
            encap = row[8]
            temperature = row[9]
            goods_name = row[10]
            brand_name = row[11]
            brand_sort = row[12]
            auto_id = row[13]
            # resistance (ohms)
            re_key, re_value = trans.trans_resistance(resistance)
            # capacitance
            cap_key, cap_value = trans.trans_capacity(capacity)
            # inductance
            ind_key, ind_value = trans.trans_inductance(inductance)
            # rated voltage
            vol_key, vol_value = trans.trans_rated_voltage(rated_voltage)
            # rated current
            cur_key, cur_value = trans.trans_rated_current(rated_current)
            # power
            pow_key, pow_value = trans.trans_power(power)
            # DC resistance
            dc_key, dc_value = trans.trans_dc_resistance(dc_resistance)
            # tolerance / accuracy
            acc_key, acc_value = trans.trans_accuracy(acc_accuracy)
            # package
            encap_key, encap_value = trans.trans_encap(encap)
            # temperature
            temp_key, temp_value = trans.trans_temp(temperature)
            # print(row, cap_key, cap_value)
            # full-text search string from the non-empty keys
            key_list = [re_key, cap_key, ind_key, vol_key, cur_key, pow_key, dc_key, acc_key, encap_key, temp_key]
            key_str = " ".join(filter(lambda x: x != '', key_list))
            # ES attrs array ('€' separates attribute key and value)
            attrs = []
            value_list = [re_value, cap_value, ind_value, vol_value, cur_value, pow_value, dc_value, acc_value,
                          encap_value, temp_value]
            for key, value in zip(zy_config.param_list, value_list):
                if value != '':
                    attrs.append({'attr_value': '%s€%s' % (key, value)})
            ACTIONS.append(
                {
                    "_index": index,
                    "_type": type,
                    "_id": auto_id,
                    "_source": {
                        "auto_id": auto_id,
                        "brand_name": brand_mapping[brand_name] if brand_name in brand_mapping else brand_name,
                        "brand_sort": brand_sort,
                        "goods_name": goods_name,
                        "attr_str": key_str,
                        "attrs": attrs,
                        "update_time": DateHandler.now_datetime()
                    }
                }
            )
            # flush in batches of 100
            if len(ACTIONS) % 100 == 0:
                self.load_es(ACTIONS)
                ACTIONS.clear()
                # time.sleep(1)
        if len(ACTIONS) > 0:
            self.load_es(ACTIONS)
    def load_es(self, ACTIONS):
        """Bulk-upload the actions, retrying up to 3 times before giving up."""
        count = 0
        while count < 3:
            count += 1
            try:
                # success, _ = bulk(DBConn.local_es(), ACTIONS, raise_on_error=True)
                success, _ = bulk(DBConn.es(), ACTIONS, raise_on_error=True)
                print('Performed %d actions' % success)
                return
            # NOTE(review): bare except — swallows everything including
            # KeyboardInterrupt during a retry; deliberate best-effort kept.
            except:
                print("wrong")
if __name__ == "__main__":
handler = ZY_HANDLER('hehe')
# handler.write_db()
handler.uplod_es()
# _*_ coding:utf-8 _*_
from param.param_extract import PARAM_EXTRACT
from param.spu_extract import SPU_EXTRACT
from param.param_class_redis import PARAM_CLASS_REDIS
from upload.param_mongo import PARAM_MONGO
if __name__ == '__main__':
    # Update spu table - attr_values field
    extract = SPU_EXTRACT()
    extract.run()
    # Update spu table - class_id field
    # param_extract = PARAM_EXTRACT()
    # param_extract.run()
    # Update redis
    # param_redis = PARAM_CLASS_REDIS()
    # param_redis.run()
    # Update mongo
    # param_mongo = PARAM_MONGO()
    # param_mongo.run()
\ No newline at end of file
# _*_ coding:utf-8 _*_
from utils.log_handler import LogHandler
from utils.excel_handler import ExcelHandler
from config.db_config import DBConn
from fun.param_data import PARAM_DATA
from fun.param_trans import PARAM_TRANS
"""
值任务
"""
class VALUE_TASK:
def __init__(self):
self.class_list = list()
self.param_dict = dict()
self.class_db = DBConn.db_class()
self.param_trans = PARAM_TRANS()
self.log1 = LogHandler('param_extract', stream=True, file=False)
self.log2 = LogHandler('param_extract', stream=False, file=True)
"""
加载数据
"""
def load_data(self, file_name, sheet):
# 读取Excel数据
data = ExcelHandler.read_to_excel(file_name, sheet, 1)
data.pop(0)
# 遍历数据
for row in data:
# 参数id、二级分类id、基础参数名、猎芯参数名、函数名
param_id = row[0]
second_classify_id = row[1]
basic_attr_name = row[2]
lx_attr_name = row[3]
fuc_name = self.param_trans.param_func[lx_attr_name]
self.class_list.append(str(int(second_classify_id)))
self.param_dict[str(int(param_id))] = fuc_name
print(self.class_list)
print(self.param_dict)
if __name__ == '__main__':
    # Load the parameter sheet from the local workstation path.
    VALUE_TASK().load_data("E:\\doc\\DGK参数.xlsx", "Sheet1")
# _*_ coding:utf-8 _*_
import json
import traceback
import collections
import pymongo
from urllib import parse
from utils.mysql_handler import MysqlHandler
# URL-escape the mongo username and password for the connection URI.
user = parse.quote_plus("ichunt")
# pwd = parse.quote_plus("huntmon6699")
pwd = parse.quote_plus("huntmon66499")
# Production mongo host (the commented 192.168.1.237 line was the test box).
# myclient = pymongo.MongoClient("mongodb://{0}:{1}@192.168.1.237:27017/?authSource=ichunt&authMechanism=SCRAM-SHA-1".format(user, pwd))
myclient = pymongo.MongoClient("mongodb://{0}:{1}@172.18.137.23:27017/?authSource=ichunt&authMechanism=SCRAM-SHA-1".format(user, pwd))
# Target database / collection for the spu attribute documents.
mydb = myclient['ichunt']
mycol = mydb["spu_attrs"]
class PARAM_MONGO(object):
    """Expand each spu's attr_values id triples into readable
    name/value/unit dicts and upsert them into the mongo spu_attrs
    collection."""
    def __init__(self):
        self.mysql_handler = MysqlHandler('liexin_spu')
    def search_mongo(self, spu_id, data):
        """Upsert the attrs_extend document for one spu.

        :param spu_id: spu primary id
        :param data: list of {'attr_name', 'attr_value', 'attr_unit'} dicts
        """
        res = mycol.find({"spu_id": spu_id}).limit(1)
        res = list(res)
        if len(res) == 0:
            insert_data = {'spu_id': spu_id, 'attrs_extend': data}
            mycol.insert_one(insert_data)
        else:
            mg_dt = res[0]
            mg_dt['attrs_extend'] = data
            # NOTE(review): Collection.save() is deprecated in pymongo 3.x and
            # removed in 4.x — replace_one is the modern equivalent.
            mycol.save(mg_dt)
    def run(self):
        """Walk all ten lie_spu_N shards and sync every row into mongo."""
        for index in range(0, 10):
            data = self.mysql_handler.get_spu(index)
            for row in data:
                # try:
                print(row)
                spu_id = row[0]
                attr_values = json.loads(row[1])
                new_av_list = list()
                for av in attr_values:
                    # resolve the three ids into display strings
                    attr_name = self.mysql_handler.get_attr_name(av['attr_id'])
                    attr_value = self.mysql_handler.get_attr_value(av['attr_value_id'])
                    attr_unit = self.mysql_handler.get_attr_unit(av['attr_unit_id'])
                    # keep only fully-resolved triples
                    if attr_name and attr_value and attr_unit:
                        new_av_list.append({'attr_name': attr_name, 'attr_value': attr_value, 'attr_unit': attr_unit})
                self.search_mongo(spu_id, new_av_list)
                # except:
                #     traceback.print_exc()
# GOOD CHANCE
if __name__ == '__main__':
    # Sync all spu attribute documents into mongo.
    PARAM_MONGO().run()
# for x in mycol.find({"spu_id": 123456}):
# print(x)
# x['attrs_extends'] = [{"attr_name": "电压","attr_value": "1000","attr_unit": "V"},
# {"attr_name": "电流","attr_value": "50","attr_unit": "A"}]
# mycol.save(x)
import time
import datetime
from dateutil.relativedelta import relativedelta
class DateHandler:
    """Small collection of static date/time helpers used across the jobs."""

    @staticmethod
    def now_date(days, d_type):
        """Date `days` days before today, as a string.

        :param days: days back from today (0 = today, negative = future)
        :param d_type: 1 -> 'YYYY-MM-DD'; 2 -> year+month+day concatenated
                       without zero padding (e.g. 2019-01-02 -> '201912')
        :return: formatted string, or None for an unknown d_type
        """
        today = datetime.date.today()
        day = today - datetime.timedelta(days=days)
        if d_type == 1:
            return str(day)
        elif d_type == 2:
            return str(day.year) + str(day.month) + str(day.day)

    @staticmethod
    def now_datetime():
        """Current UNIX timestamp in whole seconds."""
        return int(time.time())

    @staticmethod
    def date_time(days):
        """UNIX timestamp of local midnight `days` days before today."""
        today = datetime.date.today()
        day = today - datetime.timedelta(days=days)
        return int(time.mktime(time.strptime(str(day), '%Y-%m-%d')))

    @staticmethod
    def cal_duration(start, end, t_type):
        """Duration between two UNIX timestamps.

        :param t_type: 1 -> seconds, 2 -> minutes, 3 -> hours
        :return: duration (minutes/hours rounded to 2 decimals), None for
                 unknown t_type

        BUG FIX: t_type 1 previously hit a bare `pass` and t_type 3 was
        documented but unimplemented — both returned None.
        """
        seconds = end - start
        if t_type == 1:
            return seconds
        elif t_type == 2:
            return round(seconds / 60, 2)
        elif t_type == 3:
            return round(seconds / 3600, 2)

    @staticmethod
    def weekday():
        """Today's weekday, Monday == 0."""
        return datetime.datetime.now().weekday()

    @staticmethod
    def unix_to_date(timestamp, fmt="%Y-%m-%d %H:%M:%S"):
        """Format a UNIX timestamp using local time."""
        return time.strftime(fmt, time.localtime(timestamp))

    @staticmethod
    def str_to_unix(str, fmt="%Y-%m-%d %H:%M:%S"):
        """Parse a local-time string into a UNIX timestamp.

        NOTE: the parameter shadows the builtin `str`; kept for
        keyword-argument compatibility with existing callers.
        """
        time_array = time.strptime(str, fmt)
        return int(time.mktime(time_array))

    @staticmethod
    def unix_after_days(unix_time, days):
        """Shift a UNIX timestamp forward by whole days (86400 s each)."""
        return unix_time + days * 86400

    @staticmethod
    def _shift_months(base, months):
        """Return `base` shifted `months` months back (negative = forward),
        clamping the day to the target month's length — same semantics as
        dateutil.relativedelta(months=...), but stdlib-only."""
        total = base.year * 12 + (base.month - 1) - months
        year, month0 = divmod(total, 12)
        month = month0 + 1
        # last day of the target month = first of next month minus one day
        if month == 12:
            next_first = datetime.date(year + 1, 1, 1)
        else:
            next_first = datetime.date(year, month + 1, 1)
        last_day = (next_first - datetime.timedelta(days=1)).day
        return datetime.date(year, month, min(base.day, last_day))

    @staticmethod
    def today_between_months(months, fmt="%Y-%m-%d"):
        """Date `months` months before today (negative => after).

        :return: formatted string, e.g. '2019-01-01'
        """
        return DateHandler._shift_months(datetime.date.today(), months).strftime(fmt)

    @staticmethod
    def day_between_months(year, month, day, months, fmt="%Y-%m-%d"):
        """Date `months` months before (year, month, day) (negative => after).

        BUG FIX: the original ignored `day` and always anchored on the 1st.
        :return: formatted string, e.g. '2019-01-01'
        """
        return DateHandler._shift_months(datetime.date(year, month, day), months).strftime(fmt)
# _*_ coding:utf-8 _*_
import traceback
import requests
import random
import hashlib
import string
from hdfs import Client
from urllib import parse
from utils.date_handler import DateHandler
class DBHandler:
    """Static helpers over MySQL (DB-API), HDFS and the internal
    scroll-search / encrypted-search HTTP endpoints."""

    @staticmethod
    def read(db, sql):
        """Run a SELECT and return every row.

        :param db: open DB-API connection (pymysql-style)
        :param sql: query text
        :return: rows from fetchall(); empty tuple when the query fails
        """
        # BUG FIX: the failure default used to be {} (a dict) while the
        # success path returns a tuple — an empty tuple keeps the return
        # type consistent for callers doing len()/iteration.
        results = ()
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            results = cursor.fetchall()
        except Exception:  # narrowed from bare except so Ctrl-C still works
            db.rollback()
            # print(traceback.print_exc())
        return results

    @staticmethod
    def update(db, sql):
        """Execute an UPDATE and commit; roll back (and print the trace) on failure."""
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            db.commit()
        except Exception:
            db.rollback()
            traceback.print_exc()
            # print(sql)

    @staticmethod
    def insert(db, sql):
        """Execute an INSERT and commit; roll back silently on failure
        (best-effort semantics kept from the original)."""
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            db.commit()
        except Exception:
            db.rollback()
            # traceback.print_exc()
            # print(sql)

    @staticmethod
    def delete(db, sql):
        """Execute a DELETE and commit; roll back (and print the trace) on failure."""
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            db.commit()
        except Exception:
            db.rollback()
            traceback.print_exc()
            # print(sql)

    @staticmethod
    def hdfs_read(file):
        """Read an HDFS file and return its content split into lines.

        :param file: HDFS path
        :return: list of byte-string lines (no trailing newlines)
        """
        client = Client("http://172.18.137.35:50170", root="/", timeout=100, session=False)
        with client.read(file) as reader:
            result = reader.read().splitlines()
        return result

    """
    Upload a local file to HDFS.
    upload(hdfs_path, local_path, overwrite=False, n_threads=1, temp_dir=None,
           chunk_size=65536, progress=None, cleanup=True, **kwargs)
      overwrite: replace the remote file if it exists
      n_threads: number of upload threads
      temp_dir: with overwrite=True an existing remote file is swapped in
                after the upload completes
      chunk_size: upload chunk size in bytes
      progress: callback invoked per chunk_size bytes with (path, bytes
                transferred); called with -1 when done
      cleanup: delete partially-uploaded files on error
    """
    @staticmethod
    def hdfs_upload(hdfs_path, local_path):
        client = Client("http://172.18.137.35:50170", root="/", timeout=100, session=False)
        client.upload(hdfs_path=hdfs_path, local_path=local_path)

    @staticmethod
    def scroll_read(url, body, key):
        """Page through a scroll-style search endpoint and collect all hits.

        :param url: endpoint URL
        :param body: initial POST form body
        :param key: name of the result list inside response['data']
        :return: concatenated list of results across all pages
        """
        r = requests.post(url, data=body)
        total = r.json()['data']['total']
        final_result = r.json()['data'][key]
        scroll_id = r.json()['data']['scroll_id']
        # assumes the endpoint serves 1000 hits per page — TODO confirm
        if total > 1000:
            page = int(total / 1000)
            for _ in range(page):
                body = {"scroll_id": scroll_id}
                resp = requests.post(url, data=body)
                # renamed from the original's reuse of `r` as both the
                # response and the loop variable
                for item in resp.json()['data'][key]:
                    final_result.append(item)
        return final_result

    @staticmethod
    def esEncryptData(key, url):
        """Call a signed internal search endpoint.

        :param key: shared signing secret
        :param url: endpoint URL
        :return: response['data'] payload
        """
        # current timestamp
        now_timestamp = DateHandler.now_datetime()
        # 4-char random lowercase salt
        ran_str = ''.join(random.sample(string.ascii_letters + string.digits, 4)).lower()
        # parameters covered by the signature
        params_dict = {'check_time': now_timestamp, 'salt': ran_str}
        # sign = md5(key + urlencoded(params) + salt)
        sign = parse.urlencode(params_dict).lower()
        # key = 'djdj93ichuntj56dksisearchdj45eieapi'
        sign = key + sign + str(ran_str)
        sign = hashlib.md5(sign.encode(encoding='UTF-8')).hexdigest()
        # search endpoint example:
        # requestUrl = "http://so12.ichunt.com/search/ServerApi/index"
        search_body = {"check_time": now_timestamp, "salt": ran_str, "sign": sign}
        r = requests.post(url, data=search_body)
        result = r.json()['data']
        return result
# _*_ coding:utf-8 _*_
import xlwt
import xlrd
# workbook相关
from openpyxl.workbook import Workbook
# ExcelWriter,封装了很强大的excel写的功能
from openpyxl.writer.excel import ExcelWriter
from openpyxl.utils import get_column_letter
class ExcelHandler:
    """
    Excel read/write helpers.
    result_type 1: nested dict ({row_key: {col_key: value}})
                2: list of dicts ([{col_key: value}, ...])
    """
    @staticmethod
    def write_to_excel(title, content, result, file_name, result_type):
        """Write `result` to "<file_name>.xls" (sheet 结果) using xlwt.

        :param title: header labels for row 0
        :param content: record keys, in output column order
        :param result: dict of dicts (result_type 1) or list of dicts (2)
        :param file_name: output file name without the .xls extension
        :param result_type: 1 nested dict, 2 list of dicts
        """
        wb = xlwt.Workbook()
        sheet = wb.add_sheet("结果")
        col_index = 0
        row_index = 0
        # header row
        for t in title:
            sheet.write(row_index, col_index, t)
            col_index += 1
        # data starts at row 1
        col_index = 0
        row_index = 1
        # walk the records
        if result_type == 1:
            for r in result:
                for c in content:
                    sheet.write(row_index, col_index, result[r][c])
                    col_index += 1
                col_index = 0
                row_index += 1
        elif result_type == 2:
            for r in result:
                for c in content:
                    sheet.write(row_index, col_index, r[c])
                    col_index += 1
                col_index = 0
                row_index += 1
        # save to disk
        wb.save(file_name + '.xls')
    """
    function:
    读出*.xlsx中的每一条记录,把它保存在data_dic中返回
    Param:
    records: 要保存的,一个包含每一条记录的list
    save_excel_name: 保存为的文件名
    head_row_stu_arrive_star:
    Return:
    data_dic: 返回的记录的dict
    """
    @staticmethod
    def write_to_excel_with_openpyxl(title, content, result, file_name):
        """Write a nested-dict `result` to `file_name` with openpyxl.

        :param title: header labels
        :param content: record keys, in output column order
        :param result: dict of dicts keyed like write_to_excel result_type 1
        :param file_name: full output filename
        """
        # new workbook
        wb = Workbook()
        # excel writer
        ew = ExcelWriter(workbook=wb)
        # first sheet
        ws = wb.worksheets[0]
        # sheet title
        ws.title = "name"
        # header row
        print(title)
        for h_x in range(1, len(title) + 1):
            h_col = get_column_letter(h_x)
            # NOTE(review): positional ws.cell('A1') is the legacy openpyxl
            # 1.x API — newer versions need ws['A1'] or ws.cell(row=, column=).
            ws.cell('%s%s' % (h_col, 1)).value = '%s' % (title[h_x - 1])
        # data rows from row 2 onwards
        i = 2
        for r in result:
            col_index = 1
            for c in content:
                col = get_column_letter(col_index)
                try:
                    ws.cell('%s%s' % (col, i)).value = '%s' % str(result[r][c])
                except:
                    # unencodable cell: write empty and log the raw value
                    ws.cell('%s%s' % (col, i)).value = '%s' % ''
                    print(result[r][c])
                col_index += 1
            i += 1
        # flush to disk
        ew.save(filename=file_name)
    """
    sheet_name:sheet下数据
    data_type:1:以行为基础的list 2:以列为基础的list
    """
    @staticmethod
    def read_to_excel(file_name, sheet_name, data_type):
        """Read one sheet into a list of row lists (data_type 1) or column
        lists (data_type 2)."""
        rd = []
        wb = xlrd.open_workbook(file_name)
        # locate the sheet
        sheet = wb.sheet_by_name(sheet_name)
        # row / column counts
        row = sheet.nrows
        col = sheet.ncols
        # collect cells
        if data_type == 1:
            for i in range(row):
                l = []
                for j in range(col):
                    l.append(sheet.cell_value(i, j))
                rd.append(l)
        elif data_type == 2:
            for i in range(col):
                l = []
                for j in range(row):
                    l.append(sheet.cell_value(j, i))
                rd.append(l)
        return rd
# -*- coding: utf-8 -*-
import os
import logging
from logging.handlers import TimedRotatingFileHandler
# Log levels (mirroring the stdlib logging module constants).
CRITICAL = 50
FATAL = CRITICAL
ERROR = 40
WARNING = 30
WARN = WARNING
INFO = 20
DEBUG = 10
NOTSET = 0
# <project root>/log — resolved relative to this file and created at import
# time if missing.
CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
ROOT_PATH = os.path.join(CURRENT_PATH, os.pardir)
LOG_PATH = os.path.join(ROOT_PATH, 'log')
if not os.path.exists(LOG_PATH):
    os.mkdir(LOG_PATH)
class LogHandler(logging.Logger):
    """
    Logger that can write to the console and/or a daily-rotating file under
    the project log/ directory.
    """

    def __init__(self, name, level=DEBUG, stream=True, file=True):
        """
        :param name: logger (and log file) name
        :param level: minimum level for the logger and its handlers
        :param stream: attach a console handler
        :param file: attach a daily-rotating file handler
        """
        self.name = name
        self.level = level
        logging.Logger.__init__(self, self.name, level=level)
        if stream:
            self.__setStreamHandler__()
        if file:
            self.__setFileHandler__()

    def __setFileHandler__(self, level=None):
        """
        Attach a TimedRotatingFileHandler: one file per day, 2 backups kept.
        :param level: override level; defaults to the logger's level
        :return:
        """
        file_name = os.path.join(LOG_PATH, '{name}.log'.format(name=self.name))
        file_handler = TimedRotatingFileHandler(filename=file_name, when='D', interval=1, backupCount=2)
        file_handler.suffix = '%Y%m%d.log'
        if not level:
            file_handler.setLevel(self.level)
        else:
            file_handler.setLevel(level)
        # Message-only format — timestamps/levels are presumably added by the
        # log consumers; TODO confirm before changing.
        # formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
        formatter = logging.Formatter('%(message)s')
        file_handler.setFormatter(formatter)
        self.file_handler = file_handler
        self.addHandler(file_handler)

    def __setStreamHandler__(self, level=None):
        """
        Attach a console handler with a timestamped format.
        :param level: override level; defaults to the logger's level
        :return:
        """
        stream_handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
        stream_handler.setFormatter(formatter)
        if not level:
            stream_handler.setLevel(self.level)
        else:
            stream_handler.setLevel(level)
        self.addHandler(stream_handler)

    def resetName(self, name):
        """
        Switch the logger to a new name (and a new log file).

        BUG FIX: previously raised AttributeError when the logger had been
        constructed with file=False (self.file_handler never existed).
        :param name: new logger name
        :return:
        """
        self.name = name
        if hasattr(self, 'file_handler'):
            self.removeHandler(self.file_handler)
        self.__setFileHandler__()
if __name__ == '__main__':
    # Smoke test: emit one info record through a fresh logger.
    LogHandler('test').info('this is a test msg')
# _*_ coding:utf-8 _*_
from config.db_config import class_conn
from config.db_config import DBConn
from utils.db_handler import DBHandler
import pymysql
import time
import sys
def get_env():
    """Detect the runtime environment from the platform: mac/windows boxes
    count as developer machines ('test'), anything else as 'produce'."""
    return 'test' if sys.platform in ('darwin', 'win32') else 'produce'


# Resolved once at import time.
ENV = get_env()

# Per-environment host lookup (only 'test' is currently filled in).
HOST_SET = {
    'test': 'localhost'
}


def get_mysql_conf(db):
    """Build a pymysql-style connection dict for database `db`.

    NOTE(review): the branch looks inverted — 'produce' maps to a local
    root connection while the non-produce branch targets 192.168.1.235;
    confirm intent before relying on it.
    """
    if ENV == 'produce':
        user, pwd = 'root', 'qaz'
        host = 'localhost'
    else:
        user, pwd = 'spu', 'spu'
        host = '192.168.1.235'
    return {
        'host': host,
        'port': 3306,
        'user': user,
        'password': pwd,
        'db': db,
        'charset': 'utf8'
    }
class MysqlHandler(object):
    """Thin accessor over the spu and class databases (connections come from
    DBConn)."""
    def __init__(self, db_name):
        # NOTE(review): the per-db config is computed and printed but never
        # used — both connections below come from DBConn regardless of
        # db_name; confirm whether this is leftover code.
        config = get_mysql_conf(db_name)
        print(config)
        self.db = DBConn.db_spu()
        self.class_db = DBConn.db_class()
    def re_connect(self):
        # NOTE(review): on a failed ping this *closes* the connection instead
        # of re-opening it — looks inverted relative to the method name;
        # confirm intended behavior.
        try:
            self.db.ping()
        except Exception:
            self.db.close()
    @staticmethod
    def get_ts():
        # Current UNIX timestamp in whole seconds.
        return int(time.time())
    def get_spu(self, index):
        # Return (spu_id, attr_values) rows from the lie_spu_<index> shard.
        # TODO: temporary LIMIT 100 run — the full-scan variant is commented out.
        # sql = """
        # SELECT spu_id,attr_values FROM lie_spu_{0} WHERE attr_values != '' AND attr_values != '[]'
        # """.format(index)
        sql = "SELECT spu_id,attr_values FROM lie_spu_%d WHERE class_id1 != 0 AND class_id2 != 0 \
            AND attr_values != '' ORDER BY spu_id LIMIT 100 " % index
        return DBHandler.read(self.db, sql)
    def get_attr_name(self, attr_id):
        # Resolve an attr_id to its display name; raises if the id is missing
        # (fetchone() returns None) — callers rely on a truthy result.
        with self.class_db.cursor() as cursor:
            sql = """
                SELECT attr_name
                FROM lie_class_attr
                WHERE attr_id=%s
            """
            cursor.execute(sql, attr_id)
            result = cursor.fetchone()
            return result[0]
    def get_attr_value(self, attr_value_id):
        # Resolve an attr_value_id to its value; '' when missing/any error.
        with self.class_db.cursor() as cursor:
            try:
                sql = """
                    SELECT value
                    FROM lie_class_attr_value
                    WHERE attr_value_id=%s
                """
                cursor.execute(sql, attr_value_id)
                result = cursor.fetchone()
                return result[0]
            except:
                return ''
    def get_attr_unit(self, attr_unit_id):
        # Resolve an attr_unit_id to its unit name; '' when missing/any error.
        with self.class_db.cursor() as cursor:
            try:
                sql = """
                    SELECT attr_unit_name
                    FROM lie_class_attr_unit
                    WHERE attr_unit_id=%s
                """
                cursor.execute(sql, attr_unit_id)
                result = cursor.fetchone()
                return result[0]
            except:
                return ''
\ No newline at end of file
# -*- coding: utf-8 -*-
# !/usr/bin/env python
class LazyProperty(object):
    """Non-data descriptor that computes the wrapped method once per instance,
    then caches the result in the instance __dict__ under the same name —
    subsequent lookups bypass the descriptor entirely.

    explain: http://www.spiderpy.cn/blog/5/
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, owner):
        # Class-level access yields the descriptor itself.
        if instance is None:
            return self
        result = self.func(instance)
        # Cache: the instance attribute now shadows this descriptor.
        setattr(instance, self.func.__name__, result)
        return result
class Singleton(type):
    """
    Singleton metaclass: every class using it gets exactly one shared
    instance, created lazily on the first constructor call.
    """
    # class -> its single instance
    _inst = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance, constructing it on first use.

        BUG FIX: keyword arguments were silently dropped on the first
        construction (super().__call__ received only *args).
        """
        if cls not in cls._inst:
            cls._inst[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._inst[cls]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment