Commit 3d410c52 by lzzzzl

更新attr_values处理

parent 019f4a7c
...@@ -7,6 +7,14 @@ import config.db_config as config ...@@ -7,6 +7,14 @@ import config.db_config as config
class ParamConn: class ParamConn:
@staticmethod @staticmethod
def local_sku_conn(index):
    """Open a connection to the local SKU shard database ``liexin_sku_<index>``.

    Host, user and password are read from ``config.local_sku_conn``.

    :param index: integer shard number interpolated into the database name
    :return: the object returned by ``pymysql.connect``
    """
    settings = config.local_sku_conn
    # Keyword arguments: PyMySQL 1.0+ made connect() keyword-only, so the
    # positional host/user/password/db form fails there. The keyword form
    # is accepted by older releases as well.
    return pymysql.connect(host=settings['host'],
                           user=settings['user'],
                           password=settings['password'],
                           database="liexin_sku_%d" % index,
                           charset="utf8")
@staticmethod
def online_sku_conn(index): def online_sku_conn(index):
return pymysql.connect(config.online_sku['host'], return pymysql.connect(config.online_sku['host'],
config.online_sku['user'], config.online_sku['user'],
......
...@@ -4,7 +4,7 @@ from utils.db_handler import DBHandler ...@@ -4,7 +4,7 @@ from utils.db_handler import DBHandler
from utils.log_handler import LogHandler from utils.log_handler import LogHandler
import config.zy_config as config import config.zy_config as config
from utils.date_handler import DateHandler from utils.date_handler import DateHandler
from fun.param_trans import ParamTrans
now_ts = DateHandler.now_datetime() now_ts = DateHandler.now_datetime()
...@@ -17,11 +17,11 @@ class ParamData(object): ...@@ -17,11 +17,11 @@ class ParamData(object):
sql = "SELECT attr_id,class_id,attr_unit_id FROM lie_class_attr" sql = "SELECT attr_id,class_id,attr_unit_id FROM lie_class_attr"
return DBHandler.read(conn, sql) return DBHandler.read(conn, sql)
def get_class_attr_value(self, attr_id, value, attr_unit_id, conn):
""" """
attr_id: {} 获取分类attr value
:return:
""" """
@staticmethod
def get_class_attr_value(attr_id, value, attr_unit_id, conn):
sql = "SELECT attr_value_id FROM lie_class_attr_value WHERE attr_id={attr_id} AND value='{value}'AND attr_unit_id={attr_unit_id}".format( sql = "SELECT attr_value_id FROM lie_class_attr_value WHERE attr_id={attr_id} AND value='{value}'AND attr_unit_id={attr_unit_id}".format(
attr_id=attr_id, attr_id=attr_id,
value=value, value=value,
...@@ -35,7 +35,11 @@ class ParamData(object): ...@@ -35,7 +35,11 @@ class ParamData(object):
DBHandler.read(bg_conn, sql)] DBHandler.read(bg_conn, sql)]
return data return data
def insert_dgk_goods(self, attr_id, value, attr_unit_id, conn): """
写入dgk数据
"""
@staticmethod
def insert_dgk_goods(attr_id, value, attr_unit_id, conn):
sql = "INSERT INTO lie_class_attr_value (attr_id,value,attr_unit_id,status,remark,add_time,update_time) VALUES ('%s','%s','%s','%s','%s','%s','%s')" % \ sql = "INSERT INTO lie_class_attr_value (attr_id,value,attr_unit_id,status,remark,add_time,update_time) VALUES ('%s','%s','%s','%s','%s','%s','%s')" % \
(attr_id, value, attr_unit_id, 1, "", now_ts, now_ts) (attr_id, value, attr_unit_id, 1, "", now_ts, now_ts)
DBHandler.insert(conn, sql) DBHandler.insert(conn, sql)
...@@ -43,16 +47,24 @@ class ParamData(object): ...@@ -43,16 +47,24 @@ class ParamData(object):
max_data = DBHandler.read(conn, sql2) max_data = DBHandler.read(conn, sql2)
return max_data[0][0] return max_data[0][0]
"""
def update_spu(self, spu_id, attr_values, spu_conn, log): 更新spu
"""
@staticmethod
def update_spu(spu_id, attr_values, spu_conn, log):
index = int(str(spu_id)[-1]) index = int(str(spu_id)[-1])
sql = "UPDATE lie_spu_%d SET attr_values='%s' WHERE spu_id=%d" % (index, attr_values, spu_id) sql = "UPDATE lie_spu_%d SET attr_values='%s' WHERE spu_id=%d" % (index, attr_values, spu_id)
DBHandler.update(spu_conn, sql) DBHandler.update(spu_conn, sql)
log.info("spu_id: {spu_id}, attr_values: {attr_values}".format(spu_id=spu_id, log.info("spu_id: {spu_id}, attr_values: {attr_values}".format(spu_id=spu_id,
attr_values=attr_values)) attr_values=attr_values))
def load_excel(self, fileName): """
data = ExcelHandler.read_to_excel(fileName, 'Sheet1', 1) 加载excel
"""
@staticmethod
def load_excel(filename):
param_trans = ParamTrans()
data = ExcelHandler.read_to_excel(filename, 'Sheet1', 1)
data.pop(0) data.pop(0)
class_list = list() class_list = list()
param_dict = dict() param_dict = dict()
...@@ -61,8 +73,9 @@ class ParamData(object): ...@@ -61,8 +73,9 @@ class ParamData(object):
second_classify_id = row[1] second_classify_id = row[1]
lx_attr_name = row[3] lx_attr_name = row[3]
class_list.append(str(int(second_classify_id))) class_list.append(str(int(second_classify_id)))
unit_id = config.param_ids[lx_attr_name] fuc_name = param_trans.param_func[lx_attr_name]
param_dict[str(int(param_id))] = unit_id unit_id = param_trans.param_unit[lx_attr_name]
param_dict[str(int(param_id))] = fuc_name
return set(class_list), param_dict return set(class_list), param_dict
""" """
...@@ -157,9 +170,31 @@ class ParamData(object): ...@@ -157,9 +170,31 @@ class ParamData(object):
@staticmethod @staticmethod
def get_dgk_goods_by_index(idx, bg_conn): def get_dgk_goods_by_index(idx, bg_conn):
sql = "SELECT attr_name,attr_value,attr_id,cat_id FROM lie_goods_attr_fields%d WHERE \ sql = "SELECT attr_name,attr_value,attr_id,cat_id FROM lie_goods_attr_fields%d WHERE \
attr_value != '' AND attr_value != '-' LIMIT 10000" % (idx) attr_value != '' AND attr_value != '-'" % (idx)
return DBHandler.read(bg_conn, sql) return DBHandler.read(bg_conn, sql)
@staticmethod
def get_dgk_goods_by_gid(idx, gid, bg_conn):
    """Fetch the usable attribute rows of one product from a sharded table.

    Rows whose ``attr_value`` is empty or ``'-'`` are filtered out by the
    query itself.

    :param idx: integer suffix of the table ``lie_goods_attr_fields<idx>``
    :param gid: integer matched against the ``goods_id`` column
    :param bg_conn: connection handed to ``DBHandler.read``
    :return: list of dicts with the keys ``attr_name``, ``attr_value``,
             ``attr_id`` and ``cat_id``
    """
    # Adjacent literals instead of a backslash inside the string, so the
    # statement text no longer contains whatever indentation the source
    # file happens to use. %d keeps both interpolated values numeric.
    sql = ("SELECT attr_name,attr_value,attr_id,cat_id "
           "FROM lie_goods_attr_fields%d "
           "WHERE attr_value != '' AND attr_value != '-' AND goods_id = %d"
           % (idx, gid))
    columns = ('attr_name', 'attr_value', 'attr_id', 'cat_id')
    return [dict(zip(columns, row)) for row in DBHandler.read(bg_conn, sql)]
@staticmethod
def get_all_dgk_goods(idx, bg_conn):
    """Load every usable attribute row of one shard, grouped by product.

    Rows whose ``attr_value`` is empty or ``'-'`` are filtered out by the
    query itself.

    :param idx: integer suffix of the table ``lie_goods_attr_fields<idx>``
    :param bg_conn: connection handed to ``DBHandler.read``
    :return: dict mapping ``goods_id`` to a list of dicts with the keys
             ``attr_name``, ``attr_value``, ``attr_id`` and ``cat_id``
    """
    # Adjacent literals instead of a backslash inside the string, so the
    # statement text no longer contains source-file indentation.
    sql = ("SELECT attr_name,attr_value,attr_id,cat_id,goods_id "
           "FROM lie_goods_attr_fields%d "
           "WHERE attr_value != '' AND attr_value != '-' " % idx)
    columns = ('attr_name', 'attr_value', 'attr_id', 'cat_id')
    data = {}
    for row in DBHandler.read(bg_conn, sql):
        # row[4] is goods_id; zip() stops after the four named columns.
        data.setdefault(row[4], []).append(dict(zip(columns, row)))
    return data
""" """
写入attr value 写入attr value
""" """
...@@ -169,3 +204,8 @@ class ParamData(object): ...@@ -169,3 +204,8 @@ class ParamData(object):
('%s','%s','%s','%s','%s','%s','%s')" % \ ('%s','%s','%s','%s','%s','%s','%s')" % \
(attr_id, value, attr_unit_id, status, remark, add_time, update_time) (attr_id, value, attr_unit_id, status, remark, add_time, update_time)
DBHandler.insert(class_conn, sql) DBHandler.insert(class_conn, sql)
@staticmethod
def get_sku_data(idx, sku_conn):
    """Read the id columns of all supplier-7 rows from ``lie_sku_<idx>``.

    :param idx: integer suffix of the SKU table to query
    :param sku_conn: connection handed to ``DBHandler.read``
    :return: whatever ``DBHandler.read`` returns for the query; each row
             holds ``old_goods_id``, ``spu_id`` and ``goods_id`` in that order
    """
    template = "SELECT old_goods_id,spu_id,goods_id FROM lie_sku_%d WHERE supplier_id = 7"
    query = template % idx
    rows = DBHandler.read(sku_conn, query)
    return rows
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import threading import threading
class PARAM_THREAD: class ParamThread:
@staticmethod @staticmethod
def thread_func_ten(func, data): def thread_func_ten(func, data):
......
...@@ -78,12 +78,12 @@ class ParamTrans: ...@@ -78,12 +78,12 @@ class ParamTrans:
@classmethod @classmethod
def base_trans(cls, trans_rule, key): def base_trans(cls, trans_rule, key):
if key == '-' or key == '': if key == '-' or key == '':
return 0, 0 return 0, ''
if str(key).startswith("."): if str(key).startswith("."):
key = "0" + str(key) key = "0" + str(key)
obj = re.match(trans_rule, key.replace(" ", "")) obj = re.match(trans_rule, key.replace(" ", ""))
value = obj.group(1) if obj else 0 value = obj.group(1) if obj else ''
unit = obj.group(2) if obj else 0 unit = obj.group(2) if obj else ''
return value, unit return value, unit
@classmethod @classmethod
...@@ -157,7 +157,7 @@ class ParamTrans: ...@@ -157,7 +157,7 @@ class ParamTrans:
:return: :return:
""" """
if key == '-' or key == '': if key == '-' or key == '':
return 0, 0 return 0, ''
for ct in cast_map: for ct in cast_map:
if str(key).find(ct) != -1: if str(key).find(ct) != -1:
key = str(key).replace(ct, cast_map[ct]) key = str(key).replace(ct, cast_map[ct])
...@@ -181,11 +181,11 @@ class ParamTrans: ...@@ -181,11 +181,11 @@ class ParamTrans:
:return: :return:
""" """
if key == '-' or key == '': if key == '-' or key == '':
return 0, 0 return 0, ''
key = str(key).split(",")[0] key = str(key).split(",")[0]
for encap in encap_list: for encap in encap_list:
if str(key).find(encap) != -1: if str(key).find(encap) != -1:
return encap, 0 return encap, ''
return key, '' return key, ''
@classmethod @classmethod
...@@ -196,13 +196,13 @@ class ParamTrans: ...@@ -196,13 +196,13 @@ class ParamTrans:
:return: :return:
""" """
if key == '-' or key == '': if key == '-' or key == '':
return 0, 0 return 0, ''
for sep_temp in sep_temp_map: for sep_temp in sep_temp_map:
if str(key).find(sep_temp) != -1: if str(key).find(sep_temp) != -1:
return "C0G", 0 return "C0G", ''
for temp in temp_map: for temp in temp_map:
if str(key).find(temp) != -1: if str(key).find(temp) != -1:
return temp, 0 return temp, ''
return key, '' return key, ''
@classmethod @classmethod
......
# _*_ coding:utf-8 _*_ # _*_ coding:utf-8 _*_
from param.param_extract import PARAM_EXTRACT
from param.spu_extract import SPU_EXTRACT from param_task.db_attr_value_all import DbAttrValueAll
from param.param_class_redis import PARAM_CLASS_REDIS
from upload.param_mongo import PARAM_MONGO
if __name__ == '__main__': if __name__ == '__main__':
# 更新 spu表-attr_values字段 # 更新 spu表-attr_values字段
extract = SPU_EXTRACT() # extract = SPU_EXTRACT()
extract.run() # extract.run()
# 更新 spu表-class_id字段 # 更新 spu表-class_id字段
# param_extract = PARAM_EXTRACT() # param_extract = PARAM_EXTRACT()
...@@ -20,3 +18,6 @@ if __name__ == '__main__': ...@@ -20,3 +18,6 @@ if __name__ == '__main__':
# 更新mongo # 更新mongo
# param_mongo = PARAM_MONGO() # param_mongo = PARAM_MONGO()
# param_mongo.run() # param_mongo.run()
db_value = DbAttrValueAll()
db_value.run()
\ No newline at end of file
# -*- coding: utf-8 -*-
# !/usr/bin/env python
from fun.param_trans import ParamTrans
from fun.param_thread import ParamThread
from param_task.task import Task
from param_task.value_task_all import ValueTaskAll
from utils.log_handler import LogHandler
import json
class DbAttrValueAll(Task):
    """Rebuild the SPU ``attr_values`` field from DGK attribute rows.

    The task buckets SKU rows by the last digit of ``old_goods_id``, looks up
    each product's DGK attribute rows, resolves them to existing
    ``attr_value_id`` entries and stores the result through
    ``ParamData.update_spu``.
    """

    # Collection for one SPU stops once the list grows beyond this size.
    MAX_ATTR_VALUES = 20

    def __init__(self):
        super().__init__()
        self.value_at = ValueTaskAll()
        self.unit_value = self.param_data.get_unit_value(self.class_db)
        self.cls_map = self.param_data.class_mapping(self.class_db)
        self.class_list, self.param_dict = self.param_data.load_excel("/data2/param_data/dgk.xlsx")
        # One bucket per possible last digit of old_goods_id.
        self.dgk_dict = {index: [] for index in range(10)}
        self.log1 = LogHandler('data_attr_value_summary', stream=True, file=True)
        self.log2 = LogHandler('data_attr_value', stream=False, file=True)

    def init_data(self):
        """Fill ``self.dgk_dict`` with SKU rows from databases 0-9, tables 0-9.

        Each row is stored as a dict with ``spu_id``, ``old_goods_id`` and
        ``goods_id`` in the bucket chosen by the last digit of
        ``old_goods_id``.
        """
        self.log1.info("spu_data : start")
        for i in range(0, 10):
            self.log1.info("spu_data Database: {i}".format(i=i))
            conn = self.param_conn.online_sku_conn(i)
            try:
                for j in range(0, 10):
                    for row in self.param_data.get_sku_data(j, conn):
                        old_goods_id = row[0]
                        spu_id = row[1]
                        goods_id = row[2]
                        param_index = int(str(old_goods_id)[-1])
                        self.dgk_dict[param_index].append(
                            {'spu_id': spu_id, 'old_goods_id': old_goods_id, 'goods_id': goods_id})
            finally:
                # Release each SKU connection once its ten tables are read;
                # previously all ten connections stayed open.
                conn.close()

    def get_spu_data(self, dgk_data):
        """Resolve and store ``attr_values`` for every row of one bucket.

        :param dgk_data: list of dicts as built by ``init_data`` (keys
                         ``old_goods_id`` and ``spu_id`` are read here)
        :return: None; results are written through ``ParamData.update_spu``
        """
        count = 0
        # NOTE(review): these three connections are never closed here; their
        # type is not visible from this file, so that is left unchanged.
        bigdata_db = self.DBConn.db_bigdata()
        class_db = self.DBConn.db_class()
        spu_db = self.DBConn.db_spu()
        # Sets make the per-attribute membership tests O(1).
        effect_ids = set(self.value_at.attrs_id_effect)
        plain_ids = set(self.value_at.attrs_id)
        print(len(dgk_data))
        for row in dgk_data:
            count += 1
            old_goods_id = row['old_goods_id']
            spu_id = row['spu_id']
            param_index = int(str(old_goods_id)[-1])
            try:
                dgk_goods = self.param_data.get_dgk_goods_by_gid(param_index, old_goods_id, bigdata_db)
                if dgk_goods:
                    dgk_list = self._collect_attr_values(dgk_goods, class_db, effect_ids, plain_ids)
                    if len(dgk_list) > 0:
                        self.param_data.update_spu(int(spu_id), json.dumps(dgk_list), spu_db,
                                                   self.log2)
                if count % 3000 == 0:
                    self.log1.info("INDEX: {INDEX}, COUNT: {COUNT}".format(INDEX=param_index, COUNT=count))
            except Exception as exc:
                # Best effort per row: keep going, but make the failure
                # distinguishable from the progress message and keep its cause.
                self.log1.info("ERROR INDEX: {INDEX}, COUNT: {COUNT}, spu_id: {spu_id}, error: {error}".format(
                    INDEX=param_index, COUNT=count, spu_id=spu_id, error=repr(exc)))

    def _collect_attr_values(self, dgk_goods, class_db, effect_ids, plain_ids):
        """Turn one product's DGK rows into ``"attr_id,value_id,unit_id"`` strings."""
        dgk_list = []
        for goods in dgk_goods:
            field_cat_id = goods['cat_id']
            field_attr_id = goods['attr_id']
            field_attr_name = goods['attr_name']
            field_attr_value = goods['attr_value']
            write_attr_id = self.cls_map[field_cat_id].get(field_attr_name)
            if not write_attr_id:
                continue
            if field_attr_id in effect_ids:
                # Parsed attribute: param_dict names the ParamTrans function to apply.
                value, unit = getattr(ParamTrans, self.param_dict[str(field_attr_id)])(field_attr_value)
            elif field_attr_id in plain_ids:
                # Unparsed attribute: raw value with the empty unit.
                value = field_attr_value
                unit = ''
            else:
                continue
            if value == 0 or not self.unit_value.get(unit):
                continue
            attr_unit_id = self.unit_value[unit]
            value_exist = self.param_data.get_class_attr_value(write_attr_id, value,
                                                               attr_unit_id, class_db)
            # TODO: values that do not exist yet are skipped for now; inserting
            # them through ParamData.insert_dgk_goods is currently disabled.
            if len(value_exist) > 0:
                attr_value_id = value_exist[0][0]
                dgk_list.append("{attr_id},{value_id},{unit_id}".format(attr_id=write_attr_id,
                                                                        value_id=attr_value_id,
                                                                        unit_id=attr_unit_id))
                # Stop once the list has grown too long.
                if len(dgk_list) > self.MAX_ATTR_VALUES:
                    break
        return dgk_list

    def run(self):
        """Load SKU rows and attribute ids, then process the buckets via ``ParamThread``."""
        self.init_data()
        self.value_at.load_attr()
        ParamThread.thread_func_ten(self.get_spu_data, self.dgk_dict)
if __name__ == '__main__':
    # Script entry point; the instance is named `task` so the builtin
    # all() is not shadowed at module level.
    task = DbAttrValueAll()
    task.run()
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# !/usr/bin/env python # !/usr/bin/env python
from utils.date_handler import DateHandler from utils.date_handler import DateHandler
from utils.log_handler import LogHandler
from fun.param_data import ParamData from fun.param_data import ParamData
from fun.param_trans import ParamTrans from fun.param_trans import ParamTrans
from fun.param_conn import ParamConn from fun.param_conn import ParamConn
...@@ -10,9 +11,12 @@ from config.db_config import DBConn ...@@ -10,9 +11,12 @@ from config.db_config import DBConn
class Task: class Task:
def __init__(self): def __init__(self):
self.DBConn = DBConn
self.now = DateHandler.now_datetime() self.now = DateHandler.now_datetime()
self.param_data = ParamData self.param_data = ParamData
self.param_conn = ParamConn
self.param_trans = ParamTrans() self.param_trans = ParamTrans()
self.class_db = DBConn.db_class() self.class_db = DBConn.db_class()
self.bigdata_db = DBConn.db_bigdata() self.bigdata_db = DBConn.db_bigdata()
self.spu_db = DBConn.db_spu()
self.redis = ParamConn.local_redis_conn() self.redis = ParamConn.local_redis_conn()
\ No newline at end of file
...@@ -10,6 +10,7 @@ class ValueTaskAll(Task): ...@@ -10,6 +10,7 @@ class ValueTaskAll(Task):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.attrs_id = [] self.attrs_id = []
self.attrs_id_effect = []
self.log1 = LogHandler('value_task_all_info', stream=True, file=True) self.log1 = LogHandler('value_task_all_info', stream=True, file=True)
""" """
...@@ -18,15 +19,11 @@ class ValueTaskAll(Task): ...@@ -18,15 +19,11 @@ class ValueTaskAll(Task):
def load_attr(self): def load_attr(self):
self.log1.info("加载参数") self.log1.info("加载参数")
# 有效参数 # 有效参数
effect_attrs = [] effect = ExcelHandler.read_to_excel("/data2/param_data/dgk.xlsx", "Sheet1", 1)
effect_1 = ExcelHandler.read_to_excel("C:\\data\\lx\\dgk_1.xlsx", "Sheet1", 1) effect.pop(0)
effect_1.pop(0) [self.attrs_id_effect.append(int(row[0])) for row in effect]
[effect_attrs.append(int(row[0])) for row in effect_1]
effect_2 = ExcelHandler.read_to_excel("C:\\data\\lx\\dgk_2.xlsx", "Sheet1", 1)
effect_2.pop(0)
[effect_attrs.append(int(row[4])) for row in effect_2]
# 全部参数 # 全部参数
all_data = ExcelHandler.read_to_excel("C:\\data\\lx\\dgk_all.xlsx", "Sheet1", 1) all_data = ExcelHandler.read_to_excel("/data2/param_data/dgk_all.xlsx", "Sheet1", 1)
all_data.pop(0) all_data.pop(0)
for row in all_data: for row in all_data:
# 参数id、二级分类id、基础参数名、猎芯参数名 # 参数id、二级分类id、基础参数名、猎芯参数名
...@@ -35,7 +32,7 @@ class ValueTaskAll(Task): ...@@ -35,7 +32,7 @@ class ValueTaskAll(Task):
basic_attr_name = row[5] basic_attr_name = row[5]
is_important = row[6] is_important = row[6]
# 非重要参数 + 非有效参数 # 非重要参数 + 非有效参数
if is_important == 1 and param_id not in effect_attrs: if is_important == 1 and param_id not in self.attrs_id_effect:
class_id = self.param_data.get_class_id(second_classify_id, self.class_db) class_id = self.param_data.get_class_id(second_classify_id, self.class_db)
unit_id = self.param_trans.empty_unit_id unit_id = self.param_trans.empty_unit_id
self.attrs_id.append(param_id) self.attrs_id.append(param_id)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment