Commit 00d16687 by lichenggang

优化封装模型

parent 43e1a07d
...@@ -170,7 +170,7 @@ class DicPredict(BasePredictor): ...@@ -170,7 +170,7 @@ class DicPredict(BasePredictor):
for col_diffrate in sort_li_diffrate[1:]: for col_diffrate in sort_li_diffrate[1:]:
temp_pre_model_res.pop(col_diffrate[0]) temp_pre_model_res.pop(col_diffrate[0])
# 若有多个参数列且没有封装列, 就先进行封装列的提取(封装率需要大于0), 再进行参数特征的数量比较, 特征最多的选为目标列 # 若有多个参数列且没有封装列, 就先进行封装列的提取(封装率需要大于0.3), 再进行参数特征的数量比较, 特征最多的选为目标列
prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数'] prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数']
self.info.info(f'可能的参数列有{str(prob_param_cols)}') self.info.info(f'可能的参数列有{str(prob_param_cols)}')
if len(prob_param_cols) >= 2: if len(prob_param_cols) >= 2:
...@@ -178,7 +178,7 @@ class DicPredict(BasePredictor): ...@@ -178,7 +178,7 @@ class DicPredict(BasePredictor):
li_encap_rate = [(col, BasePredictor.get_encap_rate(not_null_dic_data[col])) for col in li_encap_rate = [(col, BasePredictor.get_encap_rate(not_null_dic_data[col])) for col in
prob_param_cols] prob_param_cols]
sort_li_encaprate = sorted(li_encap_rate, key=lambda x: x[1], reverse=True) sort_li_encaprate = sorted(li_encap_rate, key=lambda x: x[1], reverse=True)
if sort_li_encaprate[0][1] >= 0: if sort_li_encaprate[0][1] > 0.3:
temp_pre_model_res[sort_li_encaprate[0][0]] = '封装' temp_pre_model_res[sort_li_encaprate[0][0]] = '封装'
prob_param_cols.remove(sort_li_encaprate[0][0]) prob_param_cols.remove(sort_li_encaprate[0][0])
...@@ -230,11 +230,11 @@ class DicPredict(BasePredictor): ...@@ -230,11 +230,11 @@ class DicPredict(BasePredictor):
def predict(self, dic_data, predict_type='all'): def predict(self, dic_data, predict_type='all'):
self.info.info('预测类型为: %s, 接收数据: %s' % (predict_type, str(dic_data))) self.info.info('预测类型为: %s, 接收数据: %s' % (predict_type, str(dic_data)))
dic_data = self.pre_deal(dic_data) dealed_dic_data = self.pre_deal(dic_data)
if predict_type == 'all': if predict_type == 'all':
head_id_res = self.head_predict(dic_data) head_id_res = self.head_predict(dic_data)
model_id_res = self.model_predict(dic_data) model_id_res = self.model_predict(dealed_dic_data)
self.info.info('表头预测结果: ' + str(head_id_res)) self.info.info('表头预测结果: ' + str(head_id_res))
self.info.info('模型预测结果: ' + str(model_id_res)) self.info.info('模型预测结果: ' + str(model_id_res))
...@@ -259,7 +259,7 @@ class DicPredict(BasePredictor): ...@@ -259,7 +259,7 @@ class DicPredict(BasePredictor):
return res return res
elif predict_type == 'model': elif predict_type == 'model':
model_id_res = self.model_predict(dic_data) model_id_res = self.model_predict(dealed_dic_data)
if model_id_res: if model_id_res:
return model_id_res return model_id_res
...@@ -269,9 +269,12 @@ class DicPredict(BasePredictor): ...@@ -269,9 +269,12 @@ class DicPredict(BasePredictor):
return head_id_res return head_id_res
def pre_deal(self, dic_data): def pre_deal(self, dic_data):
"""
模型预测不预测空置率大于0.8的列
"""
new_dic_data = {} new_dic_data = {}
for k, v in dic_data.items(): for k, v in dic_data.items():
# 去掉空置率大于等于0.8的列 # 去掉空置率大于0.8的列
counter = 0 counter = 0
for item in v: for item in v:
if str(item).strip() == BasePredictor.PLACEHOLDER: if str(item).strip() == BasePredictor.PLACEHOLDER:
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类"] li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类"]
li_param = ["参数", "规格", "描述", "值", "description"] li_param = ["参数", "规格", "描述", "值", "description"]
li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "规格型号"] li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "规格型号"]
li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "单板数量", "采购数量"] li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "单板数量", "采购数量", "单机用量", "单机数量", "单板用量"]
li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"] li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"]
li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", "packagereference"] li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", "packagereference"]
li_position = ["位号", "位置", "标号", "点位"] li_position = ["位号", "位置", "标号", "点位"]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment