Commit 00d16687 by lichenggang

优化封装模型

parent 43e1a07d
......@@ -170,7 +170,7 @@ class DicPredict(BasePredictor):
for col_diffrate in sort_li_diffrate[1:]:
temp_pre_model_res.pop(col_diffrate[0])
# 若有多个参数列且没有封装列, 就先进行封装列的提取(封装率需要大于0), 再进行参数特征的数量比较, 特征最多的选为目标列
# 若有多个参数列且没有封装列, 就先进行封装列的提取(封装率需要大于0.3), 再进行参数特征的数量比较, 特征最多的选为目标列
prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数']
self.info.info(f'可能的参数列有{str(prob_param_cols)}')
if len(prob_param_cols) >= 2:
......@@ -178,7 +178,7 @@ class DicPredict(BasePredictor):
li_encap_rate = [(col, BasePredictor.get_encap_rate(not_null_dic_data[col])) for col in
prob_param_cols]
sort_li_encaprate = sorted(li_encap_rate, key=lambda x: x[1], reverse=True)
if sort_li_encaprate[0][1] >= 0:
if sort_li_encaprate[0][1] > 0.3:
temp_pre_model_res[sort_li_encaprate[0][0]] = '封装'
prob_param_cols.remove(sort_li_encaprate[0][0])
......@@ -230,11 +230,11 @@ class DicPredict(BasePredictor):
def predict(self, dic_data, predict_type='all'):
self.info.info('预测类型为: %s, 接收数据: %s' % (predict_type, str(dic_data)))
dic_data = self.pre_deal(dic_data)
dealed_dic_data = self.pre_deal(dic_data)
if predict_type == 'all':
head_id_res = self.head_predict(dic_data)
model_id_res = self.model_predict(dic_data)
model_id_res = self.model_predict(dealed_dic_data)
self.info.info('表头预测结果: ' + str(head_id_res))
self.info.info('模型预测结果: ' + str(model_id_res))
......@@ -259,7 +259,7 @@ class DicPredict(BasePredictor):
return res
elif predict_type == 'model':
model_id_res = self.model_predict(dic_data)
model_id_res = self.model_predict(dealed_dic_data)
if model_id_res:
return model_id_res
......@@ -269,9 +269,12 @@ class DicPredict(BasePredictor):
return head_id_res
def pre_deal(self, dic_data):
"""
模型预测不预测空置率大于0.8的列
"""
new_dic_data = {}
for k, v in dic_data.items():
# 去掉空置率大于等于0.8的列
# 去掉空置率大于0.8的列
counter = 0
for item in v:
if str(item).strip() == BasePredictor.PLACEHOLDER:
......
......@@ -4,7 +4,7 @@
li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类"]
li_param = ["参数", "规格", "描述", "值", "description"]
li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "规格型号"]
li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "单板数量", "采购数量"]
li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "单板数量", "采购数量", "单机用量", "单机数量", "单板用量"]
li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"]
li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", "packagereference"]
li_position = ["位号", "位置", "标号", "点位"]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment