Commit a9c6c7dd by lichenggang

品牌列增加空置率比较

parent aa3481ef
...@@ -184,7 +184,7 @@ class BasePredictor: ...@@ -184,7 +184,7 @@ class BasePredictor:
PCS_LEVEL = 0.5 PCS_LEVEL = 0.5
count = 0 count = 0
for i in data: for i in data:
if str(i).lower().strip() == 'pcs': if str(i).lower().strip() in ['pcs', 'pc']:
count += 1 count += 1
return round(count / len(data), 3) >= PCS_LEVEL or False return round(count / len(data), 3) >= PCS_LEVEL or False
......
...@@ -153,6 +153,20 @@ class DicPredict(BasePredictor): ...@@ -153,6 +153,20 @@ class DicPredict(BasePredictor):
for col_nullrate in sort_li_nullrate[1:]: for col_nullrate in sort_li_nullrate[1:]:
temp_pre_model_res.pop(col_nullrate[0]) temp_pre_model_res.pop(col_nullrate[0])
# 若有多个品牌列,则进行空置率的比较, 空置率最低的选为目标列
prob_brand_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '品牌']
if len(prob_brand_cols) >= 2:
li_nullrate = []
for prob_brand_col in prob_brand_cols:
counter = 0
for item in dic_data[prob_brand_col]:
if str(item).strip() == self.PLACEHOLDER:
counter += 1
li_nullrate.append((prob_brand_col, counter / len(dic_data[prob_brand_col])))
sort_li_nullrate = sorted(li_nullrate, key=lambda x: x[1])
for col_nullrate in sort_li_nullrate[1:]:
temp_pre_model_res.pop(col_nullrate[0])
# 如果没有数量列则把之前的序号列(其实是数字列)当成数字列 # 如果没有数量列则把之前的序号列(其实是数字列)当成数字列
if not prob_num_cols: if not prob_num_cols:
num_col = None num_col = None
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# Possible header field names.
# NOTE: every entry needs its own separating comma, including the last one on
# each physical line. Python silently concatenates adjacent string literals, so
# a missing comma at a line break fuses two names into a single entry (here
# "manufacturer" and "参考品牌" used to collapse into "manufacturer参考品牌"),
# and neither name is then present in the list on its own.
# Duplicate entries are kept on purpose so the list's length and ordering stay
# unchanged for any existing consumer.
PROB_FIELDS = [
    "序号", "名称", "规格", "mpn", "用量(pcs)", "用量", "pcs", "位号", "描述", "值", "数量", "封装", "类别", "a面位置", "b面位置", "备注",
    "需求数量", "售价", "封装", "封装规格", '型号', '参数', '数量', '品牌', '型号', '类型', "quantity", "qty", "buy qty",
    "buy quantity", "需求用量", "manufacturer制造商", "manufacturer",
    "参考品牌", "品牌", "item", "厂商编码", "品牌/厂商", "参考料号", "参考供应商", "top面", "bottom面", "designator", "remark", "元器件",
    "标号",
]
# The same names, each prefixed with '*'.
# NOTE(review): presumably matches headers that carry a leading asterisk
# marker in the input sheet - confirm against the code that consumes this list.
PROB_FIELDS_1 = ['*' + i for i in PROB_FIELDS]
...@@ -15,7 +15,7 @@ STD_FIELDS_MAP = { ...@@ -15,7 +15,7 @@ STD_FIELDS_MAP = {
"参数": ["参数", "*参数", "规格", "描述", "值"], "参数": ["参数", "*参数", "规格", "描述", "值"],
"型号": ["型号", "*型号", "参考料号", "料号", "mpn", "厂商编码", "元器件"], "型号": ["型号", "*型号", "参考料号", "料号", "mpn", "厂商编码", "元器件"],
"数量": ["数量", "*数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量"], "数量": ["数量", "*数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量"],
"品牌": ["品牌", "*品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商"]} "品牌": ["品牌", "*品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer"]}
# 必须返回也必须验证的标准字段 # 必须返回也必须验证的标准字段
MUST_STD_FIELDS = ['参数', '数量'] MUST_STD_FIELDS = ['参数', '数量']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment