位号设置为标准字段

2b38709c · lichenggang · 824e334e · 2b38709c · 2b38709c
Commit 2b38709c authored Jun 16, 2020 by lichenggang
Showing 2 changed files with 11 additions and 8 deletions
predict/dict_predict.py
static_config.py
--- a/predict/dict_predict.py
+++ b/predict/dict_predict.py
@@ -25,7 +25,7 @@ HEAD_ROW = 7
 # 空置率阈值
 NAN_RATE = 0.8
 # 参数列空置率阈值
-PARAM_NAN_RATE = 0.3
+PARAM_NAN_RATE = 0.6

 class DicPredict(BasePredictor):

@@ -122,7 +122,7 @@ class DicPredict(BasePredictor):
                temp_pre_model_res[k] = '数量'
                continue
            if self.is_ref(no_null_v):
-                ab_result[k] = '位号'
+                temp_pre_model_res[k] = '位号'
                continue
            if self.is_pcs(no_null_v):
                ab_result[k] = '单位'
@@ -134,7 +134,7 @@ class DicPredict(BasePredictor):
            if self.is_catecol(no_null_v):
                temp_pre_model_res[k] = '类别'

-        # 对列元素进行去重并处理掉占位符
+        # 对列元素进行[去重!]并处理掉占位符,去重是为了防止某个单独的元素重复许多次且同时又被单项预测错误导致整列预测错误
        set_not_null_dic_data = {k: set(list(filter(lambda x: x != BasePredictor.PLACEHOLDER, dic_data[k]))) for k in
                                 prob_columns}
        for k, v in set_not_null_dic_data.items():
@@ -151,7 +151,7 @@ class DicPredict(BasePredictor):
                continue
            # 参数列单独要求空置率不能高于PARAM_NAN_RATE
            if result == 'param':
-                if BasePredictor.get_nan_rate(v) >= PARAM_NAN_RATE:
+                if BasePredictor.get_nan_rate(dic_data[k]) >= PARAM_NAN_RATE:
                    continue
            temp_pre_model_res[k] = EN_TO_ZH_MAP[result]


--- a/static_config.py
+++ b/static_config.py
@@ -7,6 +7,7 @@ li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件",
 li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量", "单板数量", "采购数量"]
 li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"]
 li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", 'packagereference']
+li_position = ["位号", "位置", "标号", "点位"]

 li_category.extend(['*' + i for i in li_category])
 li_param.extend(['*' + i for i in li_param])
@@ -14,11 +15,12 @@ li_gn.extend(['*' + i for i in li_gn])
 li_num.extend(['*' + i for i in li_num])
 li_brand.extend(['*' + i for i in li_brand])
 li_encap.extend(['*' + i for i in li_encap])
+li_position.extend(['*' + i for i in li_position])

-PROB_FIELDS = ["序号", "位号", "a面位置", "位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark", "标号"]
+PROB_FIELDS = ["序号", "a面位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark"]
 AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS]
 # 可能的头部字段
-ALL_FIELDS = AB_FIELDS + li_category + li_param + li_gn + li_num + li_brand + li_encap
+ALL_FIELDS = AB_FIELDS + li_category + li_param + li_gn + li_num + li_brand + li_encap + li_position

 STD_FIELDS_MAP = {
    "类别": li_category,
@@ -26,14 +28,15 @@ STD_FIELDS_MAP = {
    "型号": li_gn,
    "数量": li_num,
    "品牌": li_brand,
-    "封装": li_encap
+    "封装": li_encap,
+    "位号": li_position
 }

 # 必须返回也必须验证的标准字段
 MUST_STD_FIELDS = ['参数', '数量']

 # 参数名和中文的映射
-EN_TO_ZH_MAP = {'brand_name': '品牌', 'param': '参数', 'goods_name': '型号', 'other': '其他', 'encap': '封装'}
+EN_TO_ZH_MAP = {'brand_name': '品牌', 'param': '参数', 'goods_name': '型号', 'other': '其他', 'encap': '封装', 'position': '位号'}

 # 类别合集 从learning_data.lie_category导入, 并添加了部分短英文Category
 CATEGORY = ["半导体", "嵌入式", "光电子", "光源", "无源", "连接器", "断路器", "指示灯", "声源", "接触器", "铁氧芯", "冷热系统", "电源", "电线", "机械",