Commit 2b38709c by lichenggang

位号设置为标准字段

parent 824e334e
Showing 2 changed files with 11 additions and 8 deletions
...@@ -25,7 +25,7 @@ HEAD_ROW = 7 ...@@ -25,7 +25,7 @@ HEAD_ROW = 7
# 空置率阈值 # 空置率阈值
NAN_RATE = 0.8 NAN_RATE = 0.8
# 参数列空置率阈值 # 参数列空置率阈值
PARAM_NAN_RATE = 0.3 PARAM_NAN_RATE = 0.6
class DicPredict(BasePredictor): class DicPredict(BasePredictor):
...@@ -122,7 +122,7 @@ class DicPredict(BasePredictor): ...@@ -122,7 +122,7 @@ class DicPredict(BasePredictor):
temp_pre_model_res[k] = '数量' temp_pre_model_res[k] = '数量'
continue continue
if self.is_ref(no_null_v): if self.is_ref(no_null_v):
ab_result[k] = '位号' temp_pre_model_res[k] = '位号'
continue continue
if self.is_pcs(no_null_v): if self.is_pcs(no_null_v):
ab_result[k] = '单位' ab_result[k] = '单位'
...@@ -134,7 +134,7 @@ class DicPredict(BasePredictor): ...@@ -134,7 +134,7 @@ class DicPredict(BasePredictor):
if self.is_catecol(no_null_v): if self.is_catecol(no_null_v):
temp_pre_model_res[k] = '类别' temp_pre_model_res[k] = '类别'
# 对列元素进行去重并处理掉占位符 # 对列元素进行[去重!]并处理掉占位符,去重是为了防止某个单独的元素重复许多次且同时又被单项预测错误导致整列预测错误
set_not_null_dic_data = {k: set(list(filter(lambda x: x != BasePredictor.PLACEHOLDER, dic_data[k]))) for k in set_not_null_dic_data = {k: set(list(filter(lambda x: x != BasePredictor.PLACEHOLDER, dic_data[k]))) for k in
prob_columns} prob_columns}
for k, v in set_not_null_dic_data.items(): for k, v in set_not_null_dic_data.items():
...@@ -151,7 +151,7 @@ class DicPredict(BasePredictor): ...@@ -151,7 +151,7 @@ class DicPredict(BasePredictor):
continue continue
# 参数列单独要求空置率不能高于PARAM_NAN_RATE # 参数列单独要求空置率不能高于PARAM_NAN_RATE
if result == 'param': if result == 'param':
if BasePredictor.get_nan_rate(v) >= PARAM_NAN_RATE: if BasePredictor.get_nan_rate(dic_data[k]) >= PARAM_NAN_RATE:
continue continue
temp_pre_model_res[k] = EN_TO_ZH_MAP[result] temp_pre_model_res[k] = EN_TO_ZH_MAP[result]
......
...@@ -7,6 +7,7 @@ li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", ...@@ -7,6 +7,7 @@ li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件",
li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量", "单板数量", "采购数量"] li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量", "单板数量", "采购数量"]
li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"] li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"]
li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", 'packagereference'] li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", 'packagereference']
li_position = ["位号", "位置", "标号", "点位"]
li_category.extend(['*' + i for i in li_category]) li_category.extend(['*' + i for i in li_category])
li_param.extend(['*' + i for i in li_param]) li_param.extend(['*' + i for i in li_param])
...@@ -14,11 +15,12 @@ li_gn.extend(['*' + i for i in li_gn]) ...@@ -14,11 +15,12 @@ li_gn.extend(['*' + i for i in li_gn])
li_num.extend(['*' + i for i in li_num]) li_num.extend(['*' + i for i in li_num])
li_brand.extend(['*' + i for i in li_brand]) li_brand.extend(['*' + i for i in li_brand])
li_encap.extend(['*' + i for i in li_encap]) li_encap.extend(['*' + i for i in li_encap])
li_position.extend(['*' + i for i in li_position])
PROB_FIELDS = ["序号", "位号", "a面位置", "位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark", "标号"] PROB_FIELDS = ["序号", "a面位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark"]
AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS] AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS]
# 可能的头部字段 # 可能的头部字段
ALL_FIELDS = AB_FIELDS + li_category + li_param + li_gn + li_num + li_brand + li_encap ALL_FIELDS = AB_FIELDS + li_category + li_param + li_gn + li_num + li_brand + li_encap + li_position
STD_FIELDS_MAP = { STD_FIELDS_MAP = {
"类别": li_category, "类别": li_category,
...@@ -26,14 +28,15 @@ STD_FIELDS_MAP = { ...@@ -26,14 +28,15 @@ STD_FIELDS_MAP = {
"型号": li_gn, "型号": li_gn,
"数量": li_num, "数量": li_num,
"品牌": li_brand, "品牌": li_brand,
"封装": li_encap "封装": li_encap,
"位号": li_position
} }
# 必须返回也必须验证的标准字段 # 必须返回也必须验证的标准字段
MUST_STD_FIELDS = ['参数', '数量'] MUST_STD_FIELDS = ['参数', '数量']
# 参数名和中文的映射 # 参数名和中文的映射
EN_TO_ZH_MAP = {'brand_name': '品牌', 'param': '参数', 'goods_name': '型号', 'other': '其他', 'encap': '封装'} EN_TO_ZH_MAP = {'brand_name': '品牌', 'param': '参数', 'goods_name': '型号', 'other': '其他', 'encap': '封装', 'position': '位号'}
# 类别合集 从learning_data.lie_category导入, 并添加了部分短英文Category # 类别合集 从learning_data.lie_category导入, 并添加了部分短英文Category
CATEGORY = ["半导体", "嵌入式", "光电子", "光源", "无源", "连接器", "断路器", "指示灯", "声源", "接触器", "铁氧芯", "冷热系统", "电源", "电线", "机械", CATEGORY = ["半导体", "嵌入式", "光电子", "光源", "无源", "连接器", "断路器", "指示灯", "声源", "接触器", "铁氧芯", "冷热系统", "电源", "电线", "机械",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment