Commit 43e1a07d by lichenggang

static_config优化, 封装先进行简单识别

parent 2b38709c
...@@ -21,6 +21,10 @@ MULTI_SAME_LEVEL = 3 ...@@ -21,6 +21,10 @@ MULTI_SAME_LEVEL = 3
# 位号列阈值 # 位号列阈值
REF_LEVEL = 0.5 REF_LEVEL = 0.5
# 简单封装率阈值
SIMPLE_ENCAP_LEVEL = 0.2
class ClassBasePredictorMeta(type): class ClassBasePredictorMeta(type):
def __init__(cls, name, bases, attrs): def __init__(cls, name, bases, attrs):
super(ClassBasePredictorMeta, cls).__init__(name, bases, attrs) super(ClassBasePredictorMeta, cls).__init__(name, bases, attrs)
...@@ -196,7 +200,21 @@ class BasePredictor(metaclass=ClassBasePredictorMeta): ...@@ -196,7 +200,21 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
""" """
位号列预测 位号列预测
""" """
# pat = re.compile(r'[CJRULX][1-9]{1,3}') # pat = re.compile(r'((?<!.)1?[0-9]?[CJRULXKPD][1-9]{1,3}(?![^,,/  ]))[,,/  ]?(.*)')
# # pat_res = pat.findall(s)
# # if pat_res:
# # first, other = pat_res[0]
# # if first and not other:
# # print(True)
# # elif first and other:
# # pat_res2 = pat.findall(s)
# # if pat_res2:
# # print(True)
# # else:
# # print(False)
# # else:
# # print(False)
pat = re.compile(r'(?<!.)1?[0-9]?[CJRULXKPD][1-9]{1,3}(?![^,,/  ])') pat = re.compile(r'(?<!.)1?[0-9]?[CJRULXKPD][1-9]{1,3}(?![^,,/  ])')
count = 0 count = 0
for i in data: for i in data:
...@@ -258,6 +276,15 @@ class BasePredictor(metaclass=ClassBasePredictorMeta): ...@@ -258,6 +276,15 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
count += 1 count += 1
return round(count / len(data), 3) return round(count / len(data), 3)
@classmethod
def is_encap(cls, data):
    """
    Quick regex-based check of whether a column looks like a package
    ("封装") column.

    An item counts as a hit when its string form is, case-insensitively,
    exactly four characters of the shape ``0<digit>0<digit>`` with an
    optional single ``C``/``R``/``L`` letter either before or after it
    (e.g. ``0402``, ``C0603``, ``0805r``).

    :param data: sized iterable of cell values; each value is passed
        through ``str()`` before matching.
    :return: ``True`` when the hit ratio, rounded to 3 decimals, is at
        least ``SIMPLE_ENCAP_LEVEL``; ``False`` otherwise, including when
        ``data`` is empty.
    """
    total = len(data)
    # An empty column cannot be a package column; returning early also
    # avoids the ZeroDivisionError the ratio below would raise.
    if total == 0:
        return False
    encap_pat = re.compile(r'(?:^[CRL]?0\d0\d$|^0\d0\d[CRL]?$)', re.I)
    # Only the existence of a match matters, so search() is enough.
    count = sum(1 for item in data if encap_pat.search(str(item)))
    return round(count / total, 3) >= SIMPLE_ENCAP_LEVEL
if __name__ == "__main__": if __name__ == "__main__":
print(BasePredictor.is_ref( print(BasePredictor.is_ref(
['?', '位置', '?', '4C33', '3C5,11C8', '3C6,3C7,2C13,2C14,C283,C285,C286', '?', 'C287,C575,10C33,10C34,10C36,10C37', '4C6,4C26,4C49,4C65', '10C7,10C10,10C11,10C29', '8C4,8C5', '8C40,8C41', '1C7,5C2,5C3,10C2,1C25,1C41,2C11,2C21,2C25,2C37,2C40,4C62,8C29,8C30', '10C12', '10C26,10C35', '1C24,1C43', '10C16,10C20,10C22,10C23', '10C19,10C25', '1C1,1C6,2C1,2C3,2C4,2C6,2C7,3C1,3C2,3C4,3C8,4C2,6C6,6C8,6C9,7C6,7C9,8C7,8C8,8C9,10C4,10C5,10C6,10C9,1C10,1C13,11C4,1C15,1C16,11C6,11C7,1C19,1C22,1C28,1C37,1C44,1C57,1C59,1C63,2C15,2C17,2C29,2C33,2C36,2C38,2C39,3C13,4C12,4C13,4C14,4C15,4C16,4C18,4C19,4C20,4C22,4C23,4C24,4C25,4C28,4C30,4C32,4C34,4C41,4C44,4C45,4C46,4C48,4C51,4C54,4C55,4C57,4C58,4C59,4C60,4C61,6C14,6C16,7C13,8C11,8C17,8C21,10C14,10C17,10C18,10C21,10C28,10C30,10C32,10C42,4C111,4C118,4C121,4C122,C28,C97,C292,C293,C294,C296,C298,C299,C300,C301,C304,C571\n', '10C3,10C15', '2C2,2C9,3C3,4C4,5C6,5C7,5C8,5C9,6C7,7C3,7C7,8C3,2C16,2C18,2C22,2C23,2C26,2C27,2C31,4C17,4C47,4C56,4C63,4C93,5C10,6C46,8C10,8C16,8C18,8C23,8C25,8C31,8C32,8C47,9C21,9C28,4C100,C98,11C9,C290,C291,C295,C297,11C10', '10C8,10C24', '5C1,5C5,2C20,2C30,4C36,4C86,9C27,2C10', '2C12,8C53,9C22', '2C5,3C14,8C37,8C44,10C27', '1C9,4C1,4C7,10C1,1C11,1C14,1C17,1C18,1C20,1C21,1C53,1C58,1C60,1C62,1C64,2C34,2C35,3C15,4C11,4C29,4C31,4C42,4C64,4C82,4C83,8C15,8C42,10C31,4C157,C96,C288,C289,C302,C303,C574', '1C2,1C8,1C30,1C32,1C38,1C70', '1C4,1C5,2C8,4C3,7C4,1C12,1C23,1C26,1C29,1C31,1C34,1C42,1C48,2C32,7C11,7C14,C1,C570', '1C51,10C39,10C41', '1C66,1C71', '?', '?', '10R9,3R24,8R45,8R46', '3C10,7R6,R154,R155,2R21,3R25,7R25,9R32,9R33,9R34,9R35,9R36,9R37,9R38,9R39,9R40,9R41,9R42,9R43,9R44,9R45,9R46,9R47,9R48,9R49,9R50,9R51,9R52,9R53,10R18', 'R1,R2,R3,R4', '7R3,7R4,7R7,7R8,7R23,7R24,8R39,8R41,8R42,8R44', '1R1,1R2,1R16,1R19,1R26,1R49', '10R1,10R8,10R10,10R13', '2R7,2R8,3R3,3R4,3R5,3R6,3R7,2R10,3R20,3R33,3R34,3R35,3R36,3R37', '8R3,8R29,8R30', '6R3', '8R33,8R34,8R35,8R36', '?', '8R2,3R16', '8R37,8R38,8R40,8R43', '?', 
'2R4,2R5,10R2,10R3,3R17,5R29,5R30,5R32,5R33,5R38,9R13,9R14', '?', '3R15', '5R42', '4R10,4R11,4R12,4R28,4R35', '?', '10R4,10R14', '6R2', '9R5', '1R50,1R53,1R54,1R55,1R56,2R19,5R14,5R19,5R24,5R25,11R11,11R12', '5R40', '1R51', '8R1,4R15,4R16,4R23,4R24,4R33,4R34,10R22,10R30,10R40,10R41', '8R5,8R7', 'R156,R161,1R6,2R2,10R5,8R24,9R15,9R16,11R13,11R15,11R16', '5R28,5R41', '8R18', 'R158', '1R7', '8R16', '2R9', '7R1,8R4,8R6,9R1,10R6,1R21,1R42,1R57,1R58,R160,2R26,3R19,3R23,8R20,8R22,10R11,R167,9R54', '7R5,1R22,1R27,5R16', '1R43', '8R21,8R28', '10R17', '2R12', '1R31', '1R28,10R12', '1R3,5R4,5R37', '1R32', '1R5', '1R4,1R13,1R17,1R23,1R33', '1R14,1R39', '1R15', '5R2,1R12,1R20,1R44,5R11,5R12,5R13,5R17,5R18,5R35', '8R26,8R31', 'R164', '1R29', '2R3', '1R34', '5R3,1R10,1R41,1R60,5R36,11R14', 'R163', '1R18', 'R162,R165', 'R159', '5RN3,5RN4,5RN5,5RN6', '?', '?', '7FB5', '8FB1,10FB1,11FB1,FB4,FB6,FB8,8L2', '?', '10FB2,10FB3', '1L3,1L4', '?', '1L2', 'L2,3L1', 'L1', '1L5', '1L1', '1L6', '5L1,5L2', '10L1,10L2,10L3,10L4', '?', '?', 'D45', 'Q23', '1Q1,1Q8,3Q1,5Q3,5Q4,1Q5,1Q7', '1Q3', '?', '1Q6', '5Q2', '5Q5', '5D3,5D4,5D5,5D6', '5D7,5D8,5D9,7D1,7D2,7D3,7D4,7D5,8D3,8D4,11D1,11D2,5D10,5D11,5D12,5D20,5D22,5D23,5D24', '8D1', '8D2', 'T4', 'U1', 'U7,U18', '?', 'U9', 'U17', '1U2,1U3,1U10', '1U4,1U6,1U7', '2U1', '4U1,4U2,4U3,4U4', '?', '1U1,1U9', '1U5', '8U1', '2U2', '1U11', '3U2', '10U1', '3Y2', '?', '2Y1', 'Y1', '3J1', '8T1', '8JA5', 'CN13', 'J45', '?', '5JA2,5JA3', '1CN2', 'J2,J3', 'J53,J56,J58', '2CN1', 'J4', 'J1', 'J52', 'P2', 'J43'])) ['?', '位置', '?', '4C33', '3C5,11C8', '3C6,3C7,2C13,2C14,C283,C285,C286', '?', 'C287,C575,10C33,10C34,10C36,10C37', '4C6,4C26,4C49,4C65', '10C7,10C10,10C11,10C29', '8C4,8C5', '8C40,8C41', '1C7,5C2,5C3,10C2,1C25,1C41,2C11,2C21,2C25,2C37,2C40,4C62,8C29,8C30', '10C12', '10C26,10C35', '1C24,1C43', '10C16,10C20,10C22,10C23', '10C19,10C25', 
'1C1,1C6,2C1,2C3,2C4,2C6,2C7,3C1,3C2,3C4,3C8,4C2,6C6,6C8,6C9,7C6,7C9,8C7,8C8,8C9,10C4,10C5,10C6,10C9,1C10,1C13,11C4,1C15,1C16,11C6,11C7,1C19,1C22,1C28,1C37,1C44,1C57,1C59,1C63,2C15,2C17,2C29,2C33,2C36,2C38,2C39,3C13,4C12,4C13,4C14,4C15,4C16,4C18,4C19,4C20,4C22,4C23,4C24,4C25,4C28,4C30,4C32,4C34,4C41,4C44,4C45,4C46,4C48,4C51,4C54,4C55,4C57,4C58,4C59,4C60,4C61,6C14,6C16,7C13,8C11,8C17,8C21,10C14,10C17,10C18,10C21,10C28,10C30,10C32,10C42,4C111,4C118,4C121,4C122,C28,C97,C292,C293,C294,C296,C298,C299,C300,C301,C304,C571\n', '10C3,10C15', '2C2,2C9,3C3,4C4,5C6,5C7,5C8,5C9,6C7,7C3,7C7,8C3,2C16,2C18,2C22,2C23,2C26,2C27,2C31,4C17,4C47,4C56,4C63,4C93,5C10,6C46,8C10,8C16,8C18,8C23,8C25,8C31,8C32,8C47,9C21,9C28,4C100,C98,11C9,C290,C291,C295,C297,11C10', '10C8,10C24', '5C1,5C5,2C20,2C30,4C36,4C86,9C27,2C10', '2C12,8C53,9C22', '2C5,3C14,8C37,8C44,10C27', '1C9,4C1,4C7,10C1,1C11,1C14,1C17,1C18,1C20,1C21,1C53,1C58,1C60,1C62,1C64,2C34,2C35,3C15,4C11,4C29,4C31,4C42,4C64,4C82,4C83,8C15,8C42,10C31,4C157,C96,C288,C289,C302,C303,C574', '1C2,1C8,1C30,1C32,1C38,1C70', '1C4,1C5,2C8,4C3,7C4,1C12,1C23,1C26,1C29,1C31,1C34,1C42,1C48,2C32,7C11,7C14,C1,C570', '1C51,10C39,10C41', '1C66,1C71', '?', '?', '10R9,3R24,8R45,8R46', '3C10,7R6,R154,R155,2R21,3R25,7R25,9R32,9R33,9R34,9R35,9R36,9R37,9R38,9R39,9R40,9R41,9R42,9R43,9R44,9R45,9R46,9R47,9R48,9R49,9R50,9R51,9R52,9R53,10R18', 'R1,R2,R3,R4', '7R3,7R4,7R7,7R8,7R23,7R24,8R39,8R41,8R42,8R44', '1R1,1R2,1R16,1R19,1R26,1R49', '10R1,10R8,10R10,10R13', '2R7,2R8,3R3,3R4,3R5,3R6,3R7,2R10,3R20,3R33,3R34,3R35,3R36,3R37', '8R3,8R29,8R30', '6R3', '8R33,8R34,8R35,8R36', '?', '8R2,3R16', '8R37,8R38,8R40,8R43', '?', '2R4,2R5,10R2,10R3,3R17,5R29,5R30,5R32,5R33,5R38,9R13,9R14', '?', '3R15', '5R42', '4R10,4R11,4R12,4R28,4R35', '?', '10R4,10R14', '6R2', '9R5', '1R50,1R53,1R54,1R55,1R56,2R19,5R14,5R19,5R24,5R25,11R11,11R12', '5R40', '1R51', '8R1,4R15,4R16,4R23,4R24,4R33,4R34,10R22,10R30,10R40,10R41', '8R5,8R7', 'R156,R161,1R6,2R2,10R5,8R24,9R15,9R16,11R13,11R15,11R16', 
'5R28,5R41', '8R18', 'R158', '1R7', '8R16', '2R9', '7R1,8R4,8R6,9R1,10R6,1R21,1R42,1R57,1R58,R160,2R26,3R19,3R23,8R20,8R22,10R11,R167,9R54', '7R5,1R22,1R27,5R16', '1R43', '8R21,8R28', '10R17', '2R12', '1R31', '1R28,10R12', '1R3,5R4,5R37', '1R32', '1R5', '1R4,1R13,1R17,1R23,1R33', '1R14,1R39', '1R15', '5R2,1R12,1R20,1R44,5R11,5R12,5R13,5R17,5R18,5R35', '8R26,8R31', 'R164', '1R29', '2R3', '1R34', '5R3,1R10,1R41,1R60,5R36,11R14', 'R163', '1R18', 'R162,R165', 'R159', '5RN3,5RN4,5RN5,5RN6', '?', '?', '7FB5', '8FB1,10FB1,11FB1,FB4,FB6,FB8,8L2', '?', '10FB2,10FB3', '1L3,1L4', '?', '1L2', 'L2,3L1', 'L1', '1L5', '1L1', '1L6', '5L1,5L2', '10L1,10L2,10L3,10L4', '?', '?', 'D45', 'Q23', '1Q1,1Q8,3Q1,5Q3,5Q4,1Q5,1Q7', '1Q3', '?', '1Q6', '5Q2', '5Q5', '5D3,5D4,5D5,5D6', '5D7,5D8,5D9,7D1,7D2,7D3,7D4,7D5,8D3,8D4,11D1,11D2,5D10,5D11,5D12,5D20,5D22,5D23,5D24', '8D1', '8D2', 'T4', 'U1', 'U7,U18', '?', 'U9', 'U17', '1U2,1U3,1U10', '1U4,1U6,1U7', '2U1', '4U1,4U2,4U3,4U4', '?', '1U1,1U9', '1U5', '8U1', '2U2', '1U11', '3U2', '10U1', '3Y2', '?', '2Y1', 'Y1', '3J1', '8T1', '8JA5', 'CN13', 'J45', '?', '5JA2,5JA3', '1CN2', 'J2,J3', 'J53,J56,J58', '2CN1', 'J4', 'J1', 'J52', 'P2', 'J43']))
...@@ -127,6 +127,9 @@ class DicPredict(BasePredictor): ...@@ -127,6 +127,9 @@ class DicPredict(BasePredictor):
if self.is_pcs(no_null_v): if self.is_pcs(no_null_v):
ab_result[k] = '单位' ab_result[k] = '单位'
continue continue
if self.is_encap(no_null_v):
temp_pre_model_res[k] = '封装'
continue
if bol: if bol:
prob_columns.append(k) prob_columns.append(k)
...@@ -167,17 +170,17 @@ class DicPredict(BasePredictor): ...@@ -167,17 +170,17 @@ class DicPredict(BasePredictor):
for col_diffrate in sort_li_diffrate[1:]: for col_diffrate in sort_li_diffrate[1:]:
temp_pre_model_res.pop(col_diffrate[0]) temp_pre_model_res.pop(col_diffrate[0])
# 若有多个参数列, 先进行封装列的提取(封装率需要大于0), 再进行参数特征的数量比较, 特征最多的选为目标列 # 若有多个参数列且没有封装列, 就先进行封装列的提取(封装率需要大于0), 再进行参数特征的数量比较, 特征最多的选为目标列
prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数'] prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数']
self.info.info(f'可能的参数列有{str(prob_param_cols)}') self.info.info(f'可能的参数列有{str(prob_param_cols)}')
if len(prob_param_cols) >= 2: if len(prob_param_cols) >= 2:
if '封装' not in str(temp_pre_model_res):
li_encap_rate = [(col, BasePredictor.get_encap_rate(not_null_dic_data[col])) for col in li_encap_rate = [(col, BasePredictor.get_encap_rate(not_null_dic_data[col])) for col in
prob_param_cols] prob_param_cols]
sort_li_encaprate = sorted(li_encap_rate, key=lambda x: x[1], reverse=True) sort_li_encaprate = sorted(li_encap_rate, key=lambda x: x[1], reverse=True)
if sort_li_encaprate[0][1] >= 0: if sort_li_encaprate[0][1] >= 0:
temp_pre_model_res[sort_li_encaprate[0][0]] = '封装' temp_pre_model_res[sort_li_encaprate[0][0]] = '封装'
prob_param_cols.remove(sort_li_encaprate[0][0]) prob_param_cols.remove(sort_li_encaprate[0][0])
li_feature_rate = [(col, BasePredictor.get_param_featurerate(not_null_dic_data[col])) for col in li_feature_rate = [(col, BasePredictor.get_param_featurerate(not_null_dic_data[col])) for col in
prob_param_cols] prob_param_cols]
......
...@@ -3,12 +3,20 @@ ...@@ -3,12 +3,20 @@
# 标准名和代名词的映射 # 标准名和代名词的映射
li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类"] li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类"]
li_param = ["参数", "规格", "描述", "值", "description"] li_param = ["参数", "规格", "描述", "值", "description"]
li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "需求型号", "规格型号"] li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "规格型号"]
li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "需求用量", "单板数量", "采购数量"] li_num = ["数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty", "buy quantity", "单板数量", "采购数量"]
li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"] li_brand = ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考供应商", "参考厂商", "manufacturer制造商", "manufacturer", "厂牌"]
li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", 'packagereference'] li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", "packagereference"]
li_position = ["位号", "位置", "标号", "点位"] li_position = ["位号", "位置", "标号", "点位"]
# Add a '需求'-prefixed variant of every alias to each alias list, in place.
# This runs before the '*'-prefix expansion that follows, so the '需求'
# variants also receive '*'-prefixed forms there.
for _alias_list in (li_category, li_param, li_gn, li_num, li_brand, li_encap, li_position):
    # The comprehension is fully built before extend() mutates the list,
    # so only the aliases present at this point get a prefixed copy.
    _alias_list.extend(['需求' + i for i in _alias_list])
li_category.extend(['*' + i for i in li_category]) li_category.extend(['*' + i for i in li_category])
li_param.extend(['*' + i for i in li_param]) li_param.extend(['*' + i for i in li_param])
li_gn.extend(['*' + i for i in li_gn]) li_gn.extend(['*' + i for i in li_gn])
...@@ -17,6 +25,8 @@ li_brand.extend(['*' + i for i in li_brand]) ...@@ -17,6 +25,8 @@ li_brand.extend(['*' + i for i in li_brand])
li_encap.extend(['*' + i for i in li_encap]) li_encap.extend(['*' + i for i in li_encap])
li_position.extend(['*' + i for i in li_position]) li_position.extend(['*' + i for i in li_position])
PROB_FIELDS = ["序号", "a面位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark"] PROB_FIELDS = ["序号", "a面位置", "b面位置", "备注", "售价", "item", "top面", "bottom面", "designator", "remark"]
AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS] AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS]
# 可能的头部字段 # 可能的头部字段
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment