Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
bom_identify
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
8913e032
authored
Jul 08, 2020
by
lichenggang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加品牌列的黑名单
parent
fadba796
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
12 deletions
predict/base_handler.py
predict/dict_predict.py
static_config.py
predict/base_handler.py
View file @
8913e032
...
...
@@ -290,6 +290,6 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
return
round
(
count
/
len
(
data
),
3
)
>=
SIMPLE_ENCAP_LEVEL
if __name__ == "__main__":
    # Ad-hoc manual checks; they only run when this module is executed directly.

    # Sample column: the text '数量' followed by the integer 1000.
    print(BasePredictor.is_num(['数量', 1000]))

    # Sample column: one long free-text cell, the text '种类', then a run of
    # short upper-case codes (11 x RES, 10 x IC, 4 x DIO, 7 x CAP, ...).
    category_sample = (
        [
            '刁妍月 TK17091576628-10套 客供 单面 14.2*26.2 PCB订单号:588259 顺丰寄付 X-RAY检测 气泡袋包装',
            '种类',
            'TRA',
        ]
        + ['RES'] * 11
        + ['OTH', 'MAG']
        + ['IC'] * 10
        + ['DIO'] * 4
        + ['CAP'] * 7
    )
    print(BasePredictor.is_catecol(category_sample))
# print(re.match(r'(\d+|\d+(\.\d+))($|(K)|[个十百千万])$', '1k', re.M | re.I))
\ No newline at end of file
predict/dict_predict.py
View file @
8913e032
...
...
@@ -200,16 +200,23 @@ class DicPredict(BasePredictor):
for
col_nullrate
in
sort_li_nullrate
[
1
:]:
temp_pre_model_res
.
pop
(
col_nullrate
[
0
])
#
若有多个品牌列,则
进行空置率的比较, 空置率最低的选为目标列
#
模型对于品牌的识别不太精准, 若出现有多个品牌列, 先进行黑名单词汇的过滤, 若仍有多列最后再
进行空置率的比较, 空置率最低的选为目标列
prob_brand_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'品牌'
]
if
len
(
prob_brand_cols
)
>=
2
:
li_nullrate
=
[]
for
prob_brand_col
in
prob_brand_cols
:
nanrate
=
BasePredictor
.
get_nan_rate
(
dic_data
[
prob_brand_col
])
li_nullrate
.
append
((
prob_brand_col
,
nanrate
))
sort_li_nullrate
=
sorted
(
li_nullrate
,
key
=
lambda
x
:
x
[
1
])
for
col_nullrate
in
sort_li_nullrate
[
1
:]:
temp_pre_model_res
.
pop
(
col_nullrate
[
0
])
for
i
in
dic_data
[
prob_brand_col
]:
if
str
(
i
)
.
strip
()
.
lower
()
in
BRAND_BLACK_LIST
:
temp_pre_model_res
.
pop
(
prob_brand_col
)
break
filter_brand_black_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'品牌'
]
if
len
(
filter_brand_black_cols
)
>=
2
:
li_nullrate
=
[]
for
prob_brand_col
in
filter_brand_black_cols
:
nanrate
=
BasePredictor
.
get_nan_rate
(
dic_data
[
prob_brand_col
])
li_nullrate
.
append
((
prob_brand_col
,
nanrate
))
sort_li_nullrate
=
sorted
(
li_nullrate
,
key
=
lambda
x
:
x
[
1
])
for
col_nullrate
in
sort_li_nullrate
[
1
:]:
temp_pre_model_res
.
pop
(
col_nullrate
[
0
])
# 如果没有数量列则把之前的序号列(其实是数字列)当成数字列
if
not
prob_num_cols
:
...
...
static_config.py
View file @
8913e032
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Header aliases: one list per standard field, holding the alternative header
# names that map to that field.
# Each name is assigned exactly once; "参考供应商" appears only once in li_brand.
li_category = ["类别", "分类", "名称", "类别名称", "类型", "产品分类", "种类"]
li_param = ["参数", "规格", "描述", "值", "description"]
li_gn = ["型号", "参考料号", "料号", "mpn", "厂商编码", "元器件", "规格型号"]
li_num = [
    "数量", "用量(pcs)", "用量", "pcs", "quantity", "qty", "buy qty",
    "buy quantity", "单板数量", "采购数量", "单机用量", "单机数量", "单板用量",
]
li_brand = [
    "品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考厂商",
    "manufacturer制造商", "manufacturer", "厂牌", "公司",
]
li_encap = ["封装", "封装规格", "encapsulation", "footprint封装", "packagereference"]
li_position = ["位号", "位置", "标号", "点位"]
...
...
@@ -27,7 +27,7 @@ li_position.extend(['*' + i for i in li_position])
# NOTE(review): presumably header names that may occur in a sheet without being
# one of the li_* target fields -- confirm against the code that reads this.
# Assigned exactly once (the list ending in "工艺" is the effective value).
PROB_FIELDS = [
    "序号", "a面位置", "b面位置", "备注", "售价", "item",
    "top面", "bottom面", "designator", "remark", "工艺",
]
# Every PROB_FIELDS entry followed by the same entries prefixed with '*'.
AB_FIELDS = PROB_FIELDS + ['*' + i for i in PROB_FIELDS]
# All possible header fields: AB_FIELDS followed by every alias list, in order.
ALL_FIELDS = [
    *AB_FIELDS,
    *li_category,
    *li_param,
    *li_gn,
    *li_num,
    *li_brand,
    *li_encap,
    *li_position,
]
...
...
@@ -61,7 +61,10 @@ CATEGORY = ["半导体", "嵌入式", "光电子", "光源", "无源", "连接
"铰接件"
,
"盖塞与遮蔽"
,
"支撑脚和轮"
,
"型材与座"
,
"减震器"
,
"轴承"
,
"把手"
,
"直线导轨"
,
"技术密封"
,
"切换夹具和夹紧螺栓"
,
"边缘保护套"
,
"磁铁"
,
"报警器"
,
"分线盒"
,
"变送器"
,
"调制与控制"
,
"气动"
,
"螺线管"
,
"启动器"
,
"阀门和汇流板"
,
"空气设备"
,
"仪表"
,
"气动配件"
,
"测量"
,
"软焊和焊接设备"
,
"PCB原型制作与生产"
,
"化学制剂"
,
"工具"
,
"防静电保护"
,
"健康与安全"
,
"收音机和CB用框架及半框架"
,
"扬声器罩,前面板固定座"
,
"排线"
,
"电机控制模块"
,
"微型电动机"
,
"底板"
,
"3D打印机"
,
"RTV服务"
,
"配件"
,
"三极管"
,
'钽电容'
,
'MOS管'
,
'芯片'
,
'器件'
,
'模组'
,
'接线'
,
'按键'
,
'IC'
,
'LDO'
]
"RTV服务"
,
"配件"
,
"三极管"
,
'钽电容'
,
'MOS管'
,
'芯片'
,
'器件'
,
'模组'
,
'接线'
,
'按键'
,
'IC'
,
'LDO'
,
'DIO'
,
'CAP'
,
'RES'
,
'TRA'
]
# Blacklisted words for model-based recognition; used only when there are two
# or more candidate target columns. Entries are lower-case because cell values
# are matched as str(cell).strip().lower().
BRAND_BLACK_LIST = [
    'smt',
    'ic',
    'res',
]
# 冲突时识别优先级,暂时没用
DIC_PRIORITY
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment