Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
bom_identify
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
d2477e8b
authored
Jun 01, 2020
by
lichenggang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
配置分离
parent
4bd8fa67
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
59 additions
and
26 deletions
predict/dict_predict.py
static_config.py
predict/dict_predict.py
View file @
d2477e8b
...
...
@@ -3,26 +3,7 @@
from
collections
import
Counter
from
predict.base_handler
import
BasePredictor
# Candidate header-row field names
PROB_FIELDS
=
[
"序号"
,
"名称"
,
"规格"
,
"MPN"
,
"用量(pcs)"
,
"用量"
,
"pcs"
,
"位号"
,
"描述"
,
"值"
,
"数量"
,
"封装"
,
"类别"
,
"a面位置"
,
"b面位置"
,
"备注"
,
"需求数量"
,
"售价"
,
"封装"
,
"封装规格"
,
"参考品牌"
,
"品牌"
,
"item"
,
"厂商编码"
,
"品牌/厂商"
,
"参考料号"
,
"参考供应商"
,
"top面"
,
"bottom面"
]
# Mapping from each standard field name to its aliases
STD_FIELDS_MAP
=
{
"类别"
:
[
"类别"
,
"分类"
,
"名称"
,
"类别名称"
],
"参数"
:
[
"参数"
,
"规格"
,
"描述"
,
"值"
],
"型号"
:
[
"型号"
,
"参考料号"
,
"料号"
,
"MPN"
,
"厂商编码"
],
"数量"
:
[
"数量"
,
"用量(pcs)"
,
"PCS"
,
"用量"
,
"用量(PCS)"
,
"pcs"
],
"品牌"
:
[
"品牌"
,
"品牌/厂商"
,
"参考品牌"
,
"参考供应商"
,
"厂商"
,
"参考供应商"
,
"参考厂商"
]}
# Standard fields that must be returned and must also be validated
MUST_STD_FIELDS
=
[
'参数'
,
'数量'
]
#
order_list
=
[
'序号'
]
en_to_zh_map
=
{
'brand_name'
:
'品牌'
,
'param'
:
'参数'
,
'goods_name'
:
'型号'
}
from
static_config
import
*
def
fun
(
seri
):
...
...
@@ -31,12 +12,14 @@ def fun(seri):
if
str
(
field
)
.
lower
()
in
PROB_FIELDS
:
return
field
,
seri
.
name
def repeat_max(li):
    """Return the element that occurs most often in ``li``.

    When several elements share the highest count, the one that appears
    first in ``li`` wins.

    :param li: iterable of hashable items, e.g. per-row predicted labels.
    :return: the most frequent item.
    :raises IndexError: if ``li`` is empty.
    """
    # Counter(li).most_common() looks like
    # [('brand_name', 4), ('goods_name', 3), ('param', 2)];
    # only the top entry is needed, so ask for exactly one.
    return Counter(li).most_common(1)[0][0]
# Number of leading rows to take
HEAD_ROW
=
7
# Empty-cell ratio threshold
...
...
@@ -86,10 +69,24 @@ class DicPredict(BasePredictor):
std_result
[
i
[
'column_name'
]]
=
i
[
'std_name'
]
else
:
ab_result
[
i
[
'column_name'
]]
=
i
[
'pronoun'
]
# Quantity and parameter columns are validated again; columns that fail are removed
list_num_col_and_param_col
=
[]
for
k
,
v
in
std_result
.
items
():
if
v
==
'数量'
:
if
not
self
.
is_num
(
dict_data
[
k
]):
list_num_col_and_param_col
.
append
(
k
)
if
v
==
'参数'
:
if
not
self
.
v_chain
(
dict_data
[
k
]):
list_num_col_and_param_col
.
append
(
k
)
for
col
in
list_num_col_and_param_col
:
std_result
.
pop
(
col
)
pre_id_res
=
{
'std_result'
:
std_result
,
'ab_result'
:
ab_result
,
}
return
pre_id_res
def
model_predict
(
self
,
dic_data
):
...
...
@@ -103,15 +100,15 @@ class DicPredict(BasePredictor):
ab_result
=
{}
for
k
,
v
in
dic_data
.
items
():
bol
=
self
.
v_chain
(
v
)
if
bol
:
prob_columns
.
append
(
k
)
continue
if
self
.
is_seq
(
v
):
ab_result
[
k
]
=
'序号'
continue
if
self
.
is_num
(
v
):
temp_pre_model_res
[
k
]
=
'数量'
continue
if
bol
:
prob_columns
.
append
(
k
)
continue
if
self
.
is_catecol
(
v
):
temp_pre_model_res
[
k
]
=
'类别'
continue
...
...
@@ -122,10 +119,11 @@ class DicPredict(BasePredictor):
single_pred_res
,
probdic
=
self
.
get_single_predict
(
string
)
li_single_pred_res
.
append
(
single_pred_res
)
result
=
repeat_max
(
li_single_pred_res
)
temp_pre_model_res
[
k
]
=
en_to_zh_map
[
result
]
temp_pre_model_res
[
k
]
=
EN_TO_ZH_MAP
[
result
]
# Drop parameter / model-number columns that contain multiple identical values
prob_param_and_gn_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'参数'
or
temp_pre_model_res
[
i
]
==
'型号'
]
prob_param_and_gn_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'参数'
or
temp_pre_model_res
[
i
]
==
'型号'
]
for
col
in
prob_param_and_gn_cols
:
if
self
.
is_multi_same
(
temp_dic_data
[
col
]):
temp_pre_model_res
.
pop
(
col
)
...
...
@@ -150,7 +148,7 @@ class DicPredict(BasePredictor):
if
comprehensive_res
:
res
=
{
'std_result'
:
comprehensive_res
,
'ab_result'
:
pre_id_res
[
'ab_result'
],
'ab_result'
:
pre_id_res
[
'ab_result'
]
or
model_std_result
[
'ab_result'
]
,
}
return
res
...
...
static_config.py
0 → 100644
View file @
d2477e8b
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Candidate header-row field names.
# The header lookup in predict/dict_predict.py lowercases the cell before the
# membership test (``str(field).lower() in PROB_FIELDS``), so an entry is only
# reachable through that lookup if it is spelled in lowercase.
PROB_FIELDS = [
    "序号",
    "名称",
    "规格",
    "MPN",  # original spelling, kept in case other code tests the raw value
    "mpn",  # lowercase form that the lowercased lookup can actually match
    "用量(pcs)",
    "用量",
    "pcs",
    "位号",
    "描述",
    "值",
    "数量",
    "封装",
    "类别",
    "a面位置",
    "b面位置",
    "备注",
    "需求数量",
    "售价",
    "封装规格",
    "参考品牌",
    "品牌",
    "item",
    "厂商编码",
    "品牌/厂商",
    "参考料号",
    "参考供应商",
    "top面",
    "bottom面",
]
# Mapping from each standard field name to the aliases that stand for it.
# Every alias is listed once per standard name.
STD_FIELDS_MAP = {
    "类别": ["类别", "分类", "名称", "类别名称"],
    "参数": ["参数", "规格", "描述", "值"],
    "型号": ["型号", "参考料号", "料号", "MPN", "厂商编码"],
    "数量": ["数量", "用量(pcs)", "PCS", "用量", "用量(PCS)", "pcs"],
    "品牌": ["品牌", "品牌/厂商", "参考品牌", "参考供应商", "厂商", "参考厂商"],
}
# Standard fields that must be returned and must also be validated
MUST_STD_FIELDS
=
[
'参数'
,
'数量'
]
# Mapping from English parameter names to their Chinese field names
EN_TO_ZH_MAP
=
{
'brand_name'
:
'品牌'
,
'param'
:
'参数'
,
'goods_name'
:
'型号'
}
# Collection of category names, imported from learning_data.lie_category
CATEGORY
=
[
"半导体"
,
"嵌入式"
,
"光电子"
,
"光源"
,
"无源"
,
"连接器"
,
"断路器"
,
"指示灯"
,
"声源"
,
"接触器"
,
"铁氧芯"
,
"冷热系统"
,
"电源"
,
"电线"
,
"机械"
,
"外壳"
,
"自动化"
,
"工作场所设备"
,
"汽车"
,
"机器人"
,
"放大器"
,
"音频"
,
"时钟"
,
"转换器"
,
"模具技术"
,
"接口"
,
"隔离"
,
"隔离器件"
,
"控制器"
,
"驱动"
,
"处理器"
,
"RF和微波"
,
"传感器"
,
"多路复用器"
,
"无线"
,
"计时"
,
"裸片"
,
"DLP产品"
,
"射频&微波"
,
"保险丝"
,
"电池"
,
"电容"
,
"电路保护"
,
"通信"
,
"数据转换"
,
"分立器件"
,
"显示器"
,
"DSP"
,
"滤波器"
,
"电感器"
,
"互连"
,
"套件与工具"
,
"照明"
,
"逻辑"
,
"存储器"
,
"杂项"
,
"电机"
,
"光电器件"
,
"可编程逻辑"
,
"电阻"
,
"换能器"
,
"软件"
,
"存储"
,
"继电器"
,
"系统"
,
"开关"
,
"热管理"
,
"变压器"
,
"晶圆服务"
,
"GPS模块"
,
"风扇"
,
"电缆"
,
"定时"
,
"二极管"
,
"桥式整流器"
,
"晶闸管"
,
"三端双向可控硅"
,
"晶体管"
,
"集成电路"
,
"编程器和擦除器"
,
"开发套件"
,
"通信模块"
,
"单片机"
,
"智能显示屏模块"
,
"LED"
,
"显示屏"
,
"光耦合器"
,
"激光元件"
,
"光敏元件"
,
"迷你光源"
,
"电筒"
,
"电位器"
,
"编码器"
,
"旋钮"
,
"HV发生器"
,
"天线"
,
"放电管"
,
"功率因子修正"
,
"音频元器件"
,
"操纵杆"
,
"键板"
,
"发声器"
,
"变流器"
,
"线圈架"
,
"加热垫"
,
"温度指示器"
,
"加热元件"
,
"空调"
,
"换热器"
,
"机柜通风系统"
,
"光电模组"
,
"线缆"
,
"管道和绝缘套"
,
"热缩管"
,
"螺栓"
,
"肩头螺钉"
,
"螺钉"
,
"螺母"
,
"螺纹插入件"
,
"垫圈"
,
"铆钉"
,
"安全元件"
,
"金属衬"
,
"塑料衬"
,
"接头"
,
"PCB支架和导轨"
,
"手轮和指针"
,
"液压系统"
,
"紧固把手"
,
"锁和卡环"
,
"铰接件"
,
"盖塞与遮蔽"
,
"支撑脚和轮"
,
"型材与座"
,
"减震器"
,
"轴承"
,
"把手"
,
"直线导轨"
,
"技术密封"
,
"切换夹具和夹紧螺栓"
,
"边缘保护套"
,
"磁铁"
,
"报警器"
,
"分线盒"
,
"变送器"
,
"调制与控制"
,
"气动"
,
"螺线管"
,
"启动器"
,
"阀门和汇流板"
,
"空气设备"
,
"仪表"
,
"气动配件"
,
"测量"
,
"软焊和焊接设备"
,
"PCB原型制作与生产"
,
"化学制剂"
,
"工具"
,
"防静电保护"
,
"健康与安全"
,
"收音机和CB用框架及半框架"
,
"扬声器罩,前面板固定座"
,
"排线"
,
"电机控制模块"
,
"微型电动机"
,
"底板"
,
"3D打印机"
,
"RTV服务"
,
"配件"
,
"三极管"
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment