Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
bom_identify
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
76ff0b71
authored
May 29, 2020
by
lzzzzl
Browse files
Options
_('Browse Files')
Download
Plain Diff
添加重复列识别
parents
6c386050
273f2f67
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
199 additions
and
40 deletions
classify_server.py
client/client.py
predic_fac.py
predict/__init__.py
predict/dict_predict.py
predict/list_predict.py
utils/excel_manager.py
utils/predic_fac.py
classify_server.py
View file @
76ff0b71
from
concurrent
import
futures
import
time
import
time
,
json
import
grpc
from
protobuf
import
classify_pb2
from
protobuf
import
classify_pb2_grpc
from
utils.config
import
model_config
from
utils.
predic_fac
import
PredictorFac
from
predic_fac
import
PredictorFac
from
utils.log_manager
import
get_logger
from
utils.excel_manager
import
read_from_excel
log_server
=
get_logger
(
'server'
)
...
...
@@ -17,13 +16,14 @@ class Classify(classify_pb2_grpc.classifyServicer):
log_server
.
info
(
'classify server start!'
)
self
.
predictorfac
=
PredictorFac
(
model_config
)
def
Classify
(
self
,
request
,
context
):
log_server
.
debug
(
'接收数据: '
+
request
.
keyword
)
res
=
self
.
predictorfac
.
predic
(
request
.
keyword
)
print
(
'接收数据: '
+
request
.
keyword
)
res
=
self
.
predictorfac
.
predic
t
(
request
.
keyword
)
return
classify_pb2
.
ClassifyReply
(
message
=
'result {msg}'
.
format
(
msg
=
res
))
def
fac_test_predic
(
self
,
data
):
res
=
self
.
predictorfac
.
predic
(
data
)
res
=
self
.
predictorfac
.
predic
t
(
data
)
return
res
...
...
@@ -41,7 +41,8 @@ def serve():
if
__name__
==
'__main__'
:
# serve()
data
=
read_from_excel
(
'DZ0901_V1.4_BOM.xlsx'
,
'DZ0901_V1.3BOM清单'
)
print
(
Classify
()
.
fac_test_predic
(
data
))
serve
()
# data = read_from_excel('DZ0901_V1.4_BOM.xlsx', 'DZ0901_V1.3BOM清单')
# print(data)
# print(Classify().fac_test_predic(data))
client/client.py
View file @
76ff0b71
import
grpc
from
protobuf
import
classify_pb2
from
protobuf
import
classify_pb2_grpc
import
pandas
as
pd
import
json
def get_test_data(path=r'C:\Users\ICHUNT\Desktop\bomlist\DZ0901_V1.4_BOM.xlsx'):
    """
    Load a BOM spreadsheet and serialise it column by column as JSON.

    :param path: spreadsheet to read. Defaults to the developer's sample
        file, so existing zero-argument calls behave exactly as before.
    :return: JSON object string mapping each column label to the list of
        its cell values, with empty cells replaced by a single space.
        The sheet is read with ``header=None``, so the labels are the
        integer positions, which JSON encodes as string keys.
    """
    frame = pd.read_excel(path, header=None).fillna(' ')
    return json.dumps(frame.to_dict(orient='list'))
def
run
():
# 连接 rpc 服务器
channel
=
grpc
.
insecure_channel
(
'localhost:50051'
)
# 调用 rpc 服务
stub
=
classify_pb2_grpc
.
classifyStub
(
channel
)
response
=
stub
.
Classify
(
classify_pb2
.
ClassifyRequest
(
keyword
=
'czl'
))
test
=
get_test_data
()
response
=
stub
.
Classify
(
classify_pb2
.
ClassifyRequest
(
keyword
=
test
))
print
(
"Classify client received: "
+
response
.
message
)
if
__name__
==
'__main__'
:
...
...
predic_fac.py
0 → 100644
View file @
76ff0b71
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from
sklearn.externals
import
joblib
import
pickle
,
json
from
predict
import
dict_predict
,
kw_predict
class PredictorFac():
    """Load the models once and route each request to the matching predictor."""

    def __init__(self, config):
        """
        :param config: mapping with 'extractor_path' (a pickle file) and
            'model_path' (a joblib file)
        """
        # NOTE(security): unpickling executes arbitrary code; the configured
        # paths must only ever point at trusted model files.
        with open(config['extractor_path'], 'rb') as f:
            extractor = pickle.load(f)
        classifier = joblib.load(config['model_path'])
        self.kw_predictor = kw_predict.KwPredict('single', extractor, classifier)
        self.dict_predictor = dict_predict.DicPredict('dict', extractor, classifier)

    def _get_predictor(self, data):
        """
        Pick the predictor for an already decoded payload.

        :param data: a keyword string or a dict of columns
        :return: the keyword predictor for ``str``, the dict predictor for ``dict``
        :raises TypeError: for any other payload type, instead of handing
            back ``None`` and failing later with an unrelated AttributeError
        """
        if isinstance(data, str):
            return self.kw_predictor
        if isinstance(data, dict):
            return self.dict_predictor
        raise TypeError(
            'unsupported payload type: {}'.format(type(data).__name__))

    def predict(self, data):
        """
        Decode the request payload and run the matching predictor.

        :param data: JSON text (an object for a sheet, a quoted string for a
            keyword), a bare keyword string, or an already decoded dict
        :return: whatever the selected predictor's ``predict`` returns
        :raises TypeError: when the decoded payload is neither str nor dict
        """
        try:
            payload = json.loads(data)
        except (TypeError, ValueError):
            # Not JSON text: either a bare keyword such as 'czl' (invalid
            # JSON) or an object that is already decoded. Use it unchanged.
            payload = data
        return self._get_predictor(payload).predict(payload)
if __name__ == "__main__":
    # Manual smoke test: push the sample BOM sheet through the factory.

    def get_test_data():
        """Return the sample sheet as a JSON string of column lists."""
        import pandas as pd
        import json

        sheet = pd.read_excel(
            r'C:\Users\ICHUNT\Desktop\bomlist\DZ0901_V1.4_BOM.xlsx',
            header=None,
        )
        # Empty cells become a single space so every value serialises.
        sheet = sheet.fillna(' ')
        return json.dumps(sheet.to_dict(orient='list'))

    from utils.config import model_config

    payload = get_test_data()
    factory = PredictorFac(model_config)
    print(factory.predict(payload))
predict/__init__.py
View file @
76ff0b71
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__all__
=
[
'kw_predict'
,
'list_predict'
]
\ No newline at end of file
# Submodules exported by ``from predict import *``. Entries are module
# names, not file names, so no ".py" suffix.
__all__ = ['kw_predict', 'dict_predict']
\ No newline at end of file
predict/dict_predict.py
0 → 100644
View file @
76ff0b71
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from
predict.base_handler
import
BasePredictor
# Candidate header-cell texts. fun() lower-cases a cell before testing
# membership here, so an entry is only reachable through its lower-case
# form; "mpn" is listed next to "MPN" for that reason.
prob_fields = [
    "序号", "名称", "规格", "MPN", "mpn", "用量(pcs)", "用量", "pcs",
    "位号", "描述", "值", "数量", "封装", "类别", "a面位置", "b面位置",
    "备注", "需求数量", "参考品牌", "品牌", "item", "厂商编码",
    "品牌/厂商", "参考料号", "参考供应商", "top面", "bottom面",
]

# Standard field name -> header texts (aliases) that stand for it.
# The header text is lower-cased before the lookup, so every alias needs a
# lower-case form in its list.
# NOTE(review): several aliases (e.g. "型号", "参数", "料号") are absent from
# prob_fields and are therefore never detected as headers - confirm intent.
fields_map = {
    "序号": ["序号"],
    "类别": ["类别", "分类", "名称", "类别名称"],
    "参数": ["参数", "规格", "描述"],
    "型号": ["型号", "参考料号", "料号", "MPN", "mpn"],
    "数量": ["数量", "用量(pcs)", "PCS", "用量", "用量(PCS)", "pcs"],
    "封装": ["封装", "封装规格"],
    "品牌": ["品牌", "品牌/厂商", "参考品牌", "厂商编码", "参考供应商",
           "厂商", "参考厂商"],
}

# NOTE(review): not referenced by the code visible in this module;
# presumably the standard names of sequence-number columns - confirm.
order_list = ['序号']
def fun(seri):
    """
    Find the first header-like cell in one column.

    :param seri: pandas Series holding one column of the sheet head
    :return: ``(cell, seri.name)`` for the first cell whose text matches an
        entry of ``prob_fields`` ignoring case, or ``None`` if none matches
    """
    # Lower-case both sides of the comparison: the table holds upper-case
    # entries such as "MPN" that a one-sided lower() could never match.
    known = {str(name).lower() for name in prob_fields}
    for field in seri.tolist():
        if str(field).lower() in known:
            return field, seri.name
    return None
# Number of leading rows of the sheet that are searched for header cells
HEAD_ROW = 5
class DicPredict(BasePredictor):
    """Label the columns of a column-oriented sheet by their header cells."""

    def id_by_field(self, df_head):
        """
        Look for a header cell in every column and map it to a standard name.

        :param df_head: DataFrame with the leading rows of the received data
            (pre_predict passes the first HEAD_ROW rows)
        :return li_res: list of dicts, one per column in which a header cell
            was found, with keys 'std_name' ('' when the header has no
            standard name), 'pronoun' (the header text as found) and
            'column_name'
        """
        li_res = []
        # Walk the columns directly instead of df_head.apply(fun): apply
        # infers its result shape from the returned tuples / None values and
        # may expand them into a DataFrame, which has no tolist().
        for _, column in df_head.items():
            found = fun(column)
            if found is None:
                continue
            field, column_name = found
            dic = {'std_name': '', 'pronoun': field, 'column_name': column_name}
            # Compare case-insensitively on both sides so that upper-case
            # aliases in fields_map (e.g. "MPN") can match.
            key = str(field).lower()
            for k, v in fields_map.items():
                if key in (str(alias).lower() for alias in v):
                    dic['std_name'] = k
            li_res.append(dic)
        return li_res

    def pre_predict(self, dict_data):
        """
        Identify columns from the header cells in the first HEAD_ROW rows.

        :param dict_data: mapping of column key -> list of cell values
        :return id_res: dict with 'std_result' (list of
            ``{column_key: standard_name}``) and 'ab_result' (list of
            ``{column_key: header_text}`` for headers without a standard name)
        """
        columns = []
        li_data = []
        for k, v in dict_data.items():
            # Drop columns whose share of blank cells is >= 0.8. A column
            # with no cells at all is dropped too, which also avoids a
            # division by zero.
            if len(v) == 0:
                continue
            counter = sum(1 for item in v if not str(item).strip())
            if counter / len(v) >= 0.8:
                continue
            columns.append(k)
            li_data.append(v)

        std_result = []
        ab_result = []
        if columns:
            # One row per kept column, transposed back to column orientation.
            df = self.pd.DataFrame(li_data)
            dft = df.T.head(HEAD_ROW)
            dft.columns = columns
            for i in self.id_by_field(dft):
                if i.get('std_name'):
                    std_result.append({i['column_name']: i['std_name']})
                else:
                    ab_result.append({i['column_name']: i['pronoun']})
        id_res = {
            'std_result': std_result,
            'ab_result': ab_result,
        }
        return id_res

    def predict(self, dic_data):
        """
        Identify the columns of a sheet.

        :param dic_data: mapping of column key -> list of cell values
        :return: the dict produced by pre_predict (always returned, also
            when no header was recognised)
        """
        res = self.pre_predict(dic_data)
        # pre_predict always returns a non-empty dict, so test its contents
        # rather than the dict itself to decide whether headers were found.
        if res['std_result'] or res['ab_result']:
            return res
        if len(dic_data) > 0:
            # Keys are whatever the caller supplied (strings after a JSON
            # round trip), so take the first column by position, not by 0.
            first_column = next(iter(dic_data.values()))
            # NOTE(review): the verdict is only printed by order_predict and
            # is not merged into the result yet.
            self.order_predict(first_column)
        return res

    def order_predict(self, data):
        """
        Judge whether the numeric cells of a column never decrease.

        :param data: iterable of cell values; non-numeric cells are ignored
        :return: True when at least one numeric cell exists and the numeric
            cells, truncated to int, are in non-decreasing order
        """
        import math

        # NaN / infinity cannot be converted by int(), so skip them.
        collect_num = [
            int(kw) for kw in data
            if isinstance(kw, (int, float)) and math.isfinite(kw)
        ]
        # A column without any number is not treated as a sequence column.
        judge = bool(collect_num) and self.IsIncrease(collect_num, len(collect_num))
        print('judge: ' + str(judge))
        return judge

    def IsIncrease(self, arr, size):
        """
        Tell whether the first ``size`` elements of ``arr`` never decrease.

        :param arr: sequence of comparable values
        :param size: number of leading elements to check
        :return: True when every checked element is >= its predecessor
            (trivially True for ``size`` <= 1)
        """
        # Iterative on purpose: the recursive form raised IndexError for
        # size 0 and could exceed the recursion limit on long columns.
        return all(arr[i] >= arr[i - 1] for i in range(1, size))
predict/list_predict.py
View file @
76ff0b71
...
...
@@ -5,6 +5,7 @@ from predict.base_handler import BasePredictor
RIGHT_LEVEL
=
0.7
REPEAT_TIMES
=
3
class
LiPredict
(
BasePredictor
):
...
...
@@ -50,5 +51,22 @@ class LiPredict(BasePredictor):
else
:
return
False
def isRepeat(self, data):
    """Tally how often each value occurs in ``data`` and print the tally."""
    tally = {}
    for value in data:
        # Missing keys start at zero, so first and repeated sightings
        # share a single update.
        tally[value] = tally.get(value, 0) + 1
    print(tally)
#
# """
# 判断是否重复列
# """
# def vailed(self, data):
utils/excel_manager.py
View file @
76ff0b71
...
...
@@ -23,7 +23,6 @@ def read_from_excel(file_name, sheet_name):
for
index
in
data
.
columns
:
result_dict
[
count
]
=
data
[
index
]
.
tolist
()
count
+=
1
print
(
result_dict
)
return
result_dict
utils/predic_fac.py
deleted
100644 → 0
View file @
6c386050
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from
sklearn.externals
import
joblib
import
pickle
from
predict
import
*
class PredictorFac():
    """Load the models once and route each request to the matching predictor."""

    def __init__(self, config):
        """
        :param config: mapping with 'extractor_path' (a pickle file) and
            'model_path' (a joblib file)
        """
        # NOTE(security): unpickling executes arbitrary code; the configured
        # paths must only ever point at trusted model files.
        with open(config['extractor_path'], 'rb') as f:
            extractor = pickle.load(f)
        classifier = joblib.load(config['model_path'])
        self.kw_predictor = kw_predict.KwPredict('single', extractor, classifier)
        self.list_predictor = list_predict.LiPredict('list', extractor, classifier)

    def _get_predictor(self, data):
        """
        Pick the predictor for a payload.

        :param data: a keyword string or a dict of columns
        :return: the keyword predictor for ``str``, the list predictor for ``dict``
        :raises TypeError: for any other payload type, instead of handing
            back ``None`` and failing later with an unrelated AttributeError
        """
        if isinstance(data, str):
            return self.kw_predictor
        if isinstance(data, dict):
            return self.list_predictor
        raise TypeError(
            'unsupported payload type: {}'.format(type(data).__name__))

    def predic(self, data):
        """
        Run the predictor that matches the payload type.

        :param data: a keyword string or a dict of columns
        :return: whatever the selected predictor's ``predict`` returns
        :raises TypeError: when ``data`` is neither str nor dict
        """
        return self._get_predictor(data).predict(data)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment