Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
bom_identify
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
efc76efc
authored
May 29, 2020
by
lichenggang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
模型预测
parent
d8dd3982
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
101 additions
and
51 deletions
extractor
model
predic_fac.py
predict/base_handler.py
predict/dict_predict.py
utils/excel_manager.py
extractor
View file @
efc76efc
No preview for this file type
model
View file @
efc76efc
No preview for this file type
predic_fac.py
View file @
efc76efc
...
...
@@ -19,9 +19,9 @@ class PredictorFac():
elif
isinstance
(
data
,
dict
):
return
self
.
dict_predictor
def predict(self, dic_data, predict_type='all'):
    """Run the predictor that matches ``dic_data`` and return its result.

    :param dic_data: input data; also used to select the predictor via
        ``self._get_predictor``.
    :param predict_type: strategy name forwarded unchanged to the selected
        predictor. Defaults to ``'all'``.
    :return: whatever the selected predictor's ``predict`` returns.
    """
    predictor = self._get_predictor(dic_data)
    # Call the predictor exactly once, with the strategy forwarded.
    return predictor.predict(dic_data, predict_type)
...
...
@@ -29,8 +29,8 @@ if __name__ == "__main__":
def get_test_data():
    """Load the sample BOM sheet and return it as a JSON string.

    The workbook is read without a header row, empty cells are replaced by
    ``'?'``, and the frame is serialised column-wise
    (``{column_index: [cell, ...]}``).

    :return: JSON text produced by ``json.dumps``.
    """
    import json

    import pandas as pd

    # Only one workbook is loaded: a second read whose result is overwritten
    # would add nothing except an extra file that must exist.
    df = pd.read_excel(
        r'C:\Users\ICHUNT\Desktop\bomlist\51AB0571_ CCTV ASST询价_SZIMS.xlsx',
        header=None,
        sheet_name='1',
    )
    df.fillna('?', inplace=True)
    dic_dft = df.to_dict(orient='list')
    return json.dumps(dic_dft)
...
...
@@ -40,5 +40,5 @@ if __name__ == "__main__":
data
=
get_test_data
()
p
=
PredictorFac
(
model_config
)
data
=
json
.
loads
(
data
)
res
=
p
.
predict
(
data
)
res
=
p
.
predict
(
data
,
predict_type
=
'model'
)
print
(
res
)
predict/base_handler.py
View file @
efc76efc
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import
re
from
utils.log_manager
import
get_logger
from
utils.robots
import
dd_send_msg
import
pandas
as
pd
RIGHT_LEVEL
=
0.7
class
BasePredictor
:
'''
预测类基类
...
...
@@ -20,6 +21,7 @@ class BasePredictor:
self
.
robot_msg
=
dd_send_msg
self
.
pd
=
pd
def predict(self, key, predict_type='all'):
    """Abstract prediction entry point; concrete predictors must override it.

    :param key: data to classify.
    :param predict_type: strategy selector. Accepted here so the base
        signature also admits the two-argument call form
        ``predict(data, predict_type)``.
    :raises NotImplementedError: always.
    """
    raise NotImplementedError
...
...
@@ -37,3 +39,44 @@ class BasePredictor:
deal_list
=
[
round
(
i
,
3
)
for
i
in
proba
[
0
]
.
tolist
()]
dic_proba
=
{
k
:
v
for
k
,
v
in
zip
(
classes
,
deal_list
)}
return
predictions
[
0
],
dic_proba
def isseq(self, data):
    """Tell whether ``data`` looks like a serial-number (index) column.

    Every ``int`` cell and every finite ``float`` cell is truncated with
    ``int()``; all other cells are ignored. The column qualifies when at
    least one number was found and the numbers never decrease from first
    to last.

    :param data: iterable of cell values.
    :return: ``True`` for a non-decreasing numeric sequence, ``False``
        otherwise (including when no numeric cell exists).
    """
    import math

    numbers = [
        int(cell)
        for cell in data
        # int() cannot convert NaN or infinity, so skip those floats.
        if isinstance(cell, int)
        or (isinstance(cell, float) and math.isfinite(cell))
    ]
    if not numbers:
        return False
    # Pairwise scan rather than recursion, so long columns cannot hit the
    # interpreter's recursion limit.
    return all(prev <= cur for prev, cur in zip(numbers, numbers[1:]))
def isnum(self, data):
    """Tell whether ``data`` looks like a quantity column.

    A cell counts as a quantity when it is an ``int`` or when
    ``self.isNumberCol`` accepts it. The column qualifies when the share of
    such cells, rounded to three decimals, is at least ``RIGHT_LEVEL``.

    :param data: sized collection of cell values.
    :return: ``True`` when enough cells are quantities; ``False`` otherwise,
        including for an empty column.
    """
    total = len(data)
    if total == 0:
        # An empty column has no ratio to compute; avoid dividing by zero.
        return False
    hits = sum(
        1 for cell in data if isinstance(cell, int) or self.isNumberCol(cell)
    )
    return round(hits / total, 3) >= RIGHT_LEVEL
def isIncrease(self, arr, size):
    """Tell whether the first ``size`` items of ``arr`` never decrease.

    :param arr: indexable sequence of mutually comparable values.
    :param size: number of leading items to inspect.
    :return: ``True`` when every inspected item is greater than or equal to
        its predecessor; also ``True`` when ``size`` is 0 or 1.
    """
    # Iterative scan: no recursion limit for long inputs, and size <= 1
    # simply yields an empty range.
    return all(arr[i] >= arr[i - 1] for i in range(1, size))
def isNumberCol(self, kw):
    """Tell whether ``kw`` is a quantity written as digits plus a unit.

    Accepted strings begin with one or more digits followed by either ``K``
    (case-insensitive) or one to three characters in the range
    U+4E00-U+9FA5, for example ``'10K'``. Plain digit strings and
    non-string values are rejected.

    :param kw: cell value to test.
    :return: ``True`` or ``False``.
    """
    if not isinstance(kw, str):
        return False
    pattern = r'(\d+)((K)|([\u4E00-\u9FA5]{1,3}))$'
    # Return a real boolean rather than leaking the match object.
    return re.match(pattern, kw, re.M | re.I) is not None
def valid_seq(self, data):
    """Pass only when ``data`` is not a serial-number column.

    :param data: column values, forwarded to ``self.isseq``.
    :return: the negation of ``self.isseq(data)``.
    """
    return not self.isseq(data)
def valid_num(self, data):
    """Pass only when ``data`` is not a quantity column.

    :param data: column values, forwarded to ``self.isnum``.
    :return: the negation of ``self.isnum(data)``.
    """
    return not self.isnum(data)
\ No newline at end of file
predict/dict_predict.py
View file @
efc76efc
...
...
@@ -96,13 +96,22 @@ class DicPredict(BasePredictor):
:param dic_data:
:return:
"""
print
(
dic_data
)
prob_columns
=
[]
temp_pre_model_res
=
{}
for
k
,
v
in
dic_data
.
items
():
if
self
.
valid_chain
(
v
):
bol
=
self
.
v_chain
(
v
)
if
bol
:
print
(
k
,
bol
)
prob_columns
.
append
(
k
)
continue
if
self
.
isnum
(
v
):
temp_pre_model_res
[
k
]
=
'数量'
continue
if
self
.
isseq
(
v
):
temp_pre_model_res
[
k
]
=
'序号'
temp_dic_data
=
{
k
:
list
(
filter
(
lambda
x
:
x
!=
PLACEHOLDER
,
dic_data
[
k
]))
for
k
in
prob_columns
}
temp_pre_model_res
=
{}
for
k
,
v
in
temp_dic_data
.
items
():
li_single_pred_res
=
[]
for
string
in
v
:
...
...
@@ -111,35 +120,28 @@ class DicPredict(BasePredictor):
result
=
Counter
(
li_single_pred_res
)
# [('brand_name', 4), ('goods_name', 3), ('param', 2)]
li_sort
=
sorted
(
result
.
items
(),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
print
(
k
,
li_sort
)
temp_pre_model_res
[
k
]
=
en_to_zh_map
[
li_sort
[
0
][
0
]]
print
(
temp_pre_model_res
)
# pre_model_res = {}
def predict(self, dic_data, predict_type='all'):
    """Pre-process ``dic_data`` and run the requested prediction stage.

    :param dic_data: raw input; it is passed through ``self.pre_deal``
        before any stage runs.
    :param predict_type: ``'all'`` or ``'pre'`` runs ``self.pre_predict``;
        ``'model'`` runs ``self.model_predict``.
    :return: the stage result when it is truthy, otherwise ``None``
        (also ``None`` for an unrecognised ``predict_type``).
    """
    dic_data = self.pre_deal(dic_data)
    # NOTE(review): 'all' behaves exactly like 'pre' and never consults the
    # model - confirm whether a fallback to model_predict is intended.
    if predict_type in ('all', 'pre'):
        outcome = self.pre_predict(dic_data)
    elif predict_type == 'model':
        outcome = self.model_predict(dic_data)
    else:
        outcome = None
    # Falsy results (empty dict/list, None) are reported as None.
    return outcome or None
def
pre_deal
(
self
,
dic_data
):
new_dic_data
=
{}
...
...
@@ -154,5 +156,16 @@ class DicPredict(BasePredictor):
return
new_dic_data
def valid_chain(self, li):
    """Placeholder with no validation logic.

    :param li: column values (unused).
    :return: always ``None``, which callers testing the result for truth
        treat as a failed validation.
    """
    return None
\ No newline at end of file
def v_chain(self, li):
    """Run every ``valid_*`` check on ``li`` and stop at the first failure.

    Checks are discovered by name through ``dir(self)``, so any attribute
    whose name starts with ``valid_`` takes part in the chain.

    :param li: column values handed to each check.
    :return: ``False`` as soon as one check returns a falsy value; ``True``
        when every check passes or no check exists.
    """
    for name in dir(self):
        if not name.startswith('valid_'):
            continue
        # ``valid_chain`` is an empty stub that returns None; run as a link
        # it would veto every column, so it is not part of the chain.
        if name == 'valid_chain':
            continue
        check = getattr(self, name)
        if not check(li):
            return False
    return True
utils/excel_manager.py
View file @
efc76efc
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import
xlwt
import
xlrd
def read_from_excel(file_name, sheet_name):
    """Read one worksheet into a ``{column_index: [cell values]}`` dict.

    :param file_name: path of the workbook, opened with
        ``xlrd.open_workbook``.
    :param sheet_name: name of the worksheet to read.
    :return: dict mapping each 0-based column index to the list of that
        column's cell values, top row first.
    """
    wb = xlrd.open_workbook(file_name)
    sheet = wb.sheet_by_name(sheet_name)
    # col_values returns the whole column at once, replacing the manual
    # row-by-row cell_value loop.
    return {col: sheet.col_values(col) for col in range(sheet.ncols)}
# Sample column: 87 cells, blank except for four "no quote needed" markers.
l = (
    [' '] * 3 + ['不需要报价']
    + [' '] * 3 + ['不需要报价']
    + [' '] * 2 + ['不需要报价']
    + [' '] * 3 + ['不需要报价']
    + [' '] * 72
)

if __name__ == "__main__":
    # Guarded so that importing this module for read_from_excel does not run
    # the scratch loop below.
    # NOTE(review): PLACEHOLDER, NAN_RATE, new_dic_data and k are not defined
    # anywhere in this module, so this loop raises NameError as written -
    # define them or delete the loop.
    for v in l:
        # Keep entries whose share of placeholder items is at most NAN_RATE
        # (original note: drop columns whose empty ratio is >= 0.8).
        counter = 0
        for item in v:
            if str(item).strip() == PLACEHOLDER:
                counter += 1
        if counter / len(v) <= NAN_RATE:
            new_dic_data[k] = v
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment