Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lichenggang
/
bom_identify
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
b7f946c4
authored
Jun 10, 2020
by
lichenggang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
多个参数列的选择从比较不同率改为比较特征率
parent
a9c6c7dd
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
20 deletions
predict/base_handler.py
predict/dict_predict.py
predict/base_handler.py
View file @
b7f946c4
...
...
@@ -61,7 +61,7 @@ class BasePredictor:
# return not self.is_catecol(data)
@classmethod
def
is_num
(
self
,
data
):
def
is_num
(
cls
,
data
):
"""
数量列预测
"""
...
...
@@ -87,7 +87,7 @@ class BasePredictor:
return
True
if
rate
>=
RIGHT_LEVEL
else
False
@classmethod
def
is_catecol
(
self
,
data
):
def
is_catecol
(
cls
,
data
):
cates
=
[]
for
i
in
data
:
for
j
in
CATEGORY
:
...
...
@@ -98,14 +98,14 @@ class BasePredictor:
return
rate
>=
CATE_LEVEL
@classmethod
def
is_multi_same
(
self
,
data
):
no_null_data
=
list
(
filter
(
lambda
x
:
x
!=
self
.
PLACEHOLDER
,
data
))
def
is_multi_same
(
cls
,
data
):
no_null_data
=
list
(
filter
(
lambda
x
:
x
!=
cls
.
PLACEHOLDER
,
data
))
result
=
Counter
(
no_null_data
)
li_sort
=
sorted
(
result
.
items
(),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
return
li_sort
[
0
][
1
]
>=
MULTI_SAME_LEVEL
@classmethod
def
is_seq
(
self
,
data
):
def
is_seq
(
cls
,
data
):
"""
序号列预测
"""
...
...
@@ -126,7 +126,7 @@ class BasePredictor:
return
True
if
rate
>=
SEQ_LEVEL
else
False
@classmethod
def
get_diffrate
(
self
,
data
):
def
get_diffrate
(
cls
,
data
):
"""
得到不同率
"""
...
...
@@ -134,14 +134,14 @@ class BasePredictor:
return
rate
@classmethod
def
repeat_max
(
self
,
li
):
def
repeat_max
(
cls
,
li
):
result
=
Counter
(
li
)
# [('brand_name', 4), ('goods_name', 3), ('param', 2)]
li_sort
=
sorted
(
result
.
items
(),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
return
li_sort
[
0
][
0
]
@classmethod
def
get_comprehensive_res
(
self
,
head_std_result
,
model_std_result
):
def
get_comprehensive_res
(
cls
,
head_std_result
,
model_std_result
):
vote_count
=
{
"类别"
:
[],
...
...
@@ -165,7 +165,7 @@ class BasePredictor:
return
comprehensive_res
@classmethod
def
is_ref
(
self
,
data
):
def
is_ref
(
cls
,
data
):
"""
位号列预测
"""
...
...
@@ -177,7 +177,7 @@ class BasePredictor:
return
round
(
count
/
len
(
data
),
3
)
>=
REF_LEVEL
or
False
@classmethod
def
is_pcs
(
self
,
data
):
def
is_pcs
(
cls
,
data
):
"""
pcs列
"""
...
...
@@ -188,5 +188,19 @@ class BasePredictor:
count
+=
1
return
round
(
count
/
len
(
data
),
3
)
>=
PCS_LEVEL
or
False
@classmethod
def get_param_featurerate(cls, data):
    """
    Return the feature rate of a candidate parameter column.

    The feature rate is the fraction of cells whose string form contains
    at least one of the markers 'pf', '%', '±' or 'uf'. Matching is a
    case-sensitive substring test, and each cell is counted at most once
    even when it contains several markers.

    :param data: sized iterable of cell values; each cell is converted
        with ``str()`` before matching.
    :return: ratio of matching cells to ``len(data)``, rounded to three
        decimal places; ``0.0`` when ``data`` is empty.
    """
    feature_markers = ('pf', '%', '±', 'uf')
    total = len(data)
    # An empty column has no features; returning 0.0 avoids the
    # ZeroDivisionError the bare division would raise.
    if total == 0:
        return 0.0
    # No marker contains whitespace, so stripping the cell text first would
    # not change the outcome of the substring test.
    hits = sum(
        1
        for cell in data
        if any(marker in str(cell) for marker in feature_markers)
    )
    return round(hits / total, 3)
if __name__ == "__main__":
    # Ad-hoc manual check: feed a sample column of float values to the
    # quantity-column predictor and print its verdict.
    sample_column = [
        3400.0, 5920.0, 4849.0, 2544.0, 3270.0, 52751.0, 2031.0, 5302.0,
        726.0, 1247.0, 2472.0, 689.0, 6049.0, 26796.0, 6164.0, 1605.0,
        4346.0, 640.0, 960.0, 960.0, 320.0, 160.0, 860.0, 160.0,
        320.0, 3183.0, 10151.0, 640.0, 130.0, 1237.0, 800.0, 960.0,
        3740.0, 17701.0, 2146.0, 1280.0, 160.0, 1120.0, 160.0, 480.0,
        960.0, 480.0, 160.0, 4717.0, 160.0, 160.0, 160.0, 640.0,
        160.0, 320.0, 160.0, 160.0, 800.0, 800.0, 480.0, 1600.0,
        155.0, 960.0, 320.0, 944.0, 160.0, 160.0, 1280.0, 1852.0,
        7680.0, 7680.0, 2880.0, 160.0, 224.0, 480.0, 480.0, 640.0,
        160.0, 640.0, 320.0, 1760.0, 640.0, 480.0, 960.0, 160.0,
        160.0, 160.0, 160.0, 1920.0, 160.0, 5600.0, 480.0, 2560.0,
        160.0, 160.0, 160.0, 160.0, 160.0, 1280.0, 160.0, 160.0,
        160.0, 160.0, 160.0, 320.0, 0.0, 160.0, 160.0,
    ]
    print(BasePredictor.is_num(sample_column))
predict/dict_predict.py
View file @
b7f946c4
...
...
@@ -89,7 +89,7 @@ class DicPredict(BasePredictor):
temp_pre_model_res
=
{}
ab_result
=
{}
for
k
,
v
in
dic_data
.
items
():
no_null_v
=
list
(
filter
(
lambda
x
:
x
!=
self
.
PLACEHOLDER
,
v
))
no_null_v
=
list
(
filter
(
lambda
x
:
x
!=
BasePredictor
.
PLACEHOLDER
,
v
))
bol
=
self
.
v_chain
(
v
)
if
self
.
is_seq
(
no_null_v
):
ab_result
[
k
]
=
'序号'
...
...
@@ -111,7 +111,7 @@ class DicPredict(BasePredictor):
temp_pre_model_res
[
k
]
=
'类别'
# 对列元素进行去重并处理掉占位符
set_not_null_dic_data
=
{
k
:
set
(
list
(
filter
(
lambda
x
:
x
!=
self
.
PLACEHOLDER
,
dic_data
[
k
])))
for
k
in
set_not_null_dic_data
=
{
k
:
set
(
list
(
filter
(
lambda
x
:
x
!=
BasePredictor
.
PLACEHOLDER
,
dic_data
[
k
])))
for
k
in
prob_columns
}
for
k
,
v
in
set_not_null_dic_data
.
items
():
li_single_pred_res
=
[]
...
...
@@ -127,11 +127,11 @@ class DicPredict(BasePredictor):
continue
temp_pre_model_res
[
k
]
=
EN_TO_ZH_MAP
[
result
]
# 若有多个
参数列或
型号列,则进行不同率的比较, 不同率最高的选为目标列
not_null_dic_data
=
{
k
:
list
(
filter
(
lambda
x
:
x
!=
self
.
PLACEHOLDER
,
dic_data
[
k
]))
for
k
in
prob_columns
}
prob_param_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'参数'
]
# 若有多个型号列,则进行不同率的比较, 不同率最高的选为目标列
not_null_dic_data
=
{
k
:
list
(
filter
(
lambda
x
:
x
!=
BasePredictor
.
PLACEHOLDER
,
dic_data
[
k
]))
for
k
in
prob_columns
}
#
prob_param_cols = [i for i in temp_pre_model_res if temp_pre_model_res[i] == '参数']
prob_gn_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'型号'
]
for
param_or_gn_col_list
in
[
prob_
param_cols
,
prob_
gn_cols
]:
for
param_or_gn_col_list
in
[
prob_gn_cols
]:
if
len
(
param_or_gn_col_list
)
>=
2
:
li_diffrate
=
[(
col
,
BasePredictor
.
get_diffrate
(
not_null_dic_data
[
col
]))
for
col
in
param_or_gn_col_list
]
...
...
@@ -139,6 +139,15 @@ class DicPredict(BasePredictor):
for
col_diffrate
in
sort_li_diffrate
[
1
:]:
temp_pre_model_res
.
pop
(
col_diffrate
[
0
])
# 若有多个参数列,则进行参数特征的数量比较, 特征最多的选为目标列
prob_param_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'参数'
]
if
len
(
prob_param_cols
)
>=
2
:
li_feature_rate
=
[(
col
,
BasePredictor
.
get_param_featurerate
(
not_null_dic_data
[
col
]))
for
col
in
prob_param_cols
]
sort_li_fearate
=
sorted
(
li_feature_rate
,
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
for
col_fearate
in
sort_li_fearate
[
1
:]:
temp_pre_model_res
.
pop
(
col_fearate
[
0
])
# 若有多个数量列,则进行空置率的比较, 空置率最低的选为目标列, #TODO 后续可能需要改成数量元素的占比率
prob_num_cols
=
[
i
for
i
in
temp_pre_model_res
if
temp_pre_model_res
[
i
]
==
'数量'
]
if
len
(
prob_num_cols
)
>=
2
:
...
...
@@ -146,7 +155,7 @@ class DicPredict(BasePredictor):
for
prob_num_col
in
prob_num_cols
:
counter
=
0
for
item
in
dic_data
[
prob_num_col
]:
if
str
(
item
)
.
strip
()
==
self
.
PLACEHOLDER
:
if
str
(
item
)
.
strip
()
==
BasePredictor
.
PLACEHOLDER
:
counter
+=
1
li_nullrate
.
append
((
prob_num_col
,
counter
/
len
(
dic_data
[
prob_num_col
])))
sort_li_nullrate
=
sorted
(
li_nullrate
,
key
=
lambda
x
:
x
[
1
])
...
...
@@ -160,7 +169,7 @@ class DicPredict(BasePredictor):
for
prob_brand_col
in
prob_brand_cols
:
counter
=
0
for
item
in
dic_data
[
prob_brand_col
]:
if
str
(
item
)
.
strip
()
==
self
.
PLACEHOLDER
:
if
str
(
item
)
.
strip
()
==
BasePredictor
.
PLACEHOLDER
:
counter
+=
1
li_nullrate
.
append
((
prob_brand_col
,
counter
/
len
(
dic_data
[
prob_brand_col
])))
sort_li_nullrate
=
sorted
(
li_nullrate
,
key
=
lambda
x
:
x
[
1
])
...
...
@@ -197,7 +206,7 @@ class DicPredict(BasePredictor):
head_std_result
=
head_id_res
.
get
(
'std_result'
)
model_std_result
=
model_id_res
.
get
(
'std_result'
)
comprehensive_res
=
BasePredictor
.
get_comprehensive_res
(
head_std_result
,
model_std_result
)
self
.
info
.
info
(
'综合预测结果: '
+
str
(
comprehensive_res
))
if
comprehensive_res
:
res
=
{
'std_result'
:
comprehensive_res
,
...
...
@@ -221,7 +230,7 @@ class DicPredict(BasePredictor):
# 去掉空置率大于等于0.8的列
counter
=
0
for
item
in
v
:
if
str
(
item
)
.
strip
()
==
self
.
PLACEHOLDER
:
if
str
(
item
)
.
strip
()
==
BasePredictor
.
PLACEHOLDER
:
counter
+=
1
if
counter
/
len
(
v
)
<=
NAN_RATE
:
new_dic_data
[
k
]
=
v
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment