Commit b2d26416 by lichenggang

各判断方法改为类方法

parent 1231ed2a
Showing with 58 additions and 38 deletions
...@@ -6,17 +6,22 @@ from collections import Counter ...@@ -6,17 +6,22 @@ from collections import Counter
from utils.log_manager import get_logger from utils.log_manager import get_logger
from utils.robots import dd_send_msg from utils.robots import dd_send_msg
import pandas as pd import pandas as pd
# Category keywords; presumably consulted by is_catecol (its matching loop is
# not visible in this view) -- confirm.
CATEGORY = ['二极管']
# Minimum share of quantity-like cells for is_num to accept a column.
RIGHT_LEVEL = 0.7
# Minimum share of numeric cells for is_seq to accept a column.
SEQ_LEVEL = 0.5
# Minimum share of category cells for is_catecol to accept a column.
CATE_LEVEL = 0.5
# Minimum number of repeats of one value for is_multi_same to return True.
MULTI_SAME_LEVEL = 3
class BasePredictor: class BasePredictor:
''' '''
预测类基类 预测类基类
''' '''
# 占位符
PLACEHOLDER = '?'
def __init__(self, name, extractor, classifier): def __init__(self, name, extractor, classifier):
self.name = name self.name = name
self.classifier = classifier self.classifier = classifier
...@@ -25,8 +30,6 @@ class BasePredictor: ...@@ -25,8 +30,6 @@ class BasePredictor:
self.info = get_logger(self.name + '_info') self.info = get_logger(self.name + '_info')
self.robot_msg = dd_send_msg self.robot_msg = dd_send_msg
self.pd = pd self.pd = pd
# 占位符
self.PLACEHOLDER = '?'
def predict(self, key):
    """Predict a result for ``key``; subclasses must override this method.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
...@@ -47,42 +50,36 @@ class BasePredictor: ...@@ -47,42 +50,36 @@ class BasePredictor:
dic_proba = {k: v for k, v in zip(classes, deal_list)} dic_proba = {k: v for k, v in zip(classes, deal_list)}
return predictions[0], dic_proba return predictions[0], dic_proba
def valid_seq(self, data):
    """Return the negation of ``is_seq(data)``."""
    return not self.is_seq(data)

def valid_num(self, data):
    """Return the negation of ``is_num(data)``."""
    return not self.is_num(data)

def valid_cate(self, data):
    """Return the negation of ``is_catecol(data)``."""
    return not self.is_catecol(data)
@classmethod
def is_num(cls, data):
    """Quantity-column check.

    A cell counts as a quantity when it is an ``int``, or a string of digits
    followed by ``K`` (case-insensitive) or by one to three CJK characters.

    Args:
        data: sized iterable of cell values.

    Returns:
        True when the share of quantity cells, rounded to three decimals, is
        at least ``RIGHT_LEVEL``; False otherwise, including for empty input.
    """
    def is_number_col(kw):
        """Helper: match a quantity-like string; non-strings never match."""
        if isinstance(kw, str):
            return re.match(r'(\d+)((K)|([\u4E00-\u9FA5]{1,3}))$', kw, re.M | re.I)
        return False

    total = len(data)
    # An empty column used to raise ZeroDivisionError; it is simply not a
    # quantity column. len() is used instead of truthiness so array-like
    # inputs are not evaluated as booleans.
    if total == 0:
        return False

    # NOTE(review): floats are not counted here, although is_seq accepts
    # them -- confirm that this asymmetry is intended.
    collect_num = [kw for kw in data if isinstance(kw, int) or is_number_col(kw)]
    rate = round(len(collect_num) / total, 3)
    return rate >= RIGHT_LEVEL
@classmethod
def is_catecol(self, data): def is_catecol(self, data):
cates = [] cates = []
for i in data: for i in data:
...@@ -92,18 +89,42 @@ class BasePredictor: ...@@ -92,18 +89,42 @@ class BasePredictor:
rate = round(len(cates) / len(data), 3) rate = round(len(cates) / len(data), 3)
return rate >= CATE_LEVEL return rate >= CATE_LEVEL
@classmethod
def is_multi_same(cls, data):
    """Report whether any non-placeholder value repeats often enough.

    Args:
        data: iterable of hashable cell values.

    Returns:
        True when the most frequent value other than ``PLACEHOLDER`` occurs
        at least ``MULTI_SAME_LEVEL`` times; False otherwise, including when
        nothing but placeholders (or nothing at all) is supplied.
    """
    counts = Counter(x for x in data if x != cls.PLACEHOLDER)
    # Indexing the sorted counts used to raise IndexError when no real value
    # was present.
    if not counts:
        return False
    # Only the highest count matters, so a full sort is unnecessary.
    return max(counts.values()) >= MULTI_SAME_LEVEL
@classmethod
def is_seq(cls, data):
    """Sequence-number column check.

    Collects every ``int``/``float`` cell (truncated with ``int()``) and
    requires those values to be non-decreasing in their original order.

    Args:
        data: sized iterable of cell values.

    Returns:
        True when at least one numeric cell exists, the numeric cells never
        decrease, and their share of ``data`` (rounded to three decimals) is
        at least ``SEQ_LEVEL``; False otherwise.
    """
    # NOTE(review): int() raises on NaN/inf floats -- confirm callers never
    # pass such values.
    collect_seq = [int(kw) for kw in data if isinstance(kw, (float, int))]
    if not collect_seq:
        return False

    # Pairwise scan instead of the former recursive helper, which used one
    # stack frame per element and hit the recursion limit on long columns.
    for prev, cur in zip(collect_seq, collect_seq[1:]):
        if cur < prev:
            return False

    rate = round(len(collect_seq) / len(data), 3)
    return rate >= SEQ_LEVEL
if __name__ == "__main__":
    # Manual smoke check of is_num on a sample column.
    # The list literal previously ended with a trailing comma, which wrapped
    # it in a 1-tuple so is_num received a single (list) element.
    # NOTE(review): every value except the placeholder is a float, and is_num
    # counts only ints and quantity-like strings, so this is expected to print
    # False -- confirm that is the intended demonstration.
    li = ['?', 3400.0, 5920.0, 4849.0, 2544.0, 3270.0, 52751.0, 2031.0, 5302.0, 726.0, 1247.0, 2472.0, 689.0, 6049.0,
          26796.0, 6164.0, 1605.0, 4346.0, 640.0, 960.0, 960.0, 320.0, 160.0, 860.0, 160.0, 320.0, 3183.0, 10151.0,
          640.0, 130.0, 1237.0, 800.0, 960.0, 3740.0, 17701.0, 2146.0, 1280.0, 160.0, 1120.0, 160.0, 480.0, 960.0,
          480.0, 160.0, 4717.0, 160.0, 160.0, 160.0, 640.0, 160.0, 320.0, 160.0, 160.0, 800.0, 800.0, 480.0, 1600.0,
          155.0, 960.0, 320.0, 944.0, 160.0, 160.0, 1280.0, 1852.0, 7680.0, 7680.0, 2880.0, 160.0, 224.0, 480.0, 480.0,
          640.0, 160.0, 640.0, 320.0, 1760.0, 640.0, 480.0, 960.0, 160.0, 160.0, 160.0, 160.0, 1920.0, 160.0, 5600.0,
          480.0, 2560.0, 160.0, 160.0, 160.0, 160.0, 160.0, 1280.0, 160.0, 160.0, 160.0, 160.0, 160.0, 320.0, 0.0,
          160.0, 160.0]
    print(BasePredictor.is_num(li))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment