Commit efc76efc by lichenggang

模型预测

parent d8dd3982
No preview for this file type
No preview for this file type
......@@ -19,9 +19,9 @@ class PredictorFac():
elif isinstance(data, dict):
return self.dict_predictor
def predict(self, dic_data):
def predict(self, dic_data, predict_type='all'):
predictor = self._get_predictor(dic_data)
res = predictor.predict(dic_data)
res = predictor.predict(dic_data, predict_type)
return res
......@@ -29,8 +29,8 @@ if __name__ == "__main__":
def get_test_data():
import pandas as pd
import json
df = pd.read_excel(r'C:\Users\ICHUNT\Desktop\bomlist\DZ0901_V1.4_BOM.xlsx', header=None)
df.fillna(' ', inplace=True)
df = pd.read_excel(r'C:\Users\ICHUNT\Desktop\bomlist\51AB0571_ CCTV ASST询价_SZIMS.xlsx', header=None, sheet_name='1')
df.fillna('?', inplace=True)
dic_dft = df.to_dict(orient='list')
return json.dumps(dic_dft)
......@@ -40,5 +40,5 @@ if __name__ == "__main__":
data = get_test_data()
p = PredictorFac(model_config)
data = json.loads(data)
res = p.predict(data)
res = p.predict(data,predict_type='model')
print(res)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from utils.log_manager import get_logger
from utils.robots import dd_send_msg
import pandas as pd
RIGHT_LEVEL = 0.7
class BasePredictor:
'''
预测类基类
......@@ -20,6 +21,7 @@ class BasePredictor:
self.robot_msg = dd_send_msg
self.pd = pd
def predict(self, key):
raise NotImplementedError
......@@ -37,3 +39,44 @@ class BasePredictor:
deal_list = [round(i, 3) for i in proba[0].tolist()]
dic_proba = {k: v for k, v in zip(classes, deal_list)}
return predictions[0], dic_proba
def isseq(self, data):
    """
    Predict whether a column is a sequence-number column.

    Every int/float cell is truncated with ``int()``; the column counts as a
    sequence column when those integers never decrease.

    :param data: iterable of the cell values of one column
    :return: True if at least one finite numeric cell exists and the numeric
        cells are non-decreasing, otherwise False
    """
    import math  # imported locally so the method is self-contained

    # int() raises ValueError on NaN and OverflowError on +/-inf, so
    # non-finite floats are skipped instead of aborting the prediction.
    collect_order = [
        int(kw) for kw in data
        if isinstance(kw, int) or (isinstance(kw, float) and math.isfinite(kw))
    ]
    if not collect_order:
        # A column without any numeric cell is never a sequence column.
        return False
    return self.isIncrease(collect_order, len(collect_order))
def isnum(self, data):
    """
    Predict whether a column is a quantity column.

    A cell counts as a quantity when it is an ``int`` or when
    ``self.isNumberCol`` accepts it. The column is a quantity column when the
    share of such cells, rounded to 3 decimals, reaches ``RIGHT_LEVEL``.

    :param data: sized collection of the cell values of one column
    :return: True if the share of quantity cells is >= RIGHT_LEVEL; False
        otherwise, including for an empty column
    """
    total = len(data)
    if total == 0:
        # An empty column would otherwise divide by zero below.
        return False
    collect_num = [kw for kw in data if isinstance(kw, int) or self.isNumberCol(kw)]
    rate = round(len(collect_num) / total, 3)
    return rate >= RIGHT_LEVEL
def isIncrease(self, arr, size):
    """
    Check whether the first ``size`` elements of ``arr`` are non-decreasing.

    Implemented iteratively: a recursive check needs one stack frame per
    element and exceeds Python's recursion limit on long columns.

    :param arr: sequence of mutually comparable values
    :param size: number of leading elements to check
    :return: True if every checked element is >= its predecessor; prefixes of
        zero or one element are trivially non-decreasing
    """
    if size <= 1:
        return True
    prefix = arr[:size]
    return all(prev <= cur for prev, cur in zip(prefix, prefix[1:]))
def isNumberCol(self, kw):
    """
    Check whether a single cell looks like a quantity value.

    A string qualifies when it starts with one or more digits followed by
    either the letter K (case-insensitive) or one to three characters in the
    range U+4E00-U+9FA5, ending at the end of a line.

    :param kw: cell value to inspect
    :return: the regex match object (truthy) for a qualifying string, None for
        a string that does not qualify, False for any non-string value
    """
    if not isinstance(kw, str):
        return False
    quantity_pattern = r'(\d+)((K)|([\u4E00-\u9FA5]{1,3}))$'
    match_flags = re.M | re.I
    return re.match(quantity_pattern, kw, match_flags)
def valid_seq(self, data):
    """
    Validator: negation of ``isseq``.

    :param data: cell values of one column
    :return: True when ``self.isseq(data)`` is falsy, False otherwise
    """
    looks_like_sequence = self.isseq(data)
    return not looks_like_sequence
def valid_num(self, data):
    """
    Validator: negation of ``isnum``.

    :param data: cell values of one column
    :return: True when ``self.isnum(data)`` is falsy, False otherwise
    """
    looks_like_quantity = self.isnum(data)
    return not looks_like_quantity
\ No newline at end of file
......@@ -96,13 +96,22 @@ class DicPredict(BasePredictor):
:param dic_data:
:return:
"""
print(dic_data)
prob_columns = []
temp_pre_model_res = {}
for k, v in dic_data.items():
if self.valid_chain(v):
bol = self.v_chain(v)
if bol:
print(k, bol)
prob_columns.append(k)
continue
if self.isnum(v):
temp_pre_model_res[k] = '数量'
continue
if self.isseq(v):
temp_pre_model_res[k] = '序号'
temp_dic_data = {k: list(filter(lambda x: x != PLACEHOLDER, dic_data[k]))for k in prob_columns}
temp_pre_model_res = {}
for k, v in temp_dic_data.items():
li_single_pred_res = []
for string in v:
......@@ -111,35 +120,28 @@ class DicPredict(BasePredictor):
result = Counter(li_single_pred_res)
# [('brand_name', 4), ('goods_name', 3), ('param', 2)]
li_sort = sorted(result.items(), key=lambda x: x[1], reverse=True)
print(k, li_sort)
temp_pre_model_res[k] = en_to_zh_map[li_sort[0][0]]
print(temp_pre_model_res)
# pre_model_res = {}
def predict(self, dic_data):
def predict(self, dic_data, predict_type='all'):
dic_data = self.pre_deal(dic_data)
pre_id_res = self.pre_predict(dic_data)
if pre_id_res:
return pre_id_res
if len(dic_data) > 0:
self.order_predict(dic_data[0])
pass
def order_predict(self, data):
collect_num = [int(kw) for kw in data if isinstance(kw, float) or isinstance(kw, int)]
judge = self.IsIncrease(collect_num, len(collect_num))
print('judge: ' + str(judge))
return judge
"""
判断列表元素是否递增
"""
def IsIncrease(self, arr, size):
if size == 1:
return True
return (arr[size - 1] >= arr[size - 2]) and self.IsIncrease(arr, size - 1)
if predict_type == 'all':
pre_id_res = self.pre_predict(dic_data)
if pre_id_res:
return pre_id_res
elif predict_type == 'model':
model_id_res = self.model_predict(dic_data)
if model_id_res:
return model_id_res
elif predict_type == 'pre':
pre_id_res = self.pre_predict(dic_data)
if pre_id_res:
return pre_id_res
def pre_deal(self, dic_data):
new_dic_data = {}
......@@ -154,5 +156,16 @@ class DicPredict(BasePredictor):
return new_dic_data
def valid_chain(self, li):
pass
\ No newline at end of file
def v_chain(self, li):
    """
    Run the validation chain over one column.

    Every attribute of this object whose name starts with ``valid_`` is
    looked up and called with ``li``, in the order ``dir()`` reports the
    names. Evaluation stops at the first validator that returns a falsy value.

    :param li: column values handed to each validator
    :return: False if any validator returns a falsy result, True otherwise
    """
    validator_names = (name for name in dir(self) if name.startswith('valid_'))
    return all(getattr(self, name)(li) for name in validator_names)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import xlwt
import xlrd
def read_from_excel(file_name, sheet_name):
    """
    Load one worksheet of an Excel workbook as a column-oriented dict.

    :param file_name: workbook path handed to ``xlrd.open_workbook``
    :param sheet_name: name of the worksheet to read
    :return: dict mapping each zero-based column index to the list of that
        column's cell values, in row order
    """
    sheet = xlrd.open_workbook(file_name).sheet_by_name(sheet_name)
    row_count = sheet.nrows
    column_count = sheet.ncols
    return {
        col_idx: [sheet.cell_value(row_idx, col_idx) for row_idx in range(row_count)]
        for col_idx in range(column_count)
    }
l=[' ', ' ', ' ', '不需要报价', ' ', ' ', ' ', '不需要报价', ' ', ' ', '不需要报价', ' ', ' ', ' ', '不需要报价', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
# Scratch run of the sparse-column filter over the sample list `l`.
# NOTE(review): PLACEHOLDER, NAN_RATE, new_dic_data and k are not defined in
# the visible part of this file - confirm where they come from before running.
# NOTE(review): every element of `l` is a str, so `for item in v` walks the
# characters of one cell, not the cells of a column - confirm this is intended.
for v in l:
# Drop columns whose empty-cell ratio is greater than or equal to 0.8
# NOTE(review): the test below keeps a value when the ratio is <= NAN_RATE;
# whether that matches the ">= 0.8" wording depends on NAN_RATE - TODO confirm.
counter = 0
for item in v:
if str(item).strip() == PLACEHOLDER:
counter += 1
if counter / len(v) <= NAN_RATE:
new_dic_data[k] = v
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment