Commit b9ac888b by lichenggang

添加异常处理,添加文本识别

parent 00d16687
......@@ -5,8 +5,8 @@ import grpc
from utils.log_manager import bom_log
from protobuf import classify_pb2
from protobuf import classify_pb2_grpc
import traceback
from predic_fac import PredictorFac
from utils.status import code2msg
class Classify(classify_pb2_grpc.classifyServicer):
......@@ -16,32 +16,17 @@ class Classify(classify_pb2_grpc.classifyServicer):
def Classify(self, request, context):
    """Handle one gRPC ``Classify`` call and reply with the result as JSON.

    ``request.keyword`` is parsed as JSON first.  A decoded dict or list is
    handed to the predictor factory as-is; anything else (invalid JSON, or a
    bare JSON scalar such as ``"100"``) is passed on as the original text so
    the keyword predictor always receives a string.

    Args:
        request: gRPC request; only ``request.keyword`` is read.
        context: gRPC call context (unused).

    Returns:
        ``classify_pb2.ClassifyReply`` whose ``message`` is the JSON-encoded
        result dict.  On success the dict carries ``status == 1``; otherwise
        it is the ``code2msg`` payload for the returned status code.
    """
    bom_log.info(f'grpc收到数据: {request.keyword}')
    # Only the parse belongs in the try block, so that errors raised by the
    # predictor itself are never mistaken for "not JSON".
    try:
        payload = json.loads(request.keyword)
    except json.decoder.JSONDecodeError:
        payload = None
    if not isinstance(payload, (dict, list)):
        payload = request.keyword

    code, res = self.predictorfac.predict(payload)
    if code != 1:
        res = code2msg(code)
        bom_log.error(f'grpc识别报错: ' + res['message'])
    else:
        res['status'] = code
    return classify_pb2.ClassifyReply(message=json.dumps(res))
def fac_test_predic(self, data):
res = self.predictorfac.predict(data)
......
......@@ -5,10 +5,10 @@ from urllib.parse import unquote
import tornado.web
import tornado.ioloop
import traceback
from utils.log_manager import bom_log
from predic_fac import PredictorFac
from utils.status import code2msg
class BaseHandler(tornado.web.RequestHandler):
......@@ -18,53 +18,33 @@ class BaseHandler(tornado.web.RequestHandler):
class KwHandler(BaseHandler):
    """HTTP GET endpoint that classifies a single keyword."""

    async def get(self):
        """Classify the ``keyword`` query argument and write the result.

        Query arguments:
            keyword: text to classify (required).
            type: prediction type forwarded to the predictor, default ``'all'``.

        The response body is the predictor's result dict with ``status`` set
        to 1 on success, or the ``code2msg`` payload for any other code.
        """
        target = unquote(self.get_argument('keyword'))
        predict_type = unquote(self.get_argument('type', 'all'))
        bom_log.info(f'http收到识别关键词: {target}, 预测类型为{predict_type}')

        # predict() reports failures through the returned code, so no
        # blanket try/except is needed around it.
        code, res = self.predictorfac.predict(target, predict_type)
        if code != 1:
            res = code2msg(code)
            bom_log.error(f'http关键词识别报错: ' + res['message'])
        else:
            res['status'] = code
        self.write(res)
class DictHandler(BaseHandler):
    """HTTP POST endpoint that classifies a JSON document or plain text."""

    async def post(self):
        """Classify the request body and write the result.

        The body is parsed as JSON.  A decoded dict or list goes to the
        predictor factory unchanged; a body that is not JSON (or is only a
        JSON scalar) is decoded to text and classified as a single keyword.

        The response body is the predictor's result dict with ``status`` set
        to 1 on success, or the ``code2msg`` payload for any other code.
        """
        bom_log.info(f'http收到识别: {self.request.body}')
        body = self.request.body
        # Keep the try block down to the parse so predictor errors are not
        # mistaken for malformed JSON.  json.loads on bytes may also raise
        # UnicodeDecodeError while detecting the encoding.
        try:
            payload = json.loads(body)
        except (json.decoder.JSONDecodeError, UnicodeDecodeError):
            payload = None
        if not isinstance(payload, (dict, list)):
            # The request body arrives as bytes; the keyword predictor is
            # otherwise only ever given str (see KwHandler), so decode first.
            payload = body.decode('utf-8', errors='replace') if isinstance(body, bytes) else body

        code, res = self.predictorfac.predict(payload)
        if code != 1:
            res = code2msg(code)
            bom_log.error(f'http识别报错: ' + res['message'])
        else:
            res['status'] = code
        self.write(res)
def gen_app():
return tornado.web.Application(handlers=[(k, v) for k, v in register_tornado_handlers.items()])
......
......@@ -15,10 +15,12 @@ def run(http_port, grpc_port):
p_http.start()
p_grpc.start()
def http_run(http_port):
    """Run only the HTTP side by handing ``http_port`` to ``http_server``."""
    http_server(http_port)
if __name__ == "__main__":
    http_port = 50052
    grpc_port = 50051
    # NOTE(review): only the HTTP server is started here, so the gRPC
    # servicer is not served from this entry point and grpc_port is unused.
    # If that was a local-debugging change, switch back to
    # run(http_port, grpc_port).  Do not call both: each would try to serve
    # http_port.
    http_run(http_port)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import json
from predict import dict_predict, kw_predict
from predict import dict_predict, kw_predict, txt_predict
from utils.status import code2msg, Status, StatusCode
class PredictorFac:
    """Pick a predictor by input type and normalise the outcome to ``(code, result)``."""

    # Exposed on the class so the error types are reachable through an instance.
    Status = Status

    def __init__(self):
        self.kw_predictor = kw_predict.KwPredict('single')
        self.dict_predictor = dict_predict.DicPredict('dict')
        self.txt_predictor = txt_predict.TxtPredict('txt')

    def _get_predictor(self, data):
        """Return the dict predictor for a dict, the text predictor for a list, else the keyword predictor."""
        if isinstance(data, dict):
            return self.dict_predictor
        if isinstance(data, list):
            return self.txt_predictor
        return self.kw_predictor

    def predict(self, dic_data, predict_type='all'):
        """Run the matching predictor on ``dic_data``.

        Args:
            dic_data: dict, list, or any other value (treated as a keyword).
            predict_type: forwarded unchanged to the predictor.

        Returns:
            ``(1, result)`` on success, or ``(status_code, code2msg(status_code))``
            when one of the ``Status.StatusErrTup`` errors is raised.  Any
            other exception propagates to the caller.
        """
        predictor = self._get_predictor(dic_data)
        try:
            res = predictor.predict(dic_data, predict_type)
            if isinstance(predictor, dict_predict.DicPredict):
                # A dict result must name a quantity column and at least one
                # of parameter / part-number; the check is a substring test
                # on the rendered result.
                rendered = str(res)
                if '数量' not in rendered:
                    raise self.Status.NoQtyError
                if '参数' not in rendered and '型号' not in rendered:
                    raise self.Status.NoParamGnError
            return 1, res
        except self.Status.StatusErrTup as e:
            stat_code = e.value
            return stat_code, code2msg(stat_code)
if __name__ == "__main__":
    def get_test_data():
        """Load the sample BOM spreadsheet as ``{column: [cell, ...]}``."""
        import pandas as pd
        df = pd.read_excel(r'C:\Users\ICHUNT\Desktop\bomlist\BOM_型号匹配问题.xls', header=None)
        # Empty cells become the '?' placeholder.
        df.fillna('?', inplace=True)
        return df.to_dict(orient='list')

    p = PredictorFac()
    # To exercise the dict predictor instead, pass
    # {'第%s列' % k: v for k, v in get_test_data().items()} to p.predict().
    #
    # The list is passed as-is: serialising it with json.dumps() first would
    # turn it into a str and route it to the keyword predictor rather than
    # the text predictor.
    data = ['lm358dt 3', 'C0806fwefhkds 5k']
    res = p.predict(data, predict_type='all')
    print(res)
......@@ -2,6 +2,8 @@
# -*- coding:utf-8 -*-
import re, os, pickle
from collections import Counter
from copy import deepcopy
from sklearn.externals import joblib
from config import model_config
......@@ -9,7 +11,7 @@ from utils.log_manager import get_logger
from utils.robots import dd_send_msg
import pandas as pd
from static_config import *
from utils.status import StatusCode, code2msg
# 数量列阈值
RIGHT_LEVEL = 0.7
# 序号列阈值
......@@ -39,6 +41,7 @@ class ClassBasePredictorMeta(type):
model = joblib.load(modextr_path + i)
setattr(cls, i, model)
def __new__(mcs, name, bases, attrs):
return type.__new__(mcs, name, bases, attrs)
......@@ -50,7 +53,6 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
# 占位符
PLACEHOLDER = '?'
pd = pd
def __init__(self, name):
self.name = name
......@@ -100,7 +102,7 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
是否是数量列辅助函数
"""
if isinstance(kw, str):
return re.match(r'(\d+|\d+(\.\d+))($|(K)|([\u4E00-\u9FA5]{1,3}))$', kw, re.M | re.I)
return re.match(r'(\d+|\d+(\.\d+))($|(K)|[个十百千万])$', kw, re.M | re.I)
else:
return False
......@@ -171,6 +173,7 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
@classmethod
def get_comprehensive_res(cls, head_result, model_result, major='head', vote_type='standard'):
if vote_type == 'standard':
vote_count = {k: [] for k in STD_FIELDS_MAP.keys()}
else:
......@@ -183,8 +186,11 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
for k, v in head_result.items():
vote_count[v].append(k)
vote_count_copy = deepcopy(vote_count)
for k, v in model_result.items():
if len(vote_count_copy[v]) == 1:
continue
vote_count[v].append(k)
# 此处并没有处理表头和模型两者预测冲突的情况, 但因为repeat_max方法是稳定的,所以冲突后会取第一个也就是表头预测的结果, 之后按照major参数决定冲突结果
......@@ -286,5 +292,6 @@ class BasePredictor(metaclass=ClassBasePredictorMeta):
return round(count / len(data), 3) >= SIMPLE_ENCAP_LEVEL
if __name__ == "__main__":
print(BasePredictor.is_ref(
['?', '位置', '?', '4C33', '3C5,11C8', '3C6,3C7,2C13,2C14,C283,C285,C286', '?', 'C287,C575,10C33,10C34,10C36,10C37', '4C6,4C26,4C49,4C65', '10C7,10C10,10C11,10C29', '8C4,8C5', '8C40,8C41', '1C7,5C2,5C3,10C2,1C25,1C41,2C11,2C21,2C25,2C37,2C40,4C62,8C29,8C30', '10C12', '10C26,10C35', '1C24,1C43', '10C16,10C20,10C22,10C23', '10C19,10C25', '1C1,1C6,2C1,2C3,2C4,2C6,2C7,3C1,3C2,3C4,3C8,4C2,6C6,6C8,6C9,7C6,7C9,8C7,8C8,8C9,10C4,10C5,10C6,10C9,1C10,1C13,11C4,1C15,1C16,11C6,11C7,1C19,1C22,1C28,1C37,1C44,1C57,1C59,1C63,2C15,2C17,2C29,2C33,2C36,2C38,2C39,3C13,4C12,4C13,4C14,4C15,4C16,4C18,4C19,4C20,4C22,4C23,4C24,4C25,4C28,4C30,4C32,4C34,4C41,4C44,4C45,4C46,4C48,4C51,4C54,4C55,4C57,4C58,4C59,4C60,4C61,6C14,6C16,7C13,8C11,8C17,8C21,10C14,10C17,10C18,10C21,10C28,10C30,10C32,10C42,4C111,4C118,4C121,4C122,C28,C97,C292,C293,C294,C296,C298,C299,C300,C301,C304,C571\n', '10C3,10C15', '2C2,2C9,3C3,4C4,5C6,5C7,5C8,5C9,6C7,7C3,7C7,8C3,2C16,2C18,2C22,2C23,2C26,2C27,2C31,4C17,4C47,4C56,4C63,4C93,5C10,6C46,8C10,8C16,8C18,8C23,8C25,8C31,8C32,8C47,9C21,9C28,4C100,C98,11C9,C290,C291,C295,C297,11C10', '10C8,10C24', '5C1,5C5,2C20,2C30,4C36,4C86,9C27,2C10', '2C12,8C53,9C22', '2C5,3C14,8C37,8C44,10C27', '1C9,4C1,4C7,10C1,1C11,1C14,1C17,1C18,1C20,1C21,1C53,1C58,1C60,1C62,1C64,2C34,2C35,3C15,4C11,4C29,4C31,4C42,4C64,4C82,4C83,8C15,8C42,10C31,4C157,C96,C288,C289,C302,C303,C574', '1C2,1C8,1C30,1C32,1C38,1C70', '1C4,1C5,2C8,4C3,7C4,1C12,1C23,1C26,1C29,1C31,1C34,1C42,1C48,2C32,7C11,7C14,C1,C570', '1C51,10C39,10C41', '1C66,1C71', '?', '?', '10R9,3R24,8R45,8R46', '3C10,7R6,R154,R155,2R21,3R25,7R25,9R32,9R33,9R34,9R35,9R36,9R37,9R38,9R39,9R40,9R41,9R42,9R43,9R44,9R45,9R46,9R47,9R48,9R49,9R50,9R51,9R52,9R53,10R18', 'R1,R2,R3,R4', '7R3,7R4,7R7,7R8,7R23,7R24,8R39,8R41,8R42,8R44', '1R1,1R2,1R16,1R19,1R26,1R49', '10R1,10R8,10R10,10R13', '2R7,2R8,3R3,3R4,3R5,3R6,3R7,2R10,3R20,3R33,3R34,3R35,3R36,3R37', '8R3,8R29,8R30', '6R3', '8R33,8R34,8R35,8R36', '?', '8R2,3R16', '8R37,8R38,8R40,8R43', '?', 
'2R4,2R5,10R2,10R3,3R17,5R29,5R30,5R32,5R33,5R38,9R13,9R14', '?', '3R15', '5R42', '4R10,4R11,4R12,4R28,4R35', '?', '10R4,10R14', '6R2', '9R5', '1R50,1R53,1R54,1R55,1R56,2R19,5R14,5R19,5R24,5R25,11R11,11R12', '5R40', '1R51', '8R1,4R15,4R16,4R23,4R24,4R33,4R34,10R22,10R30,10R40,10R41', '8R5,8R7', 'R156,R161,1R6,2R2,10R5,8R24,9R15,9R16,11R13,11R15,11R16', '5R28,5R41', '8R18', 'R158', '1R7', '8R16', '2R9', '7R1,8R4,8R6,9R1,10R6,1R21,1R42,1R57,1R58,R160,2R26,3R19,3R23,8R20,8R22,10R11,R167,9R54', '7R5,1R22,1R27,5R16', '1R43', '8R21,8R28', '10R17', '2R12', '1R31', '1R28,10R12', '1R3,5R4,5R37', '1R32', '1R5', '1R4,1R13,1R17,1R23,1R33', '1R14,1R39', '1R15', '5R2,1R12,1R20,1R44,5R11,5R12,5R13,5R17,5R18,5R35', '8R26,8R31', 'R164', '1R29', '2R3', '1R34', '5R3,1R10,1R41,1R60,5R36,11R14', 'R163', '1R18', 'R162,R165', 'R159', '5RN3,5RN4,5RN5,5RN6', '?', '?', '7FB5', '8FB1,10FB1,11FB1,FB4,FB6,FB8,8L2', '?', '10FB2,10FB3', '1L3,1L4', '?', '1L2', 'L2,3L1', 'L1', '1L5', '1L1', '1L6', '5L1,5L2', '10L1,10L2,10L3,10L4', '?', '?', 'D45', 'Q23', '1Q1,1Q8,3Q1,5Q3,5Q4,1Q5,1Q7', '1Q3', '?', '1Q6', '5Q2', '5Q5', '5D3,5D4,5D5,5D6', '5D7,5D8,5D9,7D1,7D2,7D3,7D4,7D5,8D3,8D4,11D1,11D2,5D10,5D11,5D12,5D20,5D22,5D23,5D24', '8D1', '8D2', 'T4', 'U1', 'U7,U18', '?', 'U9', 'U17', '1U2,1U3,1U10', '1U4,1U6,1U7', '2U1', '4U1,4U2,4U3,4U4', '?', '1U1,1U9', '1U5', '8U1', '2U2', '1U11', '3U2', '10U1', '3Y2', '?', '2Y1', 'Y1', '3J1', '8T1', '8JA5', 'CN13', 'J45', '?', '5JA2,5JA3', '1CN2', 'J2,J3', 'J53,J56,J58', '2CN1', 'J4', 'J1', 'J52', 'P2', 'J43']))
print(BasePredictor.is_num(['1k','1k','1k','1k']))
print(re.match(r'(\d+|\d+(\.\d+))($|(K)|[个十百千万])$', '1k', re.M | re.I))
\ No newline at end of file
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from predict.base_handler import BasePredictor
from static_config import EN_TO_ZH_MAP
import re
class TxtPredict(BasePredictor):
    """Predictor for free-text BOM lines such as ``'lm358dt 3'``.

    Each line is split into tokens; the first quantity-like token becomes
    ``qty`` and the remaining tokens are divided into ``brand`` and
    ``gn_or_param``.
    """

    # Quantity token: an integer or decimal, optionally followed by K
    # (case-insensitive) or one of 个十百千万.
    _QTY_RE = re.compile(r'(\d+|\d+(\.\d+))($|(K)|[个十百千万])$', re.M | re.I)
    # Token separators within one line.
    # NOTE(review): the two ',' alternatives are identical as written; one of
    # them was presumably meant to be the full-width comma - confirm.
    _SPLIT_RE = re.compile(r'(?:,|,|;|\s)\s*')

    @classmethod
    def isnum(cls, kw):
        """Return a truthy match if ``kw`` looks like a quantity, else a falsy value.

        Tokens starting with ``'0'`` are rejected outright.
        NOTE(review): presumably so codes like ``0805`` are not read as
        quantities - confirm; it also rejects values such as ``0.5``.
        """
        text = str(kw)
        if text.startswith('0'):
            return False
        return cls._QTY_RE.match(text)

    def pre_deal(self, li: list):
        """Tokenise each line and pull out its quantity.

        Returns:
            A list of ``(row_index, resp_data, remaining_tokens)`` tuples.
            ``resp_data['statu_code']`` is 1 when a quantity and at least one
            other token were found; otherwise it is 0 and ``resp_data`` holds
            only a ``message``.
        """
        data = []
        for row_index, line in enumerate(li):
            tokens = [tok.strip() for tok in self._SPLIT_RE.split(line) if tok.strip()]
            qty_pos = next((pos for pos, tok in enumerate(tokens) if self.isnum(tok)), None)
            if qty_pos is None:
                resp_data = {'message': '未检测到数量', 'statu_code': 0}
            else:
                qty = tokens.pop(qty_pos)
                if tokens:
                    resp_data = {
                        'qty': qty,
                        'brand': None,
                        'gn_or_param': None,
                        'statu_code': 1
                    }
                else:
                    # The quantity was the only token on the line.
                    resp_data = {'message': '未检测到参数/型号', 'statu_code': 0}
            data.append((row_index, resp_data, tokens))
        return data

    def _split_brand(self, items):
        """Return ``(brand, remaining_items)``; ``brand`` is None when neither end is a brand.

        Only the last and then the first token are classified, and the second
        model call is skipped when the first already identified the brand.
        """
        if len(items) >= 2:
            for candidate, rest in ((items[-1], items[:-1]), (items[0], items[1:])):
                label, _prob = self.get_single_predict(candidate, BasePredictor.model, BasePredictor.extractor)
                # .get(): a label without a mapping is simply "not a brand".
                if EN_TO_ZH_MAP.get(label) == '品牌':
                    return candidate, rest
        return None, items

    def predict(self, li: list, predict_type='all'):
        """Classify every line of ``li``.

        Args:
            li: list of text lines.
            predict_type: only logged by this predictor.

        Returns:
            ``{'results': {row_index: resp_data}}`` with one entry per line.
        """
        self.info.info('预测类型为: %s, 接收数据: %s' % (predict_type, li))
        resp = {}
        for row_index, resp_data, items in self.pre_deal(li):
            if resp_data['statu_code'] != 0:
                brand, rest = self._split_brand(items)
                resp_data['brand'] = brand
                resp_data['gn_or_param'] = ' '.join(rest)
            resp[row_index] = resp_data
        return {'results': resp}
if __name__ == "__main__":
    # Smoke-check the token-level quantity test defined on this class.
    # The inherited is_num is called with a list elsewhere in the project,
    # so it is not the right check for a single token.
    print(TxtPredict.isnum('1K'))
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import inspect
def code2msg(code):
    """Build the ``{"status", "message"}`` payload for a status code.

    Args:
        code: a status code.  Codes are stored as strings, so the value is
            normalised with ``str()`` first; an int such as ``100003`` then
            resolves the same as ``"100003"``.

    Returns:
        A dict with the normalised code under ``"status"`` and its Chinese
        description under ``"message"``.  Codes that are not registered in
        ``StatusCode.StatusDict`` produce the ``StatusCode.Unknown`` payload.
    """
    key = str(code)
    if key not in StatusCode.StatusDict:
        key = StatusCode.Unknown
    return {"status": key, "message": StatusCode.StatusDict[key][1]}
class StatusCode:
    """String status codes and their descriptions.

    ``StatusDict`` maps each code to ``(english_label, chinese_message)``.
    """

    Succeed = "1"
    Unknown = "0"
    ParamError = "100001"
    MissingParam = "100002"
    NoQtyError = "100003"
    NoParamGnError = "100004"

    # Keyed by the constants above so a code and its description cannot drift apart.
    StatusDict = {
        Succeed: ("succeed", "正常"),
        Unknown: ("unknown", "未知错误"),
        ParamError: ("param error", "输入参数错误"),
        MissingParam: ("missing param", "缺少输入的参数"),
        NoQtyError: ("no qty", "未检测到数量"),
        NoParamGnError: ("no param or gn", "未检测到参数/型号列"),
    }
class BaseStatusError(Exception):
    """Base class for errors that carry a ``StatusCode`` value.

    Positional arguments are passed to ``Exception``.  Recognised keywords:

    * ``value`` - the status code to carry.  When omitted, the code is looked
      up on ``StatusCode`` by ``class_name`` (default: this class's own
      name), falling back to ``StatusCode.Unknown`` when no such code exists.
    * ``msg`` - text returned by ``str()``.  When omitted, the first
      positional argument is used, then the English label registered for the
      code.
    * ``error_logging`` - explicit on/off flag (coerced with ``bool``).
    * ``off_error_logging`` - its mere presence turns logging off unless
      ``error_logging`` is also given.

    Attributes set: ``value``, ``msg``, ``error_logging`` and ``frames`` (the
    frame record of whatever called this ``__init__``, or None when
    ``error_logging`` is off).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args)

        if "value" in kwargs:
            self.value = kwargs["value"]
        else:
            class_name = kwargs.get("class_name", self.__class__.__name__)
            # Not every error class has a code of its own; those report Unknown
            # instead of failing with AttributeError while being raised.
            self.value = getattr(StatusCode, class_name, StatusCode.Unknown)

        if "msg" in kwargs:
            self.msg = kwargs["msg"]
        elif args:
            self.msg = args[0]
        else:
            labels = StatusCode.StatusDict.get(
                self.value, StatusCode.StatusDict[StatusCode.Unknown])
            self.msg = labels[0]

        if "error_logging" in kwargs:
            self.error_logging = bool(kwargs["error_logging"])
        else:
            self.error_logging = "off_error_logging" not in kwargs

        # Index 0 is this __init__'s own frame; index 1 is its caller.
        self.frames = inspect.getouterframes(inspect.currentframe())[1] if self.error_logging else None

    def __str__(self):
        return str(self.msg)
class Status:
    """Namespace for the concrete status errors.

    Every class except ``Unknown`` switches error logging off by passing
    ``off_error_logging`` to ``BaseStatusError``.  ``StatusErrTup`` lists the
    classes that callers catch as a group.
    """

    class Succeed(BaseStatusError):
        """Success signal; ``result`` holds the first positional argument, or None."""

        class NotFound(BaseStatusError):
            # NOTE(review): StatusCode has no `NotFound` entry; pass `value=`
            # explicitly when raising this.
            def __init__(self, *args, **kwargs):
                # setdefault: a caller-supplied flag must not collide with ours.
                kwargs.setdefault("off_error_logging", True)
                super().__init__(*args, **kwargs)

        def __init__(self, *args, **kwargs):
            kwargs.setdefault("off_error_logging", True)
            super().__init__(*args, **kwargs)
            # Raising Succeed without a payload used to fail with IndexError.
            self.result = args[0] if args else None

    class MissingParam(BaseStatusError):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("off_error_logging", True)
            super().__init__(*args, **kwargs)

    class ParamError(BaseStatusError):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("off_error_logging", True)
            super().__init__(*args, **kwargs)

    class NoQtyError(BaseStatusError):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("off_error_logging", True)
            super().__init__(*args, **kwargs)

    class NoParamGnError(BaseStatusError):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("off_error_logging", True)
            super().__init__(*args, **kwargs)

    class Unknown(BaseStatusError):
        pass

    # Error classes handled together in `except Status.StatusErrTup`.
    StatusErrTup = (Succeed.NotFound, Unknown,
                    ParamError, MissingParam, NoQtyError, NoParamGnError)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment