Commit d6361eca by 陈森彬

新增参数提取,切割,转换

parent 91bd21e6
Showing with 254 additions and 12 deletions
......@@ -15,3 +15,72 @@ else:
model_config['modextr_path'] = project_path + r'/' + 'models_and_extractors/'
# Redis connection settings (host, port, response decoding and password).
# NOTE(review): the password is hard-coded in source control; consider loading
# it from the environment or a secrets store instead.
redis_config = {'host': '127.0.0.1', 'port': 6379, 'decode_responses': True, 'password': 'icDb29mLy2s'}
# Unit normalization table.
# Each top-level key is a base unit; its inner dict maps an accepted spelling
# of a unit (SI-prefixed and differently-cased variants included) to the factor
# that a numeric value is multiplied by to express it in that base unit.
# NOTE(review): the micro prefix appears in visually similar forms (micro sign
# and Greek mu); verify every group covers the variants expected in input data.
unit_map = {
# Resistance, normalized to ohms.
'Ω': {'μΩ': 0.000001,
'uΩ': 0.000001,
'mΩ': 0.001,
'Ω': 1,
'kΩ': 1000,
'KΩ': 1000,
'MΩ': 1000000,
'GΩ': 1000000000,
'TΩ': 1000000000000},
# Capacitance, normalized to picofarads.
'pF': {'pF': 1,
'PF': 1,
'Pf': 1,
'pf': 1,
'nF': 1000,
'NF': 1000,
'Nf': 1000,
'nf': 1000,
'µF': 1000000,
'uF': 1000000,
'UF': 1000000,
'Uf': 1000000,
'uf': 1000000,
# Every casing of "mF" is treated as millifarad here (never mega).
'mF': 1000000000,
'MF': 1000000000,
'Mf': 1000000000,
'mf': 1000000000,
'F': 1000000000000},
# Inductance, normalized to picohenries.
'pH': {'pH': 1,
'nH': 1000,
'µH': 1000000,
'UH': 1000000,
'μh': 1000000,
'uh': 1000000,
'Uh': 1000000,
# Every casing of "mH" is treated as millihenry here (never mega).
'mH': 1000000000,
'Mh': 1000000000,
'MH': 1000000000,
'mh': 1000000000,
'H': 1000000000000},
# Current, normalized to milliamperes.
'mA': {'mA': 1,
'A': 1000},
# Voltage, normalized to volts.
'V': {'V': 1,
'kV': 1000,
'KV': 1000},
# Power, normalized to watts.
'W': {'W': 1,
'kW': 1000, },
# Percentages are scaled to a plain fraction.
'%': {'%': 0.01, },
}
# Maps a unit written in Chinese to the symbol spelling that is used as a key
# inside the groups of unit_map.
cast_map = {
'毫欧': 'mΩ',
'欧姆': 'Ω',
'欧': 'Ω',
'千欧': 'kΩ',
'兆欧': 'MΩ',
'伏特': 'V',
'伏': 'V',
'千伏': 'kV',
'瓦特': 'W',
'瓦': 'W',
}
# Regex alternation of the unit spellings: the unit_map spellings in table
# order followed by the cast_map keys. It is maintained by hand, so it must be
# updated whenever either table changes.
unit_regex = "μΩ|uΩ|mΩ|Ω|kΩ|KΩ|MΩ|GΩ|TΩ|pF|PF|Pf|pf|nF|NF|Nf|nf|µF|uF|UF|Uf|uf|mF|MF|Mf|mf|F|pH|nH|µH|UH|μh|uh|Uh|mH|Mh|MH|mh|H|mA|A|V|kV|KV|W|kW|%|毫欧|欧姆|欧|千欧|兆欧|伏特|伏|千伏|瓦特|瓦"
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from config import unit_map, cast_map, unit_regex
# Captures (number, unit) pairs. The number is either plain digits or two digit
# runs joined by a backslash, dot or slash. A raw string is used so that the
# regex escapes are not interpreted (and warned about) as string escapes.
unit_pattern = re.compile(r'(\d+|\d+[\\./]\d+)(' + unit_regex + ')')
def is_float(s):
    """Return True when ``str(s)`` is a plain decimal number.

    A plain decimal has exactly one dot with digits on both sides, optionally
    preceded by a single minus sign (for example ``"1.5"`` or ``"-0.25"``).
    Integers, exponents, and strings with a missing integer or fractional part
    are rejected.

    :param s: any value; it is converted with ``str`` before being checked.
    :return: True if the text is a decimal number, otherwise False.
    """
    text = str(s)
    if text.count(".") != 1:
        return False
    left, right = text.split(".")
    if not right.isdigit():
        return False
    if left.isdigit():
        return True
    # Negative numbers: exactly one minus sign, leading, followed by digits.
    return left.startswith('-') and left.count('-') == 1 and left[1:].isdigit()
def pre_judge(kw):
    """Decide whether a keyword passes the preliminary filter.

    A keyword is rejected when it is a decimal number (per ``is_float``), is a
    single character, consists only of digits, or is 30 or more characters
    long.

    :param kw: keyword string to examine.
    :return: False for rejected keywords, True otherwise.
    """
    rejected = is_float(kw) or len(kw) == 1 or kw.isdigit() or len(kw) >= 30
    return not rejected
def cut_params(kw):
    """Split a keyword into its parts using the first delimiter that fits.

    Delimiters are tried in priority order: space, ``|``, ASCII comma,
    full-width comma and ``/``. The first one that yields at least three parts
    wins. When none does, the keyword is returned unsplit.

    :param kw: keyword string to split.
    :return: list of parts, or ``[kw]`` when no delimiter produced 3+ parts.
    """
    # The original tried the ASCII comma twice, which made the second attempt
    # unreachable; the full-width comma is assumed to be what was intended.
    for delimiter in (" ", "|", ",", ",", "/"):
        parts = kw.split(delimiter)
        if len(parts) > 2:
            return parts
    return [kw]
def check_param(kw):
    """Extract the number+unit parameters contained in a keyword.

    :param kw: keyword string to scan with ``unit_pattern``.
    :return: the list of ``number + unit`` strings when at least two
        parameters are found; otherwise ``[kw]`` unchanged.
    """
    matches = unit_pattern.findall(kw)
    if len(matches) < 2:
        return [kw]
    # Each match is a (number, unit) tuple; glue the two halves back together.
    return [number + unit for number, unit in matches]
def get_regex():
    """Build a regex alternation from every unit spelling in ``unit_map``.

    :return: the spellings joined with ``|`` in table order (empty string when
        the table has no spellings).
    """
    spellings = [spelling for group in unit_map.values() for spelling in group]
    return "|".join(spellings)
def word_conversion(unit_res, kw):
    """Convert a parameter whose unit is written in Chinese.

    The Chinese unit is mapped to its symbol through ``cast_map`` and then
    normalized to the base unit of the matching ``unit_map`` group. When the
    unit carries a multiplier, the numeric part is scaled by it; previously the
    multiplier was spliced into the text, so ``10千欧`` became ``101000Ω``
    instead of ``10000Ω``.

    :param unit_res: unit fragments extracted from ``kw`` (joined by ``get_unit``).
    :param kw: the full parameter text, e.g. number followed by a Chinese unit.
    :return: the normalized parameter string, or None when the unit is unknown
        or the numeric part cannot be parsed for scaling.
    """
    unit_str = get_unit(unit_res)
    if unit_str not in cast_map:
        return None
    symbol = cast_map[unit_str]
    for unit_key, unit_data in unit_map.items():
        if symbol not in unit_data:
            continue
        factor = unit_data[symbol]
        if factor == 1:
            # Already a base unit: only the spelling of the unit changes.
            return kw.replace(unit_str, unit_key)
        number_text = kw.replace(unit_str, "")
        try:
            number = float(number_text) if "." in number_text else int(number_text)
        except ValueError:
            # Not a plain number (sign marks, fractions, stray text): give up
            # rather than emit a malformed value.
            return None
        return str(number * factor) + unit_key
    return None
def unit_conversion(unit_res, kw):
    """Convert a parameter whose unit is written with symbols (no Chinese).

    The numeric part of ``kw`` is multiplied by the factor registered for its
    unit in ``unit_map`` and suffixed with the group's base unit. Percentages
    are returned as a bare fraction without a suffix. A value written as
    ``a/b`` is divided and rounded with ``round_up`` before scaling.

    :param unit_res: unit fragments extracted from ``kw`` (joined by ``get_unit``).
    :param kw: the full parameter text, e.g. ``"10kΩ"``.
    :return: the normalized parameter string, or None when the unit is unknown
        or the numeric part cannot be parsed (e.g. ``"±5%"``, zero denominator).
    """
    unit_str = get_unit(unit_res)
    for unit_key, unit_data in unit_map.items():
        if unit_str not in unit_data:
            continue
        factor = unit_data[unit_str]
        number_text = kw.replace(unit_str, "")
        try:
            if "/" in kw:  # fractions are handled separately
                parts = number_text.split("/")
                number = round_up(int(parts[0]) / int(parts[1]))
                suffix = unit_key
            else:
                number = float(number_text) if "." in kw else int(number_text)
                # Percentages are reported without a unit suffix.
                suffix = "" if "%" in kw else unit_key
        except (ValueError, IndexError, ZeroDivisionError):
            # Unparseable numeric part: report "not convertible" instead of
            # letting the exception abort the caller.
            return None
        return str(number * factor) + suffix
    return None
def round_up(value):
    """Round ``value`` to three decimal places using the built-in ``round``.

    :param value: number to round.
    :return: the value scaled by 1000, rounded to an integer, scaled back.
    """
    thousandths = round(value * 1000)
    return thousandths / 1000.0
def get_not_exist_list(attrs_list, cut_list):
    """Return the words of ``cut_list`` that contain none of the attributes.

    :param attrs_list: attribute strings to look for as substrings.
    :param cut_list: candidate words.
    :return: the words, in their original order, in which no attribute occurs.
    """
    return [
        word
        for word in cut_list
        if not any(attr in word for attr in attrs_list)
    ]
def get_unit(unit_list):
    """Concatenate the non-empty fragments of ``unit_list`` into one string.

    :param unit_list: iterable of string fragments; falsy entries are skipped.
    :return: the joined unit string (empty when there are no fragments).
    """
    return "".join(fragment for fragment in unit_list if fragment)
......@@ -6,7 +6,7 @@ from urllib.parse import unquote
import tornado.web
import tornado.ioloop
from utils.functions import pre_judge
from utils.functions import pre_judge, word_conversion, cut_params, check_param, unit_conversion, get_not_exist_list
from utils.redis_cli import redis_cli
from predict.kw_predict import KwPredict
......@@ -14,6 +14,7 @@ from utils.status import code2msg
import re
def predict_gn_param(predict: KwPredict, kw) -> int:
'''
传入预测类实例和关键字,返回是否是型号名或参数
......@@ -23,7 +24,7 @@ def predict_gn_param(predict: KwPredict, kw) -> int:
'''
pat = re.compile(r'(?:.*\-.*|\d+)')
if len(kw) <= 2:
if len(kw) <= 1:
return 0
elif pat.findall(kw):
return 1
......@@ -81,15 +82,63 @@ class KwHandler(tornado.web.RequestHandler):
for kw in list_data:
dic_res[kw] = predict_gn_param(self.predic, kw)
self.write({'status': 1,'result':dic_res})
self.write({'status': 1, 'result': dic_res})
class UCHandler(tornado.web.RequestHandler):
    """Tornado handler that normalizes unit-bearing parameters in keywords.

    The POST body is parsed as JSON and iterated as a list of keywords. Each
    keyword is broken into candidate parameters; those recognized as
    parameters and carrying a unit are converted, and the converted values are
    returned under ``attrs``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.predic = KwPredict('validSingle')
        # Detects runs of CJK ideographs, i.e. units written in Chinese.
        self.w_par = re.compile(r'[\u4e00-\u9fa5]+')
        # Every character that is not part of the numeric value counts as unit.
        self.unit_pattern = re.compile('([^0-9±/,.])')

    def _normalize_param(self, param):
        """Return the converted form of ``param``, or None if not convertible."""
        if predict_gn_param(self.predic, param) != 1:  # not a parameter
            return None
        unit_chars = self.unit_pattern.findall(param)  # extract the unit
        if not unit_chars:
            return None
        if self.w_par.findall(param):  # unit written in Chinese
            return word_conversion(unit_chars, param)
        # Symbol units: any falsy conversion result is treated as "no result".
        return unit_conversion(unit_chars, param) or None

    async def post(self):
        payload = json.loads(self.request.body)
        if payload:
            converted_attrs = []
            matched_params = []
            for keyword in payload:
                for param in check_param(keyword):  # extract parameters
                    converted = self._normalize_param(param)
                    if converted is not None:
                        converted_attrs.append(converted)
                        matched_params.append(param)
                pieces = cut_params(keyword)
                leftovers = get_not_exist_list(matched_params, pieces)
                print("unknown", leftovers)
            res = {
                'status': 1,
                'words': [],
                'attrs': converted_attrs,
                'encap': [],
            }
        else:
            # Empty payload: answer with the message for error code 100002.
            res = code2msg('100002')
        print(res)
        self.write(res)
def gen_app():
    """Build the Tornado application from ``register_tornado_handlers``.

    :return: a ``tornado.web.Application`` with one (path, handler) route per
        entry of the routing table.
    """
    routes = list(register_tornado_handlers.items())
    return tornado.web.Application(handlers=routes)
# Routing table: URL path -> handler class, consumed by gen_app().
register_tornado_handlers = {
    '/keyword': KwHandler,
    '/unit_conversion': UCHandler,
}
def http_server(port):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment