Commit 777aca61 by 陈森彬

测试匹配未能识别的型号

parent 86b97c91
......@@ -16,6 +16,8 @@ else:
redis_config = {'host': '127.0.0.1', 'port': 6379, 'decode_responses': True, 'password': 'icDb29mLy2s'}
# redis_config = {'host': '192.168.1.235', 'port': 6379, 'decode_responses': True, 'password': 'icDb29mLy2s'} #部署在252时
# brand_redis_config = {'host': '39.108.51.147', 'port': 6379, 'decode_responses': True, 'password': 'icDb29mLy2s'}
MG_HOST_SET = {
'test': '192.168.1.237',
'produce': '172.18.137.23'
......
......@@ -14,6 +14,7 @@ num_pattern = re.compile('(\s|[\u4e00-\u9fa5])(\d+\.\d+|\d+)(\s|个|片)')
# Raw string literals are used so that regex escapes such as \s, \d and \. are
# passed to the re module untouched instead of going through Python's own
# string-escape handling (which warns on unknown escapes). The re module
# resolves the \uXXXX escapes itself, so the compiled patterns are unchanged.

# One whitespace or CJK (U+4E00-U+9FA5) character, then an integer or decimal,
# then the letter "K" (case-insensitive).
k_num_pattern = re.compile(r'(\s|[\u4e00-\u9fa5])(\d+\.\d+|\d+)(K)', re.I)
# Same leading character and number, followed by "pcs" (case-insensitive).
pcs_num_pattern = re.compile(r'(\s|[\u4e00-\u9fa5])(\d+\.\d+|\d+)(pcs)', re.I)
# A bare integer or decimal anywhere in the text.
price_num_pattern = re.compile(r'(\d+\.\d+|\d+)')
# An integer or decimal with the optional single character ('.', ASCII letter
# or '-') directly before and after it captured as groups 1 and 3; those groups
# are empty strings when no such neighbour is present.
price2_num_pattern = re.compile(r'([.a-zA-Z\-]?)(\d+\.\d+|\d+)([.a-zA-Z\-]?)')
# A run of one or more CJK characters.
zh_pattern = re.compile(r'[\u4e00-\u9fa5]+')
# An integer or decimal immediately followed by "usd" (case-insensitive).
usd_num_pattern = re.compile(r'(\d+\.\d+|\d+)(usd)', re.I)
time_pattern = re.compile(
......@@ -425,9 +426,9 @@ def check_price(kw_info):
if res:
return res[0][0].strip()
else:
res = price_num_pattern.findall(kw_info)
if res and "+" not in kw_info:
return res[0].strip()
res = price2_num_pattern.findall(kw_info)
if res and "+" not in kw_info and not res[0] and not res[2]:
return res[1].strip()
else:
return False
......
......@@ -5,3 +5,4 @@ import redis
from config import redis_config
# Module-level Redis client created once at import time from the keyword
# arguments in config.redis_config; other modules import and reuse it.
redis_cli = redis.Redis(**redis_config)
# Client built from brand_redis_config, currently commented out.
# brand_redis_cli = redis.Redis(**brand_redis_config)
......@@ -138,6 +138,7 @@ class ImHandler(tornado.web.RequestHandler):
super().__init__(*args, **kwargs)
self.predic = KwPredict('validSingle')
self.redis_cli = redis_cli
# self.brand_redis_cli = brand_redis_cli #暂未启用
self.num_pattern = re.compile('') # 单位匹配
......@@ -150,7 +151,7 @@ class ImHandler(tornado.web.RequestHandler):
code = '100001'
res = code2msg(code)
else:
brand_set = redis_cli.smembers('brand_set')
brand_set = self.redis_cli.smembers('brand_set')
data = []
for data_info in data_list:
data_obj = {}
......@@ -163,7 +164,7 @@ class ImHandler(tornado.web.RequestHandler):
new_data_info = []
# print(data_info.decode("gbk").encode("utf-8"))
data_info = replace_symbol(data_info).replace("(", "(").replace(")", ")").replace("\xa0","").replace("\t"," ") # 统一分隔符,替换中文括号
data_info = replace_symbol(data_info).replace("(", "(").replace(")", ")").replace("\xa0", "").replace("\t", " ") # 统一分隔符,替换中文括号
data_cut_list = data_info.split(" ")
new_cut_info = ""
for cut_info in data_cut_list: # 第一遍过滤
......@@ -260,7 +261,7 @@ class ImHandler(tornado.web.RequestHandler):
for end_cut_info in end_cut_list: # 将剩下的未知单词剔除包含中文的单词
if end_cut_info.strip() and check_zh(end_cut_info.strip()) and len(end_cut_info.strip()) > 4:
len_lsit.append(end_cut_info)
if len_lsit and len(len_lsit) == 1 and not data_obj["gn"]: # 若剔除完中文后,此时型号还空,则剩下的词很大概率是型号,暂时按时型号处理
if len_lsit and len(len_lsit) == 1 and not data_obj["gn"]: # 若剔除完中文后,此时型号还空,则剩下的词很大概率是型号,暂时按时型号处理
data_obj["gn"].append(len_lsit[0].strip())
if data_obj:
data.append(data_obj)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment