Commit 8a77d06e by 岳巧源

modify some content

parent e047253a
Showing changes with 19 additions and 15 deletions
...@@ -80,13 +80,13 @@ count = 0 ...@@ -80,13 +80,13 @@ count = 0
# 如果是采用单线程的模式跑这个脚本,解析完4000多行数据恐怕要1个小时了 所以还是采用多进程的方式吧 用四个脚本同时解析一个文件的不同行 # 如果是采用单线程的模式跑这个脚本,解析完4000多行数据恐怕要1个小时了 所以还是采用多进程的方式吧 用四个脚本同时解析一个文件的不同行
def main(): def main():
df = pd.read_excel('table.xlsx') df = pd.read_excel('jd.xlsx')
data = df.iloc[:, [0]].values data = df.iloc[:, [0]].values
ans = [] ans = []
result = [] result = []
goods_name_list = []
for i in range(len(data)): for i in range(len(data)):
if i > 9: # for debug, don't forget to delete this line.
break # for debug, don't forget to delete this line.
sku_name_str = data[i][0] sku_name_str = data[i][0]
res_tmp = match_sku(sku_name_str) res_tmp = match_sku(sku_name_str)
ans.append(res_tmp) ans.append(res_tmp)
...@@ -97,7 +97,7 @@ def main(): ...@@ -97,7 +97,7 @@ def main():
tmp_goods_name = ans[i][j] tmp_goods_name = ans[i][j]
tmp_query = query tmp_query = query
tmp_query["query"]["bool"]["must"][0]["match"]["goods_name"] = tmp_goods_name tmp_query["query"]["bool"]["must"][0]["match"]["goods_name"] = tmp_goods_name
tmp_map = process(tmp_query) tmp_map = process(tmp_query, tmp_goods_name)
if len(tmp_map) != 0: if len(tmp_map) != 0:
score_in_one_row.append(tmp_map) score_in_one_row.append(tmp_map)
if len(score_in_one_row) == 0: if len(score_in_one_row) == 0:
...@@ -148,7 +148,7 @@ def post_to_elasticsearch(q: dict): ...@@ -148,7 +148,7 @@ def post_to_elasticsearch(q: dict):
ans = response.json() ans = response.json()
return ans return ans
def process(query_str: dict): def process(query_str: dict, key_word: str):
res = post_to_elasticsearch(query_str) res = post_to_elasticsearch(query_str)
if res["hits"]["max_score"] is not None and res["hits"]["max_score"] > 80: if res["hits"]["max_score"] is not None and res["hits"]["max_score"] > 80:
for i in range(len(res["hits"]["hits"])): for i in range(len(res["hits"]["hits"])):
...@@ -157,7 +157,7 @@ def process(query_str: dict): ...@@ -157,7 +157,7 @@ def process(query_str: dict):
_goods_name = res["hits"]["hits"][i]["_source"]["goods_name"] _goods_name = res["hits"]["hits"][i]["_source"]["goods_name"]
_goods_id = res["hits"]["hits"][i]["_source"]["goods_id"] _goods_id = res["hits"]["hits"][i]["_source"]["goods_id"]
_brand_name = res["hits"]["hits"][i]["_source"]["brand_name"] _brand_name = res["hits"]["hits"][i]["_source"]["brand_name"]
return {"goods_name": _goods_name, "goods_id": _goods_id, "brand_name": _brand_name, "score": _score} return {"goods_name": _goods_name, "goods_id": _goods_id, "brand_name": _brand_name, "score": _score, "key_word": key_word}
return {} return {}
...@@ -179,26 +179,30 @@ def write_to_xlsx(info_map_list: list): ...@@ -179,26 +179,30 @@ def write_to_xlsx(info_map_list: list):
# 追加写 # 追加写
def write_to_xlsx_append(info_map_list: list): def write_to_xlsx_append(info_map_list: list):
info_map = {'goods_name': [], 'goods_id': [], 'brand_name': []} info_map = {'goods_name': [], 'goods_id': [], 'brand_name': [], 'key_word': []}
for i in range(len(info_map_list)): for i in range(len(info_map_list)):
if len(info_map_list[i]) != 0: if len(info_map_list[i]) != 0:
info_map["goods_name"].append(info_map_list[i]["goods_name"]) info_map["goods_name"].append(info_map_list[i]["goods_name"])
info_map["goods_id"].append(str(info_map_list[i]["goods_id"])) info_map["goods_id"].append(str(info_map_list[i]["goods_id"]))
info_map["brand_name"].append(info_map_list[i]["brand_name"]) info_map["brand_name"].append(info_map_list[i]["brand_name"])
info_map["key_word"].append(info_map_list[i]["key_word"])
else: else:
info_map["goods_name"].append('') info_map["goods_name"].append('')
info_map["goods_id"].append('') info_map["goods_id"].append('')
info_map["brand_name"].append('') info_map["brand_name"].append('')
data = openpyxl.load_workbook('table.xlsx') info_map["key_word"].append('')
data = openpyxl.load_workbook('jd.xlsx')
table = data[data.sheetnames[0]] table = data[data.sheetnames[0]]
table.cell(1, 4).value = 'goods_name' table.cell(1, 6).value = 'goods_name'
table.cell(1, 5).value = 'goods_id' table.cell(1, 7).value = 'goods_id'
table.cell(1, 6).value = 'brand_name' table.cell(1, 8).value = 'brand_name'
table.cell(1, 9).value = 'key_word'
for i in range(2, len(info_map_list)+2): for i in range(2, len(info_map_list)+2):
table.cell(i, 4).value = info_map['goods_name'][i-2] table.cell(i, 6).value = info_map['goods_name'][i-2]
table.cell(i, 5).value = info_map['goods_id'][i-2] table.cell(i, 7).value = info_map['goods_id'][i-2]
table.cell(i, 6).value = info_map['brand_name'][i-2] table.cell(i, 8).value = info_map['brand_name'][i-2]
data.save('table.xlsx') table.cell(i, 9).value = info_map['key_word'][i-2]
data.save('jd.xlsx')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment