Commit 00da6858 by larosa

add parse xlsx

parent 3874935e
Showing with 129 additions and 0 deletions
import copy
import re
import json
import pandas as pd
import requests
# Search endpoint that post_to_elasticsearch() sends its request body to.
url = "http://172.18.137.46:9211/liexin_all_sku/_search"

# Placeholder goods name baked into the query template below; main() replaces
# it with each candidate extracted from the spreadsheet.
goods_name = "BNC/SMA-JJ"

# Query template: full-text match on goods_name, restricted to documents with
# goods_status == 1, forbid_status == 0 and stock >= 0. Only the three listed
# fields are requested back.
# NOTE(review): the meaning of the status codes is not visible here - confirm
# against the index mapping.
query = {
    "_source": ["goods_id", "goods_name", "brand_name"],
    "query": {
        "bool": {
            "must": [
                {"match": {"goods_name": goods_name}},
                {"term": {"goods_status": 1}},
                {"term": {"forbid_status": 0}},
                {"range": {"stock": {"gte": 0}}},
            ]
        }
    },
}
# Tokens that match_sku() must never report as SKU candidates. Membership is
# an exact, case-sensitive string comparison, which is why some names appear
# in more than one spelling (e.g. "INFINEON" / "Infineon").
# NOTE(review): the entries look like brand names, generic words and size/spec
# fragments ("10dB", "2m") - confirm the intent with the data owner.
#
# Fixed: a comma was missing after "NVIDIA", so Python concatenated it with
# the following literal into the single entry "NVIDIAXilinx" and "NVIDIA" was
# never actually blacklisted. Duplicate entries were dropped (a set ignores
# them anyway); every line ends with a trailing comma to prevent a repeat.
black_list = {
    "eastsheep", "ABB", "BRsanyuan", "JCSTRONG", "TECHNOLOGY",
    "OMRON", "Schneider", "Electric", "ZWZDH", "GKNC",
    "ALINX", "DELL", "ANALOG", "DEVICES", "LR-LINK", "molex", "NVIDIA",
    "Xilinx", "Anybus", "FTDEVICE", "JZLDP",
    "LunderwaterH", "PANT",
    "TRANSCELL", "unistrong", "VOLTCEL", "FULANGDE", "CNC", "AMD", "TEJIATE",
    "ChangHong", "ABI", "USB", "BEICHEN", "HuiKong", "JKJI", "LONCONT", "WEKDK", "JUWEIDE",
    "ZKE", "AZBIL", "CDOE", "DLEN", "GDSW", "Pilz", "HONEYWELL", "QMA", "SANGFOR", "SKINHEAD",
    "HITTERY", "TURCK", "WAGO", "EMA", "ZLG", "LEFILTER", "BIAO", "FENG", "CDIY",
    "CPC", "HW", "Gjqs", "SZCOMARK", "tyuhk", "ERIKOLE", "LZALY", "LZQLY", "SIEMENS", "0.8mm", "100G", "11p",
    "12p", "16mm", "2.4G", "3.7v", "905nm", "DIEWU", "EB-LINK", "SUNON", "TEXAS", "XINQY", "LOBOROBOT", "H3C",
    "cutersre", "HEMEI", "HRE", "ZHSY", "kankeirr", "21KE", "RITAR", "AERFID", "KEDACOM", "KSD",
    "NEWSTARTOS", "TADIRAN", "FENGYINGZI", "ADI", "BOURNS", "COMFAST", "Continental", "CTS", "CYPRESS",
    "DIODES",
    "Mosfet", "ECS", "ERNI", "HIK", "HOTTECH", "HUASHUO", "INFINEON", "Infineon", "INTEL",
    "Isabellenhuette", "KAMAYA", "Knowles", "KOA", "KYOCERA", "MAXIM", "MDD", "Microchip", "Mini-Circuits",
    "Murata", "NEXPERIA", "Nichicon", "NXP", "Panasonic", "PEM", "ROHM", "ROQANG", "Semitec", "SILERGY",
    "SXN",
    "TDK", "THine", "CAN", "TOSHIBA", "UMW", "UniOhm", "VISHAY", "Vox", "Power", "YANGJIE", "youyeetoo",
    "Chian-Chia", "CCO", "COILCRAFT", "ZHOUZHI", "Arty", "Sipeed", "Tang", "Primer",
    "WENHAOYONGSHUN",
    "SILICON", "Arduino", "BINJIASHENG", "cjiang", "CrossChip",
    "GOOD-ARK",
    "GWINSTEK", "HGSEMI", "HOTA", "Littelfuse", "MCC",
    "muRata", "Nexperia", "ONSEMI", "PSA", "Slkor", "UTC", "YAGEO", "YLPTEC", "ASUNDAR", "BOSENDE",
    "YXC", "FANUC", "HMILU", "Sinexcel", "SONY", "EAO", "TaoTimeClub",
    "KONNAD", "Tenda", "Kingston", "LEACH", "Multilane", "10dB", "20dB", "18GHz", "3dB", "Gratool",
    "HIKVISION",
    "UGREEN", "xiaovv", "maxhub", "SAMSUNG", "SAMZHE", "Lenovo", "MeeTap", "BCNET",
    "LANGQING",
    "ORICO", "HUB", "SDKELI", "10m", "9m", "8m", "7m", "6m", "5m", "4m", "3m", "2m", "1m",
}

# Not read or written anywhere in the visible code; kept so that any external
# reference to it keeps working.
count = 0
def main():
    """Build one Elasticsearch query per SKU candidate found in ``table.xlsx``.

    Reads the first column of ``table.xlsx``, extracts SKU-like candidates
    from every cell with ``match_sku`` and builds a search body for each
    candidate from the module-level ``query`` template.

    Returns:
        list[dict]: one independent query dict per extracted candidate, in
        spreadsheet order. The queries are only built here, not sent; pass
        each one to ``post_to_elasticsearch`` to execute it.
    """
    df = pd.read_excel('table.xlsx')

    queries = []
    for sku_name_str in df.iloc[:, 0]:
        # Blank cells are read back as NaN (a float) and numeric cells as
        # numbers; neither can be tokenised, so skip anything that is not text.
        if not isinstance(sku_name_str, str):
            continue
        for tmp_goods_name in match_sku(sku_name_str):
            # Deep-copy the template: plain assignment would alias the shared
            # module-level dict, so every iteration would overwrite the same
            # object (and leave the template itself modified).
            tmp_query = copy.deepcopy(query)
            tmp_query["query"]["bool"]["must"][0]["match"]["goods_name"] = tmp_goods_name
            queries.append(tmp_query)
    return queries
# Applied to each space-separated token, anchored at the token start. Captures
# up to three "code-like" runs: an ASCII letter/digit followed by at least two
# more characters drawn from letters, digits, "/" and "-". The runs may be
# separated by other characters (e.g. CJK text). Compiled once at import time
# instead of once per token.
_SKU_PATTERN = re.compile(
    r'[^0-9a-zA-Z]*([0-9a-zA-Z][0-9a-zA-Z/-]{2,})[^0-9a-zA-Z/-]*([0-9a-zA-Z][0-9a-zA-Z/-]{2,})*[^0-9a-zA-Z]*([0-9a-zA-Z][0-9a-zA-Z/-]{2,})*')


def match_sku(s: str, blacklist=None) -> list:
    """Extract SKU-like candidate strings from a free-text product name.

    The text is split on single spaces. Each non-empty token that is not
    blacklisted is matched against ``_SKU_PATTERN``, which yields at most
    three candidates per token. A candidate is dropped when it is blacklisted
    or consists only of digits.

    Example input:
        "RG58/U射频连接线SMA公头转BNC公头延长线 SMA/BNC-JJ Q9/SMA-JJ RG58/U-SMA/BNC-JJ 2m"

    Args:
        s: Product name text. Any non-string value (for example the NaN that
            pandas produces for a blank cell) yields an empty list.
        blacklist: Optional container of strings to exclude. Defaults to the
            module-level ``black_list``.

    Returns:
        list[str]: candidates in order of appearance; duplicates are kept.
    """
    if not isinstance(s, str):
        return []
    if blacklist is None:
        blacklist = black_list

    res = []
    # split(" ") never leaves spaces inside a token, so no extra strip is
    # needed; consecutive spaces produce empty tokens, which are skipped.
    for token in s.split(" "):
        if not token or token in blacklist:
            continue
        m = _SKU_PATTERN.match(token)
        if m is None:
            continue
        for candidate in m.groups():
            # Optional groups that did not participate in the match are None.
            if candidate is None:
                continue
            # Blacklisted strings are never added to the result.
            if candidate in blacklist:
                continue
            # Purely numeric strings are never added to the result. The
            # pattern only captures ASCII characters, so isdigit() is an
            # exact test for "all 0-9" here.
            if candidate.isdigit():
                continue
            res.append(candidate)
    return res
def post_to_elasticsearch(q: dict, timeout: float = 10.0):
    """POST a search body to the configured endpoint and return the parsed JSON.

    Args:
        q: Request body; serialised with ``json.dumps`` and sent to the
            module-level ``url``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so an unreachable host
            would hang the script.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.Timeout: the server did not respond in time.
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status (previously such a body was returned as if it were a
            normal search result).
    """
    response = requests.post(
        url=url,
        data=json.dumps(q),
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    # Fail loudly on error statuses instead of handing an error payload back
    # to the caller as if it were search results.
    response.raise_for_status()
    return response.json()
# Run the spreadsheet-to-query pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment