Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
岳巧源
/
my-awesome-project
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
8a77d06e
authored
Jun 21, 2024
by
岳巧源
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
modify some content
parent
e047253a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
15 deletions
main.py
main.py
View file @
8a77d06e
...
...
@@ -80,13 +80,13 @@ count = 0
# 如果是采用单线程的模式跑这个脚本,解析完4000多行数据恐怕要1个小时了 所以还是采用多进程的方式吧 用四个脚本同时解析一个文件的不同行
def
main
():
df
=
pd
.
read_excel
(
'
table
.xlsx'
)
df
=
pd
.
read_excel
(
'
jd
.xlsx'
)
data
=
df
.
iloc
[:,
[
0
]]
.
values
ans
=
[]
result
=
[]
goods_name_list
=
[]
for
i
in
range
(
len
(
data
)):
if
i
>
9
:
break
# for debug, don't forget to delete this line.
# for debug, don't forget to delete this line.
sku_name_str
=
data
[
i
][
0
]
res_tmp
=
match_sku
(
sku_name_str
)
ans
.
append
(
res_tmp
)
...
...
@@ -97,7 +97,7 @@ def main():
tmp_goods_name
=
ans
[
i
][
j
]
tmp_query
=
query
tmp_query
[
"query"
][
"bool"
][
"must"
][
0
][
"match"
][
"goods_name"
]
=
tmp_goods_name
tmp_map
=
process
(
tmp_query
)
tmp_map
=
process
(
tmp_query
,
tmp_goods_name
)
if
len
(
tmp_map
)
!=
0
:
score_in_one_row
.
append
(
tmp_map
)
if
len
(
score_in_one_row
)
==
0
:
...
...
@@ -148,7 +148,7 @@ def post_to_elasticsearch(q: dict):
ans
=
response
.
json
()
return
ans
def
process
(
query_str
:
dict
):
def
process
(
query_str
:
dict
,
key_word
:
str
):
res
=
post_to_elasticsearch
(
query_str
)
if
res
[
"hits"
][
"max_score"
]
is
not
None
and
res
[
"hits"
][
"max_score"
]
>
80
:
for
i
in
range
(
len
(
res
[
"hits"
][
"hits"
])):
...
...
@@ -157,7 +157,7 @@ def process(query_str: dict):
_goods_name
=
res
[
"hits"
][
"hits"
][
i
][
"_source"
][
"goods_name"
]
_goods_id
=
res
[
"hits"
][
"hits"
][
i
][
"_source"
][
"goods_id"
]
_brand_name
=
res
[
"hits"
][
"hits"
][
i
][
"_source"
][
"brand_name"
]
return
{
"goods_name"
:
_goods_name
,
"goods_id"
:
_goods_id
,
"brand_name"
:
_brand_name
,
"score"
:
_score
}
return
{
"goods_name"
:
_goods_name
,
"goods_id"
:
_goods_id
,
"brand_name"
:
_brand_name
,
"score"
:
_score
,
"key_word"
:
key_word
}
return
{}
...
...
@@ -179,26 +179,30 @@ def write_to_xlsx(info_map_list: list):
# 追加写
def
write_to_xlsx_append
(
info_map_list
:
list
):
info_map
=
{
'goods_name'
:
[],
'goods_id'
:
[],
'brand_name'
:
[]}
info_map
=
{
'goods_name'
:
[],
'goods_id'
:
[],
'brand_name'
:
[]
,
'key_word'
:
[]
}
for
i
in
range
(
len
(
info_map_list
)):
if
len
(
info_map_list
[
i
])
!=
0
:
info_map
[
"goods_name"
]
.
append
(
info_map_list
[
i
][
"goods_name"
])
info_map
[
"goods_id"
]
.
append
(
str
(
info_map_list
[
i
][
"goods_id"
]))
info_map
[
"brand_name"
]
.
append
(
info_map_list
[
i
][
"brand_name"
])
info_map
[
"key_word"
]
.
append
(
info_map_list
[
i
][
"key_word"
])
else
:
info_map
[
"goods_name"
]
.
append
(
''
)
info_map
[
"goods_id"
]
.
append
(
''
)
info_map
[
"brand_name"
]
.
append
(
''
)
data
=
openpyxl
.
load_workbook
(
'table.xlsx'
)
info_map
[
"key_word"
]
.
append
(
''
)
data
=
openpyxl
.
load_workbook
(
'jd.xlsx'
)
table
=
data
[
data
.
sheetnames
[
0
]]
table
.
cell
(
1
,
4
)
.
value
=
'goods_name'
table
.
cell
(
1
,
5
)
.
value
=
'goods_id'
table
.
cell
(
1
,
6
)
.
value
=
'brand_name'
table
.
cell
(
1
,
6
)
.
value
=
'goods_name'
table
.
cell
(
1
,
7
)
.
value
=
'goods_id'
table
.
cell
(
1
,
8
)
.
value
=
'brand_name'
table
.
cell
(
1
,
9
)
.
value
=
'key_word'
for
i
in
range
(
2
,
len
(
info_map_list
)
+
2
):
table
.
cell
(
i
,
4
)
.
value
=
info_map
[
'goods_name'
][
i
-
2
]
table
.
cell
(
i
,
5
)
.
value
=
info_map
[
'goods_id'
][
i
-
2
]
table
.
cell
(
i
,
6
)
.
value
=
info_map
[
'brand_name'
][
i
-
2
]
data
.
save
(
'table.xlsx'
)
table
.
cell
(
i
,
6
)
.
value
=
info_map
[
'goods_name'
][
i
-
2
]
table
.
cell
(
i
,
7
)
.
value
=
info_map
[
'goods_id'
][
i
-
2
]
table
.
cell
(
i
,
8
)
.
value
=
info_map
[
'brand_name'
][
i
-
2
]
table
.
cell
(
i
,
9
)
.
value
=
info_map
[
'key_word'
][
i
-
2
]
data
.
save
(
'jd.xlsx'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment