Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
lzzzzl
/
param_data
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
3d410c52
authored
Nov 09, 2020
by
lzzzzl
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
更新attr_values处理
parent
019f4a7c
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
219 additions
and
41 deletions
fun/param_conn.py
fun/param_data.py
fun/param_thread.py
fun/param_trans.py
param_match.py
param_task/db_attr_value_all.py
param_task/task.py
param_task/value_task_all.py
fun/param_conn.py
View file @
3d410c52
...
...
@@ -7,6 +7,14 @@ import config.db_config as config
class
ParamConn
:
@staticmethod
def local_sku_conn(index):
    """Open a connection to the local ``liexin_sku_<index>`` database.

    Host, user and password are read from ``config.local_sku_conn``.

    :param index: integer suffix of the database name (formatted with ``%d``).
    :return: the object returned by ``pymysql.connect``.
    """
    settings = config.local_sku_conn
    # Keyword arguments are required by PyMySQL >= 1.0 (connect() no longer
    # accepts host/user/password/database positionally) and are also
    # accepted by the older releases that allowed the positional form.
    return pymysql.connect(
        host=settings['host'],
        user=settings['user'],
        password=settings['password'],
        database="liexin_sku_%d" % index,
        charset="utf8",
    )
@staticmethod
def
online_sku_conn
(
index
):
return
pymysql
.
connect
(
config
.
online_sku
[
'host'
],
config
.
online_sku
[
'user'
],
...
...
fun/param_data.py
View file @
3d410c52
...
...
@@ -4,7 +4,7 @@ from utils.db_handler import DBHandler
from
utils.log_handler
import
LogHandler
import
config.zy_config
as
config
from
utils.date_handler
import
DateHandler
from
fun.param_trans
import
ParamTrans
now_ts
=
DateHandler
.
now_datetime
()
...
...
@@ -17,11 +17,11 @@ class ParamData(object):
sql
=
"SELECT attr_id,class_id,attr_unit_id FROM lie_class_attr"
return
DBHandler
.
read
(
conn
,
sql
)
def
get_class_attr_value
(
self
,
attr_id
,
value
,
attr_unit_id
,
conn
):
"""
attr_id: {}
:return:
"""
"""
获取分类attr value
"""
@staticmethod
def
get_class_attr_value
(
attr_id
,
value
,
attr_unit_id
,
conn
):
sql
=
"SELECT attr_value_id FROM lie_class_attr_value WHERE attr_id={attr_id} AND value='{value}'AND attr_unit_id={attr_unit_id}"
.
format
(
attr_id
=
attr_id
,
value
=
value
,
...
...
@@ -35,7 +35,11 @@ class ParamData(object):
DBHandler
.
read
(
bg_conn
,
sql
)]
return
data
def
insert_dgk_goods
(
self
,
attr_id
,
value
,
attr_unit_id
,
conn
):
"""
写入dgk数据
"""
@staticmethod
def
insert_dgk_goods
(
attr_id
,
value
,
attr_unit_id
,
conn
):
sql
=
"INSERT INTO lie_class_attr_value (attr_id,value,attr_unit_id,status,remark,add_time,update_time) VALUES ('
%
s','
%
s','
%
s','
%
s','
%
s','
%
s','
%
s')"
%
\
(
attr_id
,
value
,
attr_unit_id
,
1
,
""
,
now_ts
,
now_ts
)
DBHandler
.
insert
(
conn
,
sql
)
...
...
@@ -43,16 +47,24 @@ class ParamData(object):
max_data
=
DBHandler
.
read
(
conn
,
sql2
)
return
max_data
[
0
][
0
]
def
update_spu
(
self
,
spu_id
,
attr_values
,
spu_conn
,
log
):
"""
更新spu
"""
@staticmethod
def update_spu(spu_id, attr_values, spu_conn, log):
    """Write ``attr_values`` onto one SPU row and log what was written.

    The target table is ``lie_spu_<n>`` where ``n`` is the last character of
    ``str(spu_id)`` converted to an int.

    :param spu_id: integer SPU id (formatted with ``%d``).
    :param attr_values: text stored verbatim in the ``attr_values`` column.
    :param spu_conn: connection handed to ``DBHandler.update``.
    :param log: logger-like object; only ``info`` is called.
    """
    # NOTE(review): attr_values is interpolated into the statement unescaped;
    # a value containing a single quote would break the SQL - confirm callers
    # only pass quote-free text.
    table_suffix = int(str(spu_id)[-1])
    statement = "UPDATE lie_spu_%d SET attr_values='%s' WHERE spu_id=%d" % (
        table_suffix,
        attr_values,
        spu_id,
    )
    DBHandler.update(spu_conn, statement)
    message = "spu_id: {spu_id}, attr_values: {attr_values}".format(
        spu_id=spu_id,
        attr_values=attr_values,
    )
    log.info(message)
def
load_excel
(
self
,
fileName
):
data
=
ExcelHandler
.
read_to_excel
(
fileName
,
'Sheet1'
,
1
)
"""
加载excel
"""
@staticmethod
def
load_excel
(
filename
):
param_trans
=
ParamTrans
()
data
=
ExcelHandler
.
read_to_excel
(
filename
,
'Sheet1'
,
1
)
data
.
pop
(
0
)
class_list
=
list
()
param_dict
=
dict
()
...
...
@@ -61,8 +73,9 @@ class ParamData(object):
second_classify_id
=
row
[
1
]
lx_attr_name
=
row
[
3
]
class_list
.
append
(
str
(
int
(
second_classify_id
)))
unit_id
=
config
.
param_ids
[
lx_attr_name
]
param_dict
[
str
(
int
(
param_id
))]
=
unit_id
fuc_name
=
param_trans
.
param_func
[
lx_attr_name
]
unit_id
=
param_trans
.
param_unit
[
lx_attr_name
]
param_dict
[
str
(
int
(
param_id
))]
=
fuc_name
return
set
(
class_list
),
param_dict
"""
...
...
@@ -157,9 +170,31 @@ class ParamData(object):
@staticmethod
def get_dgk_goods_by_index(idx, bg_conn):
    """Read every non-empty DGK attribute row of ``lie_goods_attr_fields<idx>``.

    Rows whose ``attr_value`` is ``''`` or ``'-'`` are excluded. No row
    limit is applied.

    :param idx: integer table suffix (formatted with ``%d``).
    :param bg_conn: connection handed to ``DBHandler.read``.
    :return: whatever ``DBHandler.read`` returns for the query; columns are
        attr_name, attr_value, attr_id, cat_id.
    """
    # Adjacent literals instead of a backslash continuation inside the
    # string, so source indentation never ends up in the SQL text.
    sql = (
        "SELECT attr_name,attr_value,attr_id,cat_id "
        "FROM lie_goods_attr_fields%d "
        "WHERE attr_value != '' AND attr_value != '-'"
    ) % idx
    return DBHandler.read(bg_conn, sql)
@staticmethod
def get_dgk_goods_by_gid(idx, gid, bg_conn):
    """Fetch the non-empty DGK attribute rows of a single goods id.

    Rows whose ``attr_value`` is ``''`` or ``'-'`` are excluded.

    :param idx: integer suffix of the ``lie_goods_attr_fields<idx>`` table.
    :param gid: goods id to filter on (formatted with ``%d``, so it must be
        an integer).
    :param bg_conn: connection handed to ``DBHandler.read``.
    :return: list of dicts with keys ``attr_name``, ``attr_value``,
        ``attr_id`` and ``cat_id``, one per row.
    """
    # Adjacent literals instead of a backslash continuation inside the
    # string, so source indentation never ends up in the SQL text.
    sql = (
        "SELECT attr_name,attr_value,attr_id,cat_id "
        "FROM lie_goods_attr_fields%d "
        "WHERE attr_value != '' AND attr_value != '-' AND goods_id = %d"
    ) % (idx, gid)
    # Same order as the SELECT list.
    columns = ('attr_name', 'attr_value', 'attr_id', 'cat_id')
    return [dict(zip(columns, row)) for row in DBHandler.read(bg_conn, sql)]
@staticmethod
def get_all_dgk_goods(idx, bg_conn):
    """Load all non-empty DGK attribute rows of one table, grouped by goods id.

    Rows whose ``attr_value`` is ``''`` or ``'-'`` are excluded.

    :param idx: integer suffix of the ``lie_goods_attr_fields<idx>`` table.
    :param bg_conn: connection handed to ``DBHandler.read``.
    :return: dict mapping ``goods_id`` to a list of dicts with keys
        ``attr_name``, ``attr_value``, ``attr_id`` and ``cat_id``, in the
        order the rows were returned.
    """
    # Adjacent literals instead of a backslash continuation inside the
    # string, so source indentation never ends up in the SQL text.
    sql = (
        "SELECT attr_name,attr_value,attr_id,cat_id,goods_id "
        "FROM lie_goods_attr_fields%d "
        "WHERE attr_value != '' AND attr_value != '-' "
    ) % idx
    data = {}
    for row in DBHandler.read(bg_conn, sql):
        # row[4] is goods_id; setdefault creates the bucket on first sight.
        data.setdefault(row[4], []).append({
            'attr_name': row[0],
            'attr_value': row[1],
            'attr_id': row[2],
            'cat_id': row[3],
        })
    return data
"""
写入attr value
"""
...
...
@@ -169,3 +204,8 @@ class ParamData(object):
('
%
s','
%
s','
%
s','
%
s','
%
s','
%
s','
%
s')"
%
\
(
attr_id
,
value
,
attr_unit_id
,
status
,
remark
,
add_time
,
update_time
)
DBHandler
.
insert
(
class_conn
,
sql
)
@staticmethod
def get_sku_data(idx, sku_conn):
    """Read the supplier-7 rows of the ``lie_sku_<idx>`` table.

    :param idx: integer table suffix (formatted with ``%d``).
    :param sku_conn: connection handed to ``DBHandler.read``.
    :return: whatever ``DBHandler.read`` returns for the query; columns are
        old_goods_id, spu_id, goods_id.
    """
    query_template = "SELECT old_goods_id,spu_id,goods_id FROM lie_sku_%d WHERE supplier_id = 7"
    rows = DBHandler.read(sku_conn, query_template % idx)
    return rows
\ No newline at end of file
fun/param_thread.py
View file @
3d410c52
...
...
@@ -2,7 +2,7 @@
import
threading
class
P
ARAM_THREAD
:
class
P
aramThread
:
@staticmethod
def
thread_func_ten
(
func
,
data
):
...
...
fun/param_trans.py
View file @
3d410c52
...
...
@@ -78,12 +78,12 @@ class ParamTrans:
@classmethod
def base_trans(cls, trans_rule, key):
    """Split a raw parameter string into ``(value, unit)`` with a regex.

    :param trans_rule: pattern applied with ``re.match``; group 1 is taken
        as the value and group 2 as the unit.
    :param key: raw attribute text. Non-string input is converted with
        ``str`` before matching.
    :return: ``(0, '')`` when ``key`` is the placeholder ``'-'`` or ``''``;
        ``(group1, group2)`` when the pattern matches; ``('', '')`` when it
        does not.
    """
    if key == '-' or key == '':
        return 0, ''
    # Normalise once: the original mixed str(key) with key.replace(), which
    # raised AttributeError for numeric input.
    text = str(key)
    if text.startswith("."):
        # ".5V" -> "0.5V" so the pattern sees a leading digit.
        text = "0" + text
    obj = re.match(trans_rule, text.replace(" ", ""))
    if not obj:
        return '', ''
    return obj.group(1), obj.group(2)
@classmethod
...
...
@@ -157,7 +157,7 @@ class ParamTrans:
:return:
"""
if
key
==
'-'
or
key
==
''
:
return
0
,
0
return
0
,
''
for
ct
in
cast_map
:
if
str
(
key
)
.
find
(
ct
)
!=
-
1
:
key
=
str
(
key
)
.
replace
(
ct
,
cast_map
[
ct
])
...
...
@@ -181,11 +181,11 @@ class ParamTrans:
:return:
"""
if
key
==
'-'
or
key
==
''
:
return
0
,
0
return
0
,
''
key
=
str
(
key
)
.
split
(
","
)[
0
]
for
encap
in
encap_list
:
if
str
(
key
)
.
find
(
encap
)
!=
-
1
:
return
encap
,
0
return
encap
,
''
return
key
,
''
@classmethod
...
...
@@ -196,13 +196,13 @@ class ParamTrans:
:return:
"""
if
key
==
'-'
or
key
==
''
:
return
0
,
0
return
0
,
''
for
sep_temp
in
sep_temp_map
:
if
str
(
key
)
.
find
(
sep_temp
)
!=
-
1
:
return
"C0G"
,
0
return
"C0G"
,
''
for
temp
in
temp_map
:
if
str
(
key
)
.
find
(
temp
)
!=
-
1
:
return
temp
,
0
return
temp
,
''
return
key
,
''
@classmethod
...
...
param_match.py
View file @
3d410c52
# _*_ coding:utf-8 _*_
from
param.param_extract
import
PARAM_EXTRACT
from
param.spu_extract
import
SPU_EXTRACT
from
param.param_class_redis
import
PARAM_CLASS_REDIS
from
upload.param_mongo
import
PARAM_MONGO
from
param_task.db_attr_value_all
import
DbAttrValueAll
if
__name__
==
'__main__'
:
# 更新 spu表-attr_values字段
extract
=
SPU_EXTRACT
()
extract
.
run
()
#
extract = SPU_EXTRACT()
#
extract.run()
# 更新 spu表-class_id字段
# param_extract = PARAM_EXTRACT()
...
...
@@ -19,4 +17,7 @@ if __name__ == '__main__':
# 更新mongo
# param_mongo = PARAM_MONGO()
# param_mongo.run()
\ No newline at end of file
# param_mongo.run()
db_value
=
DbAttrValueAll
()
db_value
.
run
()
\ No newline at end of file
param_task/db_attr_value_all.py
0 → 100644
View file @
3d410c52
# -*- coding: utf-8 -*-
# !/usr/bin/env python
from
fun.param_trans
import
ParamTrans
from
fun.param_thread
import
ParamThread
from
param_task.task
import
Task
from
param_task.value_task_all
import
ValueTaskAll
from
utils.log_handler
import
LogHandler
import
json
class DbAttrValueAll(Task):
    """Rebuild the ``attr_values`` column of SPU rows from DGK goods attributes.

    Workflow (see :meth:`run`): load the supplier SKU rows, bucket them by
    the last digit of ``old_goods_id``, load the attribute id lists, then
    hand the buckets to ``ParamThread.thread_func_ten`` together with
    :meth:`get_spu_data`.
    """

    # A goods stops collecting attribute values once it holds more than this
    # many entries.
    _MAX_ATTR_VALUES = 20

    def __init__(self):
        super().__init__()
        self.value_at = ValueTaskAll()
        # unit text -> attr_unit_id (looked up as self.unit_value[unit]).
        self.unit_value = self.param_data.get_unit_value(self.class_db)
        # cat_id -> {attr_name -> attr_id to write}.
        self.cls_map = self.param_data.class_mapping(self.class_db)
        # param_dict: str(attr_id) -> name of the ParamTrans method that
        # parses that attribute's raw value.
        self.class_list, self.param_dict = self.param_data.load_excel("/data2/param_data/dgk.xlsx")
        # One bucket per possible last digit of old_goods_id.
        self.dgk_dict = {digit: [] for digit in range(10)}
        self.log1 = LogHandler('data_attr_value_summary', stream=True, file=True)
        self.log2 = LogHandler('data_attr_value', stream=False, file=True)

    def init_data(self):
        """Load the SKU rows and bucket them by the last digit of ``old_goods_id``.

        Iterates SKU databases 0-9 and, in each, tables 0-9 through
        ``ParamData.get_sku_data``. Every row is stored in ``self.dgk_dict``
        as a dict with keys ``spu_id``, ``old_goods_id`` and ``goods_id``.
        """
        self.log1.info("spu_data : start")
        for db_index in range(0, 10):
            self.log1.info("spu_data Database: {i}".format(i=db_index))
            conn = self.param_conn.online_sku_conn(db_index)
            try:
                for table_index in range(0, 10):
                    rows = self.param_data.get_sku_data(table_index, conn)
                    for old_goods_id, spu_id, goods_id in rows:
                        bucket = int(str(old_goods_id)[-1])
                        self.dgk_dict[bucket].append({
                            'spu_id': spu_id,
                            'old_goods_id': old_goods_id,
                            'goods_id': goods_id,
                        })
            finally:
                # One connection is opened per database; release it instead
                # of leaving ten connections open for the rest of the run.
                conn.close()

    def get_spu_data(self, dgk_data):
        """Compute and store ``attr_values`` for every SKU row in ``dgk_data``.

        For each row the DGK attribute rows of ``old_goods_id`` are read,
        translated by :meth:`_collect_attr_values`, and, when at least one
        entry results, written with ``ParamData.update_spu`` as a JSON list.
        A failure on one row is logged and the loop continues with the next.

        :param dgk_data: list of dicts with keys ``old_goods_id`` and
            ``spu_id`` (one bucket of ``self.dgk_dict``).
        """
        count = 0
        bigdata_db = self.DBConn.db_bigdata()
        class_db = self.DBConn.db_class()
        spu_db = self.DBConn.db_spu()
        # Membership is tested once per attribute row; build the sets once so
        # each test is O(1) instead of a list scan.
        effect_ids = set(self.value_at.attrs_id_effect)
        plain_ids = set(self.value_at.attrs_id)
        print(len(dgk_data))
        for row in dgk_data:
            count += 1
            old_goods_id = row['old_goods_id']
            spu_id = row['spu_id']
            param_index = int(str(old_goods_id)[-1])
            try:
                dgk_goods = self.param_data.get_dgk_goods_by_gid(param_index, old_goods_id, bigdata_db)
                if dgk_goods:
                    dgk_list = self._collect_attr_values(dgk_goods, class_db, effect_ids, plain_ids)
                    if dgk_list:
                        self.param_data.update_spu(int(spu_id), json.dumps(dgk_list), spu_db, self.log2)
                if count % 3000 == 0:
                    self.log1.info("INDEX: {INDEX}, COUNT: {COUNT}".format(INDEX=param_index, COUNT=count))
            except Exception as exc:
                # Best effort per row: record which row failed and why, then
                # keep going. The message is distinct from the progress log.
                self.log1.info(
                    "INDEX: {INDEX}, COUNT: {COUNT}, spu_id: {SPU_ID}, error: {ERROR!r}".format(
                        INDEX=param_index, COUNT=count, SPU_ID=spu_id, ERROR=exc))

    def _collect_attr_values(self, dgk_goods, class_db, effect_ids, plain_ids):
        """Translate one goods' DGK attribute rows into ``"attr_id,value_id,unit_id"`` strings.

        :param dgk_goods: dicts with keys ``cat_id``, ``attr_id``,
            ``attr_name`` and ``attr_value``.
        :param class_db: connection passed to ``ParamData.get_class_attr_value``.
        :param effect_ids: attr ids whose raw value is parsed by a
            ``ParamTrans`` method.
        :param plain_ids: attr ids whose raw value is used as-is with an
            empty unit.
        :return: list of formatted strings, at most ``_MAX_ATTR_VALUES + 1`` long.
        """
        dgk_list = []
        for goods in dgk_goods:
            field_attr_id = goods['attr_id']
            field_attr_value = goods['attr_value']
            write_attr_id = self.cls_map[goods['cat_id']].get(goods['attr_name'])
            if not write_attr_id:
                continue
            if field_attr_id in effect_ids:
                parser = getattr(ParamTrans, self.param_dict[str(field_attr_id)])
                value, unit = parser(field_attr_value)
            elif field_attr_id in plain_ids:
                value, unit = field_attr_value, ''
            else:
                continue
            if value == 0 or not self.unit_value.get(unit):
                continue
            attr_unit_id = self.unit_value[unit]
            value_exist = self.param_data.get_class_attr_value(write_attr_id, value, attr_unit_id, class_db)
            # TODO: values with no existing row are skipped; the insert path
            # (ParamData.insert_dgk_goods) is currently disabled.
            if not value_exist:
                continue
            dgk_list.append("{attr_id},{value_id},{unit_id}".format(
                attr_id=write_attr_id,
                value_id=value_exist[0][0],
                unit_id=attr_unit_id))
            # Stop once the list has grown past the cap.
            if len(dgk_list) > self._MAX_ATTR_VALUES:
                break
        return dgk_list

    def run(self):
        """Load SKU rows and attribute id lists, then process all buckets."""
        self.init_data()
        self.value_at.load_attr()
        # NOTE(review): presumably runs get_spu_data once per bucket of
        # dgk_dict, each in its own thread - confirm in fun/param_thread.py.
        ParamThread.thread_func_ten(self.get_spu_data, self.dgk_dict)
if __name__ == '__main__':
    # Script entry point. The instance is not named ``all`` because that
    # would shadow the builtin of the same name.
    task = DbAttrValueAll()
    task.run()
\ No newline at end of file
param_task/task.py
View file @
3d410c52
# -*- coding: utf-8 -*-
# !/usr/bin/env python
from
utils.date_handler
import
DateHandler
from
utils.log_handler
import
LogHandler
from
fun.param_data
import
ParamData
from
fun.param_trans
import
ParamTrans
from
fun.param_conn
import
ParamConn
...
...
@@ -10,9 +11,12 @@ from config.db_config import DBConn
class Task:
    """Base class holding the helpers and connections used by the param_task jobs."""

    def __init__(self):
        # Helper classes are stored as classes, not instances.
        self.DBConn = DBConn
        self.param_data = ParamData
        self.param_conn = ParamConn

        self.now = DateHandler.now_datetime()
        self.param_trans = ParamTrans()

        # Connections are opened eagerly, in this order.
        db_factory = self.DBConn
        self.class_db = db_factory.db_class()
        self.bigdata_db = db_factory.db_bigdata()
        self.spu_db = db_factory.db_spu()
        self.redis = self.param_conn.local_redis_conn()
\ No newline at end of file
param_task/value_task_all.py
View file @
3d410c52
...
...
@@ -10,6 +10,7 @@ class ValueTaskAll(Task):
def
__init__
(
self
):
super
()
.
__init__
()
self
.
attrs_id
=
[]
self
.
attrs_id_effect
=
[]
self
.
log1
=
LogHandler
(
'value_task_all_info'
,
stream
=
True
,
file
=
True
)
"""
...
...
@@ -18,15 +19,11 @@ class ValueTaskAll(Task):
def
load_attr
(
self
):
self
.
log1
.
info
(
"加载参数"
)
# 有效参数
effect_attrs
=
[]
effect_1
=
ExcelHandler
.
read_to_excel
(
"C:
\\
data
\\
lx
\\
dgk_1.xlsx"
,
"Sheet1"
,
1
)
effect_1
.
pop
(
0
)
[
effect_attrs
.
append
(
int
(
row
[
0
]))
for
row
in
effect_1
]
effect_2
=
ExcelHandler
.
read_to_excel
(
"C:
\\
data
\\
lx
\\
dgk_2.xlsx"
,
"Sheet1"
,
1
)
effect_2
.
pop
(
0
)
[
effect_attrs
.
append
(
int
(
row
[
4
]))
for
row
in
effect_2
]
effect
=
ExcelHandler
.
read_to_excel
(
"/data2/param_data/dgk.xlsx"
,
"Sheet1"
,
1
)
effect
.
pop
(
0
)
[
self
.
attrs_id_effect
.
append
(
int
(
row
[
0
]))
for
row
in
effect
]
# 全部参数
all_data
=
ExcelHandler
.
read_to_excel
(
"
C:
\\
data
\\
lx
\\
dgk_all.xlsx"
,
"Sheet1"
,
1
)
all_data
=
ExcelHandler
.
read_to_excel
(
"
/data2/param_data/
dgk_all.xlsx"
,
"Sheet1"
,
1
)
all_data
.
pop
(
0
)
for
row
in
all_data
:
# 参数id、二级分类id、基础参数名、猎芯参数名
...
...
@@ -35,7 +32,7 @@ class ValueTaskAll(Task):
basic_attr_name
=
row
[
5
]
is_important
=
row
[
6
]
# 非重要参数 + 非有效参数
if
is_important
==
1
and
param_id
not
in
effect_attrs
:
if
is_important
==
1
and
param_id
not
in
self
.
attrs_id_effect
:
class_id
=
self
.
param_data
.
get_class_id
(
second_classify_id
,
self
.
class_db
)
unit_id
=
self
.
param_trans
.
empty_unit_id
self
.
attrs_id
.
append
(
param_id
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment