Commit 6e53d19f by lichenggang

完善数量列的判断,修改品牌列和类别列的判断

parent de473695
......@@ -77,7 +77,8 @@ class BasePredictor:
collect_num = []
for kw in data:
if str(kw).startswith('0') and len(str(kw)) >= 2:
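# If an element starts with '0' and its length is at least 2, it may be a package (封装) value or the value 0.0; once 0.0 is ruled out, it is almost certainly a package value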
if str(kw).startswith('0') and len(str(kw)) >= 2 and str(kw) != '0.0':
return False
if isinstance(kw, int) or isinstance(kw, float) or isNumberCol(str(kw)):
collect_num.append(kw)
......@@ -165,4 +166,4 @@ class BasePredictor:
if __name__ == "__main__":
print(BasePredictor.is_num([100,200]))
print(BasePredictor.is_num([3400.0, 5920.0, 4849.0, 2544.0, 3270.0, 52751.0, 2031.0, 5302.0, 726.0, 1247.0, 2472.0, 689.0, 6049.0, 26796.0, 6164.0, 1605.0, 4346.0, 640.0, 960.0, 960.0, 320.0, 160.0, 860.0, 160.0, 320.0, 3183.0, 10151.0, 640.0, 130.0, 1237.0, 800.0, 960.0, 3740.0, 17701.0, 2146.0, 1280.0, 160.0, 1120.0, 160.0, 480.0, 960.0, 480.0, 160.0, 4717.0, 160.0, 160.0, 160.0, 640.0, 160.0, 320.0, 160.0, 160.0, 800.0, 800.0, 480.0, 1600.0, 155.0, 960.0, 320.0, 944.0, 160.0, 160.0, 1280.0, 1852.0, 7680.0, 7680.0, 2880.0, 160.0, 224.0, 480.0, 480.0, 640.0, 160.0, 640.0, 320.0, 1760.0, 640.0, 480.0, 960.0, 160.0, 160.0, 160.0, 160.0, 1920.0, 160.0, 5600.0, 480.0, 2560.0, 160.0, 160.0, 160.0, 160.0, 160.0, 1280.0, 160.0, 160.0, 160.0, 160.0, 160.0, 320.0, 0.0, 160.0, 160.0]))
......@@ -116,6 +116,9 @@ class DicPredict(BasePredictor):
# 如果该列被预测为其他, 则不做改动
if result == 'other':
continue
# 如果类别列被预测为品牌, 则不做改动, 此处是基于目前的模型容易把中文多的列预测为品牌, 无奈之下所做的逻辑
if temp_pre_model_res.get(k) == '类别' and result == 'brand_name':
continue
temp_pre_model_res[k] = EN_TO_ZH_MAP[result]
# 若有多个参数列或型号列,则进行不同率的比较, 不同率最高的选为目标列
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment