完善数量列的判断,修改品牌列和类别列的判断

6e53d19f · lichenggang · de473695 · 6e53d19f · 6e53d19f
Commit 6e53d19f authored Jun 09, 2020 by lichenggang
Showing with 6 additions and 2 deletions
predict/base_handler.py
predict/dict_predict.py
--- a/predict/base_handler.py
+++ b/predict/base_handler.py
@@ -77,7 +77,8 @@ class BasePredictor:

        collect_num = []
        for kw in data:
-            if str(kw).startswith('0') and len(str(kw)) >= 2:
+            # 若某元素以0开头且长度不小于2,那么可能是封装也可能是0.0, 再排除掉0.0那基本就是封装了
+            if str(kw).startswith('0') and len(str(kw)) >= 2 and str(kw) != '0.0':
                return False
            if isinstance(kw, int) or isinstance(kw, float) or isNumberCol(str(kw)):
                collect_num.append(kw)
@@ -165,4 +166,4 @@ class BasePredictor:


 if __name__ == "__main__":
-    print(BasePredictor.is_num([100,200]))
+    print(BasePredictor.is_num([3400.0, 5920.0, 4849.0, 2544.0, 3270.0, 52751.0, 2031.0, 5302.0, 726.0, 1247.0, 2472.0, 689.0, 6049.0, 26796.0, 6164.0, 1605.0, 4346.0, 640.0, 960.0, 960.0, 320.0, 160.0, 860.0, 160.0, 320.0, 3183.0, 10151.0, 640.0, 130.0, 1237.0, 800.0, 960.0, 3740.0, 17701.0, 2146.0, 1280.0, 160.0, 1120.0, 160.0, 480.0, 960.0, 480.0, 160.0, 4717.0, 160.0, 160.0, 160.0, 640.0, 160.0, 320.0, 160.0, 160.0, 800.0, 800.0, 480.0, 1600.0, 155.0, 960.0, 320.0, 944.0, 160.0, 160.0, 1280.0, 1852.0, 7680.0, 7680.0, 2880.0, 160.0, 224.0, 480.0, 480.0, 640.0, 160.0, 640.0, 320.0, 1760.0, 640.0, 480.0, 960.0, 160.0, 160.0, 160.0, 160.0, 1920.0, 160.0, 5600.0, 480.0, 2560.0, 160.0, 160.0, 160.0, 160.0, 160.0, 1280.0, 160.0, 160.0, 160.0, 160.0, 160.0, 320.0, 0.0, 160.0, 160.0]))
--- a/predict/dict_predict.py
+++ b/predict/dict_predict.py
@@ -116,6 +116,9 @@ class DicPredict(BasePredictor):
            # 如果该列被预测为其他, 则不做改动
            if result == 'other':
                continue
+            # 如果类别列被预测为品牌, 则不做改动, 此处是基于目前的模型容易把中文多的列预测为品牌, 无奈之下所做的逻辑
+            if temp_pre_model_res.get(k) == '类别' and result == 'brand_name':
+                continue
            temp_pre_model_res[k] = EN_TO_ZH_MAP[result]

        # 若有多个参数列或型号列,则进行不同率的比较, 不同率最高的选为目标列