模型预测

efc76efc · lichenggang · d8dd3982 · efc76efc · efc76efc · efc76efc
Commit efc76efc authored May 29, 2020 by lichenggang
Showing with 101 additions and 51 deletions
extractor
model
predic_fac.py
predict/base_handler.py
predict/dict_predict.py
utils/excel_manager.py
--- a/extractor
+++ b/extractor
--- a/model
+++ b/model
--- a/predic_fac.py
+++ b/predic_fac.py
@@ -19,9 +19,9 @@ class PredictorFac():
        elif isinstance(data, dict):
            return self.dict_predictor
-    def predict(self, dic_data):
+    def predict(self, dic_data, predict_type='all'):
        predictor = self._get_predictor(dic_data)
-        res = predictor.predict(dic_data)
+        res = predictor.predict(dic_data, predict_type)
        return res
@@ -29,8 +29,8 @@ if __name__ == "__main__":
    def get_test_data():
        import pandas as pd
        import json
-        df = pd.read_excel(r'C:\Users\ICHUNT\Desktop\bomlist\DZ0901_V1.4_BOM.xlsx', header=None)
+        df = pd.read_excel(r'C:\Users\ICHUNT\Desktop\bomlist\51AB0571_ CCTV ASST询价_SZIMS.xlsx', header=None, sheet_name='1')
-        df.fillna(' ', inplace=True)
+        df.fillna('?', inplace=True)
        dic_dft = df.to_dict(orient='list')
        return json.dumps(dic_dft)
@@ -40,5 +40,5 @@ if __name__ == "__main__":
    data = get_test_data()
    p = PredictorFac(model_config)
    data = json.loads(data)
-    res = p.predict(data)
+    res = p.predict(data,predict_type='model')
    print(res)
--- a/predict/base_handler.py
+++ b/predict/base_handler.py
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
+import re
 from utils.log_manager import get_logger
 from utils.robots import dd_send_msg
 import pandas as pd
+RIGHT_LEVEL = 0.7
 class BasePredictor:
    '''
        预测类基类
@@ -20,6 +21,7 @@ class BasePredictor:
        self.robot_msg = dd_send_msg
        self.pd = pd
    def predict(self, key):
        raise NotImplementedError
@@ -37,3 +39,44 @@ class BasePredictor:
        deal_list = [round(i, 3) for i in proba[0].tolist()]
        dic_proba = {k: v for k, v in zip(classes, deal_list)}
        return predictions[0], dic_proba
+    def isseq(self, data):
+        """
+        Predict whether a column is a serial-number (sequence) column.
+
+        Only ``int``/``float`` cells are considered; each one is truncated
+        with ``int()`` and the resulting list must be non-decreasing.
+
+        :param data: iterable with the cell values of one column
+        :return: True when at least one numeric cell exists and the numeric
+                 cells are non-decreasing in column order, otherwise False
+        """
+        import math
+
+        collect_order = []
+        for kw in data:
+            if not isinstance(kw, (int, float)):
+                continue
+            # int() raises ValueError on NaN and OverflowError on inf, so
+            # non-finite floats are skipped like any other non-numeric cell.
+            if isinstance(kw, float) and not math.isfinite(kw):
+                continue
+            collect_order.append(int(kw))
+        if not collect_order:
+            # A column without any numeric cell is never a sequence column.
+            return False
+        return self.isIncrease(collect_order, len(collect_order))
+    def isnum(self, data):
+        """
+        Predict whether a column is a quantity column.
+
+        A cell counts as a quantity when it is an ``int`` or when
+        ``isNumberCol`` accepts it. The column qualifies when the share of
+        such cells, rounded to 3 decimals, reaches ``RIGHT_LEVEL``.
+
+        :param data: sized collection with the cell values of one column
+        :return: True when the quantity ratio is at least ``RIGHT_LEVEL``,
+                 False otherwise (including for an empty column)
+        """
+        if len(data) == 0:
+            # No cells means no ratio; avoids a ZeroDivisionError below.
+            return False
+        # NOTE(review): float cells are not counted here, only ints and
+        # matching strings - confirm this is intended for spreadsheet input.
+        collect_num = [kw for kw in data if isinstance(kw, int) or self.isNumberCol(kw)]
+        rate = round(len(collect_num) / len(data), 3)
+        return rate >= RIGHT_LEVEL
+    def isIncrease(self, arr, size):
+        """
+        Check whether the first ``size`` elements of ``arr`` are
+        non-decreasing (equal neighbours are allowed).
+
+        Implemented as a loop rather than recursion so that long columns do
+        not exhaust the interpreter's recursion limit.
+
+        :param arr: indexable sequence of mutually comparable values
+        :param size: number of leading elements to check
+        :return: True when every checked element is >= its predecessor;
+                 a prefix of length 0 or 1 is trivially non-decreasing
+        """
+        if size <= 1:
+            return True
+        return all(arr[i] >= arr[i - 1] for i in range(1, size))
+    def isNumberCol(self, kw):
+        """
+        Test whether a single cell value looks like a quantity string.
+
+        A string matches when it starts with one or more digits followed by
+        either ``K`` (case-insensitive) or one to three CJK characters.
+
+        :param kw: cell value of any type
+        :return: the ``re.Match`` object (or None) for a string input,
+                 False for any non-string input
+        """
+        # Non-string cells can never match the pattern.
+        if not isinstance(kw, str):
+            return False
+        flags = re.M | re.I
+        return re.match(r'(\d+)((K)|([\u4E00-\u9FA5]{1,3}))$', kw, flags)
+    def valid_seq(self, data):
+        """Return the negation of ``isseq``: True when the column is not a sequence column."""
+        is_sequence = self.isseq(data)
+        return not is_sequence
+    def valid_num(self, data):
+        """Return the negation of ``isnum``: True when the column is not a quantity column."""
+        is_quantity = self.isnum(data)
+        return not is_quantity
\ No newline at end of file
--- a/predict/dict_predict.py
+++ b/predict/dict_predict.py
@@ -96,13 +96,22 @@ class DicPredict(BasePredictor):
        :param dic_data:
        :return:
        """
+        print(dic_data)
        prob_columns = []
+        temp_pre_model_res = {}
        for k, v in dic_data.items():
-            if self.valid_chain(v):
+            bol = self.v_chain(v)
+            if bol:
+                print(k, bol)
                prob_columns.append(k)
+                continue
+            if self.isnum(v):
+                temp_pre_model_res[k] = '数量'
+                continue
+            if self.isseq(v):
+                temp_pre_model_res[k] = '序号'
        temp_dic_data = {k: list(filter(lambda x: x != PLACEHOLDER, dic_data[k]))for k in prob_columns}
-        temp_pre_model_res = {}
        for k, v in temp_dic_data.items():
            li_single_pred_res = []
            for string in v:
@@ -111,35 +120,28 @@ class DicPredict(BasePredictor):
            result = Counter(li_single_pred_res)
            # [('brand_name', 4), ('goods_name', 3), ('param', 2)]
            li_sort = sorted(result.items(), key=lambda x: x[1], reverse=True)
+            print(k, li_sort)
            temp_pre_model_res[k] = en_to_zh_map[li_sort[0][0]]
        print(temp_pre_model_res)
        # pre_model_res = {}
-    def predict(self, dic_data):
+    def predict(self, dic_data, predict_type='all'):
        dic_data = self.pre_deal(dic_data)
-        pre_id_res = self.pre_predict(dic_data)
-        if pre_id_res:
+        if predict_type == 'all':
-            return pre_id_res
+            pre_id_res = self.pre_predict(dic_data)
+            if pre_id_res:
-        if len(dic_data) > 0:
+                return pre_id_res
-            self.order_predict(dic_data[0])
-        pass
+        elif predict_type == 'model':
+            model_id_res = self.model_predict(dic_data)
-    def order_predict(self, data):
+            if model_id_res:
-        collect_num = [int(kw) for kw in data if isinstance(kw, float) or isinstance(kw, int)]
+                return model_id_res
-        judge = self.IsIncrease(collect_num, len(collect_num))
+        elif predict_type == 'pre':
-        print('judge: ' + str(judge))
+            pre_id_res = self.pre_predict(dic_data)
-        return judge
+            if pre_id_res:
+                return pre_id_res
-    """
-    判断列表元素是否递增
-    """
-    def IsIncrease(self, arr, size):
-        if size == 1:
-            return True
-        return (arr[size - 1] >= arr[size - 2]) and self.IsIncrease(arr, size - 1)
    def pre_deal(self, dic_data):
        new_dic_data = {}
@@ -154,5 +156,16 @@ class DicPredict(BasePredictor):
        return new_dic_data
-    def valid_chain(self, li):
+    def v_chain(self, li):
-        pass
+        """
\ No newline at end of file
+        验证链,验证方法中某个环节返回了False则返回False
+        :param li:
+        :return:
+        """
+        for fun_name in dir(self):
+            if fun_name.startswith('valid_'):
+                fun = getattr(self, fun_name)
+                if not fun(li):
+                    return False
+        else:
+            return True
--- a/utils/excel_manager.py
+++ b/utils/excel_manager.py
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
-import xlwt
+l=[' ', ' ', ' ', '不需要报价', ' ', ' ', ' ', '不需要报价', ' ', ' ', '不需要报价', ' ', ' ', ' ', '不需要报价', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
-import xlrd
+for v in l:
+    # 去掉空置率大于等于0.8的列
+    counter = 0
-def read_from_excel(file_name, sheet_name):
+    for item in v:
-    wb = xlrd.open_workbook(file_name)
+        if str(item).strip() == PLACEHOLDER:
-    sheet = wb.sheet_by_name(sheet_name)
+            counter += 1
-    row = sheet.nrows
+    if counter / len(v) <= NAN_RATE:
-    col = sheet.ncols
+        new_dic_data[k] = v
-    result_dict = {}
\ No newline at end of file
-    for i in range(col):
-        col_list = []
-        for j in range(row): col_list.append(sheet.cell_value(j, i))
-        result_dict[i] = col_list
-    return result_dict