Commit 7052c283 by lzzzzl

init

parents
<component name="InspectionProjectProfileManager">
<settings>
<option name="useProjectProfile" value="false" />
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.5 (C:\Program Files\python3\python.exe)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/mlbom.iml" filepath="$PROJECT_DIR$/.idea/mlbom.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="74424fd9-281f-4ea1-84e9-bc84e7952456" name="Default" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="bom.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/bom.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="720">
<caret line="20" column="23" lean-forward="true" selection-start-line="20" selection-start-column="23" selection-end-line="20" selection-end-column="23" />
<folding>
<element signature="e#0#91#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="recognize.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/fun/recognize.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="5760">
<caret line="160" column="31" lean-forward="false" selection-start-line="160" selection-start-column="31" selection-end-line="160" selection-end-column="31" />
<folding>
<element signature="e#0#91#0" expanded="true" />
<marker date="1565074123029" expanded="true" signature="1838:1901" ph="SELECT 1 FRO... lie_bom_class_name" />
<marker date="1565074123029" expanded="true" signature="2456:2519" ph="SELECT 1 FRO... lie_bom_brand_name" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="tfidf.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/model/tfidf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="6" column="13" lean-forward="false" selection-start-line="6" selection-start-column="13" selection-end-line="6" selection-end-column="13" />
<folding />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/model/tfidf.py" />
<option value="$PROJECT_DIR$/config/db.py" />
<option value="$PROJECT_DIR$/config/conn_list.py" />
<option value="$PROJECT_DIR$/fun/function.py" />
<option value="$PROJECT_DIR$/fun/recognize.py" />
<option value="$PROJECT_DIR$/bom.py" />
</list>
</option>
</component>
<component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
<component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
<component name="JsGulpfileManager">
<detection-done>true</detection-done>
<sorting>DEFINITION_ORDER</sorting>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="-11" />
<option name="y" value="-11" />
<option name="width" value="2278" />
<option name="height" value="1466" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="model" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="fun" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="mlbom" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="config" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="settings.editor.selected.configurable" value="preferences.lookFeel" />
<property name="WebServerToolWindowFactoryState" value="false" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\59702\PycharmProjects\mlbom" />
<recent name="C:\Users\59702\PycharmProjects\mlbom\fun" />
<recent name="C:\Users\59702\PycharmProjects\mlbom\model" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager">
<configuration default="true" type="DjangoTestsConfigurationType" factoryName="Django tests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="TARGET" value="" />
<option name="SETTINGS_FILE" value="" />
<option name="CUSTOM_SETTINGS" value="false" />
<option name="USE_OPTIONS" value="false" />
<option name="OPTIONS" value="" />
<method />
</configuration>
<configuration default="true" type="JavaScriptTestRunnerJest" factoryName="Jest">
<node-interpreter value="project" />
<working-dir value="" />
<envs />
<scope-kind value="ALL" />
<method />
</configuration>
<configuration default="true" type="JavaScriptTestRunnerProtractor" factoryName="Protractor">
<config-file value="" />
<node-interpreter value="project" />
<envs />
<method />
</configuration>
<configuration default="true" type="JavascriptDebugType" factoryName="JavaScript Debug">
<method />
</configuration>
<configuration default="true" type="PyBehaveRunConfigurationType" factoryName="Behave">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="ADDITIONAL_ARGS" value="" />
<method />
</configuration>
<configuration default="true" type="PyLettuceRunConfigurationType" factoryName="Lettuce">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="ADDITIONAL_ARGS" value="" />
<method />
</configuration>
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="SCRIPT_NAME" value="" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="true" type="Tox" factoryName="Tox">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<module name="mlbom" />
<method />
</configuration>
<configuration default="true" type="js.build_tools.gulp" factoryName="Gulp.js">
<method />
</configuration>
<configuration default="true" type="js.build_tools.npm" factoryName="npm">
<command value="run" />
<scripts />
<node-interpreter value="project" />
<envs />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Doctests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Unittests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="mlbom" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
<option name="_new_additionalArguments" value="&quot;&quot;" />
<option name="_new_target" value="&quot;.&quot;" />
<option name="_new_targetType" value="&quot;PATH&quot;" />
<method />
</configuration>
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="74424fd9-281f-4ea1-84e9-bc84e7952456" name="Default" comment="" />
<created>1565072592998</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1565072592998</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-11" y="-11" width="2278" height="1466" extended-state="6" />
<layout>
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="processedProjectFiles" value="true" />
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager />
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/config/supplier.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/config/db.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="288">
<caret line="8" column="1" lean-forward="false" selection-start-line="8" selection-start-column="1" selection-end-line="8" selection-end-column="1" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/fun/db_handler.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/config/conn_list.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="396">
<caret line="11" column="0" lean-forward="true" selection-start-line="11" selection-start-column="0" selection-end-line="11" selection-end-column="0" />
<folding>
<element signature="e#0#14#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/model/tfidf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="6" column="13" lean-forward="false" selection-start-line="6" selection-start-column="13" selection-end-line="6" selection-end-column="13" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/fun/recognize.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="5760">
<caret line="160" column="31" lean-forward="false" selection-start-line="160" selection-start-column="31" selection-end-line="160" selection-end-column="31" />
<folding>
<element signature="e#0#91#0" expanded="true" />
<marker date="1565074123029" expanded="true" signature="1838:1901" ph="SELECT 1 FRO... lie_bom_class_name" />
<marker date="1565074123029" expanded="true" signature="2456:2519" ph="SELECT 1 FRO... lie_bom_brand_name" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/bom.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="720">
<caret line="20" column="23" lean-forward="true" selection-start-line="20" selection-start-column="23" selection-end-line="20" selection-end-column="23" />
<folding>
<element signature="e#0#91#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
No preview for this file type
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
from sklearn.externals import joblib
from fun.recognize import Recognize
import pandas as pd
import numpy as np
import pymysql
def main():
    """Guess which spreadsheet column holds which BOM field and print the result.

    Reads the BOM template workbook, then, for each field in priority order
    (index, quantity, category, brand, parameter, part number, package),
    claims the first not-yet-claimed column accepted by the matching
    ``Recognize`` check. Finally prints the claimed column positions in the
    order they were claimed.
    """
    # fillna() returns a new frame. The original discarded the results of
    # dropna()/fillna() and then filled column by column; a single assignment
    # has the same effect.
    test = pd.read_excel('BOM选型标准模板_ICkey2.xlsx').fillna('NA')

    # Deliberately not named ``re`` so the stdlib module name is not hidden.
    recognizer = Recognize()
    columns = list(test.columns)

    # (field label, check) pairs in priority order. The last four checks were
    # previously written as ``re(test[col])``, which tries to call the
    # Recognize instance itself and fails; each field now uses its own check.
    detectors = (
        ('索引', recognizer.verify_index),       # index
        ('数量', recognizer.verify_number),      # quantity
        ('分类', recognizer.verify_classify),    # category
        ('品牌', recognizer.verify_brand),       # brand
        ('参数', recognizer.verify_param),       # parameter
        ('型号', recognizer.verify_goods_name),  # part number
        ('封装', recognizer.verify_encap),       # package
    )

    # Column position found for each field; -1 means "not found".
    col_index = {field: -1 for field, _ in detectors}
    # Positions already claimed, in claim order.
    col_list = []

    for field, verify in detectors:
        for i, col in enumerate(columns):
            if i not in col_list and verify(test[col]):
                col_index[field] = i
                col_list.append(i)
                break

    print(col_list)
if __name__ == '__main__':
main()
import pymysql
from config.db import *
class ConnList:
    """Factory for the database connections used by the project."""

    @staticmethod
    def Dashboard():
        """Open and return a new PyMySQL connection to the dashboard database.

        Settings are read from ``dashboard_server``, which is expected to be
        provided by the ``from config.db import *`` line of this module.
        The caller owns the returned connection.
        """
        conf = dashboard_server
        # Keyword arguments instead of positional ones: PyMySQL 1.0 and later
        # reject positional connect() arguments, while older releases accept
        # these same keywords.
        return pymysql.connect(
            host=str(conf['host']),
            user=conf['user'],
            password=conf['password'],
            database=conf['db_name'],
            charset='utf8',
        )
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Connection settings for the dashboard MySQL database.
# NOTE(review): the password is committed in plain text; consider loading it
# from the environment or a secrets store instead.
dashboard_server = dict(
    host='localhost',
    user='dashboard',
    password='ichunt5Dashboard@',
    db_name='dashboard',
)
\ No newline at end of file
import traceback
import requests
import random
import hashlib
import string
from hdfs import Client
from urllib import parse
from utils.date_handler import DateHandler
class DBHandler:
    """Static helpers for MySQL access, HDFS transfer and the HTTP search API."""

    # WebHDFS endpoint shared by hdfs_read() and hdfs_upload().
    _HDFS_URL = "http://172.18.137.35:50170"
    # Page size used by scroll_read() to work out how many more pages to fetch.
    _SCROLL_PAGE_SIZE = 1000

    @staticmethod
    def read(db, sql):
        """Run a query and return every row.

        :param db: open DB-API connection.
        :param sql: complete SQL statement to execute.
        :return: whatever ``cursor.fetchall()`` yields; an empty tuple when
            the statement fails (the error is printed, not raised).
        """
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        except Exception:
            # Best effort: report the failure and hand back "no rows".
            db.rollback()
            traceback.print_exc()
            return ()
        finally:
            cursor.close()

    @staticmethod
    def _execute_write(db, sql):
        """Execute one data-changing statement and commit it.

        On failure the transaction is rolled back and the traceback and the
        offending SQL are printed; nothing is raised.
        """
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            db.commit()
        except Exception:
            db.rollback()
            traceback.print_exc()
            print(sql)
        finally:
            cursor.close()

    @staticmethod
    def update(db, sql):
        """Execute an UPDATE statement and commit (errors are printed)."""
        DBHandler._execute_write(db, sql)

    @staticmethod
    def insert(db, sql):
        """Execute an INSERT statement and commit (errors are printed)."""
        DBHandler._execute_write(db, sql)

    @staticmethod
    def delete(db, sql):
        """Execute a DELETE statement and commit (errors are printed)."""
        DBHandler._execute_write(db, sql)

    @staticmethod
    def _hdfs_client():
        """Build the HDFS client used by the hdfs_* helpers."""
        return Client(DBHandler._HDFS_URL, root="/", timeout=100, session=False)

    @staticmethod
    def hdfs_read(file):
        """Read an HDFS file and return its content split into lines.

        :param file: HDFS path of the file to read.
        """
        with DBHandler._hdfs_client().read(file) as reader:
            return reader.read().splitlines()

    @staticmethod
    def hdfs_upload(hdfs_path, local_path):
        """Upload a local file to HDFS using the client's default options.

        Notes on ``Client.upload(hdfs_path, local_path, overwrite=False,
        n_threads=1, temp_dir=None, chunk_size=65536, progress=None,
        cleanup=True, **kwargs)``, carried over from the original comment:

        - overwrite: whether an existing remote file is replaced.
        - n_threads: number of threads to start.
        - temp_dir: with overwrite=True and an existing remote file, the
          upload goes here first and is swapped in once complete.
        - chunk_size: size of each uploaded chunk.
        - progress: callback invoked per chunk_size bytes with the upload
          path and the bytes transferred; -1 is passed when finished.
        - cleanup: delete the uploaded files if an error occurs.
        """
        DBHandler._hdfs_client().upload(hdfs_path=hdfs_path, local_path=local_path)

    @staticmethod
    def scroll_read(url, body, key):
        """Read a paged result set from the search API.

        Posts *body* to *url*, then, when the reported total exceeds one page,
        posts the returned ``scroll_id`` once per additional page and appends
        each page's ``data[key]`` items to the first page's list.

        :return: the accumulated list of items.
        """
        data = requests.post(url, data=body).json()['data']
        total = data['total']
        final_result = data[key]
        scroll_id = data['scroll_id']
        if total > DBHandler._SCROLL_PAGE_SIZE:
            for _ in range(int(total // DBHandler._SCROLL_PAGE_SIZE)):
                page = requests.post(url, data={"scroll_id": scroll_id})
                final_result.extend(page.json()['data'][key])
        return final_result

    @staticmethod
    def esEncryptData(key, url):
        """Fetch data from a signed search endpoint.

        Builds ``sign = md5(key + urlencode({check_time, salt}).lower() + salt)``
        and posts ``check_time``, ``salt`` and ``sign`` to *url*.

        :param key: shared secret used for signing; pass it in, never hard-code it.
        :param url: endpoint to post to.
        :return: the ``data`` member of the JSON response.
        """
        # Current time as produced by the project's DateHandler helper.
        now_timestamp = DateHandler.now_datetime()
        # Four distinct random alphanumeric characters, lower-cased.
        ran_str = ''.join(random.sample(string.ascii_letters + string.digits, 4)).lower()
        params_dict = {'check_time': now_timestamp, 'salt': ran_str}
        # Signature input: secret + lower-cased query string + salt.
        sign = parse.urlencode(params_dict).lower()
        sign = key + sign + str(ran_str)
        sign = hashlib.md5(sign.encode(encoding='UTF-8')).hexdigest()
        search_body = {"check_time": now_timestamp, "salt": ran_str, "sign": sign}
        r = requests.post(url, data=search_body)
        return r.json()['data']
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
from sklearn.externals import joblib
from config.conn_list import ConnList
from fun.db_handler import DBHandler
import pandas, numpy, string
import pandas as pd
import numpy as np
class Recognize:
    """Heuristics deciding whether a spreadsheet column holds a given BOM field.

    Every ``verify_*`` method takes one column of cell values (anything with
    ``len()`` and iteration) and returns ``True`` when at least
    ``MATCH_RATIO`` of the cells look like that field. Empty columns never
    match.
    """

    # Share of cells that must match for the column to be accepted.
    MATCH_RATIO = 0.6
    # Largest allowed gap between consecutive index values. The original code
    # used 10 while its comment said 100; adjust here if 100 was intended.
    MAX_INDEX_STEP = 10

    _CLASS_SQL = "SELECT 1 FROM lie_bom_class_name WHERE class_name LIKE %s LIMIT 1"
    _BRAND_SQL = "SELECT 1 FROM lie_bom_brand_name WHERE brand_name LIKE %s LIMIT 1"

    def __init__(self):
        """Open the dashboard DB connection and load the trained model files."""
        self.db = ConnList.Dashboard()
        # NOTE: joblib files are pickles; only load model files you trust.
        self.model = joblib.load("train_model.m")
        self.tfidf_vect_ngram_chars = joblib.load("tfidf_vect_ngram_chars.m")

    @staticmethod
    def _to_non_negative_int(value):
        """Return ``int(value)`` when it converts and is >= 0, otherwise None."""
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return None
        return number if number >= 0 else None

    def _accepted(self, point, total):
        """Return True when *point* of *total* cells reaches MATCH_RATIO."""
        return total > 0 and point / total >= self.MATCH_RATIO

    def _has_match(self, sql, fragment):
        """Return True when *sql* finds a row whose name contains *fragment*.

        The cell text is sent as a bound parameter instead of being pasted
        into the statement, so quotes in a cell can neither break nor alter
        the query. ``%`` and ``_`` inside the cell still act as LIKE
        wildcards, as before. Query errors are printed and count as
        "no match".
        """
        cursor = self.db.cursor()
        try:
            cursor.execute(sql, ('%' + fragment + '%',))
            return cursor.fetchone() is not None
        except Exception:
            import traceback
            self.db.rollback()
            traceback.print_exc()
            return False
        finally:
            cursor.close()

    def _predicted_as(self, data, label):
        """Return True when the model labels enough cells of *data* as *label*."""
        total = len(data)
        if total == 0:
            return False
        try:
            features = self.tfidf_vect_ngram_chars.transform(pd.Series(data))
            predictions = self.model.predict(features)
        except Exception:
            # Best effort, as in the original: a column the vectorizer or the
            # model cannot process is simply not a match.
            return False
        point = sum(1 for pre in predictions if pre == label)
        return self._accepted(point, total)

    def verify_index(self, data):
        """Return True when the column looks like a running row index.

        A cell counts when it is a non-negative integer that is larger than
        the last counted value by at most ``MAX_INDEX_STEP``; the first
        integer cell always counts. Cells that are not integers are skipped
        instead of ending the scan.
        """
        point = 0
        max_num = None
        for cell in data:
            number = self._to_non_negative_int(cell)
            if number is None:
                continue
            # The original tested ``max_num - i <= 10``, which is always true
            # once ``i > max_num``; the step must be measured the other way.
            if max_num is None or 0 < number - max_num <= self.MAX_INDEX_STEP:
                max_num = number
                point += 1
        return self._accepted(point, len(data))

    def verify_number(self, data):
        """Return True when the column looks like a quantity column.

        A cell counts when ``int()`` accepts it and the result is >= 0.
        """
        point = sum(1 for cell in data if self._to_non_negative_int(cell) is not None)
        return self._accepted(point, len(data))

    def verify_classify(self, data):
        """Return True when enough cells occur in ``lie_bom_class_name.class_name``."""
        point = sum(1 for cell in data if self._has_match(self._CLASS_SQL, str(cell)))
        return self._accepted(point, len(data))

    def verify_brand(self, data):
        """Return True when enough cells occur in ``lie_bom_brand_name.brand_name``.

        Only the text before the first ``(`` is looked up, upper-cased and
        trimmed. The original computed this value but then queried with the
        raw cell.
        """
        point = 0
        for cell in data:
            brand_name = str(cell).split('(')[0].strip().upper()
            if self._has_match(self._BRAND_SQL, brand_name):
                point += 1
        return self._accepted(point, len(data))

    def verify_param(self, data):
        """Return True when the model labels enough cells as ``'param'``."""
        return self._predicted_as(data, 'param')

    def verify_goods_name(self, data):
        """Return True when the model labels enough cells as ``'goods_name'``."""
        return self._predicted_as(data, 'goods_name')

    def verify_encap(self, data):
        """Return True when the model labels enough cells as ``'encap'``."""
        return self._predicted_as(data, 'encap')
\ No newline at end of file
No preview for this file type
No preview for this file type
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
import pandas, numpy, string
import pandas as pd
import numpy as np
# Training data
train = pd.read_excel('lie_bom_goods_name_train.xls')
# dropna()/fillna() return new frames; the original discarded both results,
# so missing cells reached the vectorizers unchanged. Rows without a text or
# a label cannot be used for training and are dropped; any other gaps are
# filled with 'null' as the original intended.
train = train.dropna(subset=['target', 'value'])
train = train.fillna('null')
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(train['target'], train['value'])

# NOTE(review): each vectorizer below is fitted on the full 'target' column,
# validation rows included - confirm that this is intended.

# Word-level TF-IDF
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', max_features=5000)
tfidf_vect.fit(train['target'])
xtrain_tfidf = tfidf_vect.transform(train_x)
xvalid_tfidf = tfidf_vect.transform(valid_x)

# Word n-gram TF-IDF (2- and 3-grams)
tfidf_vect_ngram = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', ngram_range=(2, 3), max_features=5000)
tfidf_vect_ngram.fit(train['target'])
xtrain_tfidf_ngram = tfidf_vect_ngram.transform(train_x)
xvalid_tfidf_ngram = tfidf_vect_ngram.transform(valid_x)

# Character n-gram TF-IDF (2- and 3-grams). token_pattern is only used with
# analyzer='word', so it is not passed here.
tfidf_vect_ngram_chars = TfidfVectorizer(analyzer='char', ngram_range=(2, 3), max_features=5000)
tfidf_vect_ngram_chars.fit(train['target'])
xtrain_tfidf_ngram_chars = tfidf_vect_ngram_chars.transform(train_x)
xvalid_tfidf_ngram_chars = tfidf_vect_ngram_chars.transform(valid_x)
\ No newline at end of file
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment