Commit 7ab7cd7a by lichenggang

Initial commit

parents
seotest
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- JetBrains IDE project file: file-encoding settings. The only setting recorded is the addBOMForNewFiles policy ("with NO BOM"). -->
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- JetBrains IDE project file: JavaScript language level and the project SDK. -->
<!-- NOTE(review): project-jdk-name refers to an interpreter called "Python 3.7 (lseo)"; presumably that name only exists on the original author's machine - confirm before relying on it. -->
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (lseo)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- JetBrains IDE project file: list of modules that make up the project. -->
<!-- The module file is addressed relative to $PROJECT_DIR$ itself. The previous value
     ($PROJECT_DIR$/../seotest/.idea/seotest.iml) left the project directory and re-entered it
     by name, which only resolves while the checkout folder is literally called "seotest". -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/seotest.iml" filepath="$PROJECT_DIR$/.idea/seotest.iml" />
    </modules>
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- JetBrains IDE module descriptor for a PYTHON_MODULE. -->
<!-- The content root is the module directory itself, the SDK is inherited (inheritedJdk), and the project test runner is set to "Unittests". -->
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="4007b60f-39fd-4f58-919d-947ddb1c6e19" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CoverageDataManager">
<SUITE FILE_PATH="coverage/seotest$test.coverage" NAME="test Coverage Results" MODIFIED="1566973873935" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="375">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="210">
<caret line="10" selection-start-line="10" selection-end-line="10" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>1366</find>
<find>article</find>
<find>2019</find>
</findStrings>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/test.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="7">
<option name="x" value="528" />
<option name="width" value="1400" />
<option name="height" value="909" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="seotest" type="b2602c69:ProjectViewProjectNode" />
<item name="seotest" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager">
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="seotest" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.test" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="4007b60f-39fd-4f58-919d-947ddb1c6e19" name="Default Changelist" comment="" />
<created>1566529509632</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1566529509632</updated>
<workItem from="1566529511079" duration="1984000" />
<workItem from="1566868724482" duration="1354000" />
<workItem from="1566899977552" duration="1870000" />
<workItem from="1567049988122" duration="587000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="5795000" />
</component>
<component name="ToolWindowManager">
<frame x="528" y="0" width="1400" height="909" extended-state="7" />
<layout>
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.18674698" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info active="true" anchor="bottom" id="Run" order="2" visible="true" weight="0.5121387" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Version Control" order="8" />
<window_info anchor="bottom" id="Database Changes" order="9" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
<window_info anchor="bottom" id="Terminal" order="11" weight="0.32947975" />
<window_info anchor="bottom" id="Python Console" order="12" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info anchor="right" id="SciView" order="3" />
<window_info anchor="right" id="Database" order="4" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="210">
<caret line="10" selection-start-line="10" selection-end-line="10" />
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
#!/usr/bin/env python
# -*- coding:utf-8 -*-
\ No newline at end of file
import tornado.web
import tornado.ioloop
from aiohttp.client_exceptions import ServerDisconnectedError
from utils.data_base import dict_data
from utils.base_handler import TornadoBaseHandler
import re
from urllib.parse import quote
from lxml import etree
from traceback import format_exc
# Values shared by both header sets below.
_ACCEPT = ("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,"
           "*/*;q=0.8,application/signed-exchange;v=b3")
_USER_AGENT = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
               "(KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36")

# Baidu search URL template; the placeholder receives the URL-quoted search term.
base_url = 'https://www.baidu.com/s?wd={}'

# Headers sent with the search-result request. No Cookie header is sent
# (a captured browser cookie used to sit here as a commented-out entry).
main_headers = {
    "Accept": _ACCEPT,
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "www.baidu.com",
    "Pragma": "no-cache",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": _USER_AGENT,
}

# Referer template for the snapshot request; the placeholder receives the quoted search term.
detail_referer = "https://www.baidu.com/s?ie=UTF-8&wd={}"

# Captures the timestamp text from the snapshot page banner
# ("the following is a snapshot of this page taken at Beijing time ...").
snap_time_pat = re.compile(r'以下是该网页在北京时间 (.*) 的快照;')

# Headers prepared for the snapshot (cache.baiducontent.com) request.
detail_headers = {
    "Accept": _ACCEPT,
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "cache.baiducontent.com",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": _USER_AGENT,
}
class SeoCheckHandler(TornadoBaseHandler):
    """GET handler that reports whether Baidu has indexed the page named by ``target``.

    The JSON response is the dict produced by ``dict_data()`` with ``baidu_url``,
    ``has_included`` and ``included_time`` filled in, or ``{'error': 1, 'msg': ...}``
    when the lookup fails.
    """

    async def get(self, *args, **kwargs):
        """Run the lookup and write the result (or an error payload) as the response."""
        try:
            data = await self.get_data()
        except ServerDisconnectedError:
            data = {'error': 1,
                    'msg': 'Temporarily unavailable'}
            self.logging.error(format_exc())
        except Exception:
            # `except Exception` (not a bare except) so task cancellation and
            # interpreter-exit signals are not converted into a JSON error body.
            # NOTE(review): the traceback is still returned to the client, as before.
            trace = format_exc()
            data = {'error': 1,
                    'msg': trace}
            self.logging.error(trace)
        self.write(data)

    async def get_data(self):
        """Query Baidu for the ``target`` argument and build the result dict.

        :return: dict from ``dict_data()``; ``has_included`` is 1 when the result page
                 contains ``div#content_left``; ``included_time`` is the timestamp text
                 found on the snapshot page, or '未知' when it cannot be determined.
        """
        target = quote(self.get_argument('target'))
        search_url = base_url.format(target)
        self.logging.info(search_url)
        async with self.proxy_request(search_url, headers=main_headers) as resp:
            t = await resp.text()
            dom = etree.HTML(t)

        data = dict_data()
        data['baidu_url'] = search_url
        if not dom.xpath('//div[@id="content_left"]'):
            data['has_included'] = 0
            return data

        data['has_included'] = 1
        # Default until a snapshot timestamp is actually found.
        data['included_time'] = '未知'

        snapshot_links = dom.xpath('//a[contains(text(),"百度快照")]/@href')
        if not snapshot_links:
            # Results are listed but no snapshot link is present; previously this
            # raised IndexError and was reported to the client as an error.
            return data

        # Per-request copy: the module-level dict is shared by concurrent requests and
        # must not be mutated. These headers (not main_headers, whose Host is
        # www.baidu.com) are the ones prepared for the snapshot host.
        snapshot_headers = dict(detail_headers)
        snapshot_headers['Referer'] = detail_referer.format(target)
        async with self.proxy_request(snapshot_links[0], headers=snapshot_headers,
                                      allow_redirects=True) as resp:
            detail = await resp.text()

        snap_time = snap_time_pat.findall(detail)
        if snap_time:
            data['included_time'] = snap_time[0]
        return data
# URL pattern -> handler class. Defined at module level so gen_app() also works when
# this module is imported (it used to exist only under the __main__ guard, which made
# gen_app() raise NameError for importers).
register_tornado_handlers = {'/seocheck': SeoCheckHandler}


def gen_app():
    """Build the Tornado application from ``register_tornado_handlers``.

    :return: a ``tornado.web.Application`` with one route per registered handler.
    """
    return tornado.web.Application(handlers=list(register_tornado_handlers.items()))


if __name__ == '__main__':
    app = gen_app()
    app.listen(9421)
    tornado.ioloop.IOLoop.current().start()
\ No newline at end of file
import re
from urllib.parse import quote
import requests
# Replays request paths found in a captured access log against the local /seocheck service.

# Captures everything between "GET " and "HTTP/1.1" on a log line.
pat = re.compile(r'GET (.*)HTTP/1.1')
from traceback import print_exc, format_exc

# Loop-invariant parts of the request URL.
prefix = 'http://127.0.0.1:9421/seocheck?target='
domain = 'www.ichunt.com'

# errors='replace' keeps a single undecodable byte in the log from aborting the run.
with open(r'C:\Users\ICHUNT\Desktop\FlumeData.1569254401514', errors='replace') as f:
    text = f.readlines()

for i in text:
    a = pat.findall(i)
    if not a:
        continue
    # The capture group includes the space before "HTTP/1.1"; drop it before quoting.
    url = prefix + domain + quote(a[0].strip())
    try:
        # The timeout keeps one stuck lookup from hanging the whole replay.
        res = requests.get(url, timeout=60).json()
        print(res)
    except Exception:
        # Report which URL failed and why; Ctrl-C is no longer swallowed.
        print(url)
        print_exc()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from utils.log_manager import LogManager
# Logging smoke test: obtain the 'test' logger writing to ./logs/test.log and emit one
# debug record (the message text says the line will not be printed or written four times).
test_log = LogManager('test').get_logger_and_add_handlers(
    log_path='./logs',
    log_filename='test.log',
)
test_log.debug('这一句不会重复打印四次和写入日志四次')
#!/usr/bin/env python
# -*- coding:utf-8 -*-
\ No newline at end of file
No preview for this file type
No preview for this file type
No preview for this file type
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import tornado.web
from utils.log_manager import LogManager
from utils.proxy_client_util import MyProxyContextManger, proxy_manger
from utils.connecter_util import session_manger
class TornadoBaseHandler(tornado.web.RequestHandler):
    """Request-handler base class providing a logger and a proxied HTTP request helper."""

    # Session obtained once from session_manger when the class is defined.
    session = session_manger.get_session()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        log_manager = LogManager('seocheck')
        self.logging = log_manager.get_logger_and_add_handlers(
            log_level_int=4,
            log_path='./logs',
            log_filename='seocheck.log',
            is_add_stream_handler=True,
        )

    def proxy_request(self, url, headers=None, params=None, session=None, method='GET', vendor=None,
                      keep_same_proxy=False, request_timeout=15, allow_redirects=True, data=None, log=False,
                      verify_ssl=None, switch=True, max_redirects=10, proxy=None, proxy_auth=None):
        """Start an HTTP request, optionally through a proxy, and wrap it for ``async with``.

        :param switch: when false, no proxy is used even if ``proxy``/``proxy_auth`` were given
        :param proxy: explicit proxy URL; when None (and ``switch`` is true) the next proxy
                      is taken from ``proxy_manger`` for ``vendor``
        :param session: session to issue the request on; defaults to the class-level session
        :return: a ``MyProxyContextManger`` wrapping the pending request
        """
        if not switch:
            proxy = proxy_auth = None
        elif proxy is None:
            proxy, proxy_auth = proxy_manger.get_next(vendor, keep_same_proxy, self)

        active_session = self.session if session is None else session
        pending_request = active_session._request(
            method, url,
            headers=headers, params=params,
            timeout=request_timeout,
            allow_redirects=allow_redirects, data=data,
            proxy=proxy, proxy_auth=proxy_auth,
            verify_ssl=verify_ssl,
            max_redirects=max_redirects,
        )
        return MyProxyContextManger(None, pending_request)
\ No newline at end of file
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import asyncio
import aiohttp
# Default connection limit handed to the connector.
aio_session_max_client = 1000


class SessionManger(object):
    """Creates one aiohttp ClientSession at construction time and hands it out."""

    def __init__(self, concurrency_limit=aio_session_max_client, loop=None,
                 disable_cookie=False):
        self.session = self._generate_session(
            concurrency_limit=concurrency_limit,
            loop=loop,
            disable_cookie=disable_cookie,
        )

    @staticmethod
    def _generate_connector(limit, loop=None):
        """
        Build a TCPConnector with the given connection limit and SSL verification off.

        https://github.com/KeepSafe/aiohttp/issues/883
        if connector is passed to session, it is not available anymore
        """
        event_loop = loop or asyncio.get_event_loop()
        return aiohttp.TCPConnector(limit=limit, loop=event_loop, verify_ssl=False)

    def _generate_session(self, concurrency_limit, loop=None, disable_cookie=False):
        """Build the ClientSession; a DummyCookieJar is used only when disable_cookie is True."""
        event_loop = loop or asyncio.get_event_loop()
        if disable_cookie is True:
            jar = aiohttp.DummyCookieJar()
        else:
            jar = None
        connector = self._generate_connector(limit=concurrency_limit, loop=event_loop)
        return aiohttp.ClientSession(connector=connector, loop=event_loop, cookie_jar=jar)

    def get_session(self):
        """Return the session created in __init__."""
        return self.session

    # async def reset_session(self):
    #     await self.session.close()
    #     self.session = self._generate_session(concurrency_limit=1000, loop=None)

    # def __del__(self):
    #     self.session.close()

    # def __aexit__(self, exc_type, exc_val, exc_tb):
    #     self.session.close()


# Module-level instance, created at import time.
session_manger = SessionManger()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
def dict_data():
    """Return a fresh result dict pre-filled with the default values.

    Keys: has_included (0), included_time (None), baidu_url (None), error (0), msg ('').
    """
    return dict(
        has_included=0,
        included_time=None,
        baidu_url=None,
        error=0,
        msg='',
    )
\ No newline at end of file
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import base64
import datetime
import html
import json
import random
import re
import string
import time
from urllib.parse import quote, quote_plus
import redis
# from fake_useragent import UserAgent
from lxml.etree import HTML, XML, XMLSyntaxError, tostring
# ua = UserAgent()
# Connection pool and client for the Redis server at localhost:6379, database 0.
# Both objects are created when this module is imported.
# NOTE(review): host/port/db are hard-coded; confirm this is intended outside development.
_local_pool = redis.ConnectionPool(host="localhost", port=6379, db=0)
_local_redis = redis.StrictRedis(connection_pool=_local_pool)
# Offset of local time from UTC in whole seconds (positive east of Greenwich), taken
# once at import. The previous form, (now() - utcnow()).seconds, read the clock twice
# (usually yielding one second less than the real offset) and used timedelta.seconds,
# which is never negative and therefore wrong for zones west of UTC.
utc_distance = int(datetime.datetime.now(datetime.timezone.utc).astimezone().utcoffset().total_seconds())
# Pool of browser User-Agent strings to pick from at random.
# The fourth entry (MSIE 9.0 / Trident/5.0) previously lacked its closing parenthesis.
UA_list = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Opera/9.80 (Windows NT 5.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
    "Opera/9.27 (Windows NT 5.2; U; zh-cn)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    "Opera/8.0 (Windows NT 5.1; U; en)",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:63.0) Gecko/20100101 Firefox/63.0",
)
def ncr2unicode(string):
    """Replace HTML character references (named or numeric) in ``string`` with the characters they stand for."""
    decoded = html.unescape(string)
    return decoded
class LazyProperty(object):
    """
    Descriptor that computes a value on first access and stores it on the instance.

    After the first access the stored instance attribute (same name as the wrapped
    function) is what later lookups find, so the function runs once per instance.
    explain: http://www.spiderpy.cn/blog/5/
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, owner):
        # Accessed on the class itself: hand back the descriptor.
        if instance is None:
            return self
        computed = self.func(instance)
        setattr(instance, self.func.__name__, computed)
        return computed
def datetime2timestamp(dt):
    """
    Convert a date-time string to an integer Unix timestamp.

    "YYYY-MM-DDTHH:MM:SS" (with a "T") is read as UTC;
    "YYYY-MM-DD HH:MM:SS" (with a space) is read as local time.

    datetime2timestamp("1970-01-02T00:00:00") -> 86400

    :raises ValueError: if ``dt`` does not match the expected format
    """
    if "T" in dt:
        # timegm() interprets the struct as UTC directly. The earlier
        # mktime() + utc_distance form relied on an offset captured at import time,
        # which is wrong for dates that fall in a different DST period.
        import calendar
        struct_time = time.strptime(dt, "%Y-%m-%dT%H:%M:%S")
        return calendar.timegm(struct_time)
    struct_time = time.strptime(dt, "%Y-%m-%d %H:%M:%S")
    return int(time.mktime(struct_time))
def timestamp2datetime(value):
    """
    Format a Unix timestamp as a local-time "YYYY-MM-DD HH:MM:SS" string.
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value))
def timestamp2utc_datetime(value):
    """Format a Unix timestamp as a UTC "YYYY-MM-DDTHH:MM:SS" string."""
    return time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(value))
def formatter_date_utc(before, structure, is_utc=False):
    """Re-format a date-time string as "YYYY-MM-DDTHH:MM:SS", shifting it to UTC if needed.

    :param before: the date-time text to parse
    :param structure: strptime format describing ``before``
    :param is_utc: when false, ``before`` is taken to be 8 hours ahead of UTC and
                   28800 seconds are subtracted; when true the value is only re-formatted
    :return: the formatted string
    :raises ValueError: if ``before`` does not match ``structure``
    """
    parsed = datetime.datetime.strptime(before, structure)
    if not is_utc:
        # Plain wall-clock arithmetic: the earlier mktime()/fromtimestamp() round trip
        # went through the host's local time zone, so the fixed 8-hour shift could
        # become 7 or 9 hours across a DST switch on the machine running the code.
        parsed -= datetime.timedelta(seconds=28800)
    return parsed.strftime("%Y-%m-%dT%H:%M:%S")
def url_encode_plus(s, safe='', encoding=None, errors=None):
    """Percent-encode ``s`` with quote_plus (spaces become '+'); no characters are safe by default."""
    encoded = quote_plus(s, safe, encoding, errors)
    return encoded
def url_encode(s, safe='/', encoding=None, errors=None):
    """Percent-encode ``s`` with quote (spaces become '%20'); '/' is left as-is by default."""
    encoded = quote(s, safe, encoding, errors)
    return encoded
def doc2header(doc):
    """Parse a block of "Name: value" lines into a header dict.

    Blank lines and lines without a colon are skipped, CRLF line endings are accepted,
    and only the first colon (after the first character, so names such as ":authority"
    survive) separates name from value - a value may itself contain ": ".

    :param doc: header text, one header per line
    :return: dict mapping header name to value, both stripped of surrounding whitespace
    """
    header = {}
    for line in doc.splitlines():
        split_at = line.find(':', 1)
        if split_at == -1:
            continue
        name = line[:split_at].strip()
        if not name:
            continue
        header[name] = line[split_at + 1:].strip()
    return header
def lxml_html(text, parser=None, base_url=None):
    """Parse ``text`` with lxml's HTML(); return the result, or None if XMLSyntaxError is raised."""
    try:
        tree = HTML(text, parser=parser, base_url=base_url)
    except XMLSyntaxError:
        tree = None
    return tree
def lxml_xml(text, parser=None, base_url=None):
    """Parse ``text`` with lxml's XML() and return the result; parse errors propagate to the caller."""
    tree = XML(text, parser=parser, base_url=base_url)
    return tree
def etree_tostring(element_or_tree):
    """Serialize an lxml element or tree using lxml's tostring() with its default options."""
    serialized = tostring(element_or_tree)
    return serialized
def get_ts(k=10):
    """Return the current Unix time as a digit string, scaled by 10 ** (k - 10).

    k=10 gives whole seconds, k=13 milliseconds.
    """
    scale = 10 ** (k - 10)
    return str(int(time.time() * scale))
def get_ts_int(k=10):
    """Return the current Unix time as an int, scaled by 10 ** (k - 10).

    k=10 gives whole seconds, k=13 milliseconds.
    """
    scale = 10 ** (k - 10)
    return int(time.time() * scale)
def check_json_format(raw_msg):
    """
    Tell whether ``raw_msg`` is a str/bytes value that parses as JSON.

    :return: True if json.loads() accepts it; False for any parse failure or any
             other input type
    """
    # Only text or bytes can be JSON documents.
    if not isinstance(raw_msg, (bytes, str)):
        return False
    try:
        json.loads(raw_msg)
    except Exception:
        return False
    return True
def b64decode(s):
    """Base64-decode ``s`` and return the decoded bytes interpreted as UTF-8 text."""
    raw = base64.b64decode(s)
    return raw.decode('utf8')
def b64encode(s):
    """Encode the text ``s`` as UTF-8, Base64-encode it, and return the result as text."""
    raw = s.encode('utf8')
    return base64.b64encode(raw).decode('utf8')
def random_ua():
    """Return one entry of UA_list chosen at random."""
    chosen = random.choice(UA_list)
    return chosen
# def random_ua_2():
# return ua.random
def gen_chars(size, charset=string.ascii_letters + string.digits, prefix="", postfix=""):
    """
    Build a random string of ``size`` characters drawn from ``charset``.

    :param size: number of random characters
    :param charset: characters to draw from
    :param prefix: text placed before the random part
    :param postfix: text placed after the random part
    :return: prefix + random characters + postfix
    """
    picked = [random.choice(charset) for _ in range(size)]
    body = "".join(picked)
    return prefix + body + postfix
def gen_sep_chars(alist, sep="-", charset=string.ascii_letters + string.digits, prefix="", postfix=""):
    """
    Build one random segment per length in ``alist`` and join the segments with ``sep``.

    :param alist: list of segment lengths
    :param sep: separator placed between segments
    :param charset: characters to draw from
    :param prefix: text placed before the joined segments
    :param postfix: text placed after the joined segments
    :return: prefix + joined segments + postfix
    """
    segments = [gen_chars(size, charset) for size in alist]
    joined = sep.join(segments)
    return prefix + joined + postfix
def random_choice(s):
    """Return one element of the non-empty sequence ``s`` chosen at random."""
    picked = random.choice(s)
    return picked
def random_int(s, e):
    """Return a random integer N with s <= N <= e (both ends included)."""
    picked = random.randint(s, e)
    return picked
def random_float():
    """Return a random float in the half-open range [0.0, 1.0)."""
    picked = random.random()
    return picked
def get_token(key):
    """Return whatever the local Redis client's GET yields for ``key``."""
    stored = _local_redis.get(key)
    return stored
def rand_token(platform):
    """Pick a random member of the Redis set "tas_token_<platform>" and return it JSON-decoded."""
    token_key = "tas_token_{platform}".format(platform=platform)
    return json.loads(_local_redis.srandmember(token_key))
def format_date(raw_date):
    """Turn a relative ("N seconds/minutes ago") or absolute date text into (datetime_str, timestamp).

    * text containing '秒' (seconds): the first number in it is taken as seconds ago
    * text containing '分' (minutes): the first number in it is taken as minutes ago
    * anything else: a space is inserted after the first 10 characters and the result
      is parsed by ``datetime2timestamp``

    :return: tuple ``(dt, ts)`` - local "YYYY-MM-DD HH:MM:SS" string and integer Unix timestamp
    :raises ValueError: if a relative text contains no number, or an absolute text
                        does not parse
    """
    if '秒' in raw_date:
        unit_seconds = 1
    elif '分' in raw_date:
        unit_seconds = 60
    else:
        dt = raw_date[:10] + " " + raw_date[10:]
        ts = datetime2timestamp(dt)
        return dt, ts

    amount = re.search(r'\d+', raw_date)
    if amount is None:
        # Previously an AttributeError on None.
        raise ValueError("no number found in relative date: %r" % (raw_date,))

    # One clock read and plain epoch arithmetic. The earlier version read now() six
    # times (the fields could straddle a minute/day boundary) and passed tm_isdst=0 to
    # mktime(), which shifts the result by an hour while DST is in effect.
    ts = int(time.time()) - int(amount.group(0)) * unit_seconds
    dt = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))
    return dt, ts
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import random
import aiohttp
from aiohttp.client import _RequestContextManager
class Vendor:
    """Constants describing the Dobel proxy plans: vendor keys, hosts and account details."""

    # Vendor keys.
    dobel_t1 = "dobel_t1"
    dobel_t3 = "dobel_t3"

    # Proxy endpoints, one per plan.
    dobel_host_t1 = "http://http-proxy-t1.dobel.cn:9180"
    dobel_host_t3 = "http://http-proxy-t3.dobel.cn:9180"

    # NOTE(review): credentials are hard-coded in source; consider moving them out.
    dobel_passwd = 'oK97IIl298'
    t1_auth_name = 'LIEXINHTT3'

    # Inclusive range of the numeric suffix used for the t3 account names.
    min_dobel = 1
    max_dobel = 2
class DobelProxy:
    """Endless generators of (proxy_url, BasicAuth) pairs for the Dobel plans."""

    @staticmethod
    def get_proxy_t3():
        """Yield t3 proxy settings forever, cycling the account suffix through min_dobel..max_dobel.

        The cycle starts at a random suffix within that range.
        """
        suffix = random.randint(Vendor.min_dobel, Vendor.max_dobel)
        while True:
            if suffix > Vendor.max_dobel:
                # Wrap around to the first account.
                suffix = Vendor.min_dobel
            auth_name = "LIEXINHTT%d" % (suffix,)
            yield Vendor.dobel_host_t3, aiohttp.BasicAuth(auth_name, Vendor.dobel_passwd)
            suffix += 1

    @staticmethod
    def get_proxy_t1():
        """Yield the single t1 proxy setting forever (a new BasicAuth object each time)."""
        while True:
            credentials = aiohttp.BasicAuth(Vendor.t1_auth_name, Vendor.dobel_passwd)
            yield Vendor.dobel_host_t1, credentials
class AbuyunProxy:
    """Endless generator of (proxy_url, BasicAuth) pairs for the Abuyun "pro" endpoint."""

    @staticmethod
    def get_proxy_pro():
        """Yield the same proxy URL with a new BasicAuth object forever.

        NOTE(review): credentials are hard-coded in source; consider moving them out.
        """
        while True:
            credentials = aiohttp.BasicAuth('HI6MSKA0807328TP', '68D3A39E47889817')
            yield 'http://http-pro.abuyun.com:9010', credentials
class ProxyManger(object):
    """Hands out (proxy_url, proxy_auth) pairs, one generator per vendor key."""

    def __init__(self):
        super().__init__()
        # Each property is read once here, so the map holds one generator per vendor.
        self.vendor_generator_map = dict(
            # plan 3: 200 concurrent connections, a random IP is assigned to every request
            dobel_t3=self.dobel_t3,
            # plan 1: 10 concurrent connections, an IP is kept for 1-2 minutes,
            # switching IP on demand is supported
            dobel_t1=self.dobel_t1,
            # abuyun pro test
            abuyun_pro=self.aabuyun_pro,
        )

    @property
    def dobel_t3(self):
        """A new Dobel t3 generator on every access."""
        return DobelProxy.get_proxy_t3()

    @property
    def dobel_t1(self):
        """A new Dobel t1 generator on every access."""
        return DobelProxy.get_proxy_t1()

    @property
    def aabuyun_pro(self):
        """A new Abuyun pro generator on every access."""
        return AbuyunProxy.get_proxy_pro()

    def get_next(self, vendor, keep_same_proxy, tornado_instance):
        """Return the next (proxy_url, proxy_auth) pair for ``vendor``.

        :param vendor: vendor key, or None to use ``get_vendor()``
        :param keep_same_proxy: when true and the handler already has a
                                ``vendor_previous_map``, its stored pair is returned again
        :param tornado_instance: handler object on which the last pair is remembered
        :raises KeyError: if ``vendor`` is not a known vendor key
        """
        if keep_same_proxy and hasattr(tornado_instance, "vendor_previous_map"):
            remembered = tornado_instance.vendor_previous_map
            return list(remembered.values())[0]

        if vendor is None:
            vendor = self.get_vendor()
        elif vendor not in self.vendor_generator_map:
            raise KeyError("Unknown vendor: %s" % (str(vendor),))

        generator = self.vendor_generator_map[vendor]
        values = next(generator)
        tornado_instance.vendor_previous_map = {vendor: values}
        return values

    @staticmethod
    def get_vendor():
        """Default vendor key."""
        return Vendor.dobel_t3
class MyProxyContextManger(_RequestContextManager):
    """Request context manager that also stores a logger reference on the instance."""

    def __init__(self, logging, coro):
        super().__init__(coro)
        self.logging = logging

    async def __aenter__(self):
        """Await the wrapped request coroutine, store the response on ``_resp`` and return it."""
        response = await self._coro
        self._resp = response
        return response
# Module-level ProxyManger instance, created when this module is imported.
proxy_manger = ProxyManger()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment