Commit 2919eafc by lichenggang

add testmart

parent 263fcee5
Showing with 3 additions and 5 deletions
from utils.base import Module_Base from utils.base import Module_Base
import re from utils.proxy_util import proxies
dom_pat = re.compile(r'\<script\>document\.write\(\"(.+)\"\)')
domain = 'http:' domain = 'http:'
headers = { headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
...@@ -9,8 +7,8 @@ headers = { ...@@ -9,8 +7,8 @@ headers = {
a = Module_Base() a = Module_Base()
url1 = 'https://www.testmart.cn/productlist/sortpage/cat/111.html' url1 = 'https://www.testmart.cn/productlist/sortpage/cat/111.html'
url2 = 'https://www.testmart.cn/productlist/sortpage/cat/112.html' url2 = 'https://www.testmart.cn/productlist/sortpage/cat/112.html'
resp1 = a.requests.get(url1, headers=headers).text resp1 = a.requests.get(url1, headers=headers, proxies=proxies).text
resp2 = a.requests.get(url2, headers=headers).text resp2 = a.requests.get(url2, headers=headers, proxies=proxies).text
dom1 = a.etree.HTML(resp1) dom1 = a.etree.HTML(resp1)
dom2 = a.etree.HTML(resp2) dom2 = a.etree.HTML(resp2)
level1_doms_1_1 = dom1.xpath('//div[@class="left-content"]//p') level1_doms_1_1 = dom1.xpath('//div[@class="left-content"]//p')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment