说明:
今天需要用 selenium 测试携带账号和密码的代理 ip,原以为和无密码的代理一样简单,结果弄了很久(将近 3 个小时)。最后看了很多博客,谷歌、百度都搜了一遍,终于找到了比较好用的方法。所以,有时候遇到问题真的需要耐心地去找资源:不会就学,肯定有人会;不会搜索就慢慢搜,肯定能找到。如果你正好需要,并且看到了我的这篇文章,恭喜你,不用再折磨自己了——我前前后后看了二三十篇博客。
1、无密码(ip+port):
这个是我很久之前看到的,这个添加代理ip,比较简单,一起整理写出来吧,直接把代码复制过来,需要的可以直接复制,改下代理ip就可以用了。
import time
from selenium import webdriver
# Demo: start Chrome behind an unauthenticated proxy and dump one page.
chromeOptions = webdriver.ChromeOptions()
# Set the proxy. Note: there must be no spaces around "=", i.e. NOT
# "--proxy-server = http://202.20.16.82:10152".
chromeOptions.add_argument("--proxy-server=http://ip:port")
# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
browser = webdriver.Chrome(options=chromeOptions)
try:
    # Load a page through the proxy to check that the proxy is working.
    browser.get("https://www.baidu.com/")
    time.sleep(20)
    print(browser.page_source)
finally:
    # Always quit, even if loading the page failed, so the browser process
    # is not left running.
    browser.quit()
2、有账号和密码的代理ip(这个就是我花了几个小时才找到的一个不错的方法)
直接把代码复制过来吧,需要的自己拿去用吧
import string
import zipfile
from selenium import webdriver
# Proxy server endpoint (ip + port).
proxyHost, proxyPort = "ip", "port"
# Proxy tunnel credentials (account + password).
proxyUser, proxyPass = "user", "password"
def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http', plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    script sets a fixed proxy server via ``chrome.proxy.settings`` and
    answers ``onAuthRequired`` challenges with the given credentials.

    NOTE(review): the manifest declares ``manifest_version`` 2; confirm the
    target Chrome version still loads Manifest V2 extensions.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port (string or int).
        proxy_username: User name sent in response to proxy auth challenges.
        proxy_password: Password sent in response to proxy auth challenges.
        scheme: Proxy scheme written into the proxy rules (default 'http').
        plugin_path: Where to write the zip. When None, a file name derived
            from the user name and password is used in the working directory
            (note that this default exposes the credentials in the file name).

    Returns:
        The path of the zip file that was written.
    """
    # Local import so this function does not depend on a module-level import.
    import json

    if plugin_path is None:
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every placeholder is filled with a json.dumps()-encoded value, i.e. a
    # complete, correctly escaped JS string literal. Splicing the raw text
    # between quotes would yield broken JavaScript whenever a value (typically
    # the password) contains a double quote or a backslash.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port}, 10)
                },
                bypassList: ["foobar.com"]
            }
        };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(str(proxy_host)),
        port=json.dumps(str(proxy_port)),
        username=json.dumps(str(proxy_username)),
        password=json.dumps(str(proxy_password)),
        scheme=json.dumps(str(scheme)),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path
# Demo: build the proxy-auth extension, load it into Chrome and dump one page.
proxy_auth_plugin_path = create_proxy_auth_extension(
    proxy_host=proxyHost,
    proxy_port=proxyPort,
    proxy_username=proxyUser,
    proxy_password=proxyPass)

option = webdriver.ChromeOptions()
# Optional Chrome flags, left disabled as in the original listing:
# option.add_argument('--no-sandbox')
# option.add_argument('--disable-gpu')
# option.add_argument("--start-maximized")
option.add_extension(proxy_auth_plugin_path)

# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
drive = webdriver.Chrome(options=option)
try:
    # Alternative check that shows the outgoing IP:
    # drive.get("http://httpbin.org/ip")
    drive.get("https://www.baidu.com/")
    print(drive.page_source)
finally:
    # Shut the browser down even if the page load fails; the original never
    # closed the driver, which leaves the browser process running.
    drive.quit()
注意:
substitute 方法其实就是字符串模板替换(把 ${host} 这类占位符换成实际的值)。换成 format 方法拼接也可以,但要注意 JS 代码里本身的花括号需要写成 {{ 和 }} 进行转义。
3、对 2 进行了小小的优化(推荐直接使用):
import string
import zipfile
from selenium import webdriver
def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http',
                                plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive contains a ``manifest.json`` and a ``background.js``. The
    script sets a fixed proxy server via ``chrome.proxy.settings`` and
    answers ``onAuthRequired`` challenges with the given credentials.

    NOTE(review): the manifest declares ``manifest_version`` 2; confirm the
    target Chrome version still loads Manifest V2 extensions.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port (string or int).
        proxy_username: User name sent in response to proxy auth challenges.
        proxy_password: Password sent in response to proxy auth challenges.
        scheme: Proxy scheme written into the proxy rules (default 'http').
        plugin_path: Where to write the zip. When None, a file name derived
            from the user name and password is used in the working directory
            (note that this default exposes the credentials in the file name).

    Returns:
        The path of the zip file that was written.
    """
    # Local import so this function does not depend on a module-level import.
    import json

    if plugin_path is None:
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every placeholder is filled with a json.dumps()-encoded value, i.e. a
    # complete, correctly escaped JS string literal. Splicing the raw text
    # between quotes would yield broken JavaScript whenever a value (typically
    # the password) contains a double quote or a backslash.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port}, 10)
                },
                bypassList: ["foobar.com"]
            }
        };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(str(proxy_host)),
        port=json.dumps(str(proxy_port)),
        username=json.dumps(str(proxy_username)),
        password=json.dumps(str(proxy_password)),
        scheme=json.dumps(str(scheme)),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path
def from_proxy_get_daili(proxy):
    """Split a ``user:pass@ip:port`` proxy string into its four parts.

    The string is split on the LAST ``@`` and the host/port on the LAST
    ``:``, while user/password are split on the FIRST ``:``. This keeps
    passwords that themselves contain ``@`` or ``:`` intact.

    Args:
        proxy: Proxy description in the form ``user:pass@ip:port``.

    Returns:
        A ``(host, port, user, password)`` tuple of strings.

    Raises:
        ValueError: If the string lacks the ``@`` separator, the ``:``
            between user and password, or the ``:`` between host and port.
    """
    user_pass_str, at_sep, ip_port_str = proxy.rpartition('@')
    proxyUser, cred_sep, proxyPass = user_pass_str.partition(':')
    proxyHost, port_sep, proxyPort = ip_port_str.rpartition(':')
    if not (at_sep and cred_sep and port_sep):
        raise ValueError(
            "proxy must look like 'user:pass@ip:port', got {!r}".format(proxy))
    return proxyHost, proxyPort, proxyUser, proxyPass
def get_driver(proxy):
    """Start a Chrome driver that uses the given authenticated proxy.

    Args:
        proxy: Proxy description in the form ``user:pass@ip:port``.

    Returns:
        The ``webdriver.Chrome`` instance with the proxy-auth extension
        loaded. The caller is responsible for calling ``quit()`` on it.
    """
    proxyHost, proxyPort, proxyUser, proxyPass = from_proxy_get_daili(proxy)
    proxy_auth_plugin_path = create_proxy_auth_extension(
        proxy_host=proxyHost,
        proxy_port=proxyPort,
        proxy_username=proxyUser,
        proxy_password=proxyPass)

    option = webdriver.ChromeOptions()
    option.add_extension(proxy_auth_plugin_path)
    # `options=` replaces the deprecated `chrome_options=` keyword, which
    # newer Selenium releases no longer accept.
    drive = webdriver.Chrome(options=option)
    return drive
if __name__ == '__main__':
    # Proxy server, in ``user:pass@ip:port`` form.
    proxy = 'user:pass@ip:port'
    drive = get_driver(proxy)
    try:
        # Navigate before dumping the source: without a page load there is
        # nothing to show whether the proxy is actually in use.
        drive.get("http://httpbin.org/ip")
        print(drive.page_source)
    finally:
        # Shut the browser down even if the page load fails.
        drive.quit()
改进之后,直接传入一个带账号密码的代理 proxy(格式:'user:pass@ip:port'),就会返回一个浏览器对象 driver,直接使用即可,不用再把代理的各个部分一个一个拆开复制了。
找资源真的很累人,感谢这位博主。
有密码代理的原博客:https://blog.csdn.net/ywdhzxf/article/details/83211631