说明:

今天需要使用selenium测试一下携带含有账号+密码的代理ip,以为和无密码一样简单,但是弄了好久(将近3个小时吧。。。),最后看了很多博客,谷歌、百度什么都搜索过,最终找到了比较好用的方法。所以呀,有时候遇到问题真的是需要耐心地去找资源,不会就学嘛,肯定有人会;不会就搜索,慢慢搜,肯定能找到。如果你正好需要,并且看到我的这篇文章,恭喜你,不用再折磨自己了,我找资料的时候都快看了二三十个博客了。。。。

1、无密码(ip+port):

这个是我很久之前看到的,这个添加代理ip,比较简单,一起整理写出来吧,直接把代码复制过来,需要的可以直接复制,改下代理ip就可以用了。

import time

from selenium import webdriver

chromeOptions = webdriver.ChromeOptions()

# Route all browser traffic through the (unauthenticated) proxy.
# NOTE: there must be no spaces around "=" inside the switch, i.e. NOT
# "--proxy-server = http://202.20.16.82:10152".
chromeOptions.add_argument("--proxy-server=http://ip:port")
# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
browser = webdriver.Chrome(options=chromeOptions)

try:
    # Load a page through the proxy and dump its HTML. To check which exit IP
    # the proxy gives you, point this at an IP-echo page instead.
    browser.get("https://www.baidu.com/")
    time.sleep(20)
    print(browser.page_source)
finally:
    # Quit the browser and end the WebDriver session even if the page load
    # above raised, so no browser process is left behind.
    browser.quit()

2、有账号和密码的代理ip(这个就是我花了几个小时找到一个不错的)

直接把代码复制过来吧,需要的自己拿去用吧

import string
import zipfile
from selenium import webdriver

# Proxy tunnel credentials (account + password)
proxyUser = "user"
proxyPass = "password"
# Proxy server address (ip + port)
proxyHost = "ip"
proxyPort = "port"


def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http', plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive holds a ``manifest.json`` and a ``background.js``. The script
    sets a fixed proxy server through ``chrome.proxy.settings`` and registers
    an ``onAuthRequired`` listener that answers with the given credentials.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port, as an int or a numeric string.
        proxy_username: User name sent for proxy authentication.
        proxy_password: Password sent for proxy authentication.
        scheme: Proxy scheme written into the extension's proxy config.
        plugin_path: Where to write the zip. When ``None``, a file name built
            from the user name and password is used.

    Returns:
        The path of the zip file that was written.
    """
    # Local import: only needed to escape values for the generated JavaScript.
    import json

    if plugin_path is None:
        # NOTE: the default file name embeds the credentials and a fixed
        # "http-dyn.dobel.com_9020" suffix; pass plugin_path to control it.
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    # NOTE(review): this manifest declares manifest_version 2, which recent
    # Chrome releases are phasing out - confirm the target browser loads it.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every placeholder is filled with a json.dumps()-encoded string, which is
    # a valid JavaScript string literal. Quotes or backslashes in a password
    # therefore cannot break out of the literal or corrupt the script.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port}, 10)
                },
                bypassList: ["foobar.com"]
            }
          };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(str(proxy_host)),
        port=json.dumps(str(proxy_port)),
        username=json.dumps(str(proxy_username)),
        password=json.dumps(str(proxy_password)),
        scheme=json.dumps(str(scheme)),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path


# Build the proxy-auth extension zip from the settings defined above.
proxy_auth_plugin_path = create_proxy_auth_extension(
    proxy_host=proxyHost,
    proxy_port=proxyPort,
    proxy_username=proxyUser,
    proxy_password=proxyPass)

option = webdriver.ChromeOptions()
# Optional Chrome switches such as '--no-sandbox', '--disable-gpu' or
# '--start-maximized' can be added here with option.add_argument(...).
option.add_extension(proxy_auth_plugin_path)

# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
drive = webdriver.Chrome(options=option)

try:
    # Swap in an IP-echo page such as "http://httpbin.org/ip" to see which
    # address the proxied request comes from.
    drive.get("https://www.baidu.com/")

    print(drive.page_source)
finally:
    # End the WebDriver session so the browser process is not left running,
    # even if the page load above raised.
    drive.quit()

注意:

substitute方法其实就是把模板里的占位符替换成对应的值(相当于字符串拼接),你换成format方法也可以,不过要注意:这段JS代码里本身有很多花括号,用format时需要把这些花括号写成{{和}}进行转义,否则会报错。

3、对2进行了小小的优化(推荐直接使用):

import string
import zipfile

from selenium import webdriver


def create_proxy_auth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http',
                                plugin_path=None):
    """Write a Chrome extension zip that configures an authenticated proxy.

    The archive holds a ``manifest.json`` and a ``background.js``. The script
    sets a fixed proxy server through ``chrome.proxy.settings`` and registers
    an ``onAuthRequired`` listener that answers with the given credentials.

    Args:
        proxy_host: Proxy host name or IP address.
        proxy_port: Proxy port, as an int or a numeric string.
        proxy_username: User name sent for proxy authentication.
        proxy_password: Password sent for proxy authentication.
        scheme: Proxy scheme written into the extension's proxy config.
        plugin_path: Where to write the zip. When ``None``, a file name built
            from the user name and password is used.

    Returns:
        The path of the zip file that was written.
    """
    # Local import: only needed to escape values for the generated JavaScript.
    import json

    if plugin_path is None:
        # NOTE: the default file name embeds the credentials and a fixed
        # "http-dyn.dobel.com_9020" suffix; pass plugin_path to control it.
        plugin_path = r'{}_{}@http-dyn.dobel.com_9020.zip'.format(proxy_username, proxy_password)

    # NOTE(review): this manifest declares manifest_version 2, which recent
    # Chrome releases are phasing out - confirm the target browser loads it.
    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Dobel Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every placeholder is filled with a json.dumps()-encoded string, which is
    # a valid JavaScript string literal. Quotes or backslashes in a password
    # therefore cannot break out of the literal or corrupt the script.
    background_js = string.Template(
        """
        var config = {
            mode: "fixed_servers",
            rules: {
                singleProxy: {
                    scheme: ${scheme},
                    host: ${host},
                    port: parseInt(${port}, 10)
                },
                bypassList: ["foobar.com"]
            }
          };

        chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

        function callbackFn(details) {
            return {
                authCredentials: {
                    username: ${username},
                    password: ${password}
                }
            };
        }

        chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
        );
        """
    ).substitute(
        host=json.dumps(str(proxy_host)),
        port=json.dumps(str(proxy_port)),
        username=json.dumps(str(proxy_username)),
        password=json.dumps(str(proxy_password)),
        scheme=json.dumps(str(scheme)),
    )

    with zipfile.ZipFile(plugin_path, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)

    return plugin_path


def from_proxy_get_daili(proxy):
    """Split a ``user:pass@ip:port`` proxy string into its four parts.

    The credentials/address boundary is the last ``@``, the user/password
    boundary is the first ``:`` of the credentials, and the host/port boundary
    is the last ``:`` of the address. A password may therefore contain ``@``
    or ``:`` without breaking the parse.

    Args:
        proxy: Proxy string in the form ``user:pass@ip:port``.

    Returns:
        A tuple ``(host, port, user, password)`` of strings.

    Raises:
        ValueError: If one of the three separators is missing.
    """
    # The error message deliberately omits the input, which contains a password.
    malformed = ValueError("proxy must look like 'user:pass@ip:port'")

    credentials, at_sign, address = proxy.rpartition('@')
    if not at_sign:
        raise malformed

    proxy_user, cred_sep, proxy_pass = credentials.partition(':')
    if not cred_sep:
        raise malformed

    proxy_host, addr_sep, proxy_port = address.rpartition(':')
    if not addr_sep:
        raise malformed

    return proxy_host, proxy_port, proxy_user, proxy_pass


def get_driver(proxy):
    """Start a Chrome WebDriver that goes through an authenticated proxy.

    Args:
        proxy: Proxy string in the form ``user:pass@ip:port``.

    Returns:
        The ``webdriver.Chrome`` instance, with the generated proxy-auth
        extension loaded. The caller is responsible for quitting it.
    """
    host, port, user, password = from_proxy_get_daili(proxy)
    extension_path = create_proxy_auth_extension(
        proxy_host=host,
        proxy_port=port,
        proxy_username=user,
        proxy_password=password)
    options = webdriver.ChromeOptions()
    options.add_extension(extension_path)
    # `options=` replaces the deprecated `chrome_options=` keyword, which
    # newer Selenium releases no longer accept.
    return webdriver.Chrome(options=options)


if __name__ == '__main__':
    # Proxy server in "user:pass@ip:port" form
    proxy = 'user:pass@ip:port'

    drive = get_driver(proxy)
    try:
        # Navigate before reading page_source; without a page load the dump is
        # just the browser's blank start page. An IP-echo page shows which
        # address the proxied request comes from.
        drive.get("http://httpbin.org/ip")
        print(drive.page_source)
    finally:
        # End the WebDriver session so the browser process is not left running.
        drive.quit()

改进之后,直接传入一个正常的有密码的代理proxy(格式:'user:pass@ip:port'),这样就会返回一个浏览器对象driver,直接使用即可,不用再把代理的各个部分一个一个分开复制使用。

找资源真的很累人,感谢这个博主。
有密码代理的原博客:https://blog.csdn.net/ywdhzxf/article/details/83211631