需求:有时候有些网站的反爬对cookies有限制,
比如亚马逊会结合cookies+ip+user-agent进行反爬,这时候就需要找代理ip,每次请求随机使用一个代理ip,user-agent也要每次随机一个;代理ip高匿性和user-agent的处理我之前写过,不会的可以参考下面,这次博客主要写我使用selenium批量搜集cookies的方法;
这个是我之前写的user-agent博客,里面有好多我搜集去重之后的,够你用的
https://blog.csdn.net/weixin_42081389/article/details/90291488
ip高匿性进行判断,可以参考我之前的博客,高匿性参考我推荐的方法
(判断高匿代理ip的另外两个方法(刚找到的,推荐这个))
https://blog.csdn.net/weixin_42081389/article/details/88528285
批量获取cookies代码
本人不喜欢说废话,直接上代码,需要的可以copy走,如果觉得不错或者对你有帮助,给我点个赞哈 (#^.^#)
from selenium import webdriver
def get_driver(proxy='137.59.50.78:8080'):
    """Create a Chrome webdriver routed through an HTTP proxy, images disabled.

    Args:
        proxy: ``host:port`` of the HTTP proxy to route traffic through.
            Defaults to the address that was previously hard-coded.

    Returns:
        A configured ``selenium.webdriver.Chrome`` instance with a 20 s
        implicit wait.
    """
    chrome_options = webdriver.ChromeOptions()
    # Route all traffic through the proxy so the target site sees the proxy
    # IP instead of ours (pair with a random user-agent per the blog notes).
    chrome_options.add_argument("--proxy-server=http://{}".format(proxy))
    # Disable image loading to speed up page fetches.
    prefs = {'profile.default_content_setting_values': {'images': 2}}
    chrome_options.add_experimental_option('prefs', prefs)
    # NOTE: the old `chrome_options=` keyword was deprecated and removed in
    # Selenium 4; `options=` is accepted by Selenium 3.8+ and 4.x alike.
    browser = webdriver.Chrome(options=chrome_options)
    browser.implicitly_wait(20)
    # Tip: load an IP-echo page here to verify the proxy is actually in use.
    return browser
def get_cookies(cookie_dict):
    """Flatten selenium cookie dicts into one ``name=value;name=value`` string.

    Args:
        cookie_dict: iterable of cookie dicts as returned by
            ``browser.get_cookies()`` (each has ``'name'`` and ``'value'`` keys).

    Returns:
        A single string of ``name=value`` pairs joined by ``;``.
    """
    return ';'.join(
        "{}={}".format(item['name'], item['value']) for item in cookie_dict
    )
def run(url_list):
    """Visit each URL, collect its cookies, and append them to ``cookies.txt``.

    One cookie string per URL is written (and echoed to stdout), one per line.

    Args:
        url_list: iterable of URL strings to visit with a single shared browser.
    """
    browser = get_driver()
    try:
        # Open the output file once instead of reopening it for every URL.
        with open('cookies.txt', 'a', encoding='utf-8') as file_write_cookie:
            for url in url_list:
                browser.get(url)
                cookie_dict = browser.get_cookies()
                cookie = get_cookies(cookie_dict)
                file_write_cookie.write(cookie + '\n')
                print(cookie)
    finally:
        # Always release the browser/chromedriver process, even on errors —
        # the original leaked it.
        browser.quit()
if __name__ == '__main__':
    # Read the target URLs (one per line) and strip trailing newlines/spaces.
    with open('detail_url.txt', 'r', encoding='utf-8') as file_read_url:
        urls = (line.strip() for line in file_read_url.readlines())
        run(urls)