博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Flask开发系列之Flask+redis实现IP代理池
阅读量:7071 次
发布时间:2019-06-28

本文共 6608 字,大约阅读时间需要 22 分钟。

Flask开发系列之Flask+redis实现IP代理池

 

6.11-6.15号完善...

简易实现版

"""Simple IP proxy pool: scrape free proxies, validate them, store them in Redis.

Proxies are scraped from kuaidaili.com, checked by fetching a test page
through them, de-duplicated with a Bloom filter and appended to the Redis
list ``ip_list`` as the ``str()`` of a ``{protocol: "host:port"}`` dict.
"""
import ast
import re
import time

import redis
import requests
from bloom_filter import BloomFilter

pool = redis.ConnectionPool(host='localhost', password='xxx', port=6379, decode_responses=True)
r = redis.Redis(connection_pool=pool)

# Bloom filter used to skip proxies that were already stored.
# NOTE(review): error_rate=0.1 means roughly one in ten *new* proxies may be
# reported as duplicates and skipped; lower it if that loss matters.
bloombloom = BloomFilter(max_elements=10000, error_rate=0.1)
bloombloom.add(str({'http': '117.91.232.53:9999'}))

# Running totals shared by valVer / time_valVer.  Defined at module level so
# the functions also work when this file is imported instead of executed.
badNum = 0
goodNum = 0
good_list = []

# NOTE(review): the HTML tag fragments of the original pattern were lost when
# the article was extracted.  The <td ...> anchors are a reconstruction that
# matches an IP cell followed by a port cell -- verify against the live page.
IP_PORT_PATTERN = re.compile(
    r'<td[^>]*>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>.*?<td[^>]*>(\d{1,5})</td>',
    re.S,
)


def get_ip(i):
    """Scrape listing page ``i + 1`` and return its proxies as 'host:port' strings."""
    url = 'https://www.kuaidaili.com/free/inha/' + str(i + 1)
    html = requests.get(url=url).text
    time.sleep(1)  # pause between page fetches
    ip_list = [host + ':' + port for host, port in IP_PORT_PATTERN.findall(html)]
    print('共收集到%d个代理ip' % len(ip_list))
    print(ip_list)
    return ip_list


def valVer(proxys):
    """Validate freshly scraped proxies.

    Each 'host:port' string is wrapped in a requests-style proxies dict and
    used to fetch the test page.  A non-200 answer or a request error counts
    as a bad proxy.

    Returns the list of proxies dicts that passed in this call; the same
    dicts are also appended to the module-level ``good_list``.
    """
    global badNum, goodNum
    good = []
    for proxy_host in proxys:
        protocol = 'https' if 'https' in proxy_host else 'http'
        proxies = {protocol: proxy_host}
        print('现在正在测试的IP:', proxies)
        try:
            response = requests.get('http://www.baidu.com', proxies=proxies, timeout=2)
        except requests.RequestException as e:
            print(e)
            badNum += 1
            continue
        if response.status_code != 200:
            badNum += 1
            print(proxy_host, 'bad proxy')
        else:
            goodNum += 1
            good.append(proxies)
            good_list.append(proxies)
            print(proxy_host, 'success proxy')
    print('success proxy num : ', goodNum)
    print('bad proxy num : ', badNum)
    print("这次:", good)
    print("此时全部:", good_list)
    return good


def _drop_proxy(entry):
    """Remove one occurrence of the raw stored string ``entry`` from ``ip_list``."""
    # redis-py >= 3.0 signature is lrem(name, count, value); the value must be
    # the stored string, not the dict parsed from it.
    r.lrem("ip_list", 1, entry)


def time_valVer(proxys):
    """Re-validate proxies already stored in Redis and drop the dead ones.

    ``proxys`` holds the raw strings read from ``ip_list``.  An entry is
    removed when it answers with a non-200 status or when the request fails,
    which is the same failure criterion valVer applies.
    """
    for entry in proxys:
        print('现在正在定时测试的IP:', entry)
        try:
            proxy = ast.literal_eval(entry)
        except (ValueError, SyntaxError) as e:
            # Not a parsable literal: report it and leave the entry untouched.
            print(e)
            continue
        try:
            response = requests.get('http://www.baidu.com', proxies=proxy, timeout=2)
        except requests.RequestException as e:
            print(e)
            _drop_proxy(entry)
            print(proxy, 'bad proxy')
            continue
        if response.status_code != 200:
            _drop_proxy(entry)
            print(proxy, 'bad proxy')
        else:
            good_list.append(proxy)
            print(proxy, 'success proxy')


def stone(good):
    """Append each proxy in ``good`` to ``ip_list`` unless the Bloom filter has seen it."""
    for IP in good:
        key = str(IP)
        if key in bloombloom:
            print("%s不能存储,有相同的IP" % IP)
            continue
        print("存储的IP:", IP)
        bloombloom.add(key)
        r.rpush("ip_list", key)


if __name__ == '__main__':
    for i in range(0, 10):
        # NOTE(review): with only 10 pages this condition is never true, so
        # stored proxies are never re-validated in this simple version.
        if i % 10 == 0 and i != 0:
            # One LRANGE instead of one LINDEX round-trip per element.
            time_valVer(r.lrange("ip_list", 0, -1))
        else:
            ip_list = get_ip(i)
            good = valVer(ip_list)
            stone(good)

 

 

from flask import Flask
import redis  # Redis client for Python; the cache could also be operated directly on the Redis server host

r = redis.Redis(host='localhost', port=6379, password='xxx', decode_responses=True)
app = Flask(__name__)


# NOTE(review): the URL variable was stripped when the article was extracted.
# '<int:index>' is a reconstruction that matches the view's ``index``
# parameter -- confirm the converter against the original post.
@app.route('/ip/<int:index>')
def reponse(index):
    """Return the proxy stored at position ``index`` of the Redis list ``ip_list``.

    Responds with 404 when nothing is stored at that position; returning the
    ``None`` from Redis directly would make Flask fail with a 500.
    """
    print(index)
    proxy = r.lindex("ip_list", index)  # single lookup, reused below
    print(proxy)
    if proxy is None:
        return 'no proxy stored at index %d' % index, 404
    return proxy


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger; do not
    # expose this server outside a development machine.
    app.run(debug=True)

 

 获取ip:

 改进版

"""Improved IP proxy pool: scrape, validate, de-duplicate and store in Redis.

Compared with the simple version, the Bloom filter is seeded from the
proxies already present in the Redis list ``ip_list`` (see ``init``), and
stored proxies are re-validated on every second page of the crawl.
"""
import ast
import re
import time

import redis
import requests
from bloom_filter import BloomFilter

pool = redis.ConnectionPool(host='localhost', password='XXX', port=6379, decode_responses=True)
r = redis.Redis(connection_pool=pool)

# Bloom filter used to skip proxies that were already stored.
# NOTE(review): a Bloom filter cannot forget entries, so a proxy removed by
# time_valVer is not stored again during the same run.
bloombloom = BloomFilter(max_elements=10000, error_rate=0.1)

# Running totals shared by valVer.  Defined at module level so the functions
# also work when this file is imported instead of executed.
badNum = 0
goodNum = 0
good_list = []

# NOTE(review): the HTML tag fragments of the original pattern were lost when
# the article was extracted.  The <td ...> anchors are a reconstruction that
# matches an IP cell followed by a port cell -- verify against the live page.
IP_PORT_PATTERN = re.compile(
    r'<td[^>]*>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>.*?<td[^>]*>(\d{1,5})</td>',
    re.S,
)


def get_ip(i):
    """Scrape listing page ``i + 1`` and return its proxies as 'host:port' strings."""
    url = 'https://www.kuaidaili.com/free/inha/' + str(i + 1)
    html = requests.get(url=url).text
    time.sleep(1)  # pause between page fetches
    ip_list = [host + ':' + port for host, port in IP_PORT_PATTERN.findall(html)]
    print('共收集到%d个代理ip' % len(ip_list))
    print(ip_list)
    return ip_list


def valVer(proxys):
    """Validate freshly scraped proxies.

    Each 'host:port' string is wrapped in a requests-style proxies dict and
    used to fetch the test page.  A non-200 answer or a request error counts
    as a bad proxy.

    Returns the list of proxies dicts that passed in this call; the same
    dicts are also appended to the module-level ``good_list``.
    """
    global badNum, goodNum
    good = []
    for proxy_host in proxys:
        protocol = 'https' if 'https' in proxy_host else 'http'
        proxies = {protocol: proxy_host}
        try:
            response = requests.get('http://www.baidu.com', proxies=proxies, timeout=2)
        except requests.RequestException as e:
            print(e)
            badNum += 1
            continue
        if response.status_code != 200:
            badNum += 1
        else:
            goodNum += 1
            good.append(proxies)
            good_list.append(proxies)
    print('success proxy num : ', goodNum)
    print('bad proxy num : ', badNum)
    print("这次:", good)
    print("此时全部:", good_list)
    return good


def _drop_proxy(entry):
    """Remove one occurrence of the raw stored string ``entry`` from ``ip_list``."""
    # redis-py >= 3.0 signature is lrem(name, count, value); the value must be
    # the stored string, not the dict parsed from it.
    r.lrem("ip_list", 1, entry)


def time_valVer(proxys):
    """Re-validate proxies already stored in Redis and drop the dead ones.

    ``proxys`` holds the raw strings read from ``ip_list``.  An entry is
    removed when it answers with a non-200 status or when the request fails,
    which is the same failure criterion valVer applies.
    """
    for entry in proxys:
        print('现在正在定时测试的IP:', entry)
        try:
            proxy = ast.literal_eval(entry)
        except (ValueError, SyntaxError) as e:
            # Not a parsable literal: report it and leave the entry untouched.
            print(e)
            continue
        try:
            response = requests.get('http://www.baidu.com', proxies=proxy, timeout=2)
        except requests.RequestException as e:
            print(e)
            _drop_proxy(entry)
            continue
        if response.status_code != 200:
            _drop_proxy(entry)


def stone_redis(good):
    """Append each proxy in ``good`` to ``ip_list`` unless the Bloom filter has seen it."""
    for IP in good:
        key = str(IP)
        if key in bloombloom:
            print("%s不能存储,有相同的IP" % IP)
            continue
        print("存储的IP:", IP)
        bloombloom.add(key)
        r.rpush("ip_list", key)


def init():
    """Seed the Bloom filter with every proxy already stored in ``ip_list``."""
    # One LRANGE instead of two LINDEX round-trips per element.
    for entry in r.lrange("ip_list", 0, -1):
        print(entry)
        bloombloom.add(entry)


if __name__ == '__main__':
    init()
    for i in range(0, 10):
        if i % 2 == 0 and i != 0:
            # Pages 2, 4, 6 and 8 are not scraped: those iterations
            # re-validate what is already stored instead.
            time_valVer(r.lrange("ip_list", 0, -1))
        else:
            ip_list = get_ip(i)
            good = valVer(ip_list)
            stone_redis(good)

 

 

from flask import Flask, abort, request, jsonify
import redis  # Redis client for Python; the cache could also be operated directly on the Redis server host

r = redis.Redis(host='localhost', port=6379, password='XXX', decode_responses=True)
app = Flask(__name__)


# NOTE(review): the URL variable was stripped when the article was extracted.
# '<int:index>' is a reconstruction that matches the view's ``index``
# parameter -- confirm the converter against the original post.
@app.route('/ip/<int:index>', methods=['GET'])
def reponse(index):
    """Return ``{"ip": <entry>}`` for position ``index`` of the Redis list ``ip_list``.

    The value is ``null`` in the JSON body when nothing is stored at that
    position.
    """
    print(index)
    # Single lookup: the value that is logged is the value that is returned.
    proxy = r.lindex("ip_list", index)
    print(proxy)
    return jsonify({"ip": proxy})


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger; do not
    # expose this server outside a development machine.
    app.run(debug=True)

 

获取ip:

 

转载于:https://www.cnblogs.com/-wenli/p/11002902.html

你可能感兴趣的文章
为 iPhone 和 iPad 自定义网站的主屏幕图标
查看>>
怎样快速搜索自己所需的资料?(90%的人不会使用此方法)[转]
查看>>
【Android】使用SearchView时软键盘不支持actionSearch的问题
查看>>
url请求返回结果测试工具(CURL)
查看>>
虚拟机安装教程
查看>>
java对文件的检索
查看>>
Marquee滚动字幕设置(转)
查看>>
linux系统下调度数据库类型资源库中的kettle job
查看>>
8UFTP
查看>>
VC 2005 解决方案的目录结构设置和管理
查看>>
吾爱论坛浏览器分享
查看>>
java内存模型优化建议
查看>>
解决Ubuntu Kylin 1610安装ANSYS17.2的NVIDIA显卡驱动问题
查看>>
Linux下如何修改Apache根目录
查看>>
JAVA入门[2]-安装Maven
查看>>
什么是回调函数
查看>>
HDU 2588 GCD && GCD问题总结
查看>>
2015年北京大学软件工程学科优秀大学生夏令营上机考试---C:单词翻转面试题...
查看>>
cocos2d-x 3.0的坑有哪些
查看>>
awk条件语句
查看>>