Jsoup爬虫--使用阿布云代理
代理类:
package com.qianxing.API;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.LayeredConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helper that sends HTTP GET/POST requests through the Abuyun proxy tunnel.
 *
 * <p>All requests share one pooled connection manager and one HTTP client, both
 * created in the static initializer. Every request is routed through
 * {@code proxyHost:proxyPort} and carries the {@code Proxy-Switch-Ip} header.
 */
@Component
public class HttpProxy {

    // Proxy server
    final static String proxyHost = "proxy.abuyun.com";
    final static Integer proxyPort = 9020;

    // Proxy tunnel credentials
    // NOTE(review): these credentials are hard-coded in source; consider loading them
    // from configuration or the environment and rotating the values published here.
    final static String proxyUser = "HL9946L10868L0ID";
    final static String proxyPass = "A329CEFABD0A7067";

    // Header that asks the tunnel to switch the outgoing IP
    final static String switchIpHeaderKey = "Proxy-Switch-Ip";
    final static String switchIpHeaderVal = "yes";

    // Total number of times a request is attempted before giving up. The previous
    // implementation retried by unbounded recursion and could overflow the stack.
    private static final int MAX_ATTEMPTS = 3;

    // Charset used to decode a response body when the server declares none.
    private static final String DEFAULT_CHARSET = "UTF-8";

    private static PoolingHttpClientConnectionManager cm = null;
    private static HttpHost proxy = null;
    private static CredentialsProvider credsProvider = null;
    private static RequestConfig reqConfig = null;
    // Built once and shared by all requests instead of being rebuilt (and never closed) per call.
    private static CloseableHttpClient httpClient = null;

    static {
        ConnectionSocketFactory plainsf = PlainConnectionSocketFactory.getSocketFactory();
        LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory.getSocketFactory();

        Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("http", plainsf)
                .register("https", sslsf)
                .build();

        cm = new PoolingHttpClientConnectionManager(registry);
        cm.setMaxTotal(20);
        cm.setDefaultMaxPerRoute(5);

        proxy = new HttpHost(proxyHost, proxyPort, "http");

        credsProvider = new BasicCredentialsProvider();
        credsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(proxyUser, proxyPass));

        reqConfig = RequestConfig.custom()
                .setConnectionRequestTimeout(20000)
                .setConnectTimeout(20000)
                .setSocketTimeout(20000)
                .setExpectContinueEnabled(false)
                .setProxy(proxy)
                .build();

        httpClient = HttpClients.custom()
                .setConnectionManager(cm)
                .setDefaultCredentialsProvider(credsProvider)
                .build();
    }

    /**
     * Executes the given request through the proxy and returns the response body.
     *
     * <p>The request is attempted up to {@code MAX_ATTEMPTS} times; an attempt is
     * repeated only when executing it or reading its body throws.
     *
     * @param httpReq the request to execute; must not be {@code null}
     * @return the response body when the status code is 200; {@code null} when the
     *         status code is anything else, when the response has no body, or when
     *         every attempt failed with an exception
     * @throws IllegalArgumentException if {@code httpReq} is {@code null}
     */
    public static String doRequest(HttpRequestBase httpReq) {
        if (httpReq == null) {
            throw new IllegalArgumentException("httpReq must not be null");
        }

        setHeaders(httpReq);
        httpReq.setConfig(reqConfig);

        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            CloseableHttpResponse httpResp = null;
            int statusCode = 0;
            try {
                // Register a Basic scheme for the proxy host in a per-request auth cache.
                AuthCache authCache = new BasicAuthCache();
                authCache.put(proxy, new BasicScheme());

                HttpClientContext localContext = HttpClientContext.create();
                localContext.setAuthCache(authCache);

                httpResp = httpClient.execute(httpReq, localContext);
                statusCode = httpResp.getStatusLine().getStatusCode();

                // The body is read even for non-200 responses so the content is fully consumed.
                HttpEntity entity = httpResp.getEntity();
                String body = (entity == null) ? null : EntityUtils.toString(entity, DEFAULT_CHARSET);

                return statusCode == 200 ? body : null;
            } catch (Exception e) {
                System.out.println("当前 statusCode --> " + statusCode);
                System.out.println("ssl问题,重跑一次");
                System.out.println("attempt " + attempt + "/" + MAX_ATTEMPTS + " failed: " + e);
            } finally {
                try {
                    if (httpResp != null) {
                        httpResp.close();
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        // Every attempt threw; report failure the same way as a non-200 response.
        return null;
    }

    /**
     * Sets the headers sent with every proxied request.
     *
     * @param httpReq the request to decorate
     */
    private static void setHeaders(HttpRequestBase httpReq) {
        // NOTE(review): a null-valued Accept-Encoding presumably stops the client from
        // adding its own compression header; confirm against the HttpClient version in use.
        httpReq.setHeader("Accept-Encoding", null);
        httpReq.setHeader(switchIpHeaderKey, switchIpHeaderVal);
    }

    /**
     * Sends a sample form POST to the Abuyun test page through the proxy.
     * The response body is discarded; any exception is printed and swallowed.
     */
    public static void doPostRequest() {
        try {
            // Target page
            HttpPost httpPost = new HttpPost("https://test.abuyun.com/proxy.php");

            // Form parameters
            List<BasicNameValuePair> params = new ArrayList<BasicNameValuePair>();
            params.add(new BasicNameValuePair("method", "next"));
            // NOTE(review): this value is not valid JSON (':' where ',' is expected after
            // "abuyun"); left unchanged because the endpoint's expectations are not visible here.
            params.add(new BasicNameValuePair("params", "{\"broker\":\"abuyun\":\"site\":\"https://www.abuyun.com\"}"));

            httpPost.setEntity(new UrlEncodedFormEntity(params, "utf-8"));

            doRequest(httpPost);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Sends a GET request for the given URL through the proxy.
     *
     * <p>Example targets: {@code http://proxy.abuyun.com/switch-ip},
     * {@code http://proxy.abuyun.com/current-ip}.
     *
     * @param targetUrl the page to fetch
     * @return the response body on status 200; {@code null} on any other status or
     *         when the request could not be built or executed
     */
    public static String doGetRequest(String targetUrl) {
        try {
            HttpGet httpGet = new HttpGet(targetUrl);
            return doRequest(httpGet);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
转载于:https://www.cnblogs.com/yzwhykd/p/6568041.html
Jsoup爬虫--使用阿布云代理相关推荐
- [Python3网络爬虫开发实战] 付费讯代理、阿布云代理的使用
相对免费代理来说,付费代理的稳定性相对更高一点,本节介绍一下爬虫付费代理的相关使用过程. 付费代理分类 在这里将付费代理分为两类: 提供接口获取海量代理,按天或者按量付费,如讯代理 搭建了代理隧道,直 ...
- scrapy——7 scrapy-redis分布式爬虫,用药助手实战,Boss直聘实战,阿布云代理设置...
scrapy--7 什么是scrapy-redis 怎么安装scrapy-redis scrapy-redis常用配置文件 scrapy-redis键名介绍 实战-利用scrapy-redis分布式爬 ...
- 第二十八节:隧道代理阿布云代理
阿布云为我们提供了隧道代理IP的服务,通过阿布云HTTP隧道的动态版可以让我们的爬虫很好的使用动态代理IP 由此可知我们可以得到requests接入代码 1 # -*- coding:utf-8 -* ...
- Scrapy使用阿布云代理
1.打开middlewares.py文件 2.添加代理类 import base64 """ 阿布云代理配置""" proxy_serv ...
- Scrapy + 阿布云代理 + 手动自动配置
代理原理 介绍 代理指的就是代理服务器,英文 叫作 proxy server,它的功能是代理网络用户 去取得网络信 息.形象地说,它是网络信息的中转站(可以直白的理解为中介). 当我们请求一个网站时, ...
- jsoup+蚂蚁代理/阿布云代理
final static String appkey = "xxxxx";//蚂蚁代理key final static String secret = "xxxxxxxx ...
- pyppeteer使用阿布云代理隧道问题以及代码 和代理是否成功 python3 selenium设置代理ip的实现 IP 非真人甑别
python3 selenium模块Chrome设置代理ip的实现 selenium模块Chrome设置代理ip的实现代码: from selenium import webdriver chrome ...
- 阿布云代理的使用方法
import requests HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537 ...
- pyppeteer 使用阿布云代理
proxyHost = 地址proxyPort = 端口代理隧道验证信息proxyServer = "http://" + proxyHost + ":" + ...
最新文章
- Openssl:构建CA的过程并实现web服务基于https访问的网络架构
- php和java融合_JSP、PHP与JAVA融合的开发环境之二
- php鼠标经过显示文本,jQuery实现鼠标单击网页文字后在文本框显示的方法
- HDU 2066 一个人的旅行
- 8255数码管显示0到9_汇编语言--键盘扫描及显示实验(含代码解释)
- Python中的三个特殊函数
- BZOJ 2442: [Usaco2011 Open]修剪草坪( dp )
- java生成world文件_Hello World 项目创建与项目配置文件介绍
- 计算机一级报名照片是几寸的,一级建造师报名照片要几寸的
- 基于Python实现网页版之复杂图片去水印
- PS画实线、虚线箭头
- 【记录贴】Windows连接L2TP
- Gilbert Elliot丢包模型
- python-套接字基础与 UDP 通信
- Implement strStr() -- LeetCode
- 小傻蛋的妹妹跟随小甲鱼学习Python的第二十节020
- mysql拒绝访问root用户_对于出现拒绝访问root用户的解决方案
- Python基础——csv文件中某列数据替换为数字
- matlab语音加入正弦噪声,基于Matlab的语音信号去噪声处理 毕业论文.doc
- kafka关于副本的理解
热门文章
- c语言从入门到入狱段子,坑人的套路一问一答
- 最长对称字符串php_对称子字符串的最大长度
- 安全日志审计系统服务器,日志审计服务器
- 【推荐架构day3】微博推荐引擎的体系结构:技术实现
- SIGAI机器学习算法地图
- 递归算法与两个经典问题:汉诺塔问题和八皇后问题
- s19王者荣耀苹果服务器维护,王者荣耀3月31日更新失败解决方法 S19IOS更新失败怎么办...
- FTP连接 出现200 Type set to A. 227 Entering Passive Mode
- SwitchyOmega 配置前端代理
- 都说码农发展前景不好,那些35岁以上的大龄程序员未来之路在何方?