jsoup教程_2 http-client 讲解
项目源代码 https://gitee.com/fakerlove/jsoup
文章目录
- 2. http-client 讲解
- 2.1 get 请求
- 2.2 get 带参数
- 工具类
- 发送请求
- 2.3 Post 请求
- 2.4 Post 带参数
- 2.5 连接池
- 2.6 参数
2. http-client 讲解
2.1 get 请求
请求的网址
https://www.baidu.com
java 代码
package com.ak;import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;//@Slf4j
public class demo {public static void main(String[] args) {// 打开浏览器CloseableHttpClient aDefault = HttpClients.createDefault();// 输入网址 https://movie.douban.com/chartHttpGet httpGet=new HttpGet("https://www.baidu.com");CloseableHttpResponse response=null;try {// 获取响应内容response= aDefault.execute(httpGet);// System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content.length());}} catch (IOException e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}
2.2 get 带参数
请求的网址
https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=电影
工具类
package com.ak.utils;import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;public class HttpUtils {public static PoolingHttpClientConnectionManager cm;public static ArrayList<String> agents;static {// 创建连接池管理器cm = new PoolingHttpClientConnectionManager();// 设置连接数cm.setMaxTotal(100);// 设置每个主机(理解为网站,如:百度10个、网易10个)的最大连接数cm.setDefaultMaxPerRoute(10);//初始化 User-Agent 信息agents = new ArrayList<String>();// 添加 User-Agent 信息agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");agents.add("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11");agents.add("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36");System.out.println("<--------- HttpUtils initialization success --------->");}public static RequestConfig getConfig() {RequestConfig config = 
RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;}
}
发送请求
package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;/*** 请求内容* 爬取豆瓣的内容,因为豆瓣使用了爬虫所以需要一些工具类*/
public class demo2 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=%E5%8A%A8%E4%BD%9C,%E7%94%B5%E5%BD%B1try {URIBuilder uriBuilder = new URIBuilder("https://movie.douban.com/tag/");uriBuilder.setParameter("sort","U");uriBuilder.setParameter("range","0,10");uriBuilder.setParameter("tags","电影");// 输入网址HttpGet httpGet=new HttpGet(uriBuilder.build());httpGet.setConfig(HttpUtils.getConfig());int agentNum = new Random().nextInt(HttpUtils.agents.size());httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpGet);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}
爬取
https://www.baidu.com/s?wd=faker
类似于想要查找faker 的信息
package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;/*** 请求内容* 爬取豆瓣的内容,因为豆瓣使用了爬虫所以需要一些工具类*/
public class demo2 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=%E5%8A%A8%E4%BD%9C,%E7%94%B5%E5%BD%B1try {// https://movie.douban.com/tag/URIBuilder uriBuilder = new URIBuilder("https://www.baidu.com/s");uriBuilder.setParameter("wd","faker");
// uriBuilder.setParameter("range","0,10");
// uriBuilder.setParameter("tags","电影");// 输入网址HttpGet httpGet=new HttpGet(uriBuilder.build());httpGet.setConfig(HttpUtils.getConfig());int agentNum = new Random().nextInt(HttpUtils.agents.size());httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpGet);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}
2.3 Post 请求
请求网址
https://www.baidu.com
java代码
与 get 请求相比,区别仅在于把 HttpGet 换成了 HttpPost
package com.ak;import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;//@Slf4j
public class demo3 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient aDefault = HttpClients.createDefault();// 输入网址 https://movie.douban.com/chartHttpPost httppost=new HttpPost("https://www.baidu.com");CloseableHttpResponse response=null;try {// 获取响应内容response= aDefault.execute(httppost);// System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (IOException e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}
2.4 Post 带参数
爬取网址
http://yun.itheima.com/search
java 代码
package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;//@Slf4j
public class demo4 {public static void main(String[] args) {CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://www.baidu.com/stry {// 封装表单对象List<NameValuePair> params=new ArrayList<>();params.add(new BasicNameValuePair("keys","java"));// 输入网址HttpPost httpPost=new HttpPost("http://yun.itheima.com/search");UrlEncodedFormEntity formEntity=new UrlEncodedFormEntity(params,"utf-8");int agentNum = new Random().nextInt(HttpUtils.agents.size());// 设置配置httpPost.setConfig(HttpUtils.getConfig());// 设置表单内容httpPost.setEntity(formEntity);// 设置代理httpPost.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpPost);System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}
2.5 连接池
package com.ak.utils;import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;public class HttpUtils {public static PoolingHttpClientConnectionManager cm;public static ArrayList<String> agents;static {// 创建连接池管理器cm = new PoolingHttpClientConnectionManager();// 设置连接数cm.setMaxTotal(100);// 设置每个主机(理解为网站,如:百度10个、网易10个)的最大连接数cm.setDefaultMaxPerRoute(10);//初始化 User-Agent 信息agents = new ArrayList<String>();// 添加 User-Agent 信息agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");agents.add("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11");agents.add("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36");System.out.println("<--------- HttpUtils initialization success --------->");}/*** 获取页面源代码** @param url 网页链接* @return 页面源代码*/public String doGetHtml(String 
url) {// 通过连接池获取 httpClientCloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();HttpGet httpGet = new HttpGet(url);// 伪造 User-Agent(反反爬虫)// 生成一个范围在 0-x(不包含x)内的任意正整数int agentNum = new Random().nextInt(agents.size());httpGet.addHeader("User-Agent", agents.get(agentNum));// 设置请求信息httpGet.setConfig(getConfig());// 定义 response,方便 finally 中关闭CloseableHttpResponse response = null;try {response = httpClient.execute(httpGet);// 获取并判断,状态码是否正常(正常值:200)if (response.getStatusLine().getStatusCode() == 200) {// 判断响应体是否为空,不为空则获取内容if (response.getEntity() != null) {// 获取响应体,并指定 UTF-8 编码String content = EntityUtils.toString(response.getEntity(), "utf8");return content;}}} catch (IOException e) {e.printStackTrace();} finally {// 判断并关闭 responseif (response != null) {try {response.close();} catch (IOException e) {e.printStackTrace();}}// 不关闭 httpClient,交给连接池管理}System.out.println("<--------- doGetHtml() ERROR --------->");return "";}public static RequestConfig getConfig() {RequestConfig config = RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;}
}
2.6 参数
public static RequestConfig getConfig() {RequestConfig config = RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;
}
jsoup教程_2 http-client 讲解相关推荐
- Git教程_2 所有操作讲解
https://gitee.com/fakerlove/git 文章目录 2. 所有操作讲解 2.1 创建版本库 2.2 文件的基本操作 1. 创建 2. 添加 3. 提交 2.3 工作区和暂存区 2 ...
- JSOUP 教程—— Java爬虫,简易入门,秒杀htmlparser
转载自 JSOUP 教程-- Java爬虫,简易入门,秒杀htmlparser 关于爬虫,之前一直用做第一个站的时候,记得那时候写的 爬虫 是爬sina 的数据,用的就是 htmlparser 可 ...
- 微信小游戏开发教程-2D游戏原理讲解
微信小游戏开发教程-2D游戏原理讲解 原理 为了更加形象的描述,这里先上一张图: 背景 a. 首先,我们看到背景好像是一张无限长的图片在向下移动.实际则不然,这是一张顶部和底部刚好重叠的图片.这是一种 ...
- 【编程实践】Git命令基础教程和代码实例讲解
Git命令基础教程和代码实例讲解 Git是一个开源的分布式版本控制系统,用于敏捷高效地处理任何或小或大的项目.Git与常用的版本控制工具CVS.Subversion等不同,它采用了分布式版本库的方式. ...
- ps2019布尔运算快捷键_ps教程——布尔运算的操作讲解
原标题:ps教程--布尔运算的操作讲解 首先在PS里按Ctrl+K打开设置面板,选择工具,然后勾选"将适量工具与变化和像素网格对其". 然后选择黑箭头(快捷键A)在勾选对其边缘.大 ...
- jsoup教程_3 Jsoup 讲解
项目源代码 https://gitee.com/fakerlove/jsoup 文章目录 3. Jsoup 讲解 3.1 解析Url 引入依赖 测试 3.2 解析字符串 3.3 解析文件 3.4 使用 ...
- Nacos教程_2 讲解
教程原稿 https://gitee.com/fakerlove/joker-nacos 文章目录 2. 讲解 2.1 入门流程 发布配置 添加依赖 main 函数 远程获取结果 2.2 配置管理 1 ...
- SpringSecurity入门到入土教程_2 Oauth教程
https://gitee.com/fakerlove/spring-security 文章目录 SpringOauth 教程 1. 简介 1.1 oauth2 概念 架构图 验证流程 spring ...
- appium java类库下载,appium 简明教程 (4)——appium client 的安装
appium client 是对 webdriver 原生 api 的一些扩展和封装.它可以帮助我们更容易的写出用例,写出更好懂的用例. appium client 是配合原生的 webdriver ...
最新文章
- 《新一代SDN——VMware NSX 网络原理与实践》——导读
- 【Python】torrentParser1.00
- QByteArray怎么转化为QString类型,并且在QLineEdit上面显示出来
- Java 中按文件名称分类,按文件大小分类,按照文件类型分类,按照最后修改时间分类的工具类
- 面向对象基础知识01
- java abstractrequest,Java AbstractJackson2HttpMessageConverter類代碼示例
- Spring : Spring Aop 创建代理
- 将Visual Studio Code和Windows Subsystem for Linux一起使用
- orm php 自定义,Eloquent ORM 自定义 builder
- php5 mysql一键安装包_常用PHP5运行环境一键安装包
- 耳机煲机软件测试自学,耳机煲机工具Test Tone Generator蹂躏新耳机教程
- C++语言的表达式模板:表达式模板的入门性介绍
- SCI收录期刊——声学学科 (转载)
- ASP.NET Web Pages - 教程
- 微软 MSCRM 教育成功案例 界面展示
- VerilogHDL正弦信号发生器
- CSRNet: Dilated Convolutional Neural Networks for Understanding the Highly Congested Scen 论文阅读
- 安霸Ambarella_海思Hisilicon_AI芯片参数对比
- html实现安卓手机重启,这12行代码分分钟让你电脑崩溃手机重启
- python运维脚本简书_通过python+selenium3实现浏览器刷简书文章阅读量