项目源代码 https://gitee.com/fakerlove/jsoup

文章目录

  • 2. http-client 讲解
    • 2.1 get 请求
    • 2.2 get 带参数
      • 工具类
      • 发送请求
    • 2.3 Post 请求
    • 2.4 Post 带参数
    • 2.5 连接池
    • 2.6 参数

2. http-client 讲解

2.1 get 请求

请求的网址

https://www.baidu.com

java 代码

package com.ak;import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;//@Slf4j
public class demo {public static void main(String[] args) {// 打开浏览器CloseableHttpClient aDefault = HttpClients.createDefault();// 输入网址 https://movie.douban.com/chartHttpGet httpGet=new HttpGet("https://www.baidu.com");CloseableHttpResponse response=null;try {// 获取响应内容response= aDefault.execute(httpGet);// System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content.length());}} catch (IOException e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}

2.2 get 带参数

请求的网址

https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=电影

工具类

package com.ak.utils;import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;public class HttpUtils {public static PoolingHttpClientConnectionManager cm;public static ArrayList<String> agents;static  {// 创建连接池管理器cm = new PoolingHttpClientConnectionManager();// 设置连接数cm.setMaxTotal(100);// 设置每个主机(理解为网站,如:百度10个、网易10个)的最大连接数cm.setDefaultMaxPerRoute(10);//初始化 User-Agent 信息agents = new ArrayList<String>();// 添加 User-Agent 信息agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");agents.add("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11");agents.add("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36");System.out.println("<--------- HttpUtils initialization success --------->");}public static RequestConfig getConfig() {RequestConfig config = 
RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;}
}

发送请求

package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;/*** 请求内容* 爬取豆瓣的内容,因为豆瓣使用了爬虫所以需要一些工具类*/
public class demo2 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=%E5%8A%A8%E4%BD%9C,%E7%94%B5%E5%BD%B1try {URIBuilder uriBuilder = new URIBuilder("https://movie.douban.com/tag/");uriBuilder.setParameter("sort","U");uriBuilder.setParameter("range","0,10");uriBuilder.setParameter("tags","电影");// 输入网址HttpGet httpGet=new HttpGet(uriBuilder.build());httpGet.setConfig(HttpUtils.getConfig());int agentNum = new Random().nextInt(HttpUtils.agents.size());httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpGet);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}

爬取

https://www.baidu.com/s?wd=faker

类似于想要查找faker 的信息

package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;/*** 请求内容* 爬取豆瓣的内容,因为豆瓣使用了爬虫所以需要一些工具类*/
public class demo2 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=%E5%8A%A8%E4%BD%9C,%E7%94%B5%E5%BD%B1try {// https://movie.douban.com/tag/URIBuilder uriBuilder = new URIBuilder("https://www.baidu.com/s");uriBuilder.setParameter("wd","faker");
//            uriBuilder.setParameter("range","0,10");
//            uriBuilder.setParameter("tags","电影");// 输入网址HttpGet httpGet=new HttpGet(uriBuilder.build());httpGet.setConfig(HttpUtils.getConfig());int agentNum = new Random().nextInt(HttpUtils.agents.size());httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpGet);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}

2.3 Post 请求

请求网址

https://www.baidu.com

java代码

与 get 请求相比,区别仅在于把 HttpGet 换成了 HttpPost

package com.ak;import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.io.IOException;//@Slf4j
public class demo3 {public static void main(String[] args) {// 打开浏览器CloseableHttpClient aDefault = HttpClients.createDefault();// 输入网址 https://movie.douban.com/chartHttpPost httppost=new HttpPost("https://www.baidu.com");CloseableHttpResponse response=null;try {// 获取响应内容response= aDefault.execute(httppost);// System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (IOException e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}

2.4 Post 带参数

爬取网址

http://yun.itheima.com/search

java 代码

package com.ak;import com.ak.utils.HttpUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;//@Slf4j
public class demo4 {public static void main(String[] args) {CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();CloseableHttpResponse response=null;// 设置参数// 分析这个// https://www.baidu.com/stry {// 封装表单对象List<NameValuePair> params=new ArrayList<>();params.add(new BasicNameValuePair("keys","java"));// 输入网址HttpPost httpPost=new HttpPost("http://yun.itheima.com/search");UrlEncodedFormEntity formEntity=new UrlEncodedFormEntity(params,"utf-8");int agentNum = new Random().nextInt(HttpUtils.agents.size());// 设置配置httpPost.setConfig(HttpUtils.getConfig());// 设置表单内容httpPost.setEntity(formEntity);// 设置代理httpPost.addHeader("User-Agent", HttpUtils.agents.get(agentNum));// 获取响应内容response= httpClient.execute(httpPost);System.out.println(response);if(response.getStatusLine().getStatusCode()==200){String content = EntityUtils.toString(response.getEntity());System.out.println(content);}} catch (Exception e) {e.printStackTrace();}finally {// 关闭流if(response!=null){try {response.close();} catch (IOException e) {e.printStackTrace();}}}}
}

2.5 连接池

package com.ak.utils;import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;public class HttpUtils {public static PoolingHttpClientConnectionManager cm;public static ArrayList<String> agents;static  {// 创建连接池管理器cm = new PoolingHttpClientConnectionManager();// 设置连接数cm.setMaxTotal(100);// 设置每个主机(理解为网站,如:百度10个、网易10个)的最大连接数cm.setDefaultMaxPerRoute(10);//初始化 User-Agent 信息agents = new ArrayList<String>();// 添加 User-Agent 信息agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");agents.add("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11");agents.add("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36");System.out.println("<--------- HttpUtils initialization success --------->");}/*** 获取页面源代码** @param url 网页链接* @return 页面源代码*/public String doGetHtml(String 
url) {// 通过连接池获取 httpClientCloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();HttpGet httpGet = new HttpGet(url);// 伪造 User-Agent(反反爬虫)// 生成一个范围在 0-x(不包含x)内的任意正整数int agentNum = new Random().nextInt(agents.size());httpGet.addHeader("User-Agent", agents.get(agentNum));// 设置请求信息httpGet.setConfig(getConfig());// 定义 response,方便 finally 中关闭CloseableHttpResponse response = null;try {response = httpClient.execute(httpGet);// 获取并判断,状态码是否正常(正常值:200)if (response.getStatusLine().getStatusCode() == 200) {// 判断响应体是否为空,不为空则获取内容if (response.getEntity() != null) {// 获取响应体,并指定 UTF-8 编码String content = EntityUtils.toString(response.getEntity(), "utf8");return content;}}} catch (IOException e) {e.printStackTrace();} finally {// 判断并关闭 responseif (response != null) {try {response.close();} catch (IOException e) {e.printStackTrace();}}// 不关闭 httpClient,交给连接池管理}System.out.println("<--------- doGetHtml() ERROR --------->");return "";}public static RequestConfig getConfig() {RequestConfig config = RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;}
}

2.6 参数

 public static RequestConfig getConfig() {RequestConfig config = RequestConfig.custom()// 创建连接的最长时间.setConnectTimeout(1000)// 获取连接最长时间.setConnectionRequestTimeout(1000)// 数据传输最长时间.setSocketTimeout(10 * 1000).build();return config;
}

jsoup教程_2 http-client 讲解相关推荐

  1. Git教程_2 所有操作讲解

    https://gitee.com/fakerlove/git 文章目录 2. 所有操作讲解 2.1 创建版本库 2.2 文件的基本操作 1. 创建 2. 添加 3. 提交 2.3 工作区和暂存区 2 ...

  2. JSOUP 教程—— Java爬虫,简易入门,秒杀htmlparser

    转载自 JSOUP 教程-- Java爬虫,简易入门,秒杀htmlparser 关于爬虫,之前一直用做第一个站的时候,记得那时候写的 爬虫  是爬sina 的数据,用的就是 htmlparser  可 ...

  3. 微信小游戏开发教程-2D游戏原理讲解

    微信小游戏开发教程-2D游戏原理讲解 原理 为了更加形象的描述,这里先上一张图: 背景 a. 首先,我们看到背景好像是一张无限长的图片在向下移动.实际则不然,这是一张顶部和底部刚好重叠的图片.这是一种 ...

  4. 【编程实践】Git命令基础教程和代码实例讲解

    Git命令基础教程和代码实例讲解 Git是一个开源的分布式版本控制系统,用于敏捷高效地处理任何或小或大的项目.Git与常用的版本控制工具CVS.Subversion等不同,它采用了分布式版本库的方式. ...

  5. ps2019布尔运算快捷键_ps教程——布尔运算的操作讲解

    原标题:ps教程--布尔运算的操作讲解 首先在PS里按Ctrl+K打开设置面板,选择工具,然后勾选"将适量工具与变化和像素网格对其". 然后选择黑箭头(快捷键A)在勾选对其边缘.大 ...

  6. jsoup教程_3 Jsoup 讲解

    项目源代码 https://gitee.com/fakerlove/jsoup 文章目录 3. Jsoup 讲解 3.1 解析Url 引入依赖 测试 3.2 解析字符串 3.3 解析文件 3.4 使用 ...

  7. Nacos教程_2 讲解

    教程原稿 https://gitee.com/fakerlove/joker-nacos 文章目录 2. 讲解 2.1 入门流程 发布配置 添加依赖 main 函数 远程获取结果 2.2 配置管理 1 ...

  8. SpringSecurity入门到入土教程_2 Oauth教程

    https://gitee.com/fakerlove/spring-security 文章目录 SpringOauth 教程 1. 简介 1.1 oauth2 概念 架构图 验证流程 spring ...

  9. appium java类库下载,appium 简明教程 (4)——appium client 的安装

    appium client 是对 webdriver 原生 api 的一些扩展和封装.它可以帮助我们更容易的写出用例,写出更好懂的用例. appium client 是配合原生的 webdriver ...

最新文章

  1. 《新一代SDN——VMware NSX 网络原理与实践》——导读
  2. 【Python】torrentParser1.00
  3. QByteArray怎么转化为QString类型,并且在QLineEdit上面显示出来
  4. Java 中按文件名称分类,按文件大小分类,按照文件类型分类,按照最后修改时间分类的工具类
  5. 面向对象基础知识01
  6. java abstractrequest,Java AbstractJackson2HttpMessageConverter類代碼示例
  7. Spring : Spring Aop 创建代理
  8. 将Visual Studio Code和Windows Subsystem for Linux一起使用
  9. orm php 自定义,Eloquent ORM 自定义 builder
  10. php5 mysql一键安装包_常用PHP5运行环境一键安装包
  11. 耳机煲机软件测试自学,耳机煲机工具Test Tone Generator蹂躏新耳机教程
  12. C++语言的表达式模板:表达式模板的入门性介绍
  13. SCI收录期刊——声学学科 (转载)
  14. ASP.NET Web Pages - 教程
  15. 微软 MSCRM 教育成功案例 界面展示
  16. VerilogHDL正弦信号发生器
  17. CSRNet: Dilated Convolutional Neural Networks for Understanding the Highly Congested Scen 论文阅读
  18. 安霸Ambarella_海思Hisilicon_AI芯片参数对比
  19. html实现安卓手机重启,这12行代码分分钟让你电脑崩溃手机重启
  20. python运维脚本简书_通过python+selenium3实现浏览器刷简书文章阅读量

热门文章

  1. Android查看CPU和GPU使用率
  2. Android系统信息获取 之二:版本信息获取
  3. wpf之DragDrop研究
  4. centos6.9终端命令
  5. redis存储对象_redis内存优化总结
  6. 阿里云申请免费ssl证书并配置nginx
  7. Python将彩色图像转为灰度图像
  8. win10 linux子系统开启桌面显示
  9. Uncaught ReferenceError: jie is not defined
  10. php项目网页音乐播放器插件,10个免费开源的JS音乐播放器插件