java爬虫_从腾讯视频播放界面爬取视频并存到本地

源码如下：

package com.example.demo.test.db;import org.apache.commons.lang.RandomStringUtils;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;/*** 获取视频接口的json* @author Administrator**/
public class CatchVedio {//    Socket client = new Scoket();private URL url;private HttpURLConnection urlConnection;private int responseCode;private BufferedReader reader;private BufferedWriter writer;public static void main(String[] args) {CatchVedio cv = new CatchVedio();try {String[] VedioURL = cv.get_VedioURL();//接收for(String temp:VedioURL) {//temp是每一个视频的播放地址StringBuilder stringBuilder = new StringBuilder();String filename= RandomStringUtils.randomAlphanumeric(10);String s = stringBuilder.append("D:\\worm\\video").append(File.separator).append(filename).append(".mp4").toString();CatchIMG.getImg(cv.analyse(cv.get_Json(temp)),s);//cv.toDownloadURL(cv.analyse(cv.get_Json(temp)));//写出到文件}} catch (IOException e) {// TODO 自动生成的 catch 块e.printStackTrace();}finally {try {cv.reader.close();cv.writer.close();} catch (IOException e) {// TODO 自动生成的 catch 块e.printStackTrace();}}}void toDownloadURL(String real_url) throws IOException {//将对应下载地址输出到文件this.writer = new BufferedWriter(new FileWriter("D:/worm/downloadURL.txt",true));//定义追加方式写入的流
//        this.writer.append(real_url);this.writer.write(real_url+"\r\n");this.writer.flush();}String analyse(String json) {//分析json,传回完整下载地址int fvkey_index = json.indexOf("\"fvkey\":\"")+9;int endIndex = json.indexOf("\"",fvkey_index);String fvkey = json.substring(fvkey_index,endIndex);//获取到fvkey
//        System.out.println(fvkey);int fn_index = json.indexOf("\"fn\":\"")+6;int fn_end = json.indexOf("\"",fn_index);String fn = json.substring(fn_index,fn_end);//获取到视频文件名
//        System.out.println(fn);String head = "http://ugcws.video.gtimg.com/";StringBuffer real_url = new StringBuffer();real_url.append(head);//加入头部real_url.append(fn+"?");//加入文件名real_url.append("vkey="+fvkey);//加入解锁码/*构造成功*/
//        System.out.println(real_url.toString());return real_url.toString();}String get_Json(String url) throws UnsupportedEncodingException, IOException {String line = "";StringBuffer sb = new StringBuffer();this.url = new URL(url);this.urlConnection = (HttpURLConnection)this.url.openConnection();this.responseCode = this.urlConnection.getResponseCode();if (this.responseCode == 200) {this.reader = new BufferedReader(new InputStreamReader(this.urlConnection.getInputStream(), "UTF-8"));while ((line = this.reader.readLine()) != null) {sb.append(line);// 网页传回的只有一行}return sb.toString();}return "";}String[] get_VedioURL() throws IOException {
//    void get_VedioURL() throws IOException {File file = new File("D:/worm/vedioURL.txt");String line = "";this.reader = new BufferedReader(new FileReader(file));String[] t = new String[0];List<String> container = new ArrayList<String>();while(null!=(line = this.reader.readLine())) {if(line.equals("")) {continue;}line = this.change(line);//转换一下container.add(line);//装入容器}return container.toArray(t);}/*** http://vv.video.qq.com/getinfo?vids=x0164ytbgov&platform=101001&charge=0&otype=json&defn=shd //格式* @param str* @return* https://v.qq.com/x/page/f08302y6rof.html//页面地址示例* https://v.qq.com/x/page/y083158hphd.html* https://v.qq.com/x/page/c08503oe58c.html*/String change(String str) {//定义从页面播放地址获取vid转换到后台接口地址的方法String head = "http://vv.video.qq.com/getinfo?vids=";String tail = "&platform=101001&charge=0&otype=json&defn=shd";String vid = str.substring(str.indexOf("page/")+5,str.indexOf(".html"));return head+vid+tail;}
}

package com.example.demo.test.db;import org.apache.commons.lang.RandomStringUtils;import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;/*** @ClassName: CatchIMG* @Description: 爬取一个指定地址的网络資源* @author penny* @date 2017年12月3日 下午9:00:05**/
/**
 * Downloads a single network resource (image, video, ...) to a local file.
 *
 * <p>Usage example:
 * <pre>
 * CatchIMG.getImg("https://www.imooc.com/video/20607/", "F:\\img" + File.separator + "sample.avi");
 * </pre>
 */
public class CatchIMG {

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36";

    /** Size of the copy buffer: 10 KB. */
    private static final int BUFFER_SIZE = 1024 * 10;

    /**
     * Fetches {@code url} with an HTTP GET and writes the response body to {@code img}.
     *
     * <p>The response message is printed to standard output. When the response code
     * is not 200, nothing is written and the target file is not created.
     *
     * @param url the address to download; surrounding whitespace is trimmed
     * @param img the path of the file to write; an existing file is overwritten
     * @throws IOException if the URL is malformed, the connection fails, or the
     *         file cannot be written
     */
    public static void getImg(String url, String img) throws IOException {
        URL resourceUrl = new URL(url.trim());
        HttpURLConnection urlConn = (HttpURLConnection) resourceUrl.openConnection();
        try {
            urlConn.setRequestProperty("User-Agent", USER_AGENT);
            urlConn.connect();
            System.out.println(CatchIMG.class.toString() + ":获取连接=" + urlConn.getResponseMessage());

            if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                // Only a 200 response is saved; anything else is silently skipped
                // (the response message printed above is the only diagnostic).
                return;
            }

            // One buffered wrapper per stream, created once; try-with-resources
            // closes both even when the copy fails halfway.
            try (InputStream in = new BufferedInputStream(urlConn.getInputStream());
                 OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(img)))) {
                byte[] buff = new byte[BUFFER_SIZE];
                int len;
                while ((len = in.read(buff)) != -1) {
                    out.write(buff, 0, len);
                }
            }
        } finally {
            urlConn.disconnect();
        }
    }
}

《java爬虫_从腾讯视频播放界面爬取视频并存到本地》相关推荐

爬虫概念与编程学习之如何爬取视频网站页面（三）
先看,前一期博客,理清好思路. 爬虫概念与编程学习之如何爬取网页源代码(一) 爬虫概念与编程学习之如何爬取视频网站页面(用HttpClient)(二) 不多说,直接上代码. 编写代码运行 <! ...
java爬虫系列（三）——漫画网站爬取实战
项目地址数据库设计 ORM框架代理浏览器实战目标代码解读 com.ouyang.crawlers.Manhua start() chapterBean() contentBean() 实测启 ...
[完整爬虫]java爬虫基础对36Kr快讯数据进行爬取以及数据筛选过滤
由于九月事件把爬虫推到风口浪尖而我写这些只是分享技术不涉及隐私等个人资料的获取并且是在不会对对方服务器造成压力的情况下进行的爬取特此声明 36Kr 也叫36氪,是一个我非常喜欢的网站,网罗天下 ...
java爬虫案例——SpringBoot使用HttpClient、Jsoup爬取京东手机数据
文章目录前言一.准备工作二.项目文件 1.项目依赖 2.项目配置文件 3.pojo 4.dao接口 5.service接口及其实现类 6.HttpClient封装工具类 7.爬取任务实现 8.启 ...
爬虫概念与编程学习之如何爬取视频网站页面（用HttpClient）（二）
编写代码运行 <!DOCTYPE html><html><head><meta http-equiv="X-UA-Compatible" ...
Python-Selenium爬虫之网易云音乐歌曲歌词爬取并保存到本地（详解分析+代码实现）
文章目录一.项目介绍二.所需技术三.网页分析 3.1 分析一级页面响应内容 3.2 分析二级页面响应内容 3.3 分析三级页面响应内容 3.4 分析四级页面响应内容四.分析小结五.代码实现 ...
Java爬虫代码示例｜入门教程 1- 快速爬取百度美图
文章目录前言源码参考前言说起搜索,大家一定对百度不陌生.每个搜索引擎后边都是一个大型爬虫调度系统. 第一篇我们先以百度为例每天都被它爬,今天我们来爬一下它. 源码 url分析 https: ...
python天眼查爬虫_普通用户的天眼查爬取
[Python] 纯文本查看复制代码#-*- coding:utf-8 -*- import requests from lxml import etree import json import t ...
python网络爬虫_Python网络爬虫——爬取视频网站源视频！
原标题:Python网络爬虫--爬取视频网站源视频! 学习前提 1.了解python基础语法 2.了解re、selenium、BeautifulSoup、os、requests等python第三方库 ...

java爬虫_从腾讯视频播放界面爬取视频并存到本地

java爬虫_从腾讯视频播放界面爬取视频并存到本地相关推荐

最新文章

热门文章