如有侵权,请联系作者删除

水平有限,还望大牛指点

<dependency>    <groupId>org.jsoup</groupId>    <artifactId>jsoup</artifactId>    <version>1.8.3</version></dependency>

import com.sun.tools.doclets.formats.html.SourceToHTMLConverter;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/*** Created with Chenquan.* Description: 淘宝抓包* Date: 2018-12-13* Time: 15:12*/
public class TaobaoCatch {public static void main(String[] args) {int i = 0;/*        String url = "https://acs.m.taobao.com/h5/mtop.taobao.wsearch.h5search/1.0/?jsv=2.3.16&appKey=12574478&t=1545023581359&sign=e3476c9041a75de0a9190da470204d93&api=mtop.taobao.wsearch.h5search&v=1.0&H5Request=true&ecode=1&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22q%22%3A%22%E4%BB%99%E6%B6%B5%E5%86%85%E8%A1%A3%22%2C%22search%22%3A%22%E6%8F%90%E4%BA%A4%22%2C%22tab%22%3A%22all%22%2C%22sst%22%3A%221%22%2C%22n%22%3A20%2C%22buying%22%3A%22buyitnow%22%2C%22m%22%3A%22api4h5%22%2C%22token4h5%22%3A%22%22%2C%22abtest%22%3A%221%22%2C%22wlsort%22%3A%221%22%2C%22page%22%3A1%7D";Connection con = Jsoup.connect(url);con.header("Cookie", "cna=TA+aFFGXQFUCAXQaRYGZVU8Q; t=efa81a9785cd86f885e13998b6d5f9cb; thw=cn; uc3=vt3=F8dByRzMU9X8Hvccr00%3D&id2=W8zLpWipxVFu&nk2=0PLo6GHZOM8%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; tracknick=%5Cu9648%5Cu94E81992; lgc=%5Cu9648%5Cu94E81992; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; enc=4rB%2FfKFx8DJKgPpoHlZjr824CEYw%2BlPaKBDWbFO4fnh6svGA97NoZNGERui4fOo2tXSnSVN1ygkfn5R5ekztTQ%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_1; _m_h5_tk=e501ac7690832934d663aef19ee36be5_1545033419107; _m_h5_tk_enc=5147579a652b4fb508dc886d59c37045; isg=BFVVgDOkpYNz64H7Z31pC9thZFHP-goqhI4h7tf6EUwbLnUgn6IZNGPv_DSYLiEc");
//        con.header("referer", "https://item.taobao.com/item.htm ");Connection.Response resp = null;try {resp = con.method(Connection.Method.GET).ignoreContentType(true).execute();} catch (IOException e) {e.printStackTrace();}String body = resp.body();
//        System.out.println(body);body = body.substring(12, body.length() - 1);JSONObject jb = JSONObject.fromObject(body);JSONArray jsonArray = jb.getJSONObject("data").getJSONArray("listItem");//        while(i<100){i++;for (int j = 0; j < jsonArray.size(); j++) {JSONObject jsonObject = jsonArray.getJSONObject(j);String item_id = jsonObject.getString("item_id");System.out.println("item_id: "+item_id);getAll(item_id);}
//        }*///传链接上的产品idgetAll("577996531297");}public static void getAll(String item_id ) {try {Thread.sleep(2000);//一个休息5s,太快会被禁} catch (InterruptedException e) {e.printStackTrace();}System.out.println("开始时间:" + new Date());Date dateStart = new Date();Document doc = null;String id = "";try {
//            int i = 0;
//            while(i < 10000){
//            i++;String url = "https://item.taobao.com/item.htm?id="+item_id;id = getParamByUrl(url, "id");doc = Jsoup.connect(url).ignoreContentType(true).get();/* String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格String enc = "{\"itemNumId\":\"582061497975\"}";String gbk = URLEncoder.encode(enc, "utf-8");String sds = url + gbk;System.out.println("库存、价格"+sds);doc = Jsoup.connect(sds).ignoreContentType(true).get();*///设置请求头
//                    Connection con = Jsoup.connect(url);
//                    con.header("Cookie", " enc=1LWJWtPGgf6MF1NVsn2rbeRb3%2FU1%2Fk5ZiiedHbVedmxmfvUUWDPmFeyKeLYl7NVchBB19JCIVnX0eFv4otK9HA%3D%3D;" +
//                            "x5sec=7b2264657461696c736b69703b32223a226235653133353933646637396131353230343663346139633633653038326465434c6a4e7a654146454e447739724732716644534b426f4c4f4455774d7a51304e7a4d794f7a453d227d;" );
//                    con.header("referer", "https://item.taobao.com/item.htm ");
//                    Connection.Response resp=con.method(Connection.Method.GET).execute();
//                    Map<String,String> cookies = resp.cookies();
//                    Connection.Request request = con.request();
//                    String body = resp.body();
} catch (IOException e) {e.printStackTrace();}if (doc.baseUri().contains("tmall")) {System.out.println("商品名称:"+ doc.select("h1[data-spm=\"1000983\"]").text());}else {System.out.println("商品名称:" + doc.select("h3[class=\"tb-main-title\"]").text());}Elements imgSrcElement = doc.select("#J_UlThumb > li");for (Element element : imgSrcElement) {String imgSrc = "";if (element.baseUri().contains("tmall")){imgSrc = element.getElementsByTag("img").attr("src");}else{imgSrc = element.getElementsByTag("img").attr("data-src");}imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/", "");imgSrc = imgSrc.substring(0, imgSrc.length() - 10);
//            imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据System.out.println("主图url:" + imgSrc);}// 规格参数Elements selectRules = doc.select(".J_TSaleProp");List<List<String>> liHashMap = new ArrayList<>();for (Element ulElement : selectRules) {String ul = ulElement.getElementsByTag("ul").attr("data-property");System.out.println("ul:" + ul);List<String> liString = new ArrayList<>();for (Element liElement : ulElement.getElementsByTag("li")) {String liDataValue = liElement.getElementsByTag("li").attr("data-value");System.out.println("liDataValue: " + liDataValue);liString.add(liDataValue);String aStyle = liElement.getElementsByTag("a").attr("style");if (StringUtils.isNotBlank(aStyle)) {aStyle = aStyle.replaceAll("background:url\\(", "");aStyle = aStyle.substring(0, aStyle.length() - 29);
//                aStyle = aStyle.replaceAll("_40x40q90.jpg\\) center no-repeat;", "");System.out.println("aStyle: " + aStyle);}String spanText = liElement.getElementsByTag("span").text();if (StringUtils.isNotBlank(spanText)) {System.out.println("spanText: " + spanText);}}liHashMap.add(liString);}List<String> combination = test.combination(liHashMap);//获取价格、库存Elements eles = doc.getElementsByTag("script");for (Element ele : eles) {String s = ele.toString();if (!ele.baseUri().contains("tmall")) {//淘宝String rgex = "";String subUtilSimple = "";if (s.contains("skuMap")) {//获取sku的idrgex = "skuMap(.*?)propertyMemoMap";String skuId = s.replaceAll("\\s*", "");//                System.out.println(s);subUtilSimple = getSubUtilSimple(skuId, rgex);subUtilSimple = subUtilSimple.substring(1, subUtilSimple.length() - 1);//
                    JSONObject jb = JSONObject.fromObject(subUtilSimple);JSONObject finalJb = jb;List<String> skuList = new ArrayList<>();combination.forEach(p->{JSONObject jsonObject = finalJb.getJSONObject(";" + p + ";");if (!jsonObject.isNullObject()) {String o = jsonObject.getString("skuId");System.out.println("sku的id: " + o);skuList.add(o);}});String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格String enc = "{\"itemNumId\":\"" + id + "\"}";String substore = "";String store = "";try {String gbk = URLEncoder.encode(enc, "utf-8");String sds = url + gbk;System.out.println("库存、价格" + sds);doc = Jsoup.connect(sds).ignoreContentType(true).get();store = doc.toString();rgex = "sku2info(.*?)skuItem";substore = getSubUtilSimple(store, rgex);substore = substore.substring(3, substore.length() - 3);String sub = substore.replaceAll("\\\\", "").replaceAll("\\s*", "");JSONObject sb = JSONObject.fromObject(sub);skuList.stream().forEach(p->{if (sb.has(p)) {//判断是否有值,没值不取,不然会报错String string = sb.getString(p);System.out.println("淘宝的价格库存==============" + string);}});} catch (Exception e) {System.out.println("报错的地方store:" + store);
//                        System.out.println("报错的地方substore:" + substore);
                        e.printStackTrace();System.out.println("=====================================程序报错,提前结束===================================================" );return;}}if (s.contains("descUrl") && s.contains("counterApi")) {//                System.out.println(s);//详情链接rgex = "protocol(.*?)desc\\.alicdn\\.com";subUtilSimple = getSubUtilSimple(s, rgex);subUtilSimple = subUtilSimple.substring(14, subUtilSimple.length() - 7);System.out.println("详情链接: " + subUtilSimple);try {doc = Jsoup.connect("http:" + subUtilSimple).get();} catch (IOException e) {e.printStackTrace();}Elements imgDetail = doc.getElementsByTag("img");for (Element element : imgDetail) {String imgSrc = element.getElementsByTag("img").attr("src");//                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");//                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据if (StringUtils.isNotBlank(imgSrc)) {System.out.println("详情图url:" + imgSrc);}}}}else {//天猫的if (s.contains("TShop.Setup")) {//                String rgex = "<bdocid>(.*?)</bdocid>";String rgex = "skuMap(.*?)salesProp";String subUtilSimple = getSubUtilSimple(s, rgex);subUtilSimple = subUtilSimple.substring(2, subUtilSimple.length() - 2);JSONObject jb = JSONObject.fromObject(subUtilSimple);List<String> skuList = new ArrayList<>();combination.forEach(p->{JSONObject jsonObject = jb.getJSONObject(";" + p + ";");if (!jsonObject.isNullObject()) {String skuId = jsonObject.getString("skuId");System.out.println(skuId);skuList.add(skuId);}});//库存、价格String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格String enc = "{\"itemNumId\":\"" + id + "\"}";String substore = "";String store = "";try {String gbk = URLEncoder.encode(enc, "utf-8");String sds = url + gbk;System.out.println("库存、价格" + sds);doc = Jsoup.connect(sds).ignoreContentType(true).get();store = doc.toString();rgex = "sku2info(.*?)skuItem";substore = getSubUtilSimple(store, rgex);substore = substore.substring(3, substore.length() - 3);String sub = substore.replaceAll("\\\\", "").replaceAll("\\s*", "");JSONObject sb = JSONObject.fromObject(sub);skuList.stream().forEach(p->{if (sb.has(p)) {//判断是否有值,没值不取,不然会报错String string = sb.getString(p);System.out.println("天猫的价格库存==============" + string);}});} catch (Exception e) {System.out.println("报错的地方store:" + store);
//                        System.out.println("报错的地方substore:" + substore);
                        e.printStackTrace();System.out.println("=====================================程序报错,提前结束===================================================" );return;}//详情链接rgex = "httpsDescUrl(.*?)fetchDcUrl";subUtilSimple = getSubUtilSimple(s, rgex);subUtilSimple = subUtilSimple.substring(3, subUtilSimple.length() - 3);System.out.println(subUtilSimple);try {doc = Jsoup.connect("http:"+subUtilSimple).get();} catch (IOException e) {e.printStackTrace();}Elements imgDetail = doc.getElementsByTag("img");for (Element element :imgDetail) {String imgSrc = element.getElementsByTag("img").attr("src");
//                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");
//                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据System.out.println("详情图url:"+imgSrc);}break;}}}System.out.println("结束时间:" + new Date());Date dateEnd = new Date();long number = dateEnd.getTime()-dateStart.getTime();//然后在将毫秒转换为date类型就可以了System.out.println("时间差为: "+number/1000);}/*** 返回单个字符串,若匹配到多个的话就返回第一个,方法与getSubUtil一样** @param soap* @param rgex* @return*/public static String getSubUtilSimple(String soap, String rgex) {Pattern pattern = Pattern.compile(rgex);// 匹配的模式Matcher m = pattern.matcher(soap);while (m.find()) {return m.group(1);}return "";}/*** 获取指定url中的某个参数** @param url* @param name* @return*/public static String getParamByUrl(String url, String name) {url += "&";String pattern = "(\\?|&){1}#{0,1}" + name + "=[a-zA-Z0-9]*(&{1})";Pattern r = Pattern.compile(pattern);Matcher m = r.matcher(url);if (m.find()) {
//            System.out.println(m.group(0));return m.group(0).split("=")[1].replace("&", "");} else {return null;}}}


import com.google.gson.JsonObject;
import net.sf.json.JSONObject;import java.util.ArrayList;
import java.util.List;/*** Created with Chenquan.* Description:* Date: 2018-12-16* Time: 10:27*/
public class test {public static void main(String[] args) {List<String> li = new ArrayList<>();li.add("aa");li.add("bb");li.add("cc");List<String> bi = new ArrayList<>();bi.add("ee");bi.add("rr");bi.add("tt");List<String> ci = new ArrayList<>();ci.add("yy");ci.add("uu");ci.add("ii");List<List<String>> list = new ArrayList<>();list.add(li);list.add(bi);list.add(ci);List<String> vv = new ArrayList<>();List<String> combination = combination(list);System.out.println(combination);}/*** 若干个集合元素的组合** @param groups 多个集合* @return 组合结果*/public static List<String> combination(List<List<String>> groups) {if (invalid(groups) || invalid(groups.get(0))) {return null;}List<String> combine = groups.get(0);for (int i = 1; i < groups.size(); i++) {combine = cartesianProduct(combine, groups.get(i));if (combine == null) {return null;}}return combine;}/*** 两个集合元素的组合** @param c1 集合1* @param c2 集合2* @return 组合结果*/public static List<String> cartesianProduct(List<String> c1, List<String> c2) {if (invalid(c1) || invalid(c2)) {return null;}List<String> combine = new ArrayList<>();for (String s : c1) {for (String t : c2) {combine.add(String.format("%s;%s", s, t));//combine.add(String.format("%s%s", t, s));}}return combine;}/*** 验证集合是否无效** @param c 集合* @return true 无效*/private static boolean invalid(List<?> c) {return c == null || c.isEmpty();}}

  

转载于:https://www.cnblogs.com/itchenfirst/p/10131526.html

天猫、淘宝商品详情、库存、价格抓包相关推荐

  1. Python爬虫淘宝商品详情页价格、类似数据

      在讲爬取淘宝详情页数据之前,先来介绍一款 Chrome 插件:Toggle JavaScript (它可以选择让网页是否显示 js 动态加载的内容),如下图所示: 当这个插件处于关闭状态时,待爬取 ...

  2. 淘宝商品详情接口抓取

    听说都2021年了,还有人不知道如何抓取淘宝商品详情信息,看一下下面的代码吧! import requestsdef fetch_mtop_getdetail():#替换日期itemNumId = 6 ...

  3. 淘宝/天猫获得淘宝商品详情高级版 API 返回值说明

    item_get_pro-获得淘宝商品详情高级版 [查看演示] API测试工具 注册开通 onebound.taobao.item_get_pro 公共参数 请求地址: https://api-gw. ...

  4. 万邦淘宝/天猫获得淘宝商品详情 API 返回值说明

    item_get-获得淘宝商品详情 公共参数 请求地址: https://console.open.onebound.cn/console/?i=Anzexi 名称 类型 必须 描述 key Stri ...

  5. 淘宝/天猫API接口,获得淘宝商品详情高级版

    淘宝/天猫获得淘宝商品详情高级版 API 返回值说明      立即测试 item_get_pro-获得淘宝商品详情高级版 公共参数 请求地址: https://console.open.onebou ...

  6. 淘宝/天猫api数据接口,获得淘宝商品详情 API 返回值说明

    立即测试 测试结果: Request address: https://api-gw.onebound.cn/taobao/item_get/?key=& &num_iid=52081 ...

  7. 超稳定的接口——淘宝/天猫获得淘宝商品详情

    item_get-获得淘宝商品详情 注册开通 onebound.taobao.item_get 公共参数 名称 类型 必须 描述 key String 是 调用key(必须以GET方式拼接在URL中) ...

  8. 淘宝商品详情接口,淘宝详情页接口,宝贝详情页接口,商品属性接口,商品信息查询,商品详细信息接口,h5详情,淘宝APP详情

    一.接口参数说明: 提取淘宝商品详情页各项数据,包含skuid.价格.收藏数.加购数.月销售量.主图.标题.详情页图片,点击获取请求key和secret 二.建议使用场景 1.商品销售情况分析,根据销 ...

  9. 通过封装接口实现淘宝商品详情数据上架京东店铺实现商品搬家上货操作实施代码展示

    ​​业务场景:作为全球最大的 B2C 电子商务平台之一,淘宝平台提供了丰富的商品资源,吸引了大量的全球买家和卖家.为了方便开发者接入淘宝平台,淘宝平台提供了丰富的 API 接口,其中商品详情接口是非常 ...

  10. 淘宝商品详情api接口代码对接分享

    一.淘宝商品详情api接口代码对接如下: 1.公共参数: 名称 类型 必须 描述 key String 是 调用key(必须以GET方式拼接在URL中)t secret String 是 调用密钥  ...

最新文章

  1. XenServer和VMware vSphere技术比较
  2. FCOS:全卷积一阶段Anchor Free物体检测器,多种视觉任务的统一框架
  3. Lamp(fastcgi)环境的搭建
  4. 来聊一聊Cookie(小甜饼),及其涉及到的web安全吧
  5. (Z) Linux与BSD之间的10个不同之处
  6. ios开发中计算代码运算时间_理解Unity中的优化(二):内存
  7. 复刻了一个史上最强 Redis 6.0 版本
  8. c语言基础知识孙小红,基于J2EE的C语言在线学习系统的设计与实现
  9. sklearn中的降维算法(PCA)(原理相关)-1
  10. iOS 三步完成购买苹果开发者账号
  11. 鲍威尔c 语言程序,优化设计-鲍威尔法程序(c语言).doc
  12. 瑞萨RH850开发环境搭建
  13. 线面图标设计样式解析
  14. vue 判断页面是首次加载还是刷新
  15. 制造业生产过程中多源异构数据处理方法综述
  16. 无中介租房搜房工具 V1.0
  17. redis manager desktop下载、安装、连接redis教程(官网)
  18. 《三国志·战略版》爆火,友盟+助力手游实现高效促活
  19. FPGA - Zynq - 加载 - BootRom
  20. leetcode之Kth Largest Element in an Array

热门文章

  1. 传手机ODM厂商拿到三星大单 将带动产业链走出寒冬?
  2. OpenGL第十讲——像素图
  3. 网站查询服务器租期,游戏服务器租期
  4. java.exe,javac.exe,javaw.exe 是什么进程? (转载)
  5. 安卓图书信息管理系统
  6. Grad-CAM可视化
  7. 快消品B2B平台的仓储物流模式
  8. Python学好兼职无忧,接单兼职平台曝光,除了主业,副业也能让你月入过万
  9. 软件测试(六)——缺陷以及总结
  10. 00后会不会改变软件测试行业现状?