/// <summary>
/// Extracts item information from the HTML of a Taobao item page and returns it as a flat
/// key/value map (title, category id, main images, sale properties, attributes, item id,
/// description images, prices and a serialized SKU table).
/// </summary>
/// <remarks>
/// The method performs two HTTP requests through <c>OpenUrl</c>: one for the lazily loaded
/// description page and one for the SKU price/stock service.
/// List values are joined with <c>*M*-*FG2*</c>; fields inside one list entry are joined with
/// <c>*M*-*FG3*</c>.
/// Any section whose markers cannot be found in the page is skipped instead of throwing.
/// When the item id cannot be read from <c>g_config</c>, the data collected so far is returned
/// because the SKU service cannot be queried without it.
/// </remarks>
/// <param name="GoodPageStr">HTML source of the item page.</param>
/// <param name="Sku0">Not read by this method; kept for interface compatibility.</param>
/// <param name="IsGoodId_Code">Not read by this method; kept for interface compatibility.</param>
/// <param name="IsColor_SkuPic">Not read by this method; kept for interface compatibility.</param>
/// <returns>
/// The collected values. The key "商品存在" is "1" when a title was found and "0" when the
/// item is delisted or no title could be read; in the "0" case no other key is present.
/// </returns>
public Dictionary<string, string> Get_ItemInfo(string GoodPageStr, int Sku0, int IsGoodId_Code, int IsColor_SkuPic)
{
    const string split1 = "*M*-*FG2*"; // separates entries of a list value
    const string split2 = "*M*-*FG3*"; // separates fields inside one entry

    Dictionary<string, string> SizeIDs = new Dictionary<string, string>();     // data-value -> size name
    Dictionary<string, string> ColorIDs = new Dictionary<string, string>();    // data-value -> colour name
    Dictionary<string, string> Prices = new Dictionary<string, string>();      // SKU key -> original price
    Dictionary<string, string> PromoPrices = new Dictionary<string, string>(); // SKU key -> promotional price
    Dictionary<string, string> SellStock = new Dictionary<string, string>();   // SKU key -> stock
    Dictionary<string, string> result = new Dictionary<string, string>();

    // NOTE(review): the meaning of the trailing arguments of BaseTools.str_FindStr is not
    // visible here; every call below passes exactly the values the original code passed.

    #region Title
    if (string.IsNullOrEmpty(GoodPageStr)
        || GoodPageStr.IndexOf("此宝贝已下架", StringComparison.Ordinal) >= 0)
    {
        // Nothing to parse, or the page says the item has been delisted.
        result["商品存在"] = "0";
        return result;
    }

    List<string> found = BaseTools.str_FindStr(GoodPageStr, "查看宝贝标题", "</h3>", 1, false, false, false, "", "", "", "");
    List<string> titles = null;
    if (HasMatches(found))
    {
        titles = BaseTools.str_FindStr(found[0], "data-title=", ">", 1, false, false, false, "", "", "\"", "");
    }

    if (!HasMatches(titles))
    {
        // Without a readable title the item is treated as non-existent.
        result["商品存在"] = "0";
        return result;
    }

    result["商品存在"] = "1";
    result["商品标题"] = titles[0];

    found = BaseTools.str_FindStr(GoodPageStr, "data-catid=\"", "\"", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        result["TaobaoCateID"] = found[0];
    }
    #endregion

    #region Showcase video
    // Not implemented: an earlier attempt to read the <video> tag did not work and was disabled.
    #endregion

    #region Main images
    // The thumbnail list is searched first and only then checked; the check must not depend
    // on the outcome of the category lookup above.
    found = BaseTools.str_FindStr(GoodPageStr, "<ul id=\"J_UlThumb\" class=\"tb-thumb tb-clearfix\">", "</ul>", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        List<string> thumbs = BaseTools.str_FindStr(found[0], "<img data-src=\"", "\"", 0, false, false, false, "", "", "_50x50.jpg", "");
        string pics = "";
        if (thumbs != null)
        {
            foreach (string src in thumbs)
            {
                pics = AppendDelimited(pics, ToAbsoluteUrl(src), split1);
            }
        }

        result["商品主图"] = pics;
    }
    #endregion

    #region Colour properties (image swatches)
    List<string> props = BaseTools.str_FindStr(GoodPageStr, "<ul data-property=\"", "\"", 0, true, false, false, "", "", "", "");
    if (HasMatches(props))
    {
        foreach (string prop in props)
        {
            // Over-long captures are not property names.
            if (prop == null || prop.Length > 10)
            {
                continue;
            }

            string listStart = "<ul data-property=\"" + prop + "\" class=\"J_TSaleProp tb-img tb-clearfix\">";
            List<string> lists = BaseTools.str_FindStr(GoodPageStr, listStart, "</ul>", 1, false, false, false, "", "", "", "");
            if (!HasMatches(lists))
            {
                continue;
            }

            List<string> items = BaseTools.str_FindStr(lists[0], "<li ", "</li>", 0, false, false, false, "", "", "", "");
            string colors = "";
            if (items != null)
            {
                foreach (string item in items)
                {
                    List<string> names = BaseTools.str_FindStr(item, "<span>", "</span>", 0, false, false, false, "", "", "", "");
                    List<string> images = BaseTools.str_FindStr(item, "background:url(", "_30x30.jpg)", 0, false, false, false, "", "", "", "");
                    List<string> values = BaseTools.str_FindStr(item, "data-value=\"", "\"", 0, false, false, false, "", "", "", "");
                    if (!HasMatches(names) || !HasMatches(values))
                    {
                        // An entry without a name or an id cannot be mapped to a SKU.
                        continue;
                    }

                    string entry = names[0];
                    if (HasMatches(images))
                    {
                        entry += split2 + images[0];
                    }

                    colors = AppendDelimited(colors, entry, split1);
                    ColorIDs[values[0]] = names[0];
                }
            }

            result[prop] = colors;
        }
    }
    #endregion

    #region Size properties (text only)
    props = BaseTools.str_FindStr(GoodPageStr, "<ul data-property=\"", "\" class=\"J_TSaleProp tb-clearfix\">", 0, true, false, false, "", "", "", "");
    if (HasMatches(props))
    {
        foreach (string prop in props)
        {
            if (prop == null || prop.Length > 10)
            {
                continue;
            }

            string listStart = "<ul data-property=\"" + prop + "\" class=\"J_TSaleProp tb-clearfix\">";
            List<string> lists = BaseTools.str_FindStr(GoodPageStr, listStart, "</ul>", 1, false, false, false, "", "", "", "");
            if (!HasMatches(lists))
            {
                continue;
            }

            List<string> items = BaseTools.str_FindStr(lists[0], "<li ", "</li>", 0, false, false, false, "", "", "", "");
            string sizes = "";
            if (items != null)
            {
                foreach (string item in items)
                {
                    List<string> names = BaseTools.str_FindStr(item, "<span>", "</span>", 0, false, false, false, "", "", "", "");
                    List<string> values = BaseTools.str_FindStr(item, "data-value=\"", "\">", 0, false, false, false, "", "", "", "");
                    if (!HasMatches(names) || !HasMatches(values))
                    {
                        continue;
                    }

                    sizes = AppendDelimited(sizes, names[0], split1);
                    SizeIDs[values[0]] = names[0];
                }
            }

            result[prop] = sizes;
        }
    }
    #endregion

    #region Attributes
    found = BaseTools.str_FindStr(GoodPageStr, "<ul class=\"attributes-list\">", "</ul>", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        List<string> attributes = BaseTools.str_FindStr(found[0], ">", "</li>", 0, false, false, false, "", "", "&nbsp;", "");
        string attribute = "";
        if (attributes != null)
        {
            foreach (string a in attributes)
            {
                attribute = AppendDelimited(attribute, a, split1);
            }
        }

        result["商品属性"] = attribute;
    }
    #endregion

    #region Item id and description images
    found = BaseTools.str_FindStr(GoodPageStr, "var g_config = {", "}", 1, true, false, false, "", "", "", "");
    if (!HasMatches(found) || found[0] == null)
    {
        return result;
    }

    string config = found[0].Replace(" ", "");
    found = BaseTools.str_FindStr(config, "itemId:'", "',", 1, false, false, false, "", "", "", "");
    if (!HasMatches(found))
    {
        // The SKU service below is keyed by the item id; without it there is nothing more to fetch.
        return result;
    }

    string goodID = found[0];
    result["商品货号"] = goodID;

    // The description is loaded by the page at runtime, so its URL is read from g_config
    // and fetched separately.
    found = BaseTools.str_FindStr(config, "location.protocol==='http:'?'", "'", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        string descPage = this.OpenUrl("http:" + found[0], "Get", "", false, null, Encoding.GetEncoding("gbk"));
        List<string> sources = BaseTools.str_FindStr(descPage, "src=\"", "\"", 0, false, false, false, "", "", "", "");
        string imgs = "";
        if (sources != null)
        {
            foreach (string src in sources)
            {
                imgs = AppendDelimited(imgs, src, split1);
            }
        }

        result["商品详情图"] = imgs;
    }
    #endregion

    #region SKU prices and stock
    string skuUrl = "https://detailskip.taobao.com/service/getData/1/p2/item/detail/sib.htm?itemId=" + goodID + "&modules=dynStock,price,originalPrice,xmpPromotion";
    Dictionary<string, string> heards = new Dictionary<string, string>();
    heards.Add("Referer", "https://item.taobao.com/item.htm?id=" + goodID);

    string skuPage = this.OpenUrl(skuUrl, "Get", "", true, heards, Encoding.GetEncoding("gbk"));
    if (string.IsNullOrEmpty(skuPage))
    {
        return result;
    }

    // Undo the JSON escaping so the markers below can be matched literally.
    skuPage = skuPage.Replace("\\\"", "\"").Replace("\\\\", "\\").Replace("\\/", "/");
    skuPage = BaseTools.UnicodeReplace(skuPage);
    if (skuPage == null
        || skuPage.IndexOf("\"code\":0,\"message\":\"SUCCESS\"}", StringComparison.Ordinal) < 0)
    {
        return result;
    }

    // Original prices.
    found = BaseTools.str_FindStr(skuPage, "\"originalPrice\"", "\"dynStock\"", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        List<string> defPrice = BaseTools.str_FindStr(found[0], "\"def\":{\"price\":\"", "\"}", 0, false, false, false, "", "", "", "");
        if (HasMatches(defPrice))
        {
            result["原价"] = defPrice[0];
        }

        List<string> segments = BaseTools.str_FindStr(found[0], "\";", "}", 0, false, false, false, "", "", "", "");
        CollectSkuValues(segments, "price\":\"", "\"", Prices);
    }

    // Stock.
    found = BaseTools.str_FindStr(skuPage, "\"sku\"", "\"promotion\"", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        List<string> segments = BaseTools.str_FindStr(found[0], "\";", "}", 0, false, false, false, "", "}", "", "");
        CollectSkuValues(segments, "\"stock\":", "}", SellStock);
    }

    // Promotional prices.
    found = BaseTools.str_FindStr(skuPage, "\"promotion\":{\"promoData\":{", "\"saleDetailMap\"", 1, false, false, false, "", "", "", "");
    if (HasMatches(found))
    {
        List<string> defPromo = BaseTools.str_FindStr(found[0], "\"def\":", "}]", 0, false, false, false, "", "", "", "");
        string promoPrice;
        if (HasMatches(defPromo) && TrySliceBetween(defPromo[0], "\"price\":\"", "\",", out promoPrice))
        {
            result["商品价格"] = promoPrice;
        }

        List<string> segments = BaseTools.str_FindStr(found[0], "\";", "}]", 0, false, false, false, "", "}", "", "");
        CollectSkuValues(segments, "\"price\":\"", "\",", PromoPrices);
    }

    // One row per SKU: key, colour, size, image, price, original price, stock, item id, on-sale flag.
    StringBuilder skuRows = new StringBuilder();
    foreach (KeyValuePair<string, string> sku in Prices)
    {
        string skuid = sku.Key;
        string originalPrice = sku.Value;
        string[] ids = skuid.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);

        // 1: SKU key
        skuRows.Append(skuid).Append(split2);

        // 2: colour, 3: size
        if (ids.Length > 0)
        {
            // NOTE(review): as in the original, a column is omitted (not padded with "--")
            // when no component of the key maps to a colour/size; confirm the consumer expects this.
            string name;
            if (TryFirstMapped(ColorIDs, ids, out name))
            {
                skuRows.Append(name).Append(split2);
            }

            if (TryFirstMapped(SizeIDs, ids, out name))
            {
                skuRows.Append(name).Append(split2);
            }
        }
        else
        {
            skuRows.Append("--").Append(split2).Append("--").Append(split2);
        }

        // 4: SKU image (not collected)
        skuRows.Append("--").Append(split2);

        // 5: selling price, falling back to the original price when there is no promotion
        string promo;
        skuRows.Append(PromoPrices.TryGetValue(skuid, out promo) ? promo : originalPrice).Append(split2);

        // 6: original price
        skuRows.Append(originalPrice).Append(split2);

        // 7: stock
        string stock;
        skuRows.Append(SellStock.TryGetValue(skuid, out stock) ? stock : "-").Append(split2);

        // 8: item id
        skuRows.Append(goodID).Append(split2);

        // 9: on-sale flag
        skuRows.Append("1");
        skuRows.Append(split1).Append("\r\n");
    }

    result["商品SKU图构造"] = skuRows.ToString();
    #endregion

    return result;
}

/// <summary>Returns true when the search result is non-null and holds at least one entry.</summary>
private static bool HasMatches(List<string> matches)
{
    return matches != null && matches.Count > 0;
}

/// <summary>Appends <paramref name="item"/> to <paramref name="joined"/>, inserting the delimiter unless nothing has been accumulated yet.</summary>
private static string AppendDelimited(string joined, string item, string delimiter)
{
    return joined != "" ? joined + delimiter + item : item;
}

/// <summary>Prefixes protocol-relative URLs ("//host/...") with "http:"; other values are returned unchanged.</summary>
private static string ToAbsoluteUrl(string url)
{
    return url != null && url.StartsWith("//", StringComparison.Ordinal) ? "http:" + url : url;
}

/// <summary>Reads the text between the first <paramref name="startMarker"/> and the next <paramref name="endMarker"/>; returns false when either marker is missing.</summary>
private static bool TrySliceBetween(string text, string startMarker, string endMarker, out string value)
{
    value = null;
    if (text == null)
    {
        return false;
    }

    int begin = text.IndexOf(startMarker, StringComparison.Ordinal);
    if (begin < 0)
    {
        return false;
    }

    begin += startMarker.Length;
    int end = text.IndexOf(endMarker, begin, StringComparison.Ordinal);
    if (end < 0)
    {
        return false;
    }

    value = text.Substring(begin, end - begin);
    return true;
}

/// <summary>Reads the SKU key that precedes the first <c>;"</c> in a response segment; returns false when that terminator is missing.</summary>
private static bool TrySkuKey(string segment, out string skuKey)
{
    skuKey = null;
    if (segment == null)
    {
        return false;
    }

    int end = segment.IndexOf(";\"", StringComparison.Ordinal);
    if (end < 0)
    {
        return false;
    }

    skuKey = segment.Substring(0, end);
    return true;
}

/// <summary>Stores, for every segment that has both a SKU key and a delimited value, that value under the key; malformed segments are skipped and repeated keys overwrite.</summary>
private static void CollectSkuValues(List<string> segments, string startMarker, string endMarker, Dictionary<string, string> target)
{
    if (segments == null)
    {
        return;
    }

    foreach (string segment in segments)
    {
        string skuKey;
        string value;
        if (TrySkuKey(segment, out skuKey) && TrySliceBetween(segment, startMarker, endMarker, out value))
        {
            target[skuKey] = value;
        }
    }
}

/// <summary>Looks up the components of a SKU key in order and returns the value of the first one present in <paramref name="map"/>.</summary>
private static bool TryFirstMapped(Dictionary<string, string> map, string[] ids, out string value)
{
    foreach (string id in ids)
    {
        if (map.TryGetValue(id, out value))
        {
            return true;
        }
    }

    value = null;
    return false;
}

抓取淘宝商品信息的C#代码相关推荐

  1. python获取登录按钮_Python:Selenium模拟Chrome浏览器抓取淘宝商品信息

    对于采用异步加载技术的网页,有时候想通过逆向工程的方式来设计爬虫进行爬取会比较困难,因此,要想通过python获取异步加载数据往往可以使用Selenium模拟浏览器的方式来获取. Selenium是一 ...

  2. 抓取淘宝商品信息并制作商品信息比价表(以口红为例)

    快速抓取淘宝上口红信息 import requests import re import os def getHtmlText(url): try: r=requests.get(url,timeou ...

  3. 使用selenium+Chrome()无图版模拟浏览器进行抓取淘宝商品信息

    说起淘宝,大家肯定先想起来的是各种各样的吃的喝的玩的,那么什么样的吃的喝的玩的销量高呢,有没有一种方法将商品信息抓下来我们做一个参考呢,下边就为大家带来我之前在崔庆才大神的参考下,将代码改写了,相对没 ...

  4. 爬虫学习笔记——Selenium爬取淘宝商品信息并保存

    在使用selenium来模拟浏览器操作,抓取淘宝商品信息前,先完成一些准备工作. 准备工作:需要安装selenium,pyquery,以及Chrome浏览器并配置ChromeDriver. 安装sel ...

  5. 3.使用Selenium模拟浏览器抓取淘宝商品美食信息

    # 使用selenium+phantomJS模拟浏览器爬取淘宝商品信息 # 思路: # 第一步:利用selenium驱动浏览器,搜索商品信息,得到商品列表 # 第二步:分析商品页数,驱动浏览器翻页,并 ...

  6. 利用Selenium爬取淘宝商品信息

    文章来源:公众号-智能化IT系统. 一.  Selenium和PhantomJS介绍 Selenium是一个用于Web应用程序测试的工具,Selenium直接运行在浏览器中,就像真正的用户在操作一样. ...

  7. 采集淘宝API数据,抓取淘宝商品资料无需申请appkey

    为了进行淘宝的API开发,首先我们需要做下面几件事情. 1)开发者注册一个账号 2)然后为每个淘宝应用注册一个应用程序键(App Key) . 3)下载淘宝API的SDK并掌握基本的API基础知识和调 ...

  8. 江湖小白之一起学Python (五)爬取淘宝商品信息

    趁热需打铁,随着这几天的鸡血澎湃,我们来实现一下爬取淘宝商品信息,我记得几年前曾用python写了下抓取淘宝天猫,京东,拍拍的爬虫,专门采集商品信息,图片,评论及评论图片,我还用pyqt开发了个客户端 ...

  9. 爬取淘宝商品信息selenium+pyquery+mongodb

    ''' 爬取淘宝商品信息,通过selenium获得渲染后的源码,pyquery解析,mongodb存储 '''from selenium import webdriver from selenium. ...

最新文章

  1. win10添加java路径_小编解惑win10系统设置java路径变量的还原方案
  2. 多进程与多线程的区别 - jihite
  3. Java Byte类的compareTo()方法和示例
  4. 陶晶驰stm32_陶晶驰串口屏学习日记(1)
  5. Java番外篇1——正则表达式
  6. CVPR2021 | 用Transformers无监督预训练进行目标检测
  7. ONLY三行脚本, SQL数据恢复到指定时间点
  8. 同济大学 线性代数 第六版 pdf_线性代数同济第六版第五章课后习题答案!
  9. Silverlight 2应用程序中XAP文件揭秘
  10. OPPOA83_OPPOA83T_官方线刷包_救砖包_解账户锁
  11. 色彩处理的5大颜色空间和区别
  12. web如何加入视频?video
  13. C# Web页面打印网页
  14. tmux 使用鼠标上下滚动
  15. 微服务组件之限流器与熔断器
  16. AE开发 遇到未能加载文件或程序集的问题 FileNotFoundException
  17. java Swing中JTextField自动补全功能例子
  18. 两种降维方法的总结——LLE(局部线性嵌入),ISOMAP(等度量映射)
  19. 初学者怎么学java编程
  20. SAP-abap学习记录(21)

热门文章

  1. python基础运算符讲解(原码、补码、反码)
  2. 在线导入Excel自定义报表,助力快速攻克金融系统开发难点
  3. 用matlab在RGB三色背景图上生成随机的点或线
  4. Java:Excel模板下载
  5. java爬虫框架哪个好_java爬虫框架的使用
  6. C语言0基础全面教程
  7. Unity | VS2019中代码颜色的更改
  8. TCP/IP协议 | 四层模型
  9. 兄弟7360清零后无法传真、扫描的故障,变成英文
  10. python爬取图片并写入excel