winform抓取淘宝宝贝详细页的上下架时间等信息

在窗体上拖放几个控件，如下图所示：

后台编码：

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Text.RegularExpressions;

/// <summary>
/// Form that downloads the pages whose URLs are typed into <c>txtInput</c>, extracts
/// each item's title, price and listing start/end timestamps from the HTML, and shows
/// the results in the <c>dgResult</c> grid.
/// </summary>
public partial class Form9 : Form
{
    // "HH" is the 24-hour clock. The 12-hour "hh" without an AM/PM designator would
    // render 03:00 and 15:00 identically.
    private const string GridDateFormat = "yyyy-MM-dd HH:mm:ss";

    // The Unix epoch expressed explicitly in UTC so that offsets are applied only once,
    // at the final conversion to local time.
    private static readonly DateTime UnixEpochUtc =
        new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    // Accumulates results across queries until btnClear_Click empties it.
    private List<Product> proList = new List<Product>();

    public Form9()
    {
        InitializeComponent();
        this.dgResult.Columns["StartTime"].DefaultCellStyle.Format = GridDateFormat;
        this.dgResult.Columns["EndTime"].DefaultCellStyle.Format = GridDateFormat;
    }

    /// <summary>
    /// Converts a Unix timestamp (seconds elapsed since 1970-01-01 00:00:00 UTC)
    /// to a local <see cref="System.DateTime"/>.
    /// </summary>
    /// <param name="d">Unix timestamp in seconds.</param>
    /// <returns>The corresponding instant expressed in the machine's local time zone.</returns>
    public System.DateTime UnixToDateTime(long d)
    {
        // Add the seconds in UTC first and convert afterwards, so the UTC offset
        // (including daylight saving) valid at the target instant is used rather than
        // the offset that was in force on 1970-01-01.
        return UnixEpochUtc.AddSeconds(d).ToLocalTime();
    }

    /// <summary>
    /// Extracts a Unix timestamp from <paramref name="input"/> using
    /// <paramref name="pattern"/> and scales it from milliseconds to seconds.
    /// </summary>
    /// <param name="input">Text to search.</param>
    /// <param name="pattern">Regular expression whose first match is the numeric timestamp.</param>
    /// <returns>
    /// The timestamp divided by 1000, or 0 when the input is empty, the pattern does
    /// not match, or the match is not a valid integer.
    /// </returns>
    private long GetUnixTick(string input, string pattern)
    {
        if (string.IsNullOrEmpty(input))
        {
            return 0;
        }

        Match match = Regex.Match(input, pattern, RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            // Indexing an empty MatchCollection would throw; report "not found" instead.
            return 0;
        }

        long result;
        if (!Int64.TryParse(match.Value, out result))
        {
            return 0;
        }

        // Values look like 1401119998000; drop the trailing three digits to get seconds.
        if (result > 0)
        {
            result = result / 1000;
        }

        return result;
    }

    /// <summary>
    /// Fetches every whitespace-separated URL in <c>txtInput</c>, scrapes the item
    /// data from each page and binds the accumulated results to the grid.
    /// </summary>
    private void btnQuery_Click(object sender, EventArgs e)
    {
        string input = txtInput.Text.Trim();
        string[] arrUrl = Regex.Split(input, "\\s+");

        foreach (string url in arrUrl)
        {
            // Regex.Split yields a single empty string for blank input.
            if (url.Length == 0)
            {
                continue;
            }

            string html = Utils.GetHtmlSource(url, Encoding.GetEncoding("GBK"));
            if (string.IsNullOrEmpty(html))
            {
                continue;
            }

            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);
            HtmlNode rootNode = document.DocumentNode;

            string unixStr = GetNodeAttr(rootNode, "//button[@id='J_listBuyerOnView']");
            if (string.IsNullOrEmpty(unixStr))
            {
                continue;
            }

            // Listing start / end timestamps embedded in the data-api query string.
            long unixTickStart = GetUnixTick(unixStr, "(?<=starts=)(.*?)(?=&item_id)");
            long unixTickEnd = GetUnixTick(unixStr, "(?<=ends=)(.*?)(?=&starts)");
            if (unixTickStart <= 0 || unixTickEnd <= 0)
            {
                // Timestamps could not be extracted; skip the page instead of
                // showing a bogus 1970 date.
                continue;
            }

            DateTime dtStart = UnixToDateTime(unixTickStart);
            DateTime dtEnd = UnixToDateTime(unixTickEnd);

            // Title
            string title = GetNodeText(rootNode, "//title");

            // Price: scraped text is machine-formatted ("12.50"), so parse it with the
            // invariant culture rather than the UI culture. Stays 0 when unparsable.
            decimal price = 0;
            string priceTmp = GetNodeText(rootNode, "//em[@class='tb-rmb-num']");
            decimal.TryParse(
                priceTmp,
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture,
                out price);

            proList.Add(new Product()
            {
                Url = url,
                Title = title,
                Price = price,
                StartTime = dtStart,
                EndTime = dtEnd
            });
        }

        if (proList.Count == 0)
        {
            MessageBox.Show("没有找到符合条件的数据，输入网址是否正确？");
            return;
        }

        BindResults();
    }

    /// <summary>
    /// Returns the <c>data-api</c> attribute of the first node matching
    /// <paramref name="path"/>, or an empty string when the node or the attribute
    /// is missing.
    /// </summary>
    private string GetNodeAttr(HtmlNode rootNode, string path)
    {
        HtmlNode temp = rootNode.SelectSingleNode(path);
        if (temp == null)
        {
            return "";
        }

        HtmlAttribute attr = temp.Attributes["data-api"];
        return attr != null ? attr.Value : "";
    }

    /// <summary>
    /// Returns the inner text of the first node matching <paramref name="path"/>,
    /// or an empty string when no node matches.
    /// </summary>
    private string GetNodeText(HtmlNode rootNode, string path)
    {
        HtmlNode temp = rootNode.SelectSingleNode(path);
        if (temp != null)
        {
            return temp.InnerText;
        }

        return "";
    }

    /// <summary>
    /// Empties the accumulated results and refreshes the grid.
    /// </summary>
    private void btnClear_Click(object sender, EventArgs e)
    {
        proList.Clear();
        BindResults();
    }

    // Rebinds the grid to the current contents of proList.
    private void BindResults()
    {
        var bindingList = new BindingList<Product>(proList);
        var source = new BindingSource(bindingList, null);
        dgResult.DataSource = source;
    }
}

/// <summary>
/// One scraped item: source URL, page title, price and listing start/end times.
/// </summary>
public class Product
{
    public string Url { get; set; }
    public string Title { get; set; }
    public decimal Price { get; set; }
    public DateTime StartTime { get; set; }
    public DateTime EndTime { get; set; }
}

运行结果如下：

winform抓取淘宝宝贝详细页的上下架时间等信息相关推荐

scrapy抓取淘宝女郎
scrapy抓取淘宝女郎准备工作首先在淘宝女郎的首页这里查看,当然想要爬取更多的话,当然这里要查看翻页的url,不过这操蛋的地方就是这里的翻页是使用javascript加载的,这个就有点尴尬了,找 ...
Python爬虫实战八之利用Selenium抓取淘宝匿名旺旺
其实本文的初衷是为了获取淘宝的非匿名旺旺,在淘宝详情页的最下方有相关评论,含有非匿名旺旺号,快一年了淘宝都没有修复这个. 很多人学习python,不知道从何学起. 很多人学习python,掌握了基本语 ...
scrapy抓取淘宝女郎 1
scrapy抓取淘宝女郎准备工作首先在淘宝女郎的首页这里查看,当然想要爬取更多的话,当然这里要查看翻页的url,不过这操蛋的地方就是这里的翻页是使用javascript加载的,这个就有点尴尬了,找 ...
python3 爬虫实战案例（抓取淘宝信息）（淘宝加了搜索必须登录的验证，此方法所到的结果都是0）
需求:对比足球,篮球,乒乓球,羽毛球,网球,相关物品的销售量保存到excle中和抓取淘宝关键字相关信息的销售量,这和之前抓取csdn网站浏览量取不同,抓取csdn浏览量主要是通过bs4Tag标签,而 ...
Python爬虫实战（4）：抓取淘宝MM照片
Python爬虫入门(1):综述 Python爬虫入门(2):爬虫基础了解 Python爬虫入门(3):Urllib库的基本使用 Python爬虫入门(4):Urllib库的高级用法 Python爬虫 ...
python爬虫淘宝视频_Python2爬虫：以抓取淘宝MM为例（实战）
本篇目标 1.抓取淘宝MM的姓名,头像,年龄 2.抓取每一个MM的资料简介以及写真图片 3.把每一个MM的写真图片按照文件夹保存到本地 4.熟悉文件保存的过程 1.URL的格式在这里我们用到的URL ...
Python爬虫实战：抓取淘宝MM照片
本篇目标 1.抓取淘宝MM的姓名,头像,年龄 2.抓取每一个MM的资料简介以及写真图片 3.把每一个MM的写真图片按照文件夹保存到本地 4.熟悉文件保存的过程 1.URL的格式在这里我们用到的URL ...
抓取淘宝司法拍卖数据
抓取淘宝司法拍卖数据之前在某平台看到一些人发布需求,需要爬取淘宝司法拍卖的数据.在这里给大家分享一下,有需要的就直接复制我的代码,粘贴回去就可以直接用了,今天下午才用了的是可以完整的抓取想要的所有数 ...
python爬虫抓收费图片_简单的抓取淘宝图片的Python爬虫
写了一个抓taobao图片的爬虫,全是用if,for,while写的,比较简陋,入门作品. 从网页http://mm.taobao.com/json/request_top_list.htm?type ...

winform抓取淘宝宝贝详细页的上下架时间等信息

winform抓取淘宝宝贝详细页的上下架时间等信息相关推荐

最新文章

热门文章