源码以及最新稳定爬全国移动,联通详单账单软件购买 !请点击此!

整体效果如下:

所有运营商抓取到的数据都放到了一个库的三个表里面,后期做数据分析用。

下面分享几个核心的源代码给正在研究这个的朋友们。

简单架构:

爬虫核心代码:

代码有些乱,基本把整个联通上的数据都能抓全了,大家自己优化代码吧。

(原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)

using Crawler.Common;
using Crawler.Interface;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using YXRepository.Log;
using YXRepository.Model;

namespace Crawler {
    /// <summary>
    /// Crawler for the China Unicom (10010.com) self-service site: login helpers,
    /// personal information, monthly bill requests and call-detail records.
    /// </summary>
    /// <remarks>
    /// Members such as <c>checkLogin</c>, <c>currentPhoneNumber</c>,
    /// <c>callListRequestUrl</c> and the <c>login*Url</c> fields are not declared in
    /// this class; presumably they are inherited from <c>CrawlerBase</c>.
    /// The request/response log is static and is reset whenever a new instance is
    /// constructed, so it is shared by all instances.
    /// </remarks>
    public class CrawlerUNC : CrawlerBase, ICrawler {
        /// <summary>Number of records requested per call-detail page.</summary>
        private const int PageSize = 50;

        /// <summary>Number of months (current month included) that are queried.</summary>
        private const int MonthsToQuery = 5;

        // Field extractors for the call-detail JSON. Patterns are unchanged from the
        // original code; they are hoisted so they are built once instead of per page.
        private static readonly Regex CallDateRegex = new Regex(@"""calldate""(:)("".*?"")");
        private static readonly Regex CallTimeRegex = new Regex(@"""calltime""(:)("".*?"")");
        private static readonly Regex HomeAreaRegex = new Regex(@"""homeareaName""(:)("".*?"")");
        private static readonly Regex CallTypeRegex = new Regex(@"""calltypeName""(:)("".*?"")");
        private static readonly Regex OtherNumRegex = new Regex(@"""othernum""(:)("".*?"")");
        private static readonly Regex CallLongHourRegex = new Regex(@"""calllonghour""(:)("".*?"")");
        private static readonly Regex LandTypeRegex = new Regex(@"""landtype""(:)("".*?"")");
        private static readonly Regex TotalFeeRegex = new Regex(@"""totalfee""(:)("".*?"")");

        HttpHelperNew hhn;

        // Initialised here as well as in the constructor so that the public static
        // CollectJsonLog cannot hit a null list when called before any instance exists.
        static IList<String> loglist = new List<string>();

        private string currentUVC {
            get {
                return HttpHelperNew.UNCuacverifykey;
            }
        }

        /// <summary>
        /// Creates a China Unicom crawler and sets the login-related request URLs.
        /// </summary>
        /// <param name="number">Phone number stored in <c>currentPhoneNumber</c>.</param>
        /// <param name="pwd">Service password stored in <c>currentPhoneServicePwd</c>.</param>
        public CrawlerUNC(string number, string pwd) {
            hhn = new HttpHelperNew();
            // Each new crawler starts with an empty (shared, static) log.
            loglist = new List<string>();
            currentPhoneNumber = number;
            currentPhoneServicePwd = pwd;
            loginIsNeedVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CheckNeedVerify";
            loginVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CreateImage";
            loginToVerifyImgRequestUrl = "https://uac.10010.com/portal/Service/CtaIdyChk";
        }

        /// <summary>
        /// Asks the server whether an image verification code is required for login.
        /// </summary>
        /// <returns>
        /// <c>false</c> only when the response contains <c>"resultCode":"false"</c>;
        /// <c>true</c> otherwise.
        /// </returns>
        public bool IsLoginNeedVerify() {
            string url = loginIsNeedVerifyImgRequestUrl;
            string queryData = "callback=jQuery17205245009011952871_" + TimeStamp.GetTimeStamp_13()
                + "&userName=" + currentPhoneNumber
                + "&pwdType=01&_=" + TimeStamp.GetTimeStamp_13();
            string response = hhn.HttpGet(url, queryData, HttpForType.联通);
            CollectJsonLog(url, queryData, 0, response);
            return !response.Contains(@"""resultCode"":""false""");
        }

        /// <summary>
        /// Submits the image verification code entered by the user.
        /// </summary>
        /// <param name="imgcode">The verification code; also stored in <c>currentLoginImgCode</c>.</param>
        /// <returns><c>true</c> when the response contains <c>"resultCode":"true"</c>.</returns>
        public bool IsLoginImgVerifyOk(string imgcode) {
            currentLoginImgCode = imgcode;
            string url = loginToVerifyImgRequestUrl;
            string queryData = "callback=jQuery17208163765012834383_1463034583178&verifyCode="
                + currentLoginImgCode + "&verifyType=1&_=1463034805373";
            string response = hhn.HttpGet(url, queryData);
            CollectJsonLog(url, queryData, 0, response);
            return response.Contains(@"""resultCode"":""true""");
        }

        /// <summary>
        /// Returns the login verification image as a <c>data:image/png;base64,</c> URI.
        /// </summary>
        /// <returns>
        /// The data URI, or an empty string when no image data is available.
        /// </returns>
        public string GetLoginImg() {
            loginVerifyImgStream = string.Empty;
            string queryData = "t=1463034742570";
            string prefix = "data:image/png;base64,";
            // NOTE(review): this excerpt never requests the image, so the payload is
            // always empty and the method always returns an empty string. The download
            // from loginVerifyImgRequestUrl appears to have been removed before publication.
            string base64Image = "";
            CollectJsonLog(loginVerifyImgRequestUrl, queryData, 0, base64Image);
            if (!string.IsNullOrEmpty(base64Image)) {
                loginVerifyImgStream = prefix + base64Image;
            }
            return loginVerifyImgStream;
        }

        /// <summary>Logs out. This implementation performs no request.</summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool LogOut() {
            return true;
        }

        /// <summary>
        /// Requests the login URL, repeating every 500 ms while the response reports
        /// that the province system is being upgraded.
        /// </summary>
        /// <param name="loginret">Always set to an empty string by this implementation.</param>
        /// <returns>
        /// Always <c>false</c> in this excerpt.
        /// NOTE(review): <c>loginRequestUrl</c> is set to an empty string and the response
        /// is never inspected for success; the real URL and result handling appear to
        /// have been removed before publication.
        /// </returns>
        public bool Login(out string loginret) {
            loginret = string.Empty;
            loginRequestUrl = "";
            bool retValue = false;
            string response;
            do {
                response = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
                Thread.Sleep(500);
            } while (response.Contains(@"所属省份系统正在升级"));
            CollectJsonLog(loginRequestUrl, "", 0, response);
            return retValue;
        }

        /// <summary>
        /// Loads the China Unicom captcha page and adds a fixed <c>WT_FPC</c> cookie
        /// to the HTTP helper's cookie container.
        /// </summary>
        public void UNCInitPage() {
            string url = "https://login.10010.com/captchazh.htm?type=05";
            string response = hhn.HttpGet(url, "", HttpForType.联通);
            CollectJsonLog(url, "", 0, response);

            // The original code computed a cookie value via JavaScript but did not use it.
            // NOTE(review): the call is kept in case it has side effects — confirm and remove.
            JsHelper.GetJsMethd("GetWT_FPC", null);

            // Per the original author's note, a hard-coded value is acceptable because
            // the server does not validate it.
            Cookie wtCookie = new Cookie() {
                Expires = DateTime.Now.AddYears(10),
                Path = "/",
                Domain = ".10010.com",
                Name = "WT_FPC",
                Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376"
            };
            hhn.cookie.Add(wtCookie);
        }

        /// <summary>Sends the query SMS. This implementation performs no request.</summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool SendQuerySms() {
            return true;
        }

        /// <summary>Not implemented for this crawler.</summary>
        /// <returns>Always <c>null</c>.</returns>
        public IList<T> GetQueryData<T>(T temp) {
            return null;
        }

        /// <summary>
        /// Posts to the personal-information endpoint when <c>checkLogin</c> is true.
        /// </summary>
        /// <returns>The raw response, or an empty string when not logged in.</returns>
        private string getMyDetails() {
            if (!checkLogin) {
                return "";
            }
            string infoUrl = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
            string response = hhn.HttpPost(infoUrl, "", HttpForType.联通);
            CollectJsonLog(infoUrl, "", 1, response);
            return response;
        }

        /// <summary>
        /// Collects the subscriber's basic information, account balances, points and
        /// phone attribution into a <see cref="TXInfoModel"/>.
        /// </summary>
        /// <returns>The populated model.</returns>
        /// <exception cref="FormatException">
        /// Thrown by <c>Decimal.Parse</c> when a balance field cannot be parsed.
        /// </exception>
        public TXInfoModel GetInfo() {
            TXInfoModel tim = new TXInfoModel();

            #region Part 1: name and sex
            string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
            string retString = hhn.HttpGet(infoUrl, "");
            CollectJsonLog(infoUrl, "", 0, retString);
            tim.CustomerName = Utilities.QuMiddle(retString, @"name"":""", @"""");
            tim.CustomerSex = Utilities.QuMiddle(retString, @"sex"":""", @"""") == "1" ? "男" : "女";
            #endregion

            #region Part 2: personal details
            string retString2 = getMyDetails();
            tim.Email = Utilities.QuMiddle(retString2, @"sendemail"":""", @"""");
            DateTime innettime;
            // On parse failure innettime stays default(DateTime).
            DateTime.TryParse(Utilities.QuMiddle(retString2, @"opendate"":""", @""""), out innettime);
            tim.InNetTime = innettime;
            tim.Grade = "";
            tim.IDCard = Utilities.QuMiddle(retString2, @"certnum"":""", @"""");
            tim.PhoneNumber = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.ProviderName = "中国联通:" + Utilities.QuMiddle(retString2, @"brand"":""", @"""")
                + "-" + Utilities.QuMiddle(retString2, @"productName"":""", @"""");
            tim.RegAddress = Utilities.QuMiddle(retString2, @"certaddr"":""", @"""");
            tim.ContactNum = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
            tim.NetAge = "";
            tim.PhoneStatus = Utilities.QuMiddle(retString2, @"subscrbstat"":""", @"""");
            tim.RealNameInfo = Utilities.QuMiddle(retString2, @"certtype"":""", @"""");
            tim.StarLevel = Utilities.QuMiddle(retString2, @"custlvl"":""", @"""");
            tim.LevelInfo = "";
            tim.ZipCode = "";
            #endregion

            #region Part 3: phone balance / account balance
            string balanceUrl = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
            string balanceData = "type=onlyAccount";
            string balanceResponse = hhn.HttpPost(balanceUrl, balanceData, HttpForType.联通);
            // Log what was actually sent: a POST carrying balanceData.
            CollectJsonLog(balanceUrl, balanceData, 1, balanceResponse);
            tim.CurFee = Decimal.Parse(Utilities.QuMiddle(balanceResponse, @"userbalance"":""", @""""));
            tim.CurFeeTotal = Decimal.Parse(Utilities.QuMiddle(balanceResponse, @"acctbalance"":""", @""""));
            #endregion

            #region Part 4: points
            string pointsUrl = "http://iservice.10010.com/e3/static/query/headerView";
            string pointsResponse = hhn.HttpPost(pointsUrl, "", HttpForType.联通);
            CollectJsonLog(pointsUrl, "", 1, pointsResponse);
            int score;
            // Per the original author's note, some card types return JSON without a
            // points field; TryParse then leaves score at 0.
            // NOTE(review): the key searched for is "sore", not "score" — confirm it
            // matches the real response.
            int.TryParse(Utilities.QuMiddle(pointsResponse, @"sore"":""", @""""), out score);
            tim.PointValue = score;
            #endregion

            #region Part 5: phone attribution lookup
            tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);
            #endregion

            return tim;
        }

        /// <summary>
        /// Requests the bill summary for each of the last five months.
        /// </summary>
        /// <returns>
        /// The list of bill models.
        /// NOTE(review): in this excerpt the responses are requested and logged but never
        /// parsed (the original only extracted the "billcycle" field and discarded it),
        /// so the returned list is always empty.
        /// </returns>
        public IList<TXZhangDanModel> GetZhangDan() {
            IList<TXZhangDanModel> listZD = new List<TXZhangDanModel>();
            string[] billPeriods;
            List<string> postDataList = GetZhangDanPostData(out billPeriods);
            if (!checkLogin) {
                return listZD;
            }
            foreach (string postData in postDataList) {
                // NOTE(review): infoUrl is not declared in this class or method;
                // presumably it is a CrawlerBase member — confirm.
                string response = hhn.HttpPost(infoUrl, postData, HttpForType.联通);
                CollectJsonLog(infoUrl, postData, 1, response);
            }
            return listZD;
        }

        /// <summary>
        /// Downloads the call-detail records for the last five months, page by page.
        /// </summary>
        /// <returns>All parsed call-detail records; empty when not logged in.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the server reports that the call-detail link is being accessed too frequently.
        /// </exception>
        public IList<TXXiangDanModel> GetXiangDan() {
            IList<TXXiangDanModel> listXD = new List<TXXiangDanModel>();
            if (!checkLogin) {
                return listXD;
            }

            foreach (string range in GetXiangDanPostData()) {
                string[] bounds = range.Split('&');
                string beginDate = bounds[0];
                string endDate = bounds[1];

                // NOTE(review): the published excerpt used PostdataS here without ever
                // declaring it. The first request is reconstructed as page 1 of the month,
                // using the same format as the paged requests below — confirm.
                string PostdataS = BuildXiangDanPostData(beginDate, endDate, "1");
                string retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);

                // Retry for as long as the service reports it is temporarily unavailable.
                while (retS.Contains("暂时无法为您提供服务")) {
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                    Thread.Sleep(500);
                }
                if (retS.Contains("系统检测您的访问过于频繁")) {
                    throw new InvalidOperationException("访问获取详单链接过于频繁!请明天再试");
                }

                // The month's total record count determines how many pages to fetch.
                string currentMonthTotalNum = Utilities.QuMiddle(retS, @"totalRecord"":", @",""");
                foreach (string pageNo in GetAllcurCuror(currentMonthTotalNum)) {
                    PostdataS = BuildXiangDanPostData(beginDate, endDate, pageNo);
                    retS = hhn.HttpPost(callListRequestUrl, PostdataS, HttpForType.联通);
                    CollectJsonLog(callListRequestUrl, PostdataS, 1, retS);
                    AppendXiangDanPage(retS, listXD);
                }
            }
            return listXD;
        }

        /// <summary>Builds the POST body for one call-detail page.</summary>
        private static string BuildXiangDanPostData(string beginDate, string endDate, string pageNo) {
            return "beginDate=" + beginDate + "&endDate=" + endDate + "&pageNo=" + pageNo + "&pageSize=" + PageSize;
        }

        /// <summary>
        /// Parses one call-detail page and appends its records to <paramref name="target"/>.
        /// The page is skipped when the per-field match counts disagree.
        /// </summary>
        private static void AppendXiangDanPage(string response, IList<TXXiangDanModel> target) {
            MatchCollection callDates = CallDateRegex.Matches(response);
            MatchCollection callTimes = CallTimeRegex.Matches(response);
            MatchCollection homeAreas = HomeAreaRegex.Matches(response);
            MatchCollection callTypes = CallTypeRegex.Matches(response);
            MatchCollection otherNums = OtherNumRegex.Matches(response);
            MatchCollection durations = CallLongHourRegex.Matches(response);
            MatchCollection landTypes = LandTypeRegex.Matches(response);
            MatchCollection totalFees = TotalFeeRegex.Matches(response);

            int count = callDates.Count;
            // callTimes is indexed below as well, so it must take part in the check
            // (the original omitted it).
            bool consistent = callTimes.Count == count
                && homeAreas.Count == count
                && callTypes.Count == count
                && otherNums.Count == count
                && durations.Count == count
                && landTypes.Count == count
                && totalFees.Count == count;
            if (!consistent) {
                return;
            }

            for (int i = 0; i < count; i++) {
                string callDate = Utilities.QuMiddle(callDates[i].Value, @"calldate"":""", @"""");
                string callTime = Utilities.QuMiddle(callTimes[i].Value, @"calltime"":""", @"""");
                TXXiangDanModel record = new TXXiangDanModel() {
                    anotherNm = Utilities.QuMiddle(otherNums[i].Value, @"othernum"":""", @""""),
                    commFee = decimal.Parse(Utilities.QuMiddle(totalFees[i].Value, @"totalfee"":""", @"""")),
                    commMode = Utilities.QuMiddle(callTypes[i].Value, @"calltypeName"":""", @""""),
                    commPlac = Utilities.QuMiddle(homeAreas[i].Value, @"homeareaName"":""", @""""),
                    commTime = Utilities.QuMiddle(durations[i].Value, @"calllonghour"":""", @""""),
                    commType = Utilities.QuMiddle(landTypes[i].Value, @"landtype"":""", @""""),
                    startTime = DateTime.Parse(callDate + " " + callTime)
                };
                target.Add(record);
            }
        }

        /// <summary>
        /// Builds the 1-based page numbers ("1", "2", ...) needed to page through a
        /// month's records at 50 records per page.
        /// </summary>
        /// <param name="totalNum">Total number of records in the month, as text.</param>
        /// <returns>
        /// The page numbers; empty when <paramref name="totalNum"/> is not a positive integer.
        /// </returns>
        private List<String> GetAllcurCuror(string totalNum) {
            int total;
            // TryParse leaves total at 0 on failure, which yields no pages.
            int.TryParse(totalNum, out total);
            if (total <= 0) {
                return new List<string>();
            }
            // Ceiling division: e.g. 201 records -> 5 pages.
            int pageCount = (total + PageSize - 1) / PageSize;
            return Enumerable.Range(1, pageCount).Select(page => page.ToString()).ToList();
        }

        /// <summary>
        /// Builds the POST bodies for the bill requests of the last five months.
        /// </summary>
        /// <param name="startendS">
        /// Receives one "start&amp;end" string per month, formatted with the default
        /// <c>DateTime.ToString()</c>: first day of the month to now for the current
        /// month, first day to last day for earlier months.
        /// </param>
        /// <returns>
        /// Bodies of the form <c>billdate=yyyyMM&amp;querycode=0001&amp;querytype=0001</c>,
        /// newest month first.
        /// </returns>
        private List<string> GetZhangDanPostData(out string[] startendS) {
            List<string> postDataList = new List<string>();
            startendS = new string[MonthsToQuery];
            DateTime now = DateTime.Now;
            for (int i = 0; i < MonthsToQuery; i++) {
                DateTime month = now.AddMonths(-i);
                DateTime monthStart = new DateTime(month.Year, month.Month, 1);
                // The current month ends "now"; earlier months end on their last day.
                DateTime monthEnd = i == 0 ? month : monthStart.AddMonths(1).AddDays(-1);
                startendS[i] = monthStart.ToString() + "&" + monthEnd.ToString();
                postDataList.Add("billdate=" + month.ToString("yyyyMM") + "&querycode=0001&querytype=0001");
            }
            return postDataList;
        }

        /// <summary>
        /// Builds the date ranges for the call-detail requests of the last five months,
        /// each formatted as <c>yyyy-MM-dd&amp;yyyy-MM-dd</c> (for example
        /// <c>2016-04-01&amp;2016-04-30</c>), newest month first.
        /// </summary>
        /// <returns>
        /// One range per month; the current month's range ends today, earlier months
        /// end on their last day.
        /// </returns>
        private List<string> GetXiangDanPostData() {
            List<string> ranges = new List<string>();
            DateTime today = DateTime.Now;
            DateTime firstOfThisMonth = new DateTime(today.Year, today.Month, 1);
            for (int i = 0; i < MonthsToQuery; i++) {
                // AddMonths handles the year rollover; the previous manual arithmetic
                // produced month 0 or negative months from January to April.
                DateTime monthStart = firstOfThisMonth.AddMonths(-i);
                DateTime monthEnd = i == 0 ? today : monthStart.AddMonths(1).AddDays(-1);
                ranges.Add(monthStart.ToString("yyyy-MM-dd") + "&" + monthEnd.ToString("yyyy-MM-dd"));
            }
            return ranges;
        }

        /// <summary>
        /// Appends one request/response entry to the shared static log.
        /// </summary>
        /// <param name="url">Request URL.</param>
        /// <param name="data">Request data.</param>
        /// <param name="method1">1 is logged as "Post"; any other value as "Get".</param>
        /// <param name="responseS">Response text.</param>
        public static void CollectJsonLog(string url, string data, int method1, string responseS) {
            string method = method1 == 1 ? "Post" : "Get";
            loglist.Add(string.Format("【请求url:{0} , 请求数据:{1} , 请求方式:{2}, 返回数据:{3} 】", url, data, method, responseS));
        }

        /// <summary>Returns the shared static log list itself, not a copy.</summary>
        public IList<String> GetAllJsonLog() {
            return loglist;
        }
    }
}

源码,请联系我

转载于:https://www.cnblogs.com/x-poior/p/5641437.html

【最新原创】中国移动(中国联通)_通信账单,详单,个人信息抓取爬虫代码相关推荐

  1. python爬虫公众号_python爬虫_微信公众号推送信息爬取的实例

    问题描述 利用搜狗的微信搜索抓取指定公众号的最新一条推送,并保存相应的网页至本地. 注意点 搜狗微信获取的地址为临时链接,具有时效性. 公众号为动态网页(JavaScript渲染),使用request ...

  2. python微信爬取教程_python爬虫_微信公众号推送信息爬取的实例

    问题描述 利用搜狗的微信搜索抓取指定公众号的最新一条推送,并保存相应的网页至本地. 注意点 搜狗微信获取的地址为临时链接,具有时效性. 公众号为动态网页(JavaScript渲染),使用request ...

  3. python微信公众号推送_python爬虫_微信公众号推送信息爬取的实例

    问题描述 利用搜狗的微信搜索抓取指定公众号的最新一条推送,并保存相应的网页至本地. 注意点 搜狗微信获取的地址为临时链接,具有时效性. 公众号为动态网页(JavaScript渲染),使用request ...

  4. python爬取小说出现乱码_详解Python解决抓取内容乱码问题(decode和encode解码)

    一.乱码问题描述 经常在爬虫或者一些操作的时候,经常会出现中文乱码等问题,如下 原因是源网页编码和爬取下来后的编码格式不一致 二.利用encode与decode解决乱码问题 字符串在Python内部的 ...

  5. 利用HttpClient抓取话费详单等信息

    由于项目需要,需要获取授权用户的在运营商(中国移动.中国联通.中国电信)那里的个人信息.话费详单.月汇总账单信息(需要指出的是电信用户的个人信息无法从网上营业厅获取).抓取用户信息肯定是要模仿用户登录 ...

  6. python微信公众号爬虫_微信公众号推送信息爬取---python爬虫

    问题描述 利用搜狗的微信搜索抓取指定公众号的最新一条推送,并保存相应的网页至本地. 注意点 搜狗微信获取的地址为临时链接,具有时效性. 公众号为动态网页(JavaScript渲染),使用request ...

  7. python爬取微信公众号推送_微信公众号推送信息爬取---python爬虫

    问题描述 利用搜狗的微信搜索抓取指定公众号的最新一条推送,并保存相应的网页至本地. 注意点 搜狗微信获取的地址为临时链接,具有时效性. 公众号为动态网页(JavaScript渲染),使用request ...

  8. python爬取数据案例分析_基于Python及webdriver的网页抓取案例

    上次有朋友问怎么抓取交易所网站的数据,特别是历史数据,这里特别推荐使用selenium这一自动化测试框架. 原本selenium是用来完成大量基于浏览器的自动化测试的,但由于可以方便地执行JS代码,摸 ...

  9. python每隔半个小时执行一次_一篇文章教你用Python抓取微博评论

    [Part1--理论篇] 试想一个问题,如果我们要抓取某个微博大V微博的评论数据,应该怎么实现呢?最简单的做法就是找到微博评论数据接口,然后通过改变参数来获取最新数据并保存.首先从微博api寻找抓取评 ...

  10. python数据抓取工具_【重磅开源】Hawk-数据抓取工具:简明教程

    Hawk: Advanced Crawler& ETL tool written in C#/WPF 1.软件介绍 Hawk3已经发布,本文的很多信息已经不完整或过期,所有更新信息和下载地址都 ...

最新文章

  1. J.U.C系列(四)FutrueTask的使用
  2. python画星空的程序_用python画星空源代码是什么?
  3. pcm 采样率转换_PCM编码与Waveform音频文件(.wav)格式详解
  4. 用动画切换按钮的状态
  5. java过去不到空单元格,Java POI。空白时跳过单元格
  6. nodeAPI--TCP
  7. Rocket - tilelink - AtomicAutomata
  8. 公众号网课搜题API系统对接教程
  9. 下一跳配置的原则--ensp
  10. policy服务器未能登录,win7电脑提示group policy client服务未能登录的解决方法
  11. dsp28335 Ecap总结
  12. mongodb 基本操作:文档查询
  13. 公众号php关键词回复小程序,微信自定义关键词回复信息
  14. win10更新不动_Win10更新总失败?学会这三招搞定它
  15. python main传参args_python argh/argparse:如何将列表作为命令行参数传递?
  16. 春季儿童吃什么有助于长高,3款适合孩子长高的食谱做法,学起来
  17. 10个无版权限制的免费图片素材资源网站
  18. ZYNQ-AX7020学习笔记
  19. 数据分析师招聘情况之python分析
  20. 吾爱第三课-修改版权和资源

热门文章

  1. 保姆级windows下mysql数据库安装教程
  2. visio中绘制空间坐标系
  3. Adobe完全卸载工具
  4. 左程云 - 大厂刷题班 - 一种字符在左,另一种字符在右的最少交换次数
  5. Docker 管理之 --- 资源限制
  6. termux如何下载metasploit(msf)
  7. ROS与VREP通信
  8. 形式语言与自动机学习心得
  9. 灰度决策--如何解决棘手复杂问题
  10. VMware安装教程