网页解析器接口定义
View Code

/// <summary>
/// Contract implemented by site parsers. In this file it is implemented by
/// ParserBase, which WebParser and CheggitParser build on.
/// </summary>
public interface IParser
{
#region Events
/// <summary>Raised by ParserBase.Activate() before the parser is marked as initialized.</summary>
event ActivatedEventHandler Activated;
/// <summary>Raised by ParserBase.DeActivate() before the parser's drop-down is removed from the main form.</summary>
event DeActivatedEventHandler DeActivated;
#endregion

#region Methods
/// <summary>Activates the parser (see <see cref="Activated"/>).</summary>
void Activate();
/// <summary>Deactivates the parser (see <see cref="DeActivated"/>).</summary>
void DeActivate();
/// <summary>
/// Turns one XML fragment into table data. The meaning of <paramref name="args"/> is
/// implementation-defined; CheggitParser reads args[0] as the "DateShared" value.
/// </summary>
void FillDataTable(string xmlstr, params string[] args);
/// <summary>Extracts torrent entries from the page text <paramref name="source"/> fetched from <paramref name="uri"/>.</summary>
void GetTorrentsList(string source, string uri, CancellationToken t);
/// <summary>
/// NOTE(review): the name is spelled "Initalize" (sic) and ParserBase implements it by throwing
/// NotImplementedException; the per-site setup visible in this file lives in the abstract
/// ParserBase.Create(Form, params object[]) instead — confirm which one callers are meant to use.
/// </summary>
void Initalize(Form OwnerForm, params object[] args);
/// <summary>Returns detail data for <paramref name="row"/>; <paramref name="cache"/> is the cache level for any request made.</summary>
DynamicObjectData GetObjectData(DataRow row, HttpRequestCacheLevel cache);
/// <summary>
/// Builds or loads a query depending on options.Type. ParserBase returns the built URI for
/// Navigation.Mode.Create and an empty string otherwise.
/// </summary>
string ParseURI(ref Navigation.ParseOptions options);
#endregion

#region Properties
/// <summary>Set to true by CheggitParser.Create; presumably indicates that comment posting is supported — confirm against callers.</summary>
bool CanPost { get; set; }
/// <summary>Set to true by ParserBase.Activate(). The spelling "Initalized" (sic) is part of the public contract.</summary>
bool Initalized { get; set; }
/// <summary>When true, WebParser.ParseRequest logs in before sending a request if a login is still needed.</summary>
bool LoginRequired { get; set; }
/// <summary>Cookie container attached to outgoing requests by WebParser.ParseRequest.</summary>
CookieContainer Cookie { get; set; }
/// <summary>Parser settings; ParserBase loads them in Initialize() and writes them in SaveConfig().</summary>
ParserConfig Config { get; set; }
/// <summary>Site URIs and URI fragments (base URI, list page, download page, ...).</summary>
ParserURIs Refs { get; set; }
/// <summary>Query parameter names and the category/sort/search-type lists for the site.</summary>
ParserData Data { get; set; }
/// <summary>Icon associated with the site.</summary>
Image FavIcon { get; set; }
/// <summary>
/// NOTE(review): ParserBase implements this as an auto-property that nothing in this file assigns;
/// cookie persistence in WebParser uses ParserBase.CookieFilename instead — confirm intent.
/// </summary>
string CookieFile { get; }
/// <summary>Host name of the site; ParserBase.Initialize() derives it from Refs.BaseUri.</summary>
string Host { get; set; }
/// <summary>Display name of the parser; ParserBase also uses it to build the default file name (Name + ".ini").</summary>
string Name { get; set; }
/// <summary>Directory for thumbnails; in ParserBase it is "thumbs/&lt;host&gt;" under the application directory.</summary>
string ThumbnailDirectory { get; }
#endregion
}

网页解析器抽象基类

/// <summary>
/// Abstract base class for site parsers. It owns the parser's configuration, cookie container,
/// URI table and query-string state, and provides the default query building/loading logic
/// that concrete parsers can override.
/// </summary>
public abstract class ParserBase : IParser
{
    #region Constants

    private const int KB = 0x400;
    private const int MB = 0x100000;
    private const int GB = 0x40000000;
    private const string ErrorScreenList = "";

    #endregion

    #region Private Fields

    private bool _canpost;
    private DefaultConfig iConfig;
    private CookieContainer iCookie;
    private string iHost;
    private object iCookieL;
    private object iName;
    private object iVersion;
    private ParserData iData;
    private ParserURIs iHostList;

    #endregion

    public object CookieLock = new object();

    #region Events

    public event ActivatedEventHandler Activated;
    public event DeActivatedEventHandler DeActivated;

    #endregion

    #region Constructors

    /// <summary>
    /// Creates the parser with an empty cookie container, default configuration and
    /// the default list of allowed image formats.
    /// </summary>
    public ParserBase()
    {
        this.ToolStripDropdown = new ContextMenuStrip();
        this.AllowedFormats = new List<string> { "jpeg", "jpg", "bmp", "png", "gif" };
        this.LoginRequired = true;
        this.FavIcon = null;
        this.iCookieL = new object();
        this.iCookie = new CookieContainer();
        this.iVersion = 1;
        this.iData = new ParserData();
        this.iHostList = new ParserURIs();
        this.iConfig = new DefaultConfig();
        this.BestMatchSearchType = "Default";
        this.Local = false;
        this.SubButtons = new List<ToolStripItem>();
        this.HasOptions = false;
        this.ToolStripDropdown.RenderMode = ToolStripRenderMode.System;
    }

    /// <summary>
    /// Saves the configuration when the parser is collected.
    /// </summary>
    ~ParserBase()
    {
        // An exception escaping a finalizer terminates the process, so a failed
        // save is reported and otherwise ignored here.
        try
        {
            this.SaveConfig();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }

    #endregion

    /// <summary>
    /// Raises <see cref="Activated"/>, marks the parser as initialized and adds its
    /// drop-down menu to the main form.
    /// </summary>
    public virtual void Activate()
    {
        // Copy the delegate first so an unsubscribe between the null check and the call cannot fail.
        ActivatedEventHandler handler = Activated;
        if (handler != null)
        {
            handler();
        }

        this.Initalized = true;
        Core.MainForm.ToolstripmenuButtonAdd(this.ToolStripDropdown);
    }

    /// <summary>
    /// Sets <see cref="CatString"/> to "&lt;Query_Cat&gt;=&lt;value&gt;" for the category selected in the
    /// owner form, or to an empty string when the first entry (index 0) or nothing is selected.
    /// </summary>
    public void ConstructCatString()
    {
        string result = "";
        if (this.OwnerForm.catL.SelectedIndex >= 1)
        {
            result = Convert.ToString(Microsoft.VisualBasic.CompilerServices.Operators.ConcatenateObject(this.Data.Query_Cat + "=", this.Data.CatList[Convert.ToString(this.OwnerForm.catL.SelectedItem)]));
        }

        this.CatString = result;
    }

    /// <summary>
    /// Sets <see cref="PageString"/> to "&lt;Query_Page&gt;=&lt;currentpage&gt;" when the page is past the
    /// parser's start page index; otherwise to an empty string.
    /// </summary>
    /// <param name="currentpage">Page number to encode.</param>
    public void ConstructPageString(int currentpage)
    {
        this.PageString = "";
        if (currentpage > this.Data.StartPageIndex)
        {
            this.PageString = this.Data.Query_Page + "=" + Convert.ToString(currentpage);
        }
    }

    /// <summary>
    /// Sets <see cref="SortString"/> to "&lt;Query_Sort&gt;=&lt;value&gt;" for the sort order selected in the
    /// owner form, or to an empty string when the first entry (index 0) or nothing is selected.
    /// </summary>
    public void ConstructSortString()
    {
        string result = "";
        if (this.OwnerForm.SortL.SelectedIndex >= 1)
        {
            result = Convert.ToString(Microsoft.VisualBasic.CompilerServices.Operators.ConcatenateObject(this.Data.Query_Sort + "=", this.Data.SortList[Convert.ToString(this.OwnerForm.SortL.SelectedItem)]));
        }

        this.SortString = result;
    }

    /// <summary>
    /// Sets <see cref="TagString"/> to "&lt;Query_Search&gt;=" followed by the current address' tags
    /// joined with the search delimiter, or to an empty string when there are no tags.
    /// </summary>
    public void ConstructTagString()
    {
        this.TagString = "";
        if (Core.CurrentAddress.TagList.Count >= 1)
        {
            string[] tags = new string[Core.CurrentAddress.TagList.Count];
            Core.CurrentAddress.TagList.CopyTo(tags, 0);
            this.TagString = this.Data.Query_Search + "=" + String.Join(this.Data.Query_SearchDelim, tags);
        }
    }

    /// <summary>
    /// Resets <see cref="UserString"/> to an empty string.
    /// </summary>
    public void ConstructUserString()
    {
        this.UserString = "";
    }

    /// <summary>
    /// Site-specific setup (names, URIs, query parameter names, lists).
    /// </summary>
    /// <param name="OwnerForm">Form that hosts the parser.</param>
    /// <param name="args">Implementation-defined extra arguments.</param>
    public abstract void Create(Form OwnerForm, params object[] args);

    /// <summary>
    /// Builds the query part of a URI from the current UI state and appends it to <paramref name="QueryURI"/>.
    /// </summary>
    /// <param name="QueryParams">Extra "key=value" parameters; one is appended unless its key is a known
    /// query name of this parser or the exact parameter is already present.</param>
    /// <param name="QueryURI">Base URI; <see cref="ParseURIExceptions"/> may replace it.</param>
    /// <param name="CurrentAddressText">Address currently shown, passed to <see cref="ParseURIExceptions"/>.</param>
    /// <param name="CurrentPage">Page number to encode.</param>
    /// <param name="reflectUI">Passed through to <see cref="ParseURIExceptions"/>.</param>
    /// <returns>The (possibly replaced) base URI followed by "?" and the "&amp;"-joined parameters,
    /// or just the base URI when there are no parameters.</returns>
    public virtual string CreateQuery(string[] QueryParams, string QueryURI, string CurrentAddressText, int CurrentPage, bool reflectUI)
    {
        char[] separator = new char[] { '&' };

        this.ConstructCatString();
        this.ConstructSortString();
        this.ConstructTagString();
        this.ConstructPageString(CurrentPage);
        this.ConstructUserString();

        this.ParseURIExceptions(CurrentAddressText, ref QueryURI, reflectUI);

        string combined = this.UserString + "&" + this.TagString + "&" + this.CatString + "&" + this.SortString + "&" + this.PageString;
        List<string> parts = combined.Split(separator, StringSplitOptions.RemoveEmptyEntries).ToList<string>();

        // Snapshot of the parts generated from the UI, taken before any extra parameter is appended.
        Core.PrimitiveList generated = new Core.PrimitiveList(parts.ToArray());
        foreach (string extra in QueryParams)
        {
            string key = CheggitBrowser.Local.SplitArgs(extra)[0];
            if (!this.Data.QueryList.Contains(key, true) && !generated.Contains(extra, true))
            {
                parts.Add(extra);
            }
        }

        string prefix = parts.Count >= 1 ? "?" : "";
        return QueryURI + prefix + String.Join("&", parts.ToArray());
    }

    /// <summary>
    /// Raises <see cref="DeActivated"/> and removes the parser's drop-down menu from the main form.
    /// </summary>
    public virtual void DeActivate()
    {
        DeActivatedEventHandler handler = DeActivated;
        if (handler != null)
        {
            handler();
        }

        Core.MainForm.ToolstripmenuButtonRemove(this.ToolStripDropdown);
    }

    /// <summary>
    /// Turns one XML fragment into table data; see the concrete parser for the expected layout.
    /// </summary>
    public abstract void FillDataTable(string xmlstr, params string[] args);

    /// <summary>
    /// Strips markup tags from <paramref name="text"/> line by line and returns an XML document
    /// whose root &lt;tr&gt; contains one &lt;item&gt; per input line.
    /// </summary>
    /// <param name="text">HTML text to convert.</param>
    /// <returns>The generated XML text. Line content is not XML-escaped.</returns>
    public string GetHTMLText(string text)
    {
        StringBuilder builder = new StringBuilder();
        builder.AppendLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<tr>");

        using (StringReader reader = new StringReader(text))
        {
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                // NOTE(review): the published source read Replace(" ", " "), a no-op that looks like an
                // HTML-decoded "&nbsp;". Both the entity (not valid in XML) and the literal
                // non-breaking space are normalized to a plain space here — confirm against the original.
                line = line.Replace("&nbsp;", " ").Replace("\u00A0", " ");

                // Remove tags one at a time; repeated passes also catch tags that only form after a removal.
                while (Regex.IsMatch(line, "<[^<>]*>"))
                {
                    line = line.Replace(HTMLParser.Search("<[^<>]*>", line), "");
                }

                builder.AppendLine("<item>" + line.Trim() + "</item>");
            }
        }

        builder.AppendLine("</tr>");
        return builder.ToString();
    }

    /// <summary>
    /// Default implementation: returns an empty <see cref="DynamicObjectData"/>.
    /// </summary>
    public virtual DynamicObjectData GetObjectData(DataRow row, HttpRequestCacheLevel cache)
    {
        return new DynamicObjectData();
    }

    /// <summary>
    /// Returns the path "&lt;ThumbnailDirectory&gt;/&lt;id&gt;.jpg" (as a string boxed in <see cref="object"/>).
    /// </summary>
    public object GetRowThumbnailPath(long id)
    {
        return Path.Combine(this.ThumbnailDirectory, Conversions.ToString(id) + ".jpg");
    }

    /// <summary>
    /// Extracts torrent entries from the page text <paramref name="source"/>.
    /// </summary>
    public abstract void GetTorrentsList(string source, string uri, CancellationToken t);

    /// <summary>
    /// Finishes setup after <see cref="Create"/> has filled in the site data: builds the drop-down
    /// menu, derives host/user URIs, creates the working directories, collects the parser's
    /// "Query_*" parameter names and loads the saved settings.
    /// </summary>
    public virtual void Initialize()
    {
        if (this.ToolStripDropdown.Items.Count > 0)
        {
            this.ToolStripDropdown.Items.Insert(0, new ToolStripSeparator());
        }

        if (this.HasOptions)
        {
            this.ToolStripDropdown.Items.Insert(0, new ToolStripMenuItem("Site Options", null, new EventHandler((s, e) => this.ShowOptions())));
        }

        this.ToolStripDropdown.Items.Insert(0, new ToolStripMenuItem("Page Filters", null, new EventHandler((s, e) => Core.ShowFilterForm())));

        this.Host = new Uri(this.Refs.BaseUri).Host;

        string configDirectory = Path.Combine(MyProject.Application.Info.DirectoryPath, "config");
        this.Config.Path = Path.Combine(configDirectory, this.DefaultFileName);

        this.Refs.BaseURI_UserList = this.Refs.BaseUri + "/" + this.Refs.BaseURI_UserPage;
        this.Refs.BaseURI_UserScript = this.Refs.BaseURI_UserPage + "?" + this.Refs.BaseURI_UserIdentifer;
        this.Refs.BaseURI_User = this.Refs.BaseUri + "/" + this.Refs.BaseURI_UserScript;

        // The config directory is created where Config.Path points (under the application
        // directory), not relative to the current working directory.
        // Directory.CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(configDirectory);
        Directory.CreateDirectory(this.CookieDirectory);
        Directory.CreateDirectory(this.ThumbnailDirectory);

        foreach (PropertyInfo info in Core.GetPropertyList(this.Data))
        {
            if (info.Name.StartsWith("Query_"))
            {
                string queryName = Convert.ToString(Core.GetProperty(this.Data, info.Name, ""));
                if (queryName != "")
                {
                    this.Data.QueryList.List.Add(queryName);
                }
            }
        }

        this.Config.LoadSettingsXML();
    }

    /// <summary>
    /// Resets the owner form's category, sort, tag and page controls and then applies the
    /// "key=value" parameters in <paramref name="objs"/> to them.
    /// </summary>
    /// <param name="objs">Query parameters, possibly URL-escaped; null or empty entries are skipped.</param>
    /// <param name="QueryURI">Not used by this implementation.</param>
    public virtual void LoadQuery(string[] objs, string QueryURI)
    {
        Core.CurrentAddress.TotalPages = 0;
        if (this.OwnerForm.catL.Items.Count >= 1)
        {
            this.OwnerForm.catL.SelectedIndex = 0;
        }

        if (this.OwnerForm.SortL.Items.Count >= 1)
        {
            this.OwnerForm.SortL.SelectedIndex = 0;
        }

        this.OwnerForm.TagListbox.Items.Clear();
        this.OwnerForm.PageOffset = this.Data.StartPageIndex;

        foreach (string rawParam in objs)
        {
            if (string.IsNullOrEmpty(rawParam))
            {
                continue;
            }

            string[] pair = rawParam.Split(new string[] { "=" }, StringSplitOptions.RemoveEmptyEntries);
            if (pair.Length < 2)
            {
                continue;
            }

            // Unescape after splitting so an escaped '=' inside a value cannot shift the split.
            // (The original computed the unescaped text but then parsed the raw parameter.)
            string key = Uri.UnescapeDataString(pair[0]);
            string value = Uri.UnescapeDataString(pair[1]);

            // NOTE(review): CheggitParser sets BaseURI_UserIdentifer to "userid=" (with '='), which a
            // key split on '=' can never equal — confirm whether this branch is meant to be reachable.
            if (key == this.Refs.BaseURI_UserIdentifer)
            {
                this.UserString = value;
            }
            else if (key == this.Data.Query_Search)
            {
                this.OwnerForm.TagListbox.Items.AddRange(value.Split(new char[] { Convert.ToChar(this.Data.Query_SearchDelim) }));
                this.ConstructTagString();
            }
            else if (key == this.Data.Query_Cat)
            {
                this.OwnerForm.catL.SelectedIndex = Convert.ToInt32(Core.IndexOfValue(value, this.Data.CatList));
                this.ConstructCatString();
            }
            else if (key == this.Data.Query_Sort)
            {
                this.OwnerForm.SortL.SelectedIndex = Convert.ToInt32(Core.IndexOfValue(value, this.Data.SortList));
                this.ConstructSortString();
            }
            else if (key == this.Data.Query_Page)
            {
                this.OwnerForm.PageOffset = Convert.ToInt32(value);
            }
        }
    }

    /// <summary>
    /// Converts a size cell such as "1,5 GB" into a byte count.
    /// </summary>
    /// <param name="CellSize">Cell text; commas are treated as decimal points.</param>
    /// <param name="GB">Unit marker the site uses for gigabytes (matched case-insensitively).</param>
    /// <param name="MB">Unit marker the site uses for megabytes.</param>
    /// <param name="KB">Unit marker the site uses for kilobytes.</param>
    /// <returns>The size in bytes, rounded; 0 when none of the unit markers is present.</returns>
    /// <exception cref="FormatException">The numeric part cannot be parsed.</exception>
    public virtual long ParseFileSize(string CellSize, string GB, string MB, string KB)
    {
        SizeType unit;
        CellSize = CellSize.ToLower().Trim();
        GB = GB.ToLower();
        MB = MB.ToLower();
        KB = KB.ToLower();
        CellSize = CellSize.Replace(",", ".");

        if (CellSize.Contains(GB))
        {
            CellSize = CellSize.Replace(GB, "").Trim();
            unit = SizeType.GB;
        }
        else if (CellSize.Contains(MB))
        {
            CellSize = CellSize.Replace(MB, "").Trim();
            unit = SizeType.MB;
        }
        else if (CellSize.Contains(KB))
        {
            CellSize = CellSize.Replace(KB, "").Trim();
            unit = SizeType.KB;
        }
        else
        {
            CellSize = Conversions.ToString(0);
            unit = SizeType.B;
        }

        // The enum's ordinal (B=0, KB=1, MB=2, GB=3) is the power of 1024 to scale by.
        return (long)Math.Round((double)(double.Parse(CellSize, NumberFormatInfo.InvariantInfo) * Math.Pow(1024.0, (double)unit)));
    }

    /// <summary>
    /// Downloads and processes the page at <paramref name="uri"/>.
    /// </summary>
    public abstract void ParsePage(string uri, CancellationToken t);

    /// <summary>
    /// Hook that lets a parser rewrite the search text; the default does nothing.
    /// </summary>
    public virtual void ParseSearchQuery(ref string text)
    {
    }

    /// <summary>
    /// Dispatches on <c>options.Type</c>: builds a query (Create) or loads one into the UI (Load).
    /// </summary>
    /// <returns>The built URI for Create; an empty string for every other mode.</returns>
    public string ParseURI(ref Navigation.ParseOptions options)
    {
        switch (options.Type)
        {
            case Navigation.Mode.Create:
                return this.CreateQuery(options.AddressAndQuery.Params, options.AddressAndQuery.Address, options.CurrentAddress, options.CurrentPage, options.ReflectUI);

            case Navigation.Mode.Load:
                this.LoadQuery(options.AddressAndQuery.Params, options.AddressAndQuery.Address);
                break;
        }

        return "";
    }

    /// <summary>
    /// Hook called by <see cref="CreateQuery"/> that lets a parser replace the base URI for
    /// special addresses; the default does nothing.
    /// </summary>
    public virtual void ParseURIExceptions(string CurrentURI, ref string QueryURI, bool AllowUIChange)
    {
    }

    /// <summary>
    /// Writes the parser settings.
    /// </summary>
    public virtual void SaveConfig()
    {
        this.Config.WriteSettingsXML();
    }

    /// <summary>
    /// Hook for the "Site Options" menu item; the default does nothing.
    /// </summary>
    public virtual void ShowOptions()
    {
    }

    #region Properties

    /// <summary>File extensions accepted as images (without the dot).</summary>
    public List<string> AllowedFormats { get; set; }

    public string BestMatchSearchType { get; set; }

    public bool CanPost
    {
        get { return this._canpost; }
        set { this._canpost = value; }
    }

    public string CatString { get; set; }

    public ParserConfig Config
    {
        get { return (ParserConfig)this.iConfig; }
        set { this.iConfig = value; }
    }

    /// <summary>Cookie container; reads and writes are serialized on a private lock object.</summary>
    public CookieContainer Cookie
    {
        get
        {
            lock (this.iCookieL)
            {
                return this.iCookie;
            }
        }
        set
        {
            lock (this.iCookieL)
            {
                this.iCookie = value;
            }
        }
    }

    private string CookieDirectory
    {
        get { return Path.Combine(MyProject.Application.Info.DirectoryPath, "cookies"); }
    }

    /// <summary>Path of this parser's cookie file inside the "cookies" directory.</summary>
    public string CookieFilename
    {
        get { return Path.Combine(this.CookieDirectory, this.DefaultFileName); }
    }

    public ParserData Data
    {
        get { return this.iData; }
        set { this.iData = value; }
    }

    /// <summary>File name used for both the settings and the cookie file: Name + ".ini".</summary>
    public string DefaultFileName
    {
        get { return (this.Name + ".ini"); }
    }

    public Image FavIcon { get; set; }

    /// <summary>When true, <see cref="Initialize"/> adds a "Site Options" menu item.</summary>
    public bool HasOptions { get; set; }

    public string Host
    {
        get { return this.iHost; }
        set { this.iHost = value; }
    }

    public bool Initalized { get; set; }

    /// <summary>True when <see cref="TypeName"/> equals the configured default parser.</summary>
    public bool IsDefault
    {
        get { return this.TypeName.Equals(Core.AppConf.DefaultParser.Value); }
    }

    public bool Local { get; set; }

    public bool LoginRequired { get; set; }

    public string Name
    {
        get { return Convert.ToString(this.iName); }
        set { this.iName = value; }
    }

    public Main OwnerForm { get; set; }

    public string PageString { get; set; }

    public ParserURIs Refs
    {
        get { return this.iHostList; }
        set { this.iHostList = value; }
    }

    public DataTable SchemaTable { get; set; }

    public string SortString { get; set; }

    /// <summary>Write-only shortcut that forwards to <c>Core.StatusLabel</c>.</summary>
    public string StatusLabel
    {
        set { Core.StatusLabel = value; }
    }

    public List<ToolStripItem> SubButtons { get; set; }

    public string TagString { get; set; }

    /// <summary>"thumbs/&lt;host of Refs.BaseUri&gt;" under the application directory.</summary>
    public string ThumbnailDirectory
    {
        get { return Path.Combine(MyProject.Application.Info.DirectoryPath, "thumbs", new Uri(this.Refs.BaseUri).Host); }
    }

    public ContextMenuStrip ToolStripDropdown { get; set; }

    /// <summary>"&lt;namespace&gt;-&lt;type name&gt;" of the concrete parser.</summary>
    public string TypeName
    {
        get { return (this.GetType().Namespace + "-" + this.GetType().Name); }
    }

    public string UserString { get; set; }

    /// <summary>Size units; the ordinal is the power of 1024 used by <see cref="ParseFileSize"/>.</summary>
    public enum SizeType
    {
        B,
        KB,
        MB,
        GB
    }

    // NOTE(review): nothing in this class assigns CookieFile; cookie persistence uses CookieFilename.
    public string CookieFile { get; set; }

    #endregion

    #region IParser members

    /// <summary>
    /// Not implemented; site setup is done through <see cref="Create"/> and <see cref="Initialize"/>.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public void Initalize(Form OwnerForm, params object[] args)
    {
        throw new NotImplementedException();
    }

    #endregion
}

通用页面解析器

/// <summary>
/// Parser base for sites reached over HTTP: adds cookie persistence, an optional login step
/// in front of every request, and the page download pipeline.
/// </summary>
public abstract class WebParser : ParserBase
{
    public const string ScreenlistFailed = "Unable to get screenlist.";

    // Timeout, in milliseconds, passed to the requests built in this class.
    private const int RequestTimeout = 30000;

    private readonly object _ParserLogin = new object();

    /// <summary>True while a login still has to be performed before requests can be sent.</summary>
    protected bool NeedLogin { get; set; }

    public WebParser()
    {
        this.NeedLogin = true;
        this.Config = new WebParserConfig();
    }

    /// <summary>
    /// Saves the cookies when the parser is collected.
    /// </summary>
    ~WebParser()
    {
        // An exception escaping a finalizer terminates the process, so a failed
        // save is reported and otherwise ignored here.
        try
        {
            this.SaveCookie();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }

    /// <summary>
    /// Default implementation does nothing; concrete parsers fill in the site data.
    /// </summary>
    public override void Create(Form OwnerForm, params object[] args)
    {
    }

    public abstract object CreateNewCommentPost(ForumPost post);

    /// <summary>
    /// Default login: clears the cookies, marks the parser as logged in and reports success.
    /// Sites that need real authentication override this.
    /// </summary>
    protected virtual LoginResult DoLogin()
    {
        this.WipeCookies();
        this.NeedLogin = false;
        Core.StatusText = "";
        return new LoginResult("", true);
    }

    /// <summary>
    /// Posts a comment; not supported by default.
    /// </summary>
    /// <exception cref="NotImplementedException">Always, unless overridden.</exception>
    public virtual PostResult DoPost(ForumPost post)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Default implementation does nothing.
    /// </summary>
    public override void FillDataTable(string xmlstr, params string[] args)
    {
    }

    /// <summary>
    /// Runs <see cref="DoLogin"/> immediately and shows the outcome in the status text.
    /// </summary>
    public void ForceLogin()
    {
        LoginResult result = this.DoLogin();
        Core.StatusText = this.NeedLogin ? result.StatusText : "Login Succeeded";
    }

    /// <summary>
    /// Returns the link used to post a comment, extracted from a torrent page; empty by default.
    /// </summary>
    public virtual string GetCommentLink(string strtext)
    {
        return "";
    }

    public abstract List<string> GetScreensFromHTML(ScreenshotListOptions args);

    /// <summary>
    /// Downloads the detail page of a torrent.
    /// </summary>
    /// <param name="tor_id">Torrent id appended to <c>Refs.BaseURI_ID</c>.</param>
    /// <returns>The response text, or an empty string when the request did not succeed.</returns>
    public string GetTorrentData(long tor_id)
    {
        string responseText = "";
        string url = this.Refs.BaseURI_ID + Convert.ToString(tor_id);

        // The original passed an unassigned CancellationToken local, which does not compile;
        // this call cannot be cancelled, so the explicit "none" token is used.
        Rqt_Request req = new Rqt_Request(url, HttpRequestCacheLevel.Revalidate, RequestTimeout, "", CancellationToken.None, null, null);
        using (Rqt_State state = this.ParseRequest(req))
        {
            if (state.Status == RequestStatus.Succeded)
            {
                responseText = state.ResponseText;
            }
        }

        return responseText;
    }

    /// <summary>
    /// Runs the base initialization and then restores the saved cookies.
    /// </summary>
    public override void Initialize()
    {
        base.Initialize();
        this.LoadCookie();
    }

    /// <summary>
    /// Loads the cookie container from <c>CookieFilename</c>. A non-empty container replaces the
    /// current cookies and clears <see cref="NeedLogin"/>.
    /// </summary>
    private void LoadCookie()
    {
        this.WipeCookies();
        object loaded = SerializationBinary.BinaryLoad(this.CookieFilename, typeof(CookieContainer));
        if (loaded == null)
        {
            return;
        }

        CookieContainer saved = (CookieContainer)loaded;
        if (saved.Count != 0)
        {
            this.Cookie = saved;
            this.NeedLogin = false;
        }
    }

    /// <summary>
    /// Builds the download address of a torrent: <c>Refs.BaseURI_Download</c> followed by its id.
    /// </summary>
    public virtual string ParseDownload(Core.RemoteAddress e)
    {
        return (this.Refs.BaseURI_Download + e.Torrent.ID);
    }

    /// <summary>
    /// Downloads <paramref name="uri"/>, stores the text as the current address source, extracts
    /// the torrent list from it and converts the schema table into the view table.
    /// </summary>
    /// <param name="uri">Page address.</param>
    /// <param name="t">Cancellation token passed to the request and to <c>GetTorrentsList</c>.</param>
    /// <remarks>
    /// When the request fails, the exception recorded on the request state is rethrown;
    /// a failure without a recorded exception is ignored.
    /// </remarks>
    public sealed override void ParsePage(string uri, CancellationToken t)
    {
        // NOTE(review): the request goes through the *active* parser, not necessarily this instance — confirm intent.
        WebParser active = (WebParser)Core.IParserActive();
        Rqt_Request req = new Rqt_Request(uri, HttpRequestCacheLevel.BypassCache, RequestTimeout, "", t, null, null);

        using (Rqt_State state = active.ParseRequest(req))
        {
            if (state.Status == RequestStatus.Succeded)
            {
                Core.CurrentAddress.Source = state.ResponseText;
                this.GetTorrentsList(Core.CurrentAddress.Source, uri, t);
                Core.DB.Manager.ConverToViewTable(this.SchemaTable.AsDataView());
            }
            else if (state.Exception != null)
            {
                throw state.Exception;
            }
        }
    }

    /// <summary>
    /// Sends <paramref name="req"/> with this parser's cookies, logging in first when required.
    /// </summary>
    /// <param name="req">Request to send; its <c>Cookies</c> are overwritten.</param>
    /// <returns>
    /// The request state. When a required login fails, the login's status text is shown and a
    /// new, empty <c>Rqt_State</c> is returned without sending the request.
    /// </returns>
    public Rqt_State ParseRequest(Rqt_Request req)
    {
        // Only one thread at a time may decide about and perform the login.
        lock (this._ParserLogin)
        {
            if (this.LoginRequired && this.NeedLogin)
            {
                LoginResult result = this.DoLogin();
                if (this.NeedLogin)
                {
                    Core.StatusText = result.StatusText;
                    return new Rqt_State();
                }
            }
        }

        req.Cookies = this.Cookie;
        Rqt_State state = Network.Rqt_Send(req);
        if (state.Status == RequestStatus.Succeded)
        {
            this.Cookie.Add(state.Response.Cookies);
        }

        return state;
    }

    /// <summary>
    /// Renders the comments found in <paramref name="data"/>; empty by default.
    /// </summary>
    public virtual string PrintComments(string data)
    {
        return "";
    }

    /// <summary>
    /// Writes the cookie container to <c>CookieFilename</c>.
    /// </summary>
    private void SaveCookie()
    {
        SerializationBinary.BinarySave(this.CookieFilename, this.Cookie);
    }

    /// <summary>
    /// Replaces the cookies with an empty container and requires a new login if the site needs one.
    /// </summary>
    public void WipeCookies()
    {
        this.Cookie = new CookieContainer();
        this.NeedLogin = this.LoginRequired;
    }

    /// <summary>
    /// The parser settings typed as <c>WebParserConfig</c>. Intentionally hides the base
    /// <c>Config</c> property, hence the <c>new</c> modifier.
    /// </summary>
    public new WebParserConfig Config
    {
        get { return (WebParserConfig)base.Config; }
        set { base.Config = value; }
    }
}

Cheggit网站指定解析器

/// <summary>
/// Parser for cheggit.net: site URIs and query names, reCAPTCHA login, comment posting and
/// extraction of torrent rows, details and screenshots from the site's HTML.
/// </summary>
internal class CheggitParser : WebParser
{
    public object _Login_lock = new object();
    public const string DownloadTorrentAction = "&action=downloadtorrent";
    public const string InvalidUserNameOrPassword = "Invalid username or password";
    public const string LoginPage = "http://cheggit.net/index.php";
    public const string LoginQueryNew = "username={0}&password={1}&{2}&button=Login&action=login";
    public const string PatPageError = "<li class=\"error\">.*</li>";
    public const string PostFormQuery = "action=replytopost&p={0}&f=8&subject={1}&body={2}&format_type=bbcode";
    public const string recaptchaqueryformat = "recaptcha_challenge_field={0}&recaptcha_response_field={1}";
    public bool TryingToLogin;

    // Timeouts in milliseconds.
    private const int LoginTimeout = 30000;
    private const int PostTimeout = 20000;

    /// <summary>
    /// Checks whether the torrent page already contains a comment posted by the configured login.
    /// </summary>
    /// <param name="strtext">HTML of the torrent page.</param>
    /// <returns>True when a "posted by" line for the configured user is found.</returns>
    public bool CheckMyComments(string strtext)
    {
        // The login name is user data, so it is escaped before being embedded in the pattern.
        string login = Regex.Escape(this.Config.Login.Value.ToString());
        string pattern = string.Format("<p class=\"info\">posted by <a href=\".*\">{0}</a>.*</p>", login);
        return Regex.Match(strtext, pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase).Success;
    }

    /// <summary>
    /// Fills in the site name, URIs, query parameter names and the sort/category/search-type lists.
    /// </summary>
    /// <param name="OwnerForm">Hosting form; must be the application's <c>Main</c> form.</param>
    /// <param name="args">Not used.</param>
    public override void Create(Form OwnerForm, params object[] args)
    {
        this.Name = "Cheggit.net";
        this.OwnerForm = (Main)OwnerForm;

        this.Refs.BaseUri = "http://cheggit.net";
        this.Refs.BaseURI_List = this.Refs.BaseUri + "/browsetorrents.php";
        this.Refs.BaseURI_ID = this.Refs.BaseUri + "/torrents.php?torrentid=";
        this.Refs.BaseURI_Download = this.Refs.BaseUri + "/download.php?torrentid=";
        this.Refs.BaseURI_Search = this.Refs.BaseURI_List + "?";
        this.Refs.BaseURI_UserIdentifer = "userid=";
        this.Refs.BaseURI_UserPage = "users.php";
        this.Data.StartPageIndex = 1;
        this.Refs.BaseURI_PostQuery = "p=$p&torrentid=$id";
        this.Refs.BaseURI_NewComment = "forums.php?op=post";
        this.FavIcon = Properties.Resources.love;

        this.Data.Query_Search = "filter";
        this.Data.Query_Page = "p";
        this.Data.Query_Sort = "sort";
        this.Data.Query_Cat = "cat";
        this.Data.Query_SearchUser = "who:";
        this.Data.Query_SearchDelim = ";";

        this.Refs.BaseURI_LoginQuery = "";
        this.Refs.BaseURI_Pics = @"/pics/(\d{4})/(\d{2})/(\d{2})/t";
        this.Refs.BaseURI_PicsThumbString = "/t";

        this.Data.SortList.Clear();
        this.Data.SortList.Add("Default", 0);
        this.Data.SortList.Add("Size", 1);
        this.Data.SortList.Add("Seed", 2);
        this.Data.SortList.Add("Leech", 3);
        this.Data.SortList.Add("Peer", 4);

        this.Data.CatList.Clear();
        this.Data.CatList.Add("Inactive", 0);
        this.Data.CatList.Add("Straight", 1);
        this.Data.CatList.Add("Gay", 2);
        this.Data.CatList.Add("Lesbian", 3);
        this.Data.CatList.Add("Ethnic", 4);
        this.Data.CatList.Add("Fetish", 5);
        this.Data.CatList.Add("Animated", 6);
        this.Data.CatList.Add("Games", 7);
        this.Data.CatList.Add("Other", 8);

        this.Data.SearchTypes.Clear();
        this.Data.SearchTypes.Add("Default", "");
        this.Data.SearchTypes.Add("All", "all");
        this.Data.SearchTypes.Add("Title", "title");
        this.Data.SearchTypes.Add("Description", "desc");
        this.Data.SearchTypes.Add("Tags", "tags");
        this.Data.SearchTypes.Add("User", "user");

        this.CanPost = true;
    }

    /// <summary>
    /// Not implemented.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override object CreateNewCommentPost(ForumPost post)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Logs in: fetches the login page, solves its reCAPTCHA through <c>Recaptcha.GetQuery</c>,
    /// then posts the credentials. On success the response cookies are stored.
    /// </summary>
    /// <returns>
    /// The login result. On failure <c>StatusText</c> is either the site's error message or the
    /// default "Unable to login to &lt;host&gt;". Exceptions are shown in a message box, not rethrown.
    /// </returns>
    protected override LoginResult DoLogin()
    {
        base.DoLogin();
        LoginResult result = new LoginResult(string.Format("Unable to login to {0}", this.Host), false);

        try
        {
            // These requests are not cancellable. (The original passed an unassigned token local.)
            Rqt_Request indexReq = new Rqt_Request(LoginPage, HttpRequestCacheLevel.Reload, LoginTimeout, "", CancellationToken.None, null, null);
            using (Rqt_State indexState = Network.Rqt_Send(indexReq))
            {
                if (indexState.Status != RequestStatus.Succeded)
                {
                    if (indexState.Exception != null)
                    {
                        throw indexState.Exception;
                    }
                }
                else
                {
                    ReCaptchaResult query = Recaptcha.GetQuery(indexState.ResponseText, this);
                    if (query.Success)
                    {
                        this.SubmitCredentials(query, result);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // TODO: the dialog offers Retry, but retrying is not implemented yet.
            MessageBox.Show(ex.Message, "Unable to login to " + this.Host, MessageBoxButtons.RetryCancel);
        }

        this.NeedLogin = !result.Success;
        return result;
    }

    // Posts the login form and records the outcome (cookies or the site's error text) in result.
    private void SubmitCredentials(ReCaptchaResult query, LoginResult result)
    {
        // Every value is form-encoded so that characters such as '&', '=' or spaces in the
        // password or captcha answer cannot break the query.
        string captcha = string.Format(recaptchaqueryformat, EscapeFormValue(query.ChallageKey), EscapeFormValue(query.Text));
        string form = string.Format(LoginQueryNew, EscapeFormValue(this.Config.Login.Value), EscapeFormValue(this.Config.Password.Value), captcha);

        Rqt_Request loginReq = new Rqt_Request(LoginPage, HttpRequestCacheLevel.Reload, LoginTimeout, form, CancellationToken.None, null, null);
        using (Rqt_State loginState = Network.Rqt_Send(loginReq))
        {
            if (loginState.Status != RequestStatus.Succeded)
            {
                if (loginState.Exception != null)
                {
                    throw loginState.Exception;
                }

                return;
            }

            Match error = Core.RegExFirst(loginState.ResponseText, PatPageError);
            if (error.Success)
            {
                // The site reported a problem (for example InvalidUserNameOrPassword): show and keep it.
                // TODO: offer a retry here.
                HTMLParser.HtmlTag tag = new HTMLParser.HtmlTag(error.Value);
                string text = tag.Text;
                MessageBox.Show(text);
                result.StatusText = text;
            }
            else
            {
                this.Cookie.Add(loginState.Response.Cookies);
                result.Success = true;
            }
        }
    }

    // Converts a value to text and percent-encodes it for use in a form/query string.
    private static string EscapeFormValue(object value)
    {
        return Uri.EscapeDataString(Convert.ToString(value) ?? string.Empty);
    }

    /// <summary>
    /// Posts a comment on a torrent, waiting at most 20 seconds for the outcome.
    /// </summary>
    /// <param name="pt">Post to submit (torrent id, subject, body, skip flag).</param>
    /// <returns>
    /// The post result. When the work fails or does not finish in time, a result with
    /// <c>Success = false</c> and the reason in <c>Data</c> is returned instead of throwing.
    /// </returns>
    public override PostResult DoPost(ForumPost pt)
    {
        Task<PostResult> task = new Task<PostResult>(() => this.SubmitComment(pt));
        task.Start();

        try
        {
            if (task.Wait(PostTimeout))
            {
                return task.Result;
            }

            // Reading task.Result here would block until completion and defeat the timeout.
            // The background work keeps running; only its outcome is no longer awaited.
            PostResult timedOut = new PostResult();
            timedOut.Success = false;
            timedOut.Data = "Posting timed out.";
            return timedOut;
        }
        catch (AggregateException ex)
        {
            Console.WriteLine(ex.Message);
            PostResult failed = new PostResult();
            failed.Success = false;
            failed.Data = ex.GetBaseException().Message;
            return failed;
        }
    }

    // Loads the torrent page, finds its comment link and submits the reply form.
    private PostResult SubmitComment(ForumPost pt)
    {
        PostResult result = new PostResult();
        string torrentData = this.GetTorrentData(pt.TorrentID);
        if (string.IsNullOrEmpty(torrentData))
        {
            return result;
        }

        string commentLink = this.GetCommentLink(torrentData);
        if (pt.SkipIfHasComments && this.CheckMyComments(torrentData))
        {
            // Already commented: treated as success without posting again.
            result.Success = true;
            return result;
        }

        long postId = Conversions.ToLong(Navigation.GetQueryParamValue("p", commentLink));
        string form = string.Format(PostFormQuery, postId, Uri.EscapeDataString(pt.Subject), Uri.EscapeDataString(pt.Body));

        Rqt_Request req = new Rqt_Request(commentLink, HttpRequestCacheLevel.Default, PostTimeout, "", CancellationToken.None, null, null)
        {
            Query = form
        };

        using (Rqt_State state = this.ParseRequest(req))
        {
            if (state.Status == RequestStatus.Succeded)
            {
                result.Success = true;
            }
            else if (state.Exception != null)
            {
                result.Data = state.Exception.Message;
            }
        }

        return result;
    }

    /// <summary>
    /// Fills one row of the schema table from the XML fragment produced by <see cref="GetTorrentsList"/>.
    /// </summary>
    /// <param name="xmlstr">XML whose first child holds the title link and tags and whose last
    /// child holds the id, file count, comment count, size, seeders, leechers and user cells, in that order.</param>
    /// <param name="args">args[0] is stored (trimmed) as the "DateShared" value.</param>
    public override void FillDataTable(string xmlstr, params string[] args)
    {
        string dateShared = args[0];
        List<string> tags = new List<string>();

        XmlDocument document = new XmlDocument();
        document.LoadXml(xmlstr);
        XmlNode titleCell = document.DocumentElement.FirstChild;
        XmlNode dataCell = document.DocumentElement.LastChild;

        DataRow dr = this.SchemaTable.NewRow();
        dr["Title"] = titleCell.FirstChild["a"].InnerXml.Trim();
        dr["ID"] = dataCell.ChildNodes[0].InnerText.Trim();
        dr["Files"] = dataCell.ChildNodes[1].InnerText.Trim();

        // An unparsable comment count is stored as 0 (TryParse leaves the out value at 0).
        short comments;
        short.TryParse(dataCell.ChildNodes[2].InnerText.Trim(), out comments);
        dr["Com"] = comments;

        dr["Size"] = this.ParseFileSize(dataCell.ChildNodes[3].InnerText, "GB", "MB", "KB");
        dr["Sd"] = dataCell.ChildNodes[4].InnerText.Trim();
        dr["Lch"] = dataCell.ChildNodes[5].InnerText.Trim();
        dr["UserName"] = dataCell.ChildNodes[6].InnerText.Trim();

        foreach (XmlElement cell in dataCell)
        {
            if (!cell.OuterXml.Contains(Core.IParserActive().Refs.BaseURI_UserScript))
            {
                continue;
            }

            // The user link's first attribute ends in "=<user id>".
            string userLink = cell.ChildNodes[0].Attributes.Item(0).Value;
            dr["UserID"] = userLink.Substring(userLink.IndexOf("=") + 1);

            // TODO (carried over from the original): confirm that the tags are the children of the title cell's last child.
            foreach (XmlNode tagNode in titleCell.LastChild)
            {
                string tagText = tagNode.InnerText.Trim();
                if (!string.IsNullOrEmpty(tagText))
                {
                    tags.Add(tagText);
                }
            }
        }

        dr["Tags"] = String.Join(" ", tags.ToArray());
        dr["DateShared"] = dateShared.Trim();
        dr["Host"] = this.Host;
        Core.FinalizeRow(ref dr, this);
    }

    /// <summary>
    /// Extracts the "post comment" link from a torrent page.
    /// </summary>
    /// <param name="strtext">HTML of the torrent page.</param>
    /// <returns>The absolute link, or an empty string when the page has none.</returns>
    public override string GetCommentLink(string strtext)
    {
        string link = "";
        Match match = Regex.Match(strtext, "<a href=.*op=post.*?>", RegexOptions.Multiline | RegexOptions.IgnoreCase);
        if (match.Success)
        {
            HTMLParser.HtmlTag tag = new HTMLParser.HtmlTag(match.Value + "</a>");
            link = tag.Attributes["href"];
            link = this.Refs.BaseUri + "/" + Core.CleanXML(link.Replace("href=", ""));
        }

        return link;
    }

    /// <summary>
    /// Downloads the torrent's detail page and extracts its statistics.
    /// </summary>
    /// <param name="row">Row whose "ID" column identifies the torrent.</param>
    /// <param name="cache">Cache level for the request.</param>
    /// <returns>
    /// Data whose dictionary maps "Category", "Size", "Views", "Hits", "Seeders", "Leechers" and
    /// "Snatches" to the text found (or <see cref="DBNull"/>) and "Com" to the number of posts;
    /// null when the request fails or an exception occurs.
    /// </returns>
    public override DynamicObjectData GetObjectData(DataRow row, HttpRequestCacheLevel cache)
    {
        base.GetObjectData(row, cache);
        try
        {
            string[] fields = new string[] { "Category", "Size", "Views", "Hits", "Seeders", "Leechers", "Snatches" };
            string fieldFormat = "<li>{0}: [^<>]*";
            string postPattern = "<div class=\"post\" .*>";

            // BaseURI_ID already ends in "torrentid=", so the id is appended directly, as in
            // GetTorrentData. (The original inserted '&', producing "torrentid=&<id>".)
            Rqt_Request req = new Rqt_Request
            {
                URI = this.Refs.BaseURI_ID + Convert.ToString(row["ID"]),
                Cache = cache
            };

            using (Rqt_State state = this.ParseRequest(req))
            {
                if (state.Status != RequestStatus.Succeded)
                {
                    if (state.Exception != null)
                    {
                        Core.StatusText = state.Exception.Message;
                    }

                    return null;
                }

                string page = state.ResponseText;
                Dictionary<string, object> values = new Dictionary<string, object>();
                foreach (string field in fields)
                {
                    object value = DBNull.Value;
                    Match match = Core.RegExFirst(page, string.Format(fieldFormat, field));
                    if (match.Success)
                    {
                        // "<li>Name: value" -> the text after the first ':'.
                        value = match.Value.Split(":".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)[1].Trim();
                    }

                    values.Add(field, value);
                }

                MatchCollection posts = Regex.Matches(page, postPattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
                values.Add("Com", posts.Count);
                return new DynamicObjectData { Dictionary = values };
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        return null;
    }

    /// <summary>
    /// Collects image addresses from the description part of a torrent page.
    /// </summary>
    /// <param name="args">Page source and options. <c>args.Icon</c> is set when the post is marked for
    /// deletion. With <c>args.Poster</c> set, the first image that is not a site-hosted picture is
    /// returned alone.</param>
    /// <returns>The image addresses found between the description start and end markers.</returns>
    public override List<string> GetScreensFromHTML(ScreenshotListOptions args)
    {
        List<string> screens = new List<string>();
        bool inDescription = false;
        char[] separators = new char[] { '\'', '"', '=', '<', '>' };

        using (StringReader reader = new StringReader(args.Source))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (ContainsIgnoreCase(line, "<!-- torrent description -->"))
                {
                    inDescription = true;
                }

                if (line.Contains("<div class=\"torrentwarning\"><H1>This post has been marked for deletion</H1>"))
                {
                    args.Icon = Properties.Resources.mail_remove;
                }

                // The line carrying the end marker is still scanned; scanning stops after it.
                bool lastLine = ContainsIgnoreCase(line, "<!-- BEGIN description middle -->");

                if (inDescription && (ContainsIgnoreCase(line, "src=") || ContainsIgnoreCase(line, "href=")))
                {
                    foreach (string token in line.Split(separators, StringSplitOptions.RemoveEmptyEntries))
                    {
                        if (!this.IsAllowedImage(token))
                        {
                            continue;
                        }

                        if (args.Poster)
                        {
                            if (!Regex.Match(token, this.Refs.BaseURI_Pics, RegexOptions.IgnoreCase).Success)
                            {
                                screens.Clear();
                                screens.Add(token);
                                return screens;
                            }

                            screens.Add(token);
                        }
                        else if (!screens.Contains(token) && !screens.Contains(token.Replace(this.Refs.BaseURI_PicsThumbString, "/")))
                        {
                            // Skip duplicates and thumbnails whose full-size picture is already listed.
                            screens.Add(token);
                        }
                    }
                }

                if (lastLine)
                {
                    break;
                }
            }
        }

        return screens;
    }

    // True when token is an http:// address with an allowed image extension that is not on the block list.
    private bool IsAllowedImage(string token)
    {
        if (!token.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        bool allowed = false;
        foreach (string format in this.AllowedFormats)
        {
            if (token.EndsWith(format, StringComparison.OrdinalIgnoreCase))
            {
                allowed = true;
                break;
            }
        }

        return allowed && !Core.BlockImages.TargetContains(token, false);
    }

    // Culture-independent, case-insensitive substring test.
    private static bool ContainsIgnoreCase(string text, string value)
    {
        return text.IndexOf(value, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Scans a torrent list page line by line, reports headings and errors in the status label
    /// and hands every table row to <see cref="FillDataTable"/>.
    /// </summary>
    /// <param name="source">HTML of the list page.</param>
    /// <param name="uri">Address the page came from (not used here).</param>
    /// <param name="t">Cancellation token (not observed by this implementation).</param>
    public override void GetTorrentsList(string source, string uri, CancellationToken t)
    {
        string dateShared = "";

        using (StringReader reader = new StringReader(source))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // NOTE(review): the original guarded this with a flag that was reset on every line
                // and therefore never had an effect; behaviour is unchanged (the last message wins).
                if (line.Contains("class=\"error\"") || line.Contains("<h1>") || line.Contains("<h2>")
                    || line.Contains("<h3>") || line.Contains("<p>No results.</p>"))
                {
                    try
                    {
                        this.StatusLabel = XElement.Parse(Core.CleanXML(line).Trim()).Value;
                    }
                    catch (XmlException)
                    {
                        // Best effort: a heading that is not well-formed XML is not reported.
                    }
                }

                // A table-head line with <strong> but no link carries the date shared by the rows below it.
                if (line.Contains("class=\"tablehead\"") && !line.Contains("browsetorrents.php")
                    && line.Contains("<strong>") && !line.Contains("href"))
                {
                    dateShared = line.Substring(line.IndexOf("<strong>") + 8);
                    dateShared = dateShared.Remove(dateShared.IndexOf("</"));
                }

                if (line.Contains("<td align=\"left\" class=\"tabletext\">"))
                {
                    string rowXml = this.ReadRowXml(reader, line);
                    if (rowXml == null)
                    {
                        // The page ended in the middle of a row (truncated download): drop the partial row.
                        break;
                    }

                    this.FillDataTable(rowXml, new string[] { dateShared });
                }
            }
        }
    }

    // Reads the lines of one table row, up to "</tr>", into the XML fragment FillDataTable expects.
    // Returns null when the input ends before the row is closed.
    private string ReadRowXml(StringReader reader, string line)
    {
        StringBuilder builder = new StringBuilder();
        builder.AppendLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<root>");

        while (!line.Contains("</tr>"))
        {
            bool isDownloadLink = line.Contains(DownloadTorrentAction);
            bool isIcon = line.Contains("inline_icon");

            if (!isDownloadLink && !isIcon)
            {
                // NOTE(review): this literal is href=""#" exactly as published; it may be a
                // mistranslation of href="#" — confirm against the site's markup.
                if (!line.Contains("href=\"\"#\""))
                {
                    builder.AppendLine(line);
                }
                else
                {
                    builder.AppendLine("<td>");
                    // Skip "?torrentid=" (11 characters) to reach the id.
                    string id = line.Substring(line.IndexOf("?") + 11);
                    builder.AppendLine("<td>" + id.Remove(id.IndexOf("'")) + "</td>");
                }
            }
            else if (!isIcon)
            {
                builder.AppendLine("<td>");
                // Skip "?torrentid=" (11 characters); the id ends at the next '&'.
                string id = line.Substring(line.IndexOf("?") + 11);
                builder.AppendLine("<td>" + id.Remove(id.IndexOf("&")) + "</td>");
            }
            else
            {
                builder.AppendLine("<td>unknown</td>");
            }

            string next = reader.ReadLine();
            if (next == null)
            {
                return null;
            }

            // Remove "&cat=", "&sort=" and "&filter=" so the remaining markup has no bare '&'.
            line = Regex.Replace(next, "&(cat|sort|filter)=", "");
        }

        builder.AppendLine("</td>");
        builder.AppendLine("</root>");
        return builder.ToString();
    }

    /// <summary>
    /// Builds the download address: the base download address plus the download action.
    /// </summary>
    public override string ParseDownload(Core.RemoteAddress e)
    {
        return (base.ParseDownload(e) + DownloadTorrentAction);
    }

    /// <summary>
    /// Rewrites the search text according to the configured search type, unless the text already
    /// has the form "&lt;type&gt;:[...]".
    /// </summary>
    /// <param name="text">Search text; may be replaced.</param>
    public override void ParseSearchQuery(ref string text)
    {
        base.ParseSearchQuery(ref text);

        string pattern = "(" + String.Join("|", this.Data.SearchTypes.Values.ToArray<object>()) + @"):\[.*\]";
        if (Regex.IsMatch(text, pattern, RegexOptions.IgnoreCase))
        {
            return;
        }

        foreach (KeyValuePair<string, object> pair in this.Data.SearchTypes)
        {
            if (!pair.Key.Equals(this.Config.SearchType.Value))
            {
                continue;
            }

            string prefix = Convert.ToString(pair.Value);

            // NOTE(review): marked "to be verified" in the original. As written, only the empty
            // ("Default") prefix wraps the text, giving ":[text]"; the condition may be inverted.
            if (prefix == "")
            {
                text = prefix + ":[" + text + "]";
            }
        }
    }

    /// <summary>
    /// When the current address is a user page, switches the base URI to the user list page and
    /// keeps the first query parameter of the address (everything after ".php?" up to '&amp;') as
    /// <c>UserString</c>.
    /// </summary>
    public override void ParseURIExceptions(string CurrentURI, ref string QueryURI, bool ReflectUI)
    {
        if (!CurrentURI.Contains(this.Refs.BaseURI_User))
        {
            return;
        }

        QueryURI = this.Refs.BaseURI_UserList;
        string user = CurrentURI.Substring(CurrentURI.IndexOf(".php?") + 5);
        this.UserString = user;
        if (user.Contains("&"))
        {
            this.UserString = user.Remove(user.IndexOf("&"));
        }
    }
}

转载于:https://www.cnblogs.com/czllfy/archive/2011/04/25/2028802.html

Cheggit解析器模块分析相关推荐

  1. UDP编程-DNS解析器的分析与实现(C语言)

    基本知识 基本介绍 域名系统(英文:Domain Name System,缩写:DNS)的作用是将人类可读的域名 (如,www.example.com) 转换为机器可读的 IP 地址 (如,192.0 ...

  2. 自定义协议的命令解析器

    文章目录 前言 Switch...case类型命令解析的弊端 程序的改进目标 对命令解析器的分析 命令解析器的代码示例 用于代码工程需要解决的问题 可用于代码工程的命令解析器实现 一些新的认识 前言 ...

  3. mybatis源码解析一 xml解析(解析器)

    最近闲来无事,看着一些源码类的书籍,只是光看,好像并不能给自己很好的益处,无法记下来,所以就有了这个Mybatis源码解析系列的博客.网上也有大量的源码解析,在此记录有两个原因,一是为了加深自己的印象 ...

  4. wireshark协议解析器原理与插件编写

    工作原理 每个解析器解码自己的协议部分, 然后把封装协议的解码传递给后续协议. 因此它可能总是从一个Frame解析器开始, Frame解析器解析捕获文件自己的数据包细节(如:时间戳), 将数据交给一个 ...

  5. SQL解析器的性能测试

    对同一个sql语句,使用3种解析器解析出ast语法树(这是编译原理上的说法,在sql解析式可能就是解析器自定义的statement类型),执行100万次的时间对比. package demo.test ...

  6. PackageManagerService启动详解(五)之Android包信息体和解析器(中)

        PKMS启动详解(五)之Android包信息体和包解析器(中) Android PackageManagerService系列博客目录: PKMS启动详解系列博客概要 PKMS启动详解(一)之 ...

  7. Java高性能解析器实现思路及方法

    在某些情况下,你可能需要在Java中实现你自己的数据或语言解析器,也许是这种数据格式或语言缺乏标准的Java或开源解析器可以使用.或者虽然有现成的解析器实现,但它们要么太慢,要么太占内存,要么就是没有 ...

  8. 实现一个webpack模块解析器

    最近在学习 webpack源码,由于源码比较复杂,就先梳理了一下整体流程,就参考官网的例子,手写一个最基本的 webpack 模块解析器. 代码很少,github地址:手写webpack模块解析器 整 ...

  9. Postgresql源码(85)查询执行——表达式解析器分析(select 1+1如何执行)

    相关 <Postgresql源码(61)查询执行--最外层Portal模块> <Postgresql源码(62)查询执行--子模块ProcessUtility> <Pos ...

最新文章

  1. scala编写排序算法
  2. Linux Tomcat安装,Linux配置Tomcat,Linux Tomcat修改内存,Linux tomcat修改端口
  3. 三十九、文件的逻辑结构
  4. 第十、十一周项目一-点-圆-圆柱类族的设计(1)
  5. 自动化监控--zabbix中的用户和用户组详解
  6. MySQL带ANY关键字的子查询
  7. JVM基础系列第10讲:垃圾回收的几种类型
  8. 【图像隐写】基于matlab LBP数字水印嵌入攻击提取【含Matlab源码 1672期】
  9. Linux 怎么找回管理员密码?
  10. matlab5.0软件下载,MATLAB手机版
  11. 2018年湘潭大学程序设计竞赛 E 吃货
  12. JAVA代码重复率多少达标_【案例】代码重复率太高不要怕,求真老师教你化险为夷!...
  13. linux gem安装软件,安装gem报错
  14. windows10安装MySQL8.0
  15. RestTemplate application/octet-stream处理
  16. ictclas包 java_ICTCLAS分词系统Java调用接口在Eclipse中的安装
  17. Excel 多个 Sheet 数据关联
  18. 机械革命计算机配置,单品:机械革命X6Ti-S_机械革命笔记本电脑_笔记本评测-中关村在线...
  19. 把Fusioncharts封装成Ext组件的思路
  20. 用scratch编程一个问答游戏

热门文章

  1. mutable和volatile关键字
  2. 【 ORA-27102: out of memory处理】
  3. CentOS7 安装Mysql5.6 后启动失败处理 The server quit without updating PID file
  4. BufferedInputStream的read()方法源码解析
  5. 微擎应用笔记3--manifest.xml文件使用说明
  6. springboot整合通用mapper操作数据库
  7. 汇总少了退款汇总 多了一笔支付汇总 很可能是因为商户退款配置的是正交易权限(配置的问题)
  8. 实战系列-Spring Boot跨域解决方案
  9. CentOS7 防火墙关闭
  10. Golang中Goroutine与线程