C语言实现下载网页的方法【原创】
Author:张继飞
最近在做 RSS 订阅功能,需要把服务器端的 XML 文件下载到本地,这样解析起来更加方便。因此使用 socket 实现了向服务器发送 HTTP 请求并下载网页的功能,实现时参考了网上的一些代码。
环境:Linux
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netdb.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <errno.h>
#include <stdlib.h>
#include <arpa/inet.h>
/* Size used when clearing the header buffers and as the read limit in GetResponseHeader(). */
#define MAXHEADERSIZE 1024
/*
 * Status codes returned by the helpers below.
 * NOTE: these are NOT C booleans. `true` expands to 0 and `false` to -1,
 * so a plain `if (Socket())` is taken on FAILURE, not on success.
 */
#define true 0
#define false -1
/* Request text built by FormatRequestHeader() and sent by SendRequest(). */
char m_requestheader[1024];
/* Raw response header bytes stored by GetResponseHeader(). */
char m_ResponseHeader[1024];
/* Port recorded by Connect(); starts at 80. */
int m_port = 80;
/* Zeroed at the start of main(); not otherwise referenced in the visible code. */
char m_ipaddr[256];
/* Set to 1 by Connect() on success and reset to 0 by CloseSocket(). */
int m_bConnected = 0;
/* Socket descriptor created by Socket(); -1 while no socket is open. */
int m_s = -1;
/* Host entry returned by gethostbyname() in Connect(). */
struct hostent *m_phostent = NULL;
/* Read cursor into m_ResponseHeader used by GetResponseLine(). */
int m_nCurIndex = 0;
/* Set to 1 once GetResponseHeader() has read the header from the socket. */
int m_bResponsed = 0;
/* Number of bytes stored in m_ResponseHeader; -1 until the header has been read. */
int m_nResponseHeaderSize = -1;
/* Declared only; no definition appears in the visible source. */
int GetServerState();
/* Looks up a header field in the stored response header and copies its value. */
int GetField(const char* szSession,char *szValue);
/* Copies the next line of the stored response header into pLine. */
int GetResponseLine(char *pLine,int nMaxLength);
/* Reads the response header from the socket (once) and returns it. */
const char *GetResponseHeader(int Length);
/* Builds the GET request in m_requestheader and returns its length. */
long FormatRequestHeader(char *pServer,char *pObject,char* pCookie,char *pReferer,long nFrom,long nTo,int nServerType);
/* Copies the built request header into a caller buffer. */
const int GetRequestHeader(char *pHeader,int nMaxLength);
/* Sends a request over the connected socket, then reads the response header. */
int SendRequest(const char* pRequestHeader,long Length);
/* Reads up to nMaxLength bytes from the connected socket. */
long Receive(char* pBuffer,long nMaxLength);
/* Resolves szHostName and connects the socket to it. */
int Connect(char* szHostName,int nPort);
/* Closes the socket and resets the connection state. */
int CloseSocket();
/* Creates the TCP socket stored in m_s. */
int Socket();
int main(void)
{
int i;
FILE *stream;
for(i=0;i<256;i++)
m_ipaddr[i]='\0';
memset(m_requestheader,0,MAXHEADERSIZE);
memset(m_ResponseHeader,0,MAXHEADERSIZE);
char *strServer = "rss.news.yahoo.com";
char *strObject = "/rss/terrorism";
long nLength;
//AfxParseURL("http://rss.news.yahoo.com/rss/terrorism",dwServiceType,strServer,strObject,nPort);
nLength = FormatRequestHeader(strServer,strObject,NULL,NULL,0,0,0);
Socket();
Connect(strServer,80);
SendRequest(NULL,0);
char szValue[30];
GetField("Content-Length",szValue);
int nFileSize = atoi(szValue);
int nCompletedSize = 0;
char pData[1024];
int nReceSize = 0;
if ((stream = fopen("rss.xml", "w+")) == NULL)
{
printf("can not open file");
}
while((nCompletedSize < nFileSize) && (stream != NULL))
{
nReceSize = Receive(pData,1024);
if(nReceSize == 0)
{
printf("receive from server only 0!\n");
break;
}
else if(nReceSize == -1)
{
printf("receive from server error!\n");
break;
}
else
{
fwrite(pData, nReceSize, 1, stream);
printf("strlen(pData) = %d\n",strlen(pData));
}
printf("size = %d\n",nReceSize);
}
fclose(stream);
CloseSocket();
return 0;
}
/*
 * Creates the TCP/IPv4 socket and stores its descriptor in m_s.
 *
 * Does nothing and reports failure while a connection is already marked as
 * established. Returns `true` (0) on success and `false` (-1) otherwise.
 */
int Socket()
{
    int status = false;

    if (!m_bConnected)
    {
        m_s = socket(AF_INET, SOCK_STREAM, 0);
        printf("m_s %d\n", m_s);
        if (m_s != -1)
        {
            status = true;
        }
        else
        {
            printf("open socket error!\n");
        }
    }
    return status;
}
/*
 * Resolves szHostName and connects the socket in m_s to it on nPort.
 *
 * szHostName: host name or dotted IPv4 address; must not be NULL.
 * nPort:      TCP port in the range 1..65535.
 *
 * An existing connection is closed first. If no socket is open at that
 * point, one is created through Socket(), so a reconnect works.
 *
 * Returns `true` (0) on success and `false` (-1) on any failure.
 */
int Connect(char *szHostName,int nPort)
{
    struct sockaddr_in destaddr;

    if (szHostName == NULL)
    {
        return false;
    }
    if (nPort <= 0 || nPort > 65535)
    {
        printf("invalid port %d\n", nPort);
        return false;
    }
    if (m_bConnected)
    {
        CloseSocket();
    }
    /* CloseSocket() leaves m_s at -1; connect() needs a live descriptor. */
    if (m_s == -1 && Socket() == false)
    {
        return false;
    }

    m_port = nPort;
    m_phostent = gethostbyname(szHostName);
    if (m_phostent == NULL)
    {
        printf("gethostbyname error!\n");
        return false;
    }
    /* The socket is AF_INET, so only an IPv4 result can be used. */
    if (m_phostent->h_addrtype != AF_INET || m_phostent->h_addr_list[0] == NULL)
    {
        printf("gethostbyname error!\n");
        return false;
    }

    memset(&destaddr, 0, sizeof destaddr);
    destaddr.sin_family = AF_INET;
    /* Honour the requested port instead of a fixed 80. */
    destaddr.sin_port = htons((unsigned short)nPort);
    /* struct hostent comes from <netdb.h>; memcpy avoids an alignment-unsafe cast. */
    memcpy(&destaddr.sin_addr, m_phostent->h_addr_list[0], sizeof destaddr.sin_addr);

    printf("ms = %d\n", m_s);
    if (connect(m_s, (struct sockaddr *)&destaddr, sizeof destaddr) == -1)
    {
        printf("Can not connect!\n");
        return false;
    }
    m_bConnected = 1;
    return true;
}
/*
 * Builds an HTTP/1.1 GET request in m_requestheader.
 *
 * pServer:  host name for the Host header; must not be NULL.
 * pObject:  request path, e.g. "/index.html"; must not be NULL.
 * pReferer: optional Referer value; the header is omitted when NULL.
 * pCookie, nFrom, nTo, nServerType: accepted for interface compatibility
 *           but not used when building the request.
 *
 * Returns the length of the request in bytes, or -1 when an argument is
 * NULL or the request does not fit into m_requestheader. On failure the
 * buffer is left empty so a truncated request is never sent.
 */
long FormatRequestHeader(char *pServer,char *pObject, char *pCookie,char *pReferer,long nFrom,long nTo,int nServerType)
{
    const size_t nCapacity = sizeof m_requestheader;
    const char *szRefName = "";
    const char *szRefValue = "";
    const char *szRefEnd = "";
    int nWritten;

    (void)pCookie;
    (void)nFrom;
    (void)nTo;
    (void)nServerType;

    memset(m_requestheader, 0, nCapacity);
    if (pServer == NULL || pObject == NULL)
    {
        return -1;
    }
    if (pReferer != NULL)
    {
        szRefName = "Referer:";
        szRefValue = pReferer;
        szRefEnd = "\r\n";
    }

    /* One bounded snprintf replaces the unchecked strcat chain. */
    nWritten = snprintf(m_requestheader, nCapacity,
                        "GET %s HTTP/1.1\r\n"
                        "Host:%s\r\n"
                        "%s%s%s"
                        "Accept:*/*\r\n"
                        "User-Agent:Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n"
                        "Connection:Keep-Alive\r\n"
                        "\r\n",
                        pObject, pServer, szRefName, szRefValue, szRefEnd);
    if (nWritten < 0 || (size_t)nWritten >= nCapacity)
    {
        memset(m_requestheader, 0, nCapacity);
        return -1;
    }
    return nWritten;
}
/*
 * Sends a request over the connected socket and then reads the response
 * header through GetResponseHeader().
 *
 * pRequestHeader: request text; NULL selects m_requestheader.
 * Length:         number of bytes to send; 0 (or a negative value) means
 *                 "use strlen of the request that is actually being sent".
 *
 * Returns `true` (0) when the whole request was sent, `false` (-1) when the
 * socket is not connected, the request is empty, or send() fails.
 */
int SendRequest(const char *pRequestHeader, long Length)
{
    long nSent = 0;

    if (!m_bConnected)
    {
        return false;
    }
    if (pRequestHeader == NULL)
    {
        pRequestHeader = m_requestheader;
    }
    if (Length <= 0)
    {
        /* Measure the buffer being sent, not unconditionally m_requestheader. */
        Length = (long)strlen(pRequestHeader);
    }
    if (Length == 0)
    {
        /* Sending nothing would leave the header read below blocked forever. */
        printf("send error\n");
        return false;
    }

    /* send() may accept only part of the data, so loop until all is written. */
    while (nSent < Length)
    {
        ssize_t nChunk = send(m_s, pRequestHeader + nSent, (size_t)(Length - nSent), 0);
        if (nChunk == -1)
        {
            if (errno == EINTR)
            {
                continue;
            }
            printf("send error\n");
            return false;
        }
        nSent += (long)nChunk;
    }
    printf("send ok\n");

    /* The argument is unused by the callee; pass a defined value. */
    GetResponseHeader(0);
    return true;
}
/*
 * Reads up to nMaxLength bytes from the connected socket into pBuffer.
 *
 * Returns the number of bytes read, 0 when the peer closed the connection,
 * or -1 on error, when not connected, or when the arguments are invalid.
 * The socket is closed through CloseSocket() whenever recv() reports 0 or
 * an error. The data is raw bytes and is not NUL-terminated.
 */
long Receive(char* pBuffer,long nMaxLength)
{
    long nLength;

    if (!m_bConnected)
    {
        return -1;
    }
    if (pBuffer == NULL || nMaxLength <= 0)
    {
        return -1;
    }

    /* Retry when a signal interrupts the call before any data arrived. */
    do
    {
        nLength = (long)recv(m_s, pBuffer, (size_t)nMaxLength, 0);
    } while (nLength == -1 && errno == EINTR);

    if (nLength <= 0)
    {
        /* %ld matches the long argument. */
        printf("receive error! %ld\n", nLength);
        CloseSocket();
    }
    return nLength;
}
/*
 * Closes the socket in m_s (if any) and resets the connection state.
 *
 * The state is reset even when close() fails: on Linux the descriptor is
 * released regardless, so keeping the old value would risk closing an
 * unrelated descriptor later.
 *
 * Returns `true` (0) on success and `false` (-1) when close() reported an
 * error.
 */
int CloseSocket()
{
    int status = true;

    if (m_s != -1 && close(m_s) == -1)
    {
        printf("closesocket error!\n");
        status = false;
    }
    m_s = -1;
    m_bConnected = 0;
    return status;
}
/*
 * Copies the request text from m_requestheader into pHeader.
 *
 * pHeader:    destination buffer of at least nMaxLength bytes.
 * nMaxLength: capacity of pHeader in bytes.
 *
 * At most nMaxLength bytes are copied. A terminating NUL is appended only
 * when there is room for it, so a completely filled buffer is not
 * terminated. Returns the number of request bytes copied, or 0 when
 * pHeader is NULL or nMaxLength is not positive.
 */
const int GetRequestHeader(char *pHeader, int nMaxLength)
{
    size_t nCapacity;
    size_t nLength;

    /* A negative capacity must not be turned into a huge unsigned value. */
    if (pHeader == NULL || nMaxLength <= 0)
    {
        return 0;
    }
    nCapacity = (size_t)nMaxLength;
    nLength = strlen(m_requestheader);
    if (nLength > nCapacity)
    {
        nLength = nCapacity;
    }
    memcpy(pHeader, m_requestheader, nLength);
    if (nLength < nCapacity)
    {
        pHeader[nLength] = '\0';
    }
    return (int)nLength;
}
/*
 * Reads the HTTP response header from the socket into m_ResponseHeader and
 * returns a pointer to it.
 *
 * The header is read one byte at a time up to and including the blank line
 * (CR LF CR LF), and only on the first call; later calls return the stored
 * header. The stored text is always NUL-terminated. If the header exceeds
 * MAXHEADERSIZE - 1 bytes it is truncated and the remaining header bytes
 * stay unread on the socket.
 *
 * nLength is unused: it is passed by value, so it cannot carry a result
 * back. The header size is available in m_nResponseHeaderSize.
 */
const char *GetResponseHeader(int nLength)
{
    (void)nLength;

    if (!m_bResponsed)
    {
        int nIndex = 0;
        int bEndResponse = 0;

        /* Keep one byte free for the terminator. */
        while (!bEndResponse && nIndex < MAXHEADERSIZE - 1)
        {
            char c = 0;
            ssize_t nGot = recv(m_s, &c, 1, 0);

            if (nGot == -1 && errno == EINTR)
            {
                continue;
            }
            if (nGot <= 0)
            {
                /* Error or connection closed: stop instead of storing stale bytes. */
                printf("receive header error!\n");
                break;
            }
            m_ResponseHeader[nIndex++] = c;
            if (nIndex >= 4 && memcmp(&m_ResponseHeader[nIndex - 4], "\r\n\r\n", 4) == 0)
            {
                bEndResponse = 1;
            }
        }
        m_ResponseHeader[nIndex] = '\0';
        m_nResponseHeaderSize = nIndex;
        m_bResponsed = 1;
    }
    printf("response length =%d\n", m_nResponseHeaderSize);
    printf("response content =%s\n", m_ResponseHeader);
    return m_ResponseHeader;
}
/*
 * Copies the next line of the stored response header into pLine.
 *
 * Copying starts at the cursor m_nCurIndex and stops after a '\n' has been
 * copied, when the stored header is exhausted, or once nMaxLength bytes
 * have been copied. At least one byte is always copied, and pLine is not
 * NUL-terminated.
 *
 * Returns the number of bytes copied. When the cursor is already at the end
 * of the header, the cursor is rewound to 0 and -1 is returned.
 */
int GetResponseLine(char *pLine, int nMaxLength)
{
    int nCopied = 0;

    if (m_nCurIndex >= m_nResponseHeaderSize)
    {
        m_nCurIndex = 0;
        return -1;
    }

    for (;;)
    {
        char ch = m_ResponseHeader[m_nCurIndex];

        m_nCurIndex++;
        pLine[nCopied] = ch;
        nCopied++;

        if (ch == '\n')
        {
            break;
        }
        if (m_nCurIndex >= m_nResponseHeaderSize)
        {
            break;
        }
        if (nCopied >= nMaxLength)
        {
            break;
        }
    }
    return nCopied;
}
int GetField(const char *szSession, char *szValue)
{
if(!m_bResponsed) return -1;
printf("GetField\n");
char strRespons[1024];
strcpy(strRespons, m_ResponseHeader);
char *p = NULL;
p = strtok(strRespons, "\r\n");
while(p)
{
if (0 == strncmp(p, szSession, 14))
{
strcpy(szValue,p+16);
printf("Content-Length: %s\n",szValue);
break;
}
else
{
printf("no Content-Length\n");
p = strtok(NULL, "\r\n");
}
}
return 0;
}
C语言实现下载网页的方法【原创】相关推荐
- python下载网页图片方法总结(含打开网页提示下载)
最近写爬虫,有需求把图片拿下来,现将所有尝试过的方法总结如下 1.使用urlopen方式 import urllib url = "http://difang.kaiwind.com/zhe ...
- Ubuntu中文语言包下载失败解决方法
Ubuntu的默认软件下载源是美国 更改软件下载源 在菜单主界面中找到如下图标并打开 按照下方图片进行配置,点击Download from:下拉菜单,选择Other(其他站点) 找到China在Chi ...
- 下载网页视频音频方法(djyeye为例)
方法一: 三步操作: 选择media即可. m4a即为音频实际地址. 方法二: 方法三: 遨游浏览器 感谢知乎 https://www.zhihu.com/question/26938393 转载于: ...
- python批量下载网页的方法
import urllib.request import ssl ssl._create_default_https_context = ssl._create_unverified_context ...
- html语言的网页制作方法与技巧探讨,HTML语言的网页制作方法与技巧探讨
HTML语言的网页制作方法与技巧探讨 匡成宝 [期刊名称]<电脑迷> [年(卷),期]2017(000)003 [摘要]现代社会网络应用已经是极为常见的,甚至是不可缺少的,无论是工作还是生 ...
- 怎么在python下载网站内容-Python下载网页的几种方法
get和post方式总结 get方式:以URL字串本身传递数据参数,在服务器端可以从'QUERY_STRING'这个变量中直接读取,效率较高,但缺乏安全性,也无法来处理复杂的数据(只能是字符串,比如在 ...
- python获取网页图片_Python获取网页上图片下载地址的方法
本文实例讲述了Python获取网页上图片下载地址的方法.分享给大家供大家参考.具体如下: 这里获取网页上图片的下载地址是正在写的数据采集中的一段,代码如下: #!/user/bin/python3 i ...
- 百度网盘百度云不限速下载几种方法介绍汇总(借助网页或者软件等)
百度网盘百度云不限速下载几种方法介绍汇总(借助网页或者软件等) 网页版 地址1: https://www.baiduwp.com 地址2: https://baiduwangpan.com Pando ...
- 下载网页所有图片的最简单的方法
下载网页所有图片的最简单的方法 作者:chszs,转载需注明.博客主页:http://blog.csdn.net/chszs Firefox浏览器提供了一种下载网页上所有图片的方法,操作非常简单.只需 ...
最新文章
- 多线程利器-队列(queue)
- Python文件的多种读写方式及游标
- Codevs 3134 Circle
- 【ARM】数据操作指令(下)
- EditPlus3 添加 PHP代码格式化
- java valueof_Java Short类valueOf()方法及示例
- 支持ie8的时分秒的html,兼容ie8的漂亮jQuery计时器插件
- 【AI视野·今日NLP 自然语言处理论文速览 第二十期】Thu, 8 Jul 2021
- C语言#define预处理
- RubyonRails on linux配置
- 星星之火-49:5G 移动边缘计算MEC快览
- 樽海鞘算法(Salp Swarm Algorithm: SSA)
- 微信扫码支付开发流程
- Windows安装Visio流程图软件
- 无人机倾斜摄影技术在不动产项目中的实际运用
- win7与internet时间同步出错_时间同步出错,教您时间同步出错
- 隐私保护之密码管理工具 KeePassXC
- 一文读懂 delete和delete[ ]
- 大学四年自学走来,这些私藏的实用工具/学习网站我贡献出来了
- 玩转python(一)——微信远程控制电脑