网络套接字是如何将数据发送出去的呢?这个需要从网络协议初始化开始分析。

网络协议初始化:

所在文件net/ipv4/af_inet.c

static int __init inet_init(void)

上面接口将初始化ipv4相关协议

socket模块初始化:

所在文件net/socket.c

/*
 * sock_init - bring up the socket layer.
 *
 * In order: initialises the network sysctl infrastructure, the skbuff
 * cache and the socket inode cache, then registers and mounts the socket
 * pseudo filesystem (sock_fs_type), storing the mount in sock_mnt.
 * With CONFIG_NETFILTER it also runs netfilter_init().
 *
 * Returns the value of 'err': the error code of the first failing step,
 * otherwise whatever the last assignment to 'err' left in it.
 */
static int __init sock_init(void)
{
	int err;

	/*
	 *      Initialize the network sysctl infrastructure.
	 */
	err = net_sysctl_init();
	if (err)
		goto out;

	/*
	 *      Initialize skbuff SLAB cache
	 */
	skb_init();

	/*
	 *      Initialize the protocols module.
	 */
	init_inodecache();

	/* Register the socket filesystem type. */
	err = register_filesystem(&sock_fs_type);
	if (err)
		goto out_fs;

	/* Mount it; the resulting vfsmount is kept in sock_mnt. */
	sock_mnt = kern_mount(&sock_fs_type);
	if (IS_ERR(sock_mnt)) {
		err = PTR_ERR(sock_mnt);
		goto out_mount;
	}

	/* The real protocol initialization is performed in later initcalls.
	 */

#ifdef CONFIG_NETFILTER
	/* A failure here jumps straight to 'out'. */
	err = netfilter_init();
	if (err)
		goto out;
#endif

	ptp_classifier_init();

out:
	return err;

out_mount:
	/* Mounting failed: undo the filesystem registration. */
	unregister_filesystem(&sock_fs_type);
out_fs:
	goto out;
}

上面接口将注册socket文件系统

其中主要虚拟文件系统接口:

所在文件include/linux/fs.h

/* Convenience form of kern_mount_data() that passes NULL as the data argument. */
#define kern_mount(type) kern_mount_data(type, NULL)

所在文件fs/namespace.c

/*
 * kern_mount_data - mount @type via vfs_kern_mount() with MS_KERNMOUNT.
 * @type: filesystem type to mount; its name is also passed as the device name
 * @data: opaque data forwarded unchanged to vfs_kern_mount()
 *
 * Returns whatever vfs_kern_mount() returned. When that is not an error
 * pointer, the mount's namespace is set to MNT_NS_INTERNAL first.
 */
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *m = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);

	if (IS_ERR(m))
		return m;

	/*
	 * This is a long-term mount: it must not be released until it is
	 * unmounted, which happens before the filesystem is unregistered.
	 */
	real_mount(m)->mnt_ns = MNT_NS_INTERNAL;
	return m;
}
static struct dentry *sockfs_mount(struct file_system_type *fs_type,int flags, const char *dev_name, void *data)
{return mount_pseudo(fs_type, "socket:", &sockfs_ops,&sockfs_dentry_operations, SOCKFS_MAGIC);
}static struct vfsmount *sock_mnt __read_mostly;static struct file_system_type sock_fs_type = {.name =        "sockfs",.mount =    sockfs_mount,.kill_sb =    kill_anon_super,
};static const struct super_operations sockfs_ops = {.alloc_inode  = sock_alloc_inode,.destroy_inode  = sock_destroy_inode,.statfs       = simple_statfs,
};

1、net/socket.c

创建套接字结构体:

/*
 * struct socket - the object that __sock_create() allocates with
 * sock_alloc() and returns to its caller through *res.
 */
struct socket {
	socket_state		state;

	kmemcheck_bitfield_begin(type);
	short			type;	/* copied from the 'type' argument in __sock_create() */
	kmemcheck_bitfield_end(type);

	unsigned long		flags;

	struct socket_wq __rcu	*wq;

	struct file		*file;	/* NOTE(review): presumably the file backing this socket - confirm */
	struct sock		*sk;	/* described in the accompanying text as the object that does the real network work */
	const struct proto_ops	*ops;	/* protocol operations; sock_sendmsg_nosec() calls ops->sendmsg */
};
int sock_create(int family, int type, int protocol, struct socket **res)int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern)
{int err;struct socket *sock;const struct net_proto_family *pf;/**      Check protocol is in range*/if (family < 0 || family >= NPROTO)return -EAFNOSUPPORT;if (type < 0 || type >= SOCK_MAX)return -EINVAL;/* Compatibility.This uglymoron is moved from INET layer to here to avoiddeadlock in module load.*/if (family == PF_INET && type == SOCK_PACKET) {static int warned;if (!warned) {warned = 1;pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",current->comm);}family = PF_PACKET;}err = security_socket_create(family, type, protocol, kern);if (err)return err;/** Allocate the socket and allow the family to set things up. if*  the protocol is 0, the family is instructed to select an appropriate*   default.*//*调用socket文件系统接口申请inode文件节点和socket结构体*/sock = sock_alloc();if (!sock) {net_warn_ratelimited("socket: no more sockets\n");return -ENFILE;   /* Not exactly a match, but its theclosest posix thing */}sock->type = type;#ifdef CONFIG_MODULES/* Attempt to load a protocol module if the find failed.** 12/09/1996 Marcin: But! this makes REALLY only sense, if the user* requested real, full-featured networking support upon configuration.* Otherwise module support will break!*/if (rcu_access_pointer(net_families[family]) == NULL)request_module("net-pf-%d", family);
#endifrcu_read_lock();/*获取网络协议族,网络协议族在static int __init inet_init(void)中注册*/pf = rcu_dereference(net_families[family]);  err = -EAFNOSUPPORT;if (!pf)goto out_release;/** We will call the ->create function, that possibly is in a loadable* module, so we have to bump that loadable module refcnt first.*/if (!try_module_get(pf->owner))goto out_release;/* Now protected by module ref count */rcu_read_unlock();/*调用网络协议接口创建socket对象下的sock对象并对其初始化*/err = pf->create(net, sock, protocol, kern);if (err < 0)goto out_module_put;/** Now to bump the refcnt of the [loadable] module that owns this* socket at sock_release time we decrement its refcnt.*/if (!try_module_get(sock->ops->owner))goto out_module_busy;/** Now that we're done with the ->create function, the [loadable]* module can have its refcnt decremented*/module_put(pf->owner);err = security_socket_post_create(sock, family, type, protocol, kern);if (err)goto out_sock_release;*res = sock;return 0;out_module_busy:err = -EAFNOSUPPORT;
out_module_put:sock->ops = NULL;module_put(pf->owner);
out_sock_release:sock_release(sock);return err;out_release:rcu_read_unlock();goto out_sock_release;
}

static struct socket *sock_alloc(void)

该接口通过socket虚拟文件系统对象static struct vfsmount *sock_mnt创建socket文件节点,socket文件节点包含socket和inode两个对象,其中inode用于文件系统相关操作,socket为网络相关操作对象。虚拟文件系统对象创建流程:

sock_alloc(void)接口只是创建socket结构而已,并未创建socket->sk,sk(struct sock结构体)才是真正用于网络操作的对象。struct sock结构体通过协议族创建,网络协议族在net/ipv4/af_inet.c 文件中的static int __init inet_init(void)中注册。

/*
 * inet_init - initialise the IPv4 protocol family.
 *
 * Registers the tcp/udp/raw/ping protos, registers inet_family_ops with the
 * socket layer, adds the base IP protocols, registers every entry of
 * inetsw_array, and then initialises the individual IPv4 subsystems in the
 * order shown below.
 *
 * Returns 0 on success, or the error from a failing proto_register() call,
 * in which case the protos registered so far are unregistered again.
 * Failures of the later steps are only logged (or, for icmp_init(), panic).
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	ping_init();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	if (init_inet_pernet_ops())
		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	ip_tunnel_core_init();

	rc = 0;
out:
	return rc;

	/* Unwind labels fall through, undoing registrations in reverse order. */
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}

网络协议初始化流程:

用户空间调用write系统调用接口向socket写数据,write 系统调用定义如下:

所在文件fs/read_write.c

/*
 * write(2): resolve @fd, write @count bytes from the user buffer @buf at the
 * file's current position through vfs_write(), and store the updated
 * position back when vfs_write() did not fail.
 *
 * Returns the result of vfs_write(), or -EBADF when @fd yields no file.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	struct fd target = fdget_pos(fd);
	ssize_t written;
	loff_t offset;

	if (!target.file)
		return -EBADF;

	offset = file_pos_read(target.file);
	written = vfs_write(target.file, buf, count, &offset);
	if (written >= 0)
		file_pos_write(target.file, offset);
	fdput_pos(target);

	return written;
}
/*
 * Declarations only; the bodies are not part of this excerpt.
 * vfs_write() is what the write system call invokes.
 * NOTE(review): the listing order suggests the chain
 * vfs_write() -> __vfs_write() -> new_sync_write() - confirm in fs/read_write.c.
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos);
ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, loff_t *pos);

/*
 * new_sync_write - perform a write through the file's ->write_iter hook.
 * @filp: file being written
 * @buf:  user-space source buffer
 * @len:  number of bytes to write
 * @ppos: in: position to write at; out: updated only when ret > 0
 *
 * Wraps the single user buffer in a one-element iovec/iov_iter and a kiocb
 * initialised with init_sync_kiocb(), then calls filp->f_op->write_iter().
 * Returns the hook's return value.
 */
static ssize_t new_sync_write(struct file *filp, const char __user *buf,
			      size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	ret = filp->f_op->write_iter(&kiocb, &iter);
	/* The hook must not return -EIOCBQUEUED on this path. */
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}
其中 filp->f_op->write_iter(&kiocb, &iter); 对于socket文件来说,调用的就是下面的sock_write_iter接口:

所在文件net/socket.c

static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
{struct file *file = iocb->ki_filp;struct socket *sock = file->private_data;struct msghdr msg = {.msg_iter = *from,.msg_iocb = iocb};ssize_t res;if (iocb->ki_pos != 0)return -ESPIPE;if (file->f_flags & O_NONBLOCK)msg.msg_flags = MSG_DONTWAIT;if (sock->type == SOCK_SEQPACKET)msg.msg_flags |= MSG_EOR;res = sock_sendmsg(sock, &msg);*from = msg.msg_iter;return res;
}
/*
 * sock_sendmsg_nosec - call the protocol's sendmsg operation directly,
 * without the security hook. Defined ahead of sock_sendmsg(), which calls it.
 *
 * Returns the value of sock->ops->sendmsg(), which must not be -EIOCBQUEUED.
 */
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
	int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));

	BUG_ON(ret == -EIOCBQUEUED);
	return ret;
}

/*
 * sock_sendmsg - send @msg on @sock after consulting the security hook.
 *
 * Returns the non-zero result of security_socket_sendmsg() if there is one,
 * otherwise the result of sock_sendmsg_nosec().
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg,
					  msg_data_left(msg));

	/* GNU "?:": yields err when it is non-zero, else performs the send. */
	return err ?: sock_sendmsg_nosec(sock, msg);
}

数据包发送调用流程(linux内核版本4.4):

1、tcp_sendmsg(tcp.c)将应用层数据创建成数据skb包

2、tcp_push_one(tcp_output.c)、tcp_push(tcp.c)、__tcp_push_pending_frames(tcp_output.c)

3、tcp_write_xmit(tcp_output.c)

4、tcp_transmit_skb(tcp_output.c)

该接口主要功能:

添加tcp头

通过icsk->icsk_af_ops->queue_xmit调用ip层发送接口

在tcp_ipv4.c中定义如下:

const struct inet_connection_sock_af_ops ipv4_specific = {.queue_xmit      = ip_queue_xmit,.send_check    = tcp_v4_send_check,.rebuild_header    = inet_sk_rebuild_header,.sk_rx_dst_set    = inet_sk_rx_dst_set,.conn_request     = tcp_v4_conn_request,.syn_recv_sock       = tcp_v4_syn_recv_sock,.net_header_len     = sizeof(struct iphdr),.setsockopt     = ip_setsockopt,.getsockopt    = ip_getsockopt,.addr2sockaddr     = inet_csk_addr2sockaddr,.sockaddr_len     = sizeof(struct sockaddr_in),.bind_conflict    = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_ip_setsockopt,.compat_getsockopt = compat_ip_getsockopt,
#endif.mtu_reduced     = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

以上为传输层(TCP)

5、ip_queue_xmit(ip_output.c)添加ip头

6、ip_local_out(ip_output.c)

Netfilter防火墙处理(NF_INET_LOCAL_OUT)

7、include/net/dst.h

/*
 * dst_output - pass @skb to the ->output hook of the object returned by
 * skb_dst(skb), forwarding @net and @sk unchanged.
 *
 * Returns whatever that hook returns. For entries built by rt_dst_alloc()
 * the hook is ip_output.
 */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return skb_dst(skb)->output(net, sk, skb);
}

8、output在route.c中定义如下:

/*
 * rt_dst_alloc - allocate an rtable through dst_alloc() and fill in defaults.
 * @dev:        device handed to dst_alloc(); its net supplies rt_genid
 * @flags:      stored in rt_flags; RTCF_LOCAL additionally sets dst.input
 * @type:       stored in rt_type
 * @nopolicy:   adds DST_NOPOLICY to the dst flags when true
 * @noxfrm:     adds DST_NOXFRM to the dst flags when true
 * @will_cache: when false, adds DST_HOST | DST_NOCACHE to the dst flags
 *
 * Returns the new rtable, or NULL when dst_alloc() fails. dst.output is
 * always set to ip_output.
 */
static struct rtable *rt_dst_alloc(struct net_device *dev,
				   unsigned int flags, u16 type,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));
	if (!rt)
		return NULL;

	rt->rt_genid = rt_genid_ipv4(dev_net(dev));
	rt->rt_flags = flags;
	rt->rt_type = type;
	rt->rt_is_input = 0;
	rt->rt_iif = 0;
	rt->rt_pmtu = 0;
	rt->rt_gateway = 0;
	rt->rt_uses_gateway = 0;
	rt->rt_table_id = 0;
	INIT_LIST_HEAD(&rt->rt_uncached);

	/* Every route sends through ip_output. */
	rt->dst.output = ip_output;
	/* Only local routes get an input handler here. */
	if (flags & RTCF_LOCAL)
		rt->dst.input = ip_local_deliver;

	return rt;
}

9、ip_output(ip_output.c)

Netfilter防火墙处理(NF_INET_POST_ROUTING)

10、ip_finish_output(ip_output.c)

11、ip_finish_output2(ip_output.c)

12、dst_neigh_output(dst.h) 邻居写处理

13、neigh_hh_output(net/neighbour.h)

以上为网络层(IP),包含路由(route)协议、邻居(arp)协议

14、dev_queue_xmit(net/core/dev.c)

15、__dev_queue_xmit(net/core/dev.c)

该接口分流量控制和直接发送两种处理情况

流量控制发送接口:

__dev_xmit_skb(net/core/dev.c)

主要功能:

将skb添加到流量控制QoS队列

依次调用以下接口触发软中断:

__qdisc_run (sch_generic.c)

__netif_schedule

__netif_reschedule

raise_softirq_irqoff(NET_TX_SOFTIRQ);

net_tx_action(软中断处理函数)

qdisc_run

__qdisc_run

qdisc_restart

sch_direct_xmit (sch_generic.c)最终调用链路层硬件发送。

直接通过链路层的硬件发送:

dev_hard_start_xmit(net/core/dev.c)

xmit_one (net/core/dev.c)

netdev_start_xmit (linux/netdevice.h)

__netdev_start_xmit (linux/netdevice.h)

该接口调用驱动程序发送接口将数据发送到物理层。

以e100网卡驱动(e100.c)为例ops->ndo_start_xmit定义如下:

static const struct net_device_ops e100_netdev_ops = {.ndo_open     = e100_open,.ndo_stop      = e100_close,.ndo_start_xmit       = e100_xmit_frame,.ndo_validate_addr   = eth_validate_addr,.ndo_set_rx_mode   = e100_set_multicast_list,.ndo_set_mac_address = e100_set_mac_address,.ndo_change_mtu     = e100_change_mtu,.ndo_do_ioctl        = e100_do_ioctl,.ndo_tx_timeout        = e100_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER.ndo_poll_controller   = e100_netpoll,
#endif.ndo_set_features = e100_set_features,
};

linux网络数据发送流程相关推荐

  1. Linux: 网络数据收发流程简析

    文章目录 1. 前言 2. 背景 3. 网卡数据收发流程 3.1 网络数据接收流程 3.1.1 网卡数据接收流程 3.1.2 网卡数据向上传递给L3,L4的流程 3.2 网卡数据发送流程 1. 前言 ...

  2. linux网络数据包流程

    一.介绍 对于调试linux网卡驱动或者wifi驱动性能,或者排查网络数据丢包的时候,需要对内核处理包要与基本的了解,从而排查出丢包出现在哪个环节,这里给出大致流程和常用排查方法 二.基本框架 1.硬 ...

  3. Linux内核网络数据发送(六)——网络设备驱动

    Linux内核网络数据发送(六)--网络设备驱动 1. 前言 2. 驱动回调函数注册 3. `ndo_start_xmit` 发送数据 4. `igb_tx_map` 1. 前言 本文主要介绍设备通过 ...

  4. Linux内核网络数据发送(五)——排队规则

    Linux内核网络数据发送(五)--排队规则 1. 前言 2. `qdisc_run_begin()` and `qdisc_run_end()`:仅设置 qdisc 状态位 3. `__qdisc_ ...

  5. linux内核源码分析之网络数据收发流程

    目录 一.TCP/IP 模型与 ISO模型 二.内核中分层模型的结构 三.数据帧的封装 四.协议栈收发包流程 1.网络包接收流程 2.网络包发送流程 一.TCP/IP 模型与 ISO模型 内核中使用的 ...

  6. Linux网络 - 数据包的接收过程

    Linux网络 - 数据包的接收过程 嵌入式Linux中文站 嵌入式Linux中文站 微信号 emblinux 功能介绍 嵌入式Linux中文站提供专业嵌入式Linux开发技术资讯 Table of ...

  7. Linux网络数据包的揭秘以及常见的调优方式总结

    Linux网络数据包的揭秘以及常见的调优方式总结 (网易游戏运维平台) 关注我们,获一手游戏运维方案 lott 网易游戏业务 SRE, 专注于业务运维的质量和效率 , 喜欢研究 Linux 系统原理. ...

  8. linux内核网络协议栈--发送流程及函数(十)

    本章会一步一步的分析,在linux内核中,数据是如何从网络中接收并最后到达应用程序的. 用户数据的发送流程如下图所示,不管是tfp,telnet,http都是类似的.当然我们在使用应用的时候,根本不会 ...

  9. Linux网络-数据包的接收流程(基于RTL8139网卡驱动程序)

    本文将介绍Linux系统中,基于RTL8139网卡驱动程序,是如何一步一步将接收到的数据包传送到内核的网络协议栈的. 下图展示了数据包(packet)如何进入内存,并被内核的网络模块开始处理: +-- ...

最新文章

  1. 活得太累,只因为你索求的太多
  2. telnet命令发送邮件
  3. java 逻辑表达式 布尔_使用基本逻辑门实现布尔表达式
  4. 致歉!抖音Semi Design承认参考阿里Ant Design
  5. 共建数据库软件全生态,新数科技宣布 ShinSight 开放共享!
  6. SVN记录转excel文件的小程序
  7. 封装数据库增删该通用方法
  8. 谈谈基于SQL Server 的Exception Handling[中篇]
  9. 项目按jar包方式部署
  10. 6.Shell 编程从入门到精通 --- 文件和文件系统
  11. 装ubuntu_系统安装_win10下安装Ubuntu后,启动时无win10选项的解决办法。
  12. 蓝牙版本avrcp怎么选_新款AirPods怎么选?还是不算好耳机,但是最配的iPhone蓝牙耳机...
  13. [转]win10 vs2010安装教程(超详细,附下载链接)
  14. java优先级 六级和七级_百度知道六级和七级得区别是什么?如何升到七级?
  15. VCIP2020:基于深度学习的HEVC帧内预测的非线性变换
  16. 微信小程序image图片标签(超详细)
  17. 台达PLC与紫金桥监控组态软件的连接
  18. 实战1--航空公司客户价值分析(画图篇)
  19. PS4常用DNS一览 PSN下载速度慢登录失败解决办法
  20. 好利来背后隐形富豪家族:店面千家,年入百亿

热门文章

  1. [转载]我们是如何对一个网站进行优化的
  2. 本科毕业工作五年,一点感慨(转)
  3. 微型计算机2019年4月,2019年4月有几个工作日 4月份休息几天
  4. The Sixth Week
  5. kindeditor富文本编译器
  6. c语言鱼图形怎样编,如何编写C语图形程序.pdf
  7. tp5 php 使用 PhpOffice\PhpWord 扩展生成pdf 文件
  8. matlab画频散曲线,关于lamb频散曲线的绘制问题
  9. 数据集的创建(digits)
  10. mysql建表语句规范