Linux内核协议栈-一个socket的调用过程,从用户态接口到底层硬件
用户创建socket
调用内核__sock_create
int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern)
{int err;struct socket *sock;const struct net_proto_family *pf;/** Check protocol is in range*/if (family < 0 || family >= NPROTO)return -EAFNOSUPPORT;if (type < 0 || type >= SOCK_MAX)return -EINVAL;/* Compatibility.This uglymoron is moved from INET layer to here to avoiddeadlock in module load.*/if (family == PF_INET && type == SOCK_PACKET) {pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",current->comm);family = PF_PACKET;}err = security_socket_create(family, type, protocol, kern);if (err)return err;/** Allocate the socket and allow the family to set things up. if* the protocol is 0, the family is instructed to select an appropriate* default.*/sock = sock_alloc();if (!sock) {net_warn_ratelimited("socket: no more sockets\n");return -ENFILE; /* Not exactly a match, but its theclosest posix thing */}sock->type = type;#ifdef CONFIG_MODULES/* Attempt to load a protocol module if the find failed.** 12/09/1996 Marcin: But! this makes REALLY only sense, if the user* requested real, full-featured networking support upon configuration.* Otherwise module support will break!*/if (rcu_access_pointer(net_families[family]) == NULL)request_module("net-pf-%d", family);
#endifrcu_read_lock();pf = rcu_dereference(net_families[family]);err = -EAFNOSUPPORT;if (!pf)goto out_release;/** We will call the ->create function, that possibly is in a loadable* module, so we have to bump that loadable module refcnt first.*/if (!try_module_get(pf->owner))goto out_release;/* Now protected by module ref count */rcu_read_unlock();err = pf->create(net, sock, protocol, kern);if (err < 0)goto out_module_put;/** Now to bump the refcnt of the [loadable] module that owns this* socket at sock_release time we decrement its refcnt.*/if (!try_module_get(sock->ops->owner))goto out_module_busy;/** Now that we're done with the ->create function, the [loadable]* module can have its refcnt decremented*/module_put(pf->owner);err = security_socket_post_create(sock, family, type, protocol, kern);if (err)goto out_sock_release;*res = sock;return 0;out_module_busy:err = -EAFNOSUPPORT;
out_module_put:sock->ops = NULL;module_put(pf->owner);
out_sock_release:sock_release(sock);return err;out_release:rcu_read_unlock();goto out_sock_release;
}
EXPORT_SYMBOL(__sock_create);

/**
 * sock_create - creates a socket
 * @family: protocol family (AF_INET, ...)
 * @type: communication type (SOCK_STREAM, ...)
 * @protocol: protocol (0, ...)
 * @res: new socket
 *
 * A wrapper around __sock_create() that supplies the calling task's
 * network namespace and passes 0 for the kern argument.
 * Returns 0 or an error. This function internally uses GFP_KERNEL.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *net = current->nsproxy->net_ns;

	return __sock_create(net, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);
利用sock_ioctl控制socket
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{struct socket *sock;struct sock *sk;void __user *argp = (void __user *)arg;int pid, err;struct net *net;sock = file->private_data;sk = sock->sk;net = sock_net(sk);if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {struct ifreq ifr;bool need_copyout;if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))return -EFAULT;err = dev_ioctl(net, cmd, &ifr, &need_copyout);if (!err && need_copyout)if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))return -EFAULT;} else
#ifdef CONFIG_WEXT_COREif (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {err = wext_handle_ioctl(net, cmd, argp);} else
#endifswitch (cmd) {case FIOSETOWN:case SIOCSPGRP:err = -EFAULT;if (get_user(pid, (int __user *)argp))break;err = f_setown(sock->file, pid, 1);break;case FIOGETOWN:case SIOCGPGRP:err = put_user(f_getown(sock->file),(int __user *)argp);break;case SIOCGIFBR:case SIOCSIFBR:case SIOCBRADDBR:case SIOCBRDELBR:err = -ENOPKG;if (!br_ioctl_hook)request_module("bridge");mutex_lock(&br_ioctl_mutex);if (br_ioctl_hook)err = br_ioctl_hook(net, cmd, argp);mutex_unlock(&br_ioctl_mutex);break;case SIOCGIFVLAN:case SIOCSIFVLAN:err = -ENOPKG;if (!vlan_ioctl_hook)request_module("8021q");mutex_lock(&vlan_ioctl_mutex);if (vlan_ioctl_hook)err = vlan_ioctl_hook(net, argp);mutex_unlock(&vlan_ioctl_mutex);break;case SIOCADDDLCI:case SIOCDELDLCI:err = -ENOPKG;if (!dlci_ioctl_hook)request_module("dlci");mutex_lock(&dlci_ioctl_mutex);if (dlci_ioctl_hook)err = dlci_ioctl_hook(cmd, argp);mutex_unlock(&dlci_ioctl_mutex);break;case SIOCGSKNS:err = -EPERM;if (!ns_capable(net->user_ns, CAP_NET_ADMIN))break;err = open_related_ns(&net->ns, get_net_ns);break;case SIOCGSTAMP_OLD:case SIOCGSTAMPNS_OLD:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_OLD,!IS_ENABLED(CONFIG_64BIT));break;case SIOCGSTAMP_NEW:case SIOCGSTAMPNS_NEW:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_NEW,false);break;default:err = sock_do_ioctl(net, sock, cmd, arg);break;}return err;
}
虚拟层inet协议族的创建inet_create
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{struct socket *sock;struct sock *sk;void __user *argp = (void __user *)arg;int pid, err;struct net *net;sock = file->private_data;sk = sock->sk;net = sock_net(sk);if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {struct ifreq ifr;bool need_copyout;if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))return -EFAULT;err = dev_ioctl(net, cmd, &ifr, &need_copyout);if (!err && need_copyout)if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))return -EFAULT;} else
#ifdef CONFIG_WEXT_COREif (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {err = wext_handle_ioctl(net, cmd, argp);} else
#endifswitch (cmd) {case FIOSETOWN:case SIOCSPGRP:err = -EFAULT;if (get_user(pid, (int __user *)argp))break;err = f_setown(sock->file, pid, 1);break;case FIOGETOWN:case SIOCGPGRP:err = put_user(f_getown(sock->file),(int __user *)argp);break;case SIOCGIFBR:case SIOCSIFBR:case SIOCBRADDBR:case SIOCBRDELBR:err = -ENOPKG;if (!br_ioctl_hook)request_module("bridge");mutex_lock(&br_ioctl_mutex);if (br_ioctl_hook)err = br_ioctl_hook(net, cmd, argp);mutex_unlock(&br_ioctl_mutex);break;case SIOCGIFVLAN:case SIOCSIFVLAN:err = -ENOPKG;if (!vlan_ioctl_hook)request_module("8021q");mutex_lock(&vlan_ioctl_mutex);if (vlan_ioctl_hook)err = vlan_ioctl_hook(net, argp);mutex_unlock(&vlan_ioctl_mutex);break;case SIOCADDDLCI:case SIOCDELDLCI:err = -ENOPKG;if (!dlci_ioctl_hook)request_module("dlci");mutex_lock(&dlci_ioctl_mutex);if (dlci_ioctl_hook)err = dlci_ioctl_hook(cmd, argp);mutex_unlock(&dlci_ioctl_mutex);break;case SIOCGSKNS:err = -EPERM;if (!ns_capable(net->user_ns, CAP_NET_ADMIN))break;err = open_related_ns(&net->ns, get_net_ns);break;case SIOCGSTAMP_OLD:case SIOCGSTAMPNS_OLD:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_OLD,!IS_ENABLED(CONFIG_64BIT));break;case SIOCGSTAMP_NEW:case SIOCGSTAMPNS_NEW:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_NEW,false);break;default:err = sock_do_ioctl(net, sock, cmd, arg);break;}return err;
}
其中协议族的proto_ops结构如下
struct proto_ops {int family;struct module *owner;int (*release) (struct socket *sock);int (*bind) (struct socket *sock,struct sockaddr *myaddr,int sockaddr_len);int (*connect) (struct socket *sock,struct sockaddr *vaddr,int sockaddr_len, int flags);int (*socketpair)(struct socket *sock1,struct socket *sock2);int (*accept) (struct socket *sock,struct socket *newsock, int flags, bool kern);int (*getname) (struct socket *sock,struct sockaddr *addr,int peer);__poll_t (*poll) (struct file *file, struct socket *sock,struct poll_table_struct *wait);int (*ioctl) (struct socket *sock, unsigned int cmd,unsigned long arg);
#ifdef CONFIG_COMPATint (*compat_ioctl) (struct socket *sock, unsigned int cmd,unsigned long arg);
#endifint (*gettstamp) (struct socket *sock, void __user *userstamp,bool timeval, bool time32);int (*listen) (struct socket *sock, int len);int (*shutdown) (struct socket *sock, int flags);int (*setsockopt)(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen);int (*getsockopt)(struct socket *sock, int level,int optname, char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPATint (*compat_setsockopt)(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen);int (*compat_getsockopt)(struct socket *sock, int level,int optname, char __user *optval, int __user *optlen);
#endifvoid (*show_fdinfo)(struct seq_file *m, struct socket *sock);int (*sendmsg) (struct socket *sock, struct msghdr *m,size_t total_len);/* Notes for implementing recvmsg:* ===============================* msg->msg_namelen should get updated by the recvmsg handlers* iff msg_name != NULL. It is by default 0 to prevent* returning uninitialized memory to user space. The recvfrom* handlers can assume that msg.msg_name is either NULL or has* a minimum size of sizeof(struct sockaddr_storage).*/int (*recvmsg) (struct socket *sock, struct msghdr *m,size_t total_len, int flags);int (*mmap) (struct file *file, struct socket *sock,struct vm_area_struct * vma);ssize_t (*sendpage) (struct socket *sock, struct page *page,int offset, size_t size, int flags);ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,struct pipe_inode_info *pipe, size_t len, unsigned int flags);int (*set_peek_off)(struct sock *sk, int val);int (*peek_len)(struct socket *sock);/* The following functions are called internally by kernel with* sock lock already held.*/int (*read_sock)(struct sock *sk, read_descriptor_t *desc,sk_read_actor_t recv_actor);int (*sendpage_locked)(struct sock *sk, struct page *page,int offset, size_t size, int flags);int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,size_t size);int (*set_rcvlowat)(struct sock *sk, int val);
};
其包含的函数与struct proto很类似,但proto_ops位于socket层
上图来源:https://blog.csdn.net/feiwatson/article/details/82785697
接下来分为字节流(TCP)和数据报(UDP),二者结构分别为:
const struct proto_ops inet_stream_ops = {.family = PF_INET,.owner = THIS_MODULE,.release = inet_release,.bind = inet_bind,.connect = inet_stream_connect,.socketpair = sock_no_socketpair,.accept = inet_accept,.getname = inet_getname,.poll = tcp_poll,.ioctl = inet_ioctl,.gettstamp = sock_gettstamp,.listen = inet_listen,.shutdown = inet_shutdown,.setsockopt = sock_common_setsockopt,.getsockopt = sock_common_getsockopt,.sendmsg = inet_sendmsg,.recvmsg = inet_recvmsg,
#ifdef CONFIG_MMU.mmap = tcp_mmap,
#endif.sendpage = inet_sendpage,.splice_read = tcp_splice_read,.read_sock = tcp_read_sock,.sendmsg_locked = tcp_sendmsg_locked,.sendpage_locked = tcp_sendpage_locked,.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_sock_common_setsockopt,.compat_getsockopt = compat_sock_common_getsockopt,.compat_ioctl = inet_compat_ioctl,
#endif.set_rcvlowat = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);const struct proto_ops inet_dgram_ops = {.family = PF_INET,.owner = THIS_MODULE,.release = inet_release,.bind = inet_bind,.connect = inet_dgram_connect,.socketpair = sock_no_socketpair,.accept = sock_no_accept,.getname = inet_getname,.poll = udp_poll,.ioctl = inet_ioctl,.gettstamp = sock_gettstamp,.listen = sock_no_listen,.shutdown = inet_shutdown,.setsockopt = sock_common_setsockopt,.getsockopt = sock_common_getsockopt,.sendmsg = inet_sendmsg,.recvmsg = inet_recvmsg,.mmap = sock_no_mmap,.sendpage = inet_sendpage,.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_sock_common_setsockopt,.compat_getsockopt = compat_sock_common_getsockopt,.compat_ioctl = inet_compat_ioctl,
#endif
};
EXPORT_SYMBOL(inet_dgram_ops);
以数据报(UDP)为例,udp_prot结构如下:
struct proto udp_prot = {.name = "UDP",.owner = THIS_MODULE,.close = udp_lib_close,.pre_connect = udp_pre_connect,.connect = ip4_datagram_connect,.disconnect = udp_disconnect,.ioctl = udp_ioctl,.init = udp_init_sock,.destroy = udp_destroy_sock,.setsockopt = udp_setsockopt,.getsockopt = udp_getsockopt,.sendmsg = udp_sendmsg,.recvmsg = udp_recvmsg,.sendpage = udp_sendpage,.release_cb = ip4_datagram_release_cb,.hash = udp_lib_hash,.unhash = udp_lib_unhash,.rehash = udp_v4_rehash,.get_port = udp_v4_get_port,.memory_allocated = &udp_memory_allocated,.sysctl_mem = sysctl_udp_mem,.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),.obj_size = sizeof(struct udp_sock),.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_udp_setsockopt,.compat_getsockopt = compat_udp_getsockopt,
#endif.diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
tcp_prot结构如下:
struct proto tcp_prot = {.name = "TCP",.owner = THIS_MODULE,.close = tcp_close,.pre_connect = tcp_v4_pre_connect,.connect = tcp_v4_connect,.disconnect = tcp_disconnect,.accept = inet_csk_accept,.ioctl = tcp_ioctl,.init = tcp_v4_init_sock,.destroy = tcp_v4_destroy_sock,.shutdown = tcp_shutdown,.setsockopt = tcp_setsockopt,.getsockopt = tcp_getsockopt,.keepalive = tcp_set_keepalive,.recvmsg = tcp_recvmsg,.sendmsg = tcp_sendmsg,.sendpage = tcp_sendpage,.backlog_rcv = tcp_v4_do_rcv,.release_cb = tcp_release_cb,.hash = inet_hash,.unhash = inet_unhash,.get_port = inet_csk_get_port,.enter_memory_pressure = tcp_enter_memory_pressure,.leave_memory_pressure = tcp_leave_memory_pressure,.stream_memory_free = tcp_stream_memory_free,.sockets_allocated = &tcp_sockets_allocated,.orphan_count = &tcp_orphan_count,.memory_allocated = &tcp_memory_allocated,.memory_pressure = &tcp_memory_pressure,.sysctl_mem = sysctl_tcp_mem,.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),.max_header = MAX_TCP_HEADER,.obj_size = sizeof(struct tcp_sock),.slab_flags = SLAB_TYPESAFE_BY_RCU,.twsk_prot = &tcp_timewait_sock_ops,.rsk_prot = &tcp_request_sock_ops,.h.hashinfo = &tcp_hashinfo,.no_autobind = true,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_tcp_setsockopt,.compat_getsockopt = compat_tcp_getsockopt,
#endif.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
其具体对应的钩子函数(回调函数)结构如下图所示:
经过一系列调用关系,最终进入网络接口层,该层负责完成网络驱动的功能,然后就进入了具体的网络设备。
Linux内核协议栈-一个socket的调用过程,从用户态接口到底层硬件相关推荐
- 实验六:分析Linux内核创建一个新进程的过程
20135108 李泽源 阅读理解task_struct数据结构http://codelab.shiyanlou.com/xref/linux-3.18.6/include/linux/sched.h ...
- 6、分析Linux内核创建一个新进程的过程
姓名:周毅原创作品转载请注明出处 <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 这篇文章主要分析lin ...
- linux内核创建用户,分析Linux内核创建一个新进程的过程
谢文杰 + 原创作品转载请注明出处 + <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 一.实验目的 阅 ...
- linux搭建一个的过程,Linux内核创建一个新进程的过程
此文仅用于MOOCLinux内核分析作业 task_struct数据结构 根据wiki的定义,进程是计算机中已运行程序的实体.在面向线程设计的系统(Linux 2.6及更新的版本)中,进程本身不是基本 ...
- Linux内核创建一个新进程的过程
作者:王鹤楼 原创作品转载请注明出处 <Linux内核分析>MOOC课程 http://mooc.study.163.com/course/USTC-1000029000 操作系统的三大功 ...
- Linux内核协议栈- 创建socket:__sock_create函数调用关系
Table of Contents __sock_create函数 结构 socket_state struct socket struct sock struct proto_ops 函数原型 __ ...
- Linux内核开发:创建proc文件并与用户空间接口
目录 Proc文件系统 创建一个新的Proc文件 实现读取处理程序 与用户空间交换数据 实现写处理程序 用户空间应用 在第一篇文章中,我们构建了一个具有初始化和退出功能的简单内核模块 ,并介绍了内核编 ...
- linux内核协议栈 UDP之数据报接收过程
UDP报文接收概述 UDP数据报的接收要分两部分来看: 网络层接收完数据包后递交给UDP后,UDP的处理过程.该过程UDP需要做的工作就是接收数据包并对其进行校验,校验成功后将其放入接收队列 sk_r ...
- Linux网络协议栈:网络包接收过程
目录 一 Linux网络收包总览 二 Linux启动 2.1 创建ksoftirqd内核线程 2.2 网络子系统初始化 2.3 协议栈注册 2.4 网卡驱动初始化 2.5 启动网卡 三 迎接数据的到来 ...
最新文章
- Mysql 中创建数据库并插入数据
- Codeforces Round #131 (Div. 2) B. Hometask dp
- 如何搞懂容器的核心技术点?
- Application_Error
- 让华泰浮盈10亿美金的AssetMark-统包资产管理平台价值几何?
- linux chmod 777 r,chmod -R 777 的3种补救办法,附有linux chmod命令语法和结构详解
- 网络爬虫(网络蜘蛛)之网页抓取
- 电子邮件收发原理和JavaMail开发
- android 微信6.1版本,微信6.1老版本-微信旧版6.1安卓版下载-Appfound
- 《微信小程序-进阶篇》组件封装-Icon组件的实现(二)
- Mac系统文件在Win解压乱码问题
- [小工具] LenovoOneLite 多屏协同工具
- Design Patterns of SourceMaking
- 2017-2018-2 20179216 《网络攻防与实践》 第八周总结
- CSMA/CD与CSMA/CA的区别
- 发布工程到私有仓库maven
- 美团即时零售的优势不止“快”
- SheetJS生成/解析Excel
- Matlab--创建函数(function)
- xwiki开发者指南-编写一个XWiki组件
热门文章
- 【★】EIGRP终极解析!
- JavaScript全局变量的本质及页面共享问题
- java dev guide
- maven 打包数据库加密_SpringBoot项目application.yml文件数据库配置密码加密的方法...
- 计算机考研哪个专业代码少,考研专业代码到底是什么意思
- 编译是检查c语言,为什么CMake检查C编译器?
- mysql一对多增删改查_SQLAlchemy 增删改查 一对多 多对多
- com 组件调用不起来_Spring Cloud Alibaba,分布式服务调用(四)
- 社工大师_社工,与弱势者同行 | TED演讲
- 国潮中国风工作学习总结学习PPT模板