用户创建socket

调用内核__sock_create

int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern)
{int err;struct socket *sock;const struct net_proto_family *pf;/**      Check protocol is in range*/if (family < 0 || family >= NPROTO)return -EAFNOSUPPORT;if (type < 0 || type >= SOCK_MAX)return -EINVAL;/* Compatibility.This uglymoron is moved from INET layer to here to avoiddeadlock in module load.*/if (family == PF_INET && type == SOCK_PACKET) {pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",current->comm);family = PF_PACKET;}err = security_socket_create(family, type, protocol, kern);if (err)return err;/** Allocate the socket and allow the family to set things up. if*  the protocol is 0, the family is instructed to select an appropriate*   default.*/sock = sock_alloc();if (!sock) {net_warn_ratelimited("socket: no more sockets\n");return -ENFILE;  /* Not exactly a match, but its theclosest posix thing */}sock->type = type;#ifdef CONFIG_MODULES/* Attempt to load a protocol module if the find failed.** 12/09/1996 Marcin: But! this makes REALLY only sense, if the user* requested real, full-featured networking support upon configuration.* Otherwise module support will break!*/if (rcu_access_pointer(net_families[family]) == NULL)request_module("net-pf-%d", family);
#endifrcu_read_lock();pf = rcu_dereference(net_families[family]);err = -EAFNOSUPPORT;if (!pf)goto out_release;/** We will call the ->create function, that possibly is in a loadable* module, so we have to bump that loadable module refcnt first.*/if (!try_module_get(pf->owner))goto out_release;/* Now protected by module ref count */rcu_read_unlock();err = pf->create(net, sock, protocol, kern);if (err < 0)goto out_module_put;/** Now to bump the refcnt of the [loadable] module that owns this* socket at sock_release time we decrement its refcnt.*/if (!try_module_get(sock->ops->owner))goto out_module_busy;/** Now that we're done with the ->create function, the [loadable]* module can have its refcnt decremented*/module_put(pf->owner);err = security_socket_post_create(sock, family, type, protocol, kern);if (err)goto out_sock_release;*res = sock;return 0;out_module_busy:err = -EAFNOSUPPORT;
out_module_put:sock->ops = NULL;module_put(pf->owner);
out_sock_release:sock_release(sock);return err;out_release:rcu_read_unlock();goto out_sock_release;
}
EXPORT_SYMBOL(__sock_create);

/**
 * sock_create - creates a socket
 * @family: protocol family (AF_INET, ...)
 * @type: communication type (SOCK_STREAM, ...)
 * @protocol: protocol (0, ...)
 * @res: new socket
 *
 * Thin wrapper around __sock_create(): supplies the calling task's network
 * namespace (current->nsproxy->net_ns) and passes 0 for the kern argument.
 * Returns 0 or an error. This function internally uses GFP_KERNEL.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *caller_ns = current->nsproxy->net_ns;

	return __sock_create(caller_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);

利用sock_ioctl控制socket

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{struct socket *sock;struct sock *sk;void __user *argp = (void __user *)arg;int pid, err;struct net *net;sock = file->private_data;sk = sock->sk;net = sock_net(sk);if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {struct ifreq ifr;bool need_copyout;if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))return -EFAULT;err = dev_ioctl(net, cmd, &ifr, &need_copyout);if (!err && need_copyout)if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))return -EFAULT;} else
#ifdef CONFIG_WEXT_COREif (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {err = wext_handle_ioctl(net, cmd, argp);} else
#endifswitch (cmd) {case FIOSETOWN:case SIOCSPGRP:err = -EFAULT;if (get_user(pid, (int __user *)argp))break;err = f_setown(sock->file, pid, 1);break;case FIOGETOWN:case SIOCGPGRP:err = put_user(f_getown(sock->file),(int __user *)argp);break;case SIOCGIFBR:case SIOCSIFBR:case SIOCBRADDBR:case SIOCBRDELBR:err = -ENOPKG;if (!br_ioctl_hook)request_module("bridge");mutex_lock(&br_ioctl_mutex);if (br_ioctl_hook)err = br_ioctl_hook(net, cmd, argp);mutex_unlock(&br_ioctl_mutex);break;case SIOCGIFVLAN:case SIOCSIFVLAN:err = -ENOPKG;if (!vlan_ioctl_hook)request_module("8021q");mutex_lock(&vlan_ioctl_mutex);if (vlan_ioctl_hook)err = vlan_ioctl_hook(net, argp);mutex_unlock(&vlan_ioctl_mutex);break;case SIOCADDDLCI:case SIOCDELDLCI:err = -ENOPKG;if (!dlci_ioctl_hook)request_module("dlci");mutex_lock(&dlci_ioctl_mutex);if (dlci_ioctl_hook)err = dlci_ioctl_hook(cmd, argp);mutex_unlock(&dlci_ioctl_mutex);break;case SIOCGSKNS:err = -EPERM;if (!ns_capable(net->user_ns, CAP_NET_ADMIN))break;err = open_related_ns(&net->ns, get_net_ns);break;case SIOCGSTAMP_OLD:case SIOCGSTAMPNS_OLD:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_OLD,!IS_ENABLED(CONFIG_64BIT));break;case SIOCGSTAMP_NEW:case SIOCGSTAMPNS_NEW:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_NEW,false);break;default:err = sock_do_ioctl(net, sock, cmd, arg);break;}return err;
}

虚拟层inet协议族的创建inet_create(注:下面贴出的代码实际上是sock_ioctl的重复,并非inet_create的实现)

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{struct socket *sock;struct sock *sk;void __user *argp = (void __user *)arg;int pid, err;struct net *net;sock = file->private_data;sk = sock->sk;net = sock_net(sk);if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {struct ifreq ifr;bool need_copyout;if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))return -EFAULT;err = dev_ioctl(net, cmd, &ifr, &need_copyout);if (!err && need_copyout)if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))return -EFAULT;} else
#ifdef CONFIG_WEXT_COREif (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {err = wext_handle_ioctl(net, cmd, argp);} else
#endifswitch (cmd) {case FIOSETOWN:case SIOCSPGRP:err = -EFAULT;if (get_user(pid, (int __user *)argp))break;err = f_setown(sock->file, pid, 1);break;case FIOGETOWN:case SIOCGPGRP:err = put_user(f_getown(sock->file),(int __user *)argp);break;case SIOCGIFBR:case SIOCSIFBR:case SIOCBRADDBR:case SIOCBRDELBR:err = -ENOPKG;if (!br_ioctl_hook)request_module("bridge");mutex_lock(&br_ioctl_mutex);if (br_ioctl_hook)err = br_ioctl_hook(net, cmd, argp);mutex_unlock(&br_ioctl_mutex);break;case SIOCGIFVLAN:case SIOCSIFVLAN:err = -ENOPKG;if (!vlan_ioctl_hook)request_module("8021q");mutex_lock(&vlan_ioctl_mutex);if (vlan_ioctl_hook)err = vlan_ioctl_hook(net, argp);mutex_unlock(&vlan_ioctl_mutex);break;case SIOCADDDLCI:case SIOCDELDLCI:err = -ENOPKG;if (!dlci_ioctl_hook)request_module("dlci");mutex_lock(&dlci_ioctl_mutex);if (dlci_ioctl_hook)err = dlci_ioctl_hook(cmd, argp);mutex_unlock(&dlci_ioctl_mutex);break;case SIOCGSKNS:err = -EPERM;if (!ns_capable(net->user_ns, CAP_NET_ADMIN))break;err = open_related_ns(&net->ns, get_net_ns);break;case SIOCGSTAMP_OLD:case SIOCGSTAMPNS_OLD:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_OLD,!IS_ENABLED(CONFIG_64BIT));break;case SIOCGSTAMP_NEW:case SIOCGSTAMPNS_NEW:if (!sock->ops->gettstamp) {err = -ENOIOCTLCMD;break;}err = sock->ops->gettstamp(sock, argp,cmd == SIOCGSTAMP_NEW,false);break;default:err = sock_do_ioctl(net, sock, cmd, arg);break;}return err;
}

其中协议族的结构proto_ops结构如下

struct proto_ops {int        family;struct module    *owner;int      (*release)   (struct socket *sock);int      (*bind)      (struct socket *sock,struct sockaddr *myaddr,int sockaddr_len);int     (*connect)   (struct socket *sock,struct sockaddr *vaddr,int sockaddr_len, int flags);int       (*socketpair)(struct socket *sock1,struct socket *sock2);int        (*accept)    (struct socket *sock,struct socket *newsock, int flags, bool kern);int     (*getname)   (struct socket *sock,struct sockaddr *addr,int peer);__poll_t  (*poll)      (struct file *file, struct socket *sock,struct poll_table_struct *wait);int        (*ioctl)     (struct socket *sock, unsigned int cmd,unsigned long arg);
#ifdef CONFIG_COMPATint     (*compat_ioctl) (struct socket *sock, unsigned int cmd,unsigned long arg);
#endifint       (*gettstamp) (struct socket *sock, void __user *userstamp,bool timeval, bool time32);int        (*listen)    (struct socket *sock, int len);int     (*shutdown)  (struct socket *sock, int flags);int       (*setsockopt)(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen);int     (*getsockopt)(struct socket *sock, int level,int optname, char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPATint     (*compat_setsockopt)(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen);int      (*compat_getsockopt)(struct socket *sock, int level,int optname, char __user *optval, int __user *optlen);
#endifvoid      (*show_fdinfo)(struct seq_file *m, struct socket *sock);int     (*sendmsg)   (struct socket *sock, struct msghdr *m,size_t total_len);/* Notes for implementing recvmsg:* ===============================* msg->msg_namelen should get updated by the recvmsg handlers* iff msg_name != NULL. It is by default 0 to prevent* returning uninitialized memory to user space.  The recvfrom* handlers can assume that msg.msg_name is either NULL or has* a minimum size of sizeof(struct sockaddr_storage).*/int       (*recvmsg)   (struct socket *sock, struct msghdr *m,size_t total_len, int flags);int        (*mmap)      (struct file *file, struct socket *sock,struct vm_area_struct * vma);ssize_t       (*sendpage)  (struct socket *sock, struct page *page,int offset, size_t size, int flags);ssize_t    (*splice_read)(struct socket *sock,  loff_t *ppos,struct pipe_inode_info *pipe, size_t len, unsigned int flags);int     (*set_peek_off)(struct sock *sk, int val);int       (*peek_len)(struct socket *sock);/* The following functions are called internally by kernel with* sock lock already held.*/int      (*read_sock)(struct sock *sk, read_descriptor_t *desc,sk_read_actor_t recv_actor);int       (*sendpage_locked)(struct sock *sk, struct page *page,int offset, size_t size, int flags);int       (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,size_t size);int      (*set_rcvlowat)(struct sock *sk, int val);
};

其包含的函数指针与struct proto很类似,但proto_ops位于socket层

上图来源:https://blog.csdn.net/feiwatson/article/details/82785697

接下来分为字节流(TCP)和数据报(UDP),二者结构分别为:

const struct proto_ops inet_stream_ops = {.family          = PF_INET,.owner           = THIS_MODULE,.release     = inet_release,.bind           = inet_bind,.connect       = inet_stream_connect,.socketpair      = sock_no_socketpair,.accept           = inet_accept,.getname     = inet_getname,.poll           = tcp_poll,.ioctl          = inet_ioctl,.gettstamp    = sock_gettstamp,.listen           = inet_listen,.shutdown    = inet_shutdown,.setsockopt    = sock_common_setsockopt,.getsockopt       = sock_common_getsockopt,.sendmsg      = inet_sendmsg,.recvmsg    = inet_recvmsg,
#ifdef CONFIG_MMU.mmap         = tcp_mmap,
#endif.sendpage    = inet_sendpage,.splice_read       = tcp_splice_read,.read_sock       = tcp_read_sock,.sendmsg_locked    = tcp_sendmsg_locked,.sendpage_locked   = tcp_sendpage_locked,.peek_len       = tcp_peek_len,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_sock_common_setsockopt,.compat_getsockopt = compat_sock_common_getsockopt,.compat_ioctl      = inet_compat_ioctl,
#endif.set_rcvlowat    = tcp_set_rcvlowat,
};
EXPORT_SYMBOL(inet_stream_ops);const struct proto_ops inet_dgram_ops = {.family           = PF_INET,.owner           = THIS_MODULE,.release     = inet_release,.bind           = inet_bind,.connect       = inet_dgram_connect,.socketpair       = sock_no_socketpair,.accept           = sock_no_accept,.getname      = inet_getname,.poll           = udp_poll,.ioctl          = inet_ioctl,.gettstamp    = sock_gettstamp,.listen           = sock_no_listen,.shutdown     = inet_shutdown,.setsockopt    = sock_common_setsockopt,.getsockopt       = sock_common_getsockopt,.sendmsg      = inet_sendmsg,.recvmsg    = inet_recvmsg,.mmap           = sock_no_mmap,.sendpage       = inet_sendpage,.set_peek_off      = sk_set_peek_off,
#ifdef CONFIG_COMPAT.compat_setsockopt = compat_sock_common_setsockopt,.compat_getsockopt = compat_sock_common_getsockopt,.compat_ioctl      = inet_compat_ioctl,
#endif
};
EXPORT_SYMBOL(inet_dgram_ops);

以数据报(UDP)为例,结构udp_prot结构如下:

struct proto udp_prot = {.name          = "UDP",.owner           = THIS_MODULE,.close           = udp_lib_close,.pre_connect       = udp_pre_connect,.connect     = ip4_datagram_connect,.disconnect     = udp_disconnect,.ioctl            = udp_ioctl,.init          = udp_init_sock,.destroy       = udp_destroy_sock,.setsockopt     = udp_setsockopt,.getsockopt       = udp_getsockopt,.sendmsg      = udp_sendmsg,.recvmsg     = udp_recvmsg,.sendpage        = udp_sendpage,.release_cb     = ip4_datagram_release_cb,.hash            = udp_lib_hash,.unhash         = udp_lib_unhash,.rehash           = udp_v4_rehash,.get_port      = udp_v4_get_port,.memory_allocated    = &udp_memory_allocated,.sysctl_mem        = sysctl_udp_mem,.sysctl_wmem_offset   = offsetof(struct net, ipv4.sysctl_udp_wmem_min),.sysctl_rmem_offset   = offsetof(struct net, ipv4.sysctl_udp_rmem_min),.obj_size     = sizeof(struct udp_sock),.h.udp_table     = &udp_table,
#ifdef CONFIG_COMPAT.compat_setsockopt  = compat_udp_setsockopt,.compat_getsockopt = compat_udp_getsockopt,
#endif.diag_destroy     = udp_abort,
};
EXPORT_SYMBOL(udp_prot);

tcp_prot结构如下:

struct proto tcp_prot = {.name          = "TCP",.owner           = THIS_MODULE,.close           = tcp_close,.pre_connect       = tcp_v4_pre_connect,.connect      = tcp_v4_connect,.disconnect       = tcp_disconnect,.accept           = inet_csk_accept,.ioctl           = tcp_ioctl,.init          = tcp_v4_init_sock,.destroy        = tcp_v4_destroy_sock,.shutdown        = tcp_shutdown,.setsockopt     = tcp_setsockopt,.getsockopt       = tcp_getsockopt,.keepalive        = tcp_set_keepalive,.recvmsg       = tcp_recvmsg,.sendmsg     = tcp_sendmsg,.sendpage        = tcp_sendpage,.backlog_rcv        = tcp_v4_do_rcv,.release_cb        = tcp_release_cb,.hash         = inet_hash,.unhash            = inet_unhash,.get_port        = inet_csk_get_port,.enter_memory_pressure = tcp_enter_memory_pressure,.leave_memory_pressure = tcp_leave_memory_pressure,.stream_memory_free    = tcp_stream_memory_free,.sockets_allocated    = &tcp_sockets_allocated,.orphan_count     = &tcp_orphan_count,.memory_allocated  = &tcp_memory_allocated,.memory_pressure   = &tcp_memory_pressure,.sysctl_mem     = sysctl_tcp_mem,.sysctl_wmem_offset   = offsetof(struct net, ipv4.sysctl_tcp_wmem),.sysctl_rmem_offset   = offsetof(struct net, ipv4.sysctl_tcp_rmem),.max_header       = MAX_TCP_HEADER,.obj_size     = sizeof(struct tcp_sock),.slab_flags      = SLAB_TYPESAFE_BY_RCU,.twsk_prot      = &tcp_timewait_sock_ops,.rsk_prot     = &tcp_request_sock_ops,.h.hashinfo        = &tcp_hashinfo,.no_autobind       = true,
#ifdef CONFIG_COMPAT.compat_setsockopt  = compat_tcp_setsockopt,.compat_getsockopt = compat_tcp_getsockopt,
#endif.diag_destroy     = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

其具体对应的钩子函数(回调函数)结构如下图所示:

经过一系列调用关系,最终进入网络接口层,该层负责完成网络驱动的功能,然后就进入具体的网络设备。

Linux内核协议栈-一个socket的调用过程,从用户态接口到底层硬件相关推荐

  1. 实验六:分析Linux内核创建一个新进程的过程

    20135108 李泽源 阅读理解task_struct数据结构http://codelab.shiyanlou.com/xref/linux-3.18.6/include/linux/sched.h ...

  2. 6、分析Linux内核创建一个新进程的过程

    姓名:周毅原创作品转载请注明出处 <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 这篇文章主要分析lin ...

  3. linux内核创建用户,分析Linux内核创建一个新进程的过程

    谢文杰 + 原创作品转载请注明出处 + <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 一.实验目的 阅 ...

  4. linux搭建一个的过程,Linux内核创建一个新进程的过程

    此文仅用于MOOCLinux内核分析作业 task_struct数据结构 根据wiki的定义,进程是计算机中已运行程序的实体.在面向线程设计的系统(Linux 2.6及更新的版本)中,进程本身不是基本 ...

  5. Linux内核创建一个新进程的过程

    作者:王鹤楼 原创作品转载请注明出处 <Linux内核分析>MOOC课程 http://mooc.study.163.com/course/USTC-1000029000 操作系统的三大功 ...

  6. Linux内核协议栈- 创建socket:__sock_create函数调用关系

    Table of Contents __sock_create函数 结构 socket_state struct socket struct sock struct proto_ops 函数原型 __ ...

  7. Linux内核开发:创建proc文件并与用户空间接口

    目录 Proc文件系统 创建一个新的Proc文件 实现读取处理程序 与用户空间交换数据 实现写处理程序 用户空间应用 在第一篇文章中,我们构建了一个具有初始化和退出功能的简单内核模块 ,并介绍了内核编 ...

  8. linux内核协议栈 UDP之数据报接收过程

    UDP报文接收概述 UDP数据报的接收要分两部分来看: 网络层接收完数据包后递交给UDP后,UDP的处理过程.该过程UDP需要做的工作就是接收数据包并对其进行校验,校验成功后将其放入接收队列 sk_r ...

  9. Linux网络协议栈:网络包接收过程

    目录 一 Linux网络收包总览 二 Linux启动 2.1 创建ksoftirqd内核线程 2.2 网络子系统初始化 2.3 协议栈注册 2.4 网卡驱动初始化 2.5 启动网卡 三 迎接数据的到来 ...

最新文章

  1. Mysql 中创建数据库并插入数据
  2. Codeforces Round #131 (Div. 2) B. Hometask dp
  3. 如何搞懂容器的核心技术点?
  4. Application_Error
  5. 让华泰浮盈10亿美金的AssetMark-统包资产管理平台价值几何?
  6. linux chmod 777 r,chmod -R 777 的3种补救办法,附有linux chmod命令语法和结构详解
  7. 网络爬虫(网络蜘蛛)之网页抓取
  8. 电子邮件收发原理和JavaMail开发
  9. android 微信6.1版本,微信6.1老版本-微信旧版6.1安卓版下载-Appfound
  10. 《微信小程序-进阶篇》组件封装-Icon组件的实现(二)
  11. Mac系统文件在Win解压乱码问题
  12. [小工具] LenovoOneLite 多屏协同工具
  13. Design Patterns of SourceMaking
  14. 2017-2018-2 20179216 《网络攻防与实践》 第八周总结
  15. CSMA/CD与CSMA/CA的区别
  16. 发布工程到私有仓库maven
  17. 美团即时零售的优势不止“快”
  18. SheetJS生成/解析Excel
  19. Matlab--创建函数(function)
  20. xwiki开发者指南-编写一个XWiki组件

热门文章

  1. 【★】EIGRP终极解析!
  2. JavaScript全局变量的本质及页面共享问题
  3. java dev guide
  4. maven 打包数据库加密_SpringBoot项目application.yml文件数据库配置密码加密的方法...
  5. 计算机考研哪个专业代码少,考研专业代码到底是什么意思
  6. 编译是检查c语言,为什么CMake检查C编译器?
  7. mysql一对多增删改查_SQLAlchemy 增删改查 一对多 多对多
  8. com 组件调用不起来_Spring Cloud Alibaba,分布式服务调用(四)
  9. 社工大师_社工,与弱势者同行 | TED演讲
  10. 国潮中国风工作学习总结学习PPT模板