作者:王鹤楼
原创作品转载请注明出处 《Linux内核分析》MOOC课程
http://mooc.study.163.com/course/USTC-1000029000

操作系统的三大功能

进程管理
内存管理
文件系统

task_struct

用来描述进程的数据结构,可以理解为进程的属性。

进程状态、进程调度信息、各种标识符、进程通信有关信息、时间和定时器信息、进程链接信息、文件系统信息、虚拟内存信息、页面管理信息、对称多处理器和处理器相关的环境等,该数据结构被定义为task_struct

进程控制块 PCB

是操作系统核心中一种数据结构,主要表示进程状态。

进程状态

在实验楼里进行如下操作,因为进行到一半环境太卡,没有充分截图
执行操作:
rm -rf menu
然后克隆一份新的代码:
git clone https://github.com/mengning/menu.git
cd menu
mv fork_test.c test.c
make rootfs
出现菜单后执行fork命令,可以从打印的信息看出,已经创建了子进程

qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S

gdb
target remote:1234
file linux-3.18.6/vmlinux
b sys_clone
b do_fork
b dup_task_struct
b copy_process
b copy_thread
b ret_from_fork
c //开始执行

fork函数

调用一次返回两次,父进程中返回子进程的pid,子进程中返回0

/*
 * fork: create a child via do_fork() with SIGCHLD as the only flag.
 * Without CONFIG_MMU the call is rejected with -EINVAL.
 */
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
#else
	/* can not support in nommu mode */
	return -EINVAL;
#endif
}
#endif

/* vfork */
#ifdef __ARCH_WANT_SYS_VFORK
/*
 * vfork: same entry point as fork, but do_fork() is called with
 * CLONE_VFORK | CLONE_VM in addition to SIGCHLD.
 */
SYSCALL_DEFINE0(vfork)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
		       0, NULL, NULL);
}
#endif

/* clone */
#ifdef __ARCH_WANT_SYS_CLONE
/*
 * clone: the argument order of the syscall depends on which
 * CONFIG_CLONE_BACKWARDS* option is set; every variant forwards
 * clone_flags, newsp, parent_tidptr and child_tidptr to do_fork().
 * tls_val (and stack_size in the BACKWARDS3 variant) is not used
 * in this body.
 */
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int, tls_val,
		 int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int, stack_size,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#endif
{
	return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}
#endif

从以上代码中可以看出,fork、vfork、clone最终都是调用 do_fork 来创建新进程

do_fork

/*
 * do_fork - common back end of fork/vfork/clone (abridged excerpt).
 *
 * Builds the child with copy_process(), optionally reports its pid to
 * user space, wakes it up, and for CLONE_VFORK waits for the child.
 *
 * Returns the child's pid number as seen through pid_vnr() on success,
 * or the error encoded in the pointer returned by copy_process().
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	/* Pointer to the process descriptor of the child. */
	struct task_struct *p;

	/*
	 * ... elided in this excerpt. `nr` and `trace` are used below,
	 * but their declarations are not shown here.
	 */

	/*
	 * Copy the process descriptor; copy_process() returns a
	 * task_struct pointer (or an ERR_PTR-encoded error).
	 */
	p = copy_process(clone_flags, stack_start, stack_size,
			 child_tidptr, NULL, trace);

	if (!IS_ERR(p)) {
		struct completion vfork;
		struct pid *pid;

		trace_sched_process_fork(current, p);

		/* Fetch the pid of the newly created process descriptor. */
		pid = get_task_pid(p, PIDTYPE_PID);
		nr = pid_vnr(pid);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		/* For vfork(), set up the completion the parent waits on. */
		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
			get_task_struct(p);
		}

		/* Hand the child to the scheduler so it can be run. */
		wake_up_new_task(p);

		/* Fork is done and the child is about to start running. */
		if (unlikely(trace))
			ptrace_event_pid(trace, pid);

		/* For vfork(), the parent waits until the child is done. */
		if (clone_flags & CLONE_VFORK) {
			if (!wait_for_vfork_done(p, &vfork))
				ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
		}

		put_pid(pid);
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}

do_fork 流程

调用 copy_process 为子进程复制出一份进程信息
如果是 vfork,初始化完成处理信息(completion)
调用 wake_up_new_task 将子进程加入调度器,为之分配 CPU
如果是 vfork,父进程等待子进程完成 exec 替换自己的地址空间

进入到copy_process函数

/*
 * copy_process - build a new task as a copy of `current`.
 *
 * Validates the clone_flags combination, duplicates the task_struct,
 * resets per-task bookkeeping, copies or shares each resource through
 * the copy_*() helpers, assigns a pid and links the task into the
 * task lists. The new task is not woken up here.
 *
 * Returns the new task_struct on success, or ERR_PTR(retval) after
 * unwinding through the bad_fork_* labels in reverse setup order.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	/* Pointer to the process descriptor being created. */
	struct task_struct *p;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/*
	 * Siblings of global init remain as zombies on exit since they are
	 * not reaped by their parent (swapper). To solve this and to avoid
	 * multi-rooted process trees, prevent global and container-inits
	 * from creating siblings.
	 */
	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	/*
	 * If the new process will be in a different pid namespace
	 * don't allow the creation of threads.
	 */
	if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
	    (task_active_pid_ns(current) != current->nsproxy->pid_ns))
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	/* Duplicate the current task_struct. */
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);
	get_seccomp_filter(p);

	/* Initialise the mutex-related state of the new task. */
	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	/*
	 * Refuse the fork when the user's process count has reached
	 * RLIMIT_NPROC, unless the caller holds CAP_SYS_ADMIN or
	 * CAP_SYS_RESOURCE, or the user is INIT_USER.
	 */
	if (atomic_read(&p->real_cred->user->processes) >=
			task_rlimit(p, RLIMIT_NPROC)) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->real_cred->user != INIT_USER)
			goto bad_fork_free;
	}
	current->flags &= ~PF_NPROC_EXCEEDED;

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	retval = -EAGAIN;
	/*
	 * Refuse the fork when nr_threads has reached max_threads
	 * (per the original note, max_threads depends on memory size).
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	p->vfork_done = NULL;
	/* Initialise the spinlock. */
	spin_lock_init(&p->alloc_lock);

	/* Initialise the pending-signal set. */
	init_sigpending(&p->pending);

	p->utime = p->stime = p->gtime = 0;
	p->utimescaled = p->stimescaled = 0;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	seqlock_init(&p->vtime_seqlock);
	p->vtime_snap = 0;
	p->vtime_snap_whence = VTIME_SLEEPING;
#endif

#if defined(SPLIT_RSS_COUNTING)
	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	/* Initialise the CPU timers. */
	posix_cpu_timers_init(p);

	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
	p->io_context = NULL;
	p->audit_context = NULL;
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_begin(current);
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_cgroup;
	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_CPUSETS
	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
	seqcount_init(&p->mems_allowed_seq);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
	p->hardirqs_enabled = 0;
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif
#ifdef CONFIG_MEMCG
	p->memcg_batch.do_batch = 0;
	p->memcg_batch.memcg = NULL;
#endif

	/* Perform scheduler related setup. Assign this task to a CPU. */
	/*
	 * Per the original note: initialises the scheduling data of the
	 * task and sets its state to TASK_RUNNING.
	 */
	sched_fork(p);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	/*
	 * copy all the process information: semaphore undo state, open
	 * files, filesystem info, signal handlers, signals, memory
	 * descriptor, namespaces and I/O context.
	 */
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	/* Initialise the child's kernel stack. */
	retval = copy_thread(clone_flags, stack_start, stack_size, p);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		/* Allocate a new pid for the new process. */
		pid = alloc_pid(p->nsproxy->pid_ns);
		if (!pid)
			goto bad_fork_cleanup_io;
	}

	/* Set the child's pid. */
	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	uprobe_copy_process(p);
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	/*
	 * Syscall tracing and stepping should be turned off in the
	 * child regardless of CLONE_PTRACE.
	 */
	user_disable_single_step(p);
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	if (clone_flags & CLONE_THREAD)
		p->exit_signal = -1;
	else if (clone_flags & CLONE_PARENT)
		p->exit_signal = current->group_leader->exit_signal;
	else
		p->exit_signal = (clone_flags & CSIGNAL);

	p->pdeath_signal = 0;
	p->exit_state = 0;

	p->nr_dirtied = 0;
	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
	p->dirty_paused_when = 0;

	/*
	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);
	p->task_works = NULL;

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * it's process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		current->signal->nr_threads++;
		atomic_inc(&current->signal->live);
		atomic_inc(&current->signal->sigcnt);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
	}

	if (likely(p->pid)) {
		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

		if (thread_group_leader(p)) {
			if (is_child_reaper(pid)) {
				ns_of_pid(pid)->child_reaper = p;
				p->signal->flags |= SIGNAL_UNKILLABLE;
			}

			p->signal->leader_pid = pid;
			p->signal->tty = tty_kref_get(current->signal->tty);
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__this_cpu_inc(process_counts);
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_end(current);
	perf_event_fork(p);

	trace_task_newtask(p, clone_flags);

	return p;

	/* Error unwinding: each label undoes one setup step, newest first. */
bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	if (p->io_context)
		exit_io_context(p);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_event_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_end(current);
	cgroup_exit(p, 0);
	delayacct_tsk_free(p);
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
	exit_creds(p);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}

copy_thread函数

int copy_thread(unsigned long clone_flags, unsigned long sp,unsigned long arg, struct task_struct *p)
{//获取寄存器信息struct pt_regs *childregs = task_pt_regs(p);struct task_struct *tsk;int err;p->thread.sp = (unsigned long) childregs;p->thread.sp0 = (unsigned long) (childregs+1);memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));if (unlikely(p->flags & PF_KTHREAD)) {//内核线程memset(childregs, 0, sizeof(struct pt_regs));p->thread.ip = (unsigned long) ret_from_kernel_thread;task_user_gs(p) = __KERNEL_STACK_CANARY;childregs->ds = __USER_DS;childregs->es = __USER_DS;childregs->fs = __KERNEL_PERCPU;childregs->bx = sp; /* function */childregs->bp = arg;childregs->orig_ax = -1;childregs->cs = __KERNEL_CS | get_kernel_rpl();childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;p->thread.io_bitmap_ptr = NULL;return 0;}//将当前寄存器信息复制给子进程*childregs = *current_pt_regs();//子进程 eax 置 0,因此fork 在子进程返回0childregs->ax = 0;if (sp)childregs->sp = sp;//子进程ip 设置为ret_from_fork,因此子进程从ret_from_fork開始运行p->thread.ip = (unsigned long) ret_from_fork;//……return err;
}

ret_from_fork

/*
 * ret_from_fork: first code run by a newly forked task.
 * Preserves %eax across the call to schedule_tail, loads 0x0202 into
 * eflags, then jumps to syscall_exit.
 */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	pushl_cfi %eax
	call schedule_tail
	GET_THREAD_INFO(%ebp)
	popl_cfi %eax
	pushl_cfi $0x0202		# Reset kernel eflags
	popfl_cfi
	jmp syscall_exit
	CFI_ENDPROC
END(ret_from_fork)

总结

dup_task_struct中为子进程分配了新的堆栈
调用了sched_fork,将其置为TASK_RUNNING
copy_thread中将父进程的寄存器上下文复制给子进程,保证了父子进程的堆栈信息是一致的
将子进程的eip(thread.ip)设置为ret_from_fork的地址
最终子进程从ret_from_fork开始执行

Linux内核创建一个新进程的过程相关推荐

  1. 实验六:分析Linux内核创建一个新进程的过程

    20135108 李泽源 阅读理解task_struct数据结构http://codelab.shiyanlou.com/xref/linux-3.18.6/include/linux/sched.h ...

  2. linux内核创建用户,分析Linux内核创建一个新进程的过程

    谢文杰 + 原创作品转载请注明出处 + <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 一.实验目的 阅 ...

  3. linux搭建一个的过程,Linux内核创建一个新进程的过程

    此文仅用于MOOCLinux内核分析作业 task_struct数据结构 根据wiki的定义,进程是计算机中已运行程序的实体.在面向线程设计的系统(Linux 2.6及更新的版本)中,进程本身不是基本 ...

  4. 6、分析Linux内核创建一个新进程的过程

    姓名:周毅原创作品转载请注明出处 <Linux内核分析>MOOC课程http://mooc.study.163.com/course/USTC-1000029000 这篇文章主要分析lin ...

  5. 6. Linux内核创建一个新进程的过程分析

    ##################################### 作者:张卓 原创作品转载请注明出处:<Linux操作系统分析>MOOC课程 http://www.xuetang ...

  6. Linux内核协议栈-一个socket的调用过程,从用户态接口到底层硬件

    用户创建socket 调用内核__sock_create int __sock_create(struct net *net, int family, int type, int protocol,s ...

  7. Linux内核分析 笔记六 进程的描述和进程的创建 ——by王玥

    一.知识点总结 (一)进程的描述 1.操作系统内核里有三大功能: 进程管理 内存管理 文件系统 2.进程描述符:task_struct 2.进程描述符--struct task_struct 1. p ...

  8. 《深入理解Linux内核》-3.3. 进程切换

    3.3. 进程切换 为了控制进程的执行,内核必须能够挂起正在运行的进程并恢复运行其他之前被挂起的进程.这个活动通过进程切换,任务切换或上下文切换执行这种各样的操作.接下来的章节介绍Linux系统上的进 ...

  9. 假设Linux系统中不存在文件newfile,现要创建一个新文件newfile,以下哪个命令无法实现该功能。...

    要创建一个新文件newfile,可以使用Linux系统中的touch命令.例如: touchnewfile 这样就可以在Linux系统中创建一个名为newfile的空文件了. 如果使用的是cp命令,那 ...

最新文章

  1. python遍历queryset_查询集 QuerySet
  2. MFC Map 许多警告
  3. 一种算法的实现,几个相同大小的div组合在一起,判断是不是矩形
  4. 利用Docker volume修改Nginx Docker镜像里index.html
  5. json字符串生成C#实体类的工具
  6. div跳转html页面底部,即使没有内容,如何强制DIV块扩展到页面底部?
  7. 老李谈HTTP1.1的长连接
  8. (Photo Metadata Remover)Android App 一键去除照片 EXIF 隐私信息
  9. SQL Server 2005 安装问题 性能监视器计数器要求 (错误) 的解决办法
  10. Atitit ioc 之道 attilax著 第2章 装配Bean 33 2.1 Spring配置的可选方案 34 2.2 自动化装配bean 35 2.2.1 创建可被发现的bean 35 2.
  11. 三维扫描3D打印在创客教育中的实际应用
  12. uniapp app 腾讯云 IM 创建群组(陌生交友)
  13. Android 暗黑模式
  14. android 手机ssh客户端,android手机ssh客户端ConnectBot
  15. uniapp引入font-awsome字体图标-疑难解决
  16. matlab反双曲函数表达,matlab求反正切
  17. 2021年最新互联网大厂中秋礼盒PK!
  18. 告马云书 -- 谈阿里云OS 删除用户应用事件
  19. USB 检测外接摄像头
  20. gitlab导入project

热门文章

  1. TabLayout属性详解
  2. 项目中使用completablefuture_“工业冷冻用中大型冷盐水机组使用R290替代R22项目”通过验收...
  3. linux里c库和gnu c库,Linux下的C的库文件和头文件有什么区别-
  4. Java基础:Java异常处理
  5. 详解Linux多线程编程
  6. dom4j生成、解析xml
  7. Luogu4099 HEOI2013 SAO 组合、树形DP
  8. Bootstrap 字体图标(Glyphicons)
  9. 【20171115中】nmap 使用脚本爆破telnet密码
  10. 新建git仓库--留