linux内核启动过程5:启动用户空间
上一篇<<linux内核启动过程4:内核运行时>>分析到了内核进入运行时状态(不退出),本篇分析用户空间(用户层)的加载过程。
启动用户空间
进入kernel_init函数,在这里完成用户空间的初始化,并启动1号进程(pid 1):
static int __ref kernel_init(void *unused)
{int ret;kernel_init_freeable();
进入kernel_init_freeable函数:
/** Wait until kthreadd is all set-up.*/wait_for_completion(&kthreadd_done);/* Now the scheduler is fully set up and can do blocking allocations */gfp_allowed_mask = __GFP_BITS_MASK;// 调度器已经完全设置好,可以执行阻塞分配/** init can allocate pages on any node*/set_mems_allowed(node_states[N_MEMORY]);//设置init可以在任何内存中分配(常规、高、可移动内存)
等待kthreadd(pid 2)进程设置完成后(执行到complete(&kthreadd_done)),继续向下执行,此时调度器已经进入工作状态
cad_pid = task_pid(current);//保存init id,cad_pid用于在内核启动过程中执行ctrl-alt-del重新启动(默认值为yes) smp_prepare_cpus(setup_max_cpus);workqueue_init();// 这是两阶段工作队列子系统初始化的后半部分,初始化numa队列(cpu可使用numa内存),初始化工作池等等init_mm_internals(); ...
赋值cad_pid,初始化工作队列等等
/** check if there is an early userspace init. If yes, let it do all* the work*/if (init_eaccess(ramdisk_execute_command) != 0) {ramdisk_execute_command = NULL;prepare_namespace();}
进入prepare_namespace函数:
/** wait for the known devices to complete their probing** Note: this is a potential source of long boot delays.* For example, it is not atypical to wait 5 seconds here* for the touchpad of a laptop to initialize.*/wait_for_device_probe();md_run_setup();if (saved_root_name[0]) {root_device_name = saved_root_name;if (!strncmp(root_device_name, "mtd", 3) ||!strncmp(root_device_name, "ubi", 3)) {mount_block_root(root_device_name, root_mountflags);goto out;}ROOT_DEV = name_to_dev_t(root_device_name);if (strncmp(root_device_name, "/dev/", 5) == 0)root_device_name += 5;}if (initrd_load())goto out;
进入initrd_load函数:
#ifdef CONFIG_BLK_DEV_INITRDbool __init initrd_load(void);#else static inline bool initrd_load(void) { return false; }#endif
默认CONFIG_BLK_DEV_INITRD未开启,因此这里的initrd_load直接返回false,不加载initrd
mount_root();||\/#ifdef CONFIG_BLOCK{int err = create_dev("/dev/root", ROOT_DEV);if (err < 0)pr_emerg("Failed to create /dev/root: %d\n", err);mount_block_root("/dev/root", root_mountflags);}#endif
创建/dev/root(CONFIG_SECURITY_PATH未启动,只是创建路径)
out:devtmpfs_mount();// devtmpfs挂载到/dev目录init_mount(".", "/", NULL, MS_MOVE, NULL);// 挂载根分区到/目录(默认分区为/dev/mapper/cl-root,可以自定义分区)init_chroot(".");//设置系统根位置(环境)到/
挂载devtmpfs(/dev),系统根位置等等,回到kernel_init_freeable函数:
/** Ok, we have completed the initial bootup, and* we're essentially up and running. Get rid of the* initmem segments and start the user-mode stuff..** rootfs is available now, try loading the public keys* and default modules*/integrity_load_keys();
}
现在已经完成了初始启动,去掉initmem段并启动用户模式,回到kernel_init函数:
/* need to finish all async __init code before freeing the memory */async_synchronize_full();kprobe_free_init_mem();ftrace_free_init_mem();free_initmem();
释放部分内存,放回buddy系统(以供后续使用)
/** Kernel mappings are now finalized - update the userspace page-table* to finalize PTI.*/pti_finalize();// 更新(克隆)用户空间页表system_state = SYSTEM_RUNNING;numa_default_policy(); // 有好多空壳函数,一般不加注释rcu_end_inkernel_boot();do_sysctl_args();if (ramdisk_execute_command) { // 没有执行ret = run_init_process(ramdisk_execute_command);if (!ret)return 0;pr_err("Failed to execute %s (error %d)\n",ramdisk_execute_command, ret);}/** We try each of these until one succeeds.** The Bourne shell can be used instead of init if we are* trying to recover a really broken machine.*/if (execute_command) { // 没有执行ret = run_init_process(execute_command);if (!ret)return 0;panic("Requested init %s failed (error %d).",execute_command, ret);}if (CONFIG_DEFAULT_INIT[0] != '\0') { // 没有执行,CONFIG_DEFAULT_INIT=""ret = run_init_process(CONFIG_DEFAULT_INIT);if (ret)pr_err("Default init %s failed (error %d)\n",CONFIG_DEFAULT_INIT, ret);elsereturn 0;}if (!try_to_run_init_process("/sbin/init") ||!try_to_run_init_process("/etc/init") ||!try_to_run_init_process("/bin/init") ||!try_to_run_init_process("/bin/sh"))return 0;
更新(克隆)用户空间页表后,执行init程序,这里执行到了/sbin/init(在systemd安装包中,查看src/core/main.c),查看main函数执行流程:
int main(int argc, char *argv[]) {dual_timestamp initrd_timestamp = DUAL_TIMESTAMP_NULL, userspace_timestamp = DUAL_TIMESTAMP_NULL, kernel_timestamp = DUAL_TIMESTAMP_NULL,security_start_timestamp = DUAL_TIMESTAMP_NULL, security_finish_timestamp = DUAL_TIMESTAMP_NULL;struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false, reexecute = false;char *switch_root_dir = NULL, *switch_root_init = NULL;usec_t before_startup, after_startup;static char systemd[] = "systemd";char timespan[FORMAT_TIMESPAN_MAX];const char *shutdown_verb = NULL, *error_message = NULL;int r, retval = EXIT_FAILURE;Manager *m = NULL;FDSet *fds = NULL;redirect_telinit(argc, argv);//重定向 tel
redirect_telinit:如果程序是以telinit的名称被调用,则重定向去执行systemctl
/* Figure out whether we need to do initialize the system, or if we already did that because we are* reexecuting */skip_setup = early_skip_setup_check(argc, argv);/* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent reexecution we* are then called 'systemd'. That is confusing, hence let's call us systemd right-away. */program_invocation_short_name = systemd;(void) prctl(PR_SET_NAME, systemd);/* Save the original command line */saved_argv = argv;saved_argc = argc;
检查当前是否为重新执行(reexecute),以决定是否跳过系统初始化(skip_setup),并设置进程名称为systemd
if (getpid_cached() == 1) {/* When we run as PID 1 force system mode */arg_system = true;
当systemd以(pid)1运行时,强制使用系统模式(arg_system = true)
if (detect_container() <= 0) {
//detect_container函数检查是否已经设置容器(如lxc、lxc-libvirt、docker等),如果没有设置容器相关变量跳转到check_sched,检查/proc/1/sched文件(记录着thread相关参数)
重定向 telinit,检查进程是否重新启动(switch-root默认情况下为系统初始化启动 或序列化),检查是否设置容器
/* Running outside of a container as PID 1 */log_set_target(LOG_TARGET_KMSG);log_open();// 打开控制台日志,/dev/kmsgif (in_initrd()) // 没有打开initrd参数(内核配置)initrd_timestamp = userspace_timestamp;if (!skip_setup) {r = mount_setup_early();if (r < 0) {error_message = "Failed to mount early API filesystems";goto finish;}/* Let's open the log backend a second time, in case the first time didn't* work. Quite possibly we have mounted /dev just now, so /dev/kmsg became* available, and it previously wasn't. */log_open();r = initialize_security(&loaded_policy,&security_start_timestamp,&security_finish_timestamp,&error_message);// 初始化安全相关模式,如selinux、smackif (r < 0)goto finish;} ...initialize_coredump(skip_setup);// 初始化核心转储
打开日志功能(如控制台日志,/dev/kmsg),初始化安全相关模块
r = fixup_environment();if (r < 0) {log_emergency_errno(r, "Failed to fix up PID 1 environment: %m");error_message = "Failed to fix up PID1 environment";goto finish;}
修正(fixup)pid 1进程的运行环境(检查pid、控制台等是否正确/启动),失败则跳转到finish
if (arg_system) {/* Try to figure out if we can use colors with the console. No need to do that for user instances since* they never log into the console. */log_show_color(colors_enabled());// 检查控制台是否支持彩色,tty为关闭彩色r = make_null_stdio();// 使用传入的三个文件描述符设置stdin、stdout和stderr。如果任何描述符指定为-1,它将改为与/dev/null连接if (r < 0)log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");}
检查控制台,设置stdin、stdout和stderr
/* Mount /proc, /sys and friends, so that /proc/cmdline and* /proc/$PID/fd is available. */if (getpid_cached() == 1) {/* Load the kernel modules early. */if (!skip_setup)kmod_setup();// 加载部分内核模块,如autofs4、ip_tables等等r = mount_setup(loaded_policy);// 创建一些目录,如/run/systemd、/run/systemd/inaccessible(mknod相关函数)if (r < 0) {error_message = "Failed to mount API filesystems";goto finish;}}
加载部分内核模块,创建一些特殊文件
/* Reset all signal handlers. */(void) reset_all_signal_handlers();(void) ignore_signals(SIGNALS_IGNORE, -1);r = load_configuration(argc, argv, &saved_rlimit_nofile, &saved_rlimit_memlock, &error_message);// 解析传入参数if (r < 0) goto finish;r = safety_checks();// 检查环境变量,及根路径(/proc/1/root保存根内文件夹/文件名称)if (r < 0)goto finish;
解析传参,检查环境变量等
if (arg_action == ACTION_RUN) {/* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */log_close();/* Remember open file descriptors for later deserialization */r = collect_fds(&fds, &error_message);if (r < 0)goto finish;/* Give up any control of the console, but make sure its initialized. */setup_console_terminal(skip_setup);/* Open the logging devices, if possible and necessary */log_open();}log_execution_mode(&first_boot);r = initialize_runtime(skip_setup,&saved_rlimit_nofile,&saved_rlimit_memlock,&error_message);// 初始化cpu运行时间,及获取nofile和memlock限制数if (r < 0)goto finish;
设置fds,初始化cpu运行时间
r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,&m);if (r < 0) {log_emergency_errno(r, "Failed to allocate manager object: %m");error_message = "Failed to allocate manager object";goto finish;}
创建管理器对象(manager_new),设置管理器参数、环境等等
m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;set_manager_defaults(m);set_manager_settings(m);manager_set_first_boot(m, first_boot);/* Remember whether we should queue the default job */queue_default_job = !arg_serialization || arg_switched_root;before_startup = now(CLOCK_MONOTONIC);r = manager_startup(m, arg_serialization, fds);// 管理器启动,设置cgroups_agent、设置事件io(sd_event_add_io)、事件源等if (r < 0) {log_error_errno(r, "Failed to fully start up daemon: %m");error_message = "Failed to start up manager";goto finish;}
设置、启动管理器
after_startup = now(CLOCK_MONOTONIC);...(void) invoke_main_loop(m,&saved_rlimit_nofile,&saved_rlimit_memlock,&reexecute,&retval,&shutdown_verb,&fds,&switch_root_dir,&switch_root_init,&error_message);
正常流程到这里用户空间加载完成,systemd进入主循环(invoke_main_loop)并在其中循环处理事件(systemd在root用户中执行),后面的内容属于序列化或者其他异常操作(如非系统初始化执行systemd)
后续内容(如非系统初始化执行systemd)
finish:pager_close();if (m) {arg_shutdown_watchdog = m->shutdown_watchdog;m = manager_free(m);}reset_arguments();mac_selinux_finish();if (reexecute)do_reexecute(argc, argv,&saved_rlimit_nofile,&saved_rlimit_memlock,fds,switch_root_dir,switch_root_init,&error_message); /* This only returns if reexecution failed */...
目录预览
<<linux内核make menuconfig执行过程>>
<<linux内核make执行过程>>
<<linux内核启动过程>>
<<linux内核压缩制作bzImage>>
<<linux内核启动过程2:保护模式执行流程>>
<<linux内核启动过程3:内核初始化阶段>>
<<linux内核启动过程4:内核运行时>>
linux内核启动过程5:启动用户空间相关推荐
- Linux内核源码分析—从用户空间复制数据到内核空间
Linux内核源码分析-从用户空间复制数据到内核空间 本文主要参考<深入理解Linux内核>,结合2.6.11.1版的内核代码,分析从用户空间复制数据到内核空间函数. 1.不描述内核同步. ...
- Linux入职基础-4.10_系统启动过程(3):Linux内核(vmlinuz)启动
Linux系统启动过程(3):Linux内核(vmlinuz)启动 上节回顾:详解见上一篇<内核的引导程序>,内核模块在获取控制权后开始工作,内核(vmlinuz-2.6.18-238.e ...
- Linux内核如何装载和启动一个可执行程序-----实验7
2015108 李泽源 Linux内核如何装载和启动一个可执行程序 理解编译链接的过程和ELF可执行文件格式,详细内容参考本周第一节: 编程使用exec*库函数加载一个可执行文件,动态链接分为可执行程 ...
- ARM 之八 Cortex-M/R 内核启动过程 / 程序启动流程(基于IAR)
在前面的文章<ARM 之 Cortex-M/R 内核启动过程 / 程序启动流程(基于ARMCC)>中已经介绍过了 Cortex-M/R 内核相关内容.这里基于 IAR 的启动流程与之前 ...
- :linux内核编译过程的最终总结版
参考了linuxsir和水母的linux版的精华区,本人不保留任何版权. 经过归纳整理,看看上面的就可以了,包括补丁如何打.具体的一些选项可以往下看,一些一看就懂的白痴选项,并没有选进来,因此适合对电 ...
- android uboot启动过程,Android启动流程简析(一)
最近一时兴起,想对Android的启动流程进行一次分析,经过一番整理,从以下几个方面进行总结,代码部分只讨论思路,不论细节. Android架构介绍 Android启动概述 BootLoader介绍 ...
- 基于windows PE文件的恶意代码分析;使用SystemInternal工具与内核调试器研究windows用户空间与内核空间...
基于windows PE文件的恶意代码分析:使用SystemInternal工具与内核调试器研究windows用户空间与内核空间 ******************** 既然本篇的主角是PE文件,那 ...
- linux内核mount过程超复杂的do_mount()、do_loopback()、attach_recursive_mnt()、propagate_mnt()函数详解
linux内核mount过程复杂的do_loopback().attach_recursive_mnt().propagate_mnt()函数详解 本文对mount过程流程做了较详细的解释.首先以mo ...
- tomcat启动过程-start启动
tomcat源码系列导航栏 tomcat源码分析环境搭建 tomcat启动过程-load初始化 tomcat启动过程-start启动 目录 前言 启动流程 启动入口main函数 代码块一 start ...
- linux下jtag命令,[转载]LINUX内核调试过程(使用OpenJtag + OpenOCD)
[转载]LINUX内核调试过程(使用OpenJtag + OpenOCD) (2012-04-12 02:02:27) 标签: 杂谈 [转载]LINUX内核调试过程(使用OpenJtag + Open ...
最新文章
- Guru of the Week 条款01: 变量的初始化
- 被动声呐 相移波束形成_100天计划-DAY9-拖曳声呐
- HDU 1686 Oulipo【kmp求子串出现的次数】
- postgreSQL源码分析——索引的建立与使用——各种索引类型的管理和操作(1)
- 阿里腾讯百度们已经占据了全球互联网半壁江山!
- php-5.6.31安装视频教程_php安装图解教程
- 金字塔原理--公开演讲
- NYOJ--13--Fibonacci数
- python实验收获与反思 100字_期中考试总结与反思100字4篇
- 存储虚拟化技术之解读
- GitLab 内置了 CI/CD 工具,强大啊!!
- esxi linux 网络不可达,将ESXI所有的端口组迁移到分布式交换机的步骤
- 史上最全计算机网络大纲
- dellR230服务器如何进PE系统,戴尔服务器怎么进入u盘启动模式 选择oneshotbios
- 人生十个阶段,每七年周期变化
- 浮窗---创建Activity浮窗(可拖动)
- 霹雳灯双灯c语言程序,单片机霹雳游侠灯源程序
- 直接在浏览器运行Python代码
- Hadoop-HFDS知识点总结
- 2020.8.25 斗鱼Android开发二面面经