i386 Linux内核进入保护模式引导流程
在系统引导过程中,Bootloader将内核镜像加载到内存后,将控制权转交给内核,通过长转移指令跳转到入口startup_32。
实际上,在进入startup_32入口前,CPU已经处于保护模式下的段式寻址方式,而CS寄存器也已经设置成了__KERNEL_CS。
Linux准备进入保护模式主要做了下面的几项初始化的工作:
- 设置ds,es,fs,gs等段寄存器(在进入startup_32入口前,CPU已经处于了保护模式下的段式寻址方式,cs寄存器在进入startup_32前就已经设置成了__KERNEL_CS)
- 初始化页目录项(在编译时已经初始化好了)
- 初始化页表
- 初始化cr3寄存器,使CR3寄存器指向初始化好的页面目录项首地址。
- 设置CR0寄存器的PG位,开启页面映射。由于CPU此前已处于保护模式,自此CPU由保护模式下的段式寻址进入保护模式下的页式映射。
我们结合代码来看一下(整段代码)
/*
 *  linux/arch/i386/head.S -- the 32-bit startup code.
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Enhanced CPU detection and feature setting code by Mike Jagdis
 *  and Martin Mares, November 1997.
 */

.text
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>

/*
 * Absolute addresses / values used further down to locate the kernel
 * command line left behind by the real-mode setup code.
 */
#define OLD_CL_MAGIC_ADDR	0x90020
#define OLD_CL_MAGIC		0xA33F
#define OLD_CL_BASE_ADDR	0x90000
#define OLD_CL_OFFSET		0x90022
#define NEW_CL_POINTER		0x228	/* Relative to real mode data */

/*
 * References to members of the boot_cpu_data structure.
 */

#define CPU_PARAMS	SYMBOL_NAME(boot_cpu_data)
#define X86		CPU_PARAMS+0
#define X86_VENDOR	CPU_PARAMS+1
#define X86_MODEL	CPU_PARAMS+2
#define X86_MASK	CPU_PARAMS+3
#define X86_HARD_MATH	CPU_PARAMS+6
#define X86_CPUID	CPU_PARAMS+8
#define X86_CAPABILITY	CPU_PARAMS+12
#define X86_VENDOR_ID	CPU_PARAMS+16

/*
 * swapper_pg_dir is the main page directory, address 0x00101000
 *
 * On entry, %esi points to the real-mode code as a 32-bit pointer.
 */
/*
 * startup_32 -- 32-bit kernel entry point.
 *
 * Steps performed here, in order:
 *   1. load %ds/%es/%fs/%gs with __KERNEL_DS;
 *   2. (SMP only) when %bx is non-zero, OR mmu_cr4_features into %cr4
 *      and skip the page-table fill;
 *   3. fill every page-table entry between pg0 and empty_zero_page with
 *      consecutive 4K frames starting at physical 0, flags 007;
 *   4. point %cr3 at swapper_pg_dir and set the PG bit in %cr0;
 *   5. jump through an absolute address so that %eip is relocated.
 *
 * Until paging is on, every symbol is referenced as "sym-__PAGE_OFFSET".
 */
ENTRY(stext)
ENTRY(_stext)
startup_32:				/* Linux kernel entry point */
/*
 * Set segments to known values
 */
	cld
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds		/* all data segment registers = __KERNEL_DS */
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
#ifdef CONFIG_SMP		/* SMP-only code; may be skipped on a first read */
	orw %bx,%bx		/* %bx == 0: fall through to the page-table fill */
	jz 1f

/*
 *	New page tables may be in 4Mbyte page mode and may
 *	be using the global pages.
 *
 *	NOTE! If we are on a 486 we may have no cr4 at all!
 *	So we do not try to touch it unless we really have
 *	some bits in it to set.  This won't work if the BSP
 *	implements cr4 but this AP does not -- very unlikely
 *	but be warned!  The same applies to the pse feature
 *	if not equally supported. --macro
 *
 *	NOTE! We have to correct for the fact that we're
 *	not yet offset PAGE_OFFSET..
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
	cmpl $0,cr4_bits
	je 3f
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl cr4_bits,%eax
	movl %eax,%cr4
	jmp 3f
1:
#endif
/*
 * Initialize page tables
 */
	movl $pg0-__PAGE_OFFSET,%edi /* initialize page tables */
	movl $007,%eax		/* "007" doesn't mean with right to kill, but
				   PRESENT+RW+USER */
2:	stosl			/* store one entry, %edi += 4 (DF is clear) */
	add $0x1000,%eax	/* next entry maps the next 4K frame */
	cmp $empty_zero_page-__PAGE_OFFSET,%edi
	jne 2b

/*
 * Enable paging
 */
3:
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax	/* PG bit of %cr0 */
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	jmp 1f			/* flush the prefetch-queue */
1:
	movl $1f,%eax		/* absolute (link-time) address of the next label */
	jmp *%eax		/* make sure eip is relocated */
1:
	/* Set up the stack pointer */
	lss stack_start,%esp

#ifdef CONFIG_SMP
	orw  %bx,%bx
	jz  1f				/* Initial CPU cleans BSS */
	pushl $0			/* other CPUs: clear EFLAGS and skip */
	popfl				/* BSS clearing / parameter copy */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * Clear BSS first so that there are no surprises...
 * No need to cld as DF is already clear from cld above...
 */
	xorl %eax,%eax
	movl $ SYMBOL_NAME(__bss_start),%edi
	movl $ SYMBOL_NAME(_end),%ecx
	subl %edi,%ecx			/* %ecx = _end - __bss_start (bytes) */
	rep
	stosb

/*
 * start system 32-bit setup. We need to re-do some of the things done
 * in 16-bit mode for the "real" operations.
 */
	call setup_idt

/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

/*
 * Copy bootup parameters out of the way. First 2kB of
 * _empty_zero_page is for boot parameters, second 2kB
 * is for the command line.
 *
 * Note: %esi still has the pointer to the real-mode data.
 */
	movl $ SYMBOL_NAME(empty_zero_page),%edi
	movl $512,%ecx			/* 512 longs = 2kB of boot parameters */
	cld
	rep
	movsl
	xorl %eax,%eax
	movl $512,%ecx			/* zero the following 2kB */
	rep
	stosl
	movl SYMBOL_NAME(empty_zero_page)+NEW_CL_POINTER,%esi
	andl %esi,%esi
	jnz 2f			# New command line protocol
	cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
	jne 1f			/* no old-style magic either: no command line */
	movzwl OLD_CL_OFFSET,%esi
	addl $(OLD_CL_BASE_ADDR),%esi
2:
	movl $ SYMBOL_NAME(empty_zero_page)+2048,%edi
	movl $512,%ecx
	rep
	movsl
1:
/*
 * CPU type detection.  Stores the results into the boot_cpu_data fields
 * addressed through the X86* macros, then builds the %cr0 value for the
 * detected family and calls check_x87.
 */
#ifdef CONFIG_SMP
checkCPUtype:
#endif

	movl $-1,X86_CPUID		#  -1 for no CPUID initially

/* check if it is 486 or 386. */
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	movl $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x40000,%eax	# flip AC bit in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	andl $0x40000,%eax	# check if AC bit changed
	je is386

	movl $4,X86		# at least 486
	movl %ecx,%eax
	xorl $0x200000,%eax	# check ID flag
	pushl %eax
	popfl			# if we are on a straight 486DX, SX, or
	pushfl			# 487SX we can't change it
	popl %eax
	xorl %ecx,%eax
	pushl %ecx		# restore original EFLAGS
	popfl
	andl $0x200000,%eax
	je is486

	/* get vendor info */
	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID		# save CPUID level
	movl %ebx,X86_VENDOR_ID		# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax			# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask mask revision
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

is486:
	movl %cr0,%eax		# 486 or better
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl $0x50022,%eax	# set AM, WP, NE and MP
	jmp 2f

is386:	pushl %ecx		# restore original EFLAGS
	popfl
	movl %cr0,%eax		# 386
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl $2,%eax		# set MP
2:	movl %eax,%cr0
	call check_x87
/*
 * Load the final GDT/IDT, reload every segment register, then hand over
 * to C code.  On SMP the "ready" byte counts CPUs that got this far: the
 * CPU that sees ready == 1 calls start_kernel, the others call
 * initialize_secondary.
 */
#ifdef CONFIG_SMP
	incb ready
#endif
	lgdt gdt_descr
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f	/* far jump reloads %cs from the new GDT */
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ds		# after changing gdt.
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
#ifdef CONFIG_SMP
	movl $(__KERNEL_DS), %eax
	movl %eax,%ss		# Reload the stack pointer (segment only)
#else
	lss stack_start,%esp	# Load processor stack
#endif
	xorl %eax,%eax
	lldt %ax		/* null LDT selector */
	cld			# gcc2 wants the direction flag cleared at all times
#ifdef CONFIG_SMP
	movb ready, %cl
	cmpb $1,%cl
	je 1f			# the first CPU calls start_kernel
				# all other CPUs call initialize_secondary
	call SYMBOL_NAME(initialize_secondary)
	jmp L6
1:
#endif
	call SYMBOL_NAME(start_kernel)
L6:
	jmp L6			# main should never return here, but
				# just in case, we know what happens.

#ifdef CONFIG_SMP
ready:	.byte 0
#endif

/*
 * We depend on ET to be correct. This checks for 287/387.
 */
/*
 * check_x87 -- probe for a math coprocessor.
 *
 * Writes X86_HARD_MATH (0 = none, 1 = present).  When none is found the
 * EM bit (value 4) of %cr0 is toggled on.  Clobbers %eax.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al		/* low status byte is 0 when an FPU answered */
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	xorl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret

/*
 *  setup_idt
 *
 *  sets up a idt with 256 entries pointing to
 *  ignore_int, interrupt gates. It doesn't actually load
 *  idt - that can be done only after paging has been enabled
 *  and the kernel moved to PAGE_OFFSET. Interrupts
 *  are enabled elsewhere, when we can be relatively
 *  sure everything is ok.
 */
/*
 * setup_idt -- fill all 256 entries of idt_table with one interrupt gate
 * pointing at ignore_int.
 *
 * Each 8-byte gate is written as two longs:
 *   %eax = (__KERNEL_CS << 16) | low 16 bits of the handler address
 *   %edx = high 16 bits of the handler address | 0x8E00
 * Clobbers %eax, %ecx, %edx, %edi.
 */
setup_idt:
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	lea SYMBOL_NAME(idt_table),%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)
	movl %edx,4(%edi)
	addl $8,%edi		/* advance to the next 8-byte gate */
	dec %ecx
	jne rp_sidt
	ret

/* Operand of the "lss stack_start,%esp" instructions: offset, then selector. */
ENTRY(stack_start)
	.long SYMBOL_NAME(init_task_union)+8192
	.long __KERNEL_DS

/* This is the default interrupt "handler" :-) */
int_msg:
	.asciz "Unknown interrupt\n"
	ALIGN
/*
 * ignore_int -- default handler installed in every IDT slot by setup_idt.
 * Saves %eax/%ecx/%edx/%es/%ds, switches %ds/%es to __KERNEL_DS, calls
 * printk(int_msg), restores the saved registers and returns with iret.
 */
ignore_int:
	cld
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	pushl $int_msg
	call SYMBOL_NAME(printk)
	popl %eax		/* drop the printk argument */
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
	iret

/*
 * The interrupt descriptor table has room for 256 idt's,
 * the global descriptor table is dependent on the number
 * of tasks we can have..
 */
#define IDT_ENTRIES	256
#define GDT_ENTRIES	(__TSS(NR_CPUS))

/*
 * Operands for lidt/lgdt: a 16-bit limit (table size in bytes - 1)
 * followed by a 32-bit table base.  The ".word 0" in front of each
 * descriptor is padding.
 */
.globl SYMBOL_NAME(idt)
.globl SYMBOL_NAME(gdt)

	ALIGN
	.word 0
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
SYMBOL_NAME(idt):
	.long SYMBOL_NAME(idt_table)

	.word 0
gdt_descr:
	.word GDT_ENTRIES*8-1
SYMBOL_NAME(gdt):
	.long SYMBOL_NAME(gdt_table)

/*
 * This is initialized to create an identity-mapping at 0-8M (for bootup
 * purposes) and another mapping of the 0-8M area at virtual address
 * PAGE_OFFSET.
 */
.org 0x1000
ENTRY(swapper_pg_dir)		/* statically initialized page directory */
	.long 0x00102007	/* first two user-range entries: tables at */
	.long 0x00103007	/* 0x00102000 / 0x00103000, flags 007 */
	.fill BOOT_USER_PGD_PTRS-2,4,0
	/* default: 766 entries */
	.long 0x00102007	/* first two kernel-range entries: same two */
	.long 0x00103007	/* page tables as above */
	/* default: 254 entries */
	.fill BOOT_KERNEL_PGD_PTRS-2,4,0

/*
 * The page tables are initialized to only 8MB here - the final page
 * tables are set up later depending on memory size.
 */
.org 0x2000
ENTRY(pg0)

.org 0x3000
ENTRY(pg1)

/*
 * empty_zero_page must immediately follow the page tables ! (The
 * initialization loop counts until empty_zero_page)
 */

.org 0x4000
ENTRY(empty_zero_page)

.org 0x5000
ENTRY(empty_bad_page)

.org 0x6000
ENTRY(empty_bad_pte_table)

#if CONFIG_X86_PAE

 .org 0x7000
 ENTRY(empty_bad_pmd_table)

 .org 0x8000

#else

 .org 0x7000

#endif

/*
 * This starts the data section. Note that the above is all
 * in the text section because it has alignment requirements
 * that we cannot fulfill any other way.
 */
.data

ALIGN
/*
 * This contains typically 140 quadwords, depending on NR_CPUS.
 *
 * NOTE! Make sure the gdt descriptor in head.S matches this if you
 * change anything.
 */
ENTRY(gdt_table)		/* statically initialized GDT */
	.quad 0x0000000000000000	/* NULL descriptor */
	.quad 0x0000000000000000	/* not used */
	.quad 0x00cf9a000000ffff	/* 0x10 kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* 0x18 kernel 4GB data at 0x00000000 */
	.quad 0x00cffa000000ffff	/* 0x23 user   4GB code at 0x00000000 */
	.quad 0x00cff2000000ffff	/* 0x2b user   4GB data at 0x00000000 */
	.quad 0x0000000000000000	/* not used */
	.quad 0x0000000000000000	/* not used */
	/*
	 * The APM segments have byte granularity and their bases
	 * and limits are set at run time.
	 */
	.quad 0x0040920000000000	/* 0x40 APM set up for bad BIOS's */
	.quad 0x00409a0000000000	/* 0x48 APM CS    code */
	.quad 0x00009a0000000000	/* 0x50 APM CS 16 code (16 bit) */
	.quad 0x0040920000000000	/* 0x58 APM DS    data */
	.fill NR_CPUS*4,8,0		/* space for TSS's and LDT's */

/*
 * This is to aid debugging, the various locking macros will be putting
 * code fragments here.  When an oops occurs we'd rather know that it's
 * inside the .text.lock section rather than as some offset from whatever
 * function happens to be last in the .text segment.
 */
.section .text.lock
ENTRY(stext_lock)
这里先解释一下,内核镜像在编译时已经在所有符号地址上增加了一个偏移量0xC0000000,并且内核镜像是加载到物理内存0x100000即1MB处,也就是说startup_32实际上被加载到物理内存0x100000地址上。所以startup_32的虚拟地址为0xC0100000,但是CPU在进入startup_32入口时,还处于保护模式下的段式寻址方式(尚未开启页面映射),所以实际上此时EIP中的地址为0x100000而不是0xC0100000。因此从代码中可以看到很多类似“地址-__PAGE_OFFSET”的写法,其中__PAGE_OFFSET为0xC0000000,这样做就是为了在段式寻址中可以找到正确的物理地址。如下图所示:
设置页表也只是初始化了2张页表,物理页面的基地址分别为0x0, 0x1000, 0x2000,依次类推,也就是物理页面中的0, 1, 2。 一个页面表大小为4K, 2张页表能存2K个表项(4K / 4 * 2),所以两张页表可以映射8M 的存储空间,这就是Linux内核对内存大小的最低限度要求。代码如下:
/*
 * Initialize page tables
 *
 * Fills every entry from pg0 up to (not including) empty_zero_page.
 * Entry k receives (k * 0x1000) | 007, i.e. consecutive 4K frames
 * starting at physical address 0.
 */
	movl $pg0-__PAGE_OFFSET,%edi /* initialize page tables */
	movl $007,%eax		/* "007" doesn't mean with right to kill, but
				   PRESENT+RW+USER */
2:	stosl			/* store one entry, %edi += 4 */
	add $0x1000,%eax	/* next entry maps the next 4K frame */
	cmp $empty_zero_page-__PAGE_OFFSET,%edi
	jne 2b
下面我们再来看页面目录的初始化。页面目录不是在内核运行时由代码初始化的,而是在编译时就已经确定,随内核镜像加载而初始化好的。实际上该段代码初始化了1024个目录项,其中BOOT_USER_PGD_PTRS为768,代表用户空间的目录表项;BOOT_KERNEL_PGD_PTRS为256,代表系统空间的目录表项。也就是说,共有4GB的虚拟内存空间,其中0-3G为用户空间,3G-4G为系统空间。如下图所示:
上面内容的代码:
.org 0x1000
ENTRY(swapper_pg_dir)
	.long 0x00102007	/* first two user-range entries */
	.long 0x00103007
	.fill BOOT_USER_PGD_PTRS-2,4,0
	/* default: 766 entries */
	.long 0x00102007	/* first two kernel-range entries: same tables */
	.long 0x00103007
	/* default: 254 entries */
	.fill BOOT_KERNEL_PGD_PTRS-2,4,0
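这里还有一点需要注意,一共初始化了2个用户空间目录项和2个系统空间目录项,而且它们具有相同的映射:两组目录项的内容都是0x00102007和0x00103007,即分别指向页表pg0(镜像内偏移0x2000,物理地址0x102000)和pg1(镜像内偏移0x3000,物理地址0x103000)。这是为什么呢?我们知道现在EIP寄存器里存的还是段式寻址下的低区地址(0x100000附近),一旦开启页面映射,EIP仍然指向低区;如果低区没有映射,下一条指令就无法取到,执行就会中断。所以让低区虚拟地址和高区(PAGE_OFFSET以上)虚拟地址映射到相同的物理地址,就解决了这个问题。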
设置完相应表项后,就设置相应寄存器,如cr3,指向页面目录的首地址,并设置Cr0寄存器,开启保护模式页式映射。
/*
 * Enable paging: point %cr3 at the page directory, then set the PG bit
 * (0x80000000) in %cr0.
 */
3:
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	jmp 1f			/* flush the prefetch-queue */
自此,Linux访问地址就不再需要使用 某地址-__PAGE_OFFSET, CPU也进入保护模式下的页式映射。
(内核源码为2.4.0,参考《Linux内核源码情景分析》)
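附:进入startup_32的相关代码(不知道该代码是不是bootloader的,如果有知道的同学,希望可以回复评论,谢谢O(∩_∩)O。注:从下面文件头注释“Trampoline.S”以及“Flag an SMP trampoline”“first SMP CPU”等注释来看,这段应当是内核自带的SMP trampoline代码,用于让其他CPU从实模式进入保护模式并跳到startup_32,而不是bootloader的代码):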
/*
 *
 *	Trampoline.S	Derived from Setup.S by Linus Torvalds
 *
 *	4 Jan 1997 Michael Chastain: changed to gnu as.
 *
 *	Entry: CS:IP point to the start of our code, we are
 *	in real mode with no stack, but the rest of the
 *	trampoline page to make our stack and everything else
 *	is a mystery.
 *
 *	In fact we don't actually need a stack so we don't
 *	set one up.
 *
 *	We jump into the boot/compressed/head.S code. So you'd
 *	better be running a compressed kernel image or you
 *	won't get very far.
 *
 *	On entry to trampoline_data, the processor is in real mode
 *	with 16-bit addressing and 16-bit data.  CS has some value
 *	and IP is zero.  Thus, data addresses need to be absolute
 *	(no relocation) and are taken with regard to r_base.
 *
 *	If you work on this file, check the object module with objdump
 *	--full-contents --reloc to make sure there are no relocation
 *	entries except for the gdt one..
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>

.data

.code16

ENTRY(trampoline_data)
r_base = .

	mov	%cs, %ax	# Code and data in the same place
	mov	%ax, %ds

	mov	$1, %bx		# Flag an SMP trampoline
	cli			# We should be safe anyway

	movl	$0xA5A5A5A5, trampoline_data - r_base
				# write marker for master knows we're running

	lidt	idt_48 - r_base	# load idt with 0, 0
	lgdt	gdt_48 - r_base	# load gdt with whatever is appropriate
				# (this sets the GDT register)

	xor	%ax, %ax
	inc	%ax		# protected mode (PE) bit
	lmsw	%ax		# into protected mode
	jmp	flush_instr
flush_instr:
	ljmpl	$__KERNEL_CS, $0x00100000
			# far jump to the startup_32 entry address:
			# jump to startup_32 in arch/i386/kernel/head.S

idt_48:				# IDT descriptor (empty table)
	.word	0			# idt limit = 0
	.word	0, 0			# idt base = 0L

gdt_48:				# GDT descriptor
	.word	0x0800			# gdt limit = 2048, 256 GDT entries
	.long	gdt_table-__PAGE_OFFSET	# gdt base = gdt (first SMP CPU)

.globl SYMBOL_NAME(trampoline_end)
SYMBOL_NAME_LABEL(trampoline_end)
i386 Linux内核进入保护模式引导流程相关推荐
- Linux内核网络数据包处理流程
Linux内核网络数据包处理流程 from kernel-4.9: 0. Linux内核网络数据包处理流程 - 网络硬件 网卡工作在物理层和数据链路层,主要由PHY/MAC芯片.Tx/Rx FIFO. ...
- linux内核启动过程2:保护模式执行流程
上一篇<<linux内核压缩制作bzImage>>分析了bzImage制作流程,本篇继续分析内核启动过程,从实模式跳转到保护模式及后续执行流程. protected_mode_ ...
- Linux基础自学记录六-引导流程解析2
第5讲.Linux引导流程解析 9.GRUB配置文件 GRUB的配置文件默认在/boot/grub/grub.conf,/etc/grub.conf是它的软链接:在备份系统时,/boot目录首先应做 ...
- 一文讲解Linux内核中根文件系统挂载流程
根文件系统的概念 根文件系统是控制权从linux内核转移到用户空间的一个桥梁.linux内核就类似于一个黑匣子,只向用户提供各种功能的接口,但是功能的具体实现不可见,用户程序通过对这些功能接口的不同整 ...
- linux内核那些事之mmap_region流程梳理
承接<linux内核那些事之mmap>,mmap_region()是申请一个用户进程虚拟空间 并根据匿名映射或者文件映射做出相应动作,是实现mmap关键函数,趁这几天有空闲时间 整理下mm ...
- 从操作系统内核看设计模式--linux内核的facade模式
linux的内核当中处处充满了设计模式,本文先讨论一下外观模式.外观模式就是将客户和子系统解耦,为客户将复杂的子系统进行封装,从而使得客户可以使用简单易用的接口. 众所周知,linux和unix是 ...
- linux更新硬件时钟,Linux内核“11分钟模式”可以做的最大的硬件时钟更新是什么?...
当某些与时间相关的程序(如ntpd)在 Linux系统上运行时,内核将切换到所谓的"十一分钟模式"(参见hwclock手册页),它将自动从系统时钟更新硬件时钟十一分钟 在SLES1 ...
- Linux内核的VGA模式对照表
版权声明:可以任意转载,但转载时必须标明原作者charlee.原始链接http://tech.idv2.com/2006/09/22/linux-kernel-vga-mode/以及本声明. 在 /b ...
- linux程序获取透传参数,Linux内核中TCP SACK处理流程分析
frankzfz2014-07-27 17:32 demo121:frankzfz您好: 我想请教一个问题,就是将写好的GenericApp项目(没有配置工具),我加入zigbee协议栈的配置工具后还 ...
最新文章
- 网站用户登录验证:Servlet+JSP VS Struts书剑恩仇录
- 五分钟学会悲观乐观锁-java vs mysql vs redis三种实现
- 安装SSL证书-解决导入证书时的ASN1 bad tag value met错误
- 在jenkins上配置Android项目(git管理,gradle构建)
- 每周.NET前沿技术文章摘要(2017-05-24)
- 重温《数据库系统概论》【第一篇 基础篇】【第2章 关系数据库】
- a href点击无效_jquery click()方法模拟点击事件对a标签不生效的解决办法
- scala java抽象理解_Scala学习笔记(五) 抽象类以及类中的一些语法糖
- svn图形化控制(svnmanager)
- 华为手机全面鸿蒙,彻底与安卓说拜拜,华为手机全面鸿蒙时代即将到来
- python交互式数据可视化_基于Python实现交互式数据可视化的工具,你用过几种?...
- linux查看双机热备运行,linux双机热备,故障切换!!!
- 【 理想的机器学习书】
- c语言开发实际,21实际c语言教程-1 (转)
- Java 窗口设置图标及背景图片
- JavaWeb教程———过滤器
- 城市智慧停车管理模式方案科普
- html下拉和收起,Vue中实现菜单下拉、收起的动画效果
- 用powershell代码安装Windows软件
- 地图名片:百度地图 高德地图 pc 移动端快速实现地图生成、标记以及标注获取链接及代码(霸霸看了都说好)