文件系统--open系统调用详解

arm64平台关于32位系统调用的定义：

/*
 * 32-bit (compat) syscall number for open and its table entry.
 * NOTE(review): these two lines presumably come from asm/unistd32.h, which
 * the table definition below pulls in via #include — confirm against the tree.
 */
#define __NR_open 5
__SYSCALL(__NR_open, compat_sys_open)

/*
 * Redefine __SYSCALL so that each __SYSCALL(nr, sym) line in the included
 * header expands to a designated array initializer "[nr] = sym,".
 */
#undef __SYSCALL
#define __SYSCALL(nr, sym)  [nr] = sym,

/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 */
void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = {
	/* every slot defaults to sys_ni_syscall; the header overrides real ones */
	[0 ... __NR_compat_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd32.h>
};

arm64平台上64位系统调用的定义：

/*
 * 64-bit syscall number for open and its table entry.
 * NOTE(review): this invocation passes three arguments while the __SYSCALL
 * macro defined below takes two (nr, sym); the two excerpts presumably come
 * from different headers/kernel versions — confirm against the tree.
 */
#define __NR_open                 8
__SYSCALL(  8, sys_open, 3)

/*
 * Redefine __SYSCALL so that each __SYSCALL(nr, sym) line in the included
 * header expands to a designated array initializer "[nr] = sym,".
 */
#undef __SYSCALL
#define __SYSCALL(nr, sym)  [nr] = sym,

/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 */
void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
	/* every slot defaults to sys_ni_syscall; the header overrides real ones */
	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
};

下面看一下sys_open的实现：

fs/open.c：SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{if (force_o_largefile())flags |= O_LARGEFILE;return do_sys_open(AT_FDCWD, filename, flags, mode);
}

通过宏定义展开，实际上上述定义实现了一个sys_open函数，可以看到核心是调用了do_sys_open函数去处理系统调用。下面来看一下它的实现：

/*
 * Common implementation behind open(): build the open flags, copy the
 * path name in, reserve a descriptor, open the file and bind the two.
 *
 * @dfd:      directory descriptor the lookup is relative to
 * @filename: user-space pointer to the path string
 * @flags:    open flags
 * @mode:     creation mode
 *
 * Returns the new file descriptor (>= 0) on success, or a negative error
 * taken from build_open_flags(), getname(), get_unused_fd_flags() or
 * do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	/* translate flags/mode into op; a non-zero result is returned as-is */
	int fd = build_open_flags(flags, mode, &op);
	struct filename *tmp;

	if (fd)
		return fd;

	/* obtain a struct filename for the user-supplied path */
	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	/* reserve an unused descriptor number */
	fd = get_unused_fd_flags(flags);
	if (fd >= 0) {
		/* perform the open; yields a struct file or an ERR_PTR */
		struct file *f = do_filp_open(dfd, tmp, &op);
		if (IS_ERR(f)) {
			/* give the reserved descriptor back on failure */
			put_unused_fd(fd);
			fd = PTR_ERR(f);
		} else {
			fsnotify_open(f);
			/* bind the descriptor number to the struct file */
			fd_install(fd, f);
		}
	}
	putname(tmp);
	return fd;
}

getname

下面分步骤来看，第一步就是getname，它最终调用的是getname_flags，这个函数的目的就是获取并初始化一个filename结构体：

/*
 * Kernel-side representation of a path name copied in from user space.
 * getname_flags() fills in every field of this structure.
 */
struct filename {
	const char		*name;	/* pointer to actual string */
	const __user char	*uptr;	/* original userland pointer */
	struct audit_names	*aname;
	int			refcnt;
	bool			separate; /* should "name" be freed? */
};

filename的结构体如上所示，它本身的size是很小的，但是__getname()从names_cache这个slab中申请的内存大小为PATH_MAX（Linux中为4096 bytes），所以除了这个结构体占用的长度之外，后面的空间可以用于存储实际的路径字符串。

/*
 * Copy a path name from user space into a freshly set up struct filename.
 *
 * @filename: user-space pointer to the path string
 * @flags:    lookup flags; LOOKUP_EMPTY allows an empty path
 * @empty:    if non-NULL, set to 1 when the path is empty
 *
 * Returns the struct filename on success, or an ERR_PTR:
 * -ENOMEM on allocation failure, the negative value from
 * strncpy_from_user(), -ENOENT for an empty path without LOOKUP_EMPTY,
 * -ENAMETOOLONG when the copied length reaches PATH_MAX.
 */
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
	struct filename *result, *err;
	int len;
	long max;
	char *kname;

	/* reuse an existing entry if the audit layer already has one */
	result = audit_reusename(filename);
	if (result)
		return result;

	/*
	 * The buffer from __getname() is large enough for a whole path name
	 * (it is reused with max = PATH_MAX below), not just
	 * sizeof(struct filename).
	 */
	result = __getname();
	if (unlikely(!result))
		return ERR_PTR(-ENOMEM);
	result->refcnt = 1;

	/*
	 * First, try to embed the struct filename inside the names_cache
	 * allocation
	 */
	/*
	 * The string is stored directly behind the structure in the same
	 * allocation, hence separate = false.
	 */
	kname = (char *)result + sizeof(*result);
	result->name = kname;
	result->separate = false;
	max = EMBEDDED_NAME_MAX;

recopy:
	/* copy the path string from user space into kname */
	len = strncpy_from_user(kname, filename, max);
	if (unlikely(len < 0)) {
		err = ERR_PTR(len);
		goto error;
	}

	/*
	 * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
	 * separate struct filename so we can dedicate the entire
	 * names_cache allocation for the pathname, and re-do the copy from
	 * userland.
	 */
	/*
	 * The embedded area was filled completely, so the string may be
	 * longer: allocate the structure separately (separate = true) and
	 * give the whole original buffer to the string.
	 */
	if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
		kname = (char *)result;

		result = kzalloc(sizeof(*result), GFP_KERNEL);
		if (!result) {
			err = ERR_PTR(-ENOMEM);
			/* restore result so the error path releases the buffer */
			result = (struct filename *)kname;
			goto error;
		}
		result->name = kname;
		result->separate = true;
		result->refcnt = 1;
		max = PATH_MAX;
		goto recopy;	/* redo the copy with the larger limit */
	}

	/* The empty path is special. */
	if (unlikely(!len)) {
		if (empty)
			*empty = 1;
		err = ERR_PTR(-ENOENT);
		if (!(flags & LOOKUP_EMPTY))
			goto error;
	}

	err = ERR_PTR(-ENAMETOOLONG);
	if (unlikely(len >= PATH_MAX))
		goto error;

	result->uptr = filename;
	result->aname = NULL;
	/* hand the new name to the audit layer */
	audit_getname(result);
	return result;

error:
	putname(result);
	return err;
}

这里需要注意一点就是audit_names，每个进程都有对应的审计上下文，其中会保存一个audit_names链表，每个打开的filename都会对应一个audit_names结构，它的作用是用于审计，比如selinux权限检查。

get_unused_fd_flags

下面是第二步，根据传入的flags获取未用的fd结构：

/*
 * Reserve an unused file descriptor for the current task.
 *
 * Thin wrapper around __alloc_fd(): searches current->files starting at
 * descriptor 0, bounded by rlimit(RLIMIT_NOFILE), and forwards @flags.
 * Returns whatever __alloc_fd() returns.
 */
int get_unused_fd_flags(unsigned flags)
{
	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);

/** allocate a file descriptor, mark it busy.*/
int __alloc_fd(struct files_struct *files,unsigned start, unsigned end, unsigned flags)
{unsigned int fd;int error;struct fdtable *fdt;spin_lock(&files->file_lock);
repeat:fdt = files_fdtable(files); //获取files_struct中的fdtable成员，fdtable会存在一个file结构体数组fd = start;if (fd < files->next_fd) //检查files_struct中记录到下一个fd号fd = files->next_fd;if (fd < fdt->max_fds) //从下一个fd号开始寻找到下一个未用的fd号//这是一个位图操作，max_fds有多大，就对应多少个bit的位图，初始化为一个long类型，对于arm32就是32个bitsfd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); /** N.B. For clone tasks sharing a files structure, this test* will limit the total number of files that can be opened.*/error = -EMFILE;if (fd >= end)goto out;/*如果发现fd是大于max_fds时会执行到这里，那么会进行expand操作* 实际上跟进去会发现它会新创建更大的fdtable并更新到files_struct中。*/error = expand_files(files, fd);if (error < 0)goto out;/** If we needed to expand the fs array we* might have blocked - try again.*/if (error)goto repeat;if (start <= files->next_fd)files->next_fd = fd + 1;__set_open_fd(fd, fdt);if (flags & O_CLOEXEC)__set_close_on_exec(fd, fdt); //和open_fds一样是位图操作，初始化为一个long type，对于arm32就是32个bitselse__clear_close_on_exec(fd, fdt);error = fd;
#if 1/* Sanity check */if (rcu_access_pointer(fdt->fd[fd]) != NULL) {printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);rcu_assign_pointer(fdt->fd[fd], NULL);}
#endifout:spin_unlock(&files->file_lock);return error;
}

do_filp_open

看下他的代码实现：

struct file *do_filp_open(int dfd, struct filename *pathname,const struct open_flags *op)
{   struct nameidata nd;int flags = op->lookup_flags;struct file *filp;filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); //默认第一次打开是通过rcu walk形式进行打开操作，效率最高if (unlikely(filp == ERR_PTR(-ECHILD)))filp = path_openat(dfd, pathname, &nd, op, flags); //RCU打开失败后，ref-walk形式打开，可能会睡眠if (unlikely(filp == ERR_PTR(-ESTALE)))filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);//这种是在前面打开都已经失败的情况下才执行的，比如文件过期（STALE）return filp;
}

这个函数最终都会调用path_openat进行下一步操作：

static struct file *path_openat(int dfd, struct filename *pathname,struct nameidata *nd, const struct open_flags *op, int flags)
{struct file *file;struct path path;int opened = 0;int error;file = get_empty_filp(); //1.申请slab，获取一个空的struct file结构体if (IS_ERR(file))return file;file->f_flags = op->open_flag;if (unlikely(file->f_flags & __O_TMPFILE)) {error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened);goto out;}error = path_init(dfd, pathname->name, flags, nd); //2.检索对应目标文件所属的父目录项，初始化nameidata结构体，主要包含filename（dentry）和inode等信息if (unlikely(error))goto out;error = do_last(nd, &path, file, op, &opened, pathname); //3.最后处理最后一级子目录项，如果发现最后一级是一个链接，那么会报错返回while (unlikely(error > 0)) { /* trailing symlink */  //4.如果发现最后一级子目录项是一个链接，那么要继续跟踪到实际目录struct path link = path;void *cookie;if (!(nd->flags & LOOKUP_FOLLOW)) {path_put_conditional(&path, nd);path_put(&nd->path);error = -ELOOP;break;}error = may_follow_link(&link, nd);if (unlikely(error))break;nd->flags |= LOOKUP_PARENT;nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);error = follow_link(&link, nd, &cookie);if (unlikely(error))break;error = do_last(nd, &path, file, op, &opened, pathname);//5.跟踪到实际目录后，重新调用do_last进行处理最后一级put_link(nd, &link, cookie);}
out:path_cleanup(nd);if (!(opened & FILE_OPENED)) {BUG_ON(!error);put_filp(file);}if (unlikely(error)) {if (error == -EOPENSTALE) {if (flags & LOOKUP_RCU)error = -ECHILD;elseerror = -ESTALE;}file = ERR_PTR(error);}return file;
}

开始介绍path_init之前，先要介绍一下struct nameidata结构体，这个结构体就是用于目录检索时保存信息的，每次检索一级目录都会更新该结构体中的内容。

/*
 * State carried through a path lookup. path_init() initialises it and
 * link_path_walk() updates last/last_type for each component it parses.
 */
struct nameidata {
	struct path	path;
	struct qstr	last;		/* name and hash_len of the latest component */
	struct path	root;
	struct inode	*inode; /* path.dentry.d_inode */
	unsigned int	flags;
	unsigned	seq, m_seq;	/* sequence counts sampled from d_seq / mount_lock */
	int		last_type;	/* one of the LAST_* values */
	unsigned	depth;
	struct file	*base;
	char *saved_names[MAX_NESTED_LINKS + 1];
};

其中path表示当前查找的这一级目录路径，last表示当前子路径及其散列值，root表示当前目录对应的根目录，seq是目录项顺序锁，m_seq是文件系统mount顺序锁，last_type表示当前目录的类型：

/*
 * Type of the last component on LOOKUP_PARENT
 */
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};

其中LAST_NORM表示普通的路径分量，LAST_ROOT表示root类型，LAST_DOT表示“.”，LAST_DOTDOT表示“..”，LAST_BIND表示文件链接类型。

static int path_init(int dfd, const char *name, unsigned int flags,struct nameidata *nd)
{int retval = 0;nd->last_type = LAST_ROOT; /* if there are only slashes... */  //默认类型为ROOT类型nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;nd->depth = 0;nd->base = NULL;if (flags & LOOKUP_ROOT) { //如果打开标志包含LOOKUP_ROOTstruct dentry *root = nd->root.dentry;struct inode *inode = root->d_inode;if (*name) {if (!d_can_lookup(root))return -ENOTDIR;retval = inode_permission(inode, MAY_EXEC);if (retval)return retval;}nd->path = nd->root;nd->inode = inode;if (flags & LOOKUP_RCU) {rcu_read_lock();nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);nd->m_seq = read_seqbegin(&mount_lock);} else {path_get(&nd->path);}goto done;}nd->root.mnt = NULL;nd->m_seq = read_seqbegin(&mount_lock);if (*name=='/') {   //1.第一种情况如果路径名第一个为/，说明是绝对路径if (flags & LOOKUP_RCU) {rcu_read_lock();nd->seq = set_root_rcu(nd);} else {set_root(nd);path_get(&nd->root);}nd->path = nd->root; //设置绝对路径的nd->path为nd->root} else if (dfd == AT_FDCWD) {//2.第二种情况相对路径是当前进程的工作路径if (flags & LOOKUP_RCU) {struct fs_struct *fs = current->fs;unsigned seq;rcu_read_lock();do {seq = read_seqcount_begin(&fs->seq);nd->path = fs->pwd; //设置nd->pathnd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);} while (read_seqcount_retry(&fs->seq, seq));} else {get_fs_pwd(current->fs, &nd->path); //设置nd->path}} else { //3.第三种情况相对路径是一个用户指定的路径名/* Caller must check execute permissions on the starting path component */struct fd f = fdget_raw(dfd);struct dentry *dentry;if (!f.file)return -EBADF;dentry = f.file->f_path.dentry;if (*name) {if (!d_can_lookup(dentry)) {fdput(f);return -ENOTDIR;}}nd->path = f.file->f_path; //设置nd->pathif (flags & LOOKUP_RCU) {if (f.flags & FDPUT_FPUT)nd->base = f.file;nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);rcu_read_lock();} else {path_get(&nd->path);fdput(f);}}nd->inode = nd->path.dentry->d_inode; //设置nd->inode为对应nd->path.dentry->d_inodeif (!(flags & LOOKUP_RCU))goto done;if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))goto done;if (!(nd->flags & 
LOOKUP_ROOT))nd->root.mnt = NULL;rcu_read_unlock();return -ECHILD;
done:current->total_link_count = 0;return link_path_walk(name, nd); //设置完了nd->path和nd->inode之后就开始最终的link_path_walk了
}

path_init的目标是搜索目标路径的父目录，并把父目录的信息更新到nameidata结构体中，通过上面的注释可以看到，它会先设置nd->path为起始路径，从起始路径开始进行检索。比如，对于AT_FDCWD类型的系统调用来说，会以进程当前路径作为起始检索路径开始搜索。设置nd->path为起始路径后开始执行link_path_walk开始进行循环检索，每次检索一级目录，并且更新nameidata结构体，直到最终的父目录。

static int link_path_walk(const char *name, struct nameidata *nd)
{struct path next;int err;while (*name=='/')name++; //过滤掉起始位置多余的/字符if (!*name)return 0;//如果发现/后面已经没有字符，说明是根目录直接返回/* At this point we know we have a real path component. */for(;;) {u64 hash_len;int type;err = may_lookup(nd);if (err)break;hash_len = hash_name(name);  //开始对第一级路径做hash处理type = LAST_NORM;if (name[0] == '.') switch (hashlen_len(hash_len)) { //处理当前目录名为.和..的情况case 2:if (name[1] == '.') {type = LAST_DOTDOT;nd->flags |= LOOKUP_JUMPED;}break;case 1:type = LAST_DOT;}if (likely(type == LAST_NORM)) { //处理类型为LAST_NORM的情况，也就是普通文件struct dentry *parent = nd->path.dentry;nd->flags &= ~LOOKUP_JUMPED;if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {  //判断是否要重新进行一次HASH操作struct qstr this = { { .hash_len = hash_len }, .name = name };err = parent->d_op->d_hash(parent, &this); if (err < 0)break;hash_len = this.hash_len;name = this.name;}}nd->last.hash_len = hash_len;nd->last.name = name;nd->last_type = type;name += hashlen_len(hash_len);  //name指向当前目录的下一级目录if (!*name)return 0;/** If it wasn't NUL, we know it was '/'. Skip that* slash, and continue until no more slashes.*/do {name++;} while (unlikely(*name == '/'));  //这里需要特别注意，为什么说本函数只处理到最终路径的父目录if (!*name)                        //当发现当前目录的下一级目录为空，说明当前为最后一级目录时，直接返回，不做walk_component核心处理return 0;                  //这样就跳过了最后一级的处理，留给后面的do_last进行处理err = walk_component(nd, &next, LOOKUP_FOLLOW); //该步骤是walk的核心步骤，如果当前目录是一个普通的文件，那么会更新ndif (err < 0)                                    //如果当前目录是链接文件，那么nd不更新，会把链接文件路径更新到nextreturn err;if (err) {err = nested_symlink(&next, nd);  //根据next去定位实际目录路径，并把实际目录更新到nd中，其中会嵌套判断并follow多级的链接文件if (err)return err;}if (!d_can_lookup(nd->path.dentry)) {err = -ENOTDIR; break;}}terminate_walk(nd);return err;
}

上述函数会在walk_component中尝试多种方式定位dentry，首先从lookup_fast快速walk目录项，如果内存缓存中没有，那么就进入slow_path从实际文件系统中读取，最终该函数会返回父目录的信息（包括path、dentry、inode等）到nameidata结构体，并把该结构传递给do_last进行最后一级目录的处理。

     /*
      * Excerpt: handle the last component and, while do_last() reports a
      * trailing symlink (error > 0), follow the link and call do_last()
      * again. Without LOOKUP_FOLLOW the loop ends with -ELOOP.
      */
     error = do_last(nd, &path, file, op, &opened, pathname);
     while (unlikely(error > 0)) { /* trailing symlink */
         struct path link = path;
         void *cookie;
         if (!(nd->flags & LOOKUP_FOLLOW)) {
             path_put_conditional(&path, nd);
             path_put(&nd->path);
             error = -ELOOP;
             break;
         }
         error = may_follow_link(&link, nd);
         if (unlikely(error))
             break;
         nd->flags |= LOOKUP_PARENT;
         nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
         error = follow_link(&link, nd, &cookie);
         if (unlikely(error))
             break;
         error = do_last(nd, &path, file, op, &opened, pathname);
         put_link(nd, &link, cookie);
     }

如果最后一级目录是一个symlink的话，那么do_last会返回1，接着就会去follow对应的symlink，去找到真正的目录项，这里由于symlink是可能嵌套的，比如a->b->c，所以才会使用一个while循环进行follow，最终do_last进行处理。

static int do_last(struct nameidata *nd, struct path *path,struct file *file, const struct open_flags *op,int *opened, struct filename *name)
{struct dentry *dir = nd->path.dentry;int open_flag = op->open_flag;bool will_truncate = (open_flag & O_TRUNC) != 0;bool got_write = false;int acc_mode = op->acc_mode;struct inode *inode;bool symlink_ok = false;struct path save_parent = { .dentry = NULL, .mnt = NULL };bool retried = false;int error;nd->flags &= ~LOOKUP_PARENT;nd->flags |= op->intent;if (nd->last_type != LAST_NORM) {error = handle_dots(nd, nd->last_type);if (error)return error;goto finish_open;  //如果发现本次搜索的目录项是一个symlink类型，跳转到finish_open}if (!(open_flag & O_CREAT)) { //判断是否创建文件，如果不创建则执行如下操作if (nd->last.name[nd->last.len])nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))symlink_ok = true;  //如果打开文件存在O_PATH标记，并且nd->flags中不存在LOOKUP_FOLLOW，则搜索到symlink，不再进一步walk/* we _can_ be in RCU mode here */error = lookup_fast(nd, path, &inode); //lookup的fast路径，从内存中执行搜寻if (likely(!error))goto finish_lookup; //如果内存缓存中存在该目录项，则直接跳转到finish_lookupif (error < 0)goto out;BUG_ON(nd->inode != dir->d_inode);} else {              //判断打开文件标记存在O_CREAT，则执行如下/* create side of things *//** This will *only* deal with leaving RCU mode - LOOKUP_JUMPED* has been cleared when we got to the last component we are* about to look up*/error = complete_walk(nd);//该函数用于退出RCU walk mode，后面会执行ref walk。if (error)return error;audit_inode(name, dir, LOOKUP_PARENT);error = -EISDIR;/* trailing slashes? 
*/if (nd->last.name[nd->last.len])goto out;}retry_lookup:  //如果代码执行到此，说明RCU walk失败了，下面开始使用核心的lookup_open进行walk查找if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { //如果打开标记有写入的权限error = mnt_want_write(nd->path.mnt);  //获取freeze write lock，防止文件系统冻住if (!error)got_write = true;/** do _not_ fail yet - we might not need that or fail with* a different error; let lookup_open() decide; we'll be* dropping this one anyway.*/}mutex_lock(&dir->d_inode->i_mutex);error = lookup_open(nd, path, file, op, got_write, opened); //这个函数执行后该文件可能被打开，也可能仅仅是lookup找到对应的目录项（会先从dcache寻找，失败后从real fs中寻找）mutex_unlock(&dir->d_inode->i_mutex);if (error <= 0) { //返回0说明已经atomic的创建和打开if (error)goto out;if ((*opened & FILE_CREATED) ||!S_ISREG(file_inode(file)->i_mode))will_truncate = false;audit_inode(name, file->f_path.dentry, 0);goto opened;//跳转到opened}if (*opened & FILE_CREATED) { //这里说明error=1，说明还没有执行完打开操作/* Don't check for write permission, don't truncate */open_flag &= ~O_TRUNC;will_truncate = false;acc_mode = MAY_OPEN;path_to_nameidata(path, nd);goto finish_open_created; //如果文件已经被创建成功，则跳转到finish_open_created}/** create/update audit record if it already exists.*/if (d_is_positive(path->dentry))audit_inode(name, path->dentry, 0);/** If atomic_open() acquired write access it is dropped now due to* possible mount and symlink following (this might be optimized away if* necessary...)*/if (got_write) {mnt_drop_write(nd->path.mnt);got_write = false;}error = -EEXIST;if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))goto exit_dput;error = follow_managed(path, nd->flags);if (error < 0)goto exit_dput;if (error)nd->flags |= LOOKUP_JUMPED;BUG_ON(nd->flags & LOOKUP_RCU);inode = path->dentry->d_inode;
finish_lookup: //运行到这里进行lookup的结尾工作/* we _can_ be in RCU mode here */error = -ENOENT;if (!inode || d_is_negative(path->dentry)) {path_to_nameidata(path, nd);goto out;}if (should_follow_link(path->dentry, !symlink_ok)) {if (nd->flags & LOOKUP_RCU) {if (unlikely(unlazy_walk(nd, path->dentry))) {error = -ECHILD;goto out;}}BUG_ON(inode != path->dentry->d_inode);return 1;}if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {path_to_nameidata(path, nd);} else {save_parent.dentry = nd->path.dentry;save_parent.mnt = mntget(path->mnt);nd->path.dentry = path->dentry;}nd->inode = inode;/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
finish_open://运行到这里进行open的结尾工作error = complete_walk(nd);if (error) {path_put(&save_parent);return error;}audit_inode(name, nd->path.dentry, 0);error = -EISDIR;if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))goto out;error = -ENOTDIR;if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))goto out;if (!S_ISREG(nd->inode->i_mode))will_truncate = false;if (will_truncate) {error = mnt_want_write(nd->path.mnt);if (error)goto out;got_write = true;}
finish_open_created://运行到这里进行create后的open结尾工作，因为create执行后说明以上部分已经做过了，可以直接跳转到此error = may_open(&nd->path, acc_mode, open_flag);if (error)goto out;BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */error = vfs_open(&nd->path, file, current_cred());if (!error) {*opened |= FILE_OPENED;} else {if (error == -EOPENSTALE)goto stale_open;goto out;}
opened: //运行到这里说明已经open成功了，进行一些check操作就可以返回了error = open_check_o_direct(file);if (error)goto exit_fput;error = ima_file_check(file, op->acc_mode, *opened);if (error)goto exit_fput;if (will_truncate) {error = handle_truncate(file);if (error)goto exit_fput;}
out:if (got_write)mnt_drop_write(nd->path.mnt);path_put(&save_parent);terminate_walk(nd);return error;exit_dput:path_put_conditional(path, nd);goto out;
exit_fput:fput(file);goto out;stale_open:/* If no saved parent or already retried then can't retry */if (!save_parent.dentry || retried)goto out;BUG_ON(save_parent.dentry != dir);path_put(&nd->path);nd->path = save_parent;nd->inode = dir->d_inode;save_parent.mnt = NULL;save_parent.dentry = NULL;if (got_write) {mnt_drop_write(nd->path.mnt);got_write = false;}retried = true;goto retry_lookup;
}

fd_install
上面的所有都是为了返回一个打开的文件结构体struct file，获取后还需要和特定fd进行绑定操作，就是通过下面的__fd_install来做的。

/*
 * Per-files_struct descriptor table: the array of struct file pointers
 * plus the close_on_exec and open_fds bitmaps.
 */
struct fdtable {
	unsigned int max_fds;
	struct file __rcu **fd;      /* current fd array */
	unsigned long *close_on_exec;
	unsigned long *open_fds;
	struct rcu_head rcu;
};

/*
 * Bind descriptor number @fd in @files to @file.
 *
 * Takes files->file_lock, checks that the slot is still empty (BUG
 * otherwise) and publishes the pointer with rcu_assign_pointer().
 */
void __fd_install(struct files_struct *files, unsigned int fd,
		struct file *file)
{
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	BUG_ON(fdt->fd[fd] != NULL);
	/* store the struct file in this process's slot for fd */
	rcu_assign_pointer(fdt->fd[fd], file);
	spin_unlock(&files->file_lock);
}

这里使用的rcu的方式更新对应的file结构体指针。

文件系统--open系统调用详解相关推荐

Linux系统调用详解（实现机制分析）
为什么需要系统调用 linux内核中设置了一组用于实现系统功能的子程序,称为系统调用.系统调用和普通库函数调用非常相似,只是系统调用由操作系统核心提供,运行于内核态,而普通的函数调用由函数库或用户 ...
转载：linux驱动层到应用层的重要接口sys文件系统---/sys目录详解
linux驱动层到应用层的重要接口sys文件系统---/sys目录详解 Linux2.6内核中引入了sysfs文件系统.sysfs文件系统整理的设备驱动的相关文件节点,被视为dev文件系统的替代者.同 ...
Linux-0.11 文件系统bitmap.c详解
Linux-0.11 文件系统bitmap.c详解模块简介该模块包含了两对函数,第一对是和i节点相关的free_inode()和new_inode().第二对是和逻辑块相关的free_block( ...
U盘的FAT32/NTFS/exFAT文件系统类型区别详解
U盘的FAT32/NTFS/exFAT文件系统类型区别详解 U盘相信大家非常熟悉了,用户在格式化U盘的时候可以选择文件系统:FAT32/NTFS/exFAT,那么这三个文件系统又有什么区别呢?下面我们 ...
Linux系统调用详解（实现机制分析）--linux内核剖析（六）
本文介绍了系统调用的一些实现细节.首先分析了系统调用的意义,它们与库函数和应用程序接口(API)有怎样的关系.然后,我们考察了Linux内核如何实现系统调用,以及执行系统调用的连锁反应:陷入内核,传递 ...
linux内核剖析---Linux系统调用详解（实现机制分析）
原文博客 http://blog.csdn.net/gatieme/article/details/50779184 本文介绍了系统调用的一些实现细节.首先分析了系统调用的意义,它们与库函数和应用程序 ...
nginx文件系统大小_详解Nginx系列
1.Nginx特点 Nginx是一个事件驱动架构,而非传统过程驱动架构.具有内存占用低,当并发连接大时,能够预测内存使用率.Nginx改变了传统的web服务器体系架构,提高了响应速度,起初Nginx开 ...
linux内核删除文件_Linux内核与根文件系统的关系详解
Linux内核与根文件系统的关系开篇题外话:对于Linux初学者来说,这是一个很纠结的问题,但这也是一个很关键的问题! 一语破天机: "尽管内核是 Linux 的核心,但文件却是用户与操作 ...
linux 系统调用详解
转载自:http://blog.csdn.net/orange_os/article/details/7485069 目录: 1. Linux系统调用原理 2. 系统调用的实现 3. Linux系统调 ...

文件系统--open系统调用详解

文件系统--open系统调用详解相关推荐

最新文章

热门文章