linux内核奇遇记之md源代码解读之十一raid5d
- 4626 /*
- 4627 * This is our raid5 kernel thread.
- 4628 *
- 4629 * We scan the hash table for stripes which can be handled now.
- 4630 * During the scan, completed stripes are saved for us by the interrupt
- 4631 * handler, so that they will not have to wait for our next wakeup.
- 4632 */
- 4633 static void raid5d(struct mddev *mddev)
- 4634 {
- 4635 struct r5conf *conf = mddev->private;
- 4636 int handled;
- 4637 struct blk_plug plug;
- 4638
- 4639 pr_debug("+++ raid5d active\n");
- 4640
- 4641 md_check_recovery(mddev);
- 4642
- 4643 blk_start_plug(&plug);
- 4644 handled = 0;
- 4645 spin_lock_irq(&conf->device_lock);
- 4646 while (1) {
- 4647 struct bio *bio;
- 4648 int batch_size;
- 4649
- 4650 if (
- 4651 !list_empty(&conf->bitmap_list)) {
- 4652 /* Now is a good time to flush some bitmap updates */
- 4653 conf->seq_flush++;
- 4654 spin_unlock_irq(&conf->device_lock);
- 4655 bitmap_unplug(mddev->bitmap);
- 4656 spin_lock_irq(&conf->device_lock);
- 4657 conf->seq_write = conf->seq_flush;
- 4658 activate_bit_delay(conf);
- 4659 }
4657行,将 seq_write 更新为 seq_flush,表示序号不超过 seq_flush 的 bitmap 更新都已经刷写到磁盘;随后 activate_bit_delay 把等待 bitmap 落盘的条带重新加入处理队列。
- 4660 raid5_activate_delayed(conf);
- 4661
4660行,看函数名就是激活延迟条带的意思。那么为什么要延迟条带的处理呢?按照块设备常用的手段,延迟处理是为了等待后续请求到来、合并成更大的请求,这里也是同样的道理。从代码可以看到,只有当正在预读的条带数 preread_active_stripes 小于 IO_THRESHOLD 时,才会把 delayed_list 上的条带逐个转移到 hold_list 等待处理。那么条带什么时候会被标记为延迟处理呢?我们跟进raid5_activate_delayed函数:
- 3691static void raid5_activate_delayed(struct r5conf *conf)
- 3692{
- 3693 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
- 3694 while (!list_empty(&conf->delayed_list)) {
- 3695 struct list_head *l = conf->delayed_list.next;
- 3696 struct stripe_head *sh;
- 3697 sh = list_entry(l, struct stripe_head, lru);
- 3698 list_del_init(l);
- 3699 clear_bit(STRIPE_DELAYED, &sh->state);
- 3700 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
- 3701 atomic_inc(&conf->preread_active_stripes);
- 3702 list_add_tail(&sh->lru, &conf->hold_list);
- 3703 }
- 3704 }
- 3705}
- 204 if (test_bit(STRIPE_DELAYED, &sh->state) &&
- 205 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
- 206 list_add_tail(&sh->lru, &conf->delayed_list);
- 2772static void handle_stripe_dirtying(struct r5conf *conf,
- 2773 struct stripe_head *sh,
- 2774 struct stripe_head_state *s,
- 2775 int disks)
- 2776{
- ...
- 2808 set_bit(STRIPE_HANDLE, &sh->state);
- 2809 if (rmw < rcw && rmw > 0)
- ...
- 2825 } else {
- 2826 set_bit(STRIPE_DELAYED, &sh->state);
- 2827 set_bit(STRIPE_HANDLE, &sh->state);
- 2828 }
- 2829 }
- 2830 }
- 2831 if (rcw <= rmw && rcw > 0) {
- ...
- 2851 } else {
- 2852 set_bit(STRIPE_DELAYED, &sh->state);
- 2853 set_bit(STRIPE_HANDLE, &sh->state);
- 2854 }
这里有两种情况会设置STRIPE_DELAYED:rmw(读改写,read-modify-write)和rcw(重构写,reconstruct-write)。不管是rcw还是rmw,都不是满条带写,都需要先从磁盘预读旧数据或旧校验才能计算新校验,因此在效率上肯定比不上满条带写。所以这里把条带挂到delayed_list延迟处理,期望后续写请求到来后能凑成满条带写、避免预读。那么合并请求的流程是怎么样的呢?我们这里根据代码流程简要说明一下:
- 4662 while ((bio = remove_bio_from_retry(conf))) {
- 4663 int ok;
- 4664 spin_unlock_irq(&conf->device_lock);
- 4665 ok = retry_aligned_read(conf, bio);
- 4666 spin_lock_irq(&conf->device_lock);
- 4667 if (!ok)
- 4668 break;
- 4669 handled++;
- 4670 }
- 4672 batch_size = handle_active_stripes(conf);
- 4673 if (!batch_size)
- 4674 break;
- 4601#define MAX_STRIPE_BATCH 8
- 4602static int handle_active_stripes(struct r5conf *conf)
- 4603{
- 4604 struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
- 4605 int i, batch_size = 0;
- 4606
- 4607 while (batch_size < MAX_STRIPE_BATCH &&
- 4608 (sh = __get_priority_stripe(conf)) != NULL)
- 4609 batch[batch_size++] = sh;
- 4610
- 4611 if (batch_size == 0)
- 4612 return batch_size;
- 4613 spin_unlock_irq(&conf->device_lock);
- 4614
- 4615 for (i = 0; i < batch_size; i++)
- 4616 handle_stripe(batch[i]);
- 4617
- 4618 cond_resched();
- 4619
- 4620 spin_lock_irq(&conf->device_lock);
- 4621 for (i = 0; i < batch_size; i++)
- 4622 __release_stripe(conf, batch[i]);
- 4623 return batch_size;
- 4624}
- 3966/* __get_priority_stripe - get the next stripe to process
- 3967 *
- 3968 * Full stripe writes are allowed to pass preread active stripes up until
- 3969 * the bypass_threshold is exceeded. In general the bypass_count
- 3970 * increments when the handle_list is handled before the hold_list; however, it
- 3971 * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a
- 3972 * stripe with in flight i/o. The bypass_count will be reset when the
- 3973 * head of the hold_list has changed, i.e. the head was promoted to the
- 3974 * handle_list.
- 3975 */
- 3976static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
- 3977{
- 3978 struct stripe_head *sh;
- 3979
- 3980 pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
- 3981 __func__,
- 3982 list_empty(&conf->handle_list) ? "empty" : "busy",
- 3983 list_empty(&conf->hold_list) ? "empty" : "busy",
- 3984 atomic_read(&conf->pending_full_writes), conf->bypass_count);
- 3985
- 3986 if (!list_empty(&conf->handle_list)) {
- 3987 sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
- 3988
- 3989 if (list_empty(&conf->hold_list))
- 3990 conf->bypass_count = 0;
- 3991 else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
- 3992 if (conf->hold_list.next == conf->last_hold)
- 3993 conf->bypass_count++;
- 3994 else {
- 3995 conf->last_hold = conf->hold_list.next;
- 3996 conf->bypass_count -= conf->bypass_threshold;
- 3997 if (conf->bypass_count < 0)
- 3998 conf->bypass_count = 0;
- 3999 }
- 4000 }
- 4001 } else if (!list_empty(&conf->hold_list) &&
- 4002 ((conf->bypass_threshold &&
- 4003 conf->bypass_count > conf->bypass_threshold) ||
- 4004 atomic_read(&conf->pending_full_writes) == 0)) {
- 4005 sh = list_entry(conf->hold_list.next,
- 4006 typeof(*sh), lru);
- 4007 conf->bypass_count -= conf->bypass_threshold;
- 4008 if (conf->bypass_count < 0)
- 4009 conf->bypass_count = 0;
- 4010 } else
- 4011 return NULL;
- 4012
- 4013 list_del_init(&sh->lru);
- 4014 atomic_inc(&sh->count);
- 4015 BUG_ON(atomic_read(&sh->count) != 1);
- 4016 return sh;
- 4017}
- 4675 handled += batch_size;
- 4676
- 4677 if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
- 4678 spin_unlock_irq(&conf->device_lock);
- 4679 md_check_recovery(mddev);
- 4680 spin_lock_irq(&conf->device_lock);
- 4681 }
- 4682 }
linux内核奇遇记之md源代码解读之十一raid5d相关推荐
- linux内核奇遇记之md源代码解读之八阵列同步二
linux内核奇遇记之md源代码解读之八阵列同步二 转载请注明出处:http://blog.csdn.net/liumangxiong 在上一小节里讲到启动同步线程: 7824 mddev->s ...
- linux内核奇遇记之md源代码解读之十二raid读写
linux内核奇遇记之md源代码解读之十二raid读写 转载请注明出处:http://blog.csdn.net/liumangxiong 我们都知道,对一个linux块设备来说,都有一个对应的请求队 ...
- 复制linux内核,linux内核写时复制机制源代码解读
作者简介 写时复制技术(一下简称COW)是linux内核比较重要的一种机制,我们都知道:父进程fork子进程的时候,子进程会和父进程会以只读的方式共享所有私有的可写页,当有一方将要写的时候会发生COW ...
- 高通linux内核目录,高通 android 源代码以及目标系统目录结构
下面为高通android源代码结构 build/ – Build 环境建立和makefiles生成4 bionic/ – Android C 库 dalvik/ – Android Java 虚拟机 ...
- 《Linux内核设计与实现》读书笔记(十一)- 定时器和时间管理
系统中有很多与时间相关的程序(比如定期执行的任务,某一时间执行的任务,推迟一段时间执行的任务),因此,时间的管理对于linux来说非常重要. 主要内容: 系统时间 定时器 定时器相关概念 定时器执行流 ...
- linux内核网络协议栈--kernel bridge转发逻辑(十一)
1. netdev_rx_handler_register 在分析之前首先要介绍一个重要函数:netdev_rx_handler_register,这个函数是2.6内核所没有的. netdev_rx_ ...
- xilinx linux内核,Xilinx-Zynq Linux内核源码编译过程
本文内容依据http://www.wiki.xilinx.com网址编写,编译所用操作系统为ubuntu 14 1.交叉编译环境的安装配置 2.uboot的编译 1)下载uboot源代码 下载uboo ...
- Linux内核调试方法【转】
转自:http://www.cnblogs.com/shineshqw/articles/2359114.html kdb:只能在汇编代码级进行调试: 优点是不需要两台机器进行调试. gdb:在调试模 ...
- Linux内核移植之一:内核源码结构与Makefile分析
内容来自 韦东山<嵌入式Linux应用开发完全手册> 一.内核介绍 1.版本及其特点 Linux内核的版本号可以从源代码的顶层目录下的Makefile中看到,比如下面几行它们构成了Linu ...
- 《Linux内核设计与实现》读书笔记(十九)- 可移植性
linux内核的移植性非常好, 目前的内核也支持非常多的体系结构(有20多个). 但是刚开始时, linux也只支持 intel i386 架构, 从 v1.2版开始支持 Digital Alpha, ...
最新文章
- 吴恩达 coursera AI 专项四第四课总结+作业答案
- 自定义图框_Smart3D自定义图纸属性及其应用
- bzoj 1597 [Usaco2008 Mar]土地购买——斜率优化dp
- power bi 参数_参数化Power BI报表入门
- 人民日报发推欢迎Google重返大陆,FB上长文阐述详细立场
- 安装 tensorflow 环境
- 模拟小型电子商务网站绘制ER图
- 分子系统学-多序列比对和系统进化分析教程
- Graph Neural Network(GAE,GVAE,ARGA)
- EM算法及python实现
- 通信中带宽与数据传输速率的联系与区别
- CentOS的下载和安装
- 人工智能α-β树剪支图文
- 《University Calculus》-chape12-偏导数-基本概念
- Fluent最全complied错误原因分析:Error: The UDF library you are trying to load (libudf) is not compiled for
- 建筑安全检查标准怎么计算机,建筑施工安全检查标准评分 怎么算
- 蓝桥杯 模板Template Part9:PCF8591 ADC/DAC
- 项目一 认识Linux操作系统
- 趣节点:互联网信息大爆炸时代,企业品牌口碑营销需要注意什么?
- java反射和反编译