详解 ARM PMU (Performance Monitoring Unit)
本文会详细讲解ARM PMU模块原理及代码流程
kernel version=4.14.90, arch=arm64
初始化流程:
arch/arm64/kernel/perf_event.c
device_initcall(armv8_pmu_driver_init)
这里使用device_initcall调用进行函数的初始化流程,关于device_initcall的定义及实现流程再额外研究,这里简单列一下initcall系列函数的调用顺序
/* include/linux/init.h */

/*
 * Initcall registration macros, one per level. Each one forwards the
 * function to __define_initcall() together with a level id; they are listed
 * here in calling order (ids 0 through 7, with rootfs between 5 and 6).
 * Ids with an "s" suffix belong to the matching *_sync variants.
 * device_initcall(), used by the ARMv8 PMU driver, is level 6.
 */
#define pure_initcall(fn)		__define_initcall(fn, 0)

#define core_initcall(fn)		__define_initcall(fn, 1)
#define core_initcall_sync(fn)		__define_initcall(fn, 1s)
#define postcore_initcall(fn)		__define_initcall(fn, 2)
#define postcore_initcall_sync(fn)	__define_initcall(fn, 2s)
#define arch_initcall(fn)		__define_initcall(fn, 3)
#define arch_initcall_sync(fn)		__define_initcall(fn, 3s)
#define subsys_initcall(fn)		__define_initcall(fn, 4)
#define subsys_initcall_sync(fn)	__define_initcall(fn, 4s)
#define fs_initcall(fn)			__define_initcall(fn, 5)
#define fs_initcall_sync(fn)		__define_initcall(fn, 5s)
#define rootfs_initcall(fn)		__define_initcall(fn, rootfs)
#define device_initcall(fn)		__define_initcall(fn, 6)
#define device_initcall_sync(fn)	__define_initcall(fn, 6s)
#define late_initcall(fn)		__define_initcall(fn, 7)
#define late_initcall_sync(fn)		__define_initcall(fn, 7s)
在初始化函数调用过后就开始进入主初始化函数,这里对ARM/X86架构进行了区分
/*
 * Driver entry point, registered through device_initcall().
 *
 * When ACPI is disabled, register armv8_pmu_driver as a platform driver;
 * otherwise hand armv8_pmuv3_init to the ACPI probe path.
 *
 * Returns whatever the selected registration/probe call returns.
 */
static int __init armv8_pmu_driver_init(void)
{
	if (acpi_disabled)
		return platform_driver_register(&armv8_pmu_driver);
	else
		return arm_pmu_acpi_probe(armv8_pmuv3_init);
}
对于arch=arm64,当然是走的platform_driver_register(&armv8_pmu_driver),arm架构PMU使用的platform_driver,虚拟platform总线模式,关于platform_driver再额外研究
接下来我们来看armv8_pmu_driver,这里定义出 platform driver所必须的.driver & .probe回调函数,在platform driver准备就绪后就会依次调用每个driver中的probe。
接着就会调用PMU的通用driver,arm_pmu_device_probe函数定义在drivers/perf/arm_pmu_platform.c中
/*
 * Probe callback for the ARMv8 PMU platform driver.
 *
 * Delegates to the generic arm_pmu_device_probe(), passing the device,
 * the armv8_pmu_of_device_ids table and NULL as the third argument.
 * The generic probe's return value is handed back unchanged.
 */
static int armv8_pmu_device_probe(struct platform_device *pdev)
{
	int status;

	status = arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);

	return status;
}
这里需要说明一下PMU的驱动结构
分为两个大部分
1) 位于drivers/perf下的通用driver
2) 与架构相关的,定义在arch/arm64/下的,与指令集相关的,perf_event.c / perf_event_v8.c 等等
arm_pmu_device_probe函数主要做了以下几个事情
pmu = armpmu_alloc();
ret = pmu_parse_irqs(pmu);
ret = init_fn(pmu);
ret = armpmu_request_irqs(pmu);
ret = armpmu_register(pmu);
armpmu_alloc
pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
pmu->hw_events = alloc_percpu(struct pmu_hw_events);
pmu->pmu = (struct pmu) {.pmu_enable = armpmu_enable,.pmu_disable = armpmu_disable,.event_init = armpmu_event_init,.add = armpmu_add,.del = armpmu_del,.start = armpmu_start,.stop = armpmu_stop,.read = armpmu_read,.filter_match = armpmu_filter_match,.attr_groups = pmu->attr_groups,/** This is a CPU PMU potentially in a heterogeneous* configuration (e.g. big.LITTLE). This is not an uncore PMU,* and we have taken ctx sharing into account (e.g. with our* pmu::filter_match callback and pmu::event_init group* validation).*/.capabilities = PERF_PMU_CAP_HETEROGENEOUS_CPUS,
};for_each_possible_cpu(cpu) {struct pmu_hw_events *events;events = per_cpu_ptr(pmu->hw_events, cpu);raw_spin_lock_init(&events->pmu_lock);events->percpu_pmu = pmu;
}
这里需要粘贴一下PMU的结构体
/*
 * ARM PMU descriptor.
 *
 * Embeds the generic struct pmu as its first member, followed by CPU masks,
 * a set of function-pointer hooks (IRQ handling, counter enable/disable,
 * counter read/write, event mapping and filtering), and per-CPU hardware
 * event state.
 */
struct arm_pmu {
	struct pmu pmu;
	cpumask_t active_irqs;
	cpumask_t supported_cpus;
	char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct perf_event *event);
	void (*disable)(struct perf_event *event);
	int (*get_event_idx)(struct pmu_hw_events *hw_events,
			     struct perf_event *event);
	void (*clear_event_idx)(struct pmu_hw_events *hw_events,
				struct perf_event *event);
	int (*set_event_filter)(struct hw_perf_event *evt,
				struct perf_event_attr *attr);
	u32 (*read_counter)(struct perf_event *event);
	void (*write_counter)(struct perf_event *event, u32 val);
	void (*start)(struct arm_pmu *);
	void (*stop)(struct arm_pmu *);
	void (*reset)(void *);
	int (*map_event)(struct perf_event *event);
	int (*filter_match)(struct perf_event *event);
	int num_events;
	u64 max_period;
	bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
	struct platform_device *plat_device;
	struct pmu_hw_events __percpu *hw_events;
	struct hlist_node node;
	struct notifier_block cpu_pm_nb;
	/* the attr_groups array must be NULL-terminated */
	const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];

	/* Only to be used by ACPI probing code */
	unsigned long acpi_cpuid;
};
上面填充了该结构体的第一个成员pmu,其类型struct pmu的定义为:
include/linux/perf_event.h/*** struct pmu - generic performance monitoring unit*/
struct pmu {struct list_head entry;struct module *module;struct device *dev;const struct attribute_group **attr_groups;const char *name;int type;/** various common per-pmu feature flags*/int capabilities;int * __percpu pmu_disable_count;struct perf_cpu_context * __percpu pmu_cpu_context;atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */int task_ctx_nr;int hrtimer_interval_ms;/* number of address filters this PMU can do */unsigned int nr_addr_filters;/** Fully disable/enable this PMU, can be used to protect from the PMI* as well as for lazy/batch writing of the MSRs.*/void (*pmu_enable) (struct pmu *pmu); /* optional */void (*pmu_disable) (struct pmu *pmu); /* optional *//** Try and initialize the event for this PMU.** Returns:* -ENOENT -- @event is not for this PMU** -ENODEV -- @event is for this PMU but PMU not present* -EBUSY -- @event is for this PMU but PMU temporarily unavailable* -EINVAL -- @event is for this PMU but @event is not valid* -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported* -EACCESS -- @event is for this PMU, @event is valid, but no privilidges** 0 -- @event is for this PMU and valid** Other error return values are allowed.*/int (*event_init) (struct perf_event *event);/** Notification that the event was mapped or unmapped. Called* in the context of the mapping task.*/void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional *//** Flags for ->add()/->del()/ ->start()/->stop(). There are* matching hw_perf_event::state flags.*/
#define PERF_EF_START 0x01 /* start the counter when adding */
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping *//** Adds/Removes a counter to/from the PMU, can be done inside a* transaction, see the ->*_txn() methods.** The add/del callbacks will reserve all hardware resources required* to service the event, this includes any counter constraint* scheduling etc.** Called with IRQs disabled and the PMU disabled on the CPU the event* is on.** ->add() called without PERF_EF_START should result in the same state* as ->add() followed by ->stop().** ->del() must always PERF_EF_UPDATE stop an event. If it calls* ->stop() that must deal with already being stopped without* PERF_EF_UPDATE.*/int (*add) (struct perf_event *event, int flags);void (*del) (struct perf_event *event, int flags);/** Starts/Stops a counter present on the PMU.** The PMI handler should stop the counter when perf_event_overflow()* returns !0. ->start() will be used to continue.** Also used to change the sample period.** Called with IRQs disabled and the PMU disabled on the CPU the event* is on -- will be called from NMI context with the PMU generates* NMIs.** ->stop() with PERF_EF_UPDATE will read the counter and update* period/count values like ->read() would.** ->start() with PERF_EF_RELOAD will reprogram the the counter* value, must be preceded by a ->stop() with PERF_EF_UPDATE.*/void (*start) (struct perf_event *event, int flags);void (*stop) (struct perf_event *event, int flags);/** Updates the counter value of the event.** For sampling capable PMUs this will also update the software period* hw_perf_event::period_left field.*/void (*read) (struct perf_event *event);/** Group events scheduling is treated as a transaction, add* group events as a whole and perform one schedulability test.* If the test fails, roll back the whole group** Start the transaction, after this ->add() doesn't need to* do schedulability tests.** Optional.*/void (*start_txn) (struct pmu *pmu, unsigned int txn_flags);/** If ->start_txn() disabled the ->add() schedulability 
test* then ->commit_txn() is required to perform one. On success* the transaction is closed. On error the transaction is kept* open until ->cancel_txn() is called.** Optional.*/int (*commit_txn) (struct pmu *pmu);/** Will cancel the transaction, assumes ->del() is called* for each successful ->add() during the transaction.** Optional.*/void (*cancel_txn) (struct pmu *pmu);/** Will return the value for perf_event_mmap_page::index for this event,* if no implementation is provided it will default to: event->hw.idx + 1.*/int (*event_idx) (struct perf_event *event); /*optional *//** context-switches callback*/void (*sched_task) (struct perf_event_context *ctx,bool sched_in);/** PMU specific data size*/size_t task_ctx_size;/** Set up pmu-private data structures for an AUX area*/void *(*setup_aux) (int cpu, void **pages,int nr_pages, bool overwrite);/* optional *//** Free pmu-private AUX data structures*/void (*free_aux) (void *aux); /* optional *//** Validate address range filters: make sure the HW supports the* requested configuration and number of filters; return 0 if the* supplied filters are valid, -errno otherwise.** Runs in the context of the ioctl()ing process and is not serialized* with the rest of the PMU callbacks.*/int (*addr_filters_validate) (struct list_head *filters);/* optional *//** Synchronize address range filter configuration:* translate hw-agnostic filters into hardware configuration in* event::hw::addr_filters.** Runs as a part of filter sync sequence that is done in ->start()* callback by calling perf_event_addr_filters_sync().** May (and should) traverse event::addr_filters::list, for which its* caller provides necessary serialization.*/void (*addr_filters_sync) (struct perf_event *event);/* optional *//** Filter events for PMU-specific reasons.*/int (*filter_match) (struct perf_event *event); /* optional */
};
详解 ARM PMU (Performance Monitoring Unit)相关推荐
- linux命令详解(arm交叉编译器)
PC机用的win10,虚拟机使用的是VMware12,Linux版本是redhat6. 1.arm-linux-gcc 编译器 举例:arm-linux-gcc hello.c –o hello ...
- arm linux 进程调度,详解ARM Linux 2.4.x进程调度
Linux2.4.x是一个基于非抢占式的多任务的分时操作系统,虽然在用户进程的调度上采用抢占式策略,但是而在内核还是采用了轮转的方法,如果有个内核态的线程恶性占有CPU不释放,那系统无法从中解脱出来, ...
- 详解ARM Cortex-M33处理器:性能/功耗/安全的最佳平衡
基于ARM Cortex处理器的片上系统(SoC)解决方案适用于多种嵌入式设计细分市场,如物联网.电机控制.医疗.汽车.家电自动化等.我们的处理器品种丰富且基于同一个标准架构,针对不同的产品市场提供广 ...
- 浏览器Performance面板性能监控详解
研背景介绍 1.1 What Chrome DevTools的Performance面板:可以记录和分析页面在运行时的所有活动. 1.2 Why 使用Performance面板解决应用性能瓶颈 为应用 ...
- intel 性能监控计数器PMC寄存器详解
所有内容来自intel官方手册,章节号已给出... 一 以下内容来自(P279):30.1 PERFORMANCE MONITORING OVERVIEW 从Pentium奔腾处理器开始,Intel ...
- (转)CPU 参数详解
CPU 参数详解 CPU是Central Processing Unit(中央处理器)的缩写,CPU一般由逻辑运算单元.控制单元和存储单元组成.在逻辑运算和控制单元中包括一些寄存器,这些寄存器用于CP ...
- [system] systemd详解
文章目录 systemd 由来 概述 查看版本号 兼容性 系统管理 systemctl 管理系统. systemd-analyze 查看启动耗时. hostnamectl 看当前主机的信息. loca ...
- systemd的unit配置文件详解
目录 unit类型 systemctl常用命令 unit配置文件 [Unit]部分详解 [Service]部分详解 service专有参数 进程执行环境 进程环境变量 如何杀死进程 进程资源控制 [I ...
- arm 饱和指令_ARM平台下NEON使用方法详解
NEON介绍 在移动平台上进行一些复杂算法的开发,一般需要用到指令集来进行加速.NEON 技术是 ARM Cortex™-A 系列处理器的 128 位 SIMD(单指令,多数据)架构扩展,专门针对大规 ...
最新文章
- 一定要搜藏的20个非常有用的PHP类库
- 定向输出命令_网络工程师之linux重定向命令和管道命令详解
- 《c语言从入门到精通》看书笔记——第16章 网络套接字编程(上)——网络
- java 三大集合_java中的三大集合入门笔记(简单实用)
- 部分 I. 教程_第 2 章 SQL语言_2.2. 概念
- spring boot 集成 Oracle Access Manager(OAM)单点登录
- Derby安装使用说明
- 深度学习在Airbnb搜索推荐中的应用实践
- leetcode 703. 数据流中的第K大元素(Kth Largest Element in a Stream)
- 书店智能机器人编程与拼装体验课堂_让人工智能与编程教育走进初中教学课堂...
- AppStore图片尺寸以及内容要求Screenshot specifications
- 自己开发的天视通局域网电脑监控软件,需要的来下载
- 湖北省贷款贴息扶持政策申报指南,2022年申报条件以及贴息奖励标准
- 政府行业微信公众号有哪些吸引粉丝的技巧
- ML - 分类算法的评价
- Java兔子生兔子问题
- Adobe官方公布的RTMP协议规范
- 生物信息学算法之Python实现|Rosalind刷题笔记:013 随机DNA序列
- oracle索引整理
- 一分耕耘一分收获,精诚所至金石为开
热门文章
- [C#] 控制系统音量-第二章
- AI、大数据时代,智能安防在智慧城市建设中的发展与应用趋势
- 当英文遇上汉语 就知道汉语有多强大了
- Excel分列小技巧
- 语音对话声空计算机APP,空空语音app
- python怎么设置随机数种子_Pytorch在dataloader类中设置shuffle的随机数种子方式
- 网盘下载速度太慢的话请看这里!
- 长尾关键词排名优化技巧
- 关于Nginx mmap(MAP_ANON|MAP_SHARED, 314572800)报错
- Keras的loss_weights和class_weight