neon浮点运算_Linux下VFP NEON浮点编译
http://blog.csdn.net/liujia2100/article/details/27236477
NEON:SIMD(Single Instruction Multiple Data 单指令多重数据) 指令集, 其针对多媒体和讯号处理程式具备标准化的加速能力。
VFP(Vector Floating Point):向量浮点运算单元。ARM11(如 s3c6410)支持 VFPv2,Cortex-A8(如 s5pv210)支持 VFPv3。
NEON 和 VFPv3 浮点协处理器共享寄存器组,所以对于下文这种标量浮点运算,编译生成的汇编指令是一样的(都是 flds、fsts、fdivs 等 VFP 指令)。
编译选项:
-mfpu=name(name 为 neon、vfp、vfpv3 等):指定 FPU 单元
-mfloat-abi=name(name 为 soft、hard 或 softfp):指定使用软件浮点、硬件浮点,或兼容软浮点调用接口(softfp)
如果只指定 -mfpu,那么默认编译不会选择硬件浮点指令集
如果只指定 -mfloat-abi=hard 或者 -mfloat-abi=softfp,那么编译会使用硬件浮点指令集
测试C文件
/*
 * Minimal test program: performs a single float division so that the
 * code the compiler emits for it can be compared between builds.
 * Always returns 0.
 */
int main(void)
{
float f1, f2, f3;
f1 = 1.2; /* dividend */
f2 = 1.3; /* divisor */
f3 = f1 / f2; /* the division under test; f3 is not read afterwards */
return 0;
}
1、 arm-eabi-gcc -S hello.c -mfpu=neon
@ Compiler output for hello.c. This variant declares ".fpu softvfp" and
@ performs the float division by calling the helper __aeabi_fdiv.
.arch armv5te
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.global __aeabi_fdiv
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them through __aeabi_fdiv, stores the result in the frame
@ and returns 0 in r0.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ Prologue: push fp and lr, point fp at sp+4, reserve 16 bytes of stack.
stmfd sp!, {fp, lr}
.save {fp, lr}
.LCFI0:
.setfp fp, sp, #4
add fp, sp, #4
.LCFI1:
.pad #16
sub sp, sp, #16
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
ldr r3, .L3 @ float
str r3, [fp, #-16] @ float
ldr r3, .L3+4 @ float
str r3, [fp, #-12] @ float
@ Reload both values into r0 and r1, then call the division helper.
ldr r0, [fp, #-16] @ float
ldr r1, [fp, #-12] @ float
bl __aeabi_fdiv
@ Store the value returned in r0 into the frame slot at fp-8.
mov r3, r0
str r3, [fp, #-8] @ float
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, then pop fp and load the saved lr into pc.
sub sp, fp, #4
ldmfd sp!, {fp, pc}
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
2、 arm-eabi-gcc -S hello.c -mfpu=vfp
@ Compiler output for hello.c. This variant declares ".fpu softvfp" and
@ performs the float division by calling the helper __aeabi_fdiv.
.arch armv5te
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.global __aeabi_fdiv
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them through __aeabi_fdiv, stores the result in the frame
@ and returns 0 in r0.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ Prologue: push fp and lr, point fp at sp+4, reserve 16 bytes of stack.
stmfd sp!, {fp, lr}
.save {fp, lr}
.LCFI0:
.setfp fp, sp, #4
add fp, sp, #4
.LCFI1:
.pad #16
sub sp, sp, #16
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
ldr r3, .L3 @ float
str r3, [fp, #-16] @ float
ldr r3, .L3+4 @ float
str r3, [fp, #-12] @ float
@ Reload both values into r0 and r1, then call the division helper.
ldr r0, [fp, #-16] @ float
ldr r1, [fp, #-12] @ float
bl __aeabi_fdiv
@ Store the value returned in r0 into the frame slot at fp-8.
mov r3, r0
str r3, [fp, #-8] @ float
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, then pop fp and load the saved lr into pc.
sub sp, fp, #4
ldmfd sp!, {fp, pc}
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
可以看到上面两个例子,使用的都是 .fpu softvfp,除法是通过调用软件浮点库函数 __aeabi_fdiv 完成的。
3、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=hard
@ Compiler output for hello.c. This variant declares ".fpu neon" and
@ performs the float division inline with the VFP instruction fdivs.
.arch armv5te
.eabi_attribute 27, 3
@ Attribute 28 is Tag_ABI_VFP_args in the ARM ELF build attributes
@ (floating-point arguments passed in VFP registers).
.eabi_attribute 28, 1
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them with fdivs, stores the quotient in the frame and
@ returns 0 in r0. No call is made, so lr is never saved.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
@ Prologue: push fp, set fp = sp, reserve 20 bytes of stack.
str fp, [sp, #-4]!
.save {fp}
.LCFI0:
.setfp fp, sp, #0
add fp, sp, #0
.LCFI1:
.pad #20
sub sp, sp, #20
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
flds s15, .L3
fsts s15, [fp, #-16]
flds s15, .L3+4
fsts s15, [fp, #-12]
@ Reload both operands and divide: s15 = s14 / s15.
flds s14, [fp, #-16]
flds s15, [fp, #-12]
fdivs s15, s14, s15
@ Store the quotient into the frame slot at fp-8.
fsts s15, [fp, #-8]
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, pop fp, return through lr.
add sp, fp, #0
ldmfd sp!, {fp}
bx lr
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
4、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=softfp
@ Compiler output for hello.c. This variant declares ".fpu neon" and
@ performs the float division inline with the VFP instruction fdivs.
.arch armv5te
.eabi_attribute 27, 3
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them with fdivs, stores the quotient in the frame and
@ returns 0 in r0. No call is made, so lr is never saved.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
@ Prologue: push fp, set fp = sp, reserve 20 bytes of stack.
str fp, [sp, #-4]!
.save {fp}
.LCFI0:
.setfp fp, sp, #0
add fp, sp, #0
.LCFI1:
.pad #20
sub sp, sp, #20
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
flds s15, .L3
fsts s15, [fp, #-16]
flds s15, .L3+4
fsts s15, [fp, #-12]
@ Reload both operands and divide: s15 = s14 / s15.
flds s14, [fp, #-16]
flds s15, [fp, #-12]
fdivs s15, s14, s15
@ Store the quotient into the frame slot at fp-8.
fsts s15, [fp, #-8]
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, pop fp, return through lr.
add sp, fp, #0
ldmfd sp!, {fp}
bx lr
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
5、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=softfp
@ Compiler output for hello.c. This variant declares ".fpu vfpv3" and
@ performs the float division inline with the VFP instruction fdivs.
.arch armv5te
.eabi_attribute 27, 3
.fpu vfpv3
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them with fdivs, stores the quotient in the frame and
@ returns 0 in r0. No call is made, so lr is never saved.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
@ Prologue: push fp, set fp = sp, reserve 20 bytes of stack.
str fp, [sp, #-4]!
.save {fp}
.LCFI0:
.setfp fp, sp, #0
add fp, sp, #0
.LCFI1:
.pad #20
sub sp, sp, #20
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
flds s15, .L3
fsts s15, [fp, #-16]
flds s15, .L3+4
fsts s15, [fp, #-12]
@ Reload both operands and divide: s15 = s14 / s15.
flds s14, [fp, #-16]
flds s15, [fp, #-12]
fdivs s15, s14, s15
@ Store the quotient into the frame slot at fp-8.
fsts s15, [fp, #-8]
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, pop fp, return through lr.
add sp, fp, #0
ldmfd sp!, {fp}
bx lr
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
6、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=hard
@ Compiler output for hello.c. This variant declares ".fpu vfpv3" and
@ performs the float division inline with the VFP instruction fdivs.
.arch armv5te
.eabi_attribute 27, 3
@ Attribute 28 is Tag_ABI_VFP_args in the ARM ELF build attributes
@ (floating-point arguments passed in VFP registers).
.eabi_attribute 28, 1
.fpu vfpv3
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them with fdivs, stores the quotient in the frame and
@ returns 0 in r0. No call is made, so lr is never saved.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
@ Prologue: push fp, set fp = sp, reserve 20 bytes of stack.
str fp, [sp, #-4]!
.save {fp}
.LCFI0:
.setfp fp, sp, #0
add fp, sp, #0
.LCFI1:
.pad #20
sub sp, sp, #20
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
flds s15, .L3
fsts s15, [fp, #-16]
flds s15, .L3+4
fsts s15, [fp, #-12]
@ Reload both operands and divide: s15 = s14 / s15.
flds s14, [fp, #-16]
flds s15, [fp, #-12]
fdivs s15, s14, s15
@ Store the quotient into the frame slot at fp-8.
fsts s15, [fp, #-8]
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, pop fp, return through lr.
add sp, fp, #0
ldmfd sp!, {fp}
bx lr
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
从上面可以看到,softfp 和 hard 使用的指令集是一样的,都是硬件浮点;neon 和 vfp 的区别,仅仅体现在 .fpu vfpv3 和 .fpu neon。另外,hard 的输出比 softfp 多了一行 .eabi_attribute 28, 1(Tag_ABI_VFP_args,表示使用 VFP 寄存器传递浮点参数)。
7、 arm-eabi-gcc -S hello.c -mfloat-abi=hard
@ Compiler output for hello.c. This variant declares ".fpu vfp" and
@ performs the float division inline with the VFP instruction fdivs.
.arch armv5te
.eabi_attribute 27, 3
@ Attribute 28 is Tag_ABI_VFP_args in the ARM ELF build attributes
@ (floating-point arguments passed in VFP registers).
.eabi_attribute 28, 1
.fpu vfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 6
.eabi_attribute 18, 4
.file "hello.c"
.text
.align 2
.global main
.type main, %function
@ main: copies two float constants from the literal pool at .L3 into the
@ frame, divides them with fdivs, stores the quotient in the frame and
@ returns 0 in r0. No call is made, so lr is never saved.
main:
.fnstart
.LFB0:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
@ Prologue: push fp, set fp = sp, reserve 20 bytes of stack.
str fp, [sp, #-4]!
.save {fp}
.LCFI0:
.setfp fp, sp, #0
add fp, sp, #0
.LCFI1:
.pad #20
sub sp, sp, #20
.LCFI2:
@ Copy the two literal-pool words into the frame slots at fp-16 and fp-12.
flds s15, .L3
fsts s15, [fp, #-16]
flds s15, .L3+4
fsts s15, [fp, #-12]
@ Reload both operands and divide: s15 = s14 / s15.
flds s14, [fp, #-16]
flds s15, [fp, #-12]
fdivs s15, s14, s15
@ Store the quotient into the frame slot at fp-8.
fsts s15, [fp, #-8]
@ Return value: r0 = 0.
mov r3, #0
mov r0, r3
@ Epilogue: restore sp from fp, pop fp, return through lr.
add sp, fp, #0
ldmfd sp!, {fp}
bx lr
.L4:
.align 2
@ Literal pool holding the two constants loaded above.
.L3:
.word 1067030938 @ 0x3F99999A, single-precision bit pattern of 1.2
.word 1067869798 @ 0x3FA66666, single-precision bit pattern of 1.3
.LFE0:
.fnend
.size main, .-main
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
.section .note.GNU-stack,"",%progbits
当直接使用-mfloat-abi=hard时,会默认使用.fpu vfp硬件浮点。
neon浮点运算_Linux下VFP NEON浮点编译相关推荐
- stm32 vscode 编译_linux 下 VSCODE 使用CMake编译STM32程序
摘要: M32L0xx_HAL_Driver/Src/stm32l0xx_hal_uart_ex.c.obj[67%]Builttargetstm32l051Scanningdependencieso ...
- neon浮点运算_ARM 浮点运算详解
原标题:ARM 浮点运算详解 一:早期 上的浮点模拟器: 早期的ARM没有协处理器,所以是由CPU来模拟的,即所需浮点运算均在浮点运算模拟器(float math emulation)上进行,需要的浮 ...
- neon浮点运算_NEON简单介绍
"ARM Advanced SIMD",nick-named "NEON", it provides:(1).A set of interesting scal ...
- neon浮点运算_ARM NEON指令集优化理论与实践
ARM NEON指令集优化理论与实践 一.简介 NEON就是一种基于SIMD思想的ARM技术,相比于ARMv6或之前的架构,NEON结合了64-bit和128-bit的SIMD指令集,提供128-bi ...
- linux内核态加速文件读取,学习在kernel态下使用NEON对算法进行加速的方法
本文跟着小编一起来学习在linux kernel态下如何使用NEON对算法进行加速的技巧,内容通过图文实例给大家做了详细分析,一起来看下. ARM处理器从cortex系列开始集成NEON处理单元,该单 ...
- ARM SIMD NEON 简介 (翻译自 Introducing NEON Development Article)
目录 NEON简介 SIMD是什么? ARM SIMD 指令集 NEON是什么? NEON架构概览 支持的数据类型 NEON寄存器 NEON指令 NEON开发 汇编器 Intrinsics 自动向量化 ...
- 【genius_platform软件平台开发】第八十二讲:ARM Neon指令集一(ARM NEON Intrinsics, SIMD运算, 优化心得)
1. ARM Neon Intrinsics 编程 1.入门:基本能上手写Intrinsics 1.1 Neon介绍.简明案例与编程惯例 1.2 如何检索Intrinsics 1.3 优化效果案例 1 ...
- Ubuntu 18.04 下搭建 C/C++编译开发环境及GCC多版本切换
关注公众号 风色年代(itfantasycc) 领 500G Java 微服务 开发资料 Ubuntu 18.04 下搭建 C/C++编译开发环境及GCC多版本切换_Linux教程_Linux公社-L ...
- android ndk neon,Android NDK开发之 NEON使用介绍
首先找到了要在C源代码中只用NEON库需要的头文件 arm_neon.h. #include //在代码中先添加了这行语句,然后执行ndk-build 却提示了错误 //提示要增加什么标志,自己在 L ...
最新文章
- 鲁棒图的三元素:抽象对象,实体对象和控制对象
- 百度地图-解决新版百度定位失败问题
- ios - 使用@try、catch捕获异常:
- boost::math模块使用二项分布复制 NAG 库调用的测试程序
- 鸿蒙股票深度分析,本月华为鸿蒙概念股市回顾分析(3月31日)
- 前端学习(2564):如何触发组件的更新
- myeclipse 安装jad反编译插件
- 使用nginx负载均衡的webservice wsdl访问不到_谁说前端不用懂,Nginx 反向代理与负载均衡(超实用)...
- mysql limit (1-1)10_110,当您知道只有1个结果时,是否将'LIMIT 1'添加到MySQL查询中使它们更快?...
- 利用第三方库XML解析 (TBXML)转化成模型数据
- shiro会话监听_Shiro权限控制_(二)_session
- 5.10300 - Ecological Premium
- 将U盘分成 启动盘+文件存储区
- [分享]我们团队管理的最佳实践——企业积分制度应该如何建立?
- 王强是如何学计算机的,浙江大学计算机科学与技术学院导师介绍:王强
- 解决屏保时间过短的问题(电脑息屏过快)
- 【JY】为什么要了解和学习多款仿真软件?
- Word文档怎样翻译?Word文档翻译方法大分享
- day21:接口测试实战(充值recharge)
- Javascript中引用数据类型
热门文章
- how is SAP CRM One Order processType fragment initialized
- react-native init MyProject之后发生了什么
- outlook的插件管理(Web界面)
- 如何处理object family XXX is not intended for installed base category issue
- Document Builder: 如何分析document template里某个字段未被web service填充的问题
- Workflow Administration
- SAP BRF+ Interpretation Mode与Generation Mode
- Cordova Embedded Server
- Jerry的通过CDS view + Smart Template 开发Fiori应用的blog合集
- Word2019上面的MathType7.4插件忽然消失了【终极解决办法记录】