SIMD相关头文件包括:

//#include <ivec.h>//MMX
//#include <fvec.h>//SSE(also include ivec.h)
//#include <dvec.h>//SSE2(also include fvec.h)
#include <mmintrin.h> //MMX
#include <xmmintrin.h> //SSE(include mmintrin.h)
#include <emmintrin.h> //SSE2(include xmmintrin.h)
#include <pmmintrin.h> //SSE3(include emmintrin.h)
#include <tmmintrin.h>//SSSE3(include pmmintrin.h)
#include <smmintrin.h>//SSE4.1(include tmmintrin.h)
#include <nmmintrin.h>//SSE4.2(include smmintrin.h)
#include <wmmintrin.h>//AES(include nmmintrin.h)
#include <immintrin.h>//AVX(include wmmintrin.h)
#include <intrin.h>//(include immintrin.h)

mmintrin.h为MMX头文件,其中__m64的定义为:

/*
 * __m64: the operand type taken and returned by the MMX intrinsics.
 *
 * Every member of the union overlays the same storage, so one value can be
 * viewed either as a single 64-bit integer or as an array of smaller
 * elements.
 *
 * NOTE(review): __declspec(intrin_type) and _CRT_ALIGN(8) are
 * toolchain-specific (they look like MSVC extensions); presumably the
 * latter requests 8-byte alignment -- confirm against the compiler docs.
 */
typedef union __declspec(intrin_type) _CRT_ALIGN(8) __m64
{
    unsigned __int64    m64_u64;    /* whole value as one unsigned __int64 */
    float               m64_f32[2]; /* two float elements */
    __int8              m64_i8[8];  /* eight __int8 elements */
    __int16             m64_i16[4]; /* four __int16 elements */
    __int32             m64_i32[2]; /* two __int32 elements */
    __int64             m64_i64;    /* whole value as one __int64 */
    unsigned __int8     m64_u8[8];  /* eight unsigned __int8 elements */
    unsigned __int16    m64_u16[4]; /* four unsigned __int16 elements */
    unsigned __int32    m64_u32[2]; /* two unsigned __int32 elements */
} __m64;

mmintrin.h 文件中各函数的介绍

 /* General support intrinsics *///Empties the multimedia state,清除MMX寄存器中的内容,即初始化(以避免和浮点数//操作发生冲突),详细说明见参考文献1void  _m_empty(void);//_mm_empty//Converts the integer object _I to a 64-bit __m64 object, r0=_I, r1=0__m64 _m_from_int(int _I);//_mm_cvtsi32_si64//Converts the lower 32 bits of the __m64 object _M to an integer, r=_M0int   _m_to_int(__m64 _M);//_mm_cvtsi64_si32//Packs the four 16-bit values from _MM1 into the lower four 8-bit values of//the result with signed saturation, and packs the four 16-bit values from _MM2//into the upper four 8-bit values of the result with signed saturation__m64 _m_packsswb(__m64 _MM1, __m64 _MM2);//_mm_packs_pi16//Packs the two 32-bit values from _MM1 into the lower two 16-bit values of the// result with signed saturation, and packs the two 32-bit values from _MM2 into// the upper two 16-bit values of the result with signed saturation__m64 _m_packssdw(__m64 _MM1, __m64 _MM2);//_mm_packs_pi32//Packs the four 16-bit values from _MM1 into the lower four 8-bit values of the//result with unsigned saturation, and packs the four 16-bit values from _MM2 into//the upper four 8-bit values of the result with unsigned saturation__m64 _m_packuswb(__m64 _MM1, __m64 _MM2);//_mm_packs_pu16//_MM1=(_MM10, _MM11, _MM12, _MM13, _MM14, _MM15, _MM16, _MM17),//_MM2=(_MM20, _MM21, _MM22, _MM23, _MM24, _MM25, _MM26, _MM27),//则r=(_MM14, _MM24, _MM15, _MM25, _MM16, _MM26, _MM17, _MM27)__m64 _m_punpckhbw(__m64 _MM1, __m64 _MM2);//_mm_unpackhi_pi8 //_MM1=(_MM10, _MM11, _MM12, _MM13),_MM10为低位,_MM2=(_MM20, _MM21, _MM22, _MM23),//则r=(_MM12, _MM22, _MM13, _MM23)__m64 _m_punpckhwd(__m64 _MM1, __m64 _MM2);//_mm_unpackhi_pi16//MM1=(_MM10, _MM11),_MM10为低位,_MM2=(_MM20, _MM21),则r=(_MM11, _MM21)__m64 _m_punpckhdq(__m64 _MM1, __m64 _MM2);//_mm_unpackhi_pi32//_MM1=(_MM10, _MM11, _MM12, _MM13, _MM14, _MM15, _MM16, _MM17),//_MM2=(_MM20, _MM21, _MM22, _MM23, _MM24, _MM25, _MM26, _MM27),//则r=(_MM10, _MM20, _MM11, _MM21, _MM12, _MM22, _MM13, _MM23)__m64 _m_punpcklbw(__m64 
_MM1, __m64 _MM2);//_mm_unpacklo_pi8//_MM1=(_MM10, _MM11, _MM12, _MM13),_MM10为低位,_MM2=(_MM20, _MM21, _MM22, _MM23),//则r=(_MM10, _MM20, _MM11, _MM21)__m64 _m_punpcklwd(__m64 _MM1, __m64 _MM2);//_mm_unpacklo_pi16//MM1=(_MM10, _MM11),_MM10为低位,_MM2=(_MM20, _MM21),则r=(_MM10, _MM20)__m64 _m_punpckldq(__m64 _MM1, __m64 _MM2);//mm_unpacklo_pi32/* Packed arithmetic intrinsics *///Adds the eight 8-bit values in _MM1 to the eight 8-bit values in _MM2__m64 _m_paddb(__m64 _MM1, __m64 _MM2);//_mm_add_pi8//Adds the four 16-bit values in _MM1 to the four 16-bit values in _MM2__m64 _m_paddw(__m64 _MM1, __m64 _MM2);//_mm_add_pi16//Adds the two 32-bit values in _MM1 to the two 32-bit values in _MM2__m64 _m_paddd(__m64 _MM1, __m64 _MM2);//_mm_add_pi32//Adds the eight signed 8-bit values in _MM1 to the eight signed 8-bit values in _MM2//and saturates__m64 _m_paddsb(__m64 _MM1, __m64 _MM2);//_mm_adds_pi8//Adds the four signed 16-bit values in _MM1 to the four signed 16-bit values in _MM2//and saturates__m64 _m_paddsw(__m64 _MM1, __m64 _MM2);//_mm_adds_pi16//Adds the eight unsigned 8-bit values in _MM1 to the eight unsigned 8-bit values //in _MM2 and saturates__m64 _m_paddusb(__m64 _MM1, __m64 _MM2);//_mm_adds_pu8//Add the four unsigned 16-bit values in _MM1 to the four unsigned 16-bit values //in _MM2 and saturates__m64 _m_paddusw(__m64 _MM1, __m64 _MM2);//_mm_adds_pu16//Subtracts the eight 8-bit values in _MM2 from the eight 8-bit values in _MM1__m64 _m_psubb(__m64 _MM1, __m64 _MM2);//_mm_sub_pi8 //Subtracts the four 16-bit values in _MM2 from the four 16-bit values in _MM1__m64 _m_psubw(__m64 _MM1, __m64 _MM2);//_mm_sub_pi16//Subtracts the two 32-bit values in _MM2 from the two 32-bit values in _MM1__m64 _m_psubd(__m64 _MM1, __m64 _MM2);//_mm_sub_pi32//Subtracts the eight signed 8-bit values in _MM2 from the eight signed 8-bit//values in _MM1 and saturates__m64 _m_psubsb(__m64 _MM1, __m64 _MM2);//_mm_subs_pi8//Subtracts the four signed 16-bit values in _MM2 from the four signed 
16-bit//values in _MM1 and saturates__m64 _m_psubsw(__m64 _MM1, __m64 _MM2);//_mm_subs_pi16//Subtracts the eight unsigned 8-bit values in _MM2 from the eight unsigned 8-bit//values in _MM1 and saturates__m64 _m_psubusb(__m64 _MM1, __m64 _MM2);//_mm_subs_pu8//Subtracts the four unsigned 16-bit values in _MM2 from the four unsigned 16-bit//values in _MM1 and saturates__m64 _m_psubusw(__m64 _MM1, __m64 _MM2);//_mm_subs_pu16//Multiplies four 16-bit values in _MM1 by four 16-bit values in _MM2 to produce//four 32-bit intermediate results, which are then summed by pairs to produce two//32-bit results,r0=_MM10*_MM20+_MM11*_MM21, r1=_MM12*_MM22+_MM13*_MM23__m64 _m_pmaddwd(__m64 _MM1, __m64 _MM2);//_mm_madd_pi16//Multiplies four signed 16-bit values in _MM1 by four signed 16-bit values in _MM2//and produces the high 16 bits of the four results__m64 _m_pmulhw(__m64 _MM1, __m64 _MM2);//_mm_mulhi_pi16//Multiplies four 16-bit values in _MM1 by four 16-bit values in _MM2 and produces//the low 16 bits of the four results__m64 _m_pmullw(__m64 _MM1, __m64 _MM2);//_mm_mullo_pi16/* Shift intrinsics *///Shifts four 16-bit values in _M left the amount specified by _Count //while shifting in zeros,左移_Count位,移出位补0__m64 _m_psllw(__m64 _M, __m64 _Count);//_mm_sll_pi16//Shifts four 16-bit values in _M left the amount specified by _Ccount while //shifting in zeros,左移_Count位,移出位补0,_Count需是一个立即数//汇编语言中的立即数相当于高级语言中的常量(常数),它是直接出现在指令中的数,//不用存储在寄存器或存储器中的数__m64 _m_psllwi(__m64 _M, int _Count);//_mm_slli_pi16 //Shifts two 32-bit values in _M left the amount specified by _Count//while shifting in zeros__m64 _m_pslld(__m64 _M, __m64 _Count);//_mm_sll_pi32//Shifts two 32-bit values in _M left the amount specified by _Count//while shifting in zeros__m64 _m_pslldi(__m64 _M, int _Count);//_mm_slli_pi32//Shifts the 64-bit value in _M left the amount specified by _Count//while shifting in zeros__m64 _m_psllq(__m64 _M, __m64 _Count);//_mm_sll_si64//Shifts the 64-bit value in _M left the amount specified by 
_Count//while shifting in zeros__m64 _m_psllqi(__m64 _M, int _Count);//_mm_slli_si64//Shifts four 16-bit values in _M right the amount specified by _Count//while shifting in the sign bit__m64 _m_psraw(__m64 _M, __m64 _Count);//_mm_sra_pi16//Shifts four 16-bit values in _M right the amount specified by _Count//while shifting in the sign bit__m64 _m_psrawi(__m64 _M, int _Count);//_mm_srai_pi16//Shifts two 32-bit values in _M right the amount specified by _Count//while shifting in the sign bit__m64 _m_psrad(__m64 _M, __m64 _Count);//_mm_sra_pi32//Shifts two 32-bit values in _M right the amount specified by _Count//while shifting in the sign bit__m64 _m_psradi(__m64 _M, int _Count);//_mm_srai_pi32//Shifts four 16-bit values in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrlw(__m64 _M, __m64 _Count);//_mm_srl_pi16//Shifts four 16-bit values in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrlwi(__m64 _M, int _Count);//_mm_srli_pi16//Shifts two 32-bit values in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrld(__m64 _M, __m64 _Count);//_mm_srl_pi32//Shifts two 32-bit values in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrldi(__m64 _M, int _Count);//_mm_srli_pi32 //Shifts the 64-bit value in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrlq(__m64 _M, __m64 _Count);//_mm_srl_si64//Shifts the 64-bit value in _M right the amount specified by _Count//while shifting in zeros__m64 _m_psrlqi(__m64 _M, int _Count);//_mm_srli_si64/* Logical intrinsics *///Performs a bitwise AND of the 64-bit value in _MM1 with the 64-bit value in _MM2__m64 _m_pand(__m64 _MM1, __m64 _MM2);//_mm_and_si64//Performs a logical NOT on the 64-bit value in _MM1 and use the result in a //bitwise AND with the 64-bit value in _MM2__m64 _m_pandn(__m64 _MM1, __m64 _MM2);//_mm_andnot_si64//Performs a bitwise OR of the 64-bit value in _MM1 with the 64-bit value in 
_MM2__m64 _m_por(__m64 _MM1, __m64 _MM2);//_mm_or_si64//Performs a bitwise XOR of the 64-bit value in _MM1 with the 64-bit value in _MM2__m64 _m_pxor(__m64 _MM1, __m64 _MM2);//_mm_xor_si64/* Comparison intrinsics *///If the respective 8-bit values in _MM1 are equal to the respective //8-bit values in _MM2, sets the respective 8-bit resulting values to //all ones; otherwise, sets them to all zeros__m64 _m_pcmpeqb(__m64 _MM1, __m64 _MM2);//_mm_cmpeq_pi8//If the respective 16-bit values in _MM1 are equal to the respective //16-bit values in _MM2, sets the respective 16-bit resulting values //to all ones; otherwise, sets them to all zeros__m64 _m_pcmpeqw(__m64 _MM1, __m64 _MM2);//_mm_cmpeq_pi16//If the respective 32-bit values in _MM1 are equal to the respective //32-bit values in _MM2, sets the respective 32-bit resulting values//to all ones; otherwise, sets them to all zeros__m64 _m_pcmpeqd(__m64 _MM1, __m64 _MM2);//_mm_cmpeq_pi32 //If the respective 8-bit values in _MM1 are greater than the respective //8-bit values in _MM2, sets the respective 8-bit resulting values to all ones;//otherwise, sets them to all zeros__m64 _m_pcmpgtb(__m64 _MM1, __m64 _MM2);//_mm_cmpgt_pi8//If the respective 16-bit values in _MM1 are greater than the respective 16-bit//values in _MM2, sets the respective 16-bit resulting values to all ones;//otherwise, sets them to all zeros__m64 _m_pcmpgtw(__m64 _MM1, __m64 _MM2);//_mm_cmpgt_pi16//If the respective 32-bit values in _MM1 are greater than the respective 32-bit//values in _MM2, sets the respective 32-bit resulting values to all ones;//otherwise, sets them all to zeros__m64 _m_pcmpgtd(__m64 _MM1, __m64 _MM2);//_mm_cmpgt_pi32/* Utility intrinsics *///Sets the 64-bit value to zero__m64 _mm_setzero_si64(void);//Sets the two signed 32-bit integer values,r0=_I0, r1=_I1__m64 _mm_set_pi32(int _I1, int _I0);//r0=_S0, r1=_S1, r2=_S2, r3=_S3__m64 _mm_set_pi16(short _S3, short _S2, short _S1, short _S0);//r0=_B0, r1=_B1, r2=_B2, r3=_B3, r4=_B4, ..., 
r7=_B7__m64 _mm_set_pi8(char _B7, char _B6, char _B5, char _B4,char _B3, char _B2, char _B1, char _B0);//Sets the two signed 32-bit integer values to _I,r0=r1=_I__m64 _mm_set1_pi32(int _I);//Sets the four signed 16-bit integer values to _S, r0=r1=r2=r3=_S__m64 _mm_set1_pi16(short _S);//Sets the eight signed 8-bit integer values to _B, r0=r1...=r7=_B__m64 _mm_set1_pi8(char _B);//Sets the two signed 32-bit integer values in reverse order,r0=_I1, r1=_I0__m64 _mm_setr_pi32(int _I1, int _I0);//Sets the four signed 16-bit integer values in reverse order,//r0=_S3, r1=_S2, r2=_S1, r3=_S0__m64 _mm_setr_pi16(short _S3, short _S2, short _S1, short _S0);//Sets the eight signed 8-bit integer values in reverse order//r0=_B7, r1=_B6, r2=_B5, r3=_B4, r4=_B3, r5=_B2, r6=_B1, r7=_B0__m64 _mm_setr_pi8(char _B7, char _B6, char _B5, char _B4,char _B3, char _B2, char _B1, char _B0);/* Alternate intrinsic name definitions */#define _mm_empty         _m_empty#define _mm_cvtsi32_si64  _m_from_int#define _mm_cvtsi64_si32  _m_to_int#define _mm_packs_pi16    _m_packsswb#define _mm_packs_pi32    _m_packssdw#define _mm_packs_pu16    _m_packuswb#define _mm_unpackhi_pi8  _m_punpckhbw#define _mm_unpackhi_pi16 _m_punpckhwd#define _mm_unpackhi_pi32 _m_punpckhdq#define _mm_unpacklo_pi8  _m_punpcklbw#define _mm_unpacklo_pi16 _m_punpcklwd#define _mm_unpacklo_pi32 _m_punpckldq#define _mm_add_pi8       _m_paddb#define _mm_add_pi16      _m_paddw#define _mm_add_pi32      _m_paddd#define _mm_adds_pi8      _m_paddsb#define _mm_adds_pi16     _m_paddsw#define _mm_adds_pu8      _m_paddusb#define _mm_adds_pu16     _m_paddusw#define _mm_sub_pi8       _m_psubb#define _mm_sub_pi16      _m_psubw#define _mm_sub_pi32      _m_psubd#define _mm_subs_pi8      _m_psubsb#define _mm_subs_pi16     _m_psubsw#define _mm_subs_pu8      _m_psubusb#define _mm_subs_pu16     _m_psubusw#define _mm_madd_pi16     _m_pmaddwd#define _mm_mulhi_pi16    _m_pmulhw#define _mm_mullo_pi16    _m_pmullw#define _mm_sll_pi16      _m_psllw#define 
_mm_slli_pi16     _m_psllwi#define _mm_sll_pi32      _m_pslld#define _mm_slli_pi32     _m_pslldi#define _mm_sll_si64      _m_psllq#define _mm_slli_si64     _m_psllqi#define _mm_sra_pi16      _m_psraw#define _mm_srai_pi16     _m_psrawi#define _mm_sra_pi32      _m_psrad#define _mm_srai_pi32     _m_psradi#define _mm_srl_pi16      _m_psrlw#define _mm_srli_pi16     _m_psrlwi#define _mm_srl_pi32      _m_psrld#define _mm_srli_pi32     _m_psrldi#define _mm_srl_si64      _m_psrlq#define _mm_srli_si64     _m_psrlqi#define _mm_and_si64      _m_pand#define _mm_andnot_si64   _m_pandn#define _mm_or_si64       _m_por#define _mm_xor_si64      _m_pxor#define _mm_cmpeq_pi8     _m_pcmpeqb#define _mm_cmpeq_pi16    _m_pcmpeqw#define _mm_cmpeq_pi32    _m_pcmpeqd#define _mm_cmpgt_pi8     _m_pcmpgtb#define _mm_cmpgt_pi16    _m_pcmpgtw#define _mm_cmpgt_pi32    _m_pcmpgtd

参考文献:1、http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/intref_cls/common/intref_mmx_emms_usage.htm

“MMX Intrinsics各函数介绍”相关推荐

  1. SSE3 和 SSSE3 Intrinsics各函数介绍

    [转载]:SSE3和SSSE3 Intrinsics各函数介绍 SIMD相关头文件包括: mmintrin.h为MMX 头文件,其中__m64的定义为: xmmintrin.h为SSE 头文件,此头文 ...

  2. SSE4.1和SSE4.2 Intrinsics各函数介绍

    SIMD相关头文件包括: //#include <ivec.h>//MMX //#include <fvec.h>//SSE(also include ivec.h) //#i ...

  3. SSE2 Intrinsics各函数介绍

    SIMD相关头文件包括: //#include <ivec.h>//MMX //#include <fvec.h>//SSE(also include ivec.h) //#i ...

  4. SSE Intrinsics各函数介绍

    原文:http://blog.csdn.net/fengbingchun/article/details/19293081 SIMD相关头文件包括: [cpp] view plaincopy //#i ...

  5. SSE2 Intrinsics各函数介绍 及简单例子

    转载地址 http://blog.csdn.net/fengbingchun/article/details/18460199 关于ARM上的SIMD可以参见网址,ARM上的SIMD技术叫NEON: ...

  6. AES(Advanced Encryption Standard) Intrinsics各函数介绍

    AES为高级加密标准,是较流行的一种密码算法. SIMD相关头文件包括: //#include <ivec.h>//MMX //#include <fvec.h>//SSE(a ...

  7. SSE3和SSSE3 Intrinsics各函数介绍

    SIMD相关头文件包括: //#include <ivec.h>//MMX //#include <fvec.h>//SSE(also include ivec.h) //#i ...

  8. Neon Intrinsics各函数介绍

    #ifndef __ARM_NEON__ #error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) t ...

  9. ARM Neon Intrinsics各函数介绍

    #ifndef __ARM_NEON__ #error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) t ...

最新文章

  1. VMware Coding Challenge: Possible Scores Summary: static
  2. JQUERY的appendappendTo
  3. bpcs uploader.php,linux 备份定时同步到百度云盘
  4. Java常用设计模式————建造者模式
  5. 职业人应该“这山望着那山高”
  6. 分布式 Spring Cloud 基于 Spring Boot 开发一整套
  7. Mac Postman app使用方法
  8. Qt之SQLite数据库可视化工具
  9. 使用硕正插件在strtus2框架下返回数据问题
  10. OpenVINO之链接库
  11. 蝴蝶效应、青蛙现象、鳄鱼法则、鲇鱼效应、羊群效应、刺猬法则、手表定律、破窗理论、二八定律、木桶理论、马太效应
  12. php保存微信用户头像到本地或者服务器的完美方案!
  13. 参数化建模类毕业论文文献有哪些?
  14. 禁止应用和adb安装APK
  15. Matlab中rgb2ind函数用法
  16. IOT网关开发受难记-(一) 2022/05/13
  17. 计算机怎样去掉语音,如何关闭word语音识别 (数据丢失 - 电脑使用小技巧 - 电子发烧友网...
  18. 7-32 寻找250 (10分)
  19. B站,牛啊。,java底层原理
  20. OpenCV:图像批量、任意比例裁剪

热门文章

  1. Python Qt GUI设计:QClipboard剪贴数据类(基础篇—19)
  2. Monitor CodeForces - 846D ——二维前缀和
  3. 【面向对象编程】(2) 类属性的定义及使用;__repr__()方法
  4. c++之openGL在VS中的配置及简单图形绘制
  5. Python计算机视觉——SIFT特征
  6. python3.7.2怎么用不了pillow_python 3.7.0 下pillow安装方法
  7. Udacity机器人软件工程师课程笔记(五)-样本搜索和找回-基于漫游者号模拟器-自主驾驶
  8. 强哥原创管理方法论之“掌纹管理学”
  9. 设置显示Git的修改历史History快捷键Alt+H,方便多人开发的时候快速查看谁修改了代码
  10. 在Ubuntu 16.04.1 LTS上安装ATS 6.2.1 LTS实录