TNN MatConverter CvtColor NV21TOBGR
OpenCV 中的 carotene 对于 armv7优化较好,而 armv8下则是 NEON 实现。TNN 提供了一套图像预处理接口并且进行了汇编优化。下面以 NV21TOBGR 为例进行介绍。
MatUtils
public://copy cpu <-> device, cpu<->cpu, device<->device, src and dst dims must be equal.static Status Copy(Mat& src, Mat& dst, void* command_queue);//src and dst device type must be same. when param scale_w or scale_h is 0, it is computed as// (double)dst.GetWidth() / src.GetWidth() or (double)dst.GetHeight() / src.GetHeight().static Status Resize(Mat& src, Mat& dst, ResizeParam param, void* command_queue);//src and dst device type must be same. when param width or height is 0, it is equal to//dst.GetWidth() or dst.GetHeight().static Status Crop(Mat& src, Mat& dst, CropParam param, void* command_queue);//src and dst device type must be same.static Status WarpAffine(Mat& src, Mat& dst, WarpAffineParam param, void* command_queue);//src and dst device type must be same.static Status CvtColor(Mat& src, Mat& dst, ColorConversionType type, void* command_queue);//src and dst device type must be same. param top, bottom, left and right must be non-negative.static Status CopyMakeBorder(Mat& src, Mat& dst, CopyMakeBorderParam param, void* command_queue);
MatUtils::CvtColor
调用 CheckSrcAndDstMat 检查输入和输出变量的设备是否相同,以及输入尺寸是否有效。
构造一个转换器并调用其颜色转换函数(CvtColor)。
auto ret = CheckSrcAndDstMat(src, dst, true, false, true);if (ret != TNN_OK) {return ret;}
if (dst.GetData() == nullptr) {// set dst size by src size and cvt typeDimsVector dims = src.GetDims();dims[1] = GetCvtColorDstChannel(type);dst = Mat(dst.GetDeviceType(), dst.GetMatType(), dims);} else {if (dst.GetWidth() < src.GetWidth() || dst.GetHeight() < src.GetHeight() ||dst.GetChannel() < GetCvtColorDstChannel(type)) {return Status(TNNERR_PARAM_ERR, "cvt color dst size too small");}}
MAT_CONVERTER_PREPARATION 可在必要时为输出申请内存,并创建一个 MatConverterAcc 对象。
// Expand MAT_CONVERTER_PREPARATION for the source device type; the macro defines the local `converter`
// (and returns early with an error Status when none can be created). The conversion itself is then
// forwarded to that converter.
MAT_CONVERTER_PREPARATION(src.GetDeviceType());return converter->CvtColor(src, dst, type, command_queue);
CheckSrcAndDstMat
检查输入输出的设备类型是否一致、Mat 类型是否一致,以及输入尺寸是否正常。
// Each check runs only when its flag is set; the first failing check decides the returned Status.
if (check_device_type && (src.GetDeviceType() != dst.GetDeviceType())) {
    return Status(TNNERR_PARAM_ERR, "src and dst DeviceType not equal");
}

if (check_mat_type && (src.GetMatType() != dst.GetMatType())) {
    return Status(TNNERR_PARAM_ERR, "src and dst MatType not equal");
}

// Only the source dimensions are validated here; dst may still be unallocated at this point.
if (check_src_size && (src.GetWidth() <= 0 || src.GetHeight() <= 0)) {
    return Status(TNNERR_INVALID_INPUT, "src size is zero or negative");
}

return TNN_OK;
MAT_CONVERTER_PREPARATION
// Shared prologue for the MatUtils entry points. It expects a Mat named `dst` in the enclosing scope
// and must be expanded inside a function that returns Status:
//   - when `dst` has no data, it is replaced by a newly constructed Mat with the same device type,
//     mat type and dims;
//   - a local `converter` is created through MatConverterManager for `device_type`;
//   - the enclosing function returns TNNERR_INIT_LAYER when no converter could be created.
// The body cannot be wrapped in do { } while (0): callers use `converter` after the macro.
#define MAT_CONVERTER_PREPARATION(device_type) \
    if (dst.GetData() == nullptr) { \
        dst = Mat(dst.GetDeviceType(), dst.GetMatType(), dst.GetDims()); \
    } \
    auto converter = MatConverterManager::Shared()->CreateMatConverterAcc(device_type); \
    if (!converter) { \
        return Status(TNNERR_INIT_LAYER, "image converter is nil, check device type"); \
    }
MatConverterManager
拥有一个以 DeviceType 为键的 MatConverterAccCreater 字典,可以根据设备类型创建对应的转换器,实现类似反射的效果。
// MatConverterManager members: Shared() returns the shared manager instance, CreateMatConverterAcc()
// builds a converter for a device type, and RegisterMatConverterAccCreater() stores a creater in
// converter_creater_map_ (keyed by DeviceType).
public:static std::shared_ptr<MatConverterManager>& Shared();MatConverterManager();~MatConverterManager();std::shared_ptr<MatConverterAcc> CreateMatConverterAcc(DeviceType device_type);int RegisterMatConverterAccCreater(DeviceType type, std::shared_ptr<MatConverterAccCreater> creater);private:std::map<DeviceType, std::shared_ptr<MatConverterAccCreater>> converter_creater_map_;
MatConverterManager::Shared
借助 std::once_flag 和 std::call_once 实现线程安全的单例模式。
// The manager is created on the first call only; std::call_once guards the construction so that
// concurrent first calls still produce a single instance.
static std::once_flag init_flag;
static std::shared_ptr<MatConverterManager> shared_manager;

std::call_once(init_flag, []() {
    shared_manager = std::make_shared<MatConverterManager>();
});

return shared_manager;
MatConverterManager::CreateMatConverterAcc
在 converter_creater_map_ 中查找设备类型,如果有相应的构造者则调用其创建函数,否则返回 nullptr。
// Look up the creater registered for this device type; an unregistered device yields nullptr.
const auto entry = converter_creater_map_.find(device_type);
if (entry == converter_creater_map_.end()) {
    return nullptr;
}

// Delegate construction of the converter to the registered creater.
return entry->second->CreateMatConverterAcc();
MatConverterManager::RegisterMatConverterAccCreater
向 converter_creater_map_ 字典中添加设备的构造者;同一设备类型重复注册或构造者为空时返回 1,成功时返回 0。
// A device type may be registered only once; a second attempt is logged and rejected.
if (converter_creater_map_.count(type) > 0) {
    LOGE("Error: device_type(%d) cannot be registered twice\n", type);
    return 1;
}

// An empty creater is logged and rejected as well.
if (creater == nullptr) {
    LOGE("Error: MatConverterAccCreater is nil device_type(%d)\n", type);
    return 1;
}

// Both checks passed: remember the creater for this device type and report success.
converter_creater_map_[type] = creater;
return 0;
MatConverterAccCreater
// Abstract creater interface: implementations return a new MatConverterAcc from
// CreateMatConverterAcc(). The virtual destructor allows deletion through a base pointer.
public:virtual ~MatConverterAccCreater(){};virtual std::shared_ptr<MatConverterAcc> CreateMatConverterAcc() = 0;
DECLARE_MAT_CONVERTER_CREATER
// Declares the class <device>MatConverterAccCreater, a MatConverterAccCreater whose
// CreateMatConverterAcc() returns a std::make_shared'ed <device>MatConverterAcc.
// The expansion ends without a semicolon, so the use site supplies it.
#define DECLARE_MAT_CONVERTER_CREATER(device) \
    class device##MatConverterAccCreater : public MatConverterAccCreater { \
    public: \
        virtual ~device##MatConverterAccCreater(){}; \
        virtual std::shared_ptr<MatConverterAcc> CreateMatConverterAcc() { \
            return std::make_shared<device##MatConverterAcc>(); \
        }; \
    }
REGISTER_MAT_CONVERTER
// Defines a global object g_mat_converter_<device> of type
// MatConverterAccRegister<<device>MatConverterAccCreater>, constructed with device_type.
// NOTE(review): presumably the MatConverterAccRegister constructor registers the creater with
// MatConverterManager — that template is not shown here, confirm before relying on it.
#define REGISTER_MAT_CONVERTER(device, device_type) \
    MatConverterAccRegister<device##MatConverterAccCreater> g_mat_converter_##device(device_type)
MatConverterAcc
// Abstract per-device converter interface. Every operation takes a source Mat, a destination Mat and
// an optional command_queue (defaults to NULL) and returns a Status.
// The constructor invokes OMP_SET_THREADS_(1).
// NOTE(review): presumably this limits OpenMP to a single thread — the macro is not shown here, confirm.
public:MatConverterAcc() {OMP_SET_THREADS_(1);};virtual ~MatConverterAcc(){};virtual Status Copy(Mat& src, Mat& dst, void* command_queue = NULL) = 0;virtual Status Resize(Mat& src, Mat& dst, ResizeParam param, void* command_queue = NULL) = 0;virtual Status Crop(Mat& src, Mat& dst, CropParam param, void* command_queue = NULL) = 0;virtual Status WarpAffine(Mat& src, Mat& dst, WarpAffineParam param, void* command_queue = NULL) = 0;virtual Status CvtColor(Mat& src, Mat& dst, ColorConversionType type, void* command_queue = NULL) = 0;virtual Status CopyMakeBorder(Mat& src, Mat& dst, CopyMakeBorderParam param, void* command_queue = NULL) = 0;
ArmMatConverterAcc
// ARM converter: declares the same six operations as the MatConverterAcc interface, with identical
// parameter lists and the same NULL default for command_queue.
public:virtual Status Copy(Mat& src, Mat& dst, void* command_queue = NULL);virtual Status Resize(Mat& src, Mat& dst, ResizeParam param, void* command_queue = NULL);virtual Status Crop(Mat& src, Mat& dst, CropParam param, void* command_queue = NULL);virtual Status WarpAffine(Mat& src, Mat& dst, WarpAffineParam param, void* command_queue = NULL);virtual Status CvtColor(Mat& src, Mat& dst, ColorConversionType type, void* command_queue = NULL);virtual Status CopyMakeBorder(Mat& src, Mat& dst, CopyMakeBorderParam param, void* command_queue = NULL);
ArmMatConverterAcc::CvtColor
到此处时 command_queue 没有用到。
CheckMatConverterParams 检查输入输出数据是否为空,以及是否在同一设备上。
NV21ToBGR
Status ret = TNN_OK;ret = CheckMatConverterParams(src, dst, true);if (ret != TNN_OK)return ret;switch (type) {case COLOR_CONVERT_NV12TOBGR:NV12ToBGR((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_NV21TOBGR:NV21ToBGR((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_NV12TOBGRA:NV12ToBGRA((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_NV21TOBGRA:NV21ToBGRA((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_BGRTOGRAY:BGRToGray((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_BGRATOGRAY:BGRAToGray((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_RGBTOGRAY:RGBToGray((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;case COLOR_CONVERT_RGBATOGRAY:RGBAToGray((uint8_t*)src.GetData(), (uint8_t*)dst.GetData(), src.GetBatch()*src.GetHeight(), src.GetWidth());break;default:return Status(TNNERR_PARAM_ERR, "ArmMatConverterAcc::CvtColor, color conversion type not support yet");}return ret;
NV21ToBGR
将浮点系数乘以 64(左移 6 位)转换为定点整数进行运算,结果再右移 6 位。
nn 为每行中以 8 个像素为一组的循环次数(w >> 3),remain 为每行剩余的不足 8 个的像素数,交由 NaiveYUVToBGROrBGRALoop 处理。
#ifndef TNN_USE_NEONreturn NaiveYUVToBGROrBGRA(nv21, bgr, 3, h, w, false);
#elseconst unsigned char* yptr = nv21;const unsigned char* vuptr = nv21 + w * h;for (int y = 0; y < h; y += 2) {const unsigned char* yptr0 = yptr;const unsigned char* yptr1 = yptr + w;unsigned char* rgb0 = bgr;unsigned char* rgb1 = bgr + w * 3;
#if __aarch64__int64_t nn = w >> 3;int remain = w - (nn << 3);int16x8_t _q1135 = vdupq_n_s16(1135);int8x8_t _v74 = vdup_n_s8(74);int8x8_t _v128 = vdup_n_s8(int8_t(128));int8x8_t _v102 = vdup_n_s8(102);int8x8_t _v52 = vdup_n_s8(52);int8x8_t _v25 = vdup_n_s8(25);// use 127 instead of 129 to prevent char overflow, add another 2 in asmint8x8_t _v127 = vdup_n_s8(127);// saturate uv to 240 to avoid b overflowuint8x8_t _v240 = vdup_n_u8(240);
if (nn > 0) {asm volatile("prfm pldl1strm, [%[_vu], #128] \n\t""ld1 {v2.8b}, [%[_vu]], #8 \n\t""cmhi v12.8b, v2.8b, %[_v240].8b \n\t""bsl v12.8b, %[_v240].8b, v2.8b \n\t""sub v2.8b, v12.8b, %[_v128].8b \n\t"
"0: \n\t""prfm pldl1strm, [%[_y0], #128] \n\t""ld1 {v0.8b}, [%[_y0]], #8 \n\t""prfm pldl1strm, [%[_y1], #128] \n\t""ld1 {v1.8b}, [%[_y1]], #8 \n\t""umull v28.8h, v0.8b, %[_v74].8b \n\t""sub v28.8h, v28.8h, %[_q1135].8h \n\t" // v28 -> b0"orr v3.8b, v2.8b, v2.8b \n\t""umull v29.8h, v1.8b, %[_v74].8b \n\t""sub v29.8h, v29.8h, %[_q1135].8h \n\t" // v29 -> b1"orr v9.16b, v28.16b, v28.16b \n\t" // v9 -> g0
"trn1 v30.8b, v2.8b, v3.8b \n\t" // u"trn2 v31.8b, v2.8b, v3.8b \n\t" // v"orr v11.16b, v29.16b, v29.16b \n\t" // v11 -> g1"sshll v27.8h, v31.8b, #1 \n\t""smlsl v9.8h, v30.8b, %[_v52].8b \n\t""orr v8.16b, v28.16b, v28.16b \n\t" // v8 -> r0"smlsl v11.8h, v30.8b, %[_v52].8b \n\t""orr v10.16b, v29.16b, v29.16b \n\t" // v10 -> r1"smlal v8.8h, v30.8b, %[_v102].8b \n\t""smlal v28.8h, v31.8b, %[_v127].8b \n\t""smlal v10.8h, v30.8b, %[_v102].8b \n\t"
"add v28.8h, v28.8h, v27.8h \n\t""smlsl v9.8h, v31.8b, %[_v25].8b \n\t""smlal v29.8h, v31.8b, %[_v127].8b \n\t""smlsl v11.8h, v31.8b, %[_v25].8b \n\t""add v29.8h, v29.8h, v27.8h \n\t"
"sqshrun v26.8b, v8.8h, #6 \n\t" // v24-v26: b0g0r0"sqshrun v24.8b, v28.8h, #6 \n\t""sqshrun v6.8b, v10.8h, #6 \n\t""sqshrun v25.8b, v9.8h, #6 \n\t" // v4-v6: b1g1r1"sqshrun v4.8b, v29.8h, #6 \n\t""sqshrun v5.8b, v11.8h, #6 \n\t"
"prfm pldl1strm, [%[_vu], #128] \n\t""ld1 {v2.8b}, [%[_vu]], #8 \n\t""subs %[_nn], %[_nn], #1 \n\t""prfm pstl1strm, [%[_r0]] \n\t""st3 {v24.8b-v26.8b}, [%[_r0]], #24 \n\t""cmhi v12.8b, v2.8b, %[_v240].8b \n\t""bsl v12.8b, %[_v240].8b, v2.8b \n\t""sub v2.8b, v12.8b, %[_v128].8b \n\t""prfm pstl1strm, [%[_r1]] \n\t""st3 {v4.8b-v6.8b}, [%[_r1]], #24 \n\t""bne 0b \n\t""sub %[_vu], %[_vu], #8 \n\t": [_nn]"+r"(nn),[_y0]"+r"(yptr0),[_y1]"+r"(yptr1),[_vu]"+r"(vuptr),[_r0]"+r"(rgb0),[_r1]"+r"(rgb1): [_v128]"w"(_v128),[_v102]"w"(_v102),[_v52]"w"(_v52),[_v25]"w"(_v25),[_v127]"w"(_v127),[_q1135]"w"(_q1135),[_v74]"w"(_v74),[_v240]"w"(_v240): "cc", "memory", "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v8","v9", "v10", "v11", "v12", "v24", "v25", "v26","v27", "v28", "v29", "v30", "v31");}
#elseint nn = w >> 3;int remain = w - (nn << 3);short _s1135 = 1135;int8x8_t _v74 = vdup_n_s8(74);int8x8_t _v128 = vdup_n_s8(int8_t(128));// to much input w cause compile error, merge to oneint8x8_t _vuvfilter = {102, 52, 25, 127, 0, 0, 0, 0};// saturate uv to 240 to avoid b overflowuint8x8_t _v240 = vdup_n_u8(240);if (nn > 0) {asm volatile("pld [%[_vu], #128] \n""vld1.u8 {d2}, [%[_vu]]! \n""vcgt.u8 d27, d2, %[_v240] \n""vbsl.u8 d27, %[_v240], d2 \n""vsub.u8 d2, d27, %[_v128] \n""vmov.s8 d10, %[_filt] \n""vdup.8 d11, d10[1] \n" // v52"vdup.8 d12, d10[2] \n" // v25"vdup.8 d13, d10[3] \n" // v127"vdup.16 q7, %[_s1135] \n" // q1135"vdup.8 d10, d10[0] \n" // v102"0: \n""pld [%[_y0], #128] \n""vld1.u8 {d0}, [%[_y0]]! \n""pld [%[_y1], #128] \n""vld1.u8 {d1}, [%[_y1]]! \n""vmull.u8 q2, d0, %[_v74] \n""vorr d3, d2, d2 \n""vsub.s16 q2, q2, q7 \n" // q2 -> b0"vmull.u8 q3, d1, %[_v74] \n""vorr q9, q2, q2 \n" // q9 -> g0"vsub.s16 q3, q3, q7 \n" // q3 -> b1"vtrn.s8 d2, d3 \n" // d2 -> u, d3 -> v"vorr q11, q3, q3 \n" // q11 -> g1"vshll.s8 q4, d3, #1 \n""vmlsl.s8 q9, d2, d11 \n""vorr q8, q2, q2 \n" // q8 -> r0"vmlsl.s8 q11, d2, d11 \n""vorr q10, q3, q3 \n" // q10 -> r1"vmlal.s8 q8, d2, d10 \n""vmlal.s8 q2, d3, d13 \n""vmlal.s8 q10, d2, d10 \n""vadd.s16 q2, q2, q4 \n""vmlsl.s8 q9, d3, d12 \n""vmlal.s8 q3, d3, d13 \n""vmlsl.s8 q11,d3, d12 \n""vadd.s16 q3, q3, q4 \n""vqshrun.s16 d26, q8, #6 \n" // d24-d26: b0g0r0"vqshrun.s16 d24, q2, #6 \n""vqshrun.s16 d4, q3, #6 \n""vqshrun.s16 d25, q9, #6 \n" // d4-d6: b1g1r1"vqshrun.s16 d6, q10, #6 \n""vqshrun.s16 d5, q11, #6 \n""pld [%[_vu], #128] \n""vld1.u8 {d2}, [%[_vu]]! 
\n""subs %[_nn], #1 \n""vst3.u8 {d24-d26}, [%[_r0]]!\n""vcgt.u8 d27, d2, %[_v240] \n""vbsl.u8 d27, %[_v240], d2 \n""vsub.u8 d2, d27, %[_v128] \n""vst3.u8 {d4-d6}, [%[_r1]]!\n""bne 0b \n""sub %[_vu], #8 \n": [_nn]"+r"(nn),[_y0]"+r"(yptr0),[_y1]"+r"(yptr1),[_vu]"+r"(vuptr),[_r0]"+r"(rgb0),[_r1]"+r"(rgb1): [_v128]"w"(_v128),[_filt]"w"(_vuvfilter),[_v74]"w"(_v74),[_s1135]"r"(_s1135),[_v240]"w"(_v240): "cc", "memory", "q0", "q1", "q2", "q3","q4","q5","q6","q7","q8", "q9", "q10", "q11", "q12", "q13");}
#endif //__aarch64__NaiveYUVToBGROrBGRALoop(yptr0, yptr1, vuptr, rgb0, rgb1, remain, false, 3);yptr += 2*w;vuptr += remain;bgr += 2*3*w;}
#endif // TNN_USE_NEON
NaiveYUVToBGROrBGRA
NaiveYUVToBGROrBGRALoop 每次处理两行。
const unsigned char* yptr = yuv;const unsigned char* vuptr = yuv + w * h;for (int y = 0; y < h; y += 2) {const unsigned char* yptr0 = yptr;const unsigned char* yptr1 = yptr + w;unsigned char* rgb0 = bgr;unsigned char* rgb1 = bgr + w * channel;NaiveYUVToBGROrBGRALoop(yptr0, yptr1, vuptr, rgb0, rgb1, w, is_nv12, channel);yptr += 2*w;vuptr += w;bgr += 2*channel*w;}
NaiveYUVToBGROrBGRALoop
for (; remain > 0; remain -= 2) {int u, v;if (is_nv12) {u = (vuptr[0] > 240 ? 240 : vuptr[0]) - 128;v = (vuptr[1] > 240 ? 240 : vuptr[1]) - 128;} else {v = (vuptr[0] > 240 ? 240 : vuptr[0]) - 128;u = (vuptr[1] > 240 ? 240 : vuptr[1]) - 128;}int ruv = 102 * v;int guv = -52 * v + -25 * u;int buv = 129 * u;#define SATURATE_CAST_UCHAR(X) (unsigned char)std::min(std::max(X, 0), 255);int y00 = yptr0[0]* 74 - 1135;if (channel == 4)rgb0[3] = 255;rgb0[0 * channel + 2] = SATURATE_CAST_UCHAR((y00 + ruv) >> 6);rgb0[0 * channel + 1] = SATURATE_CAST_UCHAR((y00 + guv) >> 6);rgb0[0 * channel + 0] = SATURATE_CAST_UCHAR((y00 + buv) >> 6);int y01 = yptr0[1]* 74 - 1135;if (channel == 4)rgb0[7] = 255;rgb0[1 * channel + 2] = SATURATE_CAST_UCHAR((y01 + ruv) >> 6);rgb0[1 * channel + 1] = SATURATE_CAST_UCHAR((y01 + guv) >> 6);rgb0[1 * channel + 0] = SATURATE_CAST_UCHAR((y01 + buv) >> 6);int y10 = yptr1[0]* 74 - 1135;if (channel == 4)rgb1[3] = 255;rgb1[0 * channel + 2] = SATURATE_CAST_UCHAR((y10 + ruv) >> 6);rgb1[0 * channel + 1] = SATURATE_CAST_UCHAR((y10 + guv) >> 6);rgb1[0 * channel + 0] = SATURATE_CAST_UCHAR((y10 + buv) >> 6);int y11 = yptr1[1]* 74 - 1135;if (channel == 4)rgb1[7] = 255;rgb1[1 * channel + 2] = SATURATE_CAST_UCHAR((y11 + ruv) >> 6);rgb1[1 * channel + 1] = SATURATE_CAST_UCHAR((y11 + guv) >> 6);rgb1[1 * channel + 0] = SATURATE_CAST_UCHAR((y11 + buv) >> 6);#undef SATURATE_CAST_UCHARyptr0 += 2;yptr1 += 2;vuptr += 2;rgb0 += 2*channel;rgb1 += 2*channel;}
参考资料:
- C++ Tutorial: Auto Registering Factory
- Automatic object factory in C++
- Unforgettable Factory Registration
- Factory Method design pattern with self registering derived classes
- Factory Method in C++
- 原型模式
- Factory method pattern
- 详解设计模式 | 抽象工厂
- 4. 建造者模式
- 30.1 工厂方法模式VS建造者模式
- 建造者模式
- 建造者模式(Builder Pattern)- 最易懂的设计模式解析
- Chapter 4 建造者模式(Builder Pattern)
- 人人都会设计模式—建造者模式–Builder
- C++11于once_flag,call_once分析的实现
- C++中once_flag、call_once使用
- Optimize RGBA->RGB arm64 assembly
- preload-practice.zh
- 飞腾CPU体系结构(十一)
- aarch64 neon指令集拾遗
- ARM架构64位入门基础:架构分析、寄存器、调用规则、指令集、程序调试以及参考手册
- ARM NEON编程初探——一个简单的BGR888转YUV444实例详解
- YUV 格式与 RGB 格式的相互转换公式及C++ 代码
- 【arm】arm32位和arm64位架构、寄存器和指令差异分析总结
- Armv8 指令集
- AN12628 Optimizing Memory Copy Routines
- ARM Neon 常用指令
- ARM NEON SIMD 指令优化
- Dealing with the ARM AArch64 SIMD documentation
- Introduction to ARM64 NEON assembly
- ARMv8常用指令
- AI 移动端框架常用指令·汇总(待续)
- What kind of assembly instruction is this ld1 {v0.16b}, %[in]?
- chromium/external/libyuv/master/./source/row_neon64.cc
- Arm NEON programming quick reference
- AI移动端常用汇编指令汇总以及底层算子汇编实现(附带一点点干货)
- ARMv8 Neon Programming
- ARM_NEON_CNN编程
- 6.47.2 Extended Asm - Assembler Instructions with C Expression Operands
- How to Use Inline Assembly Language in C Code(C语言内联汇编)–continuing…
- TRN1
- ARM64 汇编指令总结
- 浅谈移动工程师跨界机器学习之路
- ARM指令集之乘法指令
- What kind of assembly instruction is this ld1 {v0.16b}, %[in]?
- ARM Instruction Set
- Lecture 8: ARM Arithmetic and Bitwise Instructions
- Arm A64 Instruction Set Architecture
- arm CPU 2D卷积计算方法一览
- ARM Assembly Programming
- 关于ARM中的tst、cmp、bne、beq指令
- Introducing ARM assembly language
- ARM Data Types and Registers (Part 2) | Azeria Labs
- First look at the arm64 architecture and assembly language
- gemmlowp/internal/kernel_neon.h
- ARMv8-A A64 ISA Overview
- What is the fastest way to index into ARMv8 registers
- 移动端arm cpu优化学习笔记第4弹–内联汇编入门
- 【Arm端算法优化笔记】一,一步步优化盒子滤波算法
- ARMv8 中的 SIMD 运算
- 用NEON intrinsic实现RGB转YUV420SP(NV12)
- YUV转RGB(NV21-ARGB)的Neon优化代码
- 运用NEON指令集加速RGB与YUV相互转换
- What is arrangement specifier(.16b,.8b) in ARM assembly language instructions?
- TNN新版本上线!全新特性,更加好用!
TNN MatConverter CvtColor NV21TOBGR相关推荐
- TNN MatConverter WarpAffine
TNN 的仿射变换形态介于 OpenCV 和 ncnn 之间.其处理流程与 OpenCV 较为相似并做了一些优化,不同的地方在于数据处理宽度为4,比较小.在性能表现方面中规中矩,小图上不及 ncnn. ...
- TNN MatConvertParam参数scale和bias设置
Pytorch的Normalize的计算过程是:TNN MatConvertParam参数设置 使用TNN进行模型推理前,需要进行必要的预处理,如下需要设置TNN_NS::MatConvertPara ...
- 37、记录使用 Swin Transformer主干网络去实现分类,并转化NCNN、TNN、MNN模型以及部署
基本思想:最近手中有个swim transformer模型,想移植手机端进行推理一下,随手记录一下遇到的问题涉及简单的转ncnn tnn mnn的流程性问题 一.首先我fork了大佬的代码https: ...
- OpenCV 笔记(09)— 常用的数据结构和函数(Vec、Point、Scalar、Size、Rect、cvtColor)
1. Vec 对象类型 Vec 是一个主要用于数值向量的模板类.我们可以定义向量的类型和组件的数量: Vec<double, 19> myVector 我们还可以使用任何的预定义类型: t ...
- OpenCV最经典的3种颜色空间(cv2.cvtColor)及互相转换
OpenCV最经典的3种颜色空间(cv2.cvtColor)及互相转换 1. 效果图 2. 原理 2.1 照明条件的重要性 2.2 OpenCV 中的3种颜色空间/模型 2.3 颜色空间的主要用途 3 ...
- OpenCV代码提取:cvtColor函数的实现
OpenCV中的cvtColor函数包括了很多颜色格式之间的转换,用起来很方便,这里对cvtColor函数的code进行了提取,经测试,和OpenCV3.1结果完全一致. 实现代码cvtColor.h ...
- 重构ncnn,腾讯优图开源新一代移动端推理框架TNN
来源 | 腾讯优图 从学界到工业界,"开源"已经成为AI领域的一个关键词.一方面,它以"授人以渔"的方式为AI构建了一个开放共进的生态环境,帮助行业加速AI应用 ...
- Opencv中除了cv2.cvtColor彩色图转灰度图之外的其他6种方法
文章目录 1.参考文章: 2.公式集成: 3.代码实现: 4.实验结果: 1.参考文章: https://mp.weixin.qq.com/s/jqVVZbZZRIqVt_Fs7HiUkg 2.公式集 ...
- 彩色空间及cvtColor解析
首先,我们要了解:什么是彩色空间呢? 许多人都知道在绘画时可以使用红色.黄色和蓝色这三种原色生成不同的颜色,这些颜色就定义了一个色彩空间.我们将品红色的量定义为X 坐标轴.青色的量定义为Y坐标轴.黄色 ...
最新文章
- 使用Win API创建顶级菜单(不使用资源文件)
- QCon北京2018关键词:Kubernetes、Service Mesh、Istio和微服务
- MFC设置静态文本框,编辑框等控件背景和字体颜色
- 浏览器显示无法解析服务器的DNS地址,使用搜狗浏览器时突然弹出无法解析服务器的DNS地址该如何处理...
- 卸载 系统打印服务器,win10系统打印机驱动卸载不掉的方案介绍
- STM32F103移植mpu9250
- 不止鸿蒙 OS,华为的备用操作系统还有“极光”?
- 深度学习自学(二十二):推理框架-MNN
- webpack 基础学习
- PHP微信怎么计步数,微信运动怎么关注好友步数(微信运动计步功能使用方法介绍)...
- P1357 花园(dp快速幂转移)
- apache-felix实例
- 计蒜客 A1596.蒜头君王国 概率计算(dp)
- Android合理的使用闪屏
- 市面上U盘便宜种类多 如何鉴别便宜U盘好坏呢
- 深入理解Java虚拟机(周志明第三版)- 第十一章:后端编译与优化
- js控制html控件显示隐藏和是否可用
- 贴片绕线电感和贴片电感的区别
- Linux下安装和使用杀毒软件AntiVir(ZZ)
- the sentiments when install labelimage
热门文章
- java怎么编程class,深入理解Java Class文件格式(一)
- RK3588 VOP-SPLIT分屏模式介绍
- 不等式大两边小中间_不等式取值范围口诀
- docker网络连接——docker network connect命令
- 车间调度标准测试集汇总-FJSP、PFSP、JSP、HFSP和分布式车间调度测试集
- Kubeadm搭建高可用K8S(四)Dashboard安装配置
- 手游客户端开发招聘要求
- 华硕天选 3 和 联想拯救者 Y9000P 2022 款 哪个好
- [Poi2012]Rendezvous
- 移动硬盘数据恢复需多少钱?关于这个不伤钱的方法