

  1. 寻找一个轻量的检测模型,以方便集成到安卓应用中
  2. 使用自己的检测数据集对模型进行训练
  3. 探索模型集成到安卓应用中的方式


  1. 【Yolov5】训练yolov5模型并集成到安卓应用中(上)——模型训练
  2. 【Yolov5】训练yolov5模型并集成到安卓应用中(中)——模型转化
  3. 【Yolov5】训练yolov5模型并集成到安卓应用中(下)——模型集成




TorchScript 是 PyTorch 模型(nn.Module的子类)的中间表示形式,可以在高性能环境(例如 C ++)中运行。


import torch
import torchvision

# Instance of the model that will be converted.
model = torchvision.models.resnet18(pretrained=True)
# Example input; tracing records the operations executed on it.
example = torch.rand(1, 3, 224, 224)
# Run the model once on the example and capture the result as TorchScript.
traced_script_module = torch.jit.trace(model, example)



  1. 未标注类型的参数默认按 Tensor 类型处理。如下面这个例子:

    @torch.jit.script
    def test(example):
        # No annotation on `example`, so the script compiler types it as Tensor
        # and infers the list element type from it.
        temp = []
        return temp.append(example)

    # Show the TorchScript source generated for the function.
    test.code


    def test(example: Tensor) -> List[Tensor]:
      temp = annotate(List[Tensor], [])
      return torch.append(temp, example)


    test(torch.tensor([1,2,3]))  # runs fine
    test(1) # raises an error
    # Message: test() Expected a value of type 'Tensor (inferred)' for argument 'example' but instead found type 'int'.


    from typing import List

    @torch.jit.script
    def test(example: int):
        # Annotating the argument and the list stops the compiler from
        # falling back to Tensor for both.
        temp: List[int] = []
        return temp.append(example)

    # Show the TorchScript source generated for the function.
    test.code


    def test(example: int) -> List[int]:
      temp = annotate(List[int], [])
      return torch.append(temp, example)


  2. 要求保持list类型的统一。如:

    # Deliberately failing example: compiling it raises the error quoted below.
    @torch.jit.script
    def test(example):
        # The literal makes this a list of int, while the un-annotated
        # `example` is typed as Tensor, so the append cannot be type-matched.
        temp = [1]
        return temp.append(example)

    Could not match type Tensor (inferred) to t in argument 'el': Type variable 't' previously matched to type int is matched to type Tensor (inferred).

  3. 不支持第三方库的函数。

  4. 当然还有其他限制,只是暂时还未遇到,这篇文章对更多的限制进行了说明。



  1. 输入仅仅只能是与tensor相关的类型。如:

    # Deliberately failing example: the example input handed to trace is a plain int.
    def test(example):return example + 2
    jit = torch.jit.trace(test, 1)

    报错:TypeError: 'int' object is not iterable

    # Deliberately failing example: the example input is a list holding an int, not a tensor.
    def test(example):return example + 2
    jit = torch.jit.trace(test, [1])

    报错:RuntimeError: Type 'Tuple[int]' cannot be traced. Only Tensors and (possibly nested) Lists, Dicts, and Tuples of Tensors can be traced

  2. 不支持条件和循环。虽然其能编译通过,但通过查看编译后的源码发现,其条件和循环会受到example的影响。如循环5次,会转化为执行5次循环体;条件语句,会只保留为True的语句。看下面的例子:

    example = torch.tensor([1, 2, 3, 4, 5])

    def test(example):
        """Sum the elements; double each one when there are 10 or more."""
        result = 0
        if 10 > example.shape[0] > 0:
            for i in range(example.shape[0]):
                result += example[i]
        elif example.shape[0] >= 10:
            for i in range(example.shape[0]):
                result += example[i] * 2
        return result

    # Tracing only records what runs for this 5-element example: the first
    # branch, with its loop unrolled into five additions.
    jit = torch.jit.trace(test, example)


    def test(example: Tensor) -> Tensor:
      result = torch.add(torch.select(example, 0, 0), CONSTANTS.c0, alpha=1)
      result0 = torch.add_(result, torch.select(example, 0, 1), alpha=1)
      result1 = torch.add_(result0, torch.select(example, 0, 2), alpha=1)
      result2 = torch.add_(result1, torch.select(example, 0, 3), alpha=1)
      _0 = torch.add_(result2, torch.select(example, 0, 4), alpha=1)
      return _0


  3. 当使用赋值的方式进行原址计算时,需要注意大小可能会被定死。可以看下面的例子:

    example = torch.tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])

    def test(example):
        """Add 4 to the first row by assigning to an index."""
        y = example
        # Assignment form: the trace records a copy into a view whose size is
        # taken from this example.
        y[0] = y[0] + 4
        return y

    jit = torch.jit.trace(test, example)


    def test(example: Tensor) -> Tensor:
      _0 = torch.add(torch.select(example, 0, 0), CONSTANTS.c0, alpha=1)
      _1 = torch.copy_(torch.select(example, 0, 0), torch.view(_0, [5]), False)
      return example

    问题出在第3行的 torch.view(_0, [5]):其中的形状参数 [5] 被写死,导致输入只能是 5 列的 tensor。如:

    jit(torch.tensor([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])) # runs fine
    jit(torch.tensor([[1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7]])) # raises an error
    # Message: RuntimeError: shape '[5]' is invalid for input of size 7


    example = torch.tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]])

    def test(example):
        """Add 4 to the first row with an in-place method call."""
        y = example
        # In-place method form: the trace contains a single add_ and no
        # fixed-size view.
        y[0].add_(4)
        return y

    jit = torch.jit.trace(test, example)


    def test(example: Tensor) -> Tensor:
      _0 = torch.add_(torch.select(example, 0, 0), CONSTANTS.c0, alpha=1)
      return example






def imageProcessing(filename, new_shape=(320, 320), color=(114, 114, 114), gray=False):
    """Read an image, letterbox it and return it as a normalised 4-D float array.

    The image is scaled (aspect ratio kept) to fit inside ``new_shape`` and
    then padded with ``color``. The padding is reduced modulo 64, so the
    result is not always exactly ``new_shape``.

    Args:
        filename: Path passed to ``cv2.imread``.
        new_shape: Target ``(height, width)``; a single int means a square.
        color: Border colour used for the padding.
        gray: When true, convert to a single grey channel; otherwise reverse
            the channel order (BGR to RGB) and move channels first.

    Returns:
        A contiguous float array scaled to ``[0, 1]`` with shape
        ``(1, C, H, W)``, where ``C`` is 1 for grey output.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``filename``.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {filename}")

    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the smaller ratio keeps the whole image inside.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)

    # Padding needed on each axis, reduced modulo 64, then split over two sides.
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    dw, dh = np.mod(dw, 64), np.mod(dh, 64)
    dw /= 2
    dh /= 2

    if shape[::-1] != new_unpad:  # resize only when the size actually changes
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudges send an odd padding pixel to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    if gray:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW

    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy, so the concrete dtype is named here.
    img = img.astype(np.float64)
    img /= 255.0  # 0 - 255 to 0.0 - 1.0

    # Add the leading batch axis (and a channel axis for grey images).
    if img.ndim == 3:
        img = img[np.newaxis, :, :, :]
    elif img.ndim == 2:
        img = img[np.newaxis, np.newaxis, :, :]
    return np.ascontiguousarray(img)



# Preprocessing parameters.
new_shape = (320, 320)
color = (114, 114, 114)
imgPath = 'testImage/1.jpg'   # any test picture
gray = False

# Load the picture and turn it into a float32 tensor.
img = imageProcessing(imgPath, new_shape = new_shape, color = color, gray = gray)
img = torch.from_numpy(img).float()

# Load the trained checkpoint on the CPU and switch the model to eval mode.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
modeldata = torch.load("best.pt", map_location=torch.device('cpu'))
model = modeldata['model'].float().eval()

# Run the picture through the model before any conversion. The module is
# called directly rather than through .forward().
pred = model(img)
pred  # display the prediction


(tensor([[[3.94224e+01, 1.83147e+01, 6.17124e+01, 5.91857e+01, 3.17744e-07, 9.69862e-01],[5.21751e+01, 2.77050e+01, 6.34605e+01, 7.80965e+01, 1.48335e-06, 9.69346e-01],[7.57904e+01, 2.28931e+01, 7.32974e+01, 6.84164e+01, 1.17378e-06, 9.70913e-01],...,[2.39012e+02, 3.14549e+02, 2.71791e+01, 2.62056e+01, 9.96026e-06, 9.87485e-01],[2.45330e+02, 3.11950e+02, 1.63809e+01, 2.46385e+01, 5.99965e-06, 9.84169e-01],[2.51161e+02, 3.11525e+02, 9.34076e+00, 1.88540e+01, 9.93162e-07, 9.83997e-01]]]),[tensor([[[[[ 1.86583e+00,  1.44920e-01, -5.55053e-01, -3.82730e-01, -1.49620e+01,  3.47138e+00],[ 2.62442e-01,  7.67090e-01, -5.32980e-01, -1.37165e-01, -1.34212e+01,  3.45387e+00],[-2.64633e-01,  4.37677e-01, -4.16091e-01, -2.57647e-01, -1.36553e+01,  3.50796e+00],...,[ 3.86080e-01,  2.91431e-01, -3.47019e-01, -4.21045e-01, -1.32108e+01,  3.30868e+00],[-8.95322e-02,  5.37793e-01, -4.54257e-01, -2.35619e-01, -1.39945e+01,  3.30029e+00],[-1.01065e+00,  6.59138e-01, -6.30642e-01, -2.95933e-01, -1.54304e+01,  3.41460e+00]],......省略后面的一堆东西


# Compile the model. With export enabled the detection head returns only the
# raw per-layer feature maps, which is what makes the model traceable.
model.model[-1].export = True
traced_script_module = torch.jit.trace(model, torch.rand(1, 3, new_shape[0], new_shape[1]))
# Save the traced model to a .pt file so the load below has something to read.
traced_script_module.save('best_torchscript.pt')
# Load the .pt file back.
jitModel = torch.jit.load('best_torchscript.pt')
# Run the same picture through the converted model.
jitPre = jitModel(img)
jitPre  # display the result


 [tensor([[[[[ 1.86583e+00,  1.44920e-01, -5.55053e-01, -3.82730e-01, -1.49620e+01,  3.47138e+00],[ 2.62442e-01,  7.67090e-01, -5.32980e-01, -1.37165e-01, -1.34212e+01,  3.45387e+00],[-2.64633e-01,  4.37677e-01, -4.16091e-01, -2.57647e-01, -1.36553e+01,  3.50796e+00],...,[ 3.86080e-01,  2.91431e-01, -3.47019e-01, -4.21045e-01, -1.32108e+01,  3.30868e+00],[-8.95322e-02,  5.37793e-01, -4.54257e-01, -2.35619e-01, -1.39945e+01,  3.30029e+00],[-1.01065e+00,  6.59138e-01, -6.30642e-01, -2.95933e-01, -1.54304e+01,  3.41460e+00]],......省略后面的一堆东西

对比两个结果,你会发现jitPre的结果丢了前面一部分数据。通过查看源码发现,这部分数据才是最重要、最能直接反映检测结果的数据。为什么会跑出不一样的结果呢?我们看到有这么一句model.model[-1].export = True,正是这句造成的;但如果改为False,虽然能得到想要的结果,却无法编译为TorchScript。研究yolo.py源码发现,这是因为模型中包含了一些结果处理的代码。代码如下所示:

class Detect(nn.Module):
    """Detection head: one 1x1 convolution per input level plus box decoding.

    In training/export mode ``forward`` returns only the reshaped feature
    maps. In inference mode it also decodes them into one tensor of boxes.
    """

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        """Build the head.

        Args:
            nc: Number of classes.
            anchors: One flat sequence of anchor (w, h) values per detection layer.
            ch: Input channel count of each detection layer.
        """
        super(Detect, self).__init__()
        self.stride = None  # strides computed during build
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.export = False  # onnx export

    def forward(self, x):
        """Apply the head to a list of feature maps (modified in place).

        Returns:
            The list ``x`` of maps shaped ``(bs, na, ny, nx, no)`` when
            training/exporting, otherwise ``(decoded, x)`` where ``decoded``
            has shape ``(bs, total_boxes, no)``.
        """
        z = []  # inference output
        self.training |= self.export  # export mode takes the training path
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            # The lines below are the ones that cannot be compiled.
            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a ``(1, 1, ny, nx, 2)`` float grid holding each cell's (x, y) index."""
        ys = torch.arange(ny).view([ny, 1]).expand([ny, nx])
        xs = torch.arange(nx).view([1, nx]).expand([ny, nx])
        return torch.stack([xs, ys], 2).view([1, 1, ny, nx, 2]).float()






# Pull the values the post-processing needs off the last layer of the model.
stride, anchor_grid, grid, no, _make_grid = (
    getattr(model.model[-1], attr)
    for attr in ("stride", "anchor_grid", "grid", "no", "_make_grid")
)
# Functions to be converted
def _processing(x, stride, anchor_grid, grid):
    """Decode one feature map into boxes.

    Reads the module-level ``_make_grid`` and ``no``.

    Args:
        x: Feature map with five dimensions, unpacked as (bs, na, ny, nx, outputs).
        stride: Stride applied to the xy values of this level.
        anchor_grid: Anchor sizes applied to the wh values of this level.
        grid: Cell-index grid; rebuilt when its dims 2-3 differ from those of ``x``.

    Returns:
        The decoded values viewed as ``(bs, -1, no)``.
    """
    bs, _, ny, nx, _ = x.shape
    if grid.shape[2:4] != x.shape[2:4]:
        grid = _make_grid(nx, ny)

    y = x.sigmoid()
    # In-place form of: y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride
    y[..., 0:2].mul_(2.).sub_(0.5).add_(grid).mul_(stride)
    # In-place form of: y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid
    y[..., 2:4].mul_(2.).pow_(2).mul_(anchor_grid)
    return y.view(bs, -1, no)


def resultsProcessing(x, y, z):
    """Decode the three feature maps and concatenate the boxes along dim 1.

    Uses the module-level ``stride``, ``anchor_grid`` and ``grid``, taking
    entry 0, 1 and 2 for ``x``, ``y`` and ``z`` respectively.
    """
    result = [
        _processing(level, stride[i], anchor_grid[i], grid[i])
        for i, level in enumerate((x, y, z))
    ]
    return torch.cat(result, 1)
# Convert with trace
# NOTE(review): the same example tensor is used for all three inputs, while the
# call further down passes three separate tensors — confirm the trace generalises.
example = torch.rand(1, 3, new_shape[0], new_shape[1], 6)
traced_script_method = torch.jit.trace(resultsProcessing, (example, example, example))
traced_script_method.save('best_resultsProcessing.pt')
# Verify the result: reload the saved file and feed it the three entries of jitPre.
jitResultsProcessing = torch.jit.load('best_resultsProcessing.pt')
jitRes = jitResultsProcessing.forward(jitPre[0],jitPre[1],jitPre[2])


tensor([[[3.94224e+01, 1.83147e+01, 6.17124e+01, 5.91857e+01, 3.17744e-07, 9.69862e-01],[5.21751e+01, 2.77050e+01, 6.34605e+01, 7.80965e+01, 1.48335e-06, 9.69346e-01],[7.57904e+01, 2.28931e+01, 7.32974e+01, 6.84164e+01, 1.17378e-06, 9.70913e-01],...,[2.39012e+02, 3.14549e+02, 2.71791e+01, 2.62056e+01, 9.96026e-06, 9.87485e-01],[2.45330e+02, 3.11950e+02, 1.63809e+01, 2.46385e+01, 5.99965e-06, 9.84169e-01],[2.51161e+02, 3.11525e+02, 9.34076e+00, 1.88540e+01, 9.93162e-07, 9.83997e-01]]]



# 提取函数及转化为TorchScript
def xywh2xyxy(x: torch.Tensor) -> torch.Tensor:
    """Convert boxes from (center x, center y, width, height) to (x1, y1, x2, y2).

    Args:
        x: 2-D tensor whose columns 0-3 are read as cx, cy, w, h.

    Returns:
        A new tensor shaped like ``x`` with the corner coordinates in columns 0-3.
    """
    y = torch.zeros_like(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    y[:, 0].copy_(x[:, 0] - half_w)  # top left x
    y[:, 1].copy_(x[:, 1] - half_h)  # top left y
    y[:, 2].copy_(x[:, 0] + half_w)  # bottom right x
    y[:, 3].copy_(x[:, 1] + half_h)  # bottom right y
    return y


@torch.jit.script
def non_max_suppression(prediction: torch.Tensor) -> torch.Tensor:
    """Filter raw predictions by confidence and suppress overlapping boxes.

    Only the first image in the batch that still has boxes after the
    confidence filters is processed and returned.

    Args:
        prediction: Tensor of shape ``(batch, boxes, 5 + classes)`` with rows
            laid out as (cx, cy, w, h, objectness, class scores...).

    Returns:
        Tensor of shape ``(kept, 6)`` with rows (x1, y1, x2, y2, conf, class).
        It has zero rows when nothing passes the thresholds, so callers
        always receive a tensor.
    """
    conf_thres = 0.4
    iou_thres = 0.5

    if prediction.dtype == torch.float16:
        prediction = prediction.float()  # to FP32
    xc = prediction[..., 4] > conf_thres  # candidates

    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # keep rows whose objectness passes the threshold
        if x.shape[0] == 0:
            continue

        x[:, 5:].mul_(x[:, 4:5])  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])
        conf, j = x[:, 5:].max(1, keepdim=True)
        x = torch.cat([box, conf, j.float()], 1)[conf.view(-1) > conf_thres]
        if x.shape[0] == 0:
            continue

        # 1 marks a kept row; a mask keeps the output in the row order of x.
        indexs = torch.zeros([x.shape[0]])
        boxes, scores = x[:, :4], x[:, 4]
        order = scores.argsort()  # ascending, so the best box is last
        start_x, start_y, end_x, end_y = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (end_x - start_x + 1) * (end_y - start_y + 1)

        while order.shape[0] > 0:
            index = order[-1]  # highest remaining score
            rest = order[:-1]
            indexs[index] = 1

            # Intersection of the picked box with every remaining box.
            x1 = torch.max(start_x[index], start_x[rest])
            x2 = torch.min(end_x[index], end_x[rest])
            y1 = torch.max(start_y[index], start_y[rest])
            y2 = torch.min(end_y[index], end_y[rest])
            w = torch.max(torch.tensor(0.0), x2 - x1 + 1)
            h = torch.max(torch.tensor(0.0), y2 - y1 + 1)
            intersection = w * h

            # Keep only the boxes that overlap the picked one less than the threshold.
            ratio = intersection / (areas[index] + areas[rest] - intersection)
            order = rest[ratio < iou_thres]

        return x[indexs == 1]

    return torch.zeros([0, 6])


# Save as a .pt file
non_max_suppression.save('filter.pt')
# Verify the model


tensor([[ 31.0141, 159.4193, 361.3859, 515.5379,   0.8935,   0.0000]])






  1. best_torchscript.pt:网络模型
  2. best_resultsProcessing.pt:过滤无用数据
  3. filter.pt:筛选主要边框


class Yolov5Model(torch.jit.ScriptModule):
    """Compiled network, box decoding and box filtering combined in one module.

    Args:
        model: Compiled network whose output is indexable and has three
            feature maps at positions 0, 1 and 2.
        stride: Per-level strides, indexed 0-2.
        anchor_grid: Per-level anchor sizes, indexed 0-2.
        grid: List of per-level cell grids, indexed 0-2.
        no: Number of values per box in the decoded output.
    """

    def __init__(self, model, stride, anchor_grid, grid, no):
        super(Yolov5Model, self).__init__()
        self.model = model
        self.stride, self.anchor_grid, self.grid, self.no = stride, anchor_grid, grid, no

    @torch.jit.script_method
    def forward(self, img):
        """Run the network on ``img`` and return the filtered boxes."""
        preResult = self.model.forward(img)
        boxs = self.resultsProcessing(preResult[0], preResult[1], preResult[2])
        return self.non_max_suppression(boxs)

    @torch.jit.script_method
    def _make_grid(self, nx: int = 20, ny: int = 20):
        """Return a ``(1, 1, ny, nx, 2)`` float grid holding each cell's (x, y) index."""
        ys = torch.arange(ny).view([ny, 1]).expand([ny, nx])
        xs = torch.arange(nx).view([1, nx]).expand([ny, nx])
        return torch.stack([xs, ys], 2).view([1, 1, ny, nx, 2]).float()

    @torch.jit.script_method
    def _processing(self, x, stride, anchor_grid, grid):
        """Decode one five-dimensional feature map into ``(bs, -1, no)`` boxes."""
        bs, _, ny, nx, _ = x.shape
        if grid.shape[2:4] != x.shape[2:4]:
            grid = self._make_grid(nx, ny)

        y = x.sigmoid()
        # In-place form of: y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride
        y[..., 0:2].mul_(2.).sub_(0.5).add_(grid).mul_(stride)
        # In-place form of: y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid
        y[..., 2:4].mul_(2.).pow_(2).mul_(anchor_grid)
        return y.view(bs, -1, self.no)

    @torch.jit.script_method
    def resultsProcessing(self, x, y, z):
        """Decode the three feature maps and concatenate the boxes along dim 1."""
        result = [
            self._processing(x, self.stride[0], self.anchor_grid[0], self.grid[0]),
            self._processing(y, self.stride[1], self.anchor_grid[1], self.grid[1]),
            self._processing(z, self.stride[2], self.anchor_grid[2], self.grid[2]),
        ]
        return torch.cat(result, 1)

    @torch.jit.script_method
    def xywh2xyxy(self, x):
        """Convert (center x, center y, width, height) columns to (x1, y1, x2, y2)."""
        y = torch.zeros_like(x)
        half_w = x[:, 2] / 2
        half_h = x[:, 3] / 2
        y[:, 0].copy_(x[:, 0] - half_w)  # top left x
        y[:, 1].copy_(x[:, 1] - half_h)  # top left y
        y[:, 2].copy_(x[:, 0] + half_w)  # bottom right x
        y[:, 3].copy_(x[:, 1] + half_h)  # bottom right y
        return y

    @torch.jit.script_method
    def non_max_suppression(self, prediction):
        """Filter decoded boxes by confidence and suppress overlapping ones.

        Only the first image that still has boxes after the confidence
        filters is processed. Returns rows of (x1, y1, x2, y2, conf, class),
        or a tensor with zero rows when nothing passes the thresholds.
        """
        conf_thres = 0.4
        iou_thres = 0.5

        if prediction.dtype == torch.float16:
            prediction = prediction.float()  # to FP32
        xc = prediction[..., 4] > conf_thres  # candidates

        for xi, x in enumerate(prediction):  # image index, image inference
            x = x[xc[xi]]  # keep rows whose objectness passes the threshold
            if x.shape[0] == 0:
                continue

            x[:, 5:].mul_(x[:, 4:5])  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = self.xywh2xyxy(x[:, :4])
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat([box, conf, j.float()], 1)[conf.view(-1) > conf_thres]
            if x.shape[0] == 0:
                continue

            # 1 marks a kept row; a mask keeps the output in the row order of x.
            indexs = torch.zeros([x.shape[0]])
            boxes, scores = x[:, :4], x[:, 4]
            order = scores.argsort()  # ascending, so the best box is last
            start_x, start_y, end_x, end_y = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
            areas = (end_x - start_x + 1) * (end_y - start_y + 1)

            while order.shape[0] > 0:
                index = order[-1]  # highest remaining score
                rest = order[:-1]
                indexs[index] = 1

                # Intersection of the picked box with every remaining box.
                x1 = torch.max(start_x[index], start_x[rest])
                x2 = torch.min(end_x[index], end_x[rest])
                y1 = torch.max(start_y[index], start_y[rest])
                y2 = torch.min(end_y[index], end_y[rest])
                w = torch.max(torch.tensor(0.0), x2 - x1 + 1)
                h = torch.max(torch.tensor(0.0), y2 - y1 + 1)
                intersection = w * h

                # Keep only the boxes that overlap the picked one less than the threshold.
                ratio = intersection / (areas[index] + areas[rest] - intersection)
                order = rest[ratio < iou_thres]

            return x[indexs == 1]

        return torch.zeros([0, 6])


# Load the compiled network.
jitModel = torch.jit.load('best_torchscript.pt')

# Fetch the related parameters from the last layer of the original model.
stride, anchor_grid, grid, no, _make_grid = (
    getattr(model.model[-1], attr)
    for attr in ("stride", "anchor_grid", "grid", "no", "_make_grid")
)

# Instantiate the combined model, which compiles it.
completeModel = Yolov5Model(jitModel, stride, anchor_grid, grid, no)

# Save the combined model.
completeModel.save('best_Complete.pt')

# Verify the result by loading the saved file back.
loadModel = torch.jit.load('best_Complete.pt')





