I've recently been working on a project that needs YOLOv7, but the official repo only ships ONNX-inference code for single images, and most of what circulates online is copied from it and full of bugs. This post fixes those issues.

The official ONNX image-inference example is worth a look if you're interested; it lives in the YOLOv7 repository (https://github.com/WongKinYiu/yolov7).

First, export the model to ONNX following the official settings:

python export.py --weights weights/yolov7.pt --grid --end2end --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640

Versions used here: onnx==1.9.0, onnx-simplifier==0.3.6.
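Before wiring the model into a detection script, it's worth a quick sanity check that the export succeeded. A minimal sketch (the path weights/yolov7.onnx comes from the export command above; everything else is standard onnx / onnxruntime API):

import onnx
import onnxruntime as ort

model = onnx.load('weights/yolov7.onnx')
onnx.checker.check_model(model)  # raises if the exported graph is malformed

session = ort.InferenceSession('weights/yolov7.onnx', providers=['CPUExecutionProvider'])
for i in session.get_inputs():
    print('input :', i.name, i.shape, i.type)   # for this export, expect a 1x3x640x640 float32 input
for o in session.get_outputs():
    print('output:', o.name, o.shape, o.type)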

OK, back to the main topic. Here is the code.

import argparse
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy
import onnxruntime as ort
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, \
    apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel

names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush']


class ONNX_engine():
    def __init__(self, weights, size, cuda) -> None:
        self.img_new_shape = (size, size)
        self.weights = weights
        self.device = cuda
        self.init_engine()
        self.names = names  # was the string 'names', which made self.colors iterate over characters
        self.colors = {name: [random.randint(0, 255) for _ in range(3)] for name in self.names}

    def init_engine(self):
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if self.device else ['CPUExecutionProvider']
        self.session = ort.InferenceSession(self.weights[0], providers=providers)

    def predict(self, im):
        outname = [i.name for i in self.session.get_outputs()]
        inname = [i.name for i in self.session.get_inputs()]
        inp = {inname[0]: im}
        outputs = self.session.run(outname, inp)[0]
        # print(outputs.shape)
        return outputs


def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    # half precision is not used here: the exported ONNX graph expects float32 inputs

    # Load model
    model = ONNX_engine(weights, imgsz, device)
    stride = 32
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, onnx=True)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    # names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model.predict(numpy.zeros((1, 3, imgsz, imgsz), dtype=numpy.float32))  # run once (ORT takes numpy, not torch tensors)
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img)  # keep on CPU: the ONNX session is fed numpy arrays
        img = img.float()  # uint8 to fp32 (do not cast to half: the exported graph expects float32)
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup
        if device.type != 'cpu' and (
                old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model.predict(numpy.array(img))

        # Inference
        t1 = time_synchronized()
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = torch.from_numpy(model.predict(numpy.array(img))).unsqueeze(0)
        t2 = time_synchronized()

        # Apply NMS -- not needed: the end2end ONNX model already runs NMS inside the graph
        # pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()

        # if pred.size == 0:
        #     continue

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, 1:5] = scale_coords(img.shape[2:], det[:, 1:5], im0.shape).round()

                # Print results
                for c in det[:, -2].unique():
                    n = (det[:, -2] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results; each row is (batch_id, x0, y0, x1, y1, cls, conf)
                for batch_id, *xyxy, cls, conf in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1)

            # Print time (inference + NMS)
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    print(f"The image with the result is saved in: {save_path}")
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        # print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_false', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_false', help='don`t trace model')
    opt = parser.parse_args()
    print(opt)
    # check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
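For reference, the --end2end --max-wh export already runs NMS inside the graph, which is why the script above skips non_max_suppression and unpacks each output row as (batch_id, x0, y0, x1, y1, cls, conf). Below is a stripped-down single-image sketch of the same idea with no repo utilities at all; the image path is only an illustration, and the plain cv2.resize stands in for letterbox with onnx=True (a square stretch):

import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('weights/yolov7.onnx', providers=['CPUExecutionProvider'])

img0 = cv2.imread('inference/images/horses.jpg')  # illustrative test image
img = cv2.resize(img0, (640, 640))                # stretch to a square, like letterbox(onnx=True)
img = img[:, :, ::-1].transpose(2, 0, 1)          # BGR to RGB, HWC to CHW
img = np.ascontiguousarray(img, dtype=np.float32) / 255.0
img = img[None]                                   # add batch dimension -> 1x3x640x640

inname = [i.name for i in session.get_inputs()]
outname = [o.name for o in session.get_outputs()]
det = session.run(outname, {inname[0]: img})[0]   # Nx7: batch_id, x0, y0, x1, y1, cls, conf

# because the input was stretched, boxes map back with per-axis gains
gw, gh = img0.shape[1] / 640, img0.shape[0] / 640
for batch_id, x0, y0, x1, y1, cls, conf in det:
    print(int(cls), f'{conf:.2f}', (int(x0 * gw), int(y0 * gh), int(x1 * gw), int(y1 * gh)))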

This script was adapted from detect.py and the official ONNX example; put it in the same directory as detect.py. One caveat: dataset.py must not use the minimum-rectangle letterbox — for this exported model the letterbox output has to be square, i.e. equal width and height. Since several pieces changed, the whole file is posted to avoid missing any detail. The dataset.py below can directly replace the original file (a quick sanity check of the letterbox change follows the file).

# Dataset utils and dataloaders

import glob
import logging
import math
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
import pickle
from copy import deepcopy
# from pycocotools import mask as maskUtils
from torchvision.utils import save_image
from torchvision.ops import roi_pool, roi_align, ps_roi_pool, ps_roi_align

from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
    resample_segments, clean_str
from utils.torch_utils import torch_distributed_zero_first

# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
logger = logging.getLogger(__name__)

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


def get_hash(files):
    # Returns a single hash value of a list of files
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except:
        pass
    return s


def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=opt.single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
                                      prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
    return dataloader, dataset


class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """ Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler(object):
    """ Sampler that repeats forever

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)


class LoadImages:  # for inference
    def __init__(self, path, img_size=640, stride=32):
        p = str(Path(path).absolute())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in img_formats]
        videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            # print(f'image {self.count}/{self.nf} {path}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files


class LoadWebcam:  # for inference
    def __init__(self, pipe='0', img_size=640, stride=32):
        self.img_size = img_size
        self.stride = stride

        if pipe.isnumeric():
            pipe = eval(pipe)  # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera

        self.pipe = pipe
        self.cap = cv2.VideoCapture(pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame
        if self.pipe == 0:  # local camera
            ret_val, img0 = self.cap.read()
            img0 = cv2.flip(img0, 1)  # flip left-right
        else:  # IP camera
            n = 0
            while True:
                n += 1
                self.cap.grab()
                if n % 30 == 0:  # skip frames
                    ret_val, img0 = self.cap.retrieve()
                    if ret_val:
                        break

        # Print
        assert ret_val, f'Camera Error {self.pipe}'
        img_path = 'webcam.jpg'
        print(f'webcam {self.count}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0


class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640, stride=32, onnx=True):
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print(f'{i + 1}/{n}: {s}... ', end='')
            url = eval(s) if s.isnumeric() else s
            if 'youtube.com/' in str(url) or 'youtu.be/' in str(url):  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                url = pafy.new(url).getbest(preftype="mp4").url
            cap = cv2.VideoCapture(url)
            assert cap.isOpened(), f'Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps = (cap.get(cv2.CAP_PROP_FPS) % 100) + 1

            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(f' success ({w}x{h} at {self.fps:.2f} FPS).')
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, onnx=onnx)[0].shape for x in self.imgs], 0)  # shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                success, im = cap.retrieve()
                self.imgs[index] = im if success else self.imgs[index] * 0
                n = 0
            time.sleep(1 / self.fps)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years


def img2label_paths(img_paths):
    # Define label paths as a function of image paths
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
    return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]


class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        # self.albumentations = Albumentations() if augment else None

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # f = list(p.rglob('**/*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
        if cache_path.is_file():
            cache, exists = torch.load(cache_path), True  # load
            # if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
            #    cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
        else:
            cache, exists = self.cache_labels(cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'

        # Read cache
        cache.pop('hash')  # remove hash
        cache.pop('version')  # remove version
        labels, shapes, self.segments = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        if single_cls:
            for x in self.labels:
                x[:, 0] = 0

        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            if cache_images == 'disk':
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # Gigabytes of cached images
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
            pbar.close()

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, duplicate
        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
        for i, (im_file, lb_file) in enumerate(pbar):
            try:
                # verify images
                im = Image.open(im_file)
                im.verify()  # PIL verify
                shape = exif_size(im)  # image size
                segments = []  # instance segments
                assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
                assert im.format.lower() in img_formats, f'invalid image format {im.format}'

                # verify labels
                if os.path.isfile(lb_file):
                    nf += 1  # label found
                    with open(lb_file, 'r') as f:
                        l = [x.split() for x in f.read().strip().splitlines()]
                        if any([len(x) > 8 for x in l]):  # is segment
                            classes = np.array([x[0] for x in l], dtype=np.float32)
                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
                            l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                        l = np.array(l, dtype=np.float32)
                    if len(l):
                        assert l.shape[1] == 5, 'labels require 5 columns each'
                        assert (l >= 0).all(), 'negative labels'
                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                        assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
                    else:
                        ne += 1  # label empty
                        l = np.zeros((0, 5), dtype=np.float32)
                else:
                    nm += 1  # label missing
                    l = np.zeros((0, 5), dtype=np.float32)
                x[im_file] = [l, shape, segments]
            except Exception as e:
                nc += 1
                print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')

            pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
        pbar.close()

        if nf == 0:
            print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')

        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, i + 1
        x['version'] = 0.1  # cache version
        torch.save(x, path)  # save for next time
        logging.info(f'{prefix}New cache created: {path}')
        return x

    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            if random.random() < 0.8:
                img, labels = load_mosaic(self, index)
            else:
                img, labels = load_mosaic9(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                if random.random() < 0.8:
                    img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
                else:
                    img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # img, labels = self.albumentations(img, labels)

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

            if random.random() < hyp['paste_in']:
                sample_labels, sample_images, sample_masks = [], [], []
                while len(sample_labels) < 30:
                    sample_labels_, sample_images_, sample_masks_ = load_samples(self,
                                                                                 random.randint(0, len(self.labels) - 1))
                    sample_labels += sample_labels_
                    sample_images += sample_images_
                    sample_masks += sample_masks_
                    # print(len(sample_labels))
                    if len(sample_labels) == 0:
                        break
                labels = pastein(img, labels, sample_labels, sample_images, sample_masks)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear',
                                   align_corners=False)[0].type(img[i].type())
                l = label[i]
            else:
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(l)

        for i, l in enumerate(label4):
            l[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4


# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed


def hist_equalize(img, clahe=True, bgr=False):
    # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB


def load_mosaic(self, index):
    # loads images in a 4-mosaic

    labels4, segments4 = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:], *segments4):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    # img4, labels4, segments4 = remove_background(img4, labels4, segments4)
    # sample_segments(img4, labels4, segments4, probability=self.hyp['copy_paste'])
    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste'])
    img4, labels4 = random_perspective(img4, labels4, segments4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4


def load_mosaic9(self, index):
    # loads images in a 9-mosaic

    labels9, segments9 = [], []
    s = self.img_size
    indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img9
        if i == 0:  # center
            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            h0, w0 = h, w
            c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
        elif i == 1:  # top
            c = s, s - h, s + w, s
        elif i == 2:  # top right
            c = s + wp, s - h, s + wp + w, s
        elif i == 3:  # right
            c = s + w0, s, s + w0 + w, s + h
        elif i == 4:  # bottom right
            c = s + w0, s + hp, s + w0 + w, s + hp + h
        elif i == 5:  # bottom
            c = s + w0 - w, s + h0, s + w0, s + h0 + h
        elif i == 6:  # bottom left
            c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
        elif i == 7:  # left
            c = s - w, s + h0 - h, s, s + h0
        elif i == 8:  # top left
            c = s - w, s + h0 - hp - h, s, s + h0 - hp

        padx, pady = c[:2]
        x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
        labels9.append(labels)
        segments9.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset
    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

    # Concat/clip labels
    labels9 = np.concatenate(labels9, 0)
    labels9[:, [1, 3]] -= xc
    labels9[:, [2, 4]] -= yc
    c = np.array([xc, yc])  # centers
    segments9 = [x - c for x in segments9]

    for x in (labels9[:, 1:], *segments9):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img9, labels9 = replicate(img9, labels9)  # replicate

    # Augment
    # img9, labels9, segments9 = remove_background(img9, labels9, segments9)
    img9, labels9, segments9 = copy_paste(img9, labels9, segments9, probability=self.hyp['copy_paste'])
    img9, labels9 = random_perspective(img9, labels9, segments9,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img9, labels9


def load_samples(self, index):
    # loads images in a 4-mosaic

    labels4, segments4 = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:], *segments4):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    # img4, labels4, segments4 = remove_background(img4, labels4, segments4)
    sample_labels, sample_images, sample_masks = sample_segments(img4, labels4, segments4, probability=0.5)

    return sample_labels, sample_images, sample_masks


def copy_paste(img, labels, segments, probability=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    if probability and n:
        h, w, c = img.shape  # height, width, channels
        im_new = np.zeros(img.shape, np.uint8)
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=img, src2=im_new)
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
        img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug

    return img, labels, segments


def remove_background(img, labels, segments):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    h, w, c = img.shape  # height, width, channels
    im_new = np.zeros(img.shape, np.uint8)
    img_new = np.ones(img.shape, np.uint8) * 114
    for j in range(n):
        cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=img, src2=im_new)

        i = result > 0  # pixels to replace
        img_new[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug

    return img_new, labels, segments


def sample_segments(img, labels, segments, probability=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    sample_labels = []
    sample_images = []
    sample_masks = []
    if probability and n:
        h, w, c = img.shape  # height, width, channels
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = l[1].astype(int).clip(0, w - 1), l[2].astype(int).clip(0, h - 1), \
                  l[3].astype(int).clip(0, w - 1), l[4].astype(int).clip(0, h - 1)

            # print(box)
            if (box[2] <= box[0]) or (box[3] <= box[1]):
                continue

            sample_labels.append(l[0])

            mask = np.zeros(img.shape, np.uint8)

            cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
            sample_masks.append(mask[box[1]:box[3], box[0]:box[2], :])

            result = cv2.bitwise_and(src1=img, src2=mask)
            i = result > 0  # pixels to replace
            mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
            # print(box)
            sample_images.append(mask[box[1]:box[3], box[0]:box[2], :])

    return sample_labels, sample_images, sample_masks


def replicate(img, labels):
    # Replicate labels
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return img, labels


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=True, scaleup=True, onnx=False,
              stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    if onnx:
        auto, scaleFill = False, True
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    if auto:  # minimum rectangle
        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    if scaleFill:  # stretch
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1.1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                xy = np.ones((len(segment), 3))
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # clip
            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return img, targets


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates


def bbox_ioa(box1, box2):
    # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
    box2 = box2.transpose()

    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

    # Intersection area
    inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                 (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

    # box2 area
    box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

    # Intersection over box2 area
    return inter_area / box2_area


def cutout(image, labels):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = image.shape[:2]

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels


def pastein(image, labels, sample_labels, sample_images, sample_masks):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = image.shape[:2]

    # create random masks
    scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6  # image size fraction
    for s in scales:
        if random.random() < 0.2:
            continue
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
        if len(labels):
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
        else:
            ioa = np.zeros(1)

        if (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin + 20) and (
                ymax > ymin + 20):  # allow 30% obscuration of existing labels
            sel_ind = random.randint(0, len(sample_labels) - 1)
            # print(len(sample_labels))
            # print(sel_ind)
            # print((xmax-xmin, ymax-ymin))
            # print(image[ymin:ymax, xmin:xmax].shape)
            # print([[sample_labels[sel_ind], *box]])
            # print(labels.shape)
            hs, ws, cs = sample_images[sel_ind].shape
            r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws)
            r_w = int(ws * r_scale)
            r_h = int(hs * r_scale)

            if (r_w > 10) and (r_h > 10):
                r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))
                r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))
                temp_crop = image[ymin:ymin + r_h, xmin:xmin + r_w]
                m_ind = r_mask > 0
                if m_ind.astype(np.int32).sum() > 60:
                    temp_crop[m_ind] = r_image[m_ind]
                    # print(sample_labels[sel_ind])
                    # print(sample_images[sel_ind].shape)
                    # print(temp_crop.shape)
                    box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], dtype=np.float32)
                    if len(labels):
                        labels = np.concatenate((labels, [[sample_labels[sel_ind], *box]]), 0)
                    else:
                        labels = np.array([[sample_labels[sel_ind], *box]])
                    image[ymin:ymin + r_h, xmin:xmin + r_w] = temp_crop

    return labels


class Albumentations:
    # YOLOv5 Albumentations class (optional, only used if package is installed)
    def __init__(self):
        self.transform = None
        import albumentations as A
        self.transform = A.Compose([
            A.CLAHE(p=0.01),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.01),
            A.RandomGamma(gamma_limit=[80, 120], p=0.01),
            A.Blur(p=0.01),
            A.MedianBlur(p=0.01),
            A.ToGray(p=0.01),
            A.ImageCompression(quality_lower=75, p=0.01), ],
            bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
        # logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))

    def __call__(self, im, labels, p=1.0):
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels


def create_folder(path='./new'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder


def flatten_recursive(path='../coco'):
    # Flatten a recursive directory by bringing all files to top level
    new_path = Path(path + '_flat')
    create_folder(new_path)
    for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
        shutil.copyfile(file, new_path / Path(file).name)


def extract_boxes(path='../coco/'):  # from utils.datasets import *; extract_boxes('../coco128')
    # Convert detection dataset into classification dataset, with one directory per class

    path = Path(path)  # images dir
    shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in img_formats:
            # image
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if Path(lb_file).exists():
                with open(lb_file, 'r') as f:
                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                    if not f.parent.is_dir():
                        f.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'


def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit('../coco')
    Arguments
        path:           Path to images directory
        weights:        Train, val, test weights (list)
        annotated_only: Only use images with an annotated txt file
    """
    path = Path(path)  # images dir
    files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], [])  # image files only
    n = len(files)  # number of files
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for i, img in tqdm(zip(indices, files), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path / txt[i], 'a') as f:
                f.write(str(img) + '\n')  # add image to txt file


def load_segmentations(self, index):
    key = '/work/handsomejw66/coco17/' + self.img_files[index]
    # print(key)  # /work/handsomejw66/coco17/
    return self.segs[key]
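To double-check the key change — with onnx=True the letterbox must return a square, stretched image with no padding — here is a quick test with a dummy frame (the 480x864 shape is arbitrary):

import numpy as np
from utils.datasets import letterbox  # the modified file above

frame = np.zeros((480, 864, 3), dtype=np.uint8)  # an arbitrary non-square frame
img, ratio, (dw, dh) = letterbox(frame, 640, onnx=True)
print(img.shape, ratio, (dw, dh))  # expect (640, 640, 3), per-axis ratios, and zero padding

With auto=True (minimum rectangle) instead, the output would generally not be square, which breaks the fixed 1x3x640x640 input of the exported model.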

To run detection with a camera, use the command below, where detect_onnx.py is the modified detect script posted at the top. Here --source points at my USB camera, whose device id is 700; to run on a video or image instead, just replace 700 with the file path.

python detect_onnx.py --weights weights/yolov7.onnx --conf 0.25 --img-size 640 --source 700
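If you'd rather test on a file first, the same script takes a path as the source (the image path below is only an illustration):

python detect_onnx.py --weights weights/yolov7.onnx --conf 0.25 --img-size 640 --source inference/images/horses.jpg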

If you don't know your camera id, a short brute-force loop will find it. I adapted this from someone else's post but have since lost the link — if the original author sees this, leave a comment and I'll credit you. Here is the code (on my machine the loop stopped at id = 700):

import cv2

id = 0
while True:
    cap = cv2.VideoCapture(id)
    ret, frame = cap.read()
    cap.release()  # release each candidate device before probing the next
    if not ret:
        id += 1
        print(id)
    else:  # note: this loops until some device returns a frame
        print("final id =", id)
        break
