


1. cv2(OpenCV-Python):BGR顺序

img = cv2.imread(image_path)
h, w, c = img.shape
img = cv2.resize(img, (self.img_width, self.img_height))
img = img[:,:,::-1]#bgr转rgb


from PIL import Image
img = Image.open(img_path).convert('RGB')
img = Image.fromarray(img)


from skimage import io
img = io.imread(image_path)
from skimage import transform
img = transform.resize(img, (C, H * scale, W * scale), mode='reflect',anti_aliasing=False)


import scipy.misc
img = scipy.misc.imread(image_path)


import matplotlib.image as mpimg
img = mpimg.imread(image_path)






1.import xml.etree.ElementTree as ET




(1)ImageSets:划分(split)数据,txt文件存文件名(不带后缀),划分得比较多,也比较细,有不同用途,faster-rcnn只要其中的 'ImageSets/Main/{0}.txt'.format(split)即可,这里split in ['train', 'test', 'val']或其他别的也可以。





<annotation><folder>VOC2007</folder><filename>000001.jpg</filename><source><database>The VOC2007 Database</database><annotation>PASCAL VOC2007</annotation><image>flickr</image><flickrid>341012865</flickrid></source><owner><flickrid>Fried Camels</flickrid><name>Jinky the Fruit Bat</name></owner><size><width>353</width><height>500</height><depth>3</depth></size><segmented>0</segmented><object><name>dog</name><pose>Left</pose><truncated>1</truncated><difficult>0</difficult><bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox></object><object><name>person</name><pose>Left</pose><truncated>1</truncated><difficult>0</difficult><bndbox><xmin>8</xmin><ymin>12</ymin><xmax>352</xmax><ymax>498</ymax></bndbox></object></annotation>




Elijha:Pytorch数据读取(Dataset, DataLoader, DataLoaderIter)​zhuanlan.zhihu.com










class PascalVOCDataset(torch.utils.data.Dataset):
    """PASCAL VOC detection dataset backed by the standard devkit layout.

    Expects ``data_dir`` to contain the ``Annotations``, ``JPEGImages`` and
    ``ImageSets/Main`` folders. Each item is ``(image, target, index)`` where
    ``target`` is a ``BoxList`` carrying ``labels`` and ``difficult`` fields.
    """

    # The first entry is the background; the rest are the object categories.
    CLASSES = (
        "__background__ ",
        "aeroplane",
        "bicycle",
        "bird",
        "boat",
        "bottle",
        "bus",
        "car",
        "cat",
        "chair",
        "cow",
        "diningtable",
        "dog",
        "horse",
        "motorbike",
        "person",
        "pottedplant",
        "sheep",
        "sofa",
        "train",
        "tvmonitor",
    )

    def __init__(self, data_dir, split, use_difficult=False, transforms=None):
        """Index the image ids of one split.

        Args:
            data_dir: Root directory of the dataset.
            split: Name of the split file under ``ImageSets/Main``
                (for example ``"train"``, ``"val"`` or ``"test"``).
            use_difficult: Keep objects flagged ``difficult`` when True.
            transforms: Optional callable applied as
                ``transforms(img, target)`` and returning the same pair.
        """
        # Dataset root directory.
        self.root = data_dir
        # Which split to use: test, val, train, ...
        self.image_set = split
        # Whether to keep hard-to-recognise objects.
        self.keep_difficult = use_difficult
        # Image/target preprocessing.
        self.transforms = transforms

        # Path templates for the three devkit folders.
        self._annopath = os.path.join(self.root, "Annotations", "%s.xml")
        self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg")
        # Only the Main split lists are used.
        self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt")

        # Read every file name (without extension) up front. Surrounding
        # whitespace, including the trailing newline, is stripped so the
        # ids can be substituted into the path templates; blank lines are
        # skipped.
        with open(self._imgsetpath % self.image_set) as f:
            self.ids = [line.strip() for line in f if line.strip()]

        # Map from dataset index to image id.
        self.id_to_img_map = dict(enumerate(self.ids))

        # Class name -> index and index -> class name lookups.
        cls = PascalVOCDataset.CLASSES
        self.class_to_ind = dict(zip(cls, range(len(cls))))
        self.categories = dict(zip(range(len(cls)), cls))

    def __getitem__(self, index):
        """Return ``(img, target, index)`` for the sample at ``index``."""
        img_id = self.ids[index]
        # Load the image by id with PIL and force three RGB channels.
        img = Image.open(self._imgpath % img_id).convert("RGB")

        # Fetch the ground-truth boxes and clip them to the image.
        target = self.get_groundtruth(index)
        target = target.clip_to_image(remove_empty=True)

        # Normalisation, resizing and similar processing.
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        # NOTE(review): the original note describes target as four box
        # coordinates plus a class value; here the class labels travel in
        # the BoxList "labels" field - confirm against BoxList.
        return img, target, index

    def __len__(self):
        """Return the number of images in the split."""
        return len(self.ids)

    def get_groundtruth(self, index):
        """Parse the annotation of sample ``index`` into a ``BoxList``."""
        img_id = self.ids[index]
        # Locate the root node of the annotation XML.
        anno = ET.parse(self._annopath % img_id).getroot()
        # Extract image size, object classes and object boxes.
        anno = self._preprocess_annotation(anno)

        height, width = anno["im_info"]
        # BoxList is the project's container for a set of boxes.
        target = BoxList(anno["boxes"], (width, height), mode="xyxy")
        # Attach the labels and difficulty flags to the boxes.
        target.add_field("labels", anno["labels"])
        target.add_field("difficult", anno["difficult"])
        return target

    def _preprocess_annotation(self, target):
        """Convert a parsed VOC ``<annotation>`` element into tensors.

        Args:
            target: Root ``xml.etree.ElementTree.Element`` of one annotation.

        Returns:
            dict with ``"boxes"`` (float32, shape ``(N, 4)``, xyxy, 0-based),
            ``"labels"`` (int64, shape ``(N,)``), ``"difficult"`` (bool,
            shape ``(N,)``) and ``"im_info"`` as ``(height, width)``.
        """
        boxes = []
        gt_classes = []
        difficult_boxes = []
        TO_REMOVE = 1

        # Only <object> nodes are visited.
        for obj in target.iter("object"):
            # Objects marked difficult are skipped unless explicitly kept.
            difficult = int(obj.find("difficult").text) == 1
            if not self.keep_difficult and difficult:
                continue

            # Class name of the object.
            name = obj.find("name").text.lower().strip()

            # Bounding-box corners.
            bb = obj.find("bndbox")
            # Make pixel indexes 0-based
            # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211"
            box = [
                bb.find("xmin").text,
                bb.find("ymin").text,
                bb.find("xmax").text,
                bb.find("ymax").text,
            ]
            bndbox = tuple(int(coord) - TO_REMOVE for coord in box)

            # Record the object; without these appends every annotation
            # would come back empty.
            boxes.append(bndbox)
            gt_classes.append(self.class_to_ind[name])
            difficult_boxes.append(difficult)

        # Image size: convert the height/width text to an int tuple.
        size = target.find("size")
        im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))

        # Coordinates become float32. reshape(-1, 4) keeps the tensor 2-D
        # even when no object survived the filtering above.
        res = {
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(gt_classes, dtype=torch.int64),
            "difficult": torch.tensor(difficult_boxes, dtype=torch.bool),
            "im_info": im_info,
        }
        return res



{"info": {"description": "COCO 2014 Dataset","url": "http://cocodataset.org","version": "1.0","year": 2014,"contributor": "COCO Consortium","date_created": "2017/09/01"},"licenses": [{"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/","id": 1,"name": "Attribution-NonCommercial-ShareAlike License"}],"categories": [{"supercategory": "person","id": 1,"name": "person"}, {"supercategory": "vehicle","id": 2,"name": "bicycle"}],"images": [{"license": 1,"file_name": "COCO_val2014_000000581062.jpg","coco_url": "http://images.cocodataset.org/val2014/COCO_val2014_000000581062.jpg","height": 375,"width": 500,"date_captured": "2013-11-20 09:12:04","flickr_url": "http://farm4.staticflickr.com/3582/3328164554_6765a03a6a_z.jpg","id": 581062}],"annotations": [{"segmentation": [[134.12, 155.64, 136.9, 139.37]],"area": 2470.656449999998,"iscrowd": 0,"image_id": 581062,"bbox": [131.74, 51.28, 62.7, 113.49],"category_id": 1,"id": 469201}] 8,"name": "United States Government Work"}],"categories": [{"supercategory": "person","id": 1,"name": "person"}, {"supercategory": "vehicle","id": 2,"name": "bicycle"}, {"supercategory": "vehicle","id": 3,"name": "car"}, {"supercategory": "vehicle","id": 4,"name": "motorcycle"}, {"supercategory": "vehicle","id": 5,"name": "airplane"}, {"supercategory": "vehicle","id": 6,"name": "bus"}, {"supercategory": "vehicle","id": 7,"name": "train"}, {"supercategory": "vehicle","id": 8,"name": "truck"}, {"supercategory": "vehicle","id": 9,"name": "boat"}, {"supercategory": "outdoor","id": 10,"name": "traffic light"}, {"supercategory": "outdoor","id": 11,"name": "fire hydrant"}, {"supercategory": "outdoor","id": 13,"name": "stop sign"}, {"supercategory": "outdoor","id": 14,"name": "parking meter"}, {"supercategory": "outdoor","id": 15,"name": "bench"}, {"supercategory": "animal","id": 16,"name": "bird"}, {"supercategory": "animal","id": 17,"name": "cat"}, {"supercategory": "animal","id": 18,"name": "dog"}, {"supercategory": "animal","id": 19,"name": 
"horse"}, {"supercategory": "animal","id": 20,"name": "sheep"}, {"supercategory": "animal","id": 21,"name": "cow"}, {"supercategory": "animal","id": 22,"name": "elephant"}, {"supercategory": "animal","id": 23,"name": "bear"}, {"supercategory": "animal","id": 24,"name": "zebra"}, {"supercategory": "animal","id": 25,"name": "giraffe"}, {"supercategory": "accessory","id": 27,"name": "backpack"}, {"supercategory": "accessory","id": 28,"name": "umbrella"}, {"supercategory": "accessory","id": 31,"name": "handbag"}, {"supercategory": "accessory","id": 32,"name": "tie"}, {"supercategory": "accessory","id": 33,"name": "suitcase"}, {"supercategory": "sports","id": 34,"name": "frisbee"}, {"supercategory": "sports","id": 35,"name": "skis"}, {"supercategory": "sports","id": 36,"name": "snowboard"}, {"supercategory": "sports","id": 37,"name": "sports ball"}, {"supercategory": "sports","id": 38,"name": "kite"}, {"supercategory": "sports","id": 39,"name": "baseball bat"}, {"supercategory": "sports","id": 40,"name": "baseball glove"}, {"supercategory": "sports","id": 41,"name": "skateboard"}, {"supercategory": "sports","id": 42,"name": "surfboard"}, {"supercategory": "sports","id": 43,"name": "tennis racket"}, {"supercategory": "kitchen","id": 44,"name": "bottle"}, {"supercategory": "kitchen","id": 46,"name": "wine glass"}, {"supercategory": "kitchen","id": 47,"name": "cup"}, {"supercategory": "kitchen","id": 48,"name": "fork"}, {"supercategory": "kitchen","id": 49,"name": "knife"}, {"supercategory": "kitchen","id": 50,"name": "spoon"}, {"supercategory": "kitchen","id": 51,"name": "bowl"}, {"supercategory": "food","id": 52,"name": "banana"}, {"supercategory": "food","id": 53,"name": "apple"}, {"supercategory": "food","id": 54,"name": "sandwich"}, {"supercategory": "food","id": 55,"name": "orange"}, {"supercategory": "food","id": 56,"name": "broccoli"}, {"supercategory": "food","id": 57,"name": "carrot"}, {"supercategory": "food","id": 58,"name": "hot dog"}, {"supercategory": 
"food","id": 59,"name": "pizza"}, {"supercategory": "food","id": 60,"name": "donut"}, {"supercategory": "food","id": 61,"name": "cake"}, {"supercategory": "furniture","id": 62,"name": "chair"}, {"supercategory": "furniture","id": 63,"name": "couch"}, {"supercategory": "furniture","id": 64,"name": "potted plant"}, {"supercategory": "furniture","id": 65,"name": "bed"}, {"supercategory": "furniture","id": 67,"name": "dining table"}, {"supercategory": "furniture","id": 70,"name": "toilet"}, {"supercategory": "electronic","id": 72,"name": "tv"}, {"supercategory": "electronic","id": 73,"name": "laptop"}, {"supercategory": "electronic","id": 74,"name": "mouse"}, {"supercategory": "electronic","id": 75,"name": "remote"}, {"supercategory": "electronic","id": 76,"name": "keyboard"}, {"supercategory": "electronic","id": 77,"name": "cell phone"}, {"supercategory": "appliance","id": 78,"name": "microwave"}, {"supercategory": "appliance","id": 908400000632}]




  1. faster rcnn源码解读(四)之数据类型imdb.py和pascal_voc.py(主要是imdb和roidb数据类型的解说)

  2. faster rcnn源码解读总结

  3. Faster R-CNN源码中RPN的解析(自用)

  4. faster rcnn源码解读(六)之minibatch

  5. faster rcnn源码解读(五)之layer(网络里的input-data)

  6. faster rcnn源码理解(二)之AnchorTargetLayer(网络中的rpn_data)

  7. 【Faster R-CNN论文精度系列】从Faster R-CNN源码中,我们“学习”到了什么?

  8. faster rcnn源码解读(三)train_faster_rcnn_alt_opt.py

  9. 基于lis3dh的简易倾角仪c源码_开源网关apisix源码阅读和最佳实践

