YOLOv5 keeps pushing into segmentation! (Training/tuning/deployment in one pipeline)
Author | Q记    Editor | 所向披靡的张大刀
Preface
As the heavyweight champion of object detection, yolov5 has won over everyone from beginners to industry veterans with its fast training, strong results, and easy deployment. But yolov5 is no longer limited to detection: it has started pushing into classification, segmentation, and other tasks. This article looks at how the yolov5 framework is applied to segmentation and walks through the corresponding code changes.
1  Changes in Yolov5-seg
01 Code entry points for training, validation, and prediction
Training, validation, and prediction each get their own entry script (located under the segment/ directory of the repo):
|--train.py
|--val.py
|--predict.py
02 Data loading and preprocessing
|--dataset_seg
|--images
|--train
|--1.jpg
|--2.jpg
....
|--val
|--111.jpg
|--222.jpg
....
|--labels
|--train
|--1.txt
|--2.txt
....
|--val
|--111.txt
|--222.txt
....
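Each label .txt mirrors its image and holds one instance per line: the class index followed by the instance's normalized polygon vertices. The values below are made up purely for illustration:
# <class> <x1> <y1> <x2> <y2> ... <xn> <yn>  (all coordinates normalized to [0, 1])
0 0.681 0.485 0.670 0.487 0.676 0.497 0.679 0.496 0.689 0.495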
During augmentation (e.g. mosaic/letterbox) the polygon points are shifted together with the image:
segments = self.segments[index].copy()
if len(segments):
    for i_s in range(len(segments)):  # shift the segmentation points accordingly
        segments[i_s] = xyn2xy(
            segments[i_s],
            ratio[0] * w,
            ratio[1] * h,
            padw=pad[0],
            padh=pad[1])
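For context, xyn2xy (from utils/general.py in the yolov5 repo) just scales the normalized points to pixels and adds the padding offsets; roughly:
import numpy as np
import torch

def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    # Convert normalized (n,2) polygon points into pixel coordinates, shifted by the padding
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = w * x[..., 0] + padw  # x coordinates
    y[..., 1] = h * x[..., 1] + padh  # y coordinates
    return y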
At the same time, the multi-point polygons in the segmentation labels are converted into pixel-level masks (utils/dataloaders, line 156):
nl = len(labels)  # number of labels
if nl:  # labels exist
    labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)  # xyxy to normalized xywh
    if self.overlap:
        """
        Save all segmentation labels in a single mask of size (h, w), which speeds up training.
        Concretely:
        ① convert each polygon into a mask of the image size, with pixel value 1;
        ② sort the masks by area from largest to smallest, returning the corresponding indices;
        ③ assign pixel value 1 to the largest mask, 2 to the second largest, and so on, stacking
           all masks into a single layer (the sorting, as I understand it, is to minimize overflow
           where masks overlap);
        ④ clip the pixel values to the range 0..n (the number of labels), guarding against mask
           overlap that downsampling may introduce.
        All masks are thus stored in one array, and the per-instance masks can later be recovered
        from the indices and the pixel values.
        """
        masks, sorted_idx = polygons2masks_overlap(img.shape[:2],
                                                   segments,
                                                   downsample_ratio=self.downsample_ratio)
        masks = masks[None]  # (640, 640) -> (1, 640, 640)
        labels = labels[sorted_idx]
    else:
        """
        Otherwise save the segmentation labels as nl (number of labels) masks, size (nl, h, w).
        """
        masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] //
                                                                self.downsample_ratio, img.shape[1] //
                                                                self.downsample_ratio))
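To make the overlap encoding concrete, here is a minimal numpy sketch (not the library code, but equivalent in effect) of how per-instance masks collapse into one index mask and how an instance is recovered afterwards:
import numpy as np

# three binary instance masks of size (h, w) = (4, 4), made up for illustration
masks = np.zeros((3, 4, 4), dtype=np.uint8)
masks[0, :3, :3] = 1  # area 9 (largest)
masks[1, :2, :2] = 1  # area 4
masks[2, :1, :1] = 1  # area 1

areas = masks.sum(axis=(1, 2))  # [9, 4, 1]
order = np.argsort(-areas)      # sort largest first -> [0, 1, 2]
overlap = np.zeros((4, 4), dtype=np.uint8)
for rank, idx in enumerate(order):
    # later (smaller) masks get larger values, so they win in overlapping pixels
    overlap = np.maximum(overlap, masks[idx] * np.uint8(rank + 1))

# decoding: after the labels are reordered by `order`, instance k is (overlap == k + 1)
inst0 = (overlap == 1)  # the largest instance, minus any pixels occluded by smaller ones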
03 Network changes
The segmentation model subclasses the detection model, and the Detect head is extended by a Segment head that adds a prototype-mask (Proto) branch:
class SegmentationModel(DetectionModel):
    # YOLOv5 segmentation model
    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
        super().__init__(cfg, ch, nc, anchors)

class Segment(Detect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # upsampling proto branch
        self.detect = Detect.forward

    def forward(self, x):
        p = self.proto(x[0])  # segmentation branch: prototype masks
        x = self.detect(self, x)  # detection branch
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
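Each detection therefore carries nm=32 mask coefficients on top of the usual box/objectness/class outputs, and the final instance masks are linear combinations of the 32 protos (YOLACT-style). A minimal sketch, with shapes assumed for a 640x640 input (the proto map sits at stride 4):
import torch

protos = torch.randn(32, 160, 160)  # p returned by the proto branch: (nm, h/4, w/4)
coef = torch.randn(10, 32)          # mask coefficients of 10 kept detections, sliced from the detection output

# (10, 32) @ (32, 160*160) -> (10, 160, 160); sigmoid then threshold into binary masks
masks = (coef @ protos.view(32, -1)).sigmoid().view(-1, 160, 160) > 0.5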
04 Loss
# Mask regression
if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
    masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
marea = xywhn[i][:, 2:].prod(1)  # mask width, height normalized
mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))  # scale the boxes to the mask resolution so they line up with the processed labels
for bi in b.unique():
    j = b == bi  # matching index
    if self.overlap:
        mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
    else:
        mask_gti = masks[tidxs[i]][j]
    lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])

def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
    # Mask loss for one image
    pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n,32) @ (32,80,80) -> (n,80,80)
    loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")  # BCE loss
    return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()  # constrain the loss by keeping only pixels inside the predicted box
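crop_mask zeroes everything outside each box before the per-pixel losses are averaged, so only in-box pixels contribute; roughly (after utils/segment/general.py in the repo):
import torch

def crop_mask(masks, boxes):
    # masks: (n, h, w) loss or mask maps; boxes: (n, 4) xyxy in mask pixels
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # each (n, 1, 1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # (1, 1, w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # (1, h, 1)
    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))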
05 Evaluation metrics
stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
if len(stats) and stats[0].any():
    results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)  # compute AP for boxes and masks
    metrics.update(results)  # update the metrics
nt = np.bincount(stats[4].astype(int), minlength=nc)  # number of targets per class
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Return correct prediction matrix
    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
    Returns:
        correct (array[N, 10]), for 10 IoU levels
    """
    if masks:
        if overlap:
            nl = len(labels)
            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)  # decode the overlap mask back into per-instance binary masks
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
            gt_masks = gt_masks.gt_(0.5)
        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
    else:  # boxes
        iou = box_iou(labels[:, 1:], detections[:, :4])
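mask_iou computes the IoU between every GT/prediction pair on the flattened binary masks; a sketch (after utils/segment/general.py in the repo):
import torch

def mask_iou(mask1, mask2, eps=1e-7):
    # mask1: (N, h*w), mask2: (M, h*w), both binary float tensors -> IoU matrix (N, M)
    inter = torch.matmul(mask1, mask2.t()).clamp(0)
    union = mask1.sum(1)[:, None] + mask2.sum(1)[None] - inter
    return inter / (union + eps)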
2  Yolov5-seg training and tips
Training, validation, prediction, and export all follow the familiar yolov5 workflow:
# Train: single GPU
python segment/train.py --weights yolov5s-seg.pt --data coco128-seg.yaml --epochs 5 --img 640
# Train: multi-GPU DDP
python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --weights yolov5s-seg.pt --data coco128-seg.yaml --epochs 5 --img 640 --device 0,1,2,3
# Validate
bash data/scripts/get_coco.sh --val --segments  # download COCO val segments split (780MB, 5000 images)
python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640  # validate
# Predict
python segment/predict.py --weights yolov5m-seg.pt --source data/images/bus.jpg
# Export
python export.py --weights yolov5s-seg.pt --include onnx engine --img 640 --device 0  # export to ONNX and TensorRT
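The exported model mirrors the export branch of Segment.forward above, returning the detections (with the 32 mask coefficients appended to each row) plus the protos. A minimal onnxruntime sketch; the output order and shapes below are assumptions based on that forward code, for a yolov5s-seg exported at 640:
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("yolov5s-seg.onnx", providers=["CPUExecutionProvider"])
img = np.zeros((1, 3, 640, 640), dtype=np.float32)  # a letterboxed, 0-1 normalized image goes here
dets, protos = session.run(None, {session.get_inputs()[0].name: img})
# dets:   (1, 25200, 117) -> per anchor: 4 box + 1 obj + 80 classes + 32 mask coefficients
# protos: (1, 32, 160, 160) -> combine with each kept detection's coefficients as shown in section 03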
Tips
Conclusion
References:
[1] https://github.com/ultralytics/yolov5