【他山之石】The Ultra-Lightweight YOLO-Nano
"Stones from other hills may serve to polish jade." Only by standing on the shoulders of giants can we see higher and go farther, and on the road of research a favorable wind helps us move ahead faster. To that end, we have collected and organized practical code links, datasets, software, programming tips and more, and opened the "他山之石" column to help you ride the wind and waves and forge ahead. Stay tuned.
Link: https://www.zhihu.com/people/yang-jian-hua-63-91
01
1.1 Backbone
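The backbone code itself is not reproduced in this excerpt. From the head code below one can read off the contract it must satisfy: shufflenetv2() returns three feature maps (c3, c4, c5) at strides 8/16/32, with 116/232/464 channels for the 1.0x model (scaled by width for 0.5x). A minimal stand-in with that interface, purely illustrative and not the real ShuffleNetV2, might look like this:

# Stand-in with the same output contract as the repo's shufflenetv2():
# (c3, c4, c5) at strides 8/16/32, channel counts taken from the conv1x1
# layers and self.stride in the YOLONano class below.
import torch
import torch.nn as nn

class BackboneSketch(nn.Module):
    def __init__(self, channels=(116, 232, 464)):
        super().__init__()
        c3, c4, c5 = channels
        self.stage3 = nn.Conv2d(3, c3, 3, stride=8, padding=1)    # /8
        self.stage4 = nn.Conv2d(c3, c4, 3, stride=2, padding=1)   # /16
        self.stage5 = nn.Conv2d(c4, c5, 3, stride=2, padding=1)   # /32

    def forward(self, x):
        c3 = self.stage3(x)
        c4 = self.stage4(c3)
        c5 = self.stage5(c4)
        return c3, c4, c5

if __name__ == '__main__':
    feats = BackboneSketch()(torch.randn(1, 3, 416, 416))
    print([f.shape for f in feats])  # [1,116,52,52], [1,232,26,26], [1,464,13,13]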
1.2 Neck
# FPN
p4 = self.smooth_0(p4 + F.interpolate(p5, scale_factor=2.0))
p3 = self.smooth_1(p3 + F.interpolate(p4, scale_factor=2.0))
# PAN
p4 = self.smooth_2(p4 + F.interpolate(p3, scale_factor=0.5))
p5 = self.smooth_3(p5 + F.interpolate(p4, scale_factor=0.5))
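These lines fuse the three backbone scales top-down (FPN) and then bottom-up (PAN), using F.interpolate both for the 2x upsampling and for the 0.5x downsampling. A quick way to sanity-check the shapes is to run the same fusion on dummy 96-channel features for a 416x416 input (strides 8/16/32 give 52x52, 26x26, 13x13), with plain 3x3 convolutions standing in for the repo's Conv smoothing blocks:

# Shape check for the FPN + PAN fusion above (hypothetical standalone version).
import torch
import torch.nn as nn
import torch.nn.functional as F

smooth = nn.ModuleList([nn.Conv2d(96, 96, 3, padding=1) for _ in range(4)])
p3 = torch.randn(1, 96, 52, 52)   # stride 8
p4 = torch.randn(1, 96, 26, 26)   # stride 16
p5 = torch.randn(1, 96, 13, 13)   # stride 32

# FPN: top-down, upsample by 2 and add
p4 = smooth[0](p4 + F.interpolate(p5, scale_factor=2.0))
p3 = smooth[1](p3 + F.interpolate(p4, scale_factor=2.0))
# PAN: bottom-up, downsample by 2 (scale_factor=0.5) and add
p4 = smooth[2](p4 + F.interpolate(p3, scale_factor=0.5))
p5 = smooth[3](p5 + F.interpolate(p4, scale_factor=0.5))

print(p3.shape, p4.shape, p5.shape)  # spatial sizes unchanged: 52x52, 26x26, 13x13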
1.3 Head
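The class below relies on a few helpers that are not shown in this excerpt: Conv, shufflenetv2, and tools from the author's repository, plus the usual PyTorch/NumPy imports. Judging from the call sites, Conv is presumably a Conv2d + BatchNorm + activation wrapper whose k/p/g arguments map to kernel_size/padding/groups, so the g=96 layers in the detection heads are depthwise convolutions. A plausible stand-in, offered only as an assumption:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# shufflenetv2 and tools come from the author's repository and are not sketched here.

class Conv(nn.Module):
    """Assumed Conv2d + BN + LeakyReLU block; the real repo implementation may differ."""
    def __init__(self, in_ch, out_ch, k=1, p=0, s=1, g=1):
        super().__init__()
        self.convs = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=k, padding=p, stride=s, groups=g, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.LeakyReLU(0.1, inplace=True)
        )

    def forward(self, x):
        return self.convs(x)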
class YOLONano(nn.Module):
    def __init__(self, device, input_size=None, num_classes=20, trainable=False, conf_thresh=0.001, nms_thresh=0.50, anchor_size=None, backbone='1.0x', diou_nms=False):
        super(YOLONano, self).__init__()
        self.device = device
        self.input_size = input_size
        self.num_classes = num_classes
        self.trainable = trainable
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.nms_processor = self.diou_nms if diou_nms else self.nms
        self.bk = backbone
        self.stride = [8, 16, 32]
        self.anchor_size = torch.tensor(anchor_size).view(3, len(anchor_size) // 3, 2)
        self.anchor_number = self.anchor_size.size(1)

        self.grid_cell, self.stride_tensor, self.all_anchors_wh = self.create_grid(input_size)
        self.scale = np.array([[[input_size[1], input_size[0], input_size[1], input_size[0]]]])
        self.scale_torch = torch.tensor(self.scale.copy(), device=device).float()

        if self.bk == '0.5x':
            # use shufflenetv2_0.5x as backbone
            print('Use backbone: shufflenetv2_0.5x')
            self.backbone = shufflenetv2(model_size=self.bk, pretrained=trainable)
            width = 0.4138
        elif self.bk == '1.0x':
            # use shufflenetv2_1.0x as backbone
            print('Use backbone: shufflenetv2_1.0x')
            self.backbone = shufflenetv2(model_size=self.bk, pretrained=trainable)
            width = 1.0
        else:
            print("For YOLO-Nano, we only support <0.5x, 1.0x> as our backbone !!")
            exit(0)

        # FPN+PAN
        self.conv1x1_0 = Conv(int(116*width), 96, k=1)
        self.conv1x1_1 = Conv(int(232*width), 96, k=1)
        self.conv1x1_2 = Conv(int(464*width), 96, k=1)

        self.smooth_0 = Conv(96, 96, k=3, p=1)
        self.smooth_1 = Conv(96, 96, k=3, p=1)
        self.smooth_2 = Conv(96, 96, k=3, p=1)
        self.smooth_3 = Conv(96, 96, k=3, p=1)

        # det head
        self.head_det_1 = nn.Sequential(
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            nn.Conv2d(96, self.anchor_number * (1 + self.num_classes + 4), 1)
        )
        self.head_det_2 = nn.Sequential(
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            nn.Conv2d(96, self.anchor_number * (1 + self.num_classes + 4), 1)
        )
        self.head_det_3 = nn.Sequential(
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            Conv(96, 96, k=3, p=1, g=96),
            Conv(96, 96, k=1),
            nn.Conv2d(96, self.anchor_number * (1 + self.num_classes + 4), 1)
        )
    def forward(self, x, target=None):
        # backbone
        c3, c4, c5 = self.backbone(x)

        # # neck
        # c5 = self.spp(c5)

        p3 = self.conv1x1_0(c3)
        p4 = self.conv1x1_1(c4)
        p5 = self.conv1x1_2(c5)

        # FPN
        p4 = self.smooth_0(p4 + F.interpolate(p5, scale_factor=2.0))
        p3 = self.smooth_1(p3 + F.interpolate(p4, scale_factor=2.0))
        # PAN
        p4 = self.smooth_2(p4 + F.interpolate(p3, scale_factor=0.5))
        p5 = self.smooth_3(p5 + F.interpolate(p4, scale_factor=0.5))

        # det head
        pred_s = self.head_det_1(p3)
        pred_m = self.head_det_2(p4)
        pred_l = self.head_det_3(p5)

        preds = [pred_s, pred_m, pred_l]
        total_conf_pred = []
        total_cls_pred = []
        total_txtytwth_pred = []
        B = HW = 0
        for pred in preds:
            B_, abC_, H_, W_ = pred.size()
            # [B, anchor_n * C, H, W] -> [B, H, W, anchor_n * C] -> [B, H*W, anchor_n*C]
            pred = pred.permute(0, 2, 3, 1).contiguous().view(B_, H_*W_, abC_)
            # Divide prediction to obj_pred, xywh_pred and cls_pred
            # [B, H*W*anchor_n, 1]
            conf_pred = pred[:, :, :1 * self.anchor_number].contiguous().view(B_, H_*W_*self.anchor_number, 1)
            # [B, H*W*anchor_n, num_cls]
            cls_pred = pred[:, :, 1 * self.anchor_number : (1 + self.num_classes) * self.anchor_number].contiguous().view(B_, H_*W_*self.anchor_number, self.num_classes)
            # [B, H*W*anchor_n, 4]
            txtytwth_pred = pred[:, :, (1 + self.num_classes) * self.anchor_number:].contiguous()

            total_conf_pred.append(conf_pred)
            total_cls_pred.append(cls_pred)
            total_txtytwth_pred.append(txtytwth_pred)
            B = B_
            HW += H_*W_

        conf_pred = torch.cat(total_conf_pred, 1)
        cls_pred = torch.cat(total_cls_pred, 1)
        txtytwth_pred = torch.cat(total_txtytwth_pred, 1).view(B, -1, 4)
        # train
        if self.trainable:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.anchor_number, 4)
            x1y1x2y2_pred = (self.decode_boxes(txtytwth_pred) / self.scale_torch).view(-1, 4)
            x1y1x2y2_gt = target[:, :, 7:].view(-1, 4)

            # compute iou
            iou_pred = tools.iou_score(x1y1x2y2_pred, x1y1x2y2_gt, batch_size=B)

            txtytwth_pred = txtytwth_pred.view(B, -1, 4)

            # compute loss
            conf_loss, cls_loss, bbox_loss, total_loss = tools.loss(pred_conf=conf_pred,
                                                                    pred_cls=cls_pred,
                                                                    pred_txtytwth=txtytwth_pred,
                                                                    label=target,
                                                                    num_classes=self.num_classes
                                                                    )
            return conf_loss, cls_loss, bbox_loss, total_loss
        # test
        else:
            txtytwth_pred = txtytwth_pred.view(B, HW, self.anchor_number, 4)
            with torch.no_grad():
                # batch size = 1
                all_obj = torch.sigmoid(conf_pred)[0]  # index 0 because the test batch size is 1
                all_bbox = torch.clamp((self.decode_boxes(txtytwth_pred) / self.scale_torch)[0], 0., 1.)
                all_class = (torch.softmax(cls_pred[0, :, :], dim=1) * all_obj)

                # separate box pred and class conf
                all_obj = all_obj.to('cpu').numpy()
                all_class = all_class.to('cpu').numpy()
                all_bbox = all_bbox.to('cpu').numpy()

                bboxes, scores, cls_inds = self.postprocess(all_bbox, all_class)
                # print(len(all_boxes))

                return bboxes, scores, cls_inds
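Putting it together, a hypothetical inference call might look like the sketch below. The anchor sizes are placeholder [w, h] pairs (three per scale, nine in total); the real values, along with create_grid, decode_boxes, postprocess, and the nms/diou_nms routines referenced above, live in the rest of the repository, so this only runs with the full code base available.

# Hypothetical usage sketch; anchor values are placeholders, not the repo's config.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
anchor_size = [[10, 13], [16, 30], [33, 23],
               [30, 61], [62, 45], [59, 119],
               [116, 90], [156, 198], [373, 326]]   # 3 anchors per scale

model = YOLONano(device, input_size=[416, 416], num_classes=20,
                 trainable=False, anchor_size=anchor_size, backbone='1.0x').to(device)
model.eval()

x = torch.randn(1, 3, 416, 416).to(device)   # the test branch assumes batch size 1
bboxes, scores, cls_inds = model(x)           # normalized boxes, scores, class indices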
02
03
04
4.1 VOC
4.2 COCO
05
06