[Strong Foundations] A Tour of Common Convolutional Neural Networks (Paper Walkthroughs + Code Implementations)
"Strengthen the foundation to go steadily and far." Scientific research cannot do without a solid theoretical basis, and the discipline of artificial intelligence in particular needs the support of mathematics, physics, neuroscience, and other fundamental fields. To keep pace with the times, we present the "Strong Foundations" column, which explains foundational knowledge in AI to support your research and study, consolidate your theoretical grounding, and strengthen your capacity for original innovation. Stay tuned.
01 ResNet
1. Highlights
1.1 Demonstrated the degradation phenomenon of conventional (plain) convolutional neural networks (CNNs): as the network gets deeper, training accuracy saturates and then degrades (an optimization problem of backpropagation-based gradient descent).
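This motivates residual learning: instead of asking a stack of layers to fit the desired mapping $H(x)$ directly, the layers fit the residual $F(x) = H(x) - x$ and the block outputs

$$y = F(x, \{W_i\}) + x,$$

so an identity mapping only requires pushing $F$ toward zero, which is easier for the optimizer than fitting an identity with a stack of nonlinear layers.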
2. Reflection
import torch as t
import torch.nn as nn

def Conv1(in_planes, places, stride=2):
    # ResNet stem: 7x7 conv + BN + ReLU + 3x3 max pool
    return nn.Sequential(
        nn.Conv2d(in_channels=in_planes, out_channels=places, kernel_size=7, stride=stride, padding=3, bias=False),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )

class Bottleneck(nn.Module):
    def __init__(self, in_places, places, stride=1, downsampling=False, expansion=4):
        super(Bottleneck, self).__init__()
        self.expansion = expansion
        self.downsampling = downsampling
        # 1x1 reduce -> 3x3 -> 1x1 expand
        self.bottleneck = nn.Sequential(
            nn.Conv2d(in_channels=in_places, out_channels=places, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=places, out_channels=places * self.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places * self.expansion),
        )
        if self.downsampling:
            # 1x1 conv on the shortcut to match channels and stride
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_places, out_channels=places * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(places * self.expansion)
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = x
        out = self.bottleneck(x)
        if self.downsampling:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, blocks, num_classes=1000, expansion=4):
        super(ResNet, self).__init__()
        self.expansion = expansion
        self.conv1 = Conv1(in_planes=3, places=64)
        self.layer1 = self.make_layer(in_places=64, places=64, block=blocks[0], stride=1)
        self.layer2 = self.make_layer(in_places=256, places=128, block=blocks[1], stride=2)
        self.layer3 = self.make_layer(in_places=512, places=256, block=blocks[2], stride=2)
        self.layer4 = self.make_layer(in_places=1024, places=512, block=blocks[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(2048, num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride):
        layers = []
        # only the first block of a stage may downsample; the rest keep stride 1
        layers.append(Bottleneck(in_places, places, stride, downsampling=True))
        for i in range(1, block):
            layers.append(Bottleneck(places * self.expansion, places))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def ResNet50():
    return ResNet([3, 4, 6, 3])  # number of Bottleneck blocks in each layer

net = ResNet50()
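A quick sanity check of the model (a minimal sketch; ResNet-50 as built here expects 224×224 inputs because of the fixed 7×7 average pool and the 2048-dim fully connected layer):

x = t.randn(1, 3, 224, 224)  # dummy batch of one RGB image
y = net(x)
print(y.shape)  # torch.Size([1, 1000])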
02 MobileNetV2
The computational cost of a depthwise convolution followed by a pointwise (1×1) convolution, versus a standard convolution, where $D_F$ is the spatial size of the feature map, $D_K$ the kernel size, $M$ the number of input channels, and $N$ the number of output channels:

$$T_{DC+PC} = D_F \times D_F \times M \times D_K \times D_K + D_F \times D_F \times M \times N$$

$$T_{conv} = D_F \times D_F \times M \times N \times D_K \times D_K$$

Their ratio is $\frac{T_{DC+PC}}{T_{conv}} = \frac{1}{N} + \frac{1}{D_K^2}$, so with a 3×3 kernel the depthwise-separable form is roughly 8 to 9 times cheaper.
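To make the saving concrete, here is a standalone sketch that plugs in illustrative values ($D_F = 14$, $D_K = 3$, $M = 256$, $N = 512$ are assumptions for the example, not values from the paper):

# Illustrative mult-add counts for standard vs. depthwise-separable convolution
D_F, D_K, M, N = 14, 3, 256, 512  # feature-map size, kernel size, in/out channels (illustrative)
T_conv = D_F * D_F * M * N * D_K * D_K                    # standard convolution
T_dc_pc = D_F * D_F * M * D_K * D_K + D_F * D_F * M * N   # depthwise + pointwise
print(T_dc_pc / T_conv)  # equals 1/N + 1/D_K**2, about 0.113, i.e. roughly a 9x saving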
import torch as t
import torch.nn as nn

class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expansion):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        # identity shortcut only when stride is 1 and channel counts match
        self.use_res_connect = self.stride == 1 and inp == oup
        self.conv = nn.Sequential(
            # pw: pointwise expansion
            nn.Conv2d(inp, inp * expansion, 1, 1, 0, bias=False),
            nn.BatchNorm2d(inp * expansion),
            nn.ReLU6(inplace=True),
            # dw: 3x3 depthwise
            nn.Conv2d(inp * expansion, inp * expansion, 3, stride, 1, groups=inp * expansion, bias=False),
            nn.BatchNorm2d(inp * expansion),
            nn.ReLU6(inplace=True),
            # pw-linear: pointwise projection, no activation
            nn.Conv2d(inp * expansion, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)

class MobileNet(nn.Module):
    def __init__(self, blocks, num_classes=1000, inchannels=32):
        super(MobileNet, self).__init__()
        self.inchannels = inchannels
        self.head_conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True))
        self.layer1 = self.make_layer(blocks[0], outchannels=16, stride=1, expansion=1)
        self.layer2 = self.make_layer(blocks[1], outchannels=24, stride=2, expansion=6)
        self.layer3 = self.make_layer(blocks[2], outchannels=32, stride=2, expansion=6)
        self.layer4 = self.make_layer(blocks[3], outchannels=64, stride=2, expansion=6)
        self.layer5 = self.make_layer(blocks[4], outchannels=96, stride=1, expansion=6)
        self.layer6 = self.make_layer(blocks[5], outchannels=160, stride=2, expansion=6)
        self.layer7 = self.make_layer(blocks[6], outchannels=320, stride=1, expansion=6)
        self.end_conv_avgpool_conv = nn.Sequential(
            nn.Conv2d(320, 1280, kernel_size=1, stride=1, bias=False),
            nn.AvgPool2d(7, stride=1),
            nn.Conv2d(1280, num_classes, kernel_size=1, stride=1))

    def make_layer(self, block, outchannels, stride, expansion):
        layers = []
        # only the first block of a stage uses the given stride; the rest use stride 1
        layers.append(InvertedResidual(self.inchannels, outchannels, expansion=expansion, stride=stride))
        self.inchannels = outchannels
        for i in range(1, block):
            layers.append(InvertedResidual(self.inchannels, outchannels, expansion=expansion, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.head_conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.end_conv_avgpool_conv(x)
        x = x.view(x.size(0), -1)
        return x

def MobileNetV2():
    return MobileNet([1, 2, 3, 4, 3, 3, 1])  # number of InvertedResidual blocks per stage

net = MobileNetV2()
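As with ResNet above, a quick sanity check (a minimal sketch; the 7×7 average pool in the classifier head assumes 224×224 inputs):

x = t.randn(1, 3, 224, 224)  # dummy batch
y = net(x)
print(y.shape)  # torch.Size([1, 1000])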
03 EfficientNet
1. Highlights
Depthwise Conv: H', W', C are the height, width, and number of channels of the input feature map; after the depthwise convolution it becomes H × W × C.
F_sq (squeeze): a global average pooling layer turns the H × W × C input into a 1 × 1 × C output.
F_ex (excitation): 1×1 Conv -> Swish -> 1×1 Conv -> Sigmoid; the shape remains 1 × 1 × C.
F_scale: the input of squeeze-and-excitation is multiplied by its output, giving the MBConv output of shape H × W × C, which is followed by Conv -> Batch Normalization.
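The squeeze-and-excitation step above can be written compactly. The following is a minimal standalone sketch (the class name and the reduction ratio of 4 are illustrative assumptions; the full MBConv version appears in the EfficientNet code below):

import torch as t
from torch import nn
from torch.nn import functional as F

class SESketch(nn.Module):
    def __init__(self, channels, reduction=4):  # reduction=4 is an illustrative choice
        super().__init__()
        self.reduce = nn.Conv2d(channels, channels // reduction, kernel_size=1)
        self.expand = nn.Conv2d(channels // reduction, channels, kernel_size=1)

    def forward(self, x):                 # x: (N, C, H, W)
        s = F.adaptive_avg_pool2d(x, 1)   # F_sq: squeeze to (N, C, 1, 1)
        s = self.reduce(s)                # F_ex: 1x1 conv ...
        s = s * t.sigmoid(s)              # ... Swish ...
        s = self.expand(s)                # ... 1x1 conv ...
        return x * t.sigmoid(s)           # F_scale: sigmoid, then per-channel reweighting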
Stem -> layer1: { MBConv-start -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 2
-> layer2: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 5
-> layer3: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 5
-> layer4: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 8
-> layer5: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 8
-> layer6: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 11
-> layer7: { MBConv -> MBConv -> Dropconnect } -> { MBConv -> Dropconnect } x 2
-> Final Layer
In the diagram, Layers 2–7 run in sequence: after Layer2 finishes, Layer3 begins, and so on until Layer7 completes and the Final Layer is reached. "x 2" means the bracketed part along the arrow repeats twice; the black-and-white "+" denotes a shortcut (skip) connection.
import math
import torch as t
from torch import nn
from torch.nn import functional as F
from functools import partial

class SwishImplementation(t.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * t.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]
        sigmoid_i = t.sigmoid(i)
        # d/dx [x * sigmoid(x)] = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))

class MemoryEfficientSwish(nn.Module):
    def forward(self, x):
        return SwishImplementation.apply(x)

def drop_connect(inputs, p, training):
    # Drop connect (stochastic depth): randomly zero out whole examples in the
    # batch with probability p and rescale the survivors by 1 / keep_prob
    if not training:
        return inputs
    batch_size = inputs.shape[0]
    keep_prob = 1 - p
    random_tensor = keep_prob
    random_tensor += t.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    binary_tensor = t.floor(random_tensor)
    output = inputs / keep_prob * binary_tensor
    return output

def get_same_padding_conv2d(image_size=None):
    return partial(Conv2dStaticSamePadding, image_size=image_size)

def get_width_and_height_from_size(x):
    # Obtains width and height from an int or a tuple/list
    if isinstance(x, int):
        return x, x
    if isinstance(x, list) or isinstance(x, tuple):
        return x
    else:
        raise TypeError()

def calculate_output_image_size(input_image_size, stride):
    # Output image size of a Conv2dSamePadding with the given stride
    if input_image_size is None:
        return None
    image_height, image_width = get_width_and_height_from_size(input_image_size)
    stride = stride if isinstance(stride, int) else stride[0]
    image_height = int(math.ceil(image_height / stride))
    image_width = int(math.ceil(image_width / stride))
    return [image_height, image_width]

class Conv2dStaticSamePadding(nn.Conv2d):
    # 2D convolution with TensorFlow-style 'SAME' padding, for a fixed image size
    def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, **kwargs)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x

class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, input):
        return input
# MBConvBlock
class MBConvBlock(nn.Module):
    # e.g. ksize 3x3, 32 input channels, 16 output channels, stride 1
    def __init__(self, ksize, input_filters, output_filters, expand_ratio, stride, image_size=None):
        super().__init__()
        self._bn_mom = 0.1
        self._bn_eps = 0.01
        self._se_ratio = 0.25
        self._input_filters = input_filters
        self._output_filters = output_filters
        self._expand_ratio = expand_ratio
        self._kernel_size = ksize
        self._stride = stride

        inp = self._input_filters
        oup = self._input_filters * self._expand_ratio

        # Expansion phase (1x1 conv), skipped when expand_ratio == 1
        if self._expand_ratio != 1:
            Conv2d = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)

        # Depthwise convolution
        k = self._kernel_size
        s = self._stride
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = Conv2d(
            in_channels=oup, out_channels=oup, groups=oup,
            kernel_size=k, stride=s, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
        image_size = calculate_output_image_size(image_size, s)

        # Squeeze-and-excitation layer
        Conv2d = get_same_padding_conv2d(image_size=(1, 1))
        num_squeezed_channels = max(1, int(self._input_filters * self._se_ratio))
        self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
        self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)

        # Output phase (1x1 projection, no activation)
        final_oup = self._output_filters
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        # :param inputs: input tensor
        # :param drop_connect_rate: drop connect rate (float, between 0 and 1)
        # :return: output of block

        # Expansion and depthwise convolution
        x = inputs
        if self._expand_ratio != 1:
            expand = self._expand_conv(inputs)
            bn0 = self._bn0(expand)
            x = self._swish(bn0)
        depthwise = self._depthwise_conv(x)
        bn1 = self._bn1(depthwise)
        x = self._swish(bn1)

        # Squeeze and excitation
        x_squeezed = F.adaptive_avg_pool2d(x, 1)
        x_squeezed = self._se_reduce(x_squeezed)
        x_squeezed = self._swish(x_squeezed)
        x_squeezed = self._se_expand(x_squeezed)
        x = t.sigmoid(x_squeezed) * x

        # Projection
        x = self._bn2(self._project_conv(x))

        # Skip connection and drop connect
        input_filters, output_filters = self._input_filters, self._output_filters
        if self._stride == 1 and input_filters == output_filters:
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x
class EfficientNet(nn.Module):
    def __init__(self, cfgs, num_classes=1000, image_size=224):
        super().__init__()
        bn_mom = 0.01
        bn_eps = 0.001
        self.cfgs = cfgs

        # Stem
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_stem = Conv2d(3, 32, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=32, momentum=bn_mom, eps=bn_eps)

        # MBConv blocks
        self._blocks = nn.ModuleList([])
        for expand, ksize, input_filters, output_filters, stride, image_size in self.cfgs:
            self._blocks.append(MBConvBlock(ksize, input_filters, output_filters, expand, stride, image_size))

        # Head
        in_channels = self.cfgs[-1][3]
        out_channels = in_channels * 4
        image_size = self.cfgs[-1][-1]
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(0.2)
        self._fc = nn.Linear(out_channels, num_classes)
        self._swish = MemoryEfficientSwish()

    def _make_MBConv(self, inputs):
        x = self._swish(inputs)
        # Blocks, with a drop-connect rate that grows linearly with depth
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = 0.2
            drop_connect_rate *= float(idx) / len(self._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)
        return x

    def forward(self, inputs):
        # Stem
        x = self._conv_stem(inputs)
        x = self._bn0(x)
        # Convolution layers
        x = self._make_MBConv(x)
        # Head
        x = self._conv_head(x)
        x = self._bn1(x)
        x = self._swish(x)
        # Pooling and final linear layer
        x = self._avg_pooling(x)
        x = x.view(inputs.size(0), -1)
        x = self._dropout(x)
        x = self._fc(x)
        return x
def EfficientNetB7(**kwargs):
    MBConv_cfgs = [
        # expand_ratio, ksize, input_filters, output_filters, stride, resolution
        # layer1: 1 block in B0, 4 blocks in B7 (first block is MBConv1, i.e. expand ratio 1)
        [1, 3, 32, 16, 1, [112, 112]],
        [6, 3, 16, 16, 1, [112, 112]],
        [6, 3, 16, 16, 1, [112, 112]],
        [6, 3, 16, 16, 1, [112, 112]],
        # layer2: 2 blocks in B0, 7 blocks in B7
        [6, 3, 16, 24, 2, [112, 112]],
        [6, 3, 24, 24, 1, [56, 56]],
        [6, 3, 24, 24, 1, [56, 56]],
        [6, 3, 24, 24, 1, [56, 56]],
        [6, 3, 24, 24, 1, [56, 56]],
        [6, 3, 24, 24, 1, [56, 56]],
        [6, 3, 24, 24, 1, [56, 56]],
        # layer3: 2 blocks in B0, 7 blocks in B7
        [6, 5, 24, 40, 2, [56, 56]],
        [6, 5, 40, 40, 1, [28, 28]],
        [6, 5, 40, 40, 1, [28, 28]],
        [6, 5, 40, 40, 1, [28, 28]],
        [6, 5, 40, 40, 1, [28, 28]],
        [6, 5, 40, 40, 1, [28, 28]],
        [6, 5, 40, 40, 1, [28, 28]],
        # layer4: 3 blocks in B0, 10 blocks in B7
        [6, 3, 40, 80, 2, [28, 28]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        [6, 3, 80, 80, 1, [14, 14]],
        # layer5: 3 blocks in B0, 10 blocks in B7 (the first MBConv has stride 1)
        [6, 5, 80, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        [6, 5, 112, 112, 1, [14, 14]],
        # layer6: 4 blocks in B0, 13 blocks in B7
        [6, 5, 112, 192, 2, [14, 14]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        [6, 5, 192, 192, 1, [7, 7]],
        # layer7: 1 block in B0, 4 blocks in B7 (the first MBConv has stride 1)
        [6, 3, 192, 320, 1, [7, 7]],
        [6, 3, 320, 320, 1, [7, 7]],
        [6, 3, 320, 320, 1, [7, 7]],
        [6, 3, 320, 320, 1, [7, 7]],
    ]
    return EfficientNet(MBConv_cfgs, **kwargs)

net = EfficientNetB7()
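And the same sanity check for EfficientNet-B7 (a minimal sketch; the resolutions hard-coded in MBConv_cfgs assume 224×224 inputs):

net.eval()  # disable drop connect for a deterministic forward pass
x = t.randn(1, 3, 224, 224)
y = net(x)
print(y.shape)  # torch.Size([1, 1000])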
This article is intended for academic exchange only; it does not imply that this account endorses its views or is responsible for the authenticity of its content. Copyright belongs to the original author; if there is any infringement, please notify us for deletion.