Other
【他山之石】A Collection of PyTorch Tricks
Author: 赵剑行
URL: https://www.zhihu.com/people/zhao-xiao-de-93
01
Specify GPU IDs
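The body of this trick is not included above; a minimal sketch of the usual approach, setting CUDA_VISIBLE_DEVICES before any CUDA work is done (the GPU indices here are only illustrative):

import os

# Expose only the listed physical GPUs to this process; set this before the first CUDA call
os.environ["CUDA_VISIBLE_DEVICES"] = "0"      # use GPU 0 only
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # or expose GPUs 0 and 1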
02
View per-layer output details of a model
from torchsummary import summary
summary(your_model, input_size=(channels, H, W))  # input_size excludes the batch dimension
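A concrete usage sketch, assuming torchvision is installed; the resnet18 model, input size, and CPU device below are illustrative choices, not part of the original text:

import torchvision.models as models
from torchsummary import summary

model = models.resnet18()
# device="cpu" avoids needing a GPU; torchsummary defaults to device="cuda"
summary(model, input_size=(3, 224, 224), device="cpu")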
03
Gradient clipping
import torch.nn as nn
outputs = model(data)
loss = loss_fn(outputs, target)
optimizer.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=20, norm_type=2)
optimizer.step()
parameters – an iterable of Tensors whose gradients will be normalized
max_norm – the maximum allowed norm of the gradients
norm_type – the type of norm to use; defaults to the L2 norm
04
Expand the dimensions of a single image
When feeding a single image to a model you usually need to add a batch dimension, turning an (h, w, c) tensor into (1, h, w, c). Three ways are shown below: view, np.newaxis, and unsqueeze/squeeze.
import cv2
import torch
image = cv2.imread(img_path)
image = torch.tensor(image)
print(image.size())
img = image.view(1, *image.size())
print(img.size())
# output:
# torch.Size([h, w, c])
# torch.Size([1, h, w, c])
import cv2
import numpy as np
image = cv2.imread(img_path)
print(image.shape)
img = image[np.newaxis, :, :, :]
print(img.shape)
# output:
# (h, w, c)
# (1, h, w, c)
import cv2
import torch
image = cv2.imread(img_path)
image = torch.tensor(image)
print(image.size())
img = image.unsqueeze(dim=0)
print(img.size())
img = img.squeeze(dim=0)
print(img.size())
# output:
# torch.Size([h, w, c])
# torch.Size([1, h, w, c])
# torch.Size([h, w, c])
05
One-hot encoding
import torch

class_num = 8
batch_size = 4

def one_hot(label):
    """
    Convert a 1-D label tensor to a one-hot encoding.
    """
    label = label.resize_(batch_size, 1)
    m_zeros = torch.zeros(batch_size, class_num)
    # scatter_(dim, index, value): writes `value` at the positions given by `index` along `dim`
    onehot = m_zeros.scatter_(1, label, 1)
    return onehot.numpy()  # Tensor -> numpy array

label = torch.LongTensor(batch_size).random_() % class_num  # random labels, taken modulo class_num
print(one_hot(label))
# output:
[[0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0.]]
06
Avoid running out of GPU memory during validation
with torch.no_grad():
    # code that runs the model for prediction
    pass
During training, PyTorch can accumulate temporary variables that are no longer needed, eventually causing an out-of-memory error. The unused cached memory can be released with torch.cuda.empty_cache(), which, per the documentation, "releases all unoccupied cached memory currently held by the caching allocator so that those can be used in other GPU application and visible in nvidia-smi".
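A minimal validation-loop sketch combining the two ideas; model, val_loader, and loss_fn are placeholders assumed to exist:

import torch

model.eval()                  # disable dropout and batch-norm updates
with torch.no_grad():         # no graph is built, so activations are not retained
    for data, target in val_loader:
        output = model(data)
        loss = loss_fn(output, target)

torch.cuda.empty_cache()      # hand cached blocks back so other processes (and nvidia-smi) see them as free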
07
Learning-rate decay
import torch.optim as optim
from torch.optim import lr_scheduler

# Initialization before training
optimizer = optim.Adam(net.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, 10, 0.1)  # multiply the learning rate by 0.1 every 10 epochs

# During training
for n in range(n_epoch):
    ...                  # one epoch of training (forward, backward, optimizer.step())
    scheduler.step()     # step the scheduler once per epoch, after the optimizer updates
One reminder for the ReduceLROnPlateau scheduler: whether to set mode='min' or mode='max' depends on whether you monitor a loss or an accuracy, i.e. whether you call scheduler.step(loss) or scheduler.step(acc).
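A minimal sketch of that scheduler; the factor and patience values, and the training/validation helpers, are illustrative placeholders:

import torch.optim as optim
from torch.optim import lr_scheduler

optimizer = optim.Adam(net.parameters(), lr=0.001)
# mode='min': reduce the learning rate when the monitored loss stops decreasing
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

for n in range(n_epoch):
    train_one_epoch()         # placeholder for the training step
    val_loss = validate()     # placeholder for the validation step
    scheduler.step(val_loss)  # pass the monitored metric to the scheduler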
08
Freeze the parameters of certain layers
Before freezing, print the name of every parameter so you know which ones to target:
net = Network()  # your network
for name, value in net.named_parameters():
    print('name: {0},\t grad: {1}'.format(name, value.requires_grad))
Sample output:
name: cnn.VGG_16.convolution1_1.weight, grad: True
name: cnn.VGG_16.convolution1_1.bias, grad: True
name: cnn.VGG_16.convolution1_2.weight, grad: True
name: cnn.VGG_16.convolution1_2.bias, grad: True
name: cnn.VGG_16.convolution2_1.weight, grad: True
name: cnn.VGG_16.convolution2_1.bias, grad: True
name: cnn.VGG_16.convolution2_2.weight, grad: True
name: cnn.VGG_16.convolution2_2.bias, grad: True
# Parameters to be frozen
no_grad = [
'cnn.VGG_16.convolution1_1.weight',
'cnn.VGG_16.convolution1_1.bias',
'cnn.VGG_16.convolution1_2.weight',
'cnn.VGG_16.convolution1_2.bias'
]
net = Net.CTPN()  # get the network structure
for name, value in net.named_parameters():
    if name in no_grad:
        value.requires_grad = False
    else:
        value.requires_grad = True
Printing the parameter names again shows that the listed layers are now frozen:
name: cnn.VGG_16.convolution1_1.weight, grad: False
name: cnn.VGG_16.convolution1_1.bias, grad: False
name: cnn.VGG_16.convolution1_2.weight, grad: False
name: cnn.VGG_16.convolution1_2.bias, grad: False
name: cnn.VGG_16.convolution2_1.weight, grad: True
name: cnn.VGG_16.convolution2_1.bias, grad: True
name: cnn.VGG_16.convolution2_2.weight, grad: True
name: cnn.VGG_16.convolution2_2.bias, grad: True
# Finally, pass only the parameters that still require gradients to the optimizer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=0.01)
09
Use different learning rates for different layers
net = Network()  # your network
for name, value in net.named_parameters():
    print('name: {}'.format(name))
# Output:
# name: cnn.VGG_16.convolution1_1.weight
# name: cnn.VGG_16.convolution1_1.bias
# name: cnn.VGG_16.convolution1_2.weight
# name: cnn.VGG_16.convolution1_2.bias
# name: cnn.VGG_16.convolution2_1.weight
# name: cnn.VGG_16.convolution2_1.bias
# name: cnn.VGG_16.convolution2_2.weight
# name: cnn.VGG_16.convolution2_2.bias
# Split the parameters into groups by layer name
conv1_params = []
conv2_params = []
for name, parms in net.named_parameters():
    if "convolution1" in name:
        conv1_params += [parms]
    else:
        conv2_params += [parms]

# Then give each group its own learning rate in the optimizer:
optimizer = optim.Adam(
    [
        {"params": conv1_params, 'lr': 0.01},
        {"params": conv2_params, 'lr': 0.001},
    ],
    weight_decay=1e-3,
)
10
Model-related operations
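The body of this section is missing from the text above; as a placeholder, here is a minimal sketch of the most common model operations, saving and loading a state_dict (the file name checkpoint.pth is arbitrary):

import torch

# Save only the parameters (the usual recommendation)
torch.save(model.state_dict(), 'checkpoint.pth')

# Load them back into a model built with the same architecture
model.load_state_dict(torch.load('checkpoint.pth'))
model.eval()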
11
PyTorch's built-in one_hot function
Since PyTorch 1.1, one-hot encoding can be produced directly with torch.nn.functional.one_hot.
import torch.nn.functional as F
import torch
tensor = torch.arange(0, 5) % 3 # tensor([0, 1, 2, 0, 1])
one_hot = F.one_hot(tensor)  # the number of classes is inferred as max(tensor) + 1
# Output:
# tensor([[1, 0, 0],
# [0, 1, 0],
# [0, 0, 1],
# [1, 0, 0],
# [0, 1, 0]])
tensor = torch.arange(0, 5) % 3 # tensor([0, 1, 2, 0, 1])
one_hot = F.one_hot(tensor, num_classes=5)  # or specify the number of classes explicitly
# Output:
# tensor([[1, 0, 0, 0, 0],
# [0, 1, 0, 0, 0],
# [0, 0, 1, 0, 0],
# [1, 0, 0, 0, 0],
# [0, 1, 0, 0, 0]])
12
Network parameter initialization
Two common initialization approaches are described below.
(1) Use the built-in torch.nn.init methods.
from torch.nn import init
init.xavier_uniform_(net1[0].weight)  # in-place Xavier (Glorot) uniform initialization
(2) For more flexible schemes, initialize directly with numpy.
import numpy as np

for layer in net1.modules():
    if isinstance(layer, nn.Linear):  # check whether this is a linear layer
        param_shape = layer.weight.shape
        # mean 0, standard deviation 0.5; .float() keeps the weights in float32
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()
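A common related idiom, not in the original text, is to wrap the initialization rule in a function and apply it to every submodule with Module.apply; a minimal sketch:

import torch.nn as nn
from torch.nn import init

def weights_init(m):
    # called once for every submodule of the network
    if isinstance(m, nn.Linear):
        init.xavier_uniform_(m.weight)
        if m.bias is not None:
            init.zeros_(m.bias)

net1.apply(weights_init)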
13
Load built-in pretrained models
The torchvision.models submodule contains definitions for the following model architectures: AlexNet, VGG, ResNet, SqueezeNet, DenseNet.
# Instantiate a model with randomly initialized weights
import torchvision.models as models
resnet18 = models.resnet18()
alexnet = models.alexnet()
vgg16 = models.vgg16()
# Pass pretrained=True to download and load weights pretrained on ImageNet
import torchvision.models as models
resnet18 = models.resnet18(pretrained=True)
alexnet = models.alexnet(pretrained=True)
vgg16 = models.vgg16(pretrained=True)
More models are listed at:
https://pytorch-cn.readthedocs.io/zh/latest/torchvision/torchvision-models/