其他
【强基固本】浅谈图像分割调优
“强基固本,行稳致远”,科学研究离不开理论基础,人工智能学科更是需要数学、物理、神经科学等基础学科提供有力支撑,为了紧扣时代脉搏,我们推出“强基固本”专栏,讲解AI领域的基础知识,为你的科研学习提供助力,夯实理论基础,提升原始创新能力,敬请关注。
来源:知乎—FlyEgle
初版代码已经提交,欢迎大家提issue和pr
DeepLabV3 U2Net HRNet-seg
这里DeepLabV3有空洞卷积存在,对于细致的抠图,效果不是很好,更加适用于连通性比较强的物体分割以及多类别分割。 HRNet-seg存在一个问题,最后输出的featuremap分辨率分别是原图的[1/4, 1/8, 1/16, 1/32],虽然有不断的高低分辨率交互,但是最高只有1/4的分辨率还是有点捉襟见肘,会影响一些小的pixel、空洞以及边缘的效果。所以做了简单的修改如下:
FPN+upsample形式
upsample+cat
# ----------------- DICE Loss--------------------
class DiceLoss(nn.Module):
    """Soft Dice loss for binary segmentation, computed from raw logits.

    The logits are passed through a sigmoid, both tensors are flattened per
    sample, and the loss is ``1 - mean(dice)`` over the batch, where

        dice = (2 * |P * T| + smooth) / (|P| + |T| + smooth)

    With this form the per-sample score stays in (0, 1], so the loss is never
    negative; an empty target that is predicted as empty scores exactly 1.
    """

    def __init__(self):
        super(DiceLoss, self).__init__()

    def forward(self, logits, targets, mask=False):
        """Return the scalar Dice loss.

        Args:
            logits: raw (pre-sigmoid) predictions; first dimension is the batch.
            targets: ground-truth tensor with the same number of elements per
                sample as ``logits``.
            mask: accepted for signature parity with ``BCELoss`` but not used.
        """
        num = targets.size(0)
        smooth = 1.

        probs = torch.sigmoid(logits)
        # reshape (rather than view) also accepts non-contiguous tensors.
        m1 = probs.reshape(num, -1)
        m2 = targets.reshape(num, -1).to(m1.dtype)

        intersection = (m1 * m2).sum(1)
        # The smoothing term is added once to numerator and denominator;
        # doubling it in the numerator would let the score exceed 1.
        score = (2. * intersection + smooth) / (m1.sum(1) + m2.sum(1) + smooth)
        return 1 - score.sum() / num


# -------------------- BCELoss -----------------------
class BCELoss(nn.Module):
    """Binary cross-entropy on logits (the sigmoid is applied inside the loss)."""

    def __init__(self):
        super(BCELoss, self).__init__()

    def forward(self, inputs, targets, weights=None, mask=False):
        """Return the mean binary cross-entropy between ``inputs`` and ``targets``.

        Args:
            inputs: logits, must be 4-D (NCHW).
            targets: labels; if not already 4-D, a channel axis is inserted at
                dim 1. Always cast to float.
            weights: optional weight tensor forwarded as the ``weight``
                argument of ``F.binary_cross_entropy_with_logits``.
            mask: when true, the logits are multiplied element-wise by the
                targets before the loss is computed.
        """
        assert len(inputs.shape) == 4, "inputs shape must be NCHW"
        if len(targets.shape) != 4:
            targets = targets.unsqueeze(1).float()
        else:
            targets = targets.float()

        if mask:
            inputs = inputs * targets

        losses = F.binary_cross_entropy_with_logits(inputs, targets, weights)
        return losses


# ----------------- DICE+BCE Loss--------------------
class DiceWithBCELoss(nn.Module):
    """Weighted sum of a BCE-style loss and the Dice loss.

    Args:
        weights: mapping with the keys ``'bce'`` and ``'dice'`` giving the
            multiplier applied to each term.
        mining: when true, ``BalanceCrossEntropyLoss`` is used in place of
            ``BCELoss``. That class is not defined in this snippet and must
            be available in the enclosing module.
    """

    def __init__(self, weights, mining=False):
        super(DiceWithBCELoss, self).__init__()
        self.dice_loss = DiceLoss()
        if mining:
            self.bce_loss = BalanceCrossEntropyLoss()
        else:
            self.bce_loss = BCELoss()
        self.weights = weights

    def forward(self, preds, targets):
        """Return ``weights['bce'] * bce + weights['dice'] * dice``."""
        bceloss = self.bce_loss(preds, targets)
        diceloss = self.dice_loss(preds, targets)
        return self.weights['bce'] * bceloss + self.weights['dice'] * diceloss
1. 分辨率
2. 数据增强
def build_transformers(crop_size=(320, 320)):
    """Assemble the training-time transform pipeline.

    Args:
        crop_size: output size handed to the random crop. A bare int is
            expanded to a square ``(crop_size, crop_size)`` tuple.

    Returns:
        A ``Compose`` wrapping the augmentation steps followed by the
        normalisation and tensor-conversion steps, in that order.
    """
    size = (crop_size, crop_size) if isinstance(crop_size, int) else crop_size

    steps = [
        # --- augmentation ---
        # Alternative crop: RandomCropScale(scale_size=size, scale=(0.4, 1.0))
        RandomCropScale2(scale_size=size, scale=(0.3, 1.2), prob=0.5),
        RandomHorizionFlip(p=0.5),
        RandomRotate(degree=15, mode=0),
        RandomGaussianBlur(p=0.2),
        # --- normalisation and tensor conversion ---
        Normalize(normalize=True, mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
        ToTensor(channel_first=True),
    ]
    return Compose(steps)
比较重要的一点是randomcrop,这里把crop区域的面积比例范围调整为[0.3, 1.2]:一方面是因为crop区域太小,容易忽视整体性;另一方面是crop大一些可以更好地对应大分辨率。要注意的是,crop的区域需要包含前景,可以通过设定前景占比来进行调整,也可以理解为手动balance数据。
class RandomCropScale2:
    """Random crop-and-scale for an image/target pair that avoids empty crops.

    Up to ten random crops are sampled; the first one whose target is not
    all-zero is passed to the scale function and returned. If none
    qualifies, the whole image (optionally centre-cropped to a permitted
    aspect ratio) is passed to the scale function instead.

    Args:
        scale_size: size forwarded to ``Scale`` (presumably the output
            size; ``Scale`` is defined outside this snippet).
        scale: (min, max) range of the crop area as a fraction of the image
            area. Default is (0.08, 1.0).
        ratio: (min, max) range of the crop's width / height ratio.
            Default is (3/4, 4/3).
        prob: threshold for the fallback path. A centre crop is applied when
            a uniform [0, 1) draw exceeds ``prob``; otherwise the
            uncropped image is scaled directly.

    Returns (from ``__call__``):
        scale_image: (ndarray) cropped and scaled image.
        scale_target: (ndarray) cropped and scaled target.
    """

    def __init__(self, scale_size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), prob=0.5):
        self.scale_size = scale_size
        self.scale = scale
        self.ratio = ratio
        self.prob = prob
        self.scale_func = Scale(self.scale_size)

        if (self.scale[0] > self.scale[1]) or (self.ratio[0] > self.ratio[1]):
            warnings.warn("Scale and ratio should be of kind (min, max)")

    def _isBG(self, tgts):
        """Return True when the target sums to zero (0 is background)."""
        return bool(np.sum(tgts) == 0)

    # TODO: fix empty bug (note carried over from the original author)
    def _crop_imgs(self, imgs, tgts):
        """Crop ``imgs``/``tgts`` identically and pass them to the scale function.

        Args:
            imgs: image array indexed as (height, width, channels).
            tgts: target array sharing the image's first two dimensions.
        """
        height, width, _ = imgs.shape
        area = height * width

        for _ in range(10):
            target_area = area * np.random.uniform(self.scale[0], self.scale[1])
            aspect_ratio = np.random.uniform(self.ratio[0], self.ratio[1])

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w < width and 0 < h < height:
                random_y = np.random.randint(0, height - h + 1)
                random_x = np.random.randint(0, width - w + 1)

                crop_image = imgs[random_y:random_y + h, random_x:random_x + w]
                crop_target = tgts[random_y:random_y + h, random_x:random_x + w]

                # Only accept crops that contain at least one non-zero target value.
                if not self._isBG(crop_target):
                    return self.scale_func(crop_image, crop_target)

        # Fallback: no acceptable random crop was found in ten attempts.
        if np.random.uniform(0, 1) > self.prob:
            # Centre crop that clamps the aspect ratio into the allowed range.
            in_ratio = float(width) / float(height)
            if in_ratio < min(self.ratio):
                w = width
                h = int(round(w / min(self.ratio)))
            elif in_ratio > max(self.ratio):
                h = height
                w = int(round(h * max(self.ratio)))
            else:
                w = width
                h = height

            crop_x = max((width - w) // 2, 0)
            crop_y = max((height - h) // 2, 0)
            # Slice by the computed crop size (h, w); slicing by the full
            # image size would ignore it and yield an off-centre crop.
            imgs = imgs[crop_y:crop_y + h, crop_x:crop_x + w]
            tgts = tgts[crop_y:crop_y + h, crop_x:crop_x + w]

        return self.scale_func(imgs, tgts)

    def __call__(self, imgs, tgts):
        """Apply the transform and return ``(image, target)``."""
        return self._crop_imgs(imgs, tgts)
3. 数据
# Threshold the prediction map `output` at `thre`.
# NOTE(review): in Python `1 or None` evaluates to 1, so this line always
# writes 1. Presumably "or None" is shorthand for "or leave these values
# unchanged" (i.e. this assignment is optional) -- confirm the intent.
output[output >= thre] = 1 or None
# Values below the threshold are set to 0.
output[output < thre] = 0
def edgePostProcess(mask, image):
    """Blur the image along the contour of the mask.

    Args:
        mask: ndarray of shape (h, w, 3) with values in [0, 255]. It is not
            modified; the function works on a copy.
        image: ndarray of shape (h, w, 3) with values in 0-255.

    Returns:
        outputs: the masked image with Gaussian-blurred pixels along a
            5-pixel-wide band drawn on the mask's external contours.
    """
    # Copy first so the in-place relabelling below does not leak to the caller.
    mask = mask.copy()
    mask[mask == 255] = 1
    # getShrink is defined outside this snippet (presumably it erodes the
    # mask -- confirm against its definition).
    mask = getShrink(mask)

    image = image * mask
    # Every zero-valued element becomes 255 after masking.
    image[image == 0] = 255
    blur_image = cv2.GaussianBlur(image, (5, 5), 0)

    new_mask = np.zeros(image.shape, np.uint8)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in
    # every version.
    contours = cv2.findContours(
        mask[:, :, 0],
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    cv2.drawContours(new_mask, contours, -1, (255, 255, 255), 5)

    # Use the blurred pixels on the drawn contour band, the masked image elsewhere.
    output = np.where(new_mask == np.array([255, 255, 255]), blur_image, image)
    return output
“强基固本”历史文章
理解Jacobian矩阵与行列式
理解Tensor Core
损失函数 | 交叉熵损失函数
多场景建模
深度学习优化背后包含哪些数学知识?
利用宇宙的能力来处理数据!「物理网络」远胜深度神经网络
入门 | 异常检测Anomaly Detection
通俗易懂的解释Sparse Convolution过程
现在的人工智能是否走上了数学的极端?
神经网络训练中的拓扑演化
一文详解colmap中的多视图重建算法原理
深度学习、计算机视觉面试题合集
大白话用Transformer做Object Detection
手把手教你学DBNet
更多强基固本专栏文章,
请点击文章底部“阅读原文”查看
分享、点赞、在看,给个三连击呗!