【强基固本】Deep learning, from getting started to letting loose: triplet loss fully explained
"Strengthen the foundations and you will travel steadily and far." Scientific research cannot do without theoretical foundations, and artificial intelligence in particular needs basic disciplines such as mathematics, physics, and neuroscience for solid support. To keep pace with the times, we present the "强基固本" column, which explains foundational knowledge in AI to support your research and study, consolidate your theoretical grounding, and strengthen your capacity for original innovation. Stay tuned.
Link: https://www.zhihu.com/people/liu-xin-chen-64
01
02
- anchor: the reference sample.
- positive: a positive sample for the anchor, i.e., one coming from the same person as the anchor.
- negative: a negative sample for the anchor, i.e., one coming from a different person.
Triplet loss trains the network so that samples with the same label have embeddings that are as close as possible in the embedding space, while samples with different labels have embeddings that are pushed as far apart as possible. For a triplet (a, p, n) the loss is

loss = max(d(a, p) - d(a, n) + margin, 0)

where d is the distance in embedding space and margin is a hyperparameter.
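To make this concrete, here is a minimal numpy sketch (the embedding values are made up purely for illustration) computing the loss of a single triplet:

import numpy as np

anchor   = np.array([0.0, 1.0, 0.5, 0.2])  # reference sample
positive = np.array([0.1, 0.9, 0.4, 0.3])  # same label as anchor
negative = np.array([1.0, 0.0, 0.8, 0.9])  # different label

margin = 0.5
d_pos = np.sum((anchor - positive) ** 2)   # squared Euclidean d(a, p) = 0.04
d_neg = np.sum((anchor - negative) ** 2)   # squared Euclidean d(a, n) = 2.58
loss = max(d_pos - d_neg + margin, 0.0)    # 0.0 here, i.e. an easy triplet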
03
- easy triplets: d(a, p) + margin < d(a, n). The loss is 0. This is the case we most want to see; these triplets are easy to distinguish.
- hard triplets: d(a, n) < d(a, p). The negative is closer to the anchor than the positive. This is the case we least want to see; these triplets are guaranteed to be misranked.
- semi-hard triplets: d(a, p) < d(a, n) < d(a, p) + margin. The negative is farther from the anchor than the positive, but not by a full margin; these triplets sit in the fuzzy region and still incur a positive loss.
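The three cases can be restated directly in code. The helper below is not from the original article; it is a sketch that classifies a triplet from its two distances and the margin:

def categorize_triplet(d_pos, d_neg, margin):
    '''Classify a triplet from d(a, p), d(a, n) and the margin (illustrative helper).'''
    if d_neg > d_pos + margin:
        return 'easy'       # loss is exactly 0
    if d_neg < d_pos:
        return 'hard'       # the negative is closer to the anchor than the positive
    return 'semi-hard'      # d(a, p) <= d(a, n) <= d(a, p) + margin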
04
- batch all: select every valid triplet and average the loss over the hard and semi-hard triplets (easy triplets contribute zero loss and are left out of the average). Assuming the batch is sampled as P classes with K samples each (batch size B = PK), this process yields PK(K - 1)(PK - K) valid triplets: PK choices of anchor, K - 1 choices of positive per anchor, and PK - K choices of negative per anchor. A worked count is sketched right after this list.
- batch hard: for each anchor, pick the hardest positive (largest d(a, p)) and the hardest negative (smallest d(a, n)). This process yields PK triplets, which are called the hardest triplets of the batch.
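Using the P-classes-times-K-samples batch construction above (the values of P and K below are made-up examples), the counts work out as follows:

P, K = 4, 4
B = P * K                              # batch size: 16
num_batch_all = B * (K - 1) * (B - K)  # anchors * positives * negatives = 16 * 3 * 12 = 576
num_batch_hard = B                     # one hardest triplet per anchor: 16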
05
anchor_output = ...    # shape [None, 128]
positive_output = ...  # shape [None, 128]
negative_output = ...  # shape [None, 128]
# Squared Euclidean distances between anchor/positive and anchor/negative
d_pos = tf.reduce_sum(tf.square(anchor_output - positive_output), 1)
d_neg = tf.reduce_sum(tf.square(anchor_output - negative_output), 1)
# margin is a scalar hyperparameter
loss = tf.maximum(0.0, margin + d_pos - d_neg)
loss = tf.reduce_mean(loss)
def _pairwise_distance(embeddings, squared=False):
    '''
    Compute the pairwise distance matrix between embeddings.
    ------------------------------------------
    Args:
        embeddings: feature vectors, shape (batch_size, vector_size)
        squared: if True, return squared Euclidean distances; otherwise Euclidean distances
    Returns:
        distances: pairwise distance matrix, shape (batch_size, batch_size)
    '''
    # The matrix product gives (batch_size, batch_size). Since the squared Euclidean
    # distance is |a-b|^2 = a^2 - 2ab + b^2, the cross term ab can be expressed as a
    # matrix multiplication.
    dot_product = tf.matmul(embeddings, tf.transpose(embeddings))
    # The diagonal of dot_product holds the squared norm of each embedding.
    square_norm = tf.diag_part(dot_product)
    # |a-b|^2 = a^2 - 2ab + b^2
    # tf.expand_dims(square_norm, axis=1) has shape (batch_size, 1); subtracting the
    # (batch_size, batch_size) matrix broadcasts over columns, and axis=0 over rows.
    distances = tf.expand_dims(square_norm, axis=1) - 2.0 * dot_product + tf.expand_dims(square_norm, axis=0)
    distances = tf.maximum(distances, 0.0)  # clamp small negatives from floating-point error to 0
    if not squared:
        # The gradient of sqrt at exactly 0 is infinite, so add a small epsilon (1e-16)
        # at the zero entries before the sqrt, then set those positions back to 0.
        mask = tf.to_float(tf.equal(distances, 0.0))
        distances = distances + mask * 1e-16
        distances = tf.sqrt(distances)
        distances = distances * (1.0 - mask)  # positions that were 0 stay 0
    return distances
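As a quick sanity check (not from the original article), running _pairwise_distance on a tiny hand-picked batch gives the expected Euclidean distances:

import tensorflow as tf
emb = tf.constant([[0.0, 0.0],
                   [3.0, 4.0],
                   [0.0, 1.0]])
with tf.Session() as sess:
    print(sess.run(_pairwise_distance(emb)))
# First row should be [0, 5, 1]: a 3-4-5 triangle and a unit step.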
A triplet (a, p, n), taken from batch indices (i, j, k), is valid only if: the indices i, j, k are pairwise distinct (in particular, a and p must not be the same sample); i and j have the same label; and i and k have different labels.
def _get_triplet_mask(labels):
    '''
    Return a 3D mask [a, p, n] that is True where the triplet (a, p, n) is valid.
    ----------------------------------
    Args:
        labels: labels of the training data, shape = (batch_size,)
    Returns:
        mask: 3D, shape = (batch_size, batch_size, batch_size)
    '''
    # Build a 2D matrix that is True at (i, j) when i != j, giving indices_not_equal.
    indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    indices_not_equal = tf.logical_not(indices_equal)
    # The final mask is 3D over (i, j, k), so add one dimension: i_not_equal_j gets a
    # new third axis, i.e. (batch_size, batch_size, 1), and similarly for the others.
    i_not_equal_j = tf.expand_dims(indices_not_equal, 2)
    i_not_equal_k = tf.expand_dims(indices_not_equal, 1)
    j_not_equal_k = tf.expand_dims(indices_not_equal, 0)
    # We want i != j, i != k and j != k, so AND the three conditions; the result is
    # True only where the indices (i, j, k) are pairwise distinct.
    distinct_indices = tf.logical_and(tf.logical_and(i_not_equal_j, i_not_equal_k), j_not_equal_k)
    # Likewise, use labels to require label(i) == label(j) and label(i) != label(k).
    label_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    i_equal_j = tf.expand_dims(label_equal, 2)
    i_equal_k = tf.expand_dims(label_equal, 1)
    valid_labels = tf.logical_and(i_equal_j, tf.logical_not(i_equal_k))
    # The mask must satisfy both constraints, so AND the two 3D tensors.
    mask = tf.logical_and(distinct_indices, valid_labels)
    return mask
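A small sanity check, again not from the article: with labels [0, 0, 1], the only valid triplets should be (a, p, n) = (0, 1, 2) and (1, 0, 2).

import tensorflow as tf
labels = tf.constant([0, 0, 1])
with tf.Session() as sess:
    print(sess.run(tf.where(_get_triplet_mask(labels))))  # -> [[0 1 2], [1 0 2]]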
def batch_all_triplet_loss(labels, embeddings, margin, squared=False):
    '''
    Triplet loss over a batch (batch all strategy).
    -------------------------------
    Args:
        labels: label data, shape = (batch_size,)
        embeddings: extracted feature vectors, shape = (batch_size, vector_size)
        margin: margin size, scalar
    Returns:
        triplet_loss: scalar, the loss over the batch
        fraction_postive_triplets: fraction of valid triplets with positive loss
    '''
    # Compute the pairwise distances, then use broadcasting to build a
    # (batch_size, batch_size, batch_size) 3D tensor of d(a, p) - d(a, n) + margin,
    # and finally multiply by the valid-triplet mask.
    pairwise_dis = _pairwise_distance(embeddings, squared=squared)
    anchor_positive_dist = tf.expand_dims(pairwise_dis, 2)  # shape (batch_size, batch_size, 1)
    assert anchor_positive_dist.shape[2] == 1, "{}".format(anchor_positive_dist.shape)
    anchor_negative_dist = tf.expand_dims(pairwise_dis, 1)  # shape (batch_size, 1, batch_size)
    assert anchor_negative_dist.shape[1] == 1, "{}".format(anchor_negative_dist.shape)
    triplet_loss = anchor_positive_dist - anchor_negative_dist + margin
    mask = _get_triplet_mask(labels)
    mask = tf.to_float(mask)
    triplet_loss = tf.multiply(mask, triplet_loss)  # zero out the invalid triplets
    triplet_loss = tf.maximum(triplet_loss, 0.0)
    # Count the triplets with strictly positive loss, then average the loss over them.
    valid_triplets = tf.to_float(tf.greater(triplet_loss, 1e-16))
    num_positive_triplets = tf.reduce_sum(valid_triplets)
    num_valid_triplets = tf.reduce_sum(mask)
    fraction_postive_triplets = num_positive_triplets / (num_valid_triplets + 1e-16)
    triplet_loss = tf.reduce_sum(triplet_loss) / (num_positive_triplets + 1e-16)
    return triplet_loss, fraction_postive_triplets
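A minimal end-to-end check of batch_all_triplet_loss (the shapes, labels, and margin below are made up for illustration):

import numpy as np
import tensorflow as tf
labels = tf.constant([0, 0, 1, 1, 2, 2], dtype=tf.int32)
embeddings = tf.constant(np.random.rand(6, 128).astype(np.float32))
loss, frac = batch_all_triplet_loss(labels, embeddings, margin=0.5, squared=True)
with tf.Session() as sess:
    print(sess.run([loss, frac]))  # scalar loss and fraction of positive triplets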
For batch hard, once the hardest positive and hardest negative have been found for each anchor, the loss is simply:
triplet_loss = tf.maximum(hardest_positive_dist - hardest_negative_dist + margin, 0.0)
def batch_hard_triplet_loss(labels, embeddings, margin, squared=False):
    """Build the triplet loss over a batch of embeddings.
    For each anchor, we get the hardest positive and hardest negative to form a triplet.
    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: tensor of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                 If false, output is the pairwise euclidean distance matrix.
    Returns:
        triplet_loss: scalar tensor containing the triplet loss
    """
    # Get the pairwise distance matrix
    pairwise_dist = _pairwise_distance(embeddings, squared=squared)
    # For each anchor, get the hardest positive
    # First, we need to get a mask for every valid positive (they should have same label)
    mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
    mask_anchor_positive = tf.to_float(mask_anchor_positive)
    # We put to 0 any element where (a, p) is not valid (valid if a != p and label(a) == label(p))
    anchor_positive_dist = tf.multiply(mask_anchor_positive, pairwise_dist)
    # shape (batch_size, 1)
    hardest_positive_dist = tf.reduce_max(anchor_positive_dist, axis=1, keepdims=True)
    # For each anchor, get the hardest negative
    # First, we need to get a mask for every valid negative (they should have different labels)
    mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
    mask_anchor_negative = tf.to_float(mask_anchor_negative)
    # We add the maximum value in each row to the invalid negatives (label(a) == label(n))
    max_anchor_negative_dist = tf.reduce_max(pairwise_dist, axis=1, keepdims=True)
    anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (1.0 - mask_anchor_negative)
    # shape (batch_size, 1)
    hardest_negative_dist = tf.reduce_min(anchor_negative_dist, axis=1, keepdims=True)
    # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
    triplet_loss = tf.maximum(hardest_positive_dist - hardest_negative_dist + margin, 0.0)
    # Get final mean triplet loss
    triplet_loss = tf.reduce_mean(triplet_loss)
    return triplet_loss
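The two 2D mask helpers used above are referenced but not defined in this excerpt. A minimal sketch, following the same pattern as _get_triplet_mask (mask[a, p] requires a != p with equal labels; mask[a, n] only requires different labels):

def _get_anchor_positive_triplet_mask(labels):
    '''2D mask where mask[a, p] is True iff a != p and labels[a] == labels[p].'''
    indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    indices_not_equal = tf.logical_not(indices_equal)
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    return tf.logical_and(indices_not_equal, labels_equal)

def _get_anchor_negative_triplet_mask(labels):
    '''2D mask where mask[a, n] is True iff labels[a] != labels[n].'''
    labels_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    return tf.logical_not(labels_equal)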