【强基固本】GMM: Gaussian Mixture Model
"强基固本, 行稳致远" (strengthen the foundation to advance steadily): scientific research rests on theoretical foundations, and AI in particular depends on mathematics, physics, neuroscience, and other basic disciplines for support. To keep pace with the times, we present the "强基固本" column, which explains fundamental knowledge in AI to support your research and study, consolidate your theoretical foundations, and strengthen your capacity for original innovation. Stay tuned.
Source: https://zhuanlan.zhihu.com/p/113200655
01 Load the data
from sklearn import datasets
import numpy as np
iris = datasets.load_iris()
X = iris.data
N, D = X.shape
display(X.shape, X[:10])
(150, 4)
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])
02 Initialize the parameters
mus = X[np.random.choice(X.shape[0], 3, replace=False)]
covs = [np.identity(4) for i in range(3)]
pis = [1/3] * 3
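These are the parameters of the mixture model being fit: K = 3 Gaussian components over the D = 4 iris features, each with a mixing weight, a mean, and a covariance, so that

$$p(x) = \sum_{k=1}^{K} \pi_k \, \mathcal{N}(x \mid \mu_k, \Sigma_k), \qquad \sum_{k=1}^{K} \pi_k = 1.$$

In the code above, the means are initialized to three randomly chosen data points, the covariances to identity matrices, and the weights to 1/3 each.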
03 E-step: compute the responsibilities
def gaussian(X, mu, cov):
    diff = X - mu
    return 1 / ((2 * np.pi) ** (D / 2) * np.linalg.det(cov) ** 0.5) * np.exp(-0.5 * np.dot(np.dot(diff, np.linalg.inv(cov)), diff))
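The gaussian function evaluates the multivariate normal density at a single point:

$$\mathcal{N}(x \mid \mu, \Sigma) = \frac{1}{(2\pi)^{D/2} \, |\Sigma|^{1/2}} \exp\!\left(-\frac{1}{2}(x - \mu)^{\top} \Sigma^{-1} (x - \mu)\right).$$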
gammas = []
for mu_, cov_, pi_ in zip(mus, covs, pis):  # loop over each component
    gamma_ = [[pi_ * gaussian(x_, mu_, cov_)] for x_ in X]  # loop over each point
    gammas.append(gamma_)
gammas = np.array(gammas)
gamma_total = gammas.sum(0)
gammas /= gamma_total
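This is the E-step: for each point x_n and component k, the responsibility

$$\gamma(z_{nk}) = \frac{\pi_k \, \mathcal{N}(x_n \mid \mu_k, \Sigma_k)}{\sum_{j=1}^{K} \pi_j \, \mathcal{N}(x_n \mid \mu_j, \Sigma_j)}$$

is computed; gamma_total holds the denominator, and the final gammas /= gamma_total performs the normalization.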
04 M-step: update the parameters
The code is as follows:
mus, covs, pis = [], [], []
for gamma_ in gammas:  # loop over each component
    gamma_sum = gamma_.sum()
    pi_ = gamma_sum / N
    mu_ = (gamma_ * X).sum(0) / gamma_sum
    cov_ = []
    for x_, gamma_i in zip(X, gamma_):
        diff = (x_ - mu_).reshape(-1, 1)
        cov_.append(gamma_i * np.dot(diff, diff.T))
    cov_ = np.sum(cov_, axis=0) / gamma_sum
    pis.append(pi_)
    mus.append(mu_)
    covs.append(cov_)
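This is the M-step. Writing N_k for the total responsibility of component k (gamma_sum in the code), the updates that maximize the expected complete-data log-likelihood are

$$N_k = \sum_{n=1}^{N} \gamma(z_{nk}), \qquad \pi_k = \frac{N_k}{N}, \qquad \mu_k = \frac{1}{N_k}\sum_{n=1}^{N} \gamma(z_{nk})\, x_n, \qquad \Sigma_k = \frac{1}{N_k}\sum_{n=1}^{N} \gamma(z_{nk})\,(x_n - \mu_k)(x_n - \mu_k)^{\top}.$$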
05 Iterate: the full training loop
def train_step(X, mus, covs, pis):
    # E-step: compute the responsibilities with the current parameters
    gammas = []
    for mu_, cov_, pi_ in zip(mus, covs, pis):  # loop over each component
        gamma_ = [[pi_ * gaussian(x_, mu_, cov_)] for x_ in X]  # loop over each point
        gammas.append(gamma_)
    gammas = np.array(gammas)
    gamma_total = gammas.sum(0)
    gammas /= gamma_total
    # M-step: re-estimate the parameters with the responsibilities fixed
    mus, covs, pis = [], [], []
    for gamma_ in gammas:  # loop over each component
        gamma_sum = gamma_.sum()
        pi_ = gamma_sum / N
        mu_ = (gamma_ * X).sum(0) / gamma_sum
        cov_ = []
        for x_, gamma_i in zip(X, gamma_):
            diff = (x_ - mu_).reshape(-1, 1)
            cov_.append(gamma_i * np.dot(diff, diff.T))
        cov_ = np.sum(cov_, axis=0) / gamma_sum
        pis.append(pi_)
        mus.append(mu_)
        covs.append(cov_)
    return mus, covs, pis

for _ in range(50):
    mus, covs, pis = train_step(X, mus, covs, pis)
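After training, each point can be assigned to the component that takes the largest responsibility for it. A minimal sketch (the predict helper below is not part of the original post; it simply re-runs the E-step with the final parameters):

def predict(X, mus, covs, pis):
    # Responsibility numerators for every component and point, shape (K, N);
    # normalization does not change the argmax, so it can be skipped here
    gammas = np.array([[pi_ * gaussian(x_, mu_, cov_) for x_ in X]
                       for mu_, cov_, pi_ in zip(mus, covs, pis)])
    # Hard assignment: the component with the largest responsibility
    return gammas.argmax(axis=0)

labels = predict(X, mus, covs, pis)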
The complete code for the whole training process, including the log-likelihood curve, is as follows:
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X = iris.data
N, D = X.shape

# Initialization: random data points as means, identity covariances, uniform weights
mus = X[np.random.choice(X.shape[0], 3, replace=False)]
covs = [np.identity(4) for i in range(3)]
pis = [1/3] * 3

def gaussian(X, mu, cov):
    diff = X - mu
    return 1 / ((2 * np.pi) ** (D / 2) * np.linalg.det(cov) ** 0.5) * np.exp(-0.5 * np.dot(np.dot(diff, np.linalg.inv(cov)), diff))

def get_likelihood(gamma_total):
    # gamma_total[n] = sum_k pi_k * N(x_n | mu_k, cov_k), so this is the log-likelihood
    return np.log(gamma_total).sum()

def train_step(X, mus, covs, pis):
    # E-step: compute the responsibilities with the current parameters
    gammas = []
    for mu_, cov_, pi_ in zip(mus, covs, pis):  # loop over each component
        gamma_ = [[pi_ * gaussian(x_, mu_, cov_)] for x_ in X]  # loop over each point
        gammas.append(gamma_)
    gammas = np.array(gammas)
    gamma_total = gammas.sum(0)
    gammas /= gamma_total
    # M-step: re-estimate the parameters with the responsibilities fixed
    mus, covs, pis = [], [], []
    for gamma_ in gammas:  # loop over each component
        gamma_sum = gamma_.sum()
        pi_ = gamma_sum / N
        mu_ = (gamma_ * X).sum(0) / gamma_sum
        cov_ = []
        for x_, gamma_i in zip(X, gamma_):
            diff = (x_ - mu_).reshape(-1, 1)
            cov_.append(gamma_i * np.dot(diff, diff.T))
        cov_ = np.sum(cov_, axis=0) / gamma_sum
        pis.append(pi_)
        mus.append(mu_)
        covs.append(cov_)
    return mus, covs, pis, gamma_total

log_LL = []
for _ in range(50):
    mus, covs, pis, gamma_total = train_step(X, mus, covs, pis)
    log_LL.append(get_likelihood(gamma_total))

plt.plot(log_LL)
plt.grid()
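get_likelihood turns the E-step's gamma_total into the log-likelihood of the data,

$$\ln p(X \mid \pi, \mu, \Sigma) = \sum_{n=1}^{N} \ln \sum_{k=1}^{K} \pi_k \, \mathcal{N}(x_n \mid \mu_k, \Sigma_k),$$

which EM never decreases, so the plotted curve should rise monotonically and flatten out as the algorithm converges. As a sanity check (not part of the original post), the same model can be fit with scikit-learn's built-in implementation and the learned weights and means compared with the hand-rolled ones; the component order may differ between the two:

from sklearn.mixture import GaussianMixture

gm = GaussianMixture(n_components=3, covariance_type='full').fit(X)
print(gm.weights_)  # mixing weights, comparable to pis
print(gm.means_)    # component means, comparable to mus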