其他
答读者问 (二)为什么我的PCA分析报错了?
往期回顾:
Biomamba
2021/9/25
数据处理
rm(list = ls())#先清除所有变量
#####读入数据########
data=read.table("GSE.txt",header=T,sep="\t",row.names=1)
data=as.matrix(data)
data.class <- rownames(data)
data.pca <- prcomp(data, scale. = TRUE)#计算PCA
write.table(data.pca$rotation,file="PC.xls",quote=F,sep="\t")
write.table(predict(data.pca),file="PCnewTab.xls",quote=F,sep="\t")
B | NK | CD4T | CD8T | Mono | Neutro | |
---|---|---|---|---|---|---|
GSM2041183 | 0.1473657 | 0.1985070 | 0.3653793 | 0 | 0.1225928 | 0.1661553 |
GSM2041184 | 0.1429292 | 0.2410523 | 0.3581817 | 0 | 0.0097851 | 0.2480517 |
GSM2041185 | 0.1776962 | 0.2040224 | 0.4125045 | 0 | 0.1107103 | 0.0950666 |
GSM2041186 | 0.1287803 | 0.2118940 | 0.3621990 | 0 | 0.2436740 | 0.0534528 |
GSM2041187 | 0.1586567 | 0.1894103 | 0.4098673 | 0 | 0.1429274 | 0.0991383 |
GSM2041188 | 0.1069473 | 0.2379640 | 0.3438224 | 0 | 0.1676619 | 0.1436043 |
GSM2041189 | 0.1644211 | 0.1808029 | 0.3416325 | 0 | 0.1102115 | 0.2029321 |
GSM2041190 | 0.1330260 | 0.1813068 | 0.3818282 | 0 | 0.1808698 | 0.1229692 |
GSM2041191 | 0.1737450 | 0.1709841 | 0.4128166 | 0 | 0.1008284 | 0.1416259 |
GSM2041192 | 0.2308062 | 0.2027444 | 0.3573406 | 0 | 0.1495346 | 0.0595742 |
######PCA可视化
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
######正常组和肿瘤组的样品数目
group=c(rep("Normal",79),rep("Tumor",104))
######预测主成分的值
pcaPredict=predict(data.pca)
PC = data.frame(PC1 = pcaPredict[,1], PC2 = pcaPredict[,2],group=group)
head(PC)
## PC1 PC2 group
## GSM2041183 0.75273835 0.67558607 Normal
## GSM2041184 1.84402079 1.97989076 Normal
## GSM2041185 0.92023279 -0.20677388 Normal
## GSM2041186 -0.02871153 -1.26670228 Normal
## GSM2041187 0.69069491 -0.29914324 Normal
## GSM2041188 0.73819726 0.02010291 Normal
######取均值
PCA.mean=aggregate(PC[,1:2],list(group=PC$group),mean)
######自定义函数绘制椭圆
veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) {
theta <- (0:npoints) * 2 * pi/npoints
Circle <- cbind(cos(theta), sin(theta))
t(center + scale * t(Circle %*% chol(cov)))
}
df_ell <- data.frame()
for(g in unique(PC$group)){
df_ell <- rbind(df_ell, cbind(as.data.frame(with(PC[PC$group==g,],
veganCovEllipse(cov.wt(cbind(PC1,PC2),
wt=rep(1/length(PC1),length(PC1)))$cov,
center=c(mean(PC1),mean(PC2))))),group=g))
}
head(df_ell)
## PC1 PC2 group
## 1 1.529171 0.1845581 Normal
## 2 1.526877 0.2469502 Normal
## 3 1.520002 0.3085994 Normal
## 4 1.508574 0.3692624 Normal
## 5 1.492638 0.4286998 Normal
## 6 1.472257 0.4866770 Normal
#这里原本是levels(PC$group),但是大家可以看一下效果:
levels(PC$group)
## NULL
出图!!!
ggplot(data = PC,aes(PC1, PC2)) + ##原来这里都是PCA,应换成PC
geom_point(aes(color = group)) +
geom_path(data=df_ell, aes(x=PC1, y=PC2,colour=group), size=1, linetype=2, inherit.aes = FALSE) +
annotate("text", x=PCA.mean$PC1, y=PCA.mean$PC2,label=PCA.mean$group) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())