耐药患者组织与敏感的患者组织内的成纤维细胞比较
第一步 下载数据集
rm( list = ls() )
options( stringsAsFactors = F )
library( "GEOquery" )
GSE_name = 'GSE108565'
options( 'download.file.method.GEOquery' = 'libcurl' )
gset <- getGEO( GSE_name, getGPL = T )
save( gset, file = 'gset.Rdata' )
第二步 处理数据集
rm( list = ls() )
options( stringsAsFactors = F )
load( './gset.Rdata' )
library( "GEOquery" )
## 取表达矩阵和样本信息表
{
gset = gset[[1]]
exprSet = exprs( gset )
pdata = pData( gset )
chl = length( colnames( pdata ) )
group_list = as.character( pdata[, "neo-adjuvant chemotherapy:ch1"] )
}
## 分成两个组,耐药组和敏感组
dim( exprSet )
exprSet[ 1:5, 1:5 ]
table( group_list )
save(exprSet, group_list, file = 'final_exprSet.Rdata')
## plot
library( "ggfortify" )
## 聚类
{
colnames( exprSet ) = paste( group_list, 1:ncol( exprSet ), sep = '_' )
nodePar <- list( lab.cex = 0.4, pch = c( NA, 19 ), cex = 0.6, col = "red" )
hc = hclust( dist( t( exprSet ) ) )
png('hclust.png', res = 100, height = 1800)
plot( as.dendrogram( hc ), nodePar = nodePar, horiz = TRUE )
dev.off()
}
## PCA
data = as.data.frame( t( exprSet ) )
data$group = group_list
png( 'pca_plot.png', res=100 )
autoplot( prcomp( data[ , 1:( ncol( data ) - 1 ) ] ), data = data, colour = 'group') + theme_bw()
dev.off()
第三步 差异分析
rm( list = ls() )
## 差异分析
load( "./final_exprSet.Rdata" )
library( "limma" )
{
design <- model.matrix( ~0 + factor( group_list ) )
colnames( design ) = levels( factor( group_list ) )
rownames( design ) = colnames( exprSet )
}
design
contrast.matrix <- makeContrasts( "resistant-sensitive", levels = design )
contrast.matrix
{
fit <- lmFit( exprSet, design )
fit2 <- contrasts.fit( fit, contrast.matrix )
fit2 <- eBayes( fit2 )
nrDEG = topTable( fit2, coef = 1, n = Inf )
write.table( nrDEG, file = "nrDEG.out")
}
head(nrDEG)
## heatmap
library( "pheatmap" )
{
nrDEG_Z = nrDEG[ order( nrDEG$logFC ), ]
nrDEG_F = nrDEG[ order( -nrDEG$logFC ), ]
choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] )
choose_matrix = exprSet[ choose_gene, ]
choose_matrix = t( scale( t( choose_matrix ) ) )
choose_matrix[choose_matrix > 1] = 1
choose_matrix[choose_matrix < -1] = -1
annotation_col = data.frame( CellType = factor( group_list ) )
rownames( annotation_col ) = colnames( exprSet )
pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F,
annotation_legend = F, cluster_cols = F, filename = "heatmap.png")
}
## volcano plot
library( "ggplot2" )
logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )
logFC_cutoff
{
nrDEG$change = as.factor( ifelse( nrDEG$P.Value < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) )
save( nrDEG, file = "nrDEG.Rdata" )
this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
'\nThe number of up gene is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
'\nThe number of down gene is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) )
volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(P.Value), color = change)) +
geom_point( alpha = 0.4, size = 1.75) +
theme_set( theme_set( theme_bw( base_size = 15 ) ) ) +
xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
ggtitle( this_tile ) + theme( plot.title = element_text( size = 15, hjust = 0.5)) +
scale_colour_manual( values = c('blue','black','red') )
print( volcano )
ggsave( volcano, filename = 'volcano.png' )
dev.off()
}
第四步 注释
rm( list = ls() )
load( "./nrDEG.Rdata" )
## 注释
library( "clusterProfiler" )
library( "org.Hs.eg.db" )
## 需要注意这个数据的行名是ACCNUM
df <- bitr( rownames( nrDEG ), fromType = "ACCNUM", toType = c( "ENTREZID" ), OrgDb = org.Hs.eg.db )
head( df )
{
nrDEG$SYMBOL = rownames( nrDEG )
nrDEG = merge( nrDEG, df, by.y='ACCNUM', by.x='SYMBOL')
}
head( nrDEG )
{
gene_up = nrDEG[ nrDEG$change == 'UP', 'ENTREZID' ]
gene_down = nrDEG[ nrDEG$change == 'DOWN', 'ENTREZID' ]
gene_diff = c( gene_up, gene_down )
gene_all = as.character(nrDEG[ ,'ENTREZID'] )
}
{
geneList = nrDEG$logFC
names( geneList ) = nrDEG$ENTREZID
geneList = sort( geneList, decreasing = T )
}
{
## KEGG pathway analysis
kk.up <- enrichKEGG( gene = gene_up ,
organism = 'hsa' ,
universe = gene_all ,
pvalueCutoff = 0.8 ,
qvalueCutoff = 0.8 )
kk.down <- enrichKEGG( gene = gene_down ,
organism = 'hsa' ,
universe = gene_all ,
pvalueCutoff = 0.8 ,
qvalueCutoff = 0.8 )
}
head( kk.up )[ ,1:6 ]
head( kk.down )[ ,1:6 ]
library( "ggplot2" )
{
kegg_down_dt <- as.data.frame( kk.down )
kegg_up_dt <- as.data.frame( kk.up )
down_kegg <- kegg_down_dt[ kegg_down_dt$pvalue < 0.05, ]
down_kegg$group = -1
up_kegg <- kegg_up_dt[ kegg_up_dt$pvalue < 0.05, ]
up_kegg$group = 1
dat = rbind( up_kegg, down_kegg )
dat$pvalue = -log10( dat$pvalue )
dat$pvalue = dat$pvalue * dat$group
dat = dat[ order( dat$pvalue, decreasing = F ), ]
g_kegg <- ggplot( dat,
aes(x = reorder( Description, order( pvalue, decreasing=F ) ), y = pvalue, fill = group)) +
geom_bar( stat = "identity" ) +
scale_fill_gradient( low = "blue", high = "red", guide = FALSE ) +
scale_x_discrete( name = "Pathway names" ) +
scale_y_continuous( name = "log10P-value" ) +
coord_flip() + theme_bw() + theme( plot.title = element_text( hjust = 0.5 ) ) +
ggtitle( "Pathway Enrichment" )
print( g_kegg )
ggsave( g_kegg, filename = 'kegg_up_down.png' )
}
### GO database analysis
g_list = list( gene_up = gene_up, gene_down = gene_down, gene_diff = gene_diff)
go_enrich_results <- lapply( g_list, function( gene ) {
lapply( c( 'BP', 'MF', 'CC' ) , function( ont ) {
cat( paste( 'Now process', ont ) )
ego <- enrichGO( gene = gene,
universe = gene_all,
OrgDb = org.Hs.eg.db,
ont = ont ,
pAdjustMethod = "BH",
pvalueCutoff = 0.99,
qvalueCutoff = 0.99,
readable = TRUE)
print( head( ego ) )
return( ego )
})
})
save( go_enrich_results, file = 'go_enrich_results.Rdata' )
n1 = c( 'gene_up', 'gene_down', 'gene_diff' )
n2 = c( 'BP', 'MF', 'CC' )
for ( i in 1:3 ){
for ( j in 1:3 ){
fn = paste0( 'dotplot_', n1[i], '_', n2[j], '.png' )
cat( paste0( fn, '\n' ) )
png( fn, res = 150, width = 1080 )
print( dotplot( go_enrich_results[[i]][[j]] ) )
dev.off()
}
}
GEO数据挖掘系列文-第四期-肝细胞癌 (WGCNA)
GEO数据挖掘系列文-第五期-肝细胞癌 (多组差异分析)
我们提供GEO线上视频课程,以及课后答疑服务,如有意向,请扫描下面的二维码于我联系。(仅限于购买课程用户,需提供购买截图方可入答疑群)
(长按图片扫描二维码可添加微信)
点击阅读原文直达我们的网易云课堂购买链接!
请务必思考再三哦,不要瞎买