[翻译稿]我是如何创建人体解剖医学包-gganatogram的

Original 土豆学生信生信技能树 2022-06-06

jimmy

　　前两天，生信技能树VIP交流群的haitao甩出了一个非常神奇的R包，也同步在各大其它交流群转发，马上就被好学的土豆看中了，而且还找到了包的作者的创造心声，非常值得推荐，所以就费劲翻译了一遍，希望让中文读者也了解背后的故事！

当然，这个故事之所以会被我挖掘也是因为昨天的推文：

诚邀您加盟，生信技能树

一切都是那么的顺理成章！

原文来源 https://jespermaag.github.io/blog/2018/gganatogram/
本文为翻译版，不当之处请见谅！

GGANATOGRAM

https://github.com/jespermaag/gganatogram

✎✎✎

这个包的目标是为不同的生物创建解剖图（anatogram），但是目前只有人类男性可用。

在看到ggseg的twitter帖子之后，我有了这个包的想法。类似的工具对整个生物学都会有帮助。由于找不到任何类似的东西，我决定创建我的第一个R包。

该软件包使用ArrayExpress Expression Atlas图中的组织坐标。
https://www.ebi.ac.uk/gxa/home
https://github.com/ebi-gene-expression-group/anatomogram

【创建包】

下载所有svg

为了创建包，我首先必须从Expression Atlas中检索所有组织的坐标。

使用以下命令下载解剖图包。

npm install --save anatomogram

从svg中提取坐标

我使用python来提取homo_sapiens.male.svg文件中每个组织的坐标、名称和变换（transform）。

以下代码获取svg，并将名称，坐标和转换写入文件，然后在R中处理。

from xml.dom import minidom
import os
import csv

# Base name shared by the input SVG ("<organism>.svg") and the output TSV
# ("<organism>_coords.tsv").
organism = "homo_sapiens.male"

# Transform string written for paths that are emitted without a transform of
# their own (the SVG identity matrix).
IDENTITY_TRANSFORM = 'matrix(1,0,0,1,0,0)'

doc = minidom.parse(organism + ".svg")

# `with` guarantees the file is closed even if parsing fails part-way through;
# newline='' is what the csv module expects so that no extra blank rows are
# produced on platforms that translate line endings.
with open(organism + '_coords.tsv', 'w', newline='') as your_csv_file:
    wr = csv.writer(your_csv_file, delimiter='\t')

    # Pass 1: every <path> element. The three checks are independent, so a
    # single path may produce more than one output row.
    for path in doc.getElementsByTagName('path'):
        path_id = path.getAttribute('id')
        path_d = path.getAttribute('d')

        # Outline layers are written under their own id.
        if "outline" in path_id or "LAYER_OUTLINE" in path_id:
            wr.writerow([path_id, path_d, IDENTITY_TRANSFORM])

        # Paths whose id starts with 'UB' are written under the text of their
        # first <title> child.
        if path_id.startswith('UB'):
            title = path.getElementsByTagName('title')[0].firstChild.nodeValue
            wr.writerow([title, path_d, IDENTITY_TRANSFORM])

        # Paths inside a parent whose id starts with 'UB' are written under
        # the parent's id, but only when that parent has no transform.
        parent_attrs = path.parentNode.attributes
        if parent_attrs['id'].value.startswith('UB'):
            if "transform" not in list(parent_attrs.keys()):
                wr.writerow([parent_attrs['id'].value, path_d, IDENTITY_TRANSFORM])

    # Pass 2: <g> elements, skipping the first five.
    # NOTE(review): the reason for skipping exactly five groups is not visible
    # here; presumably they are layout/outline groups -- confirm against the SVG.
    for group in doc.getElementsByTagName('g')[5:]:
        for node in group.childNodes:
            if "text" in node.nodeName:
                continue
            # Nodes such as comments have no attribute map at all; the
            # attribute lookups below would raise on them.
            if node.attributes is None:
                continue
            print(node.nodeName)
            print(node.attributes.keys())
            if 'd' in list(node.attributes.keys()):
                nodeVal = node.attributes['d'].value
                # The row is labelled with the id of the group's second child
                # node and carries the group's own transform.
                wr.writerow([group.childNodes[1].attributes['id'].value,
                             nodeVal,
                             group.attributes['transform'].value])

处理R中的坐标，并创建一个包

首先，我创建了一个函数，将坐标提取到数据框中并转换数据。需要进行一些手动编辑才能获得正确的坐标，并删除一些不起作用的组织。

#' Turn one SVG path into a data frame of transformed coordinates
#'
#' @param coords Character scalar: the path data, with tokens separated by
#'   single spaces and coordinate pairs written as "x,y".
#' @param name Character scalar stored in the `id` column of the result.
#' @param transMatrix Character scalar holding the SVG transform, either
#'   "matrix(a,b,c,d,e,f)" or "translate(tx,ty)". An empty or NA value is
#'   treated as "no transform".
#' @return A data frame with the raw coordinates (`X1`, `X2`), the `id`, and
#'   the transformed coordinates (`x`, `y`). `x` and `y` are set to NA for the
#'   whole shape when it falls outside the accepted x range (see below).
extractCoords <- function(coords, name, transMatrix) {
    tokens <- strsplit(coords, " ")

    # Blank out the two tokens that follow every "M" token; they become NA
    # entries in the coordinate table.
    moveIdx <- grep("M", tokens[[1]])
    tokens[[1]][c(moveIdx + 1, moveIdx + 2)] <- NA

    # Drop every token that contains a letter (the path commands themselves).
    # NA tokens do not match and are therefore kept.
    tokens[[1]] <- tokens[[1]][grep("[[:alpha:]]", tokens[[1]], invert = TRUE)]

    # Two-column numeric table; the columns come out named X1 and X2.
    anatCoord <- as.data.frame(lapply(tokens, function(u)
        matrix(as.numeric(unlist(strsplit(u, ","))), ncol = 2, byrow = TRUE)))
    # A point is missing as soon as either of its coordinates is missing.
    anatCoord$X2[is.na(anatCoord$X1)] <- NA
    anatCoord$X1[is.na(anatCoord$X2)] <- NA
    anatCoord$id <- name

    # grepl() always yields TRUE/FALSE, whereas grep() yields integer(0) on
    # "no match", which cannot be used as an `if` condition.
    if (any(grepl("matrix", transMatrix))) {
        transForm <- gsub("matrix\\(|\\)", "", transMatrix)
        transForm <- as.numeric(strsplit(transForm, ",")[[1]])

        # SVG matrix(a, b, c, d, e, f):
        #   x' = a * x + c * y + e
        #   y' = b * x + d * y + f
        anatCoord$x <- (anatCoord$X1 * transForm[1]) +
            (anatCoord$X2 * transForm[3]) + transForm[5]
        anatCoord$y <- (anatCoord$X1 * transForm[2]) +
            (anatCoord$X2 * transForm[4]) + transForm[6]
    } else if (any(grepl("translate", transMatrix))) {
        transForm <- gsub("translate\\(|\\)", "", transMatrix)
        transForm <- as.numeric(strsplit(transForm, ",")[[1]])
        # Hand-tuned offset for the one leukocyte shape whose translate starts
        # with 4.5230265.
        if (name == "leukocyte" && transForm[1] == 4.5230265) {
            transForm <- c(103.63591 + 4.5230265, -47.577078 + 11.586659)
        }
        anatCoord$x <- anatCoord$X1 + transForm[1]
        anatCoord$y <- anatCoord$X2 + transForm[2]
    } else if (all(is.na(transMatrix) | !nzchar(transMatrix))) {
        # No transform given: the coordinates are used unchanged.
        anatCoord$x <- anatCoord$X1
        anatCoord$y <- anatCoord$X2
    } else {
        stop("Unsupported SVG transform: ", transMatrix, call. = FALSE)
    }

    # Bronchus shapes reaching beyond x = 100 are blanked.
    if (name == "bronchus") {
        if (max(anatCoord$x, na.rm = TRUE) > 100) {
            anatCoord$x <- NA
            anatCoord$y <- NA
        }
    }

    # Any shape with a complete point left of x = -5 or right of x = 150 is
    # blanked entirely.
    validX <- anatCoord[complete.cases(anatCoord), ]$x
    if (any(validX < -5 | validX > 150)) {
        anatCoord$x <- NA
        anatCoord$y <- NA
    }

    return(anatCoord)
}

最后，用extractCoords函数处理了python输出。

# Read the table written by the Python step. Columns as used below:
# V1 = name/id, V2 = path data, V3 = transform string.
hsMale <- read.table('homo_sapiens.male_coords.tsv', sep='\t', stringsAsFactors=FALSE)

# One coordinate data frame per row. extractCoords() is called exactly once
# per row, and seq_len() keeps this safe for a table with zero rows.
hgMale_list <- lapply(seq_len(nrow(hsMale)), function(i) {
    extractCoords(hsMale$V2[i], hsMale$V1[i], hsMale$V3[i])
})

# Each element is named after its V1 value, cut at the first hyphen.
# NOTE(review): a V1 value that itself contains a hyphen is truncated there --
# confirm that is intended.
names(hgMale_list) <- gsub('-.*', '', hsMale$V1)

之后，将结果列表用作gganatogram包的基础。可以使用以下说明从github安装该软件包。

【安装】

使用devtools从github安装。

# install from Github
devtools::install_github("jespermaag/gganatogram")

【用法】

这个包需要ggplot2和ggpolypath

library(ggplot2)
library(ggpolypath)
library(gganatogram)
library(dplyr)

要使用函数gganatogram，您需要准备一个包含器官（organ）、颜色（colour）和数值（value）的数据框。

# Example input for gganatogram(): one row per organ with a grouping label
# (`type`), a fill colour and a numeric value. FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned.
organPlot <- data.frame(
    organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"),
    type = c("circulation", "circulation", "nervous system", "nervous system",
             "digestion", "digestion", "digestion"),
    colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"),
    value = c(10, 5, 1, 8, 2, 5, 5),
    stringsAsFactors = FALSE
)

head(organPlot)
##       organ           type colour value
## 1     heart    circulation    red    10
## 2 leukocyte    circulation    red     5
## 3     nerve nervous system purple     1
## 4     brain nervous system purple     8
## 5     liver      digestion orange     2
## 6   stomach      digestion orange     5

使用函数gganatogram，根据颜色填充器官。

# Male human anatogram; organs are filled using the `colour` column of organPlot.
gganatogram(
    data = organPlot,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "colour"
)

然后，我们可以使用ggplot主题和函数来调整图上细节

# Same plot as before, with ggplot2's theme_void() added on top.
gganatogram(
    data = organPlot,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "colour"
) +
    theme_void()

我们还可以使用hgMale_key绘制所有可用的组织，hgMale_key是包中自带的一个对象

# Print the `organ` column of hgMale_key (the output is shown below).
hgMale_key$organ
##  [1] "bone marrow"               "frontal cortex"           
##  [3] "prefrontal cortex"         "gastroesophageal junction"
##  [5] "caecum"                    "ileum"                    
##  [7] "rectum"                    "nose"                     
##  [9] "tongue"                    "penis"                    
## [11] "nasal pharynx"             "spinal cord"              
## [13] "throat"                    "diaphragm"                
## [15] "liver"                     "stomach"                  
## [17] "spleen"                    "duodenum"                 
## [19] "gall bladder"              "pancreas"                 
## [21] "colon"                     "small intestine"          
## [23] "appendix"                  "urinary bladder"          
## [25] "bone"                      "cartilage"                
## [27] "esophagus"                 "skin"                     
## [29] "brain"                     "heart"                    
## [31] "lymph_node"                "skeletal_muscle"          
## [33] "leukocyte"                 "temporal_lobe"            
## [35] "atrial_appendage"          "coronary_artery"          
## [37] "hippocampus"               "vas_deferens"             
## [39] "seminal_vesicle"           "epididymis"               
## [41] "tonsil"                    "lung"                     
## [43] "trachea"                   "bronchus"                 
## [45] "nerve"                     "kidney"
# Plot every row of hgMale_key, filled by its `colour` column.
gganatogram(
    data = hgMale_key,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "colour"
) +
    theme_void()

要跳过图表的轮廓，请使用outline = F

# Keep only the circulation and nervous-system rows, then plot them with
# outline switched off. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
organPlot %>%
    dplyr::filter(type %in% c('circulation', 'nervous system')) %>%
    gganatogram(
        outline = FALSE,
        fillOutline = '#a6bddb',
        organism = 'human',
        sex = 'male',
        fill = "colour"
    ) +
    theme_void()

我们可以根据给予每个器官的值来填充组织

# Fill organs by the numeric `value` column, using a white-to-red gradient.
gganatogram(
    data = organPlot,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "value"
) +
    theme_void() +
    scale_fill_gradient(low = "white", high = "red")

我们也可以使用facet_wrap来比较组。
首先创建两个数值不同的数据框，并在type列中标明各自所属的条件（例如Normal和Cancer）。

# Two copies of the same organ table stacked on top of each other; they
# differ in `value` and are told apart by the `type` column ("Normal" vs
# "Cancer"). FALSE is spelled out because `F` can be reassigned.
compareGroups <- rbind(
    data.frame(
        organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"),
        colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"),
        value = c(10, 5, 1, 8, 2, 5, 5),
        type = rep('Normal', 7),
        stringsAsFactors = FALSE
    ),
    data.frame(
        organ = c("heart", "leukocyte", "nerve", "brain", "liver", "stomach", "colon"),
        colour = c("red", "red", "purple", "purple", "orange", "orange", "orange"),
        value = c(5, 5, 10, 8, 2, 5, 5),
        type = rep('Cancer', 7),
        stringsAsFactors = FALSE
    )
)
# One panel per `type`, organs filled by `value` on a white-to-red gradient.
gganatogram(
    data = compareGroups,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "value"
) +
    theme_void() +
    facet_wrap(~type) +
    scale_fill_gradient(low = "white", high = "red")

# All rows of hgMale_key, split into one panel per `type`.
gganatogram(
    data = hgMale_key,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "colour"
) +
    theme_void() +
    facet_wrap(~type)

# Same faceted plot with outline switched off and free scales per panel.
# `scales` is the full name of the facet_wrap() argument (the original relied
# on partial matching of `scale`), and FALSE replaces the reassignable `F`.
gganatogram(
    data = hgMale_key,
    outline = FALSE,
    fillOutline = '#a6bddb',
    organism = 'human',
    sex = 'male',
    fill = "colour"
) +
    theme_void() +
    facet_wrap(~type, scales = 'free')

# Overwrite `type` with the organ name so that facet_wrap() draws one panel
# per organ. `scales` is the full name of the facet_wrap() argument (the
# original relied on partial matching of `scale`), and FALSE replaces the
# reassignable `F`.
organtype <- organPlot
organtype %>%
    mutate(type = organ) %>%
    gganatogram(
        outline = FALSE,
        fillOutline = '#a6bddb',
        organism = 'human',
        sex = 'male',
        fill = "colour"
    ) +
    theme_void() +
    facet_wrap(~type, scales = 'free')

／End.

独家福利

如果需要组装自己的服务器；代办生物信息学服务器

如果需要帮忙下载海外数据(GEO/TCGA/GTEx等等)，点我？

如果需要线下辅导及培训，看招学徒

如果需要个人电脑：个人计算机推荐

如果需要置办生物信息学书籍，看：生信人必备书单

如果需要实习岗位：实习职位发布

如果需要售后：点我

点击文末的阅读原文链接直达土豆的简书博客！

大摩宏观策略谈：2025中美变局展望

穿了跟没穿一样，胸型赞到爆！天然乳胶，性感到让男人腿软！

高三女生醉酒后被强奸致死？检方回应

高三女生醉酒后被强奸致死？检方回应

那些内心强大的孩子，童年被允许做过这1件事

[翻译稿]我是如何创建人体解剖医学包-gganatogram的

下载所有svg

从svg中提取坐标

处理R中的坐标，并创建一个包

您可能也对以下帖子感兴趣

大摩宏观策略谈：2025中美变局展望

穿了跟没穿一样，胸型赞到爆！天然乳胶，性感到让男人腿软！

高三女生醉酒后被强奸致死？检方回应

高三女生醉酒后被强奸致死？检方回应

那些内心强大的孩子，童年被允许做过这1件事

生成图片，分享到微信朋友圈

[翻译稿]我是如何创建人体解剖医学包-gganatogram的

下载所有svg

从svg中提取坐标

处理R中的坐标，并创建一个包

您可能也对以下帖子感兴趣