第一次看到如此原始的绘图代码
在运行RSeQC软件对转录组的比对好的bam文件进行质控的时候,我发现了一个很无语的现象,就是它模仿fastqc的4个质控图里面,GC含量分布,ATCG碱基比例没有问题,但是画碱基质量图的时候,所有样本都是空的。
我打开了软件出图的R代码才发现,真的好原始啊!!!
pdf('1_read_quality.qual.boxplot.pdf')
p0<-rep(c(2,12,22,27,32,37,41),times=c(502,1062718,8073,1306425,35457919,542565,272330)/1)
p1<-rep(c(2,12,22,27,32,37,41),times=c(2857,1133839,10183,1348200,34597142,1162560,395751)/1)
p2<-rep(c(2,12,22,27,32,37,41),times=c(1531,1055310,16489,1310029,8109840,27627266,530067)/1)
p3<-rep(c(2,12,22,27,32,37,41),times=c(13859,960145,121042,1091350,3551363,32128720,784053)/1)
p4<-rep(c(2,12,22,27,32,37,41),times=c(1,831342,222717,811763,2444363,32816629,1523717)/1)
p5<-rep(c(12,22,27,32,37,41),times=c(756725,269357,693807,2111482,5112150,29707011)/1)
p6<-rep(c(2,12,22,27,32,37,41),times=c(383,714875,267381,623622,1782376,4452520,30809375)/1)
p7<-rep(c(2,12,22,27,32,37,41),times=c(3,699425,352083,576246,1676829,4164567,31181379)/1)
p8<-rep(c(2,12,22,27,32,37,41),times=c(33,719293,393174,559110,1597976,4062411,31318535)/1)
p9<-rep(c(2,12,22,27,32,37,41),times=c(33,671366,400498,557291,1556259,3951524,31513561)/1)
p10<-rep(c(2,12,22,27,32,37,41),times=c(11,676272,391661,553181,1532467,3847357,31649583)/1)
p11<-rep(c(2,12,22,27,32,37,41),times=c(171,707083,433602,604159,1551516,3917455,31436546)/1)
p12<-rep(c(12,22,27,32,37,41),times=c(691370,452590,622665,1536494,3938074,31409339)/1)
p13<-rep(c(2,12,22,27,32,37,41),times=c(15,735724,479656,648292,1525773,3996162,31264910)/1)
p14<-rep(c(2,12,22,27,32,37,41),times=c(2,719720,505675,718166,1435858,3968543,31302568)/1)
p15<-rep(c(12,22,27,32,37,41),times=c(684960,517462,749091,1369890,3890668,31438461)/1)
p16<-rep(c(12,22,27,32,37,41),times=c(694095,521500,756687,1342136,3868719,31467395)/1)
p17<-rep(c(2,12,22,27,32,37,41),times=c(1,699737,530920,784387,1322692,3884913,31427882)/1)
p18<-rep(c(12,22,27,32,37,41),times=c(729153,548522,827748,1298547,3944864,31301698)/1)
p19<-rep(c(2,12,22,27,32,37,41),times=c(2,728518,561195,854509,1286615,3953745,31265948)/1)
p20<-rep(c(2,12,22,27,32,37,41),times=c(6,766992,570514,865237,1298635,4003661,31145487)/1)
p21<-rep(c(2,12,22,27,32,37,41),times=c(35,778889,580682,876155,1308700,3988540,31117531)/1)
p22<-rep(c(2,12,22,27,32,37,41),times=c(1,771143,582180,891862,1305112,4019308,31080926)/1)
p23<-rep(c(2,12,22,27,32,37,41),times=c(3,742853,585301,906872,1265223,4004918,31145362)/1)
p24<-rep(c(2,12,22,27,32,37,41),times=c(117,744192,580200,918959,1226368,3965246,31215450)/1)
p25<-rep(c(2,12,22,27,32,37,41),times=c(3,832301,584370,927455,1218032,4009025,31079346)/1)
p26<-rep(c(2,12,22,27,32,37,41),times=c(233,847184,600237,951672,1227499,4064559,30959148)/1)
p27<-rep(c(2,12,22,27,32,37,41),times=c(32,822081,603814,951215,1220279,4032329,31020782)/1)
p28<-rep(c(12,22,27,32,37,41),times=c(815925,598461,937316,1214674,4005228,31078928)/1)
p29<-rep(c(2,12,22,27,32,37,41),times=c(23,812733,600813,939699,1217753,3990265,31089246)/1)
p30<-rep(c(12,22,27,32,37,41),times=c(822835,599345,937065,1218164,4000554,31072569)/1)
p31<-rep(c(2,12,22,27,32,37,41),times=c(3,819821,601435,943129,1215190,3978099,31092855)/1)
p32<-rep(c(2,12,22,27,32,37,41),times=c(75,816107,597396,934031,1211682,3980719,31110522)/1)
p33<-rep(c(2,12,22,27,32,37,41),times=c(1,828263,601197,939752,1218438,4002214,31060667)/1)
p34<-rep(c(2,12,22,27,32,37,41),times=c(12,841128,607539,951096,1228161,4029001,30993595)/1)
p35<-rep(c(2,12,22,27,32,37,41),times=c(24,878053,618430,967778,1246359,4101761,30838127)/1)
p36<-rep(c(2,12,22,27,32,37,41),times=c(3,874809,626513,982694,1256711,4156107,30753695)/1)
p37<-rep(c(2,12,22,27,32,37,41),times=c(86,839266,618442,970032,1247703,4122919,30852084)/1)
p38<-rep(c(2,12,22,27,32,37,41),times=c(1,845991,620542,972963,1254834,4118333,30837868)/1)
p39<-rep(c(12,22,27,32,37,41),times=c(856500,617056,966740,1251736,4123476,30835024)/1)
p40<-rep(c(2,12,22,27,32,37,41),times=c(6,882873,630955,987617,1260948,4142945,30745188)/1)
p41<-rep(c(2,12,22,27,32,37,41),times=c(316,871153,633915,996778,1263813,4139830,30744682)/1)
p42<-rep(c(2,12,22,27,32,37,41),times=c(2,870548,627710,984112,1266996,4125523,30775487)/1)
p43<-rep(c(2,12,22,27,32,37,41),times=c(6,895056,636733,1002172,1298570,4163654,30654055)/1)
p44<-rep(c(2,12,22,27,32,37,41),times=c(61,902873,643684,1006133,1317883,4158298,30621107)/1)
p45<-rep(c(2,12,22,27,32,37,41),times=c(71,908831,647831,1022162,1376023,4160673,30534109)/1)
p46<-rep(c(2,12,22,27,32,37,41),times=c(9,921965,654815,1028952,1415443,4149460,30478685)/1)
p47<-rep(c(2,12,22,27,32,37,41),times=c(82,947712,661661,1036930,1437652,4168232,30396571)/1)
p48<-rep(c(2,12,22,27,32,37,41),times=c(358,962195,673971,1061641,1463369,4241910,30244667)/1)
p49<-rep(c(2,12,22,27,32,37,41),times=c(202,962146,678263,1061091,1465059,4231003,30249297)/1)
p50<-rep(c(2,12,22,27,32,37,41),times=c(60,969956,681561,1067820,1477914,4255726,30188307)/1)
p51<-rep(c(2,12,22,27,32,37,41),times=c(192,982034,686223,1075981,1495114,4309008,30086815)/1)
p52<-rep(c(2,12,22,27,32,37,41),times=c(3,947136,677766,1058639,1477691,4250457,30217809)/1)
p53<-rep(c(2,12,22,27,32,37,41),times=c(187,959598,678831,1068157,1493716,4242767,30180424)/1)
p54<-rep(c(2,12,22,27,32,37,41),times=c(6,947238,676177,1057547,1482063,4223045,30231761)/1)
p55<-rep(c(2,8,12,22,27,32,37,41),times=c(233,1,986433,686760,1069117,1517604,4228366,30123453)/1)
p56<-rep(c(2,12,22,27,32,37,41),times=c(4,942773,685808,1070437,1546963,4169028,30191006)/1)
p57<-rep(c(2,8,12,22,27,32,37,41),times=c(7,1,998837,691096,1078339,1602279,4225038,30004375)/1)
p58<-rep(c(2,8,12,22,27,32,37,41),times=c(43,1,1031250,714125,1106725,1658704,4260229,29822723)/1)
p59<-rep(c(2,12,22,27,32,37,41),times=c(77,1000243,712136,1097526,1686375,4200641,29890620)/1)
p60<-rep(c(2,12,22,27,32,37,41),times=c(3,979646,706717,1083447,1685018,4131352,29994922)/1)
p61<-rep(c(2,12,22,27,32,37,41),times=c(62,992888,703124,1076733,1708177,4095308,29998194)/1)
p62<-rep(c(2,12,22,27,32,37,41),times=c(43,965056,704746,1068774,1712672,4078353,30038121)/1)
p63<-rep(c(2,12,22,27,32,37,41),times=c(92,994758,710866,1064544,1728702,4093538,29968571)/1)
p64<-rep(c(2,8,12,22,27,32,37,41),times=c(51,1,986215,707587,1083456,1746841,4142878,29887209)/1)
p65<-rep(c(2,8,12,22,27,32,37,41),times=c(82,2,1001793,714133,1078237,1740022,4117488,29895081)/1)
p66<-rep(c(2,8,12,22,27,32,37,41),times=c(2,2,1038683,738742,1097344,1765601,4183929,29714962)/1)
p67<-rep(c(2,8,12,22,27,32,37,41),times=c(40,1,1021258,747209,1091011,1790945,4201949,29679083)/1)
boxplot(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,p51,p52,p53,p54,p55,p56,p57,p58,p59,p60,p61,p62,p63,p64,p65,p66,p67,xlab="Position of Read(5'->3')",ylab="Phred Quality Score",outline=F)
dev.off()
pdf('1_read_quality.qual.heatmap.pdf')
上面的67个循环,代码就构建了67个长度为2千万的向量,对这两千万的向量画boxplot,一个向量内存约200多M,R语言本身如此低效,怪不得我都没有出图,肯定是内存溢出,挂掉了。
画出来的图应该是跟下面的差不多,大家试试看吧,如果你内存足够:
朋友们,对于这个绘图代码,大家有什么优化建议呢?
赶紧动起来吧,(๑•̀ㅂ•́)و✧加油