Last updated: 2017-12-19

Code version: 1d63e1d

The goal of this analysis is to create plots showing that our data capture as much information as one lane of the Mayer sample. I will do this by comparing our merged data against their single lane.

Genes from IGV that I want to use:

  • HERPUD1 chr16:56,964,002-56,979,793

  • ACTB chr7:5,564,779-5,572,232

  • CCNB2 chr15:59,396,707-59,401,006

  • chr11:234,336-239,997

  • KIAA0100 chr17:26,968,078-26,974,887

  • HECTD1 chr14:31,672,040-31,681,043

  • STAG1 chr3:136,469,421-136,472,771

  • SRSF3

  • ENO1

Load Packages:

library(Gviz)
Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: 'BiocGenerics'
The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs
The following objects are masked from 'package:base':

    anyDuplicated, append, as.data.frame, cbind, colMeans,
    colnames, colSums, do.call, duplicated, eval, evalq, Filter,
    Find, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, Map, mapply, match, mget, order, paste, pmax,
    pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
    rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
    tapply, union, unique, unsplit, which, which.max, which.min

Attaching package: 'S4Vectors'
The following object is masked from 'package:base':

    expand.grid
Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: grid
library(GenomicRanges)
library(biomaRt)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
Loading required package: GenomicFeatures
Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.
library("IRanges")
library("dplyr")

Attaching package: 'dplyr'
The following object is masked from 'package:AnnotationDbi':

    select
The following object is masked from 'package:Biobase':

    combine
The following object is masked from 'package:biomaRt':

    select
The following objects are masked from 'package:GenomicRanges':

    intersect, setdiff, union
The following object is masked from 'package:GenomeInfoDb':

    intersect
The following objects are masked from 'package:IRanges':

    collapse, desc, intersect, setdiff, slice, union
The following objects are masked from 'package:S4Vectors':

    first, intersect, rename, setdiff, setequal, union
The following objects are masked from 'package:BiocGenerics':

    combine, intersect, setdiff, union
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library("data.table")

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:GenomicRanges':

    shift
The following object is masked from 'package:IRanges':

    shift
The following objects are masked from 'package:S4Vectors':

    first, second
library("GenomicAlignments")
Loading required package: SummarizedExperiment
Loading required package: DelayedArray
Loading required package: matrixStats

Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':

    count
The following objects are masked from 'package:Biobase':

    anyMissing, rowMedians

Attaching package: 'DelayedArray'
The following objects are masked from 'package:matrixStats':

    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
The following object is masked from 'package:base':

    apply
Loading required package: Biostrings
Loading required package: XVector

Attaching package: 'Biostrings'
The following object is masked from 'package:DelayedArray':

    type
The following object is masked from 'package:base':

    strsplit
Loading required package: Rsamtools

Attaching package: 'GenomicAlignments'
The following object is masked from 'package:data.table':

    last
The following object is masked from 'package:dplyr':

    last

Load data:

# Genome build and chromosome shared by every track in this section.
gen <- "hg19"
chr <- "chr7"

# Data tracks built from the two chr-prefixed BAM files.
merged_data_7 <- DataTrack(
  range = "../data/bam_files_chr/merged_Net1_chr.bam",
  genome = gen,
  type = "h",
  name = "Merged",
  window = -1,
  chromosome = chr
)
mayer_data_7 <- DataTrack(
  range = "../data/bam_files_chr/mayer_chr.bam",
  genome = gen,
  type = "h",
  name = "Mayer",
  window = -1,
  chromosome = chr
)

# RefSeq gene models fetched from UCSC for the plotted window.
refGenes <- UcscTrack(
  genome = gen,
  chromosome = chr,
  track = "RefSeq Genes",
  from = 5555158,
  to = 5581854,
  trackType = "GeneRegionTrack",
  rstarts = "exonStarts",
  rends = "exonEnds",
  gene = "name",
  symbol = "name2",
  transcript = "name",
  strand = "strand",
  fill = "#800000",
  name = "RefSeq Genes",
  showId = TRUE
)

# Coordinate axis and chromosome ideogram.
gtrack <- GenomeAxisTrack()
itrack <- IdeogramTrack(genome = gen, chromosome = chr)

# Overview plot: ideogram, axis, both data tracks and the gene models.
plotTracks(
  list(itrack, gtrack, merged_data_7, mayer_data_7, refGenes),
  from = 5555158,
  to = 5581854,
  background.title = "darkblue",
  background.panel = "#FFFEDB"
)

# Annotation tracks built from the same two BAM files.
merged_anno_7 <- AnnotationTrack(
  range = "../data/bam_files_chr/merged_Net1_chr.bam",
  genome = gen,
  name = "Merged",
  window = -1,
  chromosome = chr
)
mayer_anno_7 <- AnnotationTrack(
  range = "../data/bam_files_chr/mayer_chr.bam",
  genome = gen,
  name = "Mayer",
  window = -1,
  chromosome = chr
)

# Data + annotation for each sample over chr7:5,564,779-5,572,232
# (the ACTB coordinates listed at the top of this page).
plotTracks(list(merged_data_7, merged_anno_7), from = 5564779, to = 5572232)

plotTracks(list(mayer_data_7, mayer_anno_7), from = 5564779, to = 5572232)

Create a function for the plots

#' Plot the merged and Mayer BAM tracks for one genomic region
#'
#' Builds a data track from each BAM file, a RefSeq gene track fetched from
#' UCSC for the requested window, a genome axis and a chromosome ideogram,
#' then draws all five with `plotTracks()`.
#'
#' @param chrom Chromosome name as a single string, e.g. `"chr7"`.
#' @param from,to Start and end coordinates of the region (single numbers,
#'   `from < to`).
#' @param merged_bam Path to the merged BAM file. Defaults to the file used
#'   throughout this analysis.
#' @param mayer_bam Path to the Mayer BAM file. Defaults to the file used
#'   throughout this analysis.
#' @param sizes Optional numeric vector of relative track heights, one value
#'   per plotted track (five: ideogram, axis, merged, Mayer, genes). `NULL`
#'   (the default) keeps the `plotTracks()` default sizing.
#' @param gen Genome build passed to every track. Defaults to `"hg19"`.
#' @return The value returned by `plotTracks()`.
track_plot <- function(chrom, from, to,
                       merged_bam = "../data/bam_files_chr/merged_Net1_chr.bam",
                       mayer_bam = "../data/bam_files_chr/mayer_chr.bam",
                       sizes = NULL,
                       gen = "hg19") {
  # Fail early with a clear message instead of a late error from Gviz/UCSC.
  stopifnot(
    is.character(chrom), length(chrom) == 1L,
    is.numeric(from), length(from) == 1L,
    is.numeric(to), length(to) == 1L,
    from < to,
    is.null(sizes) || (is.numeric(sizes) && length(sizes) == 5L)
  )

  # Both samples are drawn with identical settings so the panels are
  # directly comparable; only the file and the label differ.
  bam_track <- function(bam, label) {
    DataTrack(
      range = bam, genome = gen, type = "h", name = label,
      window = -1, chromosome = chrom
    )
  }
  merged_data <- bam_track(merged_bam, "Merged")
  mayer_data <- bam_track(mayer_bam, "Mayer")

  refGenes <- UcscTrack(
    genome = gen, chromosome = chrom,
    track = "RefSeq Genes", from = from, to = to,
    trackType = "GeneRegionTrack", rstarts = "exonStarts",
    rends = "exonEnds", gene = "name", symbol = "name2",
    transcript = "name", strand = "strand", fill = "#800000",
    name = "RefSeq Genes", showId = TRUE
  )
  gtrack <- GenomeAxisTrack()
  itrack <- IdeogramTrack(genome = gen, chromosome = chrom)

  drawn <- plotTracks(
    list(itrack, gtrack, merged_data, mayer_data, refGenes),
    from = from, to = to, sizes = sizes,
    background.title = "darkblue", background.panel = "#FFFEDB"
  )
  # Returned as a plain (visible) value, matching the original behaviour.
  drawn
}
#plot_SRSF3= track_plot("chr6",36564332,36571507)
#plot_ENO1=track_plot("chr1",8919652,8940558 )
#plot_tars=track_plot("chr5",33438802,33468000)
#plot_CAXN= track_plot("chr5",179123129,179159838)
#future: change track sizes with the sizes=c(5,1,5) argument of plotTracks
#plot_ALDOA= track_plot("chr16",30080421,30082314)
  
#plot_XRCC5= track_plot("chr2", 216968869,217009667)

#plot_TUBB= track_plot("chr6",30684274,30698626)

Fix in terminal

Add the "chr" prefix to the chromosome names in the BAM files.

# Rewrite the reference names of SRR1575922-sort.bam with a "chr" prefix.
# Stages of the pipeline:
#   1. samtools view -h : emit the file as text, header included.
#   2. awk              : print header lines other than @SQ unchanged; print
#                         @SQ lines whose SN: starts with 1-9, X, Y or MT;
#                         for lines whose 3rd field matches the pattern,
#                         prepend "chr" to that field and print them.
#   3. sed              : turn every "SN:" into "SN:chr", then "chrMT" into "chrM".
#   4. samtools view -bS: convert the text stream back to BAM.
# Lines matched by none of the awk rules are not printed, so they are dropped.
# NOTE(review): in /^[1-9]|X|Y|MT/ only the first alternative is anchored;
# X, Y and MT match anywhere in field 3. The $3 rule is also not restricted
# to non-header lines - confirm no header line can match it.
# NOTE(review): the sed substitutions run on every line, not only on @SQ
# header lines - confirm no record contains "SN:" or "chrMT" elsewhere.
samtools view -h SRR1575922-sort.bam | awk 'BEGIN{FS=OFS="\t"} (/^@/ && !/@SQ/){print $0} $2~/^SN:[1-9]|^SN:X|^SN:Y|^SN:MT/{print $0}  $3~/^[1-9]|X|Y|MT/{$3="chr"$3; print $0} ' | sed 's/SN:/SN:chr/g' | sed 's/chrMT/chrM/g' | samtools view -bS - > mayer_chr.bam

# Same pipeline applied to merged_Net1.bam, written to merged_Net1_chr.bam.
samtools view -h merged_Net1.bam | awk 'BEGIN{FS=OFS="\t"} (/^@/ && !/@SQ/){print $0} $2~/^SN:[1-9]|^SN:X|^SN:Y|^SN:MT/{print $0}  $3~/^[1-9]|X|Y|MT/{$3="chr"$3; print $0} ' | sed 's/SN:/SN:chr/g' | sed 's/chrMT/chrM/g' | samtools view -bS - > merged_Net1_chr.bam

Index the BAM files:

# Index the two chr-prefixed BAM files written by the commands above.
samtools index mayer_chr.bam
samtools index merged_Net1_chr.bam

Session information

sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.6

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
 [1] grid      parallel  stats4    stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] GenomicAlignments_1.14.1               
 [2] Rsamtools_1.30.0                       
 [3] Biostrings_2.46.0                      
 [4] XVector_0.18.0                         
 [5] SummarizedExperiment_1.8.0             
 [6] DelayedArray_0.4.1                     
 [7] matrixStats_0.52.2                     
 [8] data.table_1.10.4-3                    
 [9] dplyr_0.7.4                            
[10] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
[11] GenomicFeatures_1.30.0                 
[12] AnnotationDbi_1.40.0                   
[13] Biobase_2.38.0                         
[14] biomaRt_2.34.0                         
[15] Gviz_1.22.2                            
[16] GenomicRanges_1.30.0                   
[17] GenomeInfoDb_1.14.0                    
[18] IRanges_2.12.0                         
[19] S4Vectors_0.16.0                       
[20] BiocGenerics_0.24.0                    

loaded via a namespace (and not attached):
 [1] ProtGenerics_1.10.0           bitops_1.0-6                 
 [3] bit64_0.9-7                   RColorBrewer_1.1-2           
 [5] progress_1.1.2                httr_1.3.1                   
 [7] rprojroot_1.2                 tools_3.4.2                  
 [9] backports_1.1.2               R6_2.2.2                     
[11] rpart_4.1-11                  Hmisc_4.0-3                  
[13] DBI_0.7                       lazyeval_0.2.1               
[15] colorspace_1.3-2              nnet_7.3-12                  
[17] gridExtra_2.3                 prettyunits_1.0.2            
[19] RMySQL_0.10.13                bit_1.1-12                   
[21] curl_3.1                      compiler_3.4.2               
[23] git2r_0.19.0                  htmlTable_1.11.0             
[25] rtracklayer_1.38.2            scales_0.5.0                 
[27] checkmate_1.8.5               stringr_1.2.0                
[29] digest_0.6.13                 foreign_0.8-69               
[31] rmarkdown_1.8                 base64enc_0.1-3              
[33] dichromat_2.0-0               pkgconfig_2.0.1              
[35] htmltools_0.3.6               ensembldb_2.2.0              
[37] BSgenome_1.46.0               htmlwidgets_0.9              
[39] rlang_0.1.4                   rstudioapi_0.7               
[41] RSQLite_2.0                   BiocInstaller_1.28.0         
[43] shiny_1.0.5                   bindr_0.1                    
[45] BiocParallel_1.12.0           acepack_1.4.1                
[47] VariantAnnotation_1.24.2      RCurl_1.95-4.8               
[49] magrittr_1.5                  GenomeInfoDbData_0.99.1      
[51] Formula_1.2-2                 Matrix_1.2-12                
[53] Rcpp_0.12.14                  munsell_0.4.3                
[55] stringi_1.1.6                 yaml_2.1.16                  
[57] zlibbioc_1.24.0               plyr_1.8.4                   
[59] AnnotationHub_2.10.1          blob_1.1.0                   
[61] lattice_0.20-35               splines_3.4.2                
[63] knitr_1.17                    XML_3.98-1.9                 
[65] glue_1.2.0                    evaluate_0.10.1              
[67] biovizBase_1.26.0             latticeExtra_0.6-28          
[69] httpuv_1.3.5                  gtable_0.2.0                 
[71] purrr_0.2.4                   tidyr_0.7.2                  
[73] assertthat_0.2.0              ggplot2_2.2.1                
[75] mime_0.5                      xtable_1.8-2                 
[77] AnnotationFilter_1.2.0        survival_2.41-3              
[79] tibble_1.3.4                  memoise_1.1.0                
[81] bindrcpp_0.2                  cluster_2.0.6                
[83] interactiveDisplayBase_1.16.0

This R Markdown site was created with workflowr