<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml" lang="en">

<head>

<!-- Document metadata. lang="en" on the root element declares the page language for assistive technology. -->
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

<meta name="author" content="Briana Mittleman" />

<meta name="date" content="2017-11-09" />

<title>Use the genomation package to visualize genome features and see overlap with Net-seq1</title>

<!-- Site libraries: jQuery, Bootstrap (cosmo theme), jQuery UI, tocify, tabsets, highlight.js, Font Awesome. -->
<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-1.1/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<!-- Keep whitespace inside code elements; give class-less pre blocks (chunk output) a white background. -->
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>

<!-- If highlight.js is present and the document has already finished loading,
     start syntax highlighting on the next timer tick. -->
<script type="text/javascript">
if (window.hljs && document.readyState && document.readyState === "complete") {
   window.setTimeout(function() {
      hljs.initHighlighting();
   }, 0);
}
</script>

<!-- Heading sizes and default left alignment for table header cells without an align attribute. -->
<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>

</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>

<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar):
   the same padding/negative-margin pair is applied to every section heading level */
.section h1,
.section h2,
.section h3,
.section h4,
.section h5,
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// manage active state of menu based on current page
$(document).ready(function () {
  // file name of the current page: everything after the last "/" of the path;
  // an empty name (directory URL) is treated as index.html
  var href = window.location.pathname;
  href = href.slice(href.lastIndexOf('/') + 1);
  if (href === "")
    href = "index.html";

  // menu anchor(s) whose href attribute equals that file name
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>

<div class="container-fluid main-container">

<!-- tabsets -->
<script>
// build tabsets once the DOM is ready; "TOC" is the argument passed to buildTabsets
$(document).ready(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->

<script>
// floating table of contents: configure and start tocify on #TOC
$(document).ready(function () {

  // move toc-ignore selectors from section div to header
  $('div.section.toc-ignore')
    .removeClass('toc-ignore')
    .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  // establish options
  var options = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    // anchor hash for a heading: drop the listed punctuation characters,
    // replace each whitespace character with "_", then lower-case
    hashGenerator: function (text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0
  };
  options.showAndHide = true;
  options.smoothScroll = true;

  // tocify
  $("#TOC").tocify(options);
});
</script>

<style type="text/css">

#TOC {
  margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}

/* main content column next to the table of contents */
.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

div.main-container {
  max-width: 1200px;
}

/* table-of-contents box: width depends on the viewport width */
div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

.tocify .list-group-item {
  border-radius: 0px;
}

</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify"> </div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">

<!-- Fixed top navigation bar -->
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <!-- The toggle contains only icon bars, so it gets its accessible name from aria-label -->
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-label="Toggle navigation">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">Net-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="about.html">About</a>
        </li>
        <li>
          <a href="license.html">License</a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
          <!-- Icon-only link: aria-label supplies the link text, the icon itself is hidden from assistive technology -->
          <a href="https://github.com/brimittleman/Net-seq" aria-label="Net-seq repository on GitHub">
            <span class="fa fa-github" aria-hidden="true"></span>
          </a>
        </li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<!-- Page title block -->
<div class="fluid-row" id="header">

<h1 class="title toc-ignore">Use the genomation package to visualize genome features and see overlap with Net-seq1</h1>
<h4 class="author"><em>Briana Mittleman</em></h4>
<h4 class="date"><em>2017-11-09</em></h4>

</div>

<!-- The file analysis/chunks.R contains chunks that define default settings shared across the workflowr
files. --> <!-- Update knitr chunk options --> <!-- Insert the date the file was last updated --> <p><strong>Last updated:</strong> 2017-11-29</p> <!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed --> <p><strong>Code version:</strong> ff2076f</p> <p>I will look at genome feature overlap with the Net-seq1 library. I will start with one sample, then move to more once the code is working. I will look specifically at overlap with CTCF sites.</p> <p>Load libraries:</p> <pre class="r"><code>library(genomation)</code></pre> <pre><code>Loading required package: grid</code></pre> <pre class="r"><code>library(genomationData)</code></pre> <div id="load-gata" class="section level3"> <h3>Load data:</h3> <pre class="r"><code>CTCF_gen= readGeneric(file = "../data/hg19.GM72.CTCF", header=FALSE, keep.all.metadata = TRUE) #bed file for one of my libraries # to get from midway #scp brimittleman@midway2.rcc.uchicago.edu:/project2/gilad/briana/Net-seq/Net-seq1/data/cov/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001-sort.dedup.cov.bed . dedup.18486.dep= readGeneric(file= "../data/net1_18486_dep_dedup_chr.bed", header = FALSE, keep.all.metadata = TRUE)</code></pre> </div> <div id="data-extaction" class="section level3"> <h3>Data extraction:</h3> <p>Try to get the density of Net-seq reads around CTCF sites.</p> <p>Fix the formatting of the file with:</p> <pre class="bash"><code>#awk '$0="chr"$0' file > newfile</code></pre> <p>This needs to be done with the reads, not the coverage file. 
I used the samtools bamtobed function on one of the dedup files and copied this to the data file here.</p> <pre class="r"><code>peak.annot = annotateWithFeature(CTCF_gen, dedup.18486.dep, intersect.chr = TRUE)</code></pre> <pre><code>intersecting chromosomes...</code></pre> <pre class="r"><code>peak.annot</code></pre> <pre><code>summary of target set annotation with feature annotation:</code></pre> <pre><code>Rows in target set: 175659</code></pre> <pre><code>----------------------------</code></pre> <pre><code>percentage of target elements overlapping with features:</code></pre> <pre><code>dedup.18486.dep other 11.03 88.97 </code></pre> <pre><code></code></pre> <pre><code>percentage of feature elements overlapping with target:</code></pre> <pre><code>[1] 17.52</code></pre> <pre><code></code></pre> <p>I need to compare this to another feature. I will look at the genes file.</p> <pre class="r"><code>genes= readGeneric(file= "../data/hg19.ref.genes.bed", header = FALSE, keep.all.metadata = TRUE) head(genes)</code></pre> <pre><code>GRanges object with 6 ranges and 9 metadata columns: seqnames ranges strand | V4 V5 <Rle> <IRanges> <Rle> | <character> <integer> [1] chr1 [66999251, 67216822] * | NM_001308203 0 [2] chr1 [66999638, 67216822] * | NM_001350217 0 [3] chr1 [66999638, 67216822] * | NM_001350218 0 [4] chr1 [66999638, 67216822] * | NM_032291 0 [5] chr1 [25071759, 25170815] * | NM_013943 0 [6] chr1 [33546713, 33586132] * | NM_001293562 0 V6 V7 V8 V9 V10 <character> <integer> <integer> <integer> <integer> [1] + 67000041 67208778 0 22 [2] + 67000041 67208778 0 25 [3] + 67000041 67208778 0 23 [4] + 67000041 67208778 0 25 [5] + 25072044 25167428 0 6 [6] + 33547850 33585783 0 11 V11 <character> [1] 104,123,64,25,57,55,176,25,52,86,93,75,128,127,66,112,156,133,203,65,165,8067, [2] 413,64,25,84,57,55,176,12,12,25,52,86,93,75,501,128,127,60,112,156,133,203,65,165,8067, [3] 413,64,25,57,55,176,12,12,25,52,86,93,75,128,127,60,112,156,133,203,65,165,8067, [4] 
413,64,25,72,57,55,176,12,12,25,52,86,93,75,501,128,127,60,112,156,133,203,65,165,8067, [5] 357,110,126,107,182,3552, [6] 182,118,177,174,173,135,166,163,113,215,488, V12 <character> [1] 0,677,92278,99501,106208,109241,109975,137426,138375,139712,143435,146109,155579,156621,160870,185725,195695,200179,205766,207089,207703,209504, [2] 0,91891,99114,100124,105821,108854,109588,126557,133574,137039,137988,139325,143048,145722,147913,155192,156234,161478,185338,195308,199792,205379,206702,207316,209117, [3] 0,91891,99114,105821,108854,109588,126557,133574,137039,137988,139325,143048,145722,155192,156234,161478,185338,195308,199792,205379,206702,207316,209117, [4] 0,91891,99114,101988,105821,108854,109588,126557,133574,137039,137988,139325,143048,145722,147913,155192,156234,161478,185338,195308,199792,205379,206702,207316,209117, [5] 0,52473,68825,81741,94591,95504, [6] 0,278,1065,2841,10937,12169,13435,15594,16954,36789,38931, ------- seqinfo: 54 sequences from an unspecified genome; no seqlengths</code></pre> <pre class="r"><code>peak.annot_genes = annotateWithFeature(genes, dedup.18486.dep, intersect.chr = TRUE)</code></pre> <pre><code>intersecting chromosomes...</code></pre> <pre><code>Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrM, chr1_gl000191_random, chr1_gl000192_random, chr4_ctg9_hap1, chr4_gl000193_random, chr4_gl000194_random, chr6_apd_hap1, chr6_cox_hap2, chr6_dbb_hap3, chr6_mann_hap4, chr6_mcf_hap5, chr6_qbl_hap6, chr6_ssto_hap7, chr7_gl000195_random, chr17_ctg5_hap1, chr17_gl000205_random, chr19_gl000209_random, chrUn_gl000211, chrUn_gl000212, chrUn_gl000213, chrUn_gl000215, chrUn_gl000218, chrUn_gl000219, chrUn_gl000220, chrUn_gl000222, chrUn_gl000223, chrUn_gl000224, chrUn_gl000227, chrUn_gl000228, chrUn_gl000241 - in 'y': chrMT Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).</code></pre> 
<pre><code>Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrMT - in 'y': chrM, chr1_gl000191_random, chr1_gl000192_random, chr4_ctg9_hap1, chr4_gl000193_random, chr4_gl000194_random, chr6_apd_hap1, chr6_cox_hap2, chr6_dbb_hap3, chr6_mann_hap4, chr6_mcf_hap5, chr6_qbl_hap6, chr6_ssto_hap7, chr7_gl000195_random, chr17_ctg5_hap1, chr17_gl000205_random, chr19_gl000209_random, chrUn_gl000211, chrUn_gl000212, chrUn_gl000213, chrUn_gl000215, chrUn_gl000218, chrUn_gl000219, chrUn_gl000220, chrUn_gl000222, chrUn_gl000223, chrUn_gl000224, chrUn_gl000227, chrUn_gl000228, chrUn_gl000241 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).</code></pre> <pre class="r"><code>peak.annot_genes</code></pre> <pre><code>summary of target set annotation with feature annotation:</code></pre> <pre><code>Rows in target set: 66635</code></pre> <pre><code>----------------------------</code></pre> <pre><code>percentage of target elements overlapping with features:</code></pre> <pre><code>dedup.18486.dep other 86.97 13.03 </code></pre> <pre><code></code></pre> <pre><code>percentage of feature elements overlapping with target:</code></pre> <pre><code>[1] 60.91</code></pre> <pre><code></code></pre> <p>The genes file has a lot of random chr tags at the end.</p> <pre class="r"><code>sm = ScoreMatrixBin(target = dedup.18486.dep, windows = CTCF_gen, bin.num = 50)</code></pre> <pre><code>Warning in .local(target, windows, bin.num, bin.op, strand.aware): 32 windows fall off the target</code></pre> <pre><code>Warning in .local(target, windows, bin.num, bin.op, strand.aware): supplied GRanges object contains ranges of width < number of bins</code></pre> <pre class="r"><code>heat.matrix= heatMatrix(sm, xcoords = c(-1000, 1000))</code></pre> <p><img src="figure/visualize_genomefeatures.Rmd/unnamed-chunk-6-1.png" width="672" style="display: block; margin: auto;" 
alt="Heatmap of Net-seq read scores in 50 bins across CTCF site windows" /></p> <pre class="r"><code>heat.matrix</code></pre> <pre><code>NULL</code></pre> <pre class="r"><code>plotMeta(sm, xcoords = c(-1000, 1000))</code></pre> <p><img src="figure/visualize_genomefeatures.Rmd/unnamed-chunk-6-2.png" width="672" style="display: block; margin: auto;" alt="Meta-region plot of Net-seq read scores across CTCF site windows" /></p> </div> <div id="metaplot-for-tss" class="section level3"> <h3>Metaplot for TSS</h3> <pre class="r"><code>TSS= readGeneric(file = "../data/SwitchGear_TSS.bed", header=FALSE, keep.all.metadata = TRUE) dedup.18486.dep= readGeneric(file= "../data/net1_18486_dep_dedup_chr.bed", header = FALSE, keep.all.metadata = TRUE) annotateWithFeature(TSS, dedup.18486.dep, intersect.chr = TRUE)</code></pre> <pre><code>intersecting chromosomes...</code></pre> <pre><code>Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrM - in 'y': chrMT Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).</code></pre> <pre><code>Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrMT - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).</code></pre> <pre><code>summary of target set annotation with feature annotation:</code></pre> <pre><code>Rows in target set: 131746</code></pre> <pre><code>----------------------------</code></pre> <pre><code>percentage of target elements overlapping with features:</code></pre> <pre><code>dedup.18486.dep other 6.61 93.39 </code></pre> <pre><code></code></pre> <pre><code>percentage of feature elements overlapping with target:</code></pre> <pre><code>[1] 4.53</code></pre> <pre><code></code></pre> <pre class="r"><code>#sm2 = ScoreMatrixBin(target = dedup.18486.dep, windows = TSS) #plotMeta(sm2, xcoords = c(-1000, 1000))</code></pre> <p>Problem: This TSS file only contains the 1 bp site. 
I need to make a file that covers 200 bp upstream and downstream of this site. As a first attempt, use the promoter file that covers the 250 bp upstream of each gene.</p> <pre class="r"><code>ref_250up= readGeneric(file = "../data/refseq_250up.bed", header=FALSE, keep.all.metadata = TRUE) dedup.18486.dep= readGeneric(file= "../data/net1_18486_dep_dedup_chr.bed", header = FALSE, keep.all.metadata = TRUE) sm2 = ScoreMatrixBin(target = dedup.18486.dep, windows = ref_250up)</code></pre> <pre><code>Warning in .local(target, windows, bin.num, bin.op, strand.aware): 3100 windows fall off the target</code></pre> <pre class="r"><code>plotMeta(sm2, xcoords = c(-1000, 1000), main="Windows are 250bp upstream")</code></pre> <p><img src="figure/visualize_genomefeatures.Rmd/unnamed-chunk-8-1.png" width="672" style="display: block; margin: auto;" alt="Meta-region plot of Net-seq read scores across windows 250 bp upstream of genes" /></p> </div> <div id="session-information" class="section level2"> <h2>Session information</h2> <!-- Insert the session information into the document --> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.2 (2017-09-28) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] grid stats graphics grDevices utils datasets methods [8] base other attached packages: [1] genomationData_1.10.0 genomation_1.10.0 loaded via a namespace (and not attached): [1] Rcpp_0.12.13 plyr_1.8.4 [3] compiler_3.4.2 git2r_0.19.0 [5] GenomeInfoDb_1.14.0 XVector_0.18.0 [7] bitops_1.0-6 tools_3.4.2 [9] zlibbioc_1.24.0 digest_0.6.12 [11] gridBase_0.4-7 tibble_1.3.4 [13] gtable_0.2.0 evaluate_0.10.1 [15] BSgenome_1.46.0 lattice_0.20-35 [17] rlang_0.1.4 Matrix_1.2-11 [19] DelayedArray_0.4.1 yaml_2.1.14 [21] parallel_3.4.2 GenomeInfoDbData_0.99.1 [23] 
rtracklayer_1.38.0 stringr_1.2.0 [25] knitr_1.17 hms_0.3 [27] Biostrings_2.46.0 S4Vectors_0.16.0 [29] IRanges_2.12.0 stats4_3.4.2 [31] rprojroot_1.2 data.table_1.10.4-3 [33] impute_1.52.0 Biobase_2.38.0 [35] R6_2.2.2 plotrix_3.6-6 [37] XML_3.98-1.9 BiocParallel_1.12.0 [39] seqPattern_1.10.0 rmarkdown_1.6 [41] reshape2_1.4.2 readr_1.1.1 [43] ggplot2_2.2.1 magrittr_1.5 [45] scales_0.5.0 backports_1.1.1 [47] Rsamtools_1.30.0 htmltools_0.3.6 [49] matrixStats_0.52.2 BiocGenerics_0.24.0 [51] GenomicRanges_1.30.0 GenomicAlignments_1.14.0 [53] SummarizedExperiment_1.8.0 colorspace_1.3-2 [55] KernSmooth_2.23-15 stringi_1.1.5 [57] lazyeval_0.2.1 munsell_0.4.3 [59] RCurl_1.95-4.8 </code></pre>
</div>

<!-- Site footer -->
<hr>
<p>
  This <a href="https://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->

</div>
</div>

</div>

<script>

// add bootstrap table styles to pandoc tables:
// every table whose thead contains a tr.header row gets the
// Bootstrap "table table-condensed" classes
function bootstrapStylePandocTables() {
  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}

// apply the table styling once the DOM is ready
$(document).ready(function () {
  bootstrapStylePandocTables();
});

</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // create a script element pointing at MathJax and append it to the document head
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>