<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <meta name="author" content="Briana Mittleman" /> <meta name="date" content="2018-01-29" /> <title>Explore coverage within 3` UTRs</title> <script src="site_libs/jquery-1.11.3/jquery.min.js"></script> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" /> <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script> <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script> <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" /> <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script> <script src="site_libs/navigation-1.1/tabsets.js"></script> <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" /> <script src="site_libs/highlightjs-9.12.0/highlight.js"></script> <link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" /> <style type="text/css">code{white-space: pre;}</style> <style type="text/css"> pre:not([class]) { background-color: white; } </style> <script type="text/javascript"> if (window.hljs) { hljs.configure({languages: []}); hljs.initHighlightingOnLoad(); if (document.readyState && document.readyState === "complete") { window.setTimeout(function() { hljs.initHighlighting(); }, 0); } } </script> <style type="text/css"> h1 { font-size: 34px; } h1.title { font-size: 38px; } h2 { font-size: 30px; } h3 { font-size: 24px; } h4 { font-size: 18px; } h5 { font-size: 16px; } h6 { font-size: 12px; } .table th:not([align]) { text-align: left; } </style> </head> <body> <style type = "text/css"> .main-container { max-width: 940px; margin-left: auto; margin-right: auto; } code { color: inherit; background-color: rgba(0, 0, 0, 0.04); } img { max-width:100%; height: auto; } .tabbed-pane { padding-top: 12px; } button.code-folding-btn:focus { outline: none; } </style> <style type="text/css"> /* padding for bootstrap navbar */ body { padding-top: 51px; padding-bottom: 40px; } /* offset scroll position for anchor links (for fixed navbar) */ .section h1 { padding-top: 56px; margin-top: -56px; } .section h2 { padding-top: 56px; margin-top: -56px; } .section h3 { padding-top: 56px; margin-top: -56px; } .section h4 { padding-top: 56px; margin-top: -56px; } .section h5 { padding-top: 56px; margin-top: -56px; } .section h6 { padding-top: 56px; margin-top: -56px; } </style> <script> // manage active state of menu based on current page $(document).ready(function () { // active menu anchor href = window.location.pathname href = href.substr(href.lastIndexOf('/') + 1) if (href === "") href = "index.html"; var menuAnchor = $('a[href="' + href + '"]'); // mark it active menuAnchor.parent().addClass('active'); // if it's got a parent navbar menu mark it active as well menuAnchor.closest('li.dropdown').addClass('active'); }); </script> <div class="container-fluid main-container"> <!-- tabsets --> <script> $(document).ready(function () { window.buildTabsets("TOC"); }); </script> <!-- code folding --> <script> $(document).ready(function () { // move toc-ignore selectors from section div to header $('div.section.toc-ignore') .removeClass('toc-ignore') .children('h1,h2,h3,h4,h5').addClass('toc-ignore'); // establish options var options = { selectors: "h1,h2,h3", theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 }; options.showAndHide = true; options.smoothScroll = true; // tocify var toc = $("#TOC").tocify(options).data("toc-tocify"); }); </script> <style type="text/css"> #TOC { margin: 25px 0px 20px 0px; } @media (max-width: 768px) { #TOC { position: relative; width: 100%; } } .toc-content { padding-left: 30px; padding-right: 40px; } div.main-container { max-width: 1200px; } div.tocify { width: 20%; max-width: 260px; max-height: 85%; } @media (min-width: 768px) and (max-width: 991px) { div.tocify { width: 25%; } } @media (max-width: 767px) { div.tocify { width: 100%; max-width: none; } } .tocify ul, .tocify li { line-height: 20px; } .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; } .tocify .list-group-item { border-radius: 0px; } </style> <!-- setup 3col/9col grid for toc_float and main content --> <div class="row-fluid"> <div class="col-xs-12 col-sm-4 col-md-3"> <div id="TOC" class="tocify"> </div> </div> <div class="toc-content col-xs-12 col-sm-8 col-md-9"> <div class="navbar navbar-default navbar-fixed-top" role="navigation"> <div class="container"> <div class="navbar-header"> <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> <a class="navbar-brand" href="index.html">Net-seq</a> </div> <div id="navbar" class="navbar-collapse collapse"> <ul class="nav navbar-nav"> <li> <a href="index.html">Home</a> </li> <li> <a href="about.html">About</a> </li> <li> <a href="license.html">License</a> </li> </ul> <ul class="nav navbar-nav navbar-right"> <li> <a href="https://github.com/brimittleman/Net-seq"> <span class="fa fa-github"></span> </a> </li> </ul> </div><!--/.nav-collapse --> </div><!--/.container --> </div><!--/.navbar --> <div class="fluid-row" id="header"> <h1 class="title toc-ignore">Explore coverage within 3` UTRs</h1> <h4 class="author"><em>Briana Mittleman</em></h4> <h4 class="date"><em>2018-01-29</em></h4> </div> <!-- The file analysis/chunks.R contains chunks that define default settings shared across the workflowr files. --> <!-- Update knitr chunk options --> <!-- Insert the date the file was last updated --> <p><strong>Last updated:</strong> 2018-01-31</p> <!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed --> <p><strong>Code version:</strong> 308d2b5</p> <!-- Add your analysis here --> <p>I have a file with the 3` UTRs and I want to extract coverage from the drop and dronc seq bam files that are in these UTR regions.</p> <p>I will use:</p> <ul> <li>bedtools coverage -d -a /project2/gilad/spott/dropBams/Day7_cardiomyocytes_drop_seq.bam -b /project2/gilad/briana/apa_sites/three_prime_utr.bed > drop7_cardio_3utr.txt</li> </ul> <p>This is taking too much memory. I am going to convert bam to a bed file then sort it.</p> <pre class="bash"><code>bedtools bamtobed -i /project2/gilad/spott/dropBams/Day7_cardiomyocytes_drop_seq.bam > Day7_cardiomyocytes_drop_seq.bed sort -k1,1 -k2,2n Day7_cardiomyocytes_drop_seq.bed > Day7_cardiomyocytes_drop_seq_sort.bed </code></pre> <p>Do this for the UTR file as well.</p> <pre class="bash"><code>sort -k1,1 -k2,2n three_prime_utr.bed > three_prime_utr_sort.bed </code></pre> <p>Now add the sorted to the coverage: Update the drop7_cardio_3UTR_cov.sh script</p> <pre class="bash"><code>bedtools coverage -d -sorted -a /project2/gilad/briana/apa_sites/Day7_cardiomyocytes_drop_seq_sort.bed -b /project2/gilad/briana/apa_sites/three_prime_utr_sort.bed > drop7_cardio_3utr.txt</code></pre> <p>Do this for one of 3’ seq lines too (LCL). The scipt is called lcl_3UTR_cov.sh.</p> <p>These files are not correct. They have coverage for a region rather than per base in each UTR. Try to switch a and b in the script.</p> <p>Now this seems correct. THe file has the UTR regions. A line for each base in the region in column 6 and the coverage at that base in column 7.</p> <ul> <li><p>LCL_3utrAB.bed</p></li> <li><p>drop7_cardio_3utrAB.bed</p></li> </ul> <p>I will continue this analysis on the Rstudio for midway because the coverage files are so big.</p> <div id="change-data-structure" class="section level3"> <h3>Change data structure:</h3> <p>I am now using a python dictionary to create a file that has the UTRs as keys and the values as a list of the read count for that UTR. The script is /project2/gilad/briana/apa_sites/code/cov_per_UTR.py</p> <p>To run this I have to make a bash script. I called it test.sh for now.</p> <p>It has the bash heading, module load python, and the following line to call the pythn script.</p> <p>‘python cov_per_UTR.py ../LCL_3utrAB.bed ../output/cov_at_UTRs_LCL.csv’</p> <p>This takes too long to run. I will up the time and add a step that creates a file from the UTR list. This will help me know if the keys are created partway through the run. The new py script is cov_per_UTR2.py and takes 3 arguments, data, a UTR_names file and a full output file. The bash script to run this is /project2/gilad/briana/apa_sites/code/utr_py_lcl.sh. I updates this file for the drop7 coverage file as well. The bash script for this is /project2/gilad/briana/apa_sites/code/utr_py_drop7.sh.</p> </div> <div id="use-the-group-by-function" class="section level3"> <h3>Use the group by function:</h3> <p>First seperate the files by pos and neg strand UTR</p> <pre class="bash"><code> awk '{if ($6 == "+") {print} }' drop7_cardio_3utrAB.bed > drop7_cardio_3utrAB.pos.bed awk '{if ($6 == "-") {print} }' drop7_cardio_3utrAB.bed > drop7_cardio_3utrAB.neg.bed awk '{if ($6 == "-") {print} }' LCL_3utrAB.bed > LCL_3utrAB.neg.bed awk '{if ($6 == "+") {print} }' LCL_3utrAB.bed > LCL_3utrAB.pos.bed </code></pre> <p>Use the group by bedtools function to group by the 7th column (base number in the UTR) and get the mean for the coverage at that base. This script is /project2/gilad/briana/apa_sites/groupby/per_base_mean.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=mean_UTR #SBATCH --time=2:00:00 #SBATCH --partition=gilad #SBATCH --mem=10G #SBATCH --mail-type=END #SBATCH --ntasks-per-node=2 module load Anaconda3 source activate net-seq bedtools groupby -i drop7_cardio_3utrAB.pos.bed -g 7 -c 8 -o mean > drop7_cardio_pos_mean.bed bedtools groupby -i drop7_cardio_3utrAB.neg.bed -g 7 -c 8 -o mean > drop7_cardio_neg_mean.bed bedtools groupby -i LCL_3utrAB.pos.bed -g 7 -c 8 -o mean > LCL_pos_mean.bed bedtools groupby -i LCL_3utrAB.neg.bed -g 7 -c 8 -o mean > LCL_neg_mean.bed</code></pre> <p>This doesnt work. I am going to subset the file and try this for 1 chromosome.</p> <ul> <li>less drop7_cardio_3utrAB.pos.bed |head -n 10000 |sort -k7,7n |bedtools groupby -i stdin -g 7 -c 8 -o mean | head -n 10000</li> </ul> <pre class="bash"><code>awk '{if ($1 == "chr21") {print} }' drop7_cardio_3utrAB.pos.bed > drop7_cardio_3utrAB.pos.chr21.bed</code></pre> <pre class="r"><code>drop7_3UTR_pos_chr21= read.table("../data/drop7_cardio_3utrAB.pos.chr21.bed", header=FALSE)</code></pre> <pre class="r"><code>library(dplyr)</code></pre> <pre><code> Attaching package: 'dplyr'</code></pre> <pre><code>The following objects are masked from 'package:stats': filter, lag</code></pre> <pre><code>The following objects are masked from 'package:base': intersect, setdiff, setequal, union</code></pre> <pre class="r"><code>group_by(drop7_3UTR_pos_chr21, V7) %>% summarise(sum=sum(V8)) -> UTR_21_cov plot(UTR_21_cov$sum, ylab="sum expression", xlab="UTR base position", main="Chr 21 3' UTR coverage Dropseq day 7")</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-8-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>utr_unique_pos= unique(drop7_3UTR_pos_chr21[,1:3]) summary(utr_unique_pos$V3-utr_unique_pos$V2)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 2 286 642 1269 1443 8907 </code></pre> <pre class="r"><code>plot(UTR_21_cov$sum, ylab="sum expression", xlab="UTR base position", main="Chr 21 3' UTR coverage Dropseq day 7", xlim = c(0,1000))</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-10-1.png" width="672" style="display: block; margin: auto;" /></p> <p>Do this on the negative strand.</p> <pre class="r"><code>drop7_3UTR_neg_chr21= read.table("../data/drop7_cardio_3utrAB.neg.chr21.bed", header=FALSE) group_by(drop7_3UTR_neg_chr21, V7) %>% summarise(sum=sum(V8)) -> UTR_21neg_cov plot(UTR_21neg_cov$sum, ylab="sum expression", xlab="UTR base position", main="Chr 21 3' UTR coverage Dropseq day 7")</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-11-1.png" width="672" style="display: block; margin: auto;" /></p> <p>Average utr size:</p> <pre class="r"><code>utr_unique= unique(drop7_3UTR_neg_chr21[,1:3]) summary(utr_unique$V3-utr_unique$V2)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 2.0 102.0 166.0 564.4 414.0 10781.0 </code></pre> <p>Subset plot wit this information:</p> <pre class="r"><code>plot(UTR_21neg_cov$sum, ylab="sum expression", xlab="UTR base position- negative strand", main="Chr 21 3' UTR coverage Dropseq day 7", xlim=c(0,1000))</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-13-1.png" width="672" style="display: block; margin: auto;" /></p> <p>With the 3` seq:</p> <pre class="r"><code>LCL_3UTR_neg_chr20= read.table("../data/LCL_3utrAB.neg.chr20.bed", header=FALSE) group_by(LCL_3UTR_neg_chr20, V7) %>% summarise(sum=sum(V8)) -> LCL_UTR_20_cov plot(LCL_UTR_20_cov$sum, ylab="mean expression", xlab="UTR base position", main="Chr 20 3' UTR coverage LCL")</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-14-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>utr_lclneg_unique= unique(LCL_3UTR_neg_chr20[,1:3]) summary(utr_lclneg_unique$V3-utr_lclneg_unique$V2)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 2.0 101.0 172.0 582.8 576.2 9106.0 </code></pre> <p>Plot a little past the median:</p> <pre class="r"><code>plot(LCL_UTR_20_cov$sum, ylab="mean expression", xlab="UTR base position", main="Chr 20 3' UTR coverage LCL", xlim=c(0,200))</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-16-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>LCL_3UTR_pos_chr1= read.table("../data/LCL_3utrAB_pos.chr1.bed", header=FALSE) group_by(LCL_3UTR_pos_chr1, V7) %>% summarise(sum=sum(V8)) -> LCL_UTR_1_cov plot(LCL_UTR_1_cov$sum, ylab="mean expression", xlab="UTR base position", main="Chr 1 3' UTR coverage LCL")</code></pre> <p><img src="figure/UTR_coverage.Rmd/unnamed-chunk-17-1.png" width="672" style="display: block; margin: auto;" /></p> </div> <div id="session-information" class="section level2"> <h2>Session information</h2> <!-- Insert the session information into the document --> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.2 (2017-09-28) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] dplyr_0.7.4 loaded via a namespace (and not attached): [1] Rcpp_0.12.15 assertthat_0.2.0 digest_0.6.14 rprojroot_1.3-2 [5] R6_2.2.2 backports_1.1.2 git2r_0.21.0 magrittr_1.5 [9] evaluate_0.10.1 pillar_1.1.0 rlang_0.1.6 stringi_1.1.6 [13] bindrcpp_0.2 rmarkdown_1.8.5 tools_3.4.2 stringr_1.2.0 [17] glue_1.2.0 yaml_2.1.16 compiler_3.4.2 pkgconfig_2.0.1 [21] htmltools_0.3.6 bindr_0.1 knitr_1.18 tibble_1.4.2 </code></pre> </div> <hr> <p> This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> <script type="text/javascript"> /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname /* * * DON'T EDIT BELOW THIS LINE * * */ (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> --> </div> </div> </div> <script> // add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.header').parent('thead').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); }); </script> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>