<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

<meta name="author" content="Briana Mittleman" />
<meta name="date" content="2017-11-30" />

<title>Check read counts in binned genome</title>

<!-- Site libraries: jQuery must load first; bootstrap, jQuery UI, tocify and tabsets depend on it. -->
<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-1.1/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
/* Run highlight.js only when it is loaded and the document has already finished loading. */
if (window.hljs && document.readyState && document.readyState === "complete") {
  window.setTimeout(function() {
    hljs.initHighlighting();
  }, 0);
}
</script>

<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>

</head>

<body>

<style type="text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>

<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar) */
.section h1 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h2 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h3 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h4 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h5 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
/* manage active state of menu based on current page */
$(document).ready(function () {
  /* File name of the current page: everything after the last "/" of the path. */
  var href = window.location.pathname;
  href = href.substr(href.lastIndexOf('/') + 1);
  /* A path ending in "/" has no file name; treat it as the home page. */
  if (href === "") {
    href = "index.html";
  }
  var menuAnchor = $('a[href="' + href + '"]');

  /* mark it active */
  menuAnchor.parent().addClass('active');

  /* if it's got a parent navbar menu mark it active as well */
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>

<div class="container-fluid main-container">

<!-- tabsets -->
<script>
$(document).ready(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->

<script>
/* Build the floating table of contents in #TOC from the headings inside .toc-content. */
$(document).ready(function () {

  /* move toc-ignore selectors from section div to header */
  $('div.section.toc-ignore')
    .removeClass('toc-ignore')
    .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  /* establish options */
  var options = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    /* Anchor hash: strip punctuation, turn whitespace into "_", lowercase. */
    hashGenerator: function (text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0
  };
  options.showAndHide = true;
  options.smoothScroll = true;

  /* tocify */
  $("#TOC").tocify(options);
});
</script>

<style type="text/css">

#TOC {
  margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}

.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

div.main-container {
  max-width: 1200px;
}

div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

.tocify .list-group-item {
  border-radius: 0px;
}

</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">

<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <!-- Text alternative for the icon-only toggle button. -->
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">Net-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="about.html">About</a>
        </li>
        <li>
          <a href="license.html">License</a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
          <!-- Icon-only link: the label supplies its accessible name. -->
          <a href="https://github.com/brimittleman/Net-seq" aria-label="Net-seq repository on GitHub">
            <span class="fa fa-github" aria-hidden="true"></span>
          </a>
        </li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">

<h1 class="title toc-ignore">Check read counts in binned genome</h1>
<h4 class="author"><em>Briana Mittleman</em></h4>
<h4 class="date"><em>2017-11-30</em></h4>

</div>

<!-- The file analysis/chunks.R contains chunks that define default settings shared across the workflowr files.
--> <!-- Update knitr chunk options --> <!-- Insert the date the file was last updated --> <p><strong>Last updated:</strong> 2017-12-04</p> <!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed --> <p><strong>Code version:</strong> a63bb5f</p> <!-- Add your analysis here --> <div id="bash-script" class="section level3"> <h3>Bash script</h3> <p>Split genome into 200bp windows and run the coverage command:</p> <p>/project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed</p> <p>/project2/gilad/briana/Net-seq/ref_genes/windows_200</p> <p>make window_200_cov.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=window_200_cov #SBATCH --time=8:00:00 #SBATCH --partition=broadwl #SBATCH --mem=50G #SBATCH --tasks-per-node=4 #SBATCH --mail-type=END bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18486_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_18486.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18508_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_18508_dep.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18508_nondep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_18508_nondep.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_19238_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_19238.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b 
/project2/gilad/briana/Net-seq/data/bed_sort/mayer_SRR1575922_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_mayer.txt #bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed/merged_Net1.chr.bed > /project2/gilad/briana/Net-seq/ref_genes/windows_200/window_200_cov_merged.txt #step memory exceeded!</code></pre> </div> <div id="import-data" class="section level3"> <h3>Import data</h3> <pre class="r"><code>window_200_18486=read.csv("../data/windows_200/window_200_cov_18486.txt", header=FALSE, sep="\t") window_200_18508_dep=read.csv("../data/windows_200/window_200_cov_18508_dep.txt", header=FALSE, sep="\t") window_200_18508_nondep=read.csv("../data/windows_200/window_200_cov_18508_nondep.txt", header=FALSE, sep="\t") window_200_19238=read.csv("../data/windows_200/window_200_cov_19238.txt", header=FALSE, sep="\t") window_200_mayer= read.csv("../data/windows_200/window_200_cov_mayer.txt", header=FALSE, sep="\t")</code></pre> <p>Add col labels to each file:</p> <pre class="r"><code>colnames(window_200_18486) = c("chr", "start", "end", "count") colnames(window_200_18508_dep) = c("chr", "start", "end", "count") colnames(window_200_18508_nondep) = c("chr", "start", "end", "count") colnames(window_200_19238) = c("chr", "start", "end", "count") colnames(window_200_mayer) = c("chr", "start", "end", "count")</code></pre> </div> <div id="plot-data" class="section level3"> <h3>Plot data</h3> <p>Data I want to look at:</p> <ul> <li>summary per library</li> </ul> <pre class="r"><code>summary(window_200_18486$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 1 0 4076520 </code></pre> <pre class="r"><code>summary(window_200_18508_dep$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 21 0 27069584 </code></pre> <pre class="r"><code>summary(window_200_18508_nondep$count)</code></pre> <pre><code> Min. 1st Qu. 
Median Mean 3rd Qu. Max. 0 0 0 23 0 30571781 </code></pre> <pre class="r"><code>summary(window_200_19238$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 4 0 13033253 </code></pre> <pre class="r"><code>summary(window_200_mayer$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0 0.0 0.0 113.3 0.0 701170.0 </code></pre> <p>Use a plot to see the distribution:</p> <ul> <li>summaries not including zero</li> </ul> <p>Make dataframe excluding the zeros:</p> <pre class="r"><code>window_200_18486_non0= window_200_18486[window_200_18486$count!=0,] window_200_18508_dep_non0= window_200_18508_dep[window_200_18508_dep$count!=0,] window_200_18508_nondep_non0= window_200_18508_nondep[window_200_18508_nondep$count!=0,] window_200_19238_non0= window_200_19238[window_200_19238$count!=0,] window_200_mayer_non0= window_200_mayer[window_200_mayer$count!=0,]</code></pre> <p>summarise</p> <pre class="r"><code>summary(window_200_18486_non0$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 1 1 1 45 3 4076520 </code></pre> <pre class="r"><code>summary(window_200_18508_dep_non0$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 1 1 1 624 3 27069584 </code></pre> <pre class="r"><code>summary(window_200_18508_nondep_non0$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 1 1 1 633 3 30571781 </code></pre> <pre class="r"><code>summary(window_200_19238_non0$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 1 1 1 149 3 13033253 </code></pre> <pre class="r"><code>summary(window_200_mayer_non0$count)</code></pre> <pre><code> Min. 1st Qu. Median Mean 3rd Qu. Max. 
1 1 1 2200 2 701170 </code></pre> <pre class="r"><code>plot(sort(log(window_200_19238_non0$count), decreasing=TRUE))</code></pre> <p><img src="figure/bin_windows.Rmd/unnamed-chunk-7-1.png" width="672" style="display: block; margin: auto;" /></p> <ul> <li>number of entries that are non zero</li> </ul> <pre class="r"><code>x= nrow(window_200_18486) barplot(c(nrow(window_200_18486_non0)/x,nrow(window_200_18508_dep_non0)/x,nrow(window_200_18508_nondep_non0)/x, nrow(window_200_19238_non0)/x), main="Proportion of detected bins", names=c("18486", "18508 \n dep", "18508 \n nondep", "19238"))</code></pre> <p><img src="figure/bin_windows.Rmd/unnamed-chunk-8-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>nrow(window_200_mayer_non0)/x</code></pre> <pre><code>[1] 0.05149546</code></pre> </div> <div id="integrate-sets" class="section level3"> <h3>Integrate sets:</h3> <pre class="r"><code>window_non_0_all= nrow(window_200_18486[window_200_18486$count!=0 | window_200_18508_dep$count!=0 | window_200_18486$count!=0,] ) prop_window_non0= window_non_0_all/x prop_window_non0</code></pre> <pre><code>[1] 0.05416194</code></pre> <pre class="r"><code>window_non_0_2= nrow(window_200_18486[window_200_18486$count!=0 | window_200_18508_dep$count!=0,] ) prop_window_non0_2= window_non_0_2/x prop_window_non0_2</code></pre> <pre><code>[1] 0.05416194</code></pre> <pre class="r"><code>window_non_0_2b= nrow(window_200_18486[window_200_18486$count!=0 |window_200_18486$count!=0,] ) prop_window_non0_2b= window_non_0_2b/x prop_window_non0_2b</code></pre> <pre><code>[1] 0.03282309</code></pre> </div> <div id="explore-bin-overlap" class="section level3"> <h3>Explore bin overlap</h3> <p>I will combine the depleted samples counts in one data frame:</p> <pre class="r"><code>window_200_3lib= cbind(window_200_18486, window_200_18508_dep$count, window_200_19238$count) colnames(window_200_3lib)= c("chr", "start", "end", "18486", "18508", "19238")</code></pre> <p>Make a vector 
with how many of the libraries have coverage for each bin</p> <pre class="r"><code>sum_vec= apply(window_200_3lib[,4:6],1,function(x)sum(x>=1)) window_200_3lib= cbind(window_200_3lib, sum_vec)</code></pre> <p>Explore the results:</p> <pre class="r"><code>bin_0= sum(sum_vec==0) bin_1= sum(sum_vec==1) bin_2= sum(sum_vec==2) bin_3= sum(sum_vec==3) prop0=bin_0/x prop1=bin_1/x prop2=bin_2/x prop3=bin_3/x barplot_prop=(barplot(c(prop1, prop2, prop3),names = c("1 bin", "2 bins", "3 bins"), main="Proportion of bins with coverage in 3 libaries", ylab="proportion", xlab="library" ))</code></pre> <p><img src="figure/bin_windows.Rmd/unnamed-chunk-14-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>prop_vec=c(prop0,prop1, prop2, prop3) names_vec= c("0 bins","1 bin", "2 bins", "3 bins") prop_table=rbind(names_vec,prop_vec) prop_table</code></pre> <pre><code> [,1] [,2] [,3] names_vec "0 bins" "1 bin" "2 bins" prop_vec "0.934702546224945" "0.0453430987382321" "0.0115298190107529" [,4] names_vec "3 bins" prop_vec "0.00842453602607038"</code></pre> </div> <div id="perform-on-mayer-data-or-multiple-lanes" class="section level3"> <h3>Perform on mayer data or multiple lanes</h3> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=window_200_cov_mayer #SBATCH --time=8:00:00 #SBATCH --partition=broadwl #SBATCH --mem=50G #SBATCH --tasks-per-node=4 #SBATCH --mail-type=END module load bedtools bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_200bp_window.bed -b /project2/gilad/briana/mayer.data/mayer_hek/data/bed/mayer_hek-sort.chr.bed > /project2/gilad/briana/mayer.data/mayer_hek/data/window_200_cov_mayer_hek.txt</code></pre> <p>Not enough memory</p> </div> <div id="bin-the-gene-file" class="section level3"> <h3>Bin the gene file:</h3> <pre class="bash"><code>bedtools makewindows -b ref_seq_gene_hg19 -w 200 > ref_seq_gene_hg16_bins.bed</code></pre> <p>Make a bash script to get coverage in these bins. 
This is in /project2/gilad/briana/Net-seq/ref_genes/gene_windows_200/gene_window_200.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=gene_window_200_cov #SBATCH --time=8:00:00 #SBATCH --partition=broadwl #SBATCH --mem=30G #SBATCH --tasks-per-node=4 #SBATCH --mail-type=END bedtools coverage -counts -a /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg16_bins.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18486_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/gene_windows_200/gene_window_cov_18486.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg16_bins.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18508_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/gene_windows_200/gene_window_cov_18508_dep.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg16_bins.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_19238_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/gene_windows_200/gene_window_cov_19238.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg16_bins.bed -b /project2/gilad/briana/Net-seq/data/bed_sort/mayer_SRR1575922_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/gene_windows_200/gene_window_cov_mayer.txt </code></pre> <p><strong>This file has 22071951 bins. This is not a good representation because some locations in the genome are in multiple genes. I need to think about this more. Maybe I can only keep unique windows.</strong></p> <p><code>ref_seq_gene_hg16_bins.bed | uniq -u | wc -l</code></p> <p><strong>This leaves 5023160 lines. This is still not right, because if the genes start at different positions but overlap, then the lines wouldn't be unique.</strong></p> <p>Use bedtools intersect -a genome -b genes -wa:</p> <ul> <li><p>a is the genome windows</p></li> <li><p>b is the gene file</p></li> </ul> <p>This should keep the windows of a that intersect b. 
This means I will only have windows that contain a gene.</p> <pre class="bash"><code> bedtools intersect -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_sort_2.bed -b /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg19_small.bed -wa > /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene.bed </code></pre> <p>ERROR:</p> <p>ERROR: Received illegal bin number 262143 from getBin call.<br /> ERROR: Unable to add record to tree</p> <p>Potential problem: hg19 file starts with chrom 10 and the gene file starts with chr1. Maybe try sorting the gene file the way I sorted the hg19 file.</p> <p>Get the chr# list from the genes file using:</p> <p><code>cut -f 1 | uniq /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg19_small.bed > names.txt</code></p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=sortgene #SBATCH --time=8:00:00 #SBATCH --partition=broadwl #SBATCH --mem=20G #SBATCH --tasks-per-node=4 #SBATCH --mail-type=END module load bedtools cd /project2/gilad/briana/Net-seq/genome_bed bedtools sort -faidx names_uniq.txt -i hg19_200bp_window.bed -o hg19_windows_sort.bed</code></pre> <p>ERROR: Chromosome “chr1” undefined in names_uniq.txt</p> <p>Try:</p> <p><code>sort -k 1,1 -k2,2n hg19_200bp_window.bed > hg19_windows_sort_2.bed</code></p> <p>Cut the gene file to only have the first 3 columns with:</p> <p><code>cut -f 1,2,3 ref_seq_gene_hg19 > ref_seq_gene_hg19_3col.bed</code></p> <p>Run intersect:</p> <pre class="bash"><code>bedtools intersect -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_sort_2.bed -b /project2/gilad/briana/Net-seq/ref_genes/ref_seq_gene_hg19_3col.bed -wa > /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene.bed </code></pre> <p>This worked but you still have multiples. 
I will take the unique lines of this with:</p> <p><code>cat hg19_windows_with_gene.bed | uniq > hg19_windows_with_gene_uniq.bed</code></p> <p>Now I can run the coverage counts on this file to see how many of the genome bins that had at least one gene have coverage.</p> <p>Call this script uniq_gene_window.sh in /project2/gilad/briana/Net-seq/ref_genes/uniq_gene_windows_200</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=uniq_gene_window #SBATCH --time=8:00:00 #SBATCH --partition=broadwl #SBATCH --mem=30G #SBATCH --tasks-per-node=4 #SBATCH --mail-type=END bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene_uniq.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18486_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/uniq_gene_windows_200/uniq_gene_window_cov_18486.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene_uniq.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_18508_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/uniq_gene_windows_200/uniq_gene_window_cov_18508_dep.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene_uniq.bed -b /project2/gilad/briana/Net-seq/Net-seq1/data/bed_sort/net1_19238_dep_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/uniq_gene_windows_200/uniq_gene_window_cov_19238.txt bedtools coverage -counts -a /project2/gilad/briana/Net-seq/genome_bed/hg19_windows_with_gene_uniq.bed -b /project2/gilad/briana/Net-seq/data/bed_sort/mayer_SRR1575922_chr_sort.bed > /project2/gilad/briana/Net-seq/ref_genes/uniq_gene_windows_200/uniq_gene_window_cov_mayer.txt </code></pre> <div id="load-uniq-gene-files" class="section level4"> <h4>Load uniq gene files:</h4> <p>These are the unique bins in the genome 200bp window file that include a gene.</p> <pre class="r"><code>uniq_gene_window_18486=read.csv("../data/uniq_genes/uniq_gene_window_cov_18486.txt", 
header=FALSE, sep="\t") uniq_gene_window_18508=read.csv("../data/uniq_genes/uniq_gene_window_cov_18508_dep.txt", header=FALSE, sep="\t") uniq_gene_window_19238=read.csv("../data/uniq_genes/uniq_gene_window_cov_19238.txt", header=FALSE, sep="\t") uniq_gene_window_mayer=read.csv("../data/uniq_genes/uniq_gene_window_cov_mayer.txt", header=FALSE, sep="\t")</code></pre> <pre class="r"><code>colnames(uniq_gene_window_mayer)= c("chr", "start", "end", "count") colnames(uniq_gene_window_19238)= c("chr", "start", "end", "count") colnames(uniq_gene_window_18486)= c("chr", "start", "end", "count") colnames(uniq_gene_window_18508)= c("chr", "start", "end", "count")</code></pre> <p>Look at the number of windows with coverage:</p> <pre class="r"><code>uniq_gene_window_18486_no0= uniq_gene_window_18486[uniq_gene_window_18486$count!=0,] uniq_gene_window_18508_no0= uniq_gene_window_18508[uniq_gene_window_18508$count!=0,] uniq_gene_window_19238_no0= uniq_gene_window_19238[uniq_gene_window_19238$count!=0,] uniq_gene_window_mayer_no0= uniq_gene_window_mayer[uniq_gene_window_mayer$count!=0,]</code></pre> <p>Bar plot for coverage:</p> <pre class="r"><code>gene_windows= nrow(uniq_gene_window_mayer) barplot(c(nrow(uniq_gene_window_18486_no0)/gene_windows, nrow(uniq_gene_window_18508_no0)/gene_windows, nrow(uniq_gene_window_19238_no0)/gene_windows, nrow(uniq_gene_window_mayer_no0)/gene_windows), main="Coverage for windows with genes", names= c("18486", "18508", "19238", "Mayer"), xlab="Library")</code></pre> <p><img src="figure/bin_windows.Rmd/unnamed-chunk-25-1.png" width="672" style="display: block; margin: auto;" /></p> <pre class="r"><code>mayer_gene_coverage=nrow(uniq_gene_window_mayer_no0)/gene_windows</code></pre> <p>Mayer has more coverage for this parameter. 
Their library has 0.0954862.</p> <p>Ours:</p> <ul> <li><p>18486: 0.0495328</p></li> <li><p>18508: 0.0460654</p></li> <li><p>19238: 0.0346009</p></li> </ul> </div> </div> <div id="session-information" class="section level3"> <h3>Session information</h3> <!-- Insert the session information into the document --> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.2 (2017-09-28) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base loaded via a namespace (and not attached): [1] compiler_3.4.2 backports_1.1.1 magrittr_1.5 rprojroot_1.2 [5] tools_3.4.2 htmltools_0.3.6 yaml_2.1.14 Rcpp_0.12.13 [9] stringi_1.1.5 rmarkdown_1.6 knitr_1.17 git2r_0.19.0 [13] stringr_1.2.0 digest_0.6.12 evaluate_0.10.1</code></pre> </div> <hr> <p> This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> <script type="text/javascript"> /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname /* * * DON'T EDIT BELOW THIS LINE * * */ (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> <noscript>Please enable JavaScript to view the <a 
href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> -->

</div>
</div>

</div>

<script>

/* add bootstrap table styles to pandoc tables */
function bootstrapStylePandocTables() {
  /* Targets every table whose thead contains a tr.header row. */
  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}

/* Apply the table styling once the DOM is ready. */
$(document).ready(function () {
  bootstrapStylePandocTables();
});

</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    /* Inject the MathJax loader into <head> at runtime. */
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>