<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml" lang="en">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Brina Mittleman" />

<meta name="date" content="2017-11-06" />

<title>Initial Data Exploration Netseq1</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-1.1/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  /* <pre> blocks that carry no class attribute get a plain white background. */
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
// Schedule highlight.js on the next tick, but only when the library is present
// and the document had already finished loading by the time this script runs.
(function () {
  var alreadyLoaded = document.readyState === "complete";
  if (!window.hljs || !alreadyLoaded) {
    return;
  }
  window.setTimeout(function () {
    hljs.initHighlighting();
  }, 0);
})();
</script>



<style type="text/css">
/* Heading scale: fixed pixel font sizes for h1 through h6. The document title
   (h1.title) is set slightly larger than a regular h1. */
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
/* Left-align header cells of .table tables unless the cell carries an explicit
   align attribute. */
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
/* Page container: centred horizontally with a capped width. Note that a later
   `div.main-container` rule in this file sets max-width to 1200px and, being
   more specific, takes precedence over the 940px declared here. */
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
/* Code text inherits the surrounding colour and gets a faint dark tint behind it. */
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
/* Images never grow wider than their container; height follows the width. */
img {
  max-width:100%;
  height: auto;
}
/* Space above the content of a tabbed pane. */
.tabbed-pane {
  padding-top: 12px;
}
/* NOTE(review): this removes the focus outline from code-folding buttons and
   no replacement focus style is visible in this file; consider adding a
   :focus-visible style so keyboard users can still see focus. */
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* leave room at the top for the fixed bootstrap navbar, plus some space at the bottom */
body {
  padding-bottom: 40px;
  padding-top: 51px;
}

/* anchor links would otherwise land underneath the fixed navbar: pad every
   section heading by 56px and pull it back up by the same amount */
.section h1,
.section h2,
.section h3,
.section h4,
.section h5,
.section h6 {
  margin-top: -56px;
  padding-top: 56px;
}
</style>

<script>
// Manage the active state of the menu based on the page currently shown.
$(document).ready(function () {
  // File name of the current page: everything after the last "/" in the path.
  // Both values are declared with `var` so they stay local to this handler
  // instead of leaking an implicit global `href` (which would also throw if
  // this code ever ran in strict mode).
  var path = window.location.pathname;
  var href = path.slice(path.lastIndexOf('/') + 1);

  // A path ending in "/" has no file name; treat it as index.html.
  if (href === "") {
    href = "index.html";
  }

  // Anchors whose href attribute equals that file name (the navbar links in
  // this page use bare file names such as "index.html").
  var menuAnchor = $('a[href="' + href + '"]');

  // mark the link's parent element active
  menuAnchor.parent().addClass('active');

  // if the link sits inside a dropdown menu, mark that menu active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
// Once the DOM is ready, call the tabsets helper with "TOC".
$(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
// Build the table of contents with the tocify plugin once the DOM is ready.
$(document).ready(function () {

    // Sections flagged toc-ignore: move the flag from the wrapping section div
    // onto its direct heading children.
    var ignoredSections = $('div.section.toc-ignore');
    ignoredSections.removeClass('toc-ignore');
    ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // Turn heading text into an anchor hash: drop the characters . \ / ? & ! # < >,
    // replace every whitespace character with "_", then lowercase the result.
    function makeHash(text) {
      var stripped = text.replace(/[.\\/?&!#<>]/g, '');
      return stripped.replace(/\s/g, '_').toLowerCase();
    }

    // Settings handed to tocify.
    var tocSettings = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      hashGenerator: makeHash,
      ignoreSelector: ".toc-ignore",
      scrollTo: 0,
      showAndHide: true,
      smoothScroll: true
    };

    // Attach tocify to the #TOC container.
    $("#TOC").tocify(tocSettings);
});
</script>

<style type="text/css">

/* Spacing around the table-of-contents container. */
#TOC {
  margin: 25px 0 20px;
}

/* Viewports up to 768px wide: position the TOC relatively at full width. */
@media (max-width: 768px) {
  #TOC {
    width: 100%;
    position: relative;
  }
}

/* Horizontal padding for the main content column. */
.toc-content {
  padding-right: 40px;
  padding-left: 30px;
}

/* Cap on the overall page width. */
div.main-container {
  max-width: 1200px;
}

/* Default sizing of the tocify widget. */
div.tocify {
  max-height: 85%;
  max-width: 260px;
  width: 20%;
}

/* Viewports 768px to 991px wide: give the widget a larger share of the width. */
@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

/* Viewports up to 767px wide: widget spans the full width with no cap. */
@media (max-width: 767px) {
  div.tocify {
    max-width: none;
    width: 100%;
  }
}

/* Line height for TOC lists and entries. */
.tocify ul,
.tocify li {
  line-height: 20px;
}

/* Entries under a subheader: slightly smaller text, indented via padding. */
.tocify-subheader .tocify-item {
  text-indent: 0;
  padding-left: 25px;
  font-size: 0.90em;
}

/* Square corners on TOC list items. */
.tocify .list-group-item {
  border-radius: 0;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">Net-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="index.html">Home</a>
</li>
<li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/brimittleman/Net-seq">
    <span class="fa fa-github" aria-hidden="true"></span>
    <span class="sr-only">Source code on GitHub</span>
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Initial Data Exploration Netseq1</h1>
<h4 class="author"><em>Brina Mittleman</em></h4>
<h4 class="date"><em>2017-11-06</em></h4>

</div>


<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2017-11-27</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> c9cf54b</p>
<!-- Add your analysis here -->
<p>I will use this analysis to look at initial data QC and points of interest.</p>
<p>First I looked at the number of reads that map to the genome before and after the UMI deduplication step.</p>
<p><code>samtools view -c -F 4</code></p>
<p>For flag info: <a href="https://broadinstitute.github.io/picard/explain-flags.html" class="uri">https://broadinstitute.github.io/picard/explain-flags.html</a></p>
<p>Mayer data: fastq: 137281933<br />
sorted: 120124203 dedup: 2262387<br />
dedup/sorted: 0.01883373</p>
<pre class="r"><code>library= c( &quot;18486-dep&quot;, &quot;18508-dep&quot;, &quot;18508-nondep&quot;, &quot;19238-dep&quot;, &quot;mayer&quot;)

fastq= c( 45803834, 70776230, 77223987, 113160855, 137281933)  

sorted= c(17336796,  43247747, 50189574, 40420633, 17157730 )

dedup= c(1533069,  1776330,1919904,
1870359,2262387)

perc= dedup/sorted

reads_mapped_dedup= data.frame(rbind(library, fastq, sorted, dedup, perc))

reads_mapped_dedup</code></pre>
<pre><code>                        X1                 X2                 X3
library          18486-dep          18508-dep       18508-nondep
fastq             45803834           70776230           77223987
sorted            17336796           43247747           50189574
dedup              1533069            1776330            1919904
perc    0.0884286231435151 0.0410733534859053 0.0382530443474177
                        X4                X5
library          19238-dep             mayer
fastq            113160855         137281933
sorted            40420633          17157730
dedup              1870359           2262387
perc    0.0462723827209732 0.131858177043234</code></pre>
<pre class="r"><code>total_reads= sum(fastq)

sorted/fastq</code></pre>
<pre><code>[1] 0.3785010 0.6110490 0.6499221 0.3571962 0.1249817</code></pre>
<p>Make a stacked bar plot to show complexity and coverage.<br />
library, fastq, mapped, dedup</p>
<pre class="r"><code>counts= rbind(fastq, sorted, dedup)
colnames(counts)= library
count_plot=barplot(as.matrix(counts), main=&quot;Counts for coverage and complexity&quot;,
  xlab=&quot;Library&quot;, col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;),
    ylab=&quot;Read counts&quot;)
legend(&quot;topleft&quot;, legend = c(&quot;total&quot;, &quot;mapped&quot;, &quot;UMI&quot;), col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;), pch=20, cex = .75)</code></pre>
<p><img src="figure/initial.data.exploration.Rmd/unnamed-chunk-2-1.png" alt="Stacked bar plot of total, mapped, and UMI-deduplicated read counts for each library" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>percent_mapped= sorted/fastq
percent_UMI= dedup/fastq
percent_not_mapped= 1- percent_mapped - percent_UMI

prop=rbind(percent_not_mapped, percent_mapped, percent_UMI)
colnames(prop)= library
prop_plot=barplot(as.matrix(prop), main=&quot;Proportions for coverage and complexity&quot;,
  xlab=&quot;Library&quot;, col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;),
    ylab=&quot;Proportion of sequenced reads&quot;)
legend(&quot;bottomright&quot;, legend = c(&quot;Un-mapped&quot;, &quot;mapped&quot;, &quot;UMI&quot;), col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;), pch=20, cex = 0.75)</code></pre>
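<p><img src="figure/initial.data.exploration.Rmd/unnamed-chunk-3-1.png" alt="Stacked bar plot of the proportion of sequenced reads that are un-mapped, mapped, and UMI-deduplicated for each library" width="672" style="display: block; margin: auto;" /></p>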
<p>Undetermined is nothing: it corresponds to random reads</p>
<p>From meeting:</p>
<ul>
<li><p>Align with STAR and BWA to compare</p></li>
<li><p>compare to <a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&amp;lastVirtModeType=default&amp;lastVirtModeExtraState=&amp;virtModeType=default&amp;virtMode=0&amp;nonVirtPosition=&amp;position=chr7%3A5568588%2D5568715&amp;hgsid=642260271_FLEwDANY0lSWCFhW4QjbmbASDDnB" class="uri">http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&amp;lastVirtModeType=default&amp;lastVirtModeExtraState=&amp;virtModeType=default&amp;virtMode=0&amp;nonVirtPosition=&amp;position=chr7%3A5568588%2D5568715&amp;hgsid=642260271_FLEwDANY0lSWCFhW4QjbmbASDDnB</a></p></li>
</ul>
<div id="explore-different-mappers" class="section level3">
<h3>Explore different mappers</h3>
<pre class="bash"><code>
#index and prepare ref genome:

STAR --runThreadN 2 --runMode genomeGenerate --genomeDir /scratch/midway2/brimittleman/star_genome/ --genomeFastaFiles  /project2/gilad/briana/Net-seq/STAR_genome/hg19.fa  --sjdbGTFfile /project2/gilad/briana/Net-seq/Homo_sapiens.GRCh37.75.chr.gtf --sjdbOverhang 43

# --sjdbOverhang read length -1 


STAR --runThreadN 4 --genomeDir /scratch/midway2/brimittleman/star_genome/ --readFilesIn fastq_extr/SRR1575922_extracted.fastq --outFilterMultimapNmax 1 --outSAMtype BAM SortedByCoordinate --outStd BAM_SortedByCoordinate &gt; star/mayer_star_align.bam</code></pre>
<pre class="bash"><code>samtools sort -o star.sort/star_mayer.sort.bam star/mayer_star_align.bam
</code></pre>
<p>Run this on my data as well.</p>
<pre class="bash"><code>#!/bin/bash

#SBATCH --job-name=star_align_mayer
#SBATCH --time=8:00:00
#SBATCH --partition=broadwl
#SBATCH --mem=50G
#SBATCH --tasks-per-node=4 


module load Anaconda3
source activate net-seq

STAR --runThreadN 4 --genomeDir /scratch/midway2/brimittleman/star_genome/ --readFilesIn fastq_extr/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.fastq  --outFilterMultimapNmax 1 --outSAMtype BAM SortedByCoordinate --outStd BAM_SortedByCoordinate &gt; star/star_18486-dep.bam

#to run: sbatch --partition=broadwl --mem=50G</code></pre>
<p>Continue with the sort and index the bam.</p>
<pre class="bash"><code>samtools sort -o star.sort/star_18486_dep_sort.bam  star/star_18486-dep.bam
samtools index star_18486_dep_sort.bam</code></pre>
<p>Look at the percent mapped with star.</p>
<pre class="r"><code>samples=c(&quot;18486_dep&quot;, &quot;mayer&quot;)
fastq_star= c(45803834,137281933)
bam_star= c(1996777,1993674)
bam_star/fastq_star</code></pre>
<pre><code>[1] 0.04359410 0.01452248</code></pre>
<p>That's way too low. This didn't work.</p>
<p>In the log.Final file.</p>
<p>% of reads unmapped: too many mismatches | 0.00%<br />
% of reads unmapped: too short | 79.96%<br />
% of reads unmapped: other | 0.00%</p>
<p><code>--outFilterScoreMinOverLread 0 --outFilterMatchNminOverLread 0 --outFilterMatchNmin 0</code> Try adding these parameters to the Mayer mapping.</p>
<p>This run gave too many multi-map reads. 64.82%.</p>
<p>Try:<br />
<code>--outFilterScoreMinOverLread 0.3 --outFilterMatchNminOverLread 0.3</code><br />
% of reads mapped to too many loci | 63.75%</p>
<p>Test length of fastq reads:<br />
Mayer: total 137281933 avg=70.000000 stddev=0.000000 18486_dep: total 45803834 avg=44.000000 stddev=0.000000</p>
<p>Try clipping the last 10 bases with: <code>--clip3pNbases 10</code></p>
<ul>
<li>This didn't work for the Mayer data, but those reads are long. I will try it on ours.</li>
</ul>
<p>Our data:</p>
<ul>
<li>% of reads mapped to too many loci | 31.70%<br />
</li>
<li>% of reads unmapped: too short | 55.32%</li>
</ul>
<p>Other ways to fix this:</p>
<ul>
<li>try blasting the unmapped reads</li>
</ul>
</div>
<div id="look-into-bwa-mapping" class="section level3">
<h3>Look into BWA mapping</h3>
<p>BWA-backtrack - for Illumina seqs up to 100 bp</p>
<p>The first step is to construct an FM-index for the reference genome.</p>
<p>“bwa index -a bwtsw -p /scratch/midway2/brimittleman/BWA_genome/BWA.index STAR_genome/hg19.fa”</p>
<p>Added bwa to the environment</p>
<p>Mapping:</p>
<p>bwa aln</p>
<ul>
<li><p>creates the .sai index files</p></li>
<li><p>-n 0.01 1% mismatch allowed</p></li>
<li><p>-t 3 speed up by using 3 threads</p></li>
</ul>
<p>bwa samse</p>
<ul>
<li>generates alignments in a sam format</li>
</ul>
<pre class="bash"><code>
#$1 ref fa  
#$2 fastq 
#$3 output sai 


module load Anaconda3
source activate net-seq
module load bwa

bwa aln -t 3 -n 0.01 $1 $2 &gt; $3


#submit 
sbatch scripts/bwa_aln.sh hg19.copy.fa /project2/gilad/briana/Net-seq/Net-seq1/data/fastq_extr/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/BWA/bwa.18486.dep.sai</code></pre>
<pre class="bash"><code>

#SBATCH --job-name=BWA_samse
#SBATCH --time=8:00:00
#SBATCH --partition=broadwl
#SBATCH --mem=50G
#SBATCH --tasks-per-node=4
#SBATCH --mail-type=END

#$1 ref fasta
#$2 sai file  
#$3 fastq file  
#$4 output sam  


module load Anaconda3
source activate net-seq
module load bwa

bwa samse $1 $2 $3 &gt; $4


#run on mayer  
sbatch scripts/bwa_samse.sh hg19.copy.fa /project2/gilad/briana/Net-seq/data/bwa/bwa.mayer.sai /project2/gilad/briana/Net-seq/data/fastq_extr/SRR1575922_extracted.fastq  /project2/gilad/briana/Net-seq/data/bwa/bwa.mayer.sam


sbatch scripts/bwa_samse.sh hg19.copy.fa /project2/gilad/briana/Net-seq/data/bwa/bwa.mayer.cut3prime.sai  /project2/gilad/briana/Net-seq/data/fastq_extr/SRR1575922_extracted.fastq  /project2/gilad/briana/Net-seq/data/bwa/bwa.mayer.cut3prime.sam




#run on 18486 dep  
sbatch scripts/bwa_samse.sh hg19.copy.fa /project2/gilad/briana/Net-seq/Net-seq1/data/BWA/bwa.18486.dep.sai /project2/gilad/briana/Net-seq/Net-seq1/data/fastq_extr/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.fastq  /project2/gilad/briana/Net-seq/Net-seq1/data/BWA/bwa.18486.dep.sam
</code></pre>
<p>Sam to bam:<br />
<code>samtools view -S -b sample.sam &gt; sample.bam</code></p>
<p>Check how big the file is:</p>
<ul>
<li>18486_dep : 796546</li>
<li>Mayer: 117726</li>
</ul>
<p>This is super low mapping as well. Not sure what is going on.</p>
<p>For poor quality on the ends- add -q 15 to the bwa aln command. I am trying this on the mayer data.</p>
<ul>
<li>18486_dep : 805899<br />
</li>
<li>Mayer: 121892</li>
</ul>
<div id="rerun-star" class="section level4">
<h4>Rerun star:</h4>
<p>I deleted the reference genome and am reindexing and rebuilding it.</p>
</div>
<div id="cut-polya" class="section level4">
<h4>Cut polyA</h4>
<p>Code from Seb's snakemake will allow me to cut any read that has more than 6 As. It will then keep the read if it is at least 25 bases long after the cut. I will run this on the UMI-extracted fastq files.</p>
<p>This script is called cut_polyA.sh and is in the /project2/gilad/briana/Net-seq/scripts directory.</p>
<pre class="bash"><code>#!/bin/bash

#SBATCH --job-name=cut_polyA
#SBATCH --time=8:00:00
#SBATCH --partition=broadwl
#SBATCH --mem=8G
#SBATCH --tasks-per-node=4
#SBATCH --mail-type=END

module load Anaconda3
source activate net-seq

#$1 fastq file 
#$2 output cut file name 

cutadapt --minimum-length 25 -a AAAAAA -o $2 $1</code></pre>
<p>Run this script first on 18486 dep.</p>
<pre class="bash"><code>sbatch scripts/cut_polyA.sh /project2/gilad/briana/Net-seq/Net-seq1/data/fastq_extr/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/cut_polyA/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.cutPolyA.fastq  
</code></pre>
<p>Pre-cut: 45803834<br />
Cut: 40905492</p>
<pre class="bash"><code>sbatch scripts/star_allign.sh /project2/gilad/briana/Net-seq/Net-seq1/data/cut_polyA/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.cutPolyA.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/star/star_18486-dep_cutPolyA.bam</code></pre>
<p>Cut: 40905492<br />
mapped: 1350684</p>
</div>
<div id="subjunc-with-alljunctions" class="section level4">
<h4>Subjunc with <code>--allJunctions</code></h4>
<p>I am running subjunc on the polyA-cut reads with <code>--allJunctions</code> to map canonical and non-canonical exon-exon boundaries.</p>
<p>This script is called subjunc_all_junc.sh and is in the /project2/gilad/briana/Net-seq/scripts directory.</p>
<pre class="bash"><code>#!/bin/bash

#SBATCH --job-name=cut_polyA
#SBATCH --time=8:00:00
#SBATCH --partition=broadwl
#SBATCH --mem=20G
#SBATCH --tasks-per-node=4
#SBATCH --mail-type=END

module load Anaconda3
source activate net-seq

#$1 input extracted fast q  
#$2 output bam 


subjunc --allJunctions -i /project2/gilad/briana/Net-seq/Net-seq1/genome/ -r $1 -T 8 &gt; $2</code></pre>
<pre class="bash"><code>#slurm-40290339.out

sbatch scripts/subjunc_all_junc.sh /project2/gilad/briana/Net-seq/Net-seq1/data/cut_polyA/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.cutPolyA.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/subjunc_all_junc/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.cutPolyA.all.junc.bam
</code></pre>
<p>Before, subjunc mapped 37.85%. Now it mapped 38.4%.</p>
<p>I am also going to run this on the non polyA cut samples.</p>
<pre class="bash"><code>#slurm-40290637.out

sbatch scripts/subjunc_all_junc.sh /project2/gilad/briana/Net-seq/Net-seq1/data/fastq_extr/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/subjunc_all_junc/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001_extracted.all.junc.bam
</code></pre>
<p>Before, subjunc mapped 37.85%. This run mapped 53.1%.</p>
<p>Try for mayer:</p>
<pre class="bash"><code>sbatch scripts/subjunc_all_junc.sh /project2/gilad/briana/Net-seq/data/fastq_extr/SRR1575922_extracted.fastq /project2/gilad/briana/Net-seq/data/subjunc_all_junc/mayer.extracted.subjunc.all.junc.bam

#508 dep
sbatch scripts/subjunc_all_junc.sh /project2/gilad/briana/Net-seq/Net-seq1/data/fastq_extr/YG-SP-NET1-18508-dep-2017-10-13_S2_R1_001_extracted.fastq /project2/gilad/briana/Net-seq/Net-seq1/data/subjunc_all_junc/YG-SP-NET1-18508-dep-2017-10-13_S2_R1_001_extracted.all.junc.bam  </code></pre>
<p>508_dep<br />
dep fastq: 70776230<br />
mapped: 54088856</p>
<p>This mapped 76.4%</p>
<pre class="bash"><code>samtools sort -o {output} {input}
samtools index {input}
umi_tools dedup -I {input.bam} -S {output}</code></pre>
</div>
</div>
<div id="extend-mayer-data-exploration" class="section level3">
<h3>Extend Mayer data exploration</h3>
<p>I downloaded HeLa S3 Rep1 and the other run for HEK293T Rep1. I ran the snake file in the mayer.data directory as Mayer_hek and Mayer_hela.</p>
<p>mayer_hek</p>
<ul>
<li><p>reads: 358754064</p></li>
<li><p>mapped: 128152521</p></li>
<li><p>deduplication: 4392741</p></li>
</ul>
<p>mayer_hela</p>
<ul>
<li><p>reads: 175303176</p></li>
<li><p>mapped: 51362897</p></li>
<li><p>deduplication: 6314281</p></li>
</ul>
<pre class="r"><code>m_fastq= c(358754064,175303176)
m_sort= c(128152521 , 51362897)
m_dedup= c(4392741, 6314281 )
mayer= c(&quot;Hek&quot;, &quot;Hela&quot;)
counts_m= rbind(m_fastq, m_sort, m_dedup)
colnames(counts_m)= mayer
count_plot_m=barplot(as.matrix(counts_m), main=&quot;Counts for coverage and complexity&quot;,
  xlab=&quot;Library&quot;, col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;),
    ylab=&quot;Read counts&quot;)
legend(&quot;topright&quot;, legend = c(&quot;total&quot;, &quot;mapped&quot;, &quot;UMI&quot;), col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;), pch=20, cex = .75)</code></pre>
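<p><img src="figure/initial.data.exploration.Rmd/unnamed-chunk-19-1.png" alt="Stacked bar plot of total, mapped, and UMI-deduplicated read counts for the Mayer Hek and Hela libraries" width="672" style="display: block; margin: auto;" /></p>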
<pre class="r"><code>percent_mapped_m= m_sort/m_fastq
percent_UMI_m= m_dedup/m_fastq
percent_not_mapped_m= 1- percent_mapped_m - percent_UMI_m

prop_m=rbind(percent_not_mapped_m, percent_mapped_m, percent_UMI_m)
colnames(prop_m)= mayer
prop_plot_m=barplot(as.matrix(prop_m), main=&quot;Proportions for coverage and complexity&quot;,
  xlab=&quot;Library&quot;, col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;),
    ylab=&quot;Proportion of sequenced reads&quot;)
legend(&quot;bottomright&quot;, legend = c(&quot;Un-mapped&quot;, &quot;mapped&quot;, &quot;UMI&quot;), col=c(&quot;lightskyblue2&quot;,&quot;dodgerblue1&quot;,&quot;navy&quot;), pch=20, cex = 0.75)</code></pre>
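<p><img src="figure/initial.data.exploration.Rmd/unnamed-chunk-20-1.png" alt="Stacked bar plot of the proportion of sequenced reads that are un-mapped, mapped, and UMI-deduplicated for the Mayer Hek and Hela libraries" width="672" style="display: block; margin: auto;" /></p>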
</div>
<div id="session-information" class="section level2">
<h2>Session information</h2>
<!-- Insert the session information into the document -->
<pre class="r"><code>sessionInfo()</code></pre>
<pre><code>R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.6

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_3.4.2  backports_1.1.1 magrittr_1.5    rprojroot_1.2  
 [5] tools_3.4.2     htmltools_0.3.6 yaml_2.1.14     Rcpp_0.12.13   
 [9] stringi_1.1.5   rmarkdown_1.6   knitr_1.17      git2r_0.19.0   
[13] stringr_1.2.0   digest_0.6.12   evaluate_0.10.1</code></pre>
</div>

<hr>
<p>
    This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->


</div>
</div>

</div>

<script>

// add bootstrap table styles to pandoc tables
// Give pandoc-generated tables the bootstrap table classes. A table qualifies
// when it directly contains a <thead> that directly contains a tr.header row.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var pandocTables = headerRows.parent('thead').parent('table');
  pandocTables.addClass('table table-condensed');
}

// Apply the styling once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Create a script element pointing at the hosted MathJax build and append it
  // to the document head so that it is loaded at runtime.
  (function () {
    var head = document.getElementsByTagName("head")[0];
    var loader = document.createElement("script");
    loader.type = "text/javascript";
    loader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    head.appendChild(loader);
  })();
</script>

</body>
</html>