<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Briana Mittleman" />

<meta name="date" content="2017-11-16" />

<title>Work on extracting mapping info from bam files</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-1.1/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
if (window.hljs && document.readyState && document.readyState === "complete") {
   window.setTimeout(function() {
      hljs.initHighlighting();
   }, 0);
}
</script>



<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar)  */
.section h1 {
  padding-top: 56px;
  margin-top: -56px;
}

.section h2 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h3 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h4 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h5 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// manage active state of menu based on current page
$(document).ready(function () {
  // active menu anchor
  href = window.location.pathname
  href = href.substr(href.lastIndexOf('/') + 1)
  if (href === "")
    href = "index.html";
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
$(document).ready(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
$(document).ready(function ()  {

    // move toc-ignore selectors from section div to header
    $('div.section.toc-ignore')
        .removeClass('toc-ignore')
        .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // establish options
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      hashGenerator: function (text) {
        return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0
    };
    options.showAndHide = true;
    options.smoothScroll = true;

    // tocify
    var toc = $("#TOC").tocify(options).data("toc-tocify");
});
</script>

<style type="text/css">

#TOC {
  margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}


.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

div.main-container {
  max-width: 1200px;
}

div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

.tocify .list-group-item {
  border-radius: 0px;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">Net-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="index.html">Home</a>
</li>
<li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/brimittleman/Net-seq">
    <span class="fa fa-github"></span>
     
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Work on extracting mapping info from bam files</h1>
<h4 class="author"><em>Briana Mittleman</em></h4>
<h4 class="date"><em>2017-11-16</em></h4>

</div>


<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2017-11-18</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> affe5d0</p>
<div id="mapped-stats" class="section level3">
<h3>Mapped stats</h3>
<p>Goal: Extract the number of bases that mapped and were soft clipped from bam files and make a histogram</p>
<p>Look first at the original bam files from the snake mapping.</p>
<p>To look at bam files in python, use pysam. I added this to my environment.</p>
<p>Process:</p>
<ul>
<li>Python Script: for simplicity at first, this file is called get_mapped.py and is in the /project2/gilad/briana/Net-seq/Net-seq1/data/sort/ directory. The bam file needs to have a bai file in teh same directory to work with the pysam program</li>
</ul>
<pre class="python"><code>#!/usr/bin/env python
import sys
import pysam
import re


mapped_num = open(&quot;mapped_18486_dep.txt&quot;, &quot;w&quot;)
file_bam_open= open(&quot;/project2/gilad/briana/Net-seq/Net-seq1/data/sort/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001-sort.bam&quot;, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)
def get_mapped(file):
  mapped=[]
  for read in file.fetch():
      read= str(read)
      cigar= read.split()[5] #python starts at 0
      num= re.findall(&quot;\d+M&quot;, cigar) #this gives me each &quot;##M&quot; in the cigar
      for i in num:
        mapped.append(i[:-1])
  return(mapped)

a= get_mapped(file_bam)

mapped_num.write(&quot;\n&quot;.join(a))

mapped_num.close()

  
    </code></pre>
<p>What I want:</p>
<p>6th column (sep by tab) of the line. ex: 7S23M14S. I want the all the numbers before the M in this string.</p>
<p>Pull the txt file with all the numbers into R and make a histogram.</p>
<pre class="bash"><code>scp brimittleman@midway2.rcc.uchicago.edu:/project2/gilad/briana/Net-seq/Net-seq1/data/sort/mapped_18486_dep.txt .
</code></pre>
<pre class="r"><code>mapped=read.csv(&quot;../data/mapped_18486_dep.txt&quot;, header=FALSE)
map.vec= as.vector(mapped[,1])
a= hist(map.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 18486 mapped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-3-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(map.vec)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00   21.00   23.00   23.16   26.00   44.00 </code></pre>
<p>Change the code so I only add the max mapped value from each read to the file. get_mapped_max.py</p>
<pre class="bash"><code>#!/usr/bin/env python
import sys
import pysam
import re 


mapped_num = open(&quot;mapped_18486_dep_max.txt&quot;, &quot;w&quot;)

file_bam_open= open(&quot;/project2/gilad/briana/Net-seq/Net-seq1/data/sort/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001-sort.bam&quot;, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)

def get_mapped(file):
  mapped=[]
  for read in file.fetch():
      read=str(read)
      cigar= read.split()[5] #python starts at 0
      num= re.findall(&quot;\d+M&quot;, cigar) #this gives me each &quot;##M&quot; in the cigar
      m=[]      
      for i in num:
        m.append(i[:-1])
      mapped.append(max(m))
  return(mapped)


a=get_mapped(file_bam)
  
mapped_num.write(&quot;\n&quot;.join(a))  

mapped_num.close()</code></pre>
<pre class="r"><code>mapped.max=read.csv(&quot;../data/mapped_18486_dep_max.txt&quot;, header=FALSE)
map.vec.max= as.vector(mapped.max[,1])
hist(map.vec.max, xlab=&quot;Bases&quot;, main=&quot;Histogram of max mapped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-5-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(map.vec.max)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00   22.00   23.00   23.87   27.00   44.00 </code></pre>
</div>
<div id="soft-clip-stats" class="section level3">
<h3>Soft Clip stats</h3>
<p>I will change the code so it looks for the digits before the S in the cigar string.</p>
<pre class="python"><code>#!/usr/bin/env python
import sys
import pysam
import re


clip_num = open(&quot;clip_18486_dep.txt&quot;, &quot;w&quot;)
file_bam_open= open(&quot;/project2/gilad/briana/Net-seq/Net-seq1/data/sort/YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001-sort.bam&quot;, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)
def get_clip(file):
  clip=[]
  for read in file.fetch():
      read= str(read)
      cigar= read.split()[5] #python starts at 0
      num= re.findall(&quot;\d+S&quot;, cigar) #this gives me each &quot;##S&quot; in the cigar
      for i in num:
        clip.append(i[:-1])
  return(clip)

c= get_clip(file_bam)

clip_num.write(&quot;\n&quot;.join(c))

clip_num.close()

  
    </code></pre>
<pre class="r"><code>clip=read.csv(&quot;../data/clip_18486_dep.txt&quot;, header=FALSE)
clip.vec= as.vector(clip[,1])
hist(clip.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of clipped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-7-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(clip.vec)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    8.00   14.00   13.85   19.00   26.00 </code></pre>
</div>
<div id="make-this-code-general" class="section level3">
<h3>Make this code general</h3>
<p>Use the sys.argv[1] command to pass in arguments from the command line. This script will generalize my get_mapped.py</p>
<pre class="python"><code>#!/usr/bin/env python
import sys
import pysam
import re

file_name = sys.argv[1]
out_file = sys.argv[2]


#use by passing a sorted bam file, an output text file 


mapped_num = open(out_file, &quot;w&quot;)
file_bam_open= open(file_name, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)
def get_mapped(file):
  out=[]
  for read in file.fetch():
      read= str(read)
      cigar= read.split()[5] #python starts at 0
      num= re.findall(&quot;\d+M&quot;, cigar) #this gives me each &quot;##M&quot; in the cigar
      for i in num:
        out.append(i[:-1])
  return(out)

a= get_mapped(file_bam)

mapped_num.write(&quot;\n&quot;.join(a))

mapped_num.close()</code></pre>
<pre class="bash"><code>./get_mapped.py /project2/gilad/briana/Net-seq/Net-seq1/data/sort/YG-SP-NET1-18508-dep-2017-10-13_S2_R1_001-sort.bam mapped_18508_dep.txt


./get_mapped.py /project2/gilad/briana/Net-seq/Net-seq1/data/sort/YG-SP-NET1-Unk1_S6_R1_001-sort.bam mapped_19238_dep.txt


/project2/gilad/briana/Net-seq/Net-seq1/data/sort/get_mapped.py /project2/gilad/briana/Net-seq/data/sort/SRR1575922-sort.bam  /project2/gilad/briana/Net-seq/data/sort/mapped_mayer.txt </code></pre>
<pre class="r"><code>mapped_mayer=read.csv(&quot;../data/mapped_mayer.txt&quot;, header=FALSE)
map.mayer.vec= as.vector(mapped_mayer[,1])
d= hist(map.mayer.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of mayer mapped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-10-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(map.mayer.vec)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0    21.0    32.0    28.1    38.0    70.0 </code></pre>
<pre class="r"><code>mapped_18508=read.csv(&quot;../data/mapped_18508_dep.txt&quot;, header=FALSE)
map.18508.vec= as.vector(mapped_18508[,1])
b= hist(map.18508.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 18508 mapped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-11-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(map.18508.vec)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00   13.00   22.00   19.21   23.00   44.00 </code></pre>
<pre class="r"><code>mapped_19238=read.csv(&quot;../data/mapped_19238_dep.txt&quot;, header=FALSE)
map.19238.vec= as.vector(mapped_19238[,1])
c= hist(map.19238.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 19238 mapped reads from cigar string&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-12-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre class="r"><code>summary(map.19238.vec)</code></pre>
<pre><code>   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00   13.00   22.00   19.41   23.00   44.00 </code></pre>
</div>
<div id="all-of-the-histograms" class="section level3">
<h3>All of the histograms:</h3>
<pre class="r"><code>par(mfrow = c(2,2))
hist(map.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 18486 mapped reads&quot;)
hist(map.18508.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 18508 mapped reads&quot;)
hist(map.19238.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of 19238 mapped reads&quot;)
hist(map.mayer.vec, xlab=&quot;Bases&quot;, main=&quot;Histogram of mayer reads&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-13-1.png" width="672" style="display: block; margin: auto;" /></p>
</div>
<div id="extract-the-softclipped-regions" class="section level3">
<h3>Extract the softclipped regions</h3>
<p>Example bam line:</p>
<p>700819F:582:HNHYYBCXY:1:2214:10312:72354_TGATCC 0 1 13537 13 19M25S * 0 0 CGGTGTTTGTCATGGGCCTAATTTCGTATGCCGTCTTCTGCTTG IIIIIGIIIIIIIGGIIGIIGGIIGGGIIIIGIIIIIIIGIIII HI:i:1 NH:i:1 NM:i:0</p>
<p>The cigar is column 6. The read is column 10.</p>
<pre class="python"><code>import re
cigar= &quot;18S19M7S&quot;
read= &quot;TCTGCAACAGCTGCCCCTGATCTCGTATGCCGTCTTCTGCTTGA&quot;
num_M= re.findall(&quot;\d+[SM]&quot;, cigar)
num_int= re.split(&quot;[SM]&quot;, cigar)
num_int= num_int[0:-1]
num_int=list(map(int,num_int))
cum_sum= [sum(num_int[:i]) for i in range(1, len(num_int)+1)]
cum_sum= [0] + cum_sum
split_string= [read[i:j] for i, j in zip(cum_sum[:-1], cum_sum[1:])]
num_all= []
for i in num_M:
    if &quot;S&quot; in i:
        x= re.findall(&quot;\d+&quot;, i)
        x = int(x[0])
        num_all.append(x)
max_S= max(num_all)
for i in range(len(num_M)):
  if num_int[i] == max_S:
      break
final = split_string[i]</code></pre>
<pre class="python"><code>#!/usr/bin/env python

import sys
import pysam
import re

file_name = sys.argv[1]
out_file = sys.argv[2]


file_bam_open= open(file_name, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)
softclip_tot = open(out_file, &quot;w&quot;)

def soft_clip(file):
  out= []
  for a in file.fetch():
    line=str(a)
    cigar= line.split()[5]
    read= line.split()[9]
    num_M= re.findall(&quot;\d+[SM]&quot;, cigar)
    num_int= re.split(&quot;[A-Z]&quot;, cigar)
    num_int= num_int[0:-1]
    num_int=list(map(int,num_int))
    cum_sum= [sum(num_int[:i]) for i in range(1, len(num_int)+1)]
    cum_sum= [0] + cum_sum
    split_string= [read[i:j] for i, j in zip(cum_sum[:-1], cum_sum[1:])]
    num_all= []
    for i in num_M:
        if &quot;S&quot; in i:
            x= re.findall(&quot;\d+&quot;, i)
            x = int(x[0])
            num_all.append(x)
    if len(num_all) != 0:
      max_S= max(num_all)
      for i in range(len(num_M)):
        if num_int[i] == max_S:
            break
      final = split_string[i]
      out.append(final)
  return(out)

run= soft_clip(file_bam)

softclip_tot.write(&quot;\n&quot;.join(run))

softclip_tot.close()
</code></pre>
<p>Write bash file to submit this where I input the input bam and output txt.</p>
<pre class="bash"><code>#!/bin/bash

#SBATCH --job-name=run_get_soft_clip
#SBATCH --time=8:00:00
#SBATCH --partition=broadwl
#SBATCH --mem=20G
#SBATCH --tasks-per-node=4 

module load Anaconda3
source activate net-seq

#$1 input file name
#$2 output file 


/project2/gilad/briana/Net-seq/Net-seq1/data/sort/get_soft_clip.py $1 $2 
</code></pre>
<p>Submit for 18486 dep:</p>
<pre class="bash"><code>sbatch submit_get_soft_clip.sh YG-SP-NET1-18486-dep-2017-10-13_S4_R1_001-sort.bam  soft_clip_seq_18486_dep.txt



sbatch submit_get_soft_clip.sh /project2/gilad/briana/Net-seq/data/sort/SRR1575922-sort.bam /project2/gilad/briana/Net-seq/data/sort/soft_clip_mayer.txt</code></pre>
<p>Complete and ready to analyze the txt files.</p>
</div>
<div id="extract-the-quality-score-per-read" class="section level3">
<h3>Extract the quality score per read</h3>
<p>The quality score is in the sorted bam file.</p>
<ol start="5" style="list-style-type: decimal">
<li>MAPQ: MAPping Quality. It equals 􀀀10 log10 Prfmapping position is wrongg, rounded to the nearest integer. A value 255 indicates that the mapping quality is not available.</li>
</ol>
<p>This is column 5 in the bam file.</p>
<p>This script is called get_qual.py This is in /project2/gilad/briana/Net-seq/scripts</p>
<p>Run by passing it the bam file and an output txt file.</p>
<pre class="python"><code>#!/usr/bin/env python
import sys
import pysam
import re

file_name = sys.argv[1]
out_file = sys.argv[2]

qual_score = open(out_file, &quot;w&quot;)
file_bam_open= open(file_name, &quot;r&quot;)
file_bam= pysam.AlignmentFile(file_bam_open, &quot;rb&quot;)

def get_qual(file):
  out=[]
  for read in file.fetch():
      read= str(read)
      cigar= read.split()[4] #python starts at 0
      out.append(cigar)
  return(out)

a= get_qual(file_bam)

qual_score.write(&quot;\n&quot;.join(a))

qual_score.close()
</code></pre>
<p>Import data:</p>
<pre class="r"><code>qual_18486= read.csv(&quot;../data/qual_18486_dep.txt&quot;, head=FALSE)
qual_18508= read.csv(&quot;../data/qual_18508_dep.txt&quot;, head=FALSE)
qual_19238= read.csv(&quot;../data/qual_19238_dep.txt&quot;, head=FALSE)
qual_mayer= read.csv(&quot;../data/qual_mayer.txt&quot;, head=FALSE)</code></pre>
<p>Look at the summary:</p>
<pre class="r"><code>summary(qual_18486)</code></pre>
<pre><code>       V1      
 Min.   : 6.0  
 1st Qu.:13.0  
 Median :13.0  
 Mean   :14.3  
 3rd Qu.:13.0  
 Max.   :40.0  </code></pre>
<pre class="r"><code>summary(qual_18508)</code></pre>
<pre><code>       V1       
 Min.   : 6.00  
 1st Qu.:13.00  
 Median :13.00  
 Mean   :12.59  
 3rd Qu.:13.00  
 Max.   :40.00  </code></pre>
<pre class="r"><code>summary(qual_19238)</code></pre>
<pre><code>       V1       
 Min.   : 6.00  
 1st Qu.:13.00  
 Median :13.00  
 Mean   :12.88  
 3rd Qu.:13.00  
 Max.   :40.00  </code></pre>
<pre class="r"><code>summary(qual_mayer)</code></pre>
<pre><code>       V1       
 Min.   : 6.00  
 1st Qu.:10.00  
 Median :13.00  
 Mean   :21.11  
 3rd Qu.:40.00  
 Max.   :40.00  </code></pre>
<p>Make map quality histograms:</p>
<pre class="r"><code>par(mfrow = c(2,2))

hist(as.numeric(qual_18486[,1]), freq=FALSE, main=&quot;Quality 18486&quot;, xlab=&quot;Quality Score&quot;)
hist(as.numeric(qual_18508[,1]), freq=FALSE, main=&quot;Quality 18508&quot;, xlab=&quot;Quality Score&quot;)
hist(as.numeric(qual_19238[,1]), freq=FALSE, main=&quot;Quality 19238&quot;, xlab=&quot;Quality Score&quot;)
hist(as.numeric(qual_mayer[,1]), freq=FALSE, main= &quot;Quality Mayer&quot;, xlab=&quot;Quality Score&quot;)</code></pre>
<p><img src="figure/map_stats_from_bam.Rmd/unnamed-chunk-21-1.png" width="672" style="display: block; margin: auto;" /></p>
</div>
<div id="session-information" class="section level3">
<h3>Session information</h3>
<!-- Insert the session information into the document -->
<pre class="r"><code>sessionInfo()</code></pre>
<pre><code>R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.6

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_3.4.2  backports_1.1.1 magrittr_1.5    rprojroot_1.2  
 [5] tools_3.4.2     htmltools_0.3.6 yaml_2.1.14     Rcpp_0.12.13   
 [9] stringi_1.1.5   rmarkdown_1.6   knitr_1.17      git2r_0.19.0   
[13] stringr_1.2.0   digest_0.6.12   evaluate_0.10.1</code></pre>
</div>

<hr>
<p>
    This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->


</div>
</div>

</div>

<script>

// add bootstrap table styles to pandoc tables
function bootstrapStylePandocTables() {
  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}
$(document).ready(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>