<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <meta name="author" content="Briana Mittleman" /> <title>Data Processing Figures</title> <script src="site_libs/jquery-1.11.3/jquery.min.js"></script> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="site_libs/bootstrap-3.3.5/css/journal.min.css" rel="stylesheet" /> <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script> <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script> <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" /> <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script> <script src="site_libs/navigation-1.1/tabsets.js"></script> <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" /> <script src="site_libs/highlightjs-9.12.0/highlight.js"></script> <link href="site_libs/font-awesome-5.0.13/css/fa-svg-with-js.css" rel="stylesheet" /> <script src="site_libs/font-awesome-5.0.13/js/fontawesome-all.min.js"></script> <script src="site_libs/font-awesome-5.0.13/js/fa-v4-shims.min.js"></script> <style type="text/css">code{white-space: pre;}</style> <style type="text/css"> pre:not([class]) { background-color: white; } </style> <script type="text/javascript"> if (window.hljs) { hljs.configure({languages: []}); hljs.initHighlightingOnLoad(); if (document.readyState && document.readyState === "complete") { window.setTimeout(function() { hljs.initHighlighting(); }, 0); } } </script> <style type="text/css"> h1 { font-size: 34px; } h1.title { font-size: 38px; } h2 { font-size: 30px; } h3 { font-size: 24px; } h4 { font-size: 18px; } h5 { font-size: 16px; } h6 { font-size: 12px; } .table th:not([align]) { text-align: left; } </style> </head> <body> <style type = "text/css"> .main-container { max-width: 940px; margin-left: auto; margin-right: auto; } code { color: inherit; background-color: rgba(0, 0, 0, 0.04); } img { max-width:100%; height: auto; } .tabbed-pane { padding-top: 12px; } .html-widget { margin-bottom: 20px; } button.code-folding-btn:focus { outline: none; } </style> <style type="text/css"> /* padding for bootstrap navbar */ body { padding-top: 51px; padding-bottom: 40px; } /* offset scroll position for anchor links (for fixed navbar) */ .section h1 { padding-top: 56px; margin-top: -56px; } .section h2 { padding-top: 56px; margin-top: -56px; } .section h3 { padding-top: 56px; margin-top: -56px; } .section h4 { padding-top: 56px; margin-top: -56px; } .section h5 { padding-top: 56px; margin-top: -56px; } .section h6 { padding-top: 56px; margin-top: -56px; } </style> <script> // manage active state of menu based on current page $(document).ready(function () { // active menu anchor href = window.location.pathname href = href.substr(href.lastIndexOf('/') + 1) if (href === "") href = "index.html"; var menuAnchor = $('a[href="' + href + '"]'); // mark it active menuAnchor.parent().addClass('active'); // if it's got a parent navbar menu mark it active as well menuAnchor.closest('li.dropdown').addClass('active'); }); </script> <div class="container-fluid main-container"> <!-- tabsets --> <script> $(document).ready(function () { window.buildTabsets("TOC"); }); </script> <!-- code folding --> <script> $(document).ready(function () { // move toc-ignore selectors from section div to header $('div.section.toc-ignore') .removeClass('toc-ignore') .children('h1,h2,h3,h4,h5').addClass('toc-ignore'); // establish options var options = { selectors: "h1,h2,h3", theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 }; options.showAndHide = true; options.smoothScroll = true; // tocify var toc = $("#TOC").tocify(options).data("toc-tocify"); }); </script> <style type="text/css"> #TOC { margin: 25px 0px 20px 0px; } @media (max-width: 768px) { #TOC { position: relative; width: 100%; } } .toc-content { padding-left: 30px; padding-right: 40px; } div.main-container { max-width: 1200px; } div.tocify { width: 20%; max-width: 260px; max-height: 85%; } @media (min-width: 768px) and (max-width: 991px) { div.tocify { width: 25%; } } @media (max-width: 767px) { div.tocify { width: 100%; max-width: none; } } .tocify ul, .tocify li { line-height: 20px; } .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; } .tocify .list-group-item { border-radius: 0px; } </style> <!-- setup 3col/9col grid for toc_float and main content --> <div class="row-fluid"> <div class="col-xs-12 col-sm-4 col-md-3"> <div id="TOC" class="tocify"> </div> </div> <div class="toc-content col-xs-12 col-sm-8 col-md-9"> <div class="navbar navbar-default navbar-fixed-top" role="navigation"> <div class="container"> <div class="navbar-header"> <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> <a class="navbar-brand" href="index.html">Three Prime Sequencing in Human LCLs</a> </div> <div id="navbar" class="navbar-collapse collapse"> <ul class="nav navbar-nav"> <li> <a href="index.html">Home</a> </li> <li> <a href="about.html">About</a> </li> <li> <a href="license.html">License</a> </li> </ul> <ul class="nav navbar-nav navbar-right"> <li> <a href="https://github.com/brimittleman/threeprimeseq"> <span class="fa fa-github"></span> </a> </li> </ul> </div><!--/.nav-collapse --> </div><!--/.container --> </div><!--/.navbar --> <!-- Add a small amount of space between sections. --> <style type="text/css"> div.section { padding-top: 12px; } </style> <div class="fluid-row" id="header"> <h1 class="title toc-ignore">Data Processing Figures</h1> <h4 class="author"><em>Briana Mittleman</em></h4> <h4 class="date"><em>8/28/2018</em></h4> </div> <p><strong>Last updated:</strong> 2018-09-04</p> <strong>workflowr checks:</strong> <small>(Click a bullet for more information)</small> <ul> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>R Markdown file:</strong> up-to-date </summary></p> <p>Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Environment:</strong> empty </summary></p> <p>Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Seed:</strong> <code>set.seed(12345)</code> </summary></p> <p>The command <code>set.seed(12345)</code> was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Session information:</strong> recorded </summary></p> <p>Great job! Recording the operating system, R version, and package versions is critical for reproducibility.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Repository version:</strong> <a href="https://github.com/brimittleman/threeprimeseq/tree/deaa5b0c0f3a0e46c7c187c7fe4ee62e042bca30" target="_blank">deaa5b0</a> </summary></p> Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated. <br><br> Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use <code>wflow_publish</code> or <code>wflow_git_commit</code>). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated: <pre><code> Ignored files: Ignored: .DS_Store Ignored: .Rhistory Ignored: .Rproj.user/ Ignored: analysis/figure/ Ignored: output/.DS_Store Untracked files: Untracked: analysis/ncbiRefSeq_sm.sort.mRNA.bed Untracked: analysis/snake.config.notes.Rmd Untracked: data/18486.genecov.txt Untracked: data/APApeaksYL.total.inbrain.bed Untracked: data/RNAkalisto/ Untracked: data/Totalpeaks_filtered_clean.bed Untracked: data/YL-SP-18486-T-combined-genecov.txt Untracked: data/YL-SP-18486-T_S9_R1_001-genecov.txt Untracked: data/bedgraph_peaks/ Untracked: data/bin200.5.T.nuccov.bed Untracked: data/bin200.Anuccov.bed Untracked: data/bin200.nuccov.bed Untracked: data/clean_peaks/ Untracked: data/comb_map_stats.csv Untracked: data/comb_map_stats.xlsx Untracked: data/combined_reads_mapped_three_prime_seq.csv Untracked: data/gencov.test.csv Untracked: data/gencov.test.txt Untracked: data/gencov_zero.test.csv Untracked: data/gencov_zero.test.txt Untracked: data/gene_cov/ Untracked: data/joined Untracked: data/leafcutter/ Untracked: data/merged_combined_YL-SP-threeprimeseq.bg Untracked: data/nom_QTL/ Untracked: data/nom_QTL_opp/ Untracked: data/nuc6up/ Untracked: data/peakPerRefSeqGene/ Untracked: data/perm_QTL/ Untracked: data/perm_QTL_opp/ Untracked: data/reads_mapped_three_prime_seq.csv Untracked: data/smash.cov.results.bed Untracked: data/smash.cov.results.csv Untracked: data/smash.cov.results.txt Untracked: data/smash_testregion/ Untracked: data/ssFC200.cov.bed Untracked: data/temp.file1 Untracked: data/temp.file2 Untracked: data/temp.gencov.test.txt Untracked: data/temp.gencov_zero.test.txt Untracked: output/picard/ Untracked: output/plots/ Untracked: output/qual.fig2.pdf Unstaged changes: Modified: analysis/28ind.peak.explore.Rmd Modified: analysis/cleanupdtseq.internalpriming.Rmd Modified: analysis/dif.iso.usage.leafcutter.Rmd Modified: analysis/diff_iso_pipeline.Rmd Modified: analysis/explore.filters.Rmd Modified: analysis/peak.cov.pipeline.Rmd Modified: analysis/peakOverlap_oppstrand.Rmd Modified: analysis/pheno.leaf.comb.Rmd Modified: analysis/test.max2.Rmd Modified: code/Snakefile </code></pre> Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes. </details> </li> </ul> <details> <summary> <small><strong>Expand here to see past versions:</strong></small> </summary> <ul> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> File </th> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> <th style="text-align:left;"> Message </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/deaa5b0c0f3a0e46c7c187c7fe4ee62e042bca30/analysis/dataprocfigures.Rmd" target="_blank">deaa5b0</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-09-04 </td> <td style="text-align:left;"> compare TPM for genes with no peaks </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/2e39f7a21ec47a1d8efaf410a87616f2bda9283f/docs/dataprocfigures.html" target="_blank">2e39f7a</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-30 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/a2a7cd9bb204a1c4f22f57125a72d05f36a44697/analysis/dataprocfigures.Rmd" target="_blank">a2a7cd9</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-30 </td> <td style="text-align:left;"> add kalisto code </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/cbec2f6eb084a5f22386c61b8e6d872d238c3c05/docs/dataprocfigures.html" target="_blank">cbec2f6</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-29 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/6b818cbdefc857eb57e9a522483fa63b60b7fb60/analysis/dataprocfigures.Rmd" target="_blank">6b818cb</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-29 </td> <td style="text-align:left;"> try gencode anno </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/c6dc97bf8c0353ab9179b44a1f9ed8210f4c0adb/docs/dataprocfigures.html" target="_blank">c6dc97b</a> </td> <td style="text-align:left;"> brimittleman </td> <td style="text-align:left;"> 2018-08-28 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/fa818a13d8591ac31fd9f6c63a86af3af565def7/analysis/dataprocfigures.Rmd" target="_blank">fa818a1</a> </td> <td style="text-align:left;"> brimittleman </td> <td style="text-align:left;"> 2018-08-28 </td> <td style="text-align:left;"> first processing figure </td> </tr> </tbody> </table> </ul> <p></details></p> <hr /> <p>I will use this analysis to work on vizualising some of the processing steps of this analysis.</p> <div id="peaks-per-gene" class="section level2"> <h2>Peaks per gene</h2> <p>I want to create a figure similar to the one I created in <a href="https://brimittleman.github.io/comparative_threeprime/characterize.ortho.peaks.html" class="uri">https://brimittleman.github.io/comparative_threeprime/characterize.ortho.peaks.html</a>. I will use the count distinct function from bedtools map. For this I am using the RefSeq mRNA annotations.</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=refseq_countdistinct #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=refseq_countdistinct.out #SBATCH --error=refseq_countdistinct.err #SBATCH --partition=broadwl #SBATCH --mem=12G #SBATCH --mail-type=END bedtools map -c 4 -s -o count_distinct -a /project2/gilad/briana/genome_anotation_data/refseq.ProteinCoding.noCHR.bed -b /project2/gilad/briana/threeprimeseq/data/mergedPeaks_comb/filtered_APApeaks_merged_allchrom.named.fixed.bed > /project2/gilad/briana/threeprimeseq/data/peakPerRefseqGene/filtered_APApeaks_perRefseqGene.txt #try opp strand bedtools map -c 4 -S -o count_distinct -a /project2/gilad/briana/genome_anotation_data/refseq.ProteinCoding.noCHR.bed -b /project2/gilad/briana/threeprimeseq/data/mergedPeaks_comb/filtered_APApeaks_merged_allchrom.named.fixed.bed > /project2/gilad/briana/threeprimeseq/data/peakPerRefseqGene/filtered_APApeaks_perRefseqGene_oppStrand.txt </code></pre> <pre class="r"><code>library(tidyverse)</code></pre> <pre><code>── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──</code></pre> <pre><code>✔ ggplot2 3.0.0 ✔ purrr 0.2.5 ✔ tibble 1.4.2 ✔ dplyr 0.7.6 ✔ tidyr 0.8.1 ✔ stringr 1.3.1 ✔ readr 1.1.1 ✔ forcats 0.3.0</code></pre> <pre><code>── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag()</code></pre> <pre class="r"><code>library(workflowr)</code></pre> <pre><code>This is workflowr version 1.1.1 Run ?workflowr for help getting started</code></pre> <pre class="r"><code>library(reshape2)</code></pre> <pre><code> Attaching package: 'reshape2'</code></pre> <pre><code>The following object is masked from 'package:tidyr': smiths</code></pre> <pre class="r"><code>library(cowplot)</code></pre> <pre><code> Attaching package: 'cowplot'</code></pre> <pre><code>The following object is masked from 'package:ggplot2': ggsave</code></pre> <pre class="r"><code>names=c("Chr", "Start", "End", "Name", "Score", "Strand", "numPeaks") peakpergene=read.table("../data/peakPerRefSeqGene/filtered_APApeaks_perRefseqGene.txt", stringsAsFactors = F, header = F, col.names = names) %>% mutate(onePeak=ifelse(numPeaks==1, 1, 0 )) %>% mutate(multPeaks=ifelse(numPeaks > 1, 1, 0 ))</code></pre> <pre class="r"><code>genes1peak=sum(peakpergene$onePeak)/nrow(peakpergene) genesMultpeak=sum(peakpergene$multPeaks)/nrow(peakpergene) genes0peak= 1- genes1peak - genesMultpeak perPeak= c(round(genes0peak,digits = 3), round(genes1peak,digits = 3),round(genesMultpeak, digits = 3)) Category=c("Zero", "One", "Multiple") perPeakdf=as.data.frame(cbind(Category,as.numeric(perPeak)))</code></pre> <p>Plot these proportions:</p> <pre class="r"><code>lab1=paste("Genes =", genes0peak*nrow(peakpergene), sep=" ") lab2=paste("Genes =", sum(peakpergene$onePeak), sep=" ") lab3=paste("Genes =", sum(peakpergene$multPeaks), sep=" ") genepeakplot=ggplot(perPeakdf, aes(x="", y=perPeak, fill=Category)) + geom_bar(stat="identity")+ labs(title="Characterize genes by number of PAS", y="Proportion of Protein Coding gene", x="")+ scale_fill_brewer(palette="Paired") + coord_cartesian(ylim=c(0,1)) + annotate("text", x="", y= .35, label=lab1) + annotate("text", x="", y= .78, label=lab2) + annotate("text", x="", y= .92, label=lab3) genepeakplot</code></pre> <p><img src="figure/dataprocfigures.Rmd/unnamed-chunk-5-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-5-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/cbec2f6eb084a5f22386c61b8e6d872d238c3c05/docs/figure/dataprocfigures.Rmd/unnamed-chunk-5-1.png" target="_blank">cbec2f6</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-29 </td> </tr> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/c6dc97bf8c0353ab9179b44a1f9ed8210f4c0adb/docs/figure/dataprocfigures.Rmd/unnamed-chunk-5-1.png" target="_blank">c6dc97b</a> </td> <td style="text-align:left;"> brimittleman </td> <td style="text-align:left;"> 2018-08-28 </td> </tr> </tbody> </table> <p></details></p> <p>This includes for than 1 isoform for different genes. I am going to go back to the original refseq file and resegment it. Column 13 is the gene name. Column 2 needs to start with NM because that is mRNA.</p> <pre class="bash"><code>grep "NM" ncbiRefSeq.txt | awk '{print $3 "\t" $5 "\t" $6 "\t" $2 "\t" $13 "\t" $4}' > ncbiRefSeq.mRNA.named.bed</code></pre> <p>I can write a script that writes only the longest isoform for each gene.</p> <pre class="bash"><code> outfile=open("refseq.ProteinCoding.bed", "w") infile=open("ncbiRefSeq.mRNA.named.bed", "r") lines=infile.readlines() lot_lines=len(lines) for n,ln in enumerate(lines): chrom, start, end, mRNA, gene, strand = ln.split() #if first line if n == 0: #first line condition SE_list=[] cur_gene=gene SE_list.append(int(start)) SE_list.append(int(end)) elif n == lot_lines-1: #last line condition if gene == cur_gene: SE_list.append(int(start)) SE_list.append(int(end)) SE_list.sort() outfile.write("%s\t%d\t%d\t%s\t.\t%s\n"%(chrom, SE_list[0], SE_list[-1], gene, strand)) else: outfile.write("%s\t%d\t%d\t%s\t.\t%s\n"%(chrom, int(start), int(end), gene, strand)) elif gene == cur_gene: SE_list.append(int(start)) SE_list.append(int(end)) elif gene != cur_gene: #write out the last line but with the start end from the SE list prevline=lines[n-1] chrom2, start2, end2, mRNA2, gene2, strand2 = prevline.split() outfile.write("%s\t%d\t%d\t%s\t.\t%s\n"%(chrom2, SE_list[0], SE_list[-1], gene2, strand2)) cur_gene=gene SE_list=[int(start), int(end)] outfile.close() </code></pre> <p>I can check this by maknig sure there is 1 line for all of the unique names in the in file.</p> <pre class="bash"><code>awk '{print $5}' ncbiRefSeq.mRNA.named.bed | sort | uniq | wc -l #19243 wc -l refseq.ProteinCoding.bed #20298 sed 's/^chr//' refseq.ProteinCoding.bed > refseq.ProteinCoding.noCHR.bed</code></pre> <p>There is still a problem with the script. The problem is when the same gene name is on extra haplotypes. I could remove all of the lines in the file that have _ in the first column. These are on contigs or specfic haplotypes. They will not map to our peaks anyway. I also need to remove the chr.</p> <p>This still seems lower than previos APA estimates. I had used gencode estimates before. I am gonig to run this analysis again with those gene.</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=gencode_countdistinct #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=gencode_countdistinct.out #SBATCH --error=gencode_countdistinct.err #SBATCH --partition=broadwl #SBATCH --mem=12G #SBATCH --mail-type=END bedtools map -c 4 -s -o count_distinct -a /project2/gilad/briana/genome_anotation_data/gencode.v19.annotation.proteincodinggene.sort.bed -b /project2/gilad/briana/threeprimeseq/data/mergedPeaks_comb/filtered_APApeaks_merged_allchrom.named.fixed.bed > </code></pre> <pre class="r"><code>Gpeakpergene=read.table("../data/peakPerRefSeqGene/filtered_APApeaks_perGencodeGene.txt", stringsAsFactors = F, header = F, col.names = names) %>% mutate(onePeak=ifelse(numPeaks==1, 1, 0 )) %>% mutate(multPeaks=ifelse(numPeaks > 1, 1, 0 ))</code></pre> <pre class="r"><code>Ggenes1peak=sum(Gpeakpergene$onePeak)/nrow(Gpeakpergene) GgenesMultpeak=sum(Gpeakpergene$multPeaks)/nrow(Gpeakpergene) Ggenes0peak= 1- Ggenes1peak - GgenesMultpeak GperPeak= c(round(Ggenes0peak,digits = 3), round(Ggenes1peak,digits = 3),round(GgenesMultpeak, digits = 3)) GperPeakdf=as.data.frame(cbind(Category,as.numeric(GperPeak)))</code></pre> <p>Plot these proportions:</p> <pre class="r"><code>Glab1=paste("Genes =", Ggenes0peak*nrow(Gpeakpergene), sep=" ") Glab2=paste("Genes =", sum(Gpeakpergene$onePeak), sep=" ") Glab3=paste("Genes =", sum(Gpeakpergene$multPeaks), sep=" ") Ggenepeakplot=ggplot(GperPeakdf, aes(x="", y=perPeak, fill=Category)) + geom_bar(stat="identity")+ labs(title="Characterize Gencode genes by number of PAS", y="Proportion of Protein Coding gene", x="")+ scale_fill_brewer(palette="Paired") + coord_cartesian(ylim=c(0,1)) + annotate("text", x="", y= .35, label=Glab1) + annotate("text", x="", y= .78, label=Glab2) + annotate("text", x="", y= .92, label=Glab3) Ggenepeakplot</code></pre> <p><img src="figure/dataprocfigures.Rmd/unnamed-chunk-12-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-12-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/cbec2f6eb084a5f22386c61b8e6d872d238c3c05/docs/figure/dataprocfigures.Rmd/unnamed-chunk-12-1.png" target="_blank">cbec2f6</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-29 </td> </tr> </tbody> </table> <p></details></p> <p>These results are still lower than expected. This is because all of my previous analysis mapped the genes to the peaks as which were closest in the upstream direction. Here I am saying the peak must overlap the gene.</p> <p>I should again look at some of the genes with the top counts in RNA seq and the 0 peaks.</p> <p>I am going to run feaureCounts on 18486 guevardis with the refseq annotation with the named genes. I need to make this a SAF file.</p> <pre class="bash"><code>from misc_helper import * fout = file("/project2/gilad/briana/genome_anotation_data/refseq.ProteinCoding.noCHR.SAF",'w') fout.write("GeneID\tChr\tStart\tEnd\tStrand\n") for ln in open("/project2/gilad/briana/genome_anotation_data/refseq.ProteinCoding.noCHR.bed"): chrom, start, end, gene, score, strand = ln.split() start_i=int(start) end_i=int(end) fout.write("%s\t%s\t%d\t%d\t%s\n"%(gene, chrom, start_i, end_i, strand)) fout.close()</code></pre> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=fc_RNAseq_refseq #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=fc_RNAseq_refseq.out #SBATCH --error=fc_RNAseq_refseq.err #SBATCH --partition=broadwl #SBATCH --mem=12G #SBATCH --mail-type=END module load Anaconda3 source activate three-prime-env # outdir: /project2/gilad/briana/comparitive_threeprime/data/PeakwGene_quant featureCounts -a /project2/gilad/briana/genome_anotation_data/refseq.ProteinCoding.noCHR.SAF -F SAF -o /project2/gilad/briana/threeprimeseq/data/peakPerRefseqGene/refseq18486exp.quant /project2/yangili1/LCL/RNAseqGeuvadisBams/RNAseqGeuvadis_STAR_18486.final.bam -s 1</code></pre> <p>Now I can upload the results and compare them to the peak counts in these genes.</p> <pre class="r"><code>namesRNA=c("Name", "Chr", "Start", "End", "Strand", "Length", "RNAseq") RNAseqrefseq=read.table("../data/peakPerRefSeqGene/refseq18486exp.quant", header=T, stringsAsFactors = F, col.names = namesRNA) RNAseqrefseq$Start=as.integer(RNAseqrefseq$Start)</code></pre> <pre><code>Warning: NAs introduced by coercion</code></pre> <pre class="r"><code>RNAseqrefseq$End=as.integer(RNAseqrefseq$End)</code></pre> <pre><code>Warning: NAs introduced by coercion</code></pre> <p>Join the peakpergene dataframe with this dataframe.</p> <pre class="r"><code>refPeakandRNA=peakpergene %>% inner_join(RNAseqrefseq, by=c("Name", "Chr", "Start", "End", "Strand")) refPeakandRNA_noPeak=refPeakandRNA %>% filter(RNAseq!=0) %>% filter(numPeaks==0) %>% arrange(desc(RNAseq)) %>% select(Name, Start, End, Chr, RNAseq, numPeaks)</code></pre> <p>This doesnt make much sense. Seems like the peaks are on the opposite strand for the top genes. I am gonig to force opposite strandedness and see what happens.</p> <pre class="r"><code>Opeakpergene=read.table("../data/peakPerRefSeqGene/filtered_APApeaks_perRefseqGene_oppStrand.txt", stringsAsFactors = F, header = F, col.names = names) %>% mutate(onePeak=ifelse(numPeaks==1, 1, 0 )) %>% mutate(multPeaks=ifelse(numPeaks > 1, 1, 0 ))</code></pre> <pre class="r"><code>Ogenes1peak=sum(Opeakpergene$onePeak)/nrow(Opeakpergene) OgenesMultpeak=sum(Opeakpergene$multPeaks)/nrow(Opeakpergene) Ogenes0peak= 1- Ogenes1peak - OgenesMultpeak OperPeak= c(round(Ogenes0peak,digits = 3), round(Ogenes1peak,digits = 3),round(OgenesMultpeak, digits = 3)) OperPeakdf=as.data.frame(cbind(Category,OperPeak)) OperPeakdf$OperPeak=as.numeric(as.character(OperPeakdf$OperPeak))</code></pre> <p>Plot these proportions:</p> <pre class="r"><code>Olab1=paste("Genes =", Ogenes0peak*nrow(Opeakpergene), sep=" ") Olab2=paste("Genes =", sum(Opeakpergene$onePeak), sep=" ") Olab3=paste("Genes =", sum(Opeakpergene$multPeaks), sep=" ") Ogenepeakplot=ggplot(OperPeakdf, aes(x="", y=OperPeak, by=Category, fill=Category)) + geom_bar(stat="identity")+ labs(title="Characterize Refseq genes by number of PAS- Oppstrand", y="Proportion of Protein Coding gene", x="")+ scale_fill_brewer(palette="Paired") + coord_cartesian(ylim=c(0,1)) + annotate("text", x="", y= .2, label=Olab1) + annotate("text", x="", y= .4, label=Olab2) + annotate("text", x="", y= .9, label=Olab3) Ogenepeakplot</code></pre> <p><img src="figure/dataprocfigures.Rmd/unnamed-chunk-19-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-19-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/2e39f7a21ec47a1d8efaf410a87616f2bda9283f/docs/figure/dataprocfigures.Rmd/unnamed-chunk-19-1.png" target="_blank">2e39f7a</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-30 </td> </tr> </tbody> </table> <p></details></p> <p>This makes more sense now.</p> <pre class="r"><code>refPeakandRNA_withO=Opeakpergene %>% inner_join(RNAseqrefseq, by=c("Name", "Chr", "Start", "End", "Strand")) refPeakandRNA_noPeakw_withO=refPeakandRNA_withO %>% filter(RNAseq!=0) %>% filter(numPeaks==0) %>% arrange(desc(RNAseq)) %>% select(Name, Start, End, Chr, RNAseq, numPeaks)</code></pre> <pre class="r"><code>plot(sort(log10(refPeakandRNA_withO$RNAseq), decreasing = T), main="Distribution of RNA expression counts 18486", ylab="log10 Gene count", xlab="Refseq Gene") points(sort(log10(refPeakandRNA_noPeakw_withO$RNAseq), decreasing = T), col="Red") legend("topright", legend=c("All Gene", "Gene without Peak"), col=c("black", "red"),pch=19)</code></pre> <p><img src="figure/dataprocfigures.Rmd/unnamed-chunk-21-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-21-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/2e39f7a21ec47a1d8efaf410a87616f2bda9283f/docs/figure/dataprocfigures.Rmd/unnamed-chunk-21-1.png" target="_blank">2e39f7a</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-08-30 </td> </tr> </tbody> </table> <p></details></p> <p>Run Kalisto on the this RNA seq line and look at this plot with the kalisto output expression TPM. I added Kallisto to the three-prime-env.</p> <p>Kallisto step:</p> <ul> <li>make index: kallisto_index18486.sh</li> </ul> <p>This needs to be based on a transcriptome. I will use the protein coding transcripts from <a href="https://www.gencodegenes.org/releases/28lift37.html" class="uri">https://www.gencodegenes.org/releases/28lift37.html</a>.</p> <pre class="bash"><code> #!/bin/bash #SBATCH --job-name=kallisto_index18486 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=kallisto_index18486.out #SBATCH --error=kallisto_index18486.err #SBATCH --partition=broadwl #SBATCH --mem=50G #SBATCH --mail-type=END module load Anaconda3 source activate three-prime-env kallisto index --make-unique -i /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/RNA18486_index /project2/gilad/briana/genome_anotation_data/gencode.v28lift37.pc_transcripts.fa</code></pre> <ul> <li>quantify: kallisto_quant18467.sh</li> </ul> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=kallisto_quant18486 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=kallisto_quant18486.out #SBATCH --error=kallisto_quant18486.err #SBATCH --partition=broadwl #SBATCH --mem=12G #SBATCH --mail-type=END module load Anaconda3 source activate three-prime-env kallisto quant -i /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/RNA18486_index -o /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/ /project2/yangili1/LCL/RNAseq/RNA.18486_1.fastq.gz /project2/yangili1/LCL/RNAseq/RNA.18486_2.fastq.gz</code></pre> <p>Convert to readable with TPM:</p> <pre class="bash"><code> kallisto h5dump abundance.h5 -o /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto</code></pre> <p>This is the gencode annotation. I want to do this with the refseq transcriptome. <a href="https://www.ncbi.nlm.nih.gov/projects/genome/guide/human/" class="uri">https://www.ncbi.nlm.nih.gov/projects/genome/guide/human/</a></p> <p>kallisto_refseqindex18486.sh</p> <pre class="bash"><code> #!/bin/bash #SBATCH --job-name=kallisto_refseqindex18486 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=kallisto_refseqindex18486.out #SBATCH --error=kallisto_refseqindex18486.err #SBATCH --partition=broadwl #SBATCH --mem=50G #SBATCH --mail-type=END module load Anaconda3 source activate three-prime-env kallisto index --make-unique -i /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/RNA18486_refseq_index /project2/gilad/briana/genome_anotation_data/GRCh37_latest_rna.fna </code></pre> <ul> <li>quantify: kallisto_refseq_quant18467.sh</li> </ul> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=kallisto_refseq_quant18486 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=kallisto_refseq_quant18486.out #SBATCH --error=kallisto_refseq_quant18486.err #SBATCH --partition=broadwl #SBATCH --mem=12G #SBATCH --mail-type=END module load Anaconda3 source activate three-prime-env kallisto quant -i /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/RNA18486_refseq_index -o /project2/gilad/briana/threeprimeseq/data/RNAseqKallisto/refseq/project2/yangili1/LCL/RNAseq/RNA.18486_1.fastq.gz /project2/yangili1/LCL/RNAseq/RNA.18486_2.fastq.gz</code></pre> <p>I will use tximport to convert from the transcripts that are quantified in kalisto.</p> <pre class="r"><code>#source("https://bioconductor.org/biocLite.R") #biocLite("tximport") #biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene") library(tximport) library("TxDb.Hsapiens.UCSC.hg19.knownGene")</code></pre> <pre><code>Loading required package: GenomicFeatures</code></pre> <pre><code>Loading required package: BiocGenerics</code></pre> <pre><code>Loading required package: parallel</code></pre> <pre><code> Attaching package: 'BiocGenerics'</code></pre> <pre><code>The following objects are masked from 'package:parallel': clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB</code></pre> <pre><code>The following objects are masked from 'package:dplyr': combine, intersect, setdiff, union</code></pre> <pre><code>The following objects are masked from 'package:stats': IQR, mad, sd, var, xtabs</code></pre> <pre><code>The following objects are masked from 'package:base': anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min</code></pre> <pre><code>Loading required package: S4Vectors</code></pre> <pre><code>Loading required package: stats4</code></pre> <pre><code> Attaching package: 'S4Vectors'</code></pre> <pre><code>The following objects are masked from 'package:dplyr': first, rename</code></pre> <pre><code>The following object is masked from 'package:tidyr': expand</code></pre> <pre><code>The following object is masked from 'package:base': expand.grid</code></pre> <pre><code>Loading required package: IRanges</code></pre> <pre><code> Attaching package: 'IRanges'</code></pre> <pre><code>The following objects are masked from 'package:dplyr': collapse, desc, slice</code></pre> <pre><code>The following object is masked from 'package:purrr': reduce</code></pre> <pre><code>Loading required package: GenomeInfoDb</code></pre> <pre><code>Loading required package: GenomicRanges</code></pre> <pre><code>Loading required package: AnnotationDbi</code></pre> <pre><code>Loading required package: Biobase</code></pre> <pre><code>Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'.</code></pre> <pre><code> Attaching package: 'AnnotationDbi'</code></pre> <pre><code>The following object is masked from 'package:dplyr': select</code></pre> <p>Import Kalisto resutls:</p> <pre class="r"><code>#I need to make a gene to transcript ID with the transcript id and gene id columns tx2gene=read.table("../data/RNAkalisto/ncbiRefSeq.txn2gene.txt" ,header= F, sep="\t", stringsAsFactors = F) txi.kallisto.tsv <- tximport("../data/RNAkalisto/abundance.tsv", type = "kallisto", tx2gene = tx2gene)</code></pre> <pre><code>Note: importing `abundance.h5` is typically faster than `abundance.tsv`</code></pre> <pre><code>reading in files with read_tsv</code></pre> <pre><code>1 removing duplicated transcript rows from tx2gene transcripts missing from tx2gene: 99 summarizing abundance summarizing counts summarizing length</code></pre> <pre class="r"><code>txi.kallisto.tsv$abundance= as.data.frame(txi.kallisto.tsv$abundance) %>% rownames_to_column(var="Name") colnames(txi.kallisto.tsv$abundance)= c("Name", "TPM")</code></pre> <p>Now I want to join this with the RNA seq data so I am looking at the expression tpm rather than counts.</p> <pre class="r"><code>refPeakandRNA_withO_TPM=refPeakandRNA_withO %>% inner_join(txi.kallisto.tsv$abundance, by="Name") %>% filter(TPM>0) refPeakandRNA_noPeakw_withO_TPM=refPeakandRNA_noPeakw_withO %>% inner_join(txi.kallisto.tsv$abundance, by="Name") %>% filter(TPM >0)</code></pre> <p>I can now replot the genes without peaks by TPM for the RNA seq rather than count.</p> <pre class="r"><code>plot(sort(log10(refPeakandRNA_withO_TPM$TPM), decreasing = T), main="Distribution of RNA expression 18486", ylab="log10 TPM", xlab="Refseq Gene") points(sort(log10(refPeakandRNA_noPeakw_withO_TPM$TPM), decreasing = T), col="Red") legend("topright", legend=c("All Genes", "Genes without Peak"), col=c("black", "red"),pch=19)</code></pre> <p><img src="figure/dataprocfigures.Rmd/unnamed-chunk-30-1.png" width="672" style="display: block; margin: auto;" /></p> <p>I can use this to look at some of the highest expressed genes that we do not have peaks for.</p> <ul> <li><p>HIST2H2AA4: no coverage at location</p></li> <li><p>HIST1H2AC: no coverage at location</p></li> <li><p>BOP1: Not in the protein coding gene file. Are 2 peaks.</p></li> <li><p>GSTM1: no coverage at location</p></li> <li><p>NPIPA1: no coverage at location</p></li> <li><p>SLX1A: difficult to interpret due to overlapping genes in the region</p></li> <li><p>HIST1H2BJ: no coverage at location</p></li> <li><p>MTX1: peak in the original filtered peaks, not in the refseq gene - lost due to direction, the peak goes the same was as the gene. probably noise</p></li> <li><p>GALE - looks like there is a peak but we are not detecting it. May be too close to the next peak at the 3’ end of LYPLA2 gene.</p></li> <li><p>HGH1: no coverage at location</p></li> <li><p>MSMP: difficult to interpret due to overlapping genes in the region</p></li> </ul> </div> <div id="session-information" class="section level2"> <h2>Session information</h2> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.5.1 (2018-07-02) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets [8] methods base other attached packages: [1] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 [2] GenomicFeatures_1.32.2 [3] AnnotationDbi_1.42.1 [4] Biobase_2.40.0 [5] GenomicRanges_1.32.6 [6] GenomeInfoDb_1.16.0 [7] IRanges_2.14.11 [8] S4Vectors_0.18.3 [9] BiocGenerics_0.26.0 [10] tximport_1.8.0 [11] bindrcpp_0.2.2 [12] cowplot_0.9.3 [13] reshape2_1.4.3 [14] workflowr_1.1.1 [15] forcats_0.3.0 [16] stringr_1.3.1 [17] dplyr_0.7.6 [18] purrr_0.2.5 [19] readr_1.1.1 [20] tidyr_0.8.1 [21] tibble_1.4.2 [22] ggplot2_3.0.0 [23] tidyverse_1.2.1 loaded via a namespace (and not attached): [1] nlme_3.1-137 matrixStats_0.54.0 [3] bitops_1.0-6 lubridate_1.7.4 [5] bit64_0.9-7 RColorBrewer_1.1-2 [7] progress_1.2.0 httr_1.3.1 [9] rprojroot_1.3-2 tools_3.5.1 [11] backports_1.1.2 R6_2.2.2 [13] DBI_1.0.0 lazyeval_0.2.1 [15] colorspace_1.3-2 withr_2.1.2 [17] tidyselect_0.2.4 prettyunits_1.0.2 [19] bit_1.1-14 compiler_3.5.1 [21] git2r_0.23.0 cli_1.0.0 [23] rvest_0.3.2 xml2_1.2.0 [25] DelayedArray_0.6.5 rtracklayer_1.40.6 [27] labeling_0.3 scales_1.0.0 [29] digest_0.6.16 Rsamtools_1.32.3 [31] rmarkdown_1.10 R.utils_2.7.0 [33] XVector_0.20.0 pkgconfig_2.0.2 [35] htmltools_0.3.6 rlang_0.2.2 [37] readxl_1.1.0 rstudioapi_0.7 [39] RSQLite_2.1.1 bindr_0.1.1 [41] jsonlite_1.5 BiocParallel_1.14.2 [43] R.oo_1.22.0 RCurl_1.95-4.11 [45] magrittr_1.5 GenomeInfoDbData_1.1.0 [47] Matrix_1.2-14 Rcpp_0.12.18 [49] munsell_0.5.0 R.methodsS3_1.7.1 [51] stringi_1.2.4 whisker_0.3-2 [53] yaml_2.2.0 SummarizedExperiment_1.10.1 [55] zlibbioc_1.26.0 plyr_1.8.4 [57] grid_3.5.1 blob_1.1.1 [59] crayon_1.3.4 lattice_0.20-35 [61] Biostrings_2.48.0 haven_1.1.2 [63] hms_0.4.2 knitr_1.20 [65] pillar_1.3.0 biomaRt_2.36.1 [67] XML_3.98-1.16 glue_1.3.0 [69] evaluate_0.11 modelr_0.1.2 [71] cellranger_1.1.0 gtable_0.2.0 [73] assertthat_0.2.0 broom_0.5.0 [75] GenomicAlignments_1.16.0 memoise_1.1.0 </code></pre> </div> <hr> <p> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> <script type="text/javascript"> /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname /* * * DON'T EDIT BELOW THIS LINE * * */ (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> --> <!-- Adjust MathJax settings so that all math formulae are shown using TeX fonts only; see http://docs.mathjax.org/en/latest/configuration.html. This will make the presentation more consistent at the cost of the webpage sometimes taking slightly longer to load. Note that this only works because the footer is added to webpages before the MathJax javascript. --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <hr> <p> This reproducible <a href="http://rmarkdown.rstudio.com">R Markdown</a> analysis was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> 1.1.1 </p> <hr> </div> </div> </div> <script> // add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.header').parent('thead').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); }); </script> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>