<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8" /> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="pandoc" /> <meta name="author" content="Briana Mittleman" /> <title>Overlap Molecular QTLs</title> <script src="site_libs/jquery-1.11.3/jquery.min.js"></script> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="site_libs/bootstrap-3.3.5/css/journal.min.css" rel="stylesheet" /> <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script> <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script> <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script> <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" /> <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script> <script src="site_libs/navigation-1.1/tabsets.js"></script> <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" /> <script src="site_libs/highlightjs-9.12.0/highlight.js"></script> <link href="site_libs/font-awesome-5.0.13/css/fa-svg-with-js.css" rel="stylesheet" /> <script src="site_libs/font-awesome-5.0.13/js/fontawesome-all.min.js"></script> <script src="site_libs/font-awesome-5.0.13/js/fa-v4-shims.min.js"></script> <style type="text/css">code{white-space: pre;}</style> <style type="text/css"> pre:not([class]) { background-color: white; } </style> <script type="text/javascript"> if (window.hljs) { hljs.configure({languages: []}); hljs.initHighlightingOnLoad(); if (document.readyState && document.readyState === "complete") { window.setTimeout(function() { hljs.initHighlighting(); }, 0); } } </script> <style type="text/css"> h1 { font-size: 34px; } h1.title { font-size: 38px; } h2 { font-size: 30px; } h3 { font-size: 24px; } h4 { font-size: 18px; } h5 { font-size: 16px; } h6 { font-size: 12px; } .table th:not([align]) { text-align: left; } </style> </head> <body> <style type = "text/css"> .main-container { max-width: 940px; margin-left: auto; margin-right: auto; } code { color: inherit; background-color: rgba(0, 0, 0, 0.04); } img { max-width:100%; height: auto; } .tabbed-pane { padding-top: 12px; } .html-widget { margin-bottom: 20px; } button.code-folding-btn:focus { outline: none; } </style> <style type="text/css"> /* padding for bootstrap navbar */ body { padding-top: 51px; padding-bottom: 40px; } /* offset scroll position for anchor links (for fixed navbar) */ .section h1 { padding-top: 56px; margin-top: -56px; } .section h2 { padding-top: 56px; margin-top: -56px; } .section h3 { padding-top: 56px; margin-top: -56px; } .section h4 { padding-top: 56px; margin-top: -56px; } .section h5 { padding-top: 56px; margin-top: -56px; } .section h6 { padding-top: 56px; margin-top: -56px; } </style> <script> // manage active state of menu based on current page $(document).ready(function () { // active menu anchor href = window.location.pathname href = href.substr(href.lastIndexOf('/') + 1) if (href === "") href = "index.html"; var menuAnchor = $('a[href="' + href + '"]'); // mark it active menuAnchor.parent().addClass('active'); // if it's got a parent navbar menu mark it active as well menuAnchor.closest('li.dropdown').addClass('active'); }); </script> <div class="container-fluid main-container"> <!-- tabsets --> <script> $(document).ready(function () { window.buildTabsets("TOC"); }); </script> <!-- code folding --> <script> $(document).ready(function () { // move toc-ignore selectors from section div to header $('div.section.toc-ignore') .removeClass('toc-ignore') .children('h1,h2,h3,h4,h5').addClass('toc-ignore'); // establish options var options = { selectors: "h1,h2,h3", theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 }; options.showAndHide = true; options.smoothScroll = true; // tocify var toc = $("#TOC").tocify(options).data("toc-tocify"); }); </script> <style type="text/css"> #TOC { margin: 25px 0px 20px 0px; } @media (max-width: 768px) { #TOC { position: relative; width: 100%; } } .toc-content { padding-left: 30px; padding-right: 40px; } div.main-container { max-width: 1200px; } div.tocify { width: 20%; max-width: 260px; max-height: 85%; } @media (min-width: 768px) and (max-width: 991px) { div.tocify { width: 25%; } } @media (max-width: 767px) { div.tocify { width: 100%; max-width: none; } } .tocify ul, .tocify li { line-height: 20px; } .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; } .tocify .list-group-item { border-radius: 0px; } </style> <!-- setup 3col/9col grid for toc_float and main content --> <div class="row-fluid"> <div class="col-xs-12 col-sm-4 col-md-3"> <div id="TOC" class="tocify"> </div> </div> <div class="toc-content col-xs-12 col-sm-8 col-md-9"> <div class="navbar navbar-default navbar-fixed-top" role="navigation"> <div class="container"> <div class="navbar-header"> <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar"> <span class="icon-bar"></span> <span class="icon-bar"></span> <span class="icon-bar"></span> </button> <a class="navbar-brand" href="index.html">Three Prime Sequencing in Human LCLs</a> </div> <div id="navbar" class="navbar-collapse collapse"> <ul class="nav navbar-nav"> <li> <a href="index.html">Home</a> </li> <li> <a href="about.html">About</a> </li> <li> <a href="license.html">License</a> </li> </ul> <ul class="nav navbar-nav navbar-right"> <li> <a href="https://github.com/brimittleman/threeprimeseq"> <span class="fa fa-github"></span> </a> </li> </ul> </div><!--/.nav-collapse --> </div><!--/.container --> </div><!--/.navbar --> <!-- Add a small amount of space between sections. --> <style type="text/css"> div.section { padding-top: 12px; } </style> <div class="fluid-row" id="header"> <h1 class="title toc-ignore">Overlap Molecular QTLs</h1> <h4 class="author"><em>Briana Mittleman</em></h4> <h4 class="date"><em>10/1/2018</em></h4> </div> <p><strong>Last updated:</strong> 2018-10-06</p> <strong>workflowr checks:</strong> <small>(Click a bullet for more information)</small> <ul> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>R Markdown file:</strong> up-to-date </summary></p> <p>Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Environment:</strong> empty </summary></p> <p>Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Seed:</strong> <code>set.seed(12345)</code> </summary></p> <p>The command <code>set.seed(12345)</code> was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Session information:</strong> recorded </summary></p> <p>Great job! Recording the operating system, R version, and package versions is critical for reproducibility.</p> </details> </li> <li> <p><details> <summary> <strong style="color:blue;">✔</strong> <strong>Repository version:</strong> <a href="https://github.com/brimittleman/threeprimeseq/tree/2e2c69596afe9f411d88bf153cf1366497ac2bd7" target="_blank">2e2c695</a> </summary></p> Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated. <br><br> Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use <code>wflow_publish</code> or <code>wflow_git_commit</code>). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated: <pre><code> Ignored files: Ignored: .DS_Store Ignored: .Rhistory Ignored: .Rproj.user/ Ignored: output/.DS_Store Untracked files: Untracked: KalistoAbundance18486.txt Untracked: analysis/genometrack_figs.Rmd Untracked: analysis/ncbiRefSeq_sm.sort.mRNA.bed Untracked: analysis/snake.config.notes.Rmd Untracked: analysis/verifyBAM.Rmd Untracked: data/18486.genecov.txt Untracked: data/APApeaksYL.total.inbrain.bed Untracked: data/NuclearApaQTLs.txt Untracked: data/RNAkalisto/ Untracked: data/TotalApaQTLs.txt Untracked: data/Totalpeaks_filtered_clean.bed Untracked: data/YL-SP-18486-T-combined-genecov.txt Untracked: data/YL-SP-18486-T_S9_R1_001-genecov.txt Untracked: data/bedgraph_peaks/ Untracked: data/bin200.5.T.nuccov.bed Untracked: data/bin200.Anuccov.bed Untracked: data/bin200.nuccov.bed Untracked: data/clean_peaks/ Untracked: data/comb_map_stats.csv Untracked: data/comb_map_stats.xlsx Untracked: data/comb_map_stats_39ind.csv Untracked: data/combined_reads_mapped_three_prime_seq.csv Untracked: data/ensemble_to_genename.txt Untracked: data/filtered_APApeaks_merged_allchrom_refseqTrans.closest2End.bed Untracked: data/filtered_APApeaks_merged_allchrom_refseqTrans.closest2End.noties.bed Untracked: data/first50lines_closest.txt Untracked: data/gencov.test.csv Untracked: data/gencov.test.txt Untracked: data/gencov_zero.test.csv Untracked: data/gencov_zero.test.txt Untracked: data/gene_cov/ Untracked: data/joined Untracked: data/leafcutter/ Untracked: data/merged_combined_YL-SP-threeprimeseq.bg Untracked: data/mol_overlap/ Untracked: data/nom_QTL/ Untracked: data/nom_QTL_opp/ Untracked: data/nom_QTL_trans/ Untracked: data/nuc6up/ Untracked: data/other_qtls/ Untracked: data/peakPerRefSeqGene/ Untracked: data/perm_QTL/ Untracked: data/perm_QTL_opp/ Untracked: data/perm_QTL_trans/ Untracked: data/reads_mapped_three_prime_seq.csv Untracked: data/smash.cov.results.bed Untracked: data/smash.cov.results.csv Untracked: data/smash.cov.results.txt Untracked: data/smash_testregion/ Untracked: data/ssFC200.cov.bed Untracked: data/temp.file1 Untracked: data/temp.file2 Untracked: data/temp.gencov.test.txt Untracked: data/temp.gencov_zero.test.txt Untracked: output/picard/ Untracked: output/plots/ Untracked: output/qual.fig2.pdf Unstaged changes: Modified: analysis/28ind.peak.explore.Rmd Modified: analysis/39indQC.Rmd Modified: analysis/cleanupdtseq.internalpriming.Rmd Modified: analysis/dif.iso.usage.leafcutter.Rmd Modified: analysis/diff_iso_pipeline.Rmd Modified: analysis/explore.filters.Rmd Modified: analysis/overlap_qtls.Rmd Modified: analysis/peakOverlap_oppstrand.Rmd Modified: analysis/pheno.leaf.comb.Rmd Modified: analysis/test.max2.Rmd Modified: code/Snakefile </code></pre> Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes. </details> </li> </ul> <details> <summary> <small><strong>Expand here to see past versions:</strong></small> </summary> <ul> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> File </th> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> <th style="text-align:left;"> Message </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/2e2c69596afe9f411d88bf153cf1366497ac2bd7/analysis/overlapMolQTL.Rmd" target="_blank">2e2c695</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-06 </td> <td style="text-align:left;"> histogram for APA cond on mol pheno </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/11e79ea68b415897a023c9d023c1e6ce771b67db/docs/overlapMolQTL.html" target="_blank">11e79ea</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/2a0b2557e22d781e3b341435a3e921909f7f3fff/analysis/overlapMolQTL.Rmd" target="_blank">2a0b255</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> <td style="text-align:left;"> add barplot for overlap </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/overlapMolQTL.html" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/10483d977b1bfadb739a1e76ea13d99710d08589/analysis/overlapMolQTL.Rmd" target="_blank">10483d9</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> <td style="text-align:left;"> add overlap QQ plots </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/f8a639d8400bcb8ad94541ea6c4f300e6059c2c4/docs/overlapMolQTL.html" target="_blank">f8a639d</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-03 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/8314135355662a52bc4ec2d37593a62c3a8019b3/analysis/overlapMolQTL.Rmd" target="_blank">8314135</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-03 </td> <td style="text-align:left;"> RNA gene vendiagram </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/2e0d959a51368bcfa9ebc00b54b6b86195e8835a/docs/overlapMolQTL.html" target="_blank">2e0d959</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-01 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/9d6ee033c3a681f38437f2a0cee61cef97ac51da/analysis/overlapMolQTL.Rmd" target="_blank">9d6ee03</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-01 </td> <td style="text-align:left;"> add 4su plots </td> </tr> <tr> <td style="text-align:left;"> html </td> <td style="text-align:left;"> <a href="https://cdn.rawgit.com/brimittleman/threeprimeseq/ac983db135c4ca1872cc58b4efea050275858722/docs/overlapMolQTL.html" target="_blank">ac983db</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-01 </td> <td style="text-align:left;"> Build site. </td> </tr> <tr> <td style="text-align:left;"> Rmd </td> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/35142fb308209cce516f74504f2f1a3abcfdff51/analysis/overlapMolQTL.Rmd" target="_blank">35142fb</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-01 </td> <td style="text-align:left;"> overlap QTL plots </td> </tr> </tbody> </table> </ul> <p></details></p> <hr /> <p>I will use this script to overlap the molQTLs found in <a href="callMolQTLS.html">Call molQTL</a> analysis with the APA QTLs I found using the <a href="PeakToGeneAssignment.html">transcript level annotations</a> .</p> <p>I want to ask if APA QTLs effect other molecular QTLs. The first step is to find the top snp-gene pair. The permuted value is giving me 1 snp for each peak. I need to find the top snp/peak in this file for each gene. I will then test these snps for significance at 10% fdr.</p> <p>Overlap: Use the permulted molecular QTL pvalues to find the significant QTLs for each molecular phenotype I tested. Find each of these snps in the APA nominal file. Take the most stignficant pair and multiple the pvalue by the number of peaks the snp is associated with for that same gene. As a baseline for this test I will randomly choose the same number of snps from molecular QTL and test these in the APA nominal files. I can run this for the total and nuclear.</p> <p>I want to do this for each of the molecular QTLs, therefore it would be best to upload the necessary files then create a script that can take any of them and create the QQplot.</p> <div id="upload-data" class="section level2"> <h2>Upload Data:</h2> <p>Library</p> <pre class="r"><code>library(workflowr)</code></pre> <pre><code>This is workflowr version 1.1.1 Run ?workflowr for help getting started</code></pre> <pre class="r"><code>library(reshape2) library(tidyverse)</code></pre> <pre><code>── Attaching packages ──────────────────────────────────── tidyverse 1.2.1 ──</code></pre> <pre><code>✔ ggplot2 3.0.0 ✔ purrr 0.2.5 ✔ tibble 1.4.2 ✔ dplyr 0.7.6 ✔ tidyr 0.8.1 ✔ stringr 1.3.1 ✔ readr 1.1.1 ✔ forcats 0.3.0</code></pre> <pre><code>── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag()</code></pre> <pre class="r"><code>library(VennDiagram)</code></pre> <pre><code>Loading required package: grid</code></pre> <pre><code>Loading required package: futile.logger</code></pre> <pre class="r"><code>library(data.table)</code></pre> <pre><code> Attaching package: 'data.table'</code></pre> <pre><code>The following objects are masked from 'package:dplyr': between, first, last</code></pre> <pre><code>The following object is masked from 'package:purrr': transpose</code></pre> <pre><code>The following objects are masked from 'package:reshape2': dcast, melt</code></pre> <p>Permuted Results from APA:</p> <pre class="r"><code>nuclearAPA=read.table("../data/perm_QTL_trans/filtered_APApeaks_merged_allchrom_refseqGenes_pheno_Nuclear_transcript_permResBH.txt", stringsAsFactors = F, header = T) totalAPA=read.table("../data/perm_QTL_trans/filtered_APApeaks_merged_allchrom_refseqGenes_pheno_Total_transcript_permResBH.txt", stringsAsFactors = F, header=T) </code></pre> <p>Permuted results for other QTLs</p> <pre class="r"><code>perm_names=c("pid" ,"nvar","shape1" ,"shape2", "dummy","sid" ,"dist","npval", "slope" , "ppval" ,"bpval") su30=read.table("../data/other_qtls/fastqtl_qqnorm_4su30.fixed.perm.out", stringsAsFactors = F,col.names = perm_names) su60=read.table("../data/other_qtls/fastqtl_qqnorm_4su60.fixed.perm.out", stringsAsFactors = F,col.names = perm_names) rna=read.table("../data/other_qtls/fastqtl_qqnorm_RNAseq_phase2.fixed.perm.out", stringsAsFactors = F,col.names = perm_names) rnaG=read.table("../data/other_qtls/fastqtl_qqnorm_RNAseqGeuvadis.fixed.perm.out", stringsAsFactors = F,col.names = perm_names) rib=read.table("../data/other_qtls/fastqtl_qqnorm_ribo_phase2.fixed.perm.out", stringsAsFactors = F,col.names = perm_names) prot=read.table("../data/other_qtls/fastqtl_qqnorm_prot.fixed.perm.out", stringsAsFactors = F,col.names = c("Gene.stable.ID" ,"nvar","shape1" ,"shape2", "dummy","sid" ,"dist","npval", "slope" , "ppval" ,"bpval"))</code></pre> </div> <div id="create-overlap-plot" class="section level2"> <h2>Create overlap plot</h2> <p>I will write this in multiple functions and put them together. The first function will take in the permuted results and return the significant snps at a given FDR.</p> <p>First step is to take in the mol file and change the names:</p> <pre class="r"><code>geneNames=read.table("/project2/gilad/briana/genome_anotation_data/ensemble_to_genename.txt", sep="\t", header=T,stringsAsFactors = F) file_newNames=mol_file %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval")</code></pre> <pre class="r"><code>#returns significant snps given a file and a cutoff sigsnp=function(file, cutoff){ file$bh=p.adjust(file$bpval, method="fdr") file_sig=file %>% filter(-log10(bh)> cutoff) %>% select(Gene.name, sid) return(file_sig) } testsigsnp=sigsnp(rna,1 )</code></pre> <p>Next step is to choose a random subset with the same number of snps as were found significant.</p> <pre class="r"><code>#takes the file and the list of sig snps, returns a df with the same number of random snps randomsnps=function(file, SigSnpList){ nsnp=nrow(SigSnpList) randomSnpDF= file %>% sample_n(nsnp) %>% arrange(sid) %>% select(Gene.name,sid) return(randomSnpDF) } testrandomsnps=randomsnps(rna, testsigsnp)</code></pre> <p>The next step is to filter nuclear file by the snp id and gene. To do this I will join on the snpIDs then group by the snp ids. I should then be able to take the lowest Pvalue from each group and count how many are in each group to multiply by the number of tests. I will practice this with a small set then make the general function.</p> <pre class="r"><code>#filter and fix pvals filt_tot= totalAPA %>% semi_join(testrandomsnps, by=c("Gene.name","sid") %>% group_by(sid) %>% add_tally() %>% ungroup() %>% mutate(corrPval=bpval * n) #take top snp filt_tot_top= filt_tot %>% group_by(sid) %>% top_n(-1, corrPval)</code></pre> <p>Make this into a function for the total and nuclear:</p> <pre class="r"><code>nom_names=c("peakID", "sid", "dist", "pval", "slope") #import total nominal apaTotNom=read_table("/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes_pheno_Total_NomRes.txt", col_names=nom_name, col_types = c(col_character(), col_character(), col_double(), col_double(), col_double())) #import nuclear nominal apaNucNom=read_table("/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes_pheno_Nuclear_NomRes.txt", col_names=nom_name, col_types = nom_names c(col_character(), col_character(), col_double(), col_double(), col_double())) #takes a list of snps and filters the top corrected snp for each one, returns df top_Total=function(snp_list){ filt_tot=apaTotNom %>% separate(peakID, into=c("chr", "start", "end", "id"), sep=":") %>% separate(id, into=c("Gene.name", "strand", "peaknum"), sep="_") %>% semi_join(snp_list, by=c("sid", "Gene.name") %>% group_by(sid, Gene.name) %>% add_tally() %>% ungroup() %>% mutate(corrPval=bpval* n) filt_tot_top= filt_tot %>% group_by(sid, Gene.name) %>% top_n(-1, corrPval) return(filt_tot_top) } #same for nuclear: top_Nuclear=function(snp_list){ filt_nuc=apaNucNom %>% separate(peakID, into=c("chr", "start", "end", "id"), sep=":") %>% separate(id, into=c("Gene.name", "strand", "peaknum"), sep="_") %>% semi_join(snp_list, by=c("sid", "Gene.name") %>% group_by(sid, Gene.name) %>% add_tally() %>% ungroup() %>% mutate(corrPval=bpval* n) filt_nuc_top= filt_nuc %>% group_by(sid, Gene.name) %>% top_n(-1, corrPval) return(filt_nuc_top) }</code></pre> <p>In the full script I will run this on the real QTLs and the random snps.</p> <p>The next function will make the plots. I will make one that takes the results of the top_total or top_Nuclear snps.</p> <pre class="r"><code>#function returns a QQplot when given the results of the top_X functions. One will be the test set (real QTLs) and 1 will be the baseline snps. makeQQ=function(test, baseline, Mol, Fraction){ plot=qqplot(-log10(runif(nrow(baseline))), -log10(baseline$corrPval), ylab="Observed", xlab="Expected", main=paste("Overlap QTLs:", Mol, "with APA", Fraction, sep=" ")) points(sort(-log10(runif(nrow(test)))), sort(-log10(test$corrPval)), col= alpha("Red")) abline(0,1) return(plot) }</code></pre> <p>Put these together in a function: I want to give the function the molQTL file and it will make the total and nuclear plots. This means I need to give it the file to write the png files to.</p> <p>createOverlapSigMol2APA.R</p> <pre class="r"><code>#!/bin/rscripts #this script creates the files for the molQTLs overlap with total and nuclear APA qtl library(dplyr) library(tidyr) library(ggplot2) library(readr) library(optparse) #this script will take the total and nuclear nominal file for a given, then output files to put the total/nuclear/base/test files into, and the mol QTL permuted results option_list = list( make_option(c("-T", "--file_Total"), action="store", default=NA, type='character', help="input nom file total"), make_option(c("-N", "--file_Nuclear"), action="store", default=NA, type='character', help="input nom file nuclear"), make_option(c("-A", "--output_test_total"), action="store", default=NA, type='character', help="output for test set total"), make_option(c("-B", "--output_test_nuclear"), action="store", default=NA, type='character', help="output for test set nulear"), make_option(c("-C", "--output_base_total"), action="store", default=NA, type='character', help="output for base total"), make_option(c("-D", "--output_base_nuclear"), action="store", default=NA, type='character', help="output for baseset nuclear"), make_option(c("-M", "--molPhenoQTLperm"), action="store", default=NA, type='character', help="permuter results for molecular pheno") ) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) print(opt) nom_name=c("peakID", "sid", "dist", "pval", "slope") geneNames=read.table("/project2/gilad/briana/genome_anotation_data/ensemble_to_genename.txt", sep="\t", header=T, stringsAsFactors = F) #function to run per mol QTLs overlapQTLplot=function(mol_file, cut, optA=opt, nom_nameA=nom_name){ if (mol_file == "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_prot.fixed.perm.out") { in_file=read.table(mol_file, col.names = c("Gene.stable.ID", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"),stringsAsFactors=F) file_newNames=in_file %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") } else { in_file=read.table(mol_file, col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"),stringsAsFactors=F) file_newNames=in_file %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") } #helper functions #returns significant snps given a file and a cutoff sigsnp=function(file, cutoff){ file$bh=p.adjust(file$bpval, method="fdr") file_sig=file %>% filter(-log10(bh)> cutoff) %>% select(Gene.name, sid) return(file_sig) } randomsnps=function(file, SigSnpList){ nsnp=nrow(SigSnpList) randomSnpDF= file %>% sample_n(nsnp) %>% arrange(sid) %>% select(Gene.name,sid) return(randomSnpDF) } #takes a list of snps and filters the top corrected snp for each one, returns df top_Total=function(snp_list,optB=optA,nom_name1=nom_nameA){ apaTotNom=read.table(optB$file_Total, col.names=nom_name1,stringsAsFactors=F) filt_tot=apaTotNom %>% separate(peakID, into=c("chr", "start", "end", "id"), sep=":") %>% separate(id, into=c("Gene.name", "strand", "peaknum"), sep="_") %>% semi_join(snp_list, by=c("sid", "Gene.name")) %>% group_by(sid, Gene.name) %>% add_tally() %>% ungroup() %>% mutate(corrPvalx=pval* n) %>% mutate(corrPval=ifelse(as.numeric(corrPvalx)>1, "1", corrPvalx)) filt_tot_top= filt_tot %>% group_by(sid, Gene.name) %>% top_n(-1, corrPvalx) return(as.data.frame(filt_tot_top)) } #same for nuclear: top_Nuclear=function(snp_list,optC=optA, nom_name2=nom_nameA){ apaNucNom=read.table(optC$file_Nuclear, col.names=nom_name2,stringsAsFactors=F) filt_nuc=apaNucNom %>% separate(peakID, into=c("chr", "start", "end", "id"), sep=":") %>% separate(id, into=c("Gene.name", "strand", "peaknum"), sep="_") %>% semi_join(snp_list, by=c("sid", "Gene.name")) %>% group_by(sid, Gene.name) %>% add_tally() %>% ungroup() %>% mutate(corrPvalx=pval* n) %>% mutate(corrPval=ifelse(as.numeric(corrPvalx)>1, "1", corrPvalx)) filt_nuc_top= filt_nuc %>% group_by(sid, Gene.name) %>% top_n(-1, corrPvalx) return(as.data.frame(filt_nuc_top)) } TL=sigsnp(file_newNames, cut) BL=randomsnps(file_newNames, TL) #top snps test and base total topT_T=top_Total(TL) topT_B=top_Total(BL) #top snps test and base total topN_T=top_Nuclear(TL) topN_B=top_Nuclear(BL) return(list(TT=topT_T,TB=topT_B, NT=topN_T,NB=topN_B)) } outputFiles=overlapQTLplot(opt$molPhenoQTLperm, 1) #write tables write.table(outputFiles$TT,opt$output_test_total,quote=F,row.names = F, col.names=T ) write.table(outputFiles$TB,opt$output_base_total,quote=F,row.names = F, col.names=T ) write.table(outputFiles$NT,opt$output_test_nuclear,quote=F,row.names = F, col.names=T ) write.table(outputFiles$NB,opt$output_base_nuclear,quote=F,row.names = F, col.names=T )</code></pre> <p>maybe try as.data.frame</p> <p>Directories for output:</p> <p>Run this on chr 13, protein:</p> <p>test_createOverlapSigMol2APA.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=test_createOverlapSigMol2APA #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=test_createOverlapSigMol2APA.out #SBATCH --error=test_createOverlapSigMol2APA.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr13.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr13.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Total.test.chr13.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.chr13.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Total.base.chr13.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Nuclear.base.chr13.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_prot.fixed.perm.out" </code></pre> <p>Run this on all individuals and all phenos:</p> <p>run_createOverlapSigMol2APA_prot.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_prot #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_prot.out #SBATCH --error=run_createOverlapSigMol2APA_prot.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #protein for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_prot.fixed.perm.out" done </code></pre> <p>run_createOverlapSigMol2APA_4su30.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_4su30 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_4su30.out #SBATCH --error=run_createOverlapSigMol2APA_4su30.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #4su30 for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su30QTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su30QTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su30QTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su30QTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_4su30.fixed.perm.out" done </code></pre> <p>run_createOverlapSigMol2APA_4su60.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_4su60 #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_4su60.out #SBATCH --error=run_createOverlapSigMol2APA_4su60.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #4su60 for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su60QTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su60QTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su60QTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su60QTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_4su60.fixed.perm.out" done </code></pre> <p>run_createOverlapSigMol2APA_RNAsG.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_RNAsG #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_RNAsG.out #SBATCH --error=run_createOverlapSigMol2APA_RNAsG.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #RNAseqGeuvadis for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_RNAseqGeuvadis.fixed.perm.out" done </code></pre> <p>run_createOverlapSigMol2APA_RNA.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_RNA #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_RNA.out #SBATCH --error=run_createOverlapSigMol2APA_RNA.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #RNAseq for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNASeqQTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqQTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqQTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqQTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_RNAseq_phase2.fixed.perm.out" done </code></pre> <p>run_createOverlapSigMol2APA_Ribo.sh</p> <pre class="bash"><code>#!/bin/bash #SBATCH --job-name=run_createOverlapSigMol2APA_Ribo #SBATCH --account=pi-yangili1 #SBATCH --time=24:00:00 #SBATCH --output=run_createOverlapSigMol2APA_Ribo.out #SBATCH --error=run_createOverlapSigMol2APA_Ribo.err #SBATCH --partition=bigmem2 #SBATCH --mem=64G #SBATCH --mail-type=END module load R #Ribo for i in $(seq 1 22) do Rscript createOverlapSigMol2APA.R --file_Total "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Total.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --file_Nuclear "/project2/gilad/briana/threeprimeseq/data/nominal_APAqtl_trans/filtered_APApeaks_merged_allchrom_refseqGenes.Transcript_sm_quant.Nuclear.pheno_fixed.txt.gz.qqnorm_chr${i}.nominal.out" --output_test_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigriboQTL_overlapAPA_Total.test.chr${i}.txt" --output_test_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigriboQTL_overlapAPA_Nuclear.test.chr${i}.txt" --output_base_total "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigriboQTL_overlapAPA_Total.base.chr${i}.txt" --output_base_nuclear "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigriboQTL_overlapAPA_Nuclear.base.chr${i}.txt" --molPhenoQTLperm "/project2/gilad/briana/threeprimeseq/data/molecular_QTLs/perm/fastqtl_qqnorm_ribo_phase2.fixed.perm.out" done </code></pre> <p>I will need to concatinate all of the test and base files for each phenotype befre running the code to create the QQ plots.</p> <pre class="bash"><code>#in base for i in $(ls *) do awk '{if (NR!=1) {print}}' ${i} > ../base_nohead/${i} done #in test for i in $(ls *) do awk '{if (NR!=1) {print}}' ${i} > ../test_nohead/${i} done</code></pre> <p>The results are not large. I will transfer them to my computer and run the analysis here.</p> <pre class="r"><code>makeQQ=function(test, baseline, Mol, Fraction){ names=c("chr", "start", "end", "Gene.name", "strand" ,"peaknum", "sid", "dist", "pval", "slope", "n" ,"corrPvalX", "corrPval") t=read.table(test,stringsAsFactors = F, col.names = names) t$corrPval=as.numeric(t$corrPval) b=read.table(baseline,stringsAsFactors = F,col.names = names) b$corrPval=as.numeric(b$corrPval) plot=qqplot(-log10(runif(nrow(b))), -log10(b$corrPval), ylab="Observed", xlab="Expected", main=paste("Overlap QTLs:", Mol, "with APA", Fraction, sep=" ")) points(sort(-log10(runif(nrow(t)))), sort(-log10(t$corrPval)), col= alpha("Red")) abline(0,1) return(plot) }</code></pre> <div id="rna" class="section level4"> <h4>RNA</h4> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/SigRNAseqQTL_overlapAPA_Total.base.txt", "RNAseq", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-20-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-20-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-20-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/SigRNAseqQTL_overlapAPA_Nuclear.base.txt", "RNAseq", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-20-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-20-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-20-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> </div> <div id="rna-g" class="section level3"> <h3>RNA G</h3> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Total.base.txt", "RNAseq Geu", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-21-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-21-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-21-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.base.txt", "RNAseq Geu", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-21-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-21-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-21-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> </div> <div id="su30" class="section level3"> <h3>4su30</h3> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/Sig4su30QTL_overlapAPA_Total.base.txt", "4su 30", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-22-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-22-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-22-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/Sig4su30QTL_overlapAPA_Nuclear.base.txt", "4su 30", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-22-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-22-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-22-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> </div> <div id="su60" class="section level3"> <h3>4su60</h3> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/Sig4su60QTL_overlapAPA_Total.base.txt", "4su 60", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-23-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-23-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-23-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/Sig4su60QTL_overlapAPA_Nuclear.base.txt", "4su 60", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-23-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-23-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-23-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> </div> <div id="protein" class="section level3"> <h3>Protein</h3> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/SigProteinQTL_overlapAPA_Total.base.txt", "Protein", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-24-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-24-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-24-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/SigProteinQTL_overlapAPA_Nuclear.base.txt", "Protein", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-24-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-24-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-24-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> </div> <div id="ribo" class="section level3"> <h3>Ribo</h3> <pre class="r"><code>rna_total_plot=makeQQ("../data/mol_overlap/test/SigriboQTL_overlapAPA_Total.test.txt", "../data/mol_overlap/base/SigriboQTL_overlapAPA_Total.base.txt", "Ribo", "Total")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-25-1.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-25-1.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-25-1.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <pre class="r"><code>rna_nuc_plot=makeQQ("../data/mol_overlap/test/SigriboQTL_overlapAPA_Nuclear.test.txt", "../data/mol_overlap/base/SigriboQTL_overlapAPA_Nuclear.base.txt", "Ribo", "Nuclear")</code></pre> <p><img src="figure/overlapMolQTL.Rmd/unnamed-chunk-25-2.png" width="672" style="display: block; margin: auto;" /></p> <details> <summary><em>Expand here to see past versions of unnamed-chunk-25-2.png:</em></summary> <table style="border-collapse:separate; border-spacing:5px;"> <thead> <tr> <th style="text-align:left;"> Version </th> <th style="text-align:left;"> Author </th> <th style="text-align:left;"> Date </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> <a href="https://github.com/brimittleman/threeprimeseq/blob/b32f50b1c3ca9b0dec598801204ecc39400777eb/docs/figure/overlapMolQTL.Rmd/unnamed-chunk-25-2.png" target="_blank">b32f50b</a> </td> <td style="text-align:left;"> Briana Mittleman </td> <td style="text-align:left;"> 2018-10-05 </td> </tr> </tbody> </table> <p></details></p> <p>I want to plot the proportion of QTLs that overlap with APA per pheno type:</p> <p>Fix names and look at number of significant:</p> <pre class="r"><code>geneNames=read.table("../data/ensemble_to_genename.txt", sep="\t", header=T,stringsAsFactors = F) #su30 su30.name=su30 %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") su30.name$bh=p.adjust(su30.name$bpval, method="fdr") su30.name_sig=su30.name %>% filter(-log10(bh)> 1) %>% nrow() su30.name_sig</code></pre> <pre><code>[1] 384</code></pre> <pre class="r"><code>su60.name=su60 %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") su60.name$bh=p.adjust(su60.name$bpval, method="fdr") su60.name_sig=su60.name %>% filter(-log10(bh)> 1) %>% nrow() su60.name_sig</code></pre> <pre><code>[1] 360</code></pre> <pre class="r"><code>rna.name=rna %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") rna.name$bh=p.adjust(rna.name$bpval, method="fdr") rna.name_sig=rna.name %>% filter(-log10(bh)> 1) %>% nrow() rna.name_sig</code></pre> <pre><code>[1] 447</code></pre> <pre class="r"><code>rnaG.name=rnaG %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") rnaG.name$bh=p.adjust(rnaG.name$bpval, method="fdr") rnaG.name_sig=rnaG.name %>% filter(-log10(bh)> 1) %>% nrow() rnaG.name_sig</code></pre> <pre><code>[1] 1361</code></pre> <pre class="r"><code>rib.name=rib %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") rib.name$bh=p.adjust(rib.name$bpval, method="fdr") rib.name_sig=rib.name %>% filter(-log10(bh)> 1) %>% nrow() rib.name_sig</code></pre> <pre><code>[1] 285</code></pre> <pre class="r"><code>prot.name=prot %>% inner_join(geneNames, by="Gene.stable.ID") prot.name$bh=p.adjust(prot.name$bpval, method="fdr") prot.name_sig=prot.name %>% filter(-log10(bh)> 1) %>% nrow() prot.name_sig</code></pre> <pre><code>[1] 54</code></pre> <pre class="r"><code>phenos=c("4su30", "4su60", "RNA", "RNAG", "Ribo", "Protein") sig=c(su30.name_sig, su60.name_sig, rna.name_sig, rnaG.name_sig, rib.name_sig, prot.name_sig)</code></pre> <p>Get the Total overlap numbers</p> <pre class="r"><code>names=c("chr", "start", "end", "Gene.name", "strand" ,"peaknum", "sid", "dist", "pval", "slope", "n" ,"corrPvalX", "corrPval") su30_overT=fread("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() su60_overT=fread("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() rna_overT=fread("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() rnaG_overT=fread("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() ribo_overT=fread("../data/mol_overlap/test/SigriboQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() prot_overT=fread("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() overlap_total=c(su30_overT,su60_overT, rna_overT,rnaG_overT,ribo_overT,prot_overT)</code></pre> <p>Get nuclear overlap</p> <pre class="r"><code>su30_overN=fread("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() su60_overN=fread("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() rna_overN=fread("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() rnaG_overN=fread("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() ribo_overN=fread("../data/mol_overlap/test/SigriboQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() prot_overN=fread("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) %>% nrow() overlap_nuclear=c(su30_overN,su60_overN, rna_overN,rnaG_overN,ribo_overN,prot_overN)</code></pre> <p>Make this a dataframe:</p> <pre class="r"><code>overlapDF=as.data.frame(cbind(phenos,sig, overlap_total, overlap_nuclear)) overlapDF$sig=as.numeric(as.character(overlapDF$sig)) overlapDF$overlap_total=as.numeric(as.character(overlapDF$overlap_total)) overlapDF$overlap_nuclear=as.numeric(as.character(overlapDF$overlap_nuclear)) overlapDF=overlapDF%>% mutate(Total=overlap_total/sig) %>% mutate(Nuclear=(overlap_nuclear/sig)) %>% dplyr::select(phenos, Total, Nuclear) overlapDF_melt=melt(overlapDF,id.vars="phenos",variable.name = "Fraction", value.name = "Percent_QTL_Overlap")</code></pre> <pre class="r"><code>molQTLshare=ggplot(overlapDF_melt, aes(x=phenos, y=Percent_QTL_Overlap, by=Fraction, fill=Fraction)) +geom_bar(stat="identity", position="dodge") + scale_fill_manual(values=c("#5D478B", "#87CEFF")) + labs(title="Percent of Molecular QTLs sharing an APAqtl", x="Molecular Phenotype", y="Percent QTLs at FDR 10%") ggsave("../output/plots/PercOverlapMolQTL.png", molQTLshare)</code></pre> <pre><code>Saving 7 x 5 in image</code></pre> <p>This is not quite right. This is if I tested it in both. I need to look and see if we have a significant snp gene pair.</p> <p>Historgram of the Pvalues from the other significant snp in each phenotype:<br /> First for the total fraction</p> <pre class="r"><code>su30_APApval=read.table("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) su60_APApval=read.table("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) rna_APApval=read.table("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) rnaG_APApval=read.table("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) ribo_APApval=read.table("../data/mol_overlap/test/SigriboQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) prot_APApval=read.table("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Total.test.txt",stringsAsFactors = F, col.names = names) </code></pre> <p>Plot a histogram with the corrected Pvalues for each of these:</p> <pre class="r"><code>png("../output/plots/AllPheno_histPval_TotAPA.png",width=600, height=400) par(mfrow=c(2,3)) hist(su30_APApval$corrPval, breaks=50, main="4su30 (Transcription)", xlab="APA total Pval") hist(su60_APApval$corrPval, breaks=50,main="4su60 (Transcription)", xlab="APA total Pval") hist(rna_APApval$corrPval, breaks=50, main= "RNA", xlab="APA total Pval") hist(rnaG_APApval$corrPval, breaks=50, main="RNA Guevadis", xlab="APA total Pval") hist(ribo_APApval$corrPval, breaks=50, main="Ribosome (Translation)", xlab="APA total Pval") hist(prot_APApval$corrPval, breaks=50, main="Protein", xlab="APA total Pval") dev.off()</code></pre> <pre><code>quartz_off_screen 2 </code></pre> <pre class="r"><code>su30_APApvalN=read.table("../data/mol_overlap/test/Sig4su30QTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) su60_APApvalN=read.table("../data/mol_overlap/test/Sig4su60QTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) rna_APApvalN=read.table("../data/mol_overlap/test/SigRNAseqQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) rnaG_APApvalN=read.table("../data/mol_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) ribo_APApvalN=read.table("../data/mol_overlap/test/SigriboQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) prot_APApvalN=read.table("../data/mol_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.txt",stringsAsFactors = F, col.names = names) </code></pre> <pre class="r"><code>png("../output/plots/AllPheno_histPval_NucAPA.png", width=600, height=400) par(mfrow=c(2,3)) hist(su30_APApvalN$corrPval, breaks=50, main="4su30", xlab="APA Nuclear Pval") hist(su60_APApvalN$corrPval, breaks=50,main="4su60", xlab="APA Nuclear Pval") hist(rna_APApvalN$corrPval, breaks=50, main= "RNA", xlab="APA Nuclear Pval") hist(rnaG_APApvalN$corrPval, breaks=50, main="RNA Guevadis", xlab="APA Nuclear Pval") hist(ribo_APApvalN$corrPval, breaks=50, main="Ribosome", xlab="APA Nuclear Pval") hist(prot_APApvalN$corrPval, breaks=50, main="Protein", xlab="APA Nuclear Pval") dev.off()</code></pre> <pre><code>quartz_off_screen 2 </code></pre> </div> <div id="did-not-use-this-on-terminal" class="section level3"> <h3>Did not use this on terminal</h3> <p>I will need to concatinate all of the test and base files for each phenotype befre running the code to create the QQ plots.</p> <p>Make QQplots</p> <pre class="r"><code>makeQQ=function(test, baseline, Mol, Fraction, plot_name){ t=read.table(test,stringsAsFactors = F, header=T) b=read.table(baseline,stringsAsFactors = F,header=T) png(plot_name) plot=qqplot(-log10(runif(nrow(b))), -log10(b$corrPval), ylab="Observed", xlab="Expected", main=paste("Overlap QTLs:", Mol, "with APA", Fraction, sep=" ")) points(sort(-log10(runif(nrow(t)))), sort(-log10(t$corrPval)), col= alpha("Red")) abline(0,1) dev.off } #run function on each pheno makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su30QTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su30QTL_overlapAPA_Total.base.txt", "4su 30", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlap4su30_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su30QTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su30QTL_overlapAPA_Nuclear.base.txt", "4su 30", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlap4su30_Nuclear.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su60QTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su60QTL_overlapAPA_Total.base.txt", "4su 60", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlap4su60_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/Sig4su60QTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/Sig4su60QTL_overlapAPA_Nuclear.base.txt", "4su 60", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlap4su60_Nuclear.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Total.base.txt", "RNAseq_Guevadis", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRNAGue_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqGeuvadisQTL_overlapAPA_Nuclear.base.txt", "RNAseq_Guevadis", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRNAGue_Nuclear.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqQTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqQTL_overlapAPA_Total.base.txt", "RNAseq", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRNA_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigRNAseqQTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigRNAseqQTL_overlapAPA_Nuclear.base.txt", "RNAseq", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRNA_Nuclear.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigriboQTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigriboQTL_overlapAPA_Total.base.txt", "Ribo Seq", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRibo_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigriboQTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigriboQTL_overlapAPA_Nuclear.base.txt", "Ribo Seq", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapRibo_Nuclear.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Total.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Total.base.txt", "Protein", "Total", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapProtein_Total.png") makeQQ("/project2/gilad/briana/threeprimeseq/data/molecular_overlap/test/SigProteinQTL_overlapAPA_Nuclear.test.txt", "/project2/gilad/briana/threeprimeseq/data/molecular_overlap/base/SigProteinQTL_overlapAPA_Nuclear.base.txt", "Protein", "Nuclear", "/project2/gilad/briana/threeprimeseq/output/plots/QTL_overlap/APAoverlapProtein_Nuclear.png")</code></pre> </div> <div id="change-gene-ids" class="section level3"> <h3>Change gene IDs</h3> <p><strong>test for name changes </strong> <a href="http://useast.ensembl.org/biomart/martview/79ab6e7b92009b7da70ce6306b5efb93" class="uri">http://useast.ensembl.org/biomart/martview/79ab6e7b92009b7da70ce6306b5efb93</a></p> <pre class="r"><code>#geneNames=read.table("../data/ensemble_to_genename.txt", sep="\t", header=T,stringsAsFactors = F)</code></pre> <pre class="r"><code>#genesAPA=nuclearAPA %>% separate(pid, into=c("chr", "start", "end", "id"), sep=":") %>% separate(id, into=c("Gene.name", "strand", "peaknum"), sep="_") %>% dplyr::select(Gene.name) %>% distinct(Gene.name) %>% distinct()</code></pre> <p>Make a full list of the genes used in all of the mol phenoytpe files and use this as the input for the getBM.</p> <pre class="r"><code>#su30_geneID=su30 %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") %>% distinct() #su60_geneID=su60 %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") %>% distinct() #rna_geneID=rna %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") %>% distinct() #rib_geneID=rib %>% separate(pid, into=c("Gene.stable.ID", "ver"), sep ="[.]") %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") %>% distinct() #prot_geneID=prot %>% rename("Gene.stable.ID"=pid) %>% inner_join(geneNames, by="Gene.stable.ID") %>% dplyr::select("Gene.name", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") %>% distinct()</code></pre> </div> </div> <div id="session-information" class="section level2"> <h2>Session information</h2> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.5.1 (2018-07-02) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.6 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] grid stats graphics grDevices utils datasets methods [8] base other attached packages: [1] bindrcpp_0.2.2 data.table_1.11.8 VennDiagram_1.6.20 [4] futile.logger_1.4.3 forcats_0.3.0 stringr_1.3.1 [7] dplyr_0.7.6 purrr_0.2.5 readr_1.1.1 [10] tidyr_0.8.1 tibble_1.4.2 ggplot2_3.0.0 [13] tidyverse_1.2.1 reshape2_1.4.3 workflowr_1.1.1 loaded via a namespace (and not attached): [1] tidyselect_0.2.4 haven_1.1.2 lattice_0.20-35 [4] colorspace_1.3-2 htmltools_0.3.6 yaml_2.2.0 [7] rlang_0.2.2 R.oo_1.22.0 pillar_1.3.0 [10] glue_1.3.0 withr_2.1.2 R.utils_2.7.0 [13] lambda.r_1.2.3 modelr_0.1.2 readxl_1.1.0 [16] bindr_0.1.1 plyr_1.8.4 munsell_0.5.0 [19] gtable_0.2.0 cellranger_1.1.0 rvest_0.3.2 [22] R.methodsS3_1.7.1 evaluate_0.11 labeling_0.3 [25] knitr_1.20 broom_0.5.0 Rcpp_0.12.18 [28] formatR_1.5 backports_1.1.2 scales_1.0.0 [31] jsonlite_1.5 hms_0.4.2 digest_0.6.16 [34] stringi_1.2.4 rprojroot_1.3-2 cli_1.0.0 [37] tools_3.5.1 magrittr_1.5 lazyeval_0.2.1 [40] futile.options_1.0.1 crayon_1.3.4 whisker_0.3-2 [43] pkgconfig_2.0.2 xml2_1.2.0 lubridate_1.7.4 [46] assertthat_0.2.0 rmarkdown_1.10 httr_1.3.1 [49] rstudioapi_0.7 R6_2.2.2 nlme_3.1-137 [52] git2r_0.23.0 compiler_3.5.1 </code></pre> </div> <hr> <p> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> <script type="text/javascript"> /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname /* * * DON'T EDIT BELOW THIS LINE * * */ (function() { var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); })(); </script> <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> --> <!-- Adjust MathJax settings so that all math formulae are shown using TeX fonts only; see http://docs.mathjax.org/en/latest/configuration.html. This will make the presentation more consistent at the cost of the webpage sometimes taking slightly longer to load. Note that this only works because the footer is added to webpages before the MathJax javascript. --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <hr> <p> This reproducible <a href="http://rmarkdown.rstudio.com">R Markdown</a> analysis was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> 1.1.1 </p> <hr> </div> </div> </div> <script> // add bootstrap table styles to pandoc tables function bootstrapStylePandocTables() { $('tr.header').parent('thead').parent('table').addClass('table table-condensed'); } $(document).ready(function () { bootstrapStylePandocTables(); }); </script> <!-- dynamically load mathjax for compatibility with self-contained --> <script> (function () { var script = document.createElement("script"); script.type = "text/javascript"; script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; document.getElementsByTagName("head")[0].appendChild(script); })(); </script> </body> </html>