<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
  <meta charset="utf-8" />
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta name="generator" content="pandoc" />
  <meta name="author" content="Joyce Hsiao" />
  <title>Data overview</title>

  <!-- Third-party libraries; jQuery must load before the Bootstrap, jQuery UI and tocify plugins. -->
  <script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
  <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
  <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
  <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
  <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
  <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
  <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
  <script src="site_libs/navigation-1.1/tabsets.js"></script>
  <link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
  <script src="site_libs/highlightjs-1.1/highlight.js"></script>
  <link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

  <style type="text/css">code{white-space: pre;}</style>
  <style type="text/css">
    pre:not([class]) {
      background-color: white;
    }
  </style>

  <script type="text/javascript">
    /* Start syntax highlighting only if highlight.js is present and the document has already finished loading. */
    if (window.hljs && document.readyState && document.readyState === "complete") {
      window.setTimeout(function() {
        hljs.initHighlighting();
      }, 0);
    }
  </script>

  <style type="text/css">
    h1 { font-size: 34px; }
    h1.title { font-size: 38px; }
    h2 { font-size: 30px; }
    h3 { font-size: 24px; }
    h4 { font-size: 18px; }
    h5 { font-size: 16px; }
    h6 { font-size: 12px; }
    .table th:not([align]) { text-align: left; }
  </style>
</head>

<body>

<style type="text/css">
  .main-container {
    max-width: 940px;
    margin-left: auto;
    margin-right: auto;
  }
  code {
    color: inherit;
    background-color: rgba(0, 0, 0, 0.04);
  }
  img {
    max-width:100%;
    height: auto;
  }
  .tabbed-pane {
    padding-top: 12px;
  }
  button.code-folding-btn:focus {
    outline: none;
  }
</style>

<style type="text/css">
  /* padding for bootstrap navbar */
  body {
    padding-top: 51px;
    padding-bottom: 40px;
  }
  /* offset scroll position for anchor links (for fixed navbar) */
  .section h1 { padding-top: 56px; margin-top: -56px; }
  .section h2 { padding-top: 56px; margin-top: -56px; }
  .section h3 { padding-top: 56px; margin-top: -56px; }
  .section h4 { padding-top: 56px; margin-top: -56px; }
  .section h5 { padding-top: 56px; margin-top: -56px; }
  .section h6 { padding-top: 56px; margin-top: -56px; }
</style>

<script>
  // manage active state of menu based on current page
  $(document).ready(function () {
    // active menu anchor: the file name is the last path segment of the
    // current URL; an empty segment (directory URL) means index.html
    var href = window.location.pathname;
    href = href.slice(href.lastIndexOf('/') + 1);
    if (href === "")
      href = "index.html";
    var menuAnchor = $('a[href="' + href + '"]');

    // mark it active
    menuAnchor.parent().addClass('active');

    // if it's got a parent navbar menu mark it active as well
    menuAnchor.closest('li.dropdown').addClass('active');
  });
</script>

<div class="container-fluid main-container">

<!-- tabsets -->
<script>
  $(document).ready(function () {
    window.buildTabsets("TOC");
  });
</script>

<!-- floating table of contents (tocify) -->
<script>
  $(document).ready(function () {

    // move toc-ignore selectors from section div to header
    $('div.section.toc-ignore')
      .removeClass('toc-ignore')
      .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // establish options
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      // anchor id: drop punctuation, turn whitespace into "_", lower-case
      hashGenerator: function (text) {
        return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0
    };
    options.showAndHide = true;
    options.smoothScroll = true;

    // tocify
    var toc = $("#TOC").tocify(options).data("toc-tocify");
  });
</script>

<style type="text/css">
  #TOC {
    margin: 25px 0px 20px 0px;
  }
  @media (max-width: 768px) {
    #TOC {
      position: relative;
      width: 100%;
    }
  }

  .toc-content {
    padding-left: 30px;
    padding-right: 40px;
  }

  div.main-container {
    max-width: 1200px;
  }

  div.tocify {
    width: 20%;
    max-width: 260px;
    max-height: 85%;
  }

  @media (min-width: 768px) and (max-width: 991px) {
    div.tocify {
      width: 25%;
    }
  }

  @media (max-width: 767px) {
    div.tocify {
      width: 100%;
      max-width: none;
    }
  }

  .tocify ul, .tocify li {
    line-height: 20px;
  }

  .tocify-subheader .tocify-item {
    font-size: 0.90em;
    padding-left: 25px;
    text-indent: 0;
  }

  .tocify .list-group-item {
    border-radius: 0px;
  }
</style>

<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">

<div class="navbar navbar-default navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-label="Toggle navigation">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">fucci-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="about.html">About</a>
        </li>
        <li>
          <a href="license.html">License</a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
          <a href="https://github.com/jdblischak/workflowr" aria-label="workflowr on GitHub">
            <span class="fa fa-github" aria-hidden="true"></span>
          </a>
        </li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<!-- Add a small amount of space between sections. -->
<style type="text/css">
  div.section {
    padding-top: 12px;
  }
</style>

<div class="fluid-row" id="header">
  <h1 class="title toc-ignore">Data overview</h1>
  <h4 class="author"><em>Joyce Hsiao</em></h4>
</div>

<!-- The file analysis/chunks.R contains chunks that define default settings shared across the workflowr files.
-->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2017-12-13</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed -->
<p><strong>Code version:</strong> 7509725</p>
<hr />
<p>All processed data are stored as <code>expressionSet</code> objects in <code>data/eset</code>. The examples below show how to extract information from an <code>expressionSet</code> object.</p>
<p><span class="math inline">\(~\)</span></p>
<pre class="r"><code>library(knitr)
library(Biobase)</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To combine all <code>expressionSet</code> objects in the folder,</p>
<pre class="r"><code>fname &lt;- Sys.glob("../data/eset/*.rds")
eset &lt;- Reduce(combine, Map(readRDS, fname))</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To view the sample metadata labels,</p>
<pre class="r"><code>kable(varMetadata(phenoData(eset)))</code></pre>
<table> <thead> <tr class="header"> <th></th> <th align="left">labelDescription</th> </tr> </thead> <tbody> <tr class="odd"> <td>experiment</td> <td align="left">ID of C1 chip (i.e.
processing date in YYYYMMDD)</td> </tr> <tr class="even"> <td>well</td> <td align="left">Well of C1 chip (96 total, rows A-H, cols 1-12)</td> </tr> <tr class="odd"> <td>cell_number</td> <td align="left">The number of cells observed in the well via microscopy</td> </tr> <tr class="even"> <td>concentration</td> <td align="left">The cDNA concentration of the well prior to library prep</td> </tr> <tr class="odd"> <td>ERCC</td> <td align="left">The dilution factor of the ERCC spike-ins</td> </tr> <tr class="even"> <td>individual.1</td> <td align="left">Individual # 1 included on this C1 chip</td> </tr> <tr class="odd"> <td>individual.2</td> <td align="left">Individual # 2 included on this C1 chip</td> </tr> <tr class="even"> <td>image_individual</td> <td align="left">The chip label for the image files</td> </tr> <tr class="odd"> <td>image_label</td> <td align="left">The well label for the image files</td> </tr> <tr class="even"> <td>raw</td> <td align="left">The number of raw reads</td> </tr> <tr class="odd"> <td>umi</td> <td align="left">The number of reads with a valid UMI</td> </tr> <tr class="even"> <td>mapped</td> <td align="left">The number of reads with a valid UMI that mapped to a genome</td> </tr> <tr class="odd"> <td>unmapped</td> <td align="left">The number of reads with a valid UMI that did <em>not</em> map to a genome</td> </tr> <tr class="even"> <td>reads_ercc</td> <td align="left">The number of reads that mapped to the ERCC spike-in transcripts</td> </tr> <tr class="odd"> <td>reads_hs</td> <td align="left">The number of reads that mapped to the H. sapiens genome</td> </tr> <tr class="even"> <td>reads_egfp</td> <td align="left">The number of reads that mapped to the FUCCI EGFP transgene</td> </tr> <tr class="odd"> <td>reads_mcherry</td> <td align="left">The number of reads that mapped to the FUCCI mCherry transgene</td> </tr> <tr class="even"> <td>molecules</td> <td align="left">The number of molecules (i.e. 
post UMI-deduplication)</td> </tr> <tr class="odd"> <td>mol_ercc</td> <td align="left">The number of molecules that mapped to the ERCC spike-in transcripts</td> </tr> <tr class="even"> <td>mol_hs</td> <td align="left">The number of molecules that mapped to the H. sapiens genome</td> </tr> <tr class="odd"> <td>mol_egfp</td> <td align="left">The number of molecules that mapped to the FUCCI EGFP transgene</td> </tr> <tr class="even"> <td>mol_mcherry</td> <td align="left">The number of molecules that mapped to the FUCCI mCherry transgene</td> </tr> <tr class="odd"> <td>detect_ercc</td> <td align="left">The number of ERCC genes with at least one molecule</td> </tr> <tr class="even"> <td>detect_hs</td> <td align="left">The number of H. sapiens genes with at least one molecule</td> </tr> <tr class="odd"> <td>chip_id</td> <td align="left">verifyBamID: The predicted individual based on the sequencing data</td> </tr> <tr class="even"> <td>chipmix</td> <td align="left">verifyBamID: chipmix is a metric for detecting sample swaps</td> </tr> <tr class="odd"> <td>freemix</td> <td align="left">verifyBamID: freemix is a measure of contamination. 
0 == good &amp; 0.5 == bad</td> </tr> <tr class="even"> <td>snps</td> <td align="left">verifyBamID: The number of SNPs that passed thresholds for AF and missingness</td> </tr> <tr class="odd"> <td>reads</td> <td align="left">verifyBamID: The number of sequences that overlapped SNPs</td> </tr> <tr class="even"> <td>avg_dp</td> <td align="left">verifyBamID: The average sequencing depth that covered a SNP</td> </tr> <tr class="odd"> <td>min_dp</td> <td align="left">verifyBamID: A minimum depth threshold for QC only (affects snps_w_min)</td> </tr> <tr class="even"> <td>snps_w_min</td> <td align="left">verifyBamID: The number of SNPs that had the minimum depth (min_dp); QC only</td> </tr> <tr class="odd"> <td>valid_id</td> <td align="left">verifyBamID: Is the predicted individual 1 of the 2 added to the C1 chip?</td> </tr> </tbody> </table> <p><span class="math inline">\(~\)</span></p> <p>To view the feature (gene) metadata labels,</p> <pre class="r"><code>kable(varMetadata(featureData(eset)))</code></pre> <table> <thead> <tr class="header"> <th></th> <th align="left">labelDescription</th> </tr> </thead> <tbody> <tr class="odd"> <td>chr</td> <td align="left">Chromosome</td> </tr> <tr class="even"> <td>start</td> <td align="left">Most 5’ start position (GRCh37/hg19; 1-based; inclusive)</td> </tr> <tr class="odd"> <td>end</td> <td align="left">Most 3’ end position (GRCh37/hg19; 1-based; inclusive)</td> </tr> <tr class="even"> <td>name</td> <td align="left">Gene name</td> </tr> <tr class="odd"> <td>strand</td> <td align="left">Strand (+ = positive/forward; - = negative/reverse)</td> </tr> <tr class="even"> <td>source</td> <td align="left">Source of RNA</td> </tr> </tbody> </table> <p><span class="math inline">\(~\)</span></p> <p>To extract count data,</p> <pre><code>exprs(eset)</code></pre> <p>There are 20,421 genes and 1,536 single cell samples in the raw data.</p> <pre class="r"><code>dim(exprs(eset))</code></pre> <pre><code>[1] 20421 1536</code></pre> <p><span class="math
inline">\(~\)</span></p>
<p>To extract feature/gene information,</p>
<pre><code>fData(eset)</code></pre>
<p>The features include FUCCI transgenes (EGFP and mCherry), ERCC spike-in controls, and endogenous genes (ENSG).</p>
<pre class="r"><code>head(fData(eset))</code></pre>
<pre><code>                 chr     start       end     name strand     source
EGFP            EGFP         1       714     EGFP      +       EGFP
ENSG00000000003  hsX  99883667  99894988   TSPAN6      - H. sapiens
ENSG00000000005  hsX  99839799  99854882     TNMD      + H. sapiens
ENSG00000000419 hs20  49551404  49575092     DPM1      - H. sapiens
ENSG00000000457  hs1 169818772 169863408    SCYL3      - H. sapiens
ENSG00000000460  hs1 169631245 169823221 C1orf112      + H. sapiens</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To extract sample information,</p>
<pre><code>pData(eset)</code></pre>
<p>The rows contain single cell samples. Row names indicate the experiment date (we had one C1 plate per day) and the C1 well ID.</p>
<pre class="r"><code>head(pData(eset))</code></pre>
<pre><code>             experiment well cell_number concentration         ERCC
20170905-A01   20170905  A01           1     1.7264044 50x dilution
20170905-A02   20170905  A02           1     1.4456926 50x dilution
20170905-A03   20170905  A03           1     1.8896170 50x dilution
20170905-A04   20170905  A04           1     0.4753723 50x dilution
20170905-A05   20170905  A05           1     0.5596827 50x dilution
20170905-A06   20170905  A06           1     2.1353518 50x dilution
             individual.1 individual.2 image_individual image_label
20170905-A01      NA18855      NA18870      18870_18855           3
20170905-A02      NA18855      NA18870      18870_18855           2
20170905-A03      NA18855      NA18870      18870_18855           1
20170905-A04      NA18855      NA18870      18870_18855          49
20170905-A05      NA18855      NA18870      18870_18855          50
20170905-A06      NA18855      NA18870      18870_18855          51
                 raw     umi  mapped unmapped reads_ercc reads_hs
20170905-A01 2734844 1754078 1240443   513635      76433  1163764
20170905-A02 1910671 1254676  861713   392963     120589   740940
20170905-A03 2284182 1571727 1093848   477879     124186   968934
20170905-A04  920518  610742  382426   228316     116552   265873
20170905-A05 1260569  831378  494618   336760     117843   376703
20170905-A06 2501607 1733479 1258695   474784      99172  1158976
             reads_egfp reads_mcherry molecules mol_ercc mol_hs mol_egfp
20170905-A01        246             0    113178     3122 110041       15
20170905-A02        182             2     59545     3143  56390       10
20170905-A03        727             1     74459     3307  71119       32
20170905-A04          1             0     29385     3110  26274        1
20170905-A05         71             1     42407     3059  39343        4
20170905-A06        546             1     94362     3120  91215       26
             mol_mcherry detect_ercc detect_hs chip_id chipmix freemix
20170905-A01           0          39      8390 NA18870 0.12414 0.08025
20170905-A02           2          45      6057 NA18870 0.19067 0.10145
20170905-A03           1          40      6429 NA18855 0.21403 0.08767
20170905-A04           0          42      2746 NA18870 0.45097 0.08319
20170905-A05           1          44      3633 NA18870 0.44775 0.22013
20170905-A06           1          41      7508 NA18870 0.15767 0.11801
               snps reads avg_dp min_dp snps_w_min valid_id
20170905-A01 311848  7959   0.03      1       3503     TRUE
20170905-A02 311848  3651   0.01      1       1802     TRUE
20170905-A03 311848  5059   0.02      1       2159     TRUE
20170905-A04 311848   815   0.00      1        591     TRUE
20170905-A05 311848  1209   0.00      1        780     TRUE
20170905-A06 311848  6722   0.02      1       2815     TRUE</code></pre>
<hr />
<div id="session-information" class="section level2">
<h2>Session information</h2>
<pre><code>R version 3.4.1 (2017-06-30)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Scientific Linux 7.2 (Nitrogen)

Matrix products: default
BLAS: /home/joycehsiao/miniconda3/envs/fucci-seq/lib/R/lib/libRblas.so
LAPACK: /home/joycehsiao/miniconda3/envs/fucci-seq/lib/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods
[8] base

other attached packages:
[1] Biobase_2.38.0      BiocGenerics_0.24.0 knitr_1.16

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14    digest_0.6.12   rprojroot_1.2   backports_1.0.5
 [5] git2r_0.19.0    magrittr_1.5    evaluate_0.10.1 highr_0.6
 [9] stringi_1.1.2   rmarkdown_1.6   tools_3.4.1
stringr_1.2.0
[13] yaml_2.1.14     compiler_3.4.1  htmltools_0.3.6</code></pre>
</div>

<!-- Adjust MathJax settings so that all math formulae are shown using
TeX fonts only; see
http://docs.mathjax.org/en/latest/configuration.html. This will make
the presentation more consistent at the cost of the webpage sometimes
taking slightly longer to load. Note that this only works because the
footer is added to webpages before the MathJax javascript. -->
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({
    "HTML-CSS": { availableFonts: ["TeX"] }
  });
</script>

<hr>
<p>
  This <a href="https://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->

</div>
</div>

</div>

<script>
  // add bootstrap table styles to pandoc tables
  // (pandoc marks the header row of each table with class "header")
  function bootstrapStylePandocTables() {
    $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
  }
  $(document).ready(function () {
    bootstrapStylePandocTables();
  });
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>