<!DOCTYPE html>

<html lang="en" xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Joyce Hsiao" />


<title>Data overview</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-1.1/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-1.1/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">
  /* Keep whitespace and line breaks inside code elements exactly as written. */
  code {
    white-space: pre;
  }
  /* pre elements that carry no class attribute get a white background. */
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
// Start highlight.js only when the library is present and the document has
// already finished loading; the zero-delay timeout defers the call until the
// current script has finished running.
if (document.readyState === "complete" && window.hljs) {
  window.setTimeout(function () {
    hljs.initHighlighting();
  }, 0);
}
</script>



<style type="text/css">
/* Heading font sizes; the page title (h1.title) is larger than a plain h1. */
h1 { font-size: 34px; }
h1.title { font-size: 38px; }
h2 { font-size: 30px; }
h3 { font-size: 24px; }
h4 { font-size: 18px; }
h5 { font-size: 16px; }
h6 { font-size: 12px; }
/* Header cells of .table tables default to left alignment unless they carry
   an explicit align attribute. */
.table th:not([align]) { text-align: left; }
</style>


</head>

<body>

<style type = "text/css">
/* Center the main container and cap its width. */
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
/* Inline code keeps the surrounding text color on a faint grey background. */
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
/* Images shrink to fit their container while keeping their aspect ratio. */
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
/* Suppress the focus outline on code-folding buttons only when the focus is
   not keyboard-driven, so keyboard users still get a visible focus indicator.
   Browsers that do not understand :focus-visible drop this rule and keep
   their default outline. */
button.code-folding-btn:focus:not(:focus-visible) {
  outline: none;
}
</style>


<style type="text/css">
/* Leave room at the top of the page for the fixed bootstrap navbar, plus
   some space at the bottom. */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}

/* Offset the scroll position of anchor links for the fixed navbar: each
   section heading gets top padding that is cancelled by an equal negative
   margin, so the layout is unchanged but the anchor target sits lower. */
.section h1,
.section h2,
.section h3,
.section h4,
.section h5,
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// Manage the active state of the menu based on the current page.
$(document).ready(function () {
  // File name of the current page: everything after the last "/" in the path.
  // Both variables are declared with `var` so nothing leaks into the global
  // scope (an undeclared assignment would also throw in strict mode).
  var path = window.location.pathname;
  var href = path.substring(path.lastIndexOf('/') + 1);

  // A path that ends in "/" has no file name; treat it as the index page.
  if (href === "") {
    href = "index.html";
  }

  // Anchors whose href attribute is exactly the current file name.
  var menuAnchor = $('a[href="' + href + '"]');

  // Mark the anchor's parent element active.
  menuAnchor.parent().addClass('active');

  // If the anchor sits inside a dropdown menu, mark that dropdown active too.
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
// Once the DOM is ready, build the tabsets, passing "TOC" as the argument.
// NOTE(review): buildTabsets is not defined in this file; presumably it comes
// from site_libs/navigation-1.1/tabsets.js loaded in the head - confirm.
$(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
$(document).ready(function () {

    // A section div flagged with toc-ignore passes that flag on to its
    // direct h1-h5 heading children and loses the class itself.
    var ignoredSections = $('div.section.toc-ignore');
    ignoredSections.removeClass('toc-ignore');
    ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // Settings handed to the tocify plugin.
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      // Build an anchor hash from heading text: drop the listed punctuation
      // characters, turn each whitespace character into "_", then lowercase.
      hashGenerator: function (text) {
        var stripped = text.replace(/[.\\/?&!#<>]/g, '');
        return stripped.replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0,
      showAndHide: true,
      smoothScroll: true
    };

    // Build the table of contents inside the #TOC element.
    $("#TOC").tocify(options);
});
</script>

<style type="text/css">

/* Vertical spacing around the table of contents. */
#TOC {
  margin: 25px 0 20px;
}

/* Narrow viewports: relatively positioned, full-width table of contents.
   NOTE(review): this query uses 768px while the div.tocify queries below
   switch at 767px/768px, so at exactly 768px both this rule and the
   768-991px rule apply - confirm whether 767px was intended. */
@media (max-width: 768px) {
  #TOC {
    position: relative;
    width: 100%;
  }
}

/* Horizontal padding of the main content column. */
.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

/* Width cap of the main container. */
div.main-container {
  max-width: 1200px;
}

/* Default size limits of the table of contents. */
div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

/* 768-991px wide viewports: slightly wider table of contents. */
@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

/* Up to 767px wide viewports: full-width table of contents, no width cap. */
@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul,
.tocify li {
  line-height: 20px;
}

/* Sub-level entries are smaller and indented. */
.tocify-subheader .tocify-item {
  font-size: 0.9em;
  padding-left: 25px;
  text-indent: 0;
}

/* Square corners on the list items. */
.tocify .list-group-item {
  border-radius: 0;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<!-- Fixed-top site navigation bar: brand link, page links, and an icon link
     to GitHub. -->
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <!-- Collapse toggle for the #navbar menu. The sr-only text gives the
           icon-only button an accessible name. -->
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">fucci-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="about.html">About</a>
        </li>
        <li>
          <a href="license.html">License</a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
          <!-- Icon-only link: the icon is hidden from assistive technology
               and the sr-only text supplies the link name. -->
          <a href="https://github.com/jdblischak/workflowr">
            <span class="fa fa-github" aria-hidden="true"></span>
            <span class="sr-only">workflowr on GitHub</span>
          </a>
        </li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->
<!-- Add a small amount of space between sections. -->
<style type="text/css">
/* Top padding on every section div keeps consecutive sections apart. */
div.section {
  padding-top: 12px;
}
</style>

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Data overview</h1>
<h4 class="author"><em>Joyce Hsiao</em></h4>

</div>


<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2017-12-13</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> 7509725</p>
<hr />
<p>All processed data are stored as <code>expressionSet</code> objects in <code>data/eset</code>. The examples below show how to extract information from an <code>expressionSet</code> object.</p>
<p><span class="math inline">\(~\)</span></p>
<pre class="r"><code>library(knitr)
library(Biobase)</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To combine all <code>expressionSet</code> objects in the folder,</p>
<pre class="r"><code>fname &lt;- Sys.glob(&quot;../data/eset/*.rds&quot;)
eset &lt;- Reduce(combine, Map(readRDS, fname))</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To view the sample metadata labels,</p>
<pre class="r"><code>kable(varMetadata(phenoData(eset)))</code></pre>
<table>
<thead>
<tr class="header">
<th></th>
<th align="left">labelDescription</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>experiment</td>
<td align="left">ID of C1 chip (i.e. processing date in YYYYMMDD)</td>
</tr>
<tr class="even">
<td>well</td>
<td align="left">Well of C1 chip (96 total, rows A-H, cols 1-12)</td>
</tr>
<tr class="odd">
<td>cell_number</td>
<td align="left">The number of cells observed in the well via microscopy</td>
</tr>
<tr class="even">
<td>concentration</td>
<td align="left">The cDNA concentration of the well prior to library prep</td>
</tr>
<tr class="odd">
<td>ERCC</td>
<td align="left">The dilution factor of the ERCC spike-ins</td>
</tr>
<tr class="even">
<td>individual.1</td>
<td align="left">Individual # 1 included on this C1 chip</td>
</tr>
<tr class="odd">
<td>individual.2</td>
<td align="left">Individual # 2 included on this C1 chip</td>
</tr>
<tr class="even">
<td>image_individual</td>
<td align="left">The chip label for the image files</td>
</tr>
<tr class="odd">
<td>image_label</td>
<td align="left">The well label for the image files</td>
</tr>
<tr class="even">
<td>raw</td>
<td align="left">The number of raw reads</td>
</tr>
<tr class="odd">
<td>umi</td>
<td align="left">The number of reads with a valid UMI</td>
</tr>
<tr class="even">
<td>mapped</td>
<td align="left">The number of reads with a valid UMI that mapped to a genome</td>
</tr>
<tr class="odd">
<td>unmapped</td>
<td align="left">The number of reads with a valid UMI that did <em>not</em> map to a genome</td>
</tr>
<tr class="even">
<td>reads_ercc</td>
<td align="left">The number of reads that mapped to the ERCC spike-in transcripts</td>
</tr>
<tr class="odd">
<td>reads_hs</td>
<td align="left">The number of reads that mapped to the H. sapiens genome</td>
</tr>
<tr class="even">
<td>reads_egfp</td>
<td align="left">The number of reads that mapped to the FUCCI EGFP transgene</td>
</tr>
<tr class="odd">
<td>reads_mcherry</td>
<td align="left">The number of reads that mapped to the FUCCI mCherry transgene</td>
</tr>
<tr class="even">
<td>molecules</td>
<td align="left">The number of molecules (i.e. post UMI-deduplication)</td>
</tr>
<tr class="odd">
<td>mol_ercc</td>
<td align="left">The number of molecules that mapped to the ERCC spike-in transcripts</td>
</tr>
<tr class="even">
<td>mol_hs</td>
<td align="left">The number of molecules that mapped to the H. sapiens genome</td>
</tr>
<tr class="odd">
<td>mol_egfp</td>
<td align="left">The number of molecules that mapped to the FUCCI EGFP transgene</td>
</tr>
<tr class="even">
<td>mol_mcherry</td>
<td align="left">The number of molecules that mapped to the FUCCI mCherry transgene</td>
</tr>
<tr class="odd">
<td>detect_ercc</td>
<td align="left">The number of ERCC genes with at least one molecule</td>
</tr>
<tr class="even">
<td>detect_hs</td>
<td align="left">The number of H. sapiens genes with at least one molecule</td>
</tr>
<tr class="odd">
<td>chip_id</td>
<td align="left">verifyBamID: The predicted individual based on the sequencing data</td>
</tr>
<tr class="even">
<td>chipmix</td>
<td align="left">verifyBamID: chipmix is a metric for detecting sample swaps</td>
</tr>
<tr class="odd">
<td>freemix</td>
<td align="left">verifyBamID: freemix is a measure of contamination. 0 == good &amp; 0.5 == bad</td>
</tr>
<tr class="even">
<td>snps</td>
<td align="left">verifyBamID: The number of SNPs that passed thresholds for AF and missingness</td>
</tr>
<tr class="odd">
<td>reads</td>
<td align="left">verifyBamID: The number of sequences that overlapped SNPs</td>
</tr>
<tr class="even">
<td>avg_dp</td>
<td align="left">verifyBamID: The average sequencing depth that covered a SNP</td>
</tr>
<tr class="odd">
<td>min_dp</td>
<td align="left">verifyBamID: A minimum depth threshold for QC only (affects snps_w_min)</td>
</tr>
<tr class="even">
<td>snps_w_min</td>
<td align="left">verifyBamID: The number of SNPs that had the minimum depth (min_dp); QC only</td>
</tr>
<tr class="odd">
<td>valid_id</td>
<td align="left">verifyBamID: Is the predicted individual 1 of the 2 added to the C1 chip?</td>
</tr>
</tbody>
</table>
<p><span class="math inline">\(~\)</span></p>
<p>To view the feature (gene) metadata labels,</p>
<pre class="r"><code>kable(varMetadata(featureData(eset)))</code></pre>
<table>
<thead>
<tr class="header">
<th></th>
<th align="left">labelDescription</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>chr</td>
<td align="left">Chromosome</td>
</tr>
<tr class="even">
<td>start</td>
<td align="left">Most 5’ start position (GRCh37/hg19; 1-based; inclusive)</td>
</tr>
<tr class="odd">
<td>end</td>
<td align="left">Most 3’ end position (GRCh37/hg19; 1-based; inclusive)</td>
</tr>
<tr class="even">
<td>name</td>
<td align="left">Gene name</td>
</tr>
<tr class="odd">
<td>strand</td>
<td align="left">Strand (+ = positive/forward; - = negative/reverse)</td>
</tr>
<tr class="even">
<td>source</td>
<td align="left">Source of RNA</td>
</tr>
</tbody>
</table>
<p><span class="math inline">\(~\)</span></p>
<p>To extract count data,</p>
<pre><code>exprs(eset)</code></pre>
<p>There are 20,421 genes and 1,536 single cell samples in the raw data.</p>
<pre class="r"><code>dim(exprs(eset))</code></pre>
<pre><code>[1] 20421  1536</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To extract feature/gene information,</p>
<pre><code>fData(eset)</code></pre>
<p>The features include FUCCI transgenes (EGFP and mCherry), ERCC spike-in controls, and endogenous genes (ENSG).</p>
<pre class="r"><code>head(fData(eset))</code></pre>
<pre><code>                 chr     start       end     name strand     source
EGFP            EGFP         1       714     EGFP      +       EGFP
ENSG00000000003  hsX  99883667  99894988   TSPAN6      - H. sapiens
ENSG00000000005  hsX  99839799  99854882     TNMD      + H. sapiens
ENSG00000000419 hs20  49551404  49575092     DPM1      - H. sapiens
ENSG00000000457  hs1 169818772 169863408    SCYL3      - H. sapiens
ENSG00000000460  hs1 169631245 169823221 C1orf112      + H. sapiens</code></pre>
<p><span class="math inline">\(~\)</span></p>
<p>To extract sample information,</p>
<pre><code>pData(eset)</code></pre>
<p>The rows contain single cell samples. Row names indicate the experiment date (we had one C1 plate per day), and C1 well ID.</p>
<pre class="r"><code>head(pData(eset))</code></pre>
<pre><code>             experiment well cell_number concentration         ERCC
20170905-A01   20170905  A01           1     1.7264044 50x dilution
20170905-A02   20170905  A02           1     1.4456926 50x dilution
20170905-A03   20170905  A03           1     1.8896170 50x dilution
20170905-A04   20170905  A04           1     0.4753723 50x dilution
20170905-A05   20170905  A05           1     0.5596827 50x dilution
20170905-A06   20170905  A06           1     2.1353518 50x dilution
             individual.1 individual.2 image_individual image_label
20170905-A01      NA18855      NA18870      18870_18855           3
20170905-A02      NA18855      NA18870      18870_18855           2
20170905-A03      NA18855      NA18870      18870_18855           1
20170905-A04      NA18855      NA18870      18870_18855          49
20170905-A05      NA18855      NA18870      18870_18855          50
20170905-A06      NA18855      NA18870      18870_18855          51
                 raw     umi  mapped unmapped reads_ercc reads_hs
20170905-A01 2734844 1754078 1240443   513635      76433  1163764
20170905-A02 1910671 1254676  861713   392963     120589   740940
20170905-A03 2284182 1571727 1093848   477879     124186   968934
20170905-A04  920518  610742  382426   228316     116552   265873
20170905-A05 1260569  831378  494618   336760     117843   376703
20170905-A06 2501607 1733479 1258695   474784      99172  1158976
             reads_egfp reads_mcherry molecules mol_ercc mol_hs mol_egfp
20170905-A01        246             0    113178     3122 110041       15
20170905-A02        182             2     59545     3143  56390       10
20170905-A03        727             1     74459     3307  71119       32
20170905-A04          1             0     29385     3110  26274        1
20170905-A05         71             1     42407     3059  39343        4
20170905-A06        546             1     94362     3120  91215       26
             mol_mcherry detect_ercc detect_hs chip_id chipmix freemix
20170905-A01           0          39      8390 NA18870 0.12414 0.08025
20170905-A02           2          45      6057 NA18870 0.19067 0.10145
20170905-A03           1          40      6429 NA18855 0.21403 0.08767
20170905-A04           0          42      2746 NA18870 0.45097 0.08319
20170905-A05           1          44      3633 NA18870 0.44775 0.22013
20170905-A06           1          41      7508 NA18870 0.15767 0.11801
               snps reads avg_dp min_dp snps_w_min valid_id
20170905-A01 311848  7959   0.03      1       3503     TRUE
20170905-A02 311848  3651   0.01      1       1802     TRUE
20170905-A03 311848  5059   0.02      1       2159     TRUE
20170905-A04 311848   815   0.00      1        591     TRUE
20170905-A05 311848  1209   0.00      1        780     TRUE
20170905-A06 311848  6722   0.02      1       2815     TRUE</code></pre>
<hr />
<div id="session-information" class="section level2">
<h2>Session information</h2>
<pre><code>R version 3.4.1 (2017-06-30)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Scientific Linux 7.2 (Nitrogen)

Matrix products: default
BLAS: /home/joycehsiao/miniconda3/envs/fucci-seq/lib/R/lib/libRblas.so
LAPACK: /home/joycehsiao/miniconda3/envs/fucci-seq/lib/R/lib/libRlapack.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] Biobase_2.38.0      BiocGenerics_0.24.0 knitr_1.16         

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14    digest_0.6.12   rprojroot_1.2   backports_1.0.5
 [5] git2r_0.19.0    magrittr_1.5    evaluate_0.10.1 highr_0.6      
 [9] stringi_1.1.2   rmarkdown_1.6   tools_3.4.1     stringr_1.2.0  
[13] yaml_2.1.14     compiler_3.4.1  htmltools_0.3.6</code></pre>
</div>

<!-- Adjust MathJax settings so that all math formulae are shown using
TeX fonts only; see
http://docs.mathjax.org/en/latest/configuration.html.  This will make
the presentation more consistent at the cost of the webpage sometimes
taking slightly longer to load. Note that this only works because the
footer is added to webpages before the MathJax javascript. -->
<script type="text/x-mathjax-config">
  // Limit the fonts available to the "HTML-CSS" output to the TeX fonts only.
  MathJax.Hub.Config({
    "HTML-CSS": { availableFonts: ["TeX"] }
  });
</script>

<hr>
<p>
    This <a href="https://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>.
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->


</div>
</div>

</div>

<script>

// Give pandoc-generated tables the bootstrap table classes. A table qualifies
// when it is the direct parent of a thead that directly contains a tr.header
// row.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var pandocTables = headerRows.parent('thead').parent('table');
  pandocTables.addClass('table table-condensed');
}

// Apply the styling once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Create a script element pointing at the MathJax loader and append it to
  // the document head; wrapped in an immediately invoked function so the
  // local variables stay out of the global scope.
  (function () {
    var mathjaxScript = document.createElement("script");
    var headElement = document.getElementsByTagName("head")[0];
    mathjaxScript.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    mathjaxScript.type = "text/javascript";
    headElement.appendChild(mathjaxScript);
  })();
</script>

</body>
</html>