<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Joyce Hsiao" />


<title>Circle fit to intensities: follow-up</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
if (window.hljs) {
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  if (document.readyState && document.readyState === "complete") {
    window.setTimeout(function() { hljs.initHighlighting(); }, 0);
  }
}
</script>



<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar)  */
.section h1 {
  padding-top: 56px;
  margin-top: -56px;
}

.section h2 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h3 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h4 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h5 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// manage active state of menu based on current page
$(document).ready(function () {
  // active menu anchor
  href = window.location.pathname
  href = href.substr(href.lastIndexOf('/') + 1)
  if (href === "")
    href = "index.html";
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
$(document).ready(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->






<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">fucci-seq</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="index.html">Home</a>
</li>
<li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/jdblischak/fucci-seq">
    <span class="fa fa-github"></span>
     
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->
<!-- Add a small amount of space between sections. -->
<style type="text/css">
div.section {
  padding-top: 12px;
}
</style>

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Circle fit to intensities: follow-up</h1>
<h4 class="author"><em>Joyce Hsiao</em></h4>

</div>

<div id="TOC">
<ul>
<li><a href="#goals">Goals</a></li>
<li><a href="#data-and-packages">Data and packages</a></li>
<li><a href="#projected-normal-on-pcs-of-redgreen">Projected normal on PCs of Red/Green</a></li>
<li><a href="#circular-circular-correlation-between-projected-time-and-gene-expression">Circular-circular correlation between projected time and gene expression</a></li>
<li><a href="#circular-linear-correlation-between-projected-time-and-gene-expression">Circular-linear correlation between projected time and gene expression</a></li>
<li><a href="#comparae-results-between-circ-linear-and-cir-circ-correlation">Comparae results between circ-linear and cir-circ correlation</a></li>
<li><a href="#session-information">Session information</a></li>
</ul>
</div>

<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2018-03-13</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> 4ba15d8</p>
<hr />
<div id="goals" class="section level2">
<h2>Goals</h2>
<ol style="list-style-type: decimal">
<li><p>Estimate positions based on PCA</p></li>
<li><p>Correlation positions with genes and DAPI</p></li>
</ol>
<hr />
</div>
<div id="data-and-packages" class="section level2">
<h2>Data and packages</h2>
<p>Packages</p>
<pre class="r"><code>library(circular)
library(conicfit)
library(Biobase)
library(dplyr)
library(matrixStats)
library(CorShrink)</code></pre>
<p>Load data</p>
<pre class="r"><code>df &lt;- readRDS(file=&quot;../data/eset-filtered.rds&quot;)
pdata &lt;- pData(df)
fdata &lt;- fData(df)

# select endogeneous genes
counts &lt;- exprs(df)[grep(&quot;ENSG&quot;, rownames(df)), ]

log2cpm &lt;- readRDS(&quot;../output/seqdata-batch-correction.Rmd/log2cpm.rds&quot;)
# log2cpm.adjust &lt;- readRDS(&quot;../output/seqdata-batch-correction.Rmd/log2cpm.adjust.rds&quot;)

# import corrected intensities
pdata.adj &lt;- readRDS(&quot;../output/images-normalize-anova.Rmd/pdata.adj.rds&quot;)

macosko &lt;- readRDS(&quot;../data/cellcycle-genes-previous-studies/rds/macosko-2015.rds&quot;)</code></pre>
<hr />
</div>
<div id="projected-normal-on-pcs-of-redgreen" class="section level2">
<h2>Projected normal on PCs of Red/Green</h2>
<pre class="r"><code>pc.fucci &lt;- prcomp(subset(pdata.adj, 
                          select=c(&quot;rfp.median.log10sum.adjust&quot;,
                                   &quot;gfp.median.log10sum.adjust&quot;)),
                   center = T, scale. = T)</code></pre>
<p>The first two PC are about the sample in explaining variation.</p>
<pre class="r"><code>pc.fucci$sdev[1:2]</code></pre>
<pre><code>[1] 1.0065119 0.9934455</code></pre>
<p>Green and red are similarly correlated with PC1 and PC2 across individuals.</p>
<pre class="r"><code>pcs.rfp &lt;- sapply(1:2, function(i) {
  res &lt;- summary(lm(pc.fucci$x[,i]~pdata.adj$rfp.median.log10sum.adjust))
  res$adj.r.squared
})
pcs.rfp</code></pre>
<pre><code>[1] 0.5060336 0.4929543</code></pre>
<pre class="r"><code>pcs.gfp &lt;- sapply(1:2, function(i) {
  res &lt;- summary(lm(pc.fucci$x[,i]~pdata.adj$gfp.median.log10sum.adjust))
  res$adj.r.squared
})
pcs.gfp</code></pre>
<pre><code>[1] 0.5060336 0.4929543</code></pre>
<pre class="r"><code>library(circular)
Theta.fucci &lt;- coord2rad(pc.fucci$x)

par(mfrow=c(2,2), mar=c(4,4,3,1))
plot(circular(Theta.fucci), stack = TRUE)
plot(as.numeric(Theta.fucci), pdata.adj$rfp.median.log10sum.adjust,
     col = &quot;firebrick&quot;, pch = 16, cex = .7,
     xlab = &quot;Theta (projected cell time)&quot;, 
     ylab = &quot;RFP (log10 intensity)&quot;)
plot(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust,
     col = &quot;forestgreen&quot;, pch = 16, cex = .7,
     xlab = &quot;Theta (projected cell time)&quot;, 
     ylab = &quot;GFP (log10 intensity)&quot;)
plot(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust,
     col = &quot;mediumblue&quot;, pch = 16, cex = .7,
     xlab = &quot;Theta (projected cell time)&quot;, 
     ylab = &quot;DAPI (log10 intensity)&quot;)</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-6-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>Correlation with DAPI, RFP and GFP.</p>
<pre class="r"><code>library(Rfast)
library(circular)

# normalize intensities to between 0 to 2*pi
pdata.adj$dapi.circ &lt;- with(pdata.adj, {
  normed &lt;- (dapi.median.log10sum.adjust-min(dapi.median.log10sum.adjust))/(max(dapi.median.log10sum.adjust)- min(dapi.median.log10sum.adjust))
  normed*2*pi
} )
pdata.adj$gfp.circ &lt;- with(pdata.adj, {
  normed &lt;- (gfp.median.log10sum-min(gfp.median.log10sum.adjust))/(max(gfp.median.log10sum.adjust)- min(gfp.median.log10sum.adjust))
  normed*2*pi
} )
pdata.adj$rfp.circ &lt;- with(pdata.adj, {
  normed &lt;- (rfp.median.log10sum.adjust-min(rfp.median.log10sum.adjust))/(max(rfp.median.log10sum.adjust)- min(rfp.median.log10sum.adjust))
  normed*2*pi
} )


cor.dapi.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$dapi.median.log10sum.adjust)[1])
cor.gfp.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust)[1])
cor.rfp.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$rfp.median.log10sum.adjust)[1])

cor.dapi.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$dapi.median.log10sum.adjust)
cor.gfp.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust)
cor.rfp.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$rfp.median.log10sum.adjust)

out &lt;- data.frame(cbind(rbind(cor.dapi.cl, cor.gfp.cl, cor.rfp.cl),
                 rbind(cor.dapi.cc, cor.gfp.cc, cor.rfp.cc)),
           row.names = c(&quot;DAPI&quot;, &quot;GFP&quot;, &quot;RFP&quot;))
colnames(out) &lt;- c(&quot;cor.circ.linear&quot;, &quot;cor.circ.circ&quot;)
print(out)</code></pre>
<pre><code>     cor.circ.linear cor.circ.circ
DAPI       0.5313878    -0.4176550
GFP        0.8787765    -0.4959238
RFP        0.9333337     0.8057986</code></pre>
<p>The high correlation of GFP and RFP in the first column follows the use of PCs of GFP and RFP to construct cell time. Results in the second column seem interesting. The values capture the relationship between these measures in the plot. GFP and DAPI are negatively associated with cell time and RFP is positive associated with cell time.</p>
<p>Correlation with DAPI, RFP and GFP.</p>
<pre class="r"><code>library(Rfast)
library(circular)

# normalize intensities to between 0 to 2*pi
pdata.adj$dapi.circ &lt;- with(pdata.adj, {
  normed &lt;- (dapi.median.log10sum.adjust-min(dapi.median.log10sum.adjust))/(max(dapi.median.log10sum.adjust)- min(dapi.median.log10sum.adjust))
  normed*2*pi
} )
pdata.adj$gfp.circ &lt;- with(pdata.adj, {
  normed &lt;- (gfp.median.log10sum-min(gfp.median.log10sum.adjust))/(max(gfp.median.log10sum.adjust)- min(gfp.median.log10sum.adjust))
  normed*2*pi
} )
pdata.adj$rfp.circ &lt;- with(pdata.adj, {
  normed &lt;- (rfp.median.log10sum.adjust-min(rfp.median.log10sum.adjust))/(max(rfp.median.log10sum.adjust)- min(rfp.median.log10sum.adjust))
  normed*2*pi
} )


cor.dapi.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$dapi.median.log10sum.adjust)[1])
cor.gfp.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust)[1])
cor.rfp.cl &lt;- sqrt(circlin.cor(as.numeric(Theta.fucci), pdata.adj$rfp.median.log10sum.adjust)[1])

cor.dapi.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$dapi.median.log10sum.adjust)
cor.gfp.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$gfp.median.log10sum.adjust)
cor.rfp.cc &lt;- cor.circular(as.numeric(Theta.fucci), pdata.adj$rfp.median.log10sum.adjust)

out &lt;- data.frame(cbind(rbind(cor.dapi.cl, cor.gfp.cl, cor.rfp.cl),
                 rbind(cor.dapi.cc, cor.gfp.cc, cor.rfp.cc)),
           row.names = c(&quot;DAPI&quot;, &quot;GFP&quot;, &quot;RFP&quot;))
colnames(out) &lt;- c(&quot;cor.circ.linear&quot;, &quot;cor.circ.circ&quot;)
print(out)</code></pre>
<pre><code>     cor.circ.linear cor.circ.circ
DAPI       0.5313878    -0.4176550
GFP        0.8787765    -0.4959238
RFP        0.9333337     0.8057986</code></pre>
<hr />
</div>
<div id="circular-circular-correlation-between-projected-time-and-gene-expression" class="section level2">
<h2>Circular-circular correlation between projected time and gene expression</h2>
<pre class="r"><code>library(Rfast)
library(circular)

counts.log2cpm &lt;- log10((10^6)*t(t(counts)/colSums(counts))+1)
counts.log2cpm.normed &lt;- 2*pi*(counts.log2cpm)/max(counts.log2cpm)

cor.cc.log2cpm &lt;- sapply(1:nrow(counts.log2cpm), function(g) {
  cor.circular(counts.log2cpm.normed[g,],Theta.fucci)
})
nsam.nonzero &lt;- rowSums(counts.log2cpm &gt; 0)

library(CorShrink)
corshrink.nonzero.cc.log2cpm &lt;- CorShrinkVector(cor.cc.log2cpm,
                    nsamp_vec = nsam.nonzero, report_model=TRUE)

sval.cc &lt;- corshrink.nonzero.cc.log2cpm$model$result$svalue

par(mfrow=c(1,2))
hist(cor.cc.log2cpm, nclass=50,
     main = &quot;Correlation with Theta&quot;)
with(corshrink.nonzero.cc.log2cpm$model$result, { 
     plot(betahat,PosteriorMean, pch = 16, col = &quot;gray40&quot;, cex=.7,
          xlab = &quot;Observed effect size&quot;, ylab = &quot;Shrunked effect size&quot;);
     abline(0,1, col = &quot;mediumblue&quot;);
     points(betahat,PosteriorMean, col = as.numeric(sval.cc&lt;.01)+1, lwd=.8)
     title(paste(&quot;sig. at svalue &lt; .01&quot;, sum(sval.cc&lt;.01), &quot;genes&quot;))
     })</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-9-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>Check cell cycle enrichment with s-value &lt; .01.</p>
<pre class="r"><code>genes.sig.cc &lt;- (rownames(counts.log2cpm.normed)[sval.cc &lt; .01])[order(cor.cc.log2cpm[which(sval.cc &lt; .01)], decreasing = F)]
sig.cycle.cc &lt;- mean(genes.sig.cc %in% macosko$ensembl)
nonsig.cycle.cc &lt;- mean(setdiff(rownames(counts.log2cpm.normed), genes.sig.cc) %in% macosko$ensembl)
sig.cycle.cc/nonsig.cycle.cc     </code></pre>
<pre><code>[1] 10.28224</code></pre>
<pre class="r"><code>rho.sig.cc &lt;- (cor.cc.log2cpm[sval.cc &lt; .01])[order(cor.cc.log2cpm[which(sval.cc &lt; .01)], decreasing = F)]

genes.sig.cycle.cc &lt;- genes.sig.cc[genes.sig.cc %in% macosko$ensembl]
rho.sig.cycle.cc &lt;- rho.sig.cc[genes.sig.cc %in% macosko$ensembl]
gene.names.sig.cycle.cc &lt;- sapply(1:length(genes.sig.cycle.cc), function(g) {
  nm &lt;- with(macosko, hgnc[ensembl == genes.sig.cycle.cc[g]]) 
  return(nm[1])})</code></pre>
<p>Check cell cycle enrichment in top 200 genes.</p>
<pre class="r"><code>genes.sig.cc &lt;- (rownames(counts.log2cpm.normed)[order(sval.cc) &lt; 201])[order(cor.cc.log2cpm[which(order(sval.cc) &lt; 201)], decreasing = F)]
sig.cycle.cc &lt;- mean(genes.sig.cc %in% macosko$ensembl)
nonsig.cycle.cc &lt;- mean(setdiff(rownames(counts.log2cpm.normed), genes.sig.cc) %in% macosko$ensembl)
sig.cycle.cc/nonsig.cycle.cc     </code></pre>
<pre><code>[1] 1.037587</code></pre>
<pre class="r"><code>rho.sig.cc &lt;- (cor.cc.log2cpm[order(sval.cc) &lt; 201])[order(cor.cc.log2cpm[which(order(sval.cc) &lt; 201)], decreasing = F)]

genes.sig.cycle.cc &lt;- genes.sig.cc[genes.sig.cc %in% macosko$ensembl]
rho.sig.cycle.cc &lt;- rho.sig.cc[genes.sig.cc %in% macosko$ensembl]
gene.names.sig.cycle.cc &lt;- sapply(1:length(genes.sig.cycle.cc), function(g) {
  nm &lt;- with(macosko, hgnc[ensembl == genes.sig.cycle.cc[g]]) 
  return(nm[1])})</code></pre>
<p>Visualize expression of significant genes.</p>
<pre class="r"><code>par(mfrow=c(4,2))
for(i in 1:8) {
  plot(counts.log2cpm.normed[which(rownames(counts.log2cpm) == genes.sig.cycle.cc[i]),
                             order(as.numeric(Theta.fucci))],
     ylab = &quot;Angle (0,2pi)&quot;, xlab = &quot;Cells ordered by Theta&quot;,
     pch = 16, cex = .7, ylim = c(0,2*pi))
  title(paste(gene.names.sig.cycle.cc[i], &quot;; corr = &quot;, round(rho.sig.cycle.cc[i],digits=2)))
  points(as.numeric(Theta.fucci)[order(Theta.fucci)], col = &quot;gray40&quot;, cex=.5, pch=16)
}
title(&quot;Expression pattern by Theta&quot;, outer=T, line=-1)</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-12-1.png" width="576" style="display: block; margin: auto;" /></p>
<pre class="r"><code># sin versus sin
par(mfrow=c(4,2))
for(i in 1:8) {
  xx &lt;- as.numeric(Theta.fucci)[order(Theta.fucci)]
  yy &lt;- counts.log2cpm.normed[which(rownames(counts.log2cpm) == genes.sig.cycle.cc[i]),order(as.numeric(Theta.fucci))]
  xx &lt;- sin(xx-mean(xx))
  yy &lt;- sin(yy-mean(yy))
  plot(x=xx, y=yy,
     pch = 16, cex = .7, ylim=c(-1,1),
     xlab = &quot;sin(Theta)&quot;, ylab=&quot;sin(gene angle)&quot;) 
  title(paste(gene.names.sig.cycle.cc[i], &quot;; corr = &quot;, round(rho.sig.cycle.cc[i],digits=2)))
}
title(&quot;sin(Theta) vs. sin(normalized gene expression)&quot;, outer=T, line=-1)</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-12-2.png" width="576" style="display: block; margin: auto;" /></p>
<pre class="r"><code># write.table(cbind(genes.sig.cycle, rho.sig.cycle), file = &quot;output_tmp/genes.sig.cycle.txt&quot;, quote=F, sep = &quot;\t&quot;, col.names=F, row.names=F)
# 
# write.table(cbind(genes.sig, rho.sig), file = &quot;output_tmp/genes.sig.txt&quot;, quote=F, sep = &quot;\t&quot;, col.names=F, row.names=F)</code></pre>
</div>
<div id="circular-linear-correlation-between-projected-time-and-gene-expression" class="section level2">
<h2>Circular-linear correlation between projected time and gene expression</h2>
<pre class="r"><code>library(Rfast)
library(circular)

counts.log2cpm &lt;- log10((10^6)*t(t(counts)/colSums(counts))+1)

cor.cl.log2cpm &lt;- sapply(1:nrow(counts.log2cpm), function(g) {
    sqrt(circlin.cor(counts.log2cpm[g,],Theta.fucci)[1])
})
nsam.nonzero &lt;- rowSums(counts.log2cpm &gt; 0)

library(CorShrink)
corshrink.nonzero.cl.log2cpm &lt;- CorShrinkVector(cor.cl.log2cpm, nsamp_vec = nsam.nonzero, report_model=TRUE)

sval.cl &lt;- corshrink.nonzero.cl.log2cpm$model$result$svalue

par(mfrow=c(1,2))
hist(cor.cl.log2cpm, nclass=50,
     main = &quot;Correlation with Theta&quot;)
with(corshrink.nonzero.cl.log2cpm$model$result, { 
     plot(betahat,PosteriorMean, pch = 16, col = &quot;gray40&quot;, cex=.7,
          xlab = &quot;Observed effect size&quot;, ylab = &quot;Shrunked effect size&quot;);
     abline(0,1, col = &quot;mediumblue&quot;);
     points(betahat,PosteriorMean, col = as.numeric(sval.cl&lt;.01)+1, lwd=.8)
     title(paste(&quot;sig. at svalue &lt; .01&quot;, sum(svalue&lt;.01), &quot;genes&quot;))
     })</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-13-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>Check cell cycle enrichment in genes with s-value &lt; .01.</p>
<pre class="r"><code>genes.sig.cl &lt;- (rownames(counts.log2cpm)[sval.cl &lt; .01])[order(cor.cl.log2cpm[which(sval.cl &lt; .01)], decreasing = F)]
sig.cycle.cl &lt;- mean(genes.sig.cl %in% macosko$ensembl)
nonsig.cycle.cl &lt;- mean(setdiff(rownames(counts.log2cpm), genes.sig.cl) %in% macosko$ensembl)
sig.cycle.cl/nonsig.cycle.cl</code></pre>
<pre><code>[1] 2.981338</code></pre>
<pre class="r"><code>rho.sig.cl &lt;- (cor.cl.log2cpm[sval.cl &lt; .01])[order(cor.cl.log2cpm[which(sval.cl &lt; .01)], decreasing = F)]

genes.sig.cycle.cl &lt;- genes.sig.cl[genes.sig.cl %in% macosko$ensembl]
rho.sig.cycle.cl &lt;- rho.sig.cl[genes.sig.cl %in% macosko$ensembl]
gene.names.sig.cycle.cl &lt;- sapply(1:length(genes.sig.cycle.cl), function(g) {
  nm &lt;- with(macosko, hgnc[ensembl == genes.sig.cycle.cl[g]]) 
  return(nm[1])})


## these 8 are also significant in circular-circular correlation
sval.cc[rownames(counts.log2cpm.normed) %in% genes.sig.cycle.cl[1:8]]</code></pre>
<pre><code>[1] 5.925637e-01 3.282286e-02 5.918561e-01 2.547773e-05 2.772930e-01
[6] 5.637073e-01 2.386840e-02 6.050944e-02</code></pre>
<p>Check cell cycle enrichment in genes with s-value in top 200.</p>
<pre class="r"><code>genes.sig.cl &lt;- (rownames(counts.log2cpm)[order(sval.cl) &lt; 201])[order(cor.cl.log2cpm[which(order(sval.cl) &lt; 201)], decreasing = F)]
sig.cycle.cl &lt;- mean(genes.sig.cl %in% macosko$ensembl)
nonsig.cycle.cl &lt;- mean(setdiff(rownames(counts.log2cpm), genes.sig.cl) %in% macosko$ensembl)
sig.cycle.cl/nonsig.cycle.cl</code></pre>
<pre><code>[1] 0.5717413</code></pre>
<pre class="r"><code>rho.sig.cl &lt;- (cor.cl.log2cpm[order(sval.cl) &lt; 201])[order(cor.cl.log2cpm[which(order(sval.cl) &lt; 201)], decreasing = F)]

genes.sig.cycle.cl &lt;- genes.sig.cl[genes.sig.cl %in% macosko$ensembl]
rho.sig.cycle.cl &lt;- rho.sig.cl[genes.sig.cl %in% macosko$ensembl]
gene.names.sig.cycle.cl &lt;- sapply(1:length(genes.sig.cycle.cl), function(g) {
  nm &lt;- with(macosko, hgnc[ensembl == genes.sig.cycle.cl[g]]) 
  return(nm[1])})


## these 8 are also significant in circular-circular correlation
sval.cc[rownames(counts.log2cpm.normed) %in% genes.sig.cycle.cl[1:8]]</code></pre>
<pre><code>[1] 3.114114e-01 3.174974e-01 3.674104e-03 1.500274e-05 1.262248e-01</code></pre>
<p>Visualize expression of the significant genes.</p>
<pre class="r"><code>par(mfrow=c(4,2))
for(i in 1:min(length(genes.sig.cycle.cl),8)) {
  plot(x=as.numeric(Theta.fucci),
       y=counts.log2cpm.normed[which(rownames(counts.log2cpm) == genes.sig.cycle.cl[i]),
                             order(as.numeric(Theta.fucci))],
     ylab = &quot;Angle (0,2pi)&quot;, xlab = &quot;Sin(Theta)&quot;,
     pch = 16, cex = .7, ylim = c(1,5))
  title(paste(gene.names.sig.cycle.cl[i], &quot;; corr = &quot;, round(rho.sig.cycle.cl[i],digits=2)))
}
title(&quot;sinusoidal pattern by Theta?&quot;, outer=T, line=-1)

# sin versus data
par(mfrow=c(4,2))</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/cir-liear-plots-1.png" width="576" style="display: block; margin: auto;" /></p>
<pre class="r"><code>for(i in 1:min(length(genes.sig.cycle.cl),8)) {
  xx &lt;- as.numeric(Theta.fucci)[order(Theta.fucci)]
  yy &lt;- counts.log2cpm.normed[which(rownames(counts.log2cpm) == genes.sig.cycle.cl[i]),order(as.numeric(Theta.fucci))]
  xx &lt;- sin(xx-mean(xx))
  yy &lt;- yy-mean(yy)
  plot(x=xx, y=yy,
     pch = 16, cex = .7, ylim=c(-1,1),
     xlab = &quot;sin(Theta)&quot;, ylab=&quot;sin(gene angle)&quot;) 
  title(paste(gene.names.sig.cycle.cl[i], &quot;; corr = &quot;, round(rho.sig.cycle.cl[i],digits=2)))
}
title(&quot;Sin(Theta) vs. data&quot;, outer=TRUE, line=-1)

# cosine versus data
par(mfrow=c(4,2))</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/cir-liear-plots-2.png" width="576" style="display: block; margin: auto;" /></p>
<pre class="r"><code>for(i in 1:min(length(genes.sig.cycle.cl),8)) {
  xx &lt;- as.numeric(Theta.fucci)[order(Theta.fucci)]
  yy &lt;- counts.log2cpm.normed[which(rownames(counts.log2cpm) == genes.sig.cycle.cl[i]),order(as.numeric(Theta.fucci))]
  xx &lt;- cos(xx-mean(xx))
  yy &lt;- yy-mean(yy)
  plot(x=xx, y=yy,
     pch = 16, cex = .7, ylim=c(-1,1),
     xlab = &quot;cos(Theta)&quot;, ylab=&quot;sin(gene angle)&quot;) 
  title(paste(gene.names.sig.cycle.cl[i], &quot;; corr = &quot;, round(rho.sig.cycle.cl[i],digits=2)))
}
title(&quot;Cos(Theta) vs. data&quot;, outer=TRUE, line=-1)</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/cir-liear-plots-3.png" width="576" style="display: block; margin: auto;" /></p>
<hr />
</div>
<div id="comparae-results-between-circ-linear-and-cir-circ-correlation" class="section level2">
<h2>Comparae results between circ-linear and cir-circ correlation</h2>
<ol style="list-style-type: decimal">
<li><p>There are almost no overlap in the top 200 genes between the two methods. Going up to top 500 genes, we see 19 of the 481 genes overlap between the two methods. In other words, a variable that is significantly corrleated with sin(theta) doesn’t necessary have a high correlation in sin(variable value) with sin(theta). This also implies that all previous examples that I plotted for each correlation case is only significant for that particular category.</p></li>
<li><p>Results above on gene enrichment showed that when considering top 200 genes, cir-cir results are enriched two-fold with cell cycle genes (in Macosko data), while cir-linear are enriched with .5 with cell cycle genes, i.e., the top 200 have fewer cell cycle genes than the other genes. When consider s-values, cir-cir has about 11 fold of enrichment and cir-linear has about 2 fold enrichment…</p></li>
</ol>
<pre class="r"><code># so many more when considering circular-linear correlation
# probably because it&#39;s picking up sinuisoidal patterns, hence osciallating genes
# find some examples...
table(order(sval.cc)&lt;201, order(sval.cl)&lt;201)</code></pre>
<pre><code>       
        FALSE  TRUE
  FALSE 11033   196
  TRUE    196     4</code></pre>
<pre class="r"><code>table(order(sval.cc)&lt;501, order(sval.cl)&lt;501)</code></pre>
<pre><code>       
        FALSE  TRUE
  FALSE 10450   479
  TRUE    479    21</code></pre>
<pre class="r"><code>plot(rank(sval.cc), rank(sval.cl))</code></pre>
<p><img src="figure/images-circle-ordering-eval.Rmd/unnamed-chunk-16-1.png" width="672" style="display: block; margin: auto;" /></p>
<hr />
</div>
<div id="session-information" class="section level2">
<h2>Session information</h2>
<pre><code>R version 3.4.1 (2017-06-30)
Platform: x86_64-redhat-linux-gnu (64-bit)
Running under: Scientific Linux 7.2 (Nitrogen)

Matrix products: default
BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] Rfast_1.8.6         RcppZiggurat_0.1.4  Rcpp_0.12.15       
 [4] CorShrink_0.1.1     matrixStats_0.53.1  dplyr_0.7.4        
 [7] Biobase_2.38.0      BiocGenerics_0.24.0 conicfit_1.0.4     
[10] geigen_2.1          pracma_2.1.4        circular_0.4-93    

loaded via a namespace (and not attached):
 [1] plyr_1.8.4        compiler_3.4.1    pillar_1.1.0     
 [4] git2r_0.21.0      bindr_0.1         iterators_1.0.9  
 [7] tools_3.4.1       boot_1.3-19       digest_0.6.15    
[10] evaluate_0.10.1   tibble_1.4.2      lattice_0.20-35  
[13] pkgconfig_2.0.1   rlang_0.2.0       foreach_1.4.4    
[16] Matrix_1.2-10     yaml_2.1.16       mvtnorm_1.0-7    
[19] bindrcpp_0.2      stringr_1.3.0     knitr_1.20       
[22] rprojroot_1.3-2   grid_3.4.1        glue_1.2.0       
[25] R6_2.2.2          rmarkdown_1.8     reshape2_1.4.3   
[28] ashr_2.2-4        magrittr_1.5      MASS_7.3-47      
[31] codetools_0.2-15  backports_1.1.2   htmltools_0.3.6  
[34] assertthat_0.2.0  stringi_1.1.6     pscl_1.5.2       
[37] doParallel_1.0.11 truncnorm_1.0-7   SQUAREM_2017.10-1</code></pre>
</div>

<!-- Adjust MathJax settings so that all math formulae are shown using
TeX fonts only; see
http://docs.mathjax.org/en/latest/configuration.html.  This will make
the presentation more consistent at the cost of the webpage sometimes
taking slightly longer to load. Note that this only works because the
footer is added to webpages before the MathJax javascript. -->
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({
    "HTML-CSS": { availableFonts: ["TeX"] }
  });
</script>

<hr>
<p>
    This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->



</div>

<script>

// add bootstrap table styles to pandoc tables
function bootstrapStylePandocTables() {
  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}
$(document).ready(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>