<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Lei Sun" />

<meta name="date" content="2017-03-25" />

<title>Empirical Null with Gaussian Derivatives: Fitting Examples</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
if (window.hljs) {
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  if (document.readyState && document.readyState === "complete") {
    window.setTimeout(function() { hljs.initHighlighting(); }, 0);
  }
}
</script>



<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar)  */
.section h1 {
  padding-top: 56px;
  margin-top: -56px;
}

.section h2 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h3 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h4 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h5 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// manage active state of menu based on current page
$(document).ready(function () {
  // active menu anchor
  href = window.location.pathname
  href = href.substr(href.lastIndexOf('/') + 1)
  if (href === "")
    href = "index.html";
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
$(document).ready(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
$(document).ready(function ()  {

    // move toc-ignore selectors from section div to header
    $('div.section.toc-ignore')
        .removeClass('toc-ignore')
        .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // establish options
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      hashGenerator: function (text) {
        return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0
    };
    options.showAndHide = true;
    options.smoothScroll = true;

    // tocify
    var toc = $("#TOC").tocify(options).data("toc-tocify");
});
</script>

<style type="text/css">

#TOC {
  margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}


.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

div.main-container {
  max-width: 1200px;
}

div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

.tocify .list-group-item {
  border-radius: 0px;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">truncash</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="index.html">Home</a>
</li>
<li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/LSun/truncash">
    <span class="fa fa-github"></span>
     
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Empirical Null with Gaussian Derivatives: Fitting Examples</h1>
<h4 class="author"><em>Lei Sun</em></h4>
<h4 class="date"><em>2017-03-25</em></h4>

</div>


<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2017-12-21</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> 6e42447</p>
<!-- Add your analysis here -->
<section id="problem-setting" class="level2">
<h2>Problem setting</h2>
<p>With <a href="gaussian_derivatives.html">all the assumptions</a>, we formulate a convex optimization as follows.</p>
<p><span class="math display">\[
\begin{array}{rl}
\max\limits_{w_1, \ldots, w_K} &amp; \sum\limits_{i = 1}^n\log\left(1 +\sum\limits_{k = 1}^K w_kh_k(z_i)\right)\\
\text{s.t.} &amp; 
1 + \sum\limits_{k = 1}^K w_kh_k(z_i) \geq0
\end{array}
\]</span> It can also be written as</p>
<p><span class="math display">\[
\begin{array}{rl}
\max\limits_{w} &amp; \sum\log\left(1 +Hw\right)\\
\text{s.t.} &amp; 
1 +Hw \geq0
\end{array}
\]</span></p>
<p>where <span class="math inline">\(H_{ik} = h_k(z_i)\)</span>.</p>
</section>
<section id="choosing-k" class="level2">
<h2>Choosing <span class="math inline">\(K\)</span></h2>
<p>With finite <span class="math inline">\(K\)</span> Gaussian derivatives, the fitted log-likelihood <span class="math inline">\(\log\prod\limits_{i = 1}^nf_0(z_i) = \sum\limits_{i = 1}^n\log \varphi(z_i) + \sum\limits_{i = 1}^n\log\left(1 +\sum\limits_{k = 1}^K \hat w_kh_k(z_i)\right)\)</span>. Note that <span class="math inline">\(\hat w_k \equiv 0\)</span> is a feasible solution, so the optimal solution will always have a log-likelihood no less than <span class="math inline">\(\sum\limits_{i = 1}^n\log \varphi(z_i)\)</span>, the log-likelihood of <span class="math inline">\(N(0, 1)\)</span>. Similarly, let <span class="math inline">\(\hat w_K\)</span> be the optimal solution with <span class="math inline">\(K\)</span> Gaussian derivatives, then <span class="math inline">\([\hat w_K, 0]\)</span> will be a feasible solution with <span class="math inline">\(K + 1\)</span> Gaussian derivatives, with the same objective value, which should be no larger than the optimal objective value for <span class="math inline">\(K + 1\)</span>. This fact implies that the fitted log-likelihood should be non-decreasing with respect to <span class="math inline">\(K\)</span>.</p>
<p>This property implies that for the same data set, multiple models with increasing <span class="math inline">\(K\)</span>’s can be fitted, and the searching stops at a sufficiently large <span class="math inline">\(K\)</span>. Let <span class="math inline">\(\hat g_K = \max\limits_w\sum\limits_{i = 1}^n\log\left(1 +\sum\limits_{k = 1}^K w_kh_k(z_i)\right) = \sum\limits_{i = 1}^n\log\left(1 +\sum\limits_{k = 1}^K \hat w_kh_k(z_i)\right)\)</span>, the optimal objective value for <span class="math inline">\(K\)</span>, drawing on insight from the likelihood ratio test, the optimal <span class="math inline">\(\hat K = \inf\left\{K: 2(\hat g_{K + m} - \hat g_K) \leq \chi_{m, 1-\alpha}^2, m = 1, \ldots, M\right\}\)</span> with pre-specified <span class="math inline">\(\alpha\)</span> and <span class="math inline">\(M\)</span>.</p>
<p><span class="math inline">\(M\)</span> should be greater than <span class="math inline">\(1\)</span> because odd order Gaussian derivatives and even order ones have distinct properties, especially in lower orders. Odd order Gaussian derivatives are odd functions, more associated with the skewness of the empirical distribution, whereas even order ones are even functions, more associated with its kurtosis. Therefore, it’s not at all impossible that the difference in the log-likelihood objective between <span class="math inline">\(K\)</span> and <span class="math inline">\(K + 1\)</span> is small, yet that between <span class="math inline">\(K\)</span> and <span class="math inline">\(K + 2\)</span> is large.</p>
<p>The function <code>ecdfz.optimal</code> in the script <a href="https://github.com/LSun/truncash/blob/master/code/ecdfz.R"><code>ecdfz.R</code></a> is using this rule with the default setting <span class="math inline">\(\alpha = 0.05, M = 2\)</span>.</p>
</section>
<section id="fitting-with-cvxr" class="level2">
<h2>Fitting with <code>cvxr</code></h2>
<p>The script <a href="https://github.com/LSun/truncash/blob/master/code/ecdfz.R"><code>ecdfz.R</code></a> is using <a href="https://github.com/anqif/cvxr"><code>cvxr</code></a> to find the maximum likelihood estimate of the observed density of correlated null <span class="math inline">\(z\)</span> scores.</p>
<pre class="r"><code>source(&quot;../code/ecdfz.R&quot;)</code></pre>
<pre class="r"><code>z = read.table(&quot;../output/z_null_liver_777.txt&quot;)
p = read.table(&quot;../output/p_null_liver_777.txt&quot;)</code></pre>
</section>
<section id="examples" class="level2">
<h2>Examples</h2>
<p>Several selected <a href="correlated_z_2.html"><code>ash</code>-hostile</a> and/or <a href="correlated_z_3.Rmd"><code>BH</code>-hostile</a> data sets are fitted as follows. More detailed information of these selected data sets are <a href="correlated_z_2.html">here</a> and <a href="correlated_z_3.Rmd">here</a>.</p>
<p>For each of these selected data sets, we plot the histogram, the density of <span class="math inline">\(N(0, 1)\)</span> in red line, and that fitted by Gaussian derivatives in blue. We also give the information of the number of false discoveries by Benjamini-Hochberg, and <span class="math inline">\(\hat\pi_0\)</span> estimated by <code>ash</code>. Note that BH’s false discoveries suggests the inflation of the most extreme observations, whereas <code>ash</code>’s <span class="math inline">\(\hat\pi_0\)</span> the empirical distribution’s general deviation from <span class="math inline">\(N(0, 1)\)</span>.</p>
<p>We also plot the optimal objective <span class="math inline">\(\hat g_K\)</span> for all the fitted <span class="math inline">\(K\)</span>, and indicate the optimal <span class="math inline">\(\hat K\)</span> found according to the aforementioned rule. <strong>It appears the second order derivative is usually the most important, followed by the fourth.</strong></p>
<pre class="r"><code>library(ashr)
DataSet = c(32, 327, 355, 483, 778)
res_DataSet = list()
for (i in 1:length(DataSet)) {
  zscore = as.numeric(z[DataSet[i], ])
  fit.ecdfz = ecdfz.optimal(zscore)
  fit.ash = ash(zscore, 1, method = &quot;fdr&quot;)
  fit.ash.pi0 = get_pi0(fit.ash)
  pvalue = as.numeric(p[DataSet[i], ])
  fd.bh = sum(p.adjust(pvalue, method = &quot;BH&quot;) &lt;= 0.05)
  res_DataSet[[i]] = list(DataSet = DataSet[i], fit.ecdfz = fit.ecdfz, fit.ash = fit.ash, fit.ash.pi0 = fit.ash.pi0, fd.bh = fd.bh, zscore = zscore, pvalue = pvalue)
}</code></pre>
<pre><code>Data Set 32 : Number of BH&#39;s False Discoveries: 0 ; ASH&#39;s pihat0 = 0.184423 ; Chosen number of Gaussian derivatives K = 4 
Optimal weights of Gaussian derivatives w =
1 : -0.0365352616939748 ; 2 : 0.1999283988026 ; 3 : 0.0104709711917952 ; 4 : -0.0201268873245717 ;</code></pre>
<p><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-1.png" width="672" style="display: block; margin: auto;" /><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-2.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>Data Set 327 : Number of BH&#39;s False Discoveries: 489 ; ASH&#39;s pihat0 = 0.1522796 ; Chosen number of Gaussian derivatives K = 9 
Optimal weights of Gaussian derivatives w =
1 : 0.0339429040246409 ; 2 : 0.73456039139193 ; 3 : -0.153251399156623 ; 4 : 0.188391233293932 ; 5 : -0.0550489496726788 ; 6 : 0.0229717983852805 ; 7 : -0.00690839817896393 ; 8 : 0.00121590052122215 ; 9 : -0.000314826111681546 ;</code></pre>
<p><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-3.png" width="672" style="display: block; margin: auto;" /><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-4.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>Data Set 355 : Number of BH&#39;s False Discoveries: 639 ; ASH&#39;s pihat0 = 0.04750946 ; Chosen number of Gaussian derivatives K = 9 
Optimal weights of Gaussian derivatives w =
1 : 0.0226254633548931 ; 2 : 0.921396261289049 ; 3 : 0.0218042845603059 ; 4 : 0.17609829567166 ; 5 : -0.0137967389059908 ; 6 : 0.00396812175677823 ; 7 : -0.00490495150100676 ; 8 : -0.000608489154565857 ; 9 : -0.000298931322154747 ;</code></pre>
<p><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-5.png" width="672" style="display: block; margin: auto;" /><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-6.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>Data Set 483 : Number of BH&#39;s False Discoveries: 1 ; ASH&#39;s pihat0 = 0.9998824 ; Chosen number of Gaussian derivatives K = 4 
Optimal weights of Gaussian derivatives w =
1 : 0.045465618357444 ; 2 : -0.127037015386377 ; 3 : 0.00972556942208551 ; 4 : 0.0105165371329065 ;</code></pre>
<p><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-7.png" width="672" style="display: block; margin: auto;" /><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-8.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>Data Set 778 : Number of BH&#39;s False Discoveries: 1 ; ASH&#39;s pihat0 = 0.07619716 ; Chosen number of Gaussian derivatives K = 4 
Optimal weights of Gaussian derivatives w =
1 : 0.00594319611757542 ; 2 : 0.398070908599464 ; 3 : -0.00922245793412396 ; 4 : 0.026306961083798 ;</code></pre>
<p><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-9.png" width="672" style="display: block; margin: auto;" /><img src="figure/gaussian_derivatives_2.rmd/unnamed-chunk-5-10.png" width="672" style="display: block; margin: auto;" /></p>
</section>
<section id="conclusion" class="level2">
<h2>Conclusion</h2>
<p>Gaussian derivatives can deal with a variety of empirical null distribution, especially those not close to normal.</p>
</section>
<section id="session-information" class="level2">
<h2>Session information</h2>
<!-- Insert the session information into the document -->
<pre class="r"><code>sessionInfo()</code></pre>
<pre><code>R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] CVXR_0.94-4   EQL_1.0-0     ttutils_1.0-1

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14      knitr_1.17        magrittr_1.5     
 [4] bit_1.1-12        lattice_0.20-35   R6_2.2.2         
 [7] stringr_1.2.0     tools_3.4.3       grid_3.4.3       
[10] R.oo_1.21.0       git2r_0.20.0      scs_1.1-1        
[13] htmltools_0.3.6   yaml_2.1.16       bit64_0.9-7      
[16] rprojroot_1.3-1   digest_0.6.13     Matrix_1.2-12    
[19] gmp_0.5-13.1      ECOSolveR_0.3-2   R.utils_2.6.0    
[22] evaluate_0.10.1   rmarkdown_1.8     stringi_1.1.6    
[25] compiler_3.4.3    Rmpfr_0.6-1       backports_1.1.2  
[28] R.methodsS3_1.7.1</code></pre>
</section>

<hr>
<p>
    This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->


</div>
</div>

</div>

<script>

// add bootstrap table styles to pandoc tables
function bootstrapStylePandocTables() {
  $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
}
$(document).ready(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>