<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
  <meta charset="utf-8" />
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta name="generator" content="pandoc" />
  <meta name="author" content="Lei Sun" />
  <meta name="date" content="2017-01-18" />
  <title>Comparison with Knockoff: \Sigma_X is Toeplitz</title>

  <!-- Third-party libraries. Order matters: jQuery is loaded before the
       Bootstrap, jQuery UI, tocify and tabsets scripts. -->
  <script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
  <script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
  <script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
  <script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
  <script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
  <link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
  <script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
  <script src="site_libs/navigation-1.1/tabsets.js"></script>
  <link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
  <script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
  <link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

  <style type="text/css">code{white-space: pre;}</style>
  <style type="text/css">
    pre:not([class]) { background-color: white; }
  </style>

  <!-- Syntax highlighting: only runs when highlight.js was actually loaded. -->
  <script type="text/javascript">
    if (window.hljs) {
      hljs.configure({languages: []});
      hljs.initHighlightingOnLoad();
      /* If the document has already finished loading, the on-load hook will
         not fire, so trigger highlighting explicitly on the next tick. */
      if (document.readyState && document.readyState === "complete") {
        window.setTimeout(function() { hljs.initHighlighting(); }, 0);
      }
    }
  </script>

  <style type="text/css">
    h1 { font-size: 34px; }
    h1.title { font-size: 38px; }
    h2 { font-size: 30px; }
    h3 { font-size: 24px; }
    h4 { font-size: 18px; }
    h5 { font-size: 16px; }
    h6 { font-size: 12px; }
    .table th:not([align]) { text-align: left; }
  </style>
</head>

<body>

<style type="text/css">
  .main-container { max-width: 940px; margin-left: auto; margin-right: auto; }
  code { color: inherit; background-color: rgba(0, 0, 0, 0.04); }
  img { max-width:100%; height: auto; }
  .tabbed-pane { padding-top: 12px; }
  button.code-folding-btn:focus { outline: none; }
</style>

<style type="text/css">
  /* padding for bootstrap navbar */
  body { padding-top: 51px; padding-bottom: 40px; }
  /* offset scroll position for anchor links (for fixed navbar) */
  .section h1 { padding-top: 56px; margin-top: -56px; }
  .section h2 { padding-top: 56px; margin-top: -56px; }
  .section h3 { padding-top: 56px; margin-top: -56px; }
  .section h4 { padding-top: 56px; margin-top: -56px; }
  .section h5 { padding-top: 56px; margin-top: -56px; }
  .section h6 { padding-top: 56px; margin-top: -56px; }
</style>

<script>
  /* Manage the active state of the navbar menu based on the current page.
     Line comments are kept on their own lines: if this script is ever
     collapsed onto a single line, a "//" comment would swallow the code
     that follows it. */
  $(document).ready(function () {
    // active menu anchor: last path segment, defaulting to index.html
    var href = window.location.pathname;
    href = href.slice(href.lastIndexOf('/') + 1);
    if (href === "") {
      href = "index.html";
    }
    var menuAnchor = $('a[href="' + href + '"]');

    // mark it active
    menuAnchor.parent().addClass('active');

    // if it's got a parent navbar menu mark it active as well
    menuAnchor.closest('li.dropdown').addClass('active');
  });
</script>

<div class="container-fluid main-container">

<!-- tabsets -->
<script>
  $(document).ready(function () {
    window.buildTabsets("TOC");
  });
</script>

<!-- floating table of contents (tocify) -->
<script>
  $(document).ready(function () {
    // move toc-ignore selectors from section div to header
    $('div.section.toc-ignore')
      .removeClass('toc-ignore')
      .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // establish options
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      // strip punctuation, replace whitespace with "_" and lower-case the heading text
      hashGenerator: function (text) {
        return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0
    };
    options.showAndHide = true;
    options.smoothScroll = true;

    // build the table of contents inside #TOC
    $("#TOC").tocify(options);
  });
</script>

<style type="text/css">
  #TOC { margin: 25px 0px 20px 0px; }
  @media (max-width: 768px) {
    #TOC { position: relative; width: 100%; }
  }
  .toc-content { padding-left: 30px; padding-right: 40px; }
  div.main-container { max-width: 1200px; }
  div.tocify { width: 20%; max-width: 260px; max-height: 85%; }
  @media (min-width: 768px) and (max-width: 991px) {
    div.tocify { width: 25%; }
  }
  @media (max-width: 767px) {
    div.tocify { width: 100%; max-width: none; }
  }
  .tocify ul, .tocify li { line-height: 20px; }
  .tocify-subheader .tocify-item { font-size: 0.90em; padding-left: 25px; text-indent: 0; }
  .tocify .list-group-item { border-radius: 0px; }
</style>

<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">

<div class="navbar navbar-default navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-label="Toggle navigation">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">truncash</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
          <a href="index.html">Home</a>
        </li>
        <li>
          <a href="about.html">About</a>
        </li>
        <li>
          <a href="license.html">License</a>
        </li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
          <a href="https://github.com/LSun/truncash" aria-label="truncash repository on GitHub">
            <span class="fa fa-github" aria-hidden="true"></span>
          </a>
        </li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">
<h1 class="title toc-ignore">Comparison with <code>Knockoff</code>: <span class="math inline">\(\Sigma_X\)</span> is Toeplitz</h1>
<h4 class="author"><em>Lei Sun</em></h4>
<h4 class="date"><em>2017-01-18</em></h4>
</div>

<!-- The file analysis/chunks.R contains chunks that define default settings shared
across the workflowr files. --> <!-- Update knitr chunk options --> <!-- Insert the date the file was last updated --> <p><strong>Last updated:</strong> 2018-01-30</p> <!-- Insert the code version (Git commit SHA1) if Git repository exists and R package git2r is installed --> <p><strong>Code version:</strong> 5442ab8</p> <!-- Add your analysis here --> <section id="introduction" class="level2"> <h2>Introduction</h2> <p>We apply CASH to variable selection in linear regression and compare it with other popular methods, including BH and Knockoff.</p> </section> <section id="simulation-setting" class="level2"> <h2>Simulation Setting</h2> <p>The simulation setting is very similar to what’s used in Knockoff’s <a href="https://cran.r-project.org/web/packages/knockoff/vignettes/fixed.html">vignette</a>. The only notable difference is that the non-zero signals are normally distributed and centered at zero, rather than constant.</p> <p>Data are simulated by <span class="math display">\[ y_n = X_{n \times p}\beta_p + e_n \]</span> where <span class="math display">\[ \begin{array}{c} n = 2000 \\ p = 1000 \\ e_n \sim N(0, 1) \\ \beta \sim \eta\delta_0 + (1 - \eta)N(0, \sigma /\sqrt{n}) \end{array} \]</span> Each row of <span class="math inline">\(X\)</span> is generated independently from a <span class="math inline">\(N(0, \Sigma_\rho)\)</span> distribution, where <span class="math inline">\(\left(\Sigma_\rho\right)_{j, k} = \rho^{|j - k|}\)</span>, a Toeplitz matrix.</p> <p>Every method selects the variables with respect to a nominal false discovery rate <span class="math inline">\(q = 0.1\)</span>.</p> <p>In the simulations, we vary the sparsity level <span class="math inline">\(\eta\)</span>, the signal strength <span class="math inline">\(\sigma\)</span>, and the feature correlation <span class="math inline">\(\rho\)</span>.</p> </section> <section id="methods" class="level2"> <h2>Methods</h2> <ul> <li><p>BH: First run multiple linear regression, then apply BH to the obtained 
<span class="math inline">\(p\)</span>-values.</p></li> <li><p>qvalue: First run multiple linear regression, then apply <code>qvalue::qvalue</code> to the obtained <span class="math inline">\(p\)</span>-values.</p></li> <li><p>Knockoff: Directly apply <code>knockoff::knockoff</code> to <span class="math inline">\(X\)</span>, <span class="math inline">\(y\)</span>.</p></li> <li><p>ASH: First run multiple linear regression, then apply <code>ashr::ash</code> to the obtained <span class="math inline">\(\hat\beta\)</span> and <span class="math inline">\(\hat{\text{se}}\left(\hat \beta\right)\)</span>, using a normal mixture and a normal likelihood.</p></li> <li><p>CASH: First run multiple linear regression, then apply <code>cash</code> to the obtained <span class="math inline">\(\hat\beta\)</span> and <span class="math inline">\(\hat{\text{se}}\left(\hat \beta\right)\)</span>, using a normal mixture and a normal likelihood, with the default penalty on the Gaussian derivative coefficients.</p></li> <li><p>CASH+: CASH with perfect knowledge, using the true noise level <span class="math inline">\(\text{se}\left(\hat{\beta}\right)\)</span>.</p></li> </ul> </section> <section id="observations" class="level2"> <h2>Observations</h2> <ul> <li><p>BH is very robust and very fast.</p></li> <li><p>Knockoff is way too slow and way too conservative when the signals are unimodal at zero. 
Perhaps it needs strong signals distinctly different from the “bulk.” The unimodal setting is really adverse to this method.</p></li> <li><p><code>CASH</code> works fine, but not better than the basic <code>ASH</code>.</p></li> </ul> </section> <section id="eta-0.9-sigma-5-rho-0.5" class="level2"> <h2><span class="math inline">\(\eta = 0.9\)</span>, <span class="math inline">\(\sigma = 5\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h2> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-2-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-2-2.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z $group == : Outlier (-Inf) in boxplot 3 is not drawn</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-2-3.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.8-sigma-4-rho-0.5" class="level2"> <h2><span class="math inline">\(\eta = 0.8\)</span>, <span class="math inline">\(\sigma = 4\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h2> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-4-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-4-2.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>Warning in bplt(at[i], wid = width[i], stats = z$stats[, i], out = z$out[z $group == : Outlier (-Inf) in boxplot 2 is not drawn</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-4-3.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.5-sigma-4-rho-0.5" 
class="level2"> <h2><span class="math inline">\(\eta = 0.5\)</span>, <span class="math inline">\(\sigma = 4\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h2> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-6-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/unnamed-chunk-6-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/unnamed-chunk-6-3.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-in-left0.5-0.6-0.7-0.8-0.9right-sigma-4-rho-0.5" class="level2"> <h2><span class="math inline">\(\eta \in \left\{0.5, 0.6, 0.7, 0.8, 0.9\right\}\)</span>, <span class="math inline">\(\sigma = 4\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h2> <section id="overall-across-all-sparsity" class="level3"> <h3>Overall across all sparsity</h3> <p><img src="figure/knockoff.rmd/avg-1.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/avg-2.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.5-sigma-4-rho-0.5-1" class="level3"> <h3><span class="math inline">\(\eta = 0.5\)</span>, <span class="math inline">\(\sigma = 4\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h3> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/500-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/500-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/500-3.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.9-sigma-4-rho-0.5" class="level3"> <h3><span class="math inline">\(\eta = 0.9\)</span>, 
<span class="math inline">\(\sigma = 4\)</span>, <span class="math inline">\(\rho = 0.5\)</span></h3> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/100-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/100-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/100-3.png" width="672" style="display: block; margin: auto;" /></p> </section> </section> <section id="eta-in-left0.5-0.6-0.7-0.8-0.9right-sigma-3-rho-0.7" class="level2"> <h2><span class="math inline">\(\eta \in \left\{0.5, 0.6, 0.7, 0.8, 0.9\right\}\)</span>, <span class="math inline">\(\sigma = 3\)</span>, <span class="math inline">\(\rho = 0.7\)</span></h2> <section id="overall-across-all-sparsity-1" class="level3"> <h3>Overall across all sparsity</h3> <p><img src="figure/knockoff.rmd/avg%20weak-1.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/avg%20weak-2.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.5-sigma-3-rho-0.7" class="level3"> <h3><span class="math inline">\(\eta = 0.5\)</span>, <span class="math inline">\(\sigma = 3\)</span>, <span class="math inline">\(\rho = 0.7\)</span></h3> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/500%20weak-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/500%20weak-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/500%20weak-3.png" width="672" style="display: block; margin: auto;" /></p> </section> <section id="eta-0.9-sigma-3-rho-0.7" class="level3"> <h3><span class="math inline">\(\eta = 0.9\)</span>, <span class="math inline">\(\sigma = 3\)</span>, <span 
class="math inline">\(\rho = 0.7\)</span></h3> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/100%20weak-1.png" width="672" style="display: block; margin: auto;" /></p> <pre><code>No id variables; using all as measure variables</code></pre> <p><img src="figure/knockoff.rmd/100%20weak-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff.rmd/100%20weak-3.png" width="672" style="display: block; margin: auto;" /></p> </section> </section> <section id="session-information" class="level2"> <h2>Session information</h2> <!-- Insert the session information into the document --> <pre class="r"><code>sessionInfo()</code></pre> <pre><code>R version 3.4.3 (2017-11-30) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS High Sierra 10.13.2 Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] ggplot2_2.2.1 reshape2_1.4.3 loaded via a namespace (and not attached): [1] Rcpp_0.12.14 knitr_1.18 magrittr_1.5 munsell_0.4.3 [5] colorspace_1.3-2 rlang_0.1.6 stringr_1.2.0 plyr_1.8.4 [9] tools_3.4.3 grid_3.4.3 gtable_0.2.0 git2r_0.21.0 [13] htmltools_0.3.6 yaml_2.1.16 lazyeval_0.2.1 rprojroot_1.3-2 [17] digest_0.6.14 tibble_1.4.1 evaluate_0.10.1 rmarkdown_1.8 [21] labeling_0.3 stringi_1.1.6 compiler_3.4.3 pillar_1.0.1 [25] scales_0.5.0 backports_1.1.2 </code></pre> </section> <hr> <p> This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a> </p> <hr> <!-- To enable disqus, uncomment the section below and provide your disqus_shortname --> <!-- disqus <div id="disqus_thread"></div> 
<script type="text/javascript">
    /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
    var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

    /* * * DON'T EDIT BELOW THIS LINE * * */
    (function() {
        var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
        dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
        (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
    })();
</script>
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->

</div>
</div>

</div>

<script>
  /* Line comments below are kept on their own lines: if this script is ever
     collapsed onto a single line, a "//" comment would swallow the function
     definition and the ready handler that follow it. */

  // add bootstrap table styles to pandoc tables
  // (pandoc marks the header row with class "header"; the enclosing
  // thead > table gets Bootstrap's "table table-condensed" classes)
  function bootstrapStylePandocTables() {
    $('tr.header').parent('thead').parent('table').addClass('table table-condensed');
  }

  // apply the table styling once the DOM is ready
  $(document).ready(function () {
    bootstrapStylePandocTables();
  });
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    /* Inject the MathJax loader into <head> at runtime instead of using a
       static script tag. */
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>