<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Lei Sun" />

<meta name="date" content="2018-02-20" />

<title>Factor Model for \hat\beta and Column Randomization for Knockoff</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/cosmo.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
// Set up highlight.js syntax highlighting, but only if the library loaded.
(function () {
  if (!window.hljs) {
    return;
  }

  // Configure with an empty `languages` list, then register the on-load hook.
  hljs.configure({ languages: [] });
  hljs.initHighlightingOnLoad();

  // When the document has already reached the "complete" ready state, also
  // schedule an explicit highlighting pass on the next timer tick.
  var documentComplete = document.readyState === "complete";
  if (documentComplete) {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>



<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* padding for bootstrap navbar */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* offset scroll position for anchor links (for fixed navbar)  */
.section h1 {
  padding-top: 56px;
  margin-top: -56px;
}

.section h2 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h3 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h4 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h5 {
  padding-top: 56px;
  margin-top: -56px;
}
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// Manage the active state of the navbar menu based on the current page.
$(document).ready(function () {
  // The page's file name is the last segment of the URL path. Both variables
  // are declared with `var` so nothing leaks into the global scope.
  var path = window.location.pathname;
  var href = path.substring(path.lastIndexOf('/') + 1);

  // A path ending in "/" has no file name; treat it as index.html.
  if (href === "") {
    href = "index.html";
  }

  // Anchors whose href attribute is exactly this file name.
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
// Once the DOM is ready, build the tabsets, passing "TOC" to buildTabsets.
$(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
// Build the table of contents inside #TOC once the DOM is ready.
$(document).ready(function ()  {

    // move toc-ignore selectors from section div to header
    // (only h1-h5 children of the section div receive the class)
    $('div.section.toc-ignore')
        .removeClass('toc-ignore')
        .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // Options handed to the tocify plugin, gathered in a single literal.
    var options = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      // Turn heading text into an anchor hash: strip the listed punctuation
      // characters, replace each whitespace character with "_", lowercase it.
      hashGenerator: function (text) {
        return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
      },
      ignoreSelector: ".toc-ignore",
      scrollTo: 0,
      showAndHide: true,
      smoothScroll: true
    };

    // tocify (the widget instance is not needed afterwards, so it is not kept)
    $("#TOC").tocify(options);
});
</script>

<style type="text/css">

#TOC {
  margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}


.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

div.main-container {
  max-width: 1200px;
}

div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

.tocify ul, .tocify li {
  line-height: 20px;
}

.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

.tocify .list-group-item {
  border-radius: 0px;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">truncash</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="index.html">Home</a>
</li>
<li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/LSun/truncash">
    <span class="fa fa-github"></span>
     
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Factor Model for <span class="math inline">\(\hat\beta\)</span> and Column Randomization for <code>Knockoff</code></h1>
<h4 class="author"><em>Lei Sun</em></h4>
<h4 class="date"><em>2018-02-20</em></h4>

</div>


<!-- The file analysis/chunks.R contains chunks that define default settings
shared across the workflowr files. -->
<!-- Update knitr chunk options -->
<!-- Insert the date the file was last updated -->
<p><strong>Last updated:</strong> 2018-04-05</p>
<!-- Insert the code version (Git commit SHA1) if Git repository exists and R
 package git2r is installed -->
<p><strong>Code version:</strong> 20ea328</p>
<!-- Add your analysis here -->
<div id="introduction" class="section level2">
<h2>Introduction</h2>
<p><code>Knockoff</code> has 3 steps.</p>
<ol style="list-style-type: decimal">
<li><p>Generate knockoff variables, which keep the same correlation structure as the original variables but have no effect on the response.</p></li>
<li><p>Generate test statistics such that these statistics tend to be large positive numbers for non-null variables but are iid positive or negative for null variables.</p></li>
<li><p>Find a cutoff threshold for those test statistics to control the estimated FDR under <span class="math inline">\(q\)</span>.</p></li>
</ol>
<p>The default <code>knockoff::knockoff.filter</code> function uses <code>SDP</code> construction in step 1 and <code>LASSO</code>-related statistics in step 2. However, <a href="knockoff_5.html#scenario_3:_(hatbeta)_from_a_factor_model7">we’ve found</a> that <code>Knockoff</code> coded in this way failed to control FDR in simulations when variables are generated such that <span class="math inline">\(\hat\beta\)</span> has heavy average absolute pairwise correlation, which seems to contradict Theorem 2 in the <code>Knockoff</code> paper. Now we take a closer look to see what went wrong.</p>
<p>In step 1, we use two construction methods: <code>equi</code> and <code>sdp</code>. <code>sdp</code> is believed to be more powerful. In step 2, we use two statistics: <code>marginal</code> and <code>lasso</code>-related. <code>lasso</code>-related is believed to be more powerful.</p>
<pre class="r"><code>n &lt;- 3e3
p &lt;- 1e3
k &lt;- 50
d &lt;- 7
q &lt;- 0.1</code></pre>
</div>
<div id="fixed-x-knockoffs-1000-simulation-trials" class="section level2">
<h2>Fixed <span class="math inline">\(X\)</span> Knockoffs: 1000 simulation trials</h2>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-5-1.png" width="672" style="display: block; margin: auto;" /></p>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-6-1.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff_7.rmd/unnamed-chunk-6-2.png" width="672" style="display: block; margin: auto;" /></p>
</div>
<div id="model-x-knockoffs-1000-simulation-trials" class="section level2">
<h2>Model <span class="math inline">\(X\)</span> Knockoffs: 1000 simulation trials</h2>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-9-1.png" width="672" style="display: block; margin: auto;" /></p>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-10-1.png" width="672" style="display: block; margin: auto;" /></p>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-11-1.png" width="672" style="display: block; margin: auto;" /></p>
</div>
<div id="observation" class="section level2">
<h2>Observation</h2>
<p>A set of well-behaving <code>Knockoff</code> variables <span class="math inline">\(X^k\)</span> should have the property that <span class="math display">\[
\begin{array}{c}
cor(X^k_i, X^k_j) = cor(X_i, X_j)\\
cor(X_i, X^k_j) = cor(X_i, X_j)
\end{array}
\]</span> while <span class="math inline">\(cor(X_i, X^k_i)\)</span> should be as small as possible. It turns out it’s just not that easy to generate these well-behaving <code>Knockoff</code> variables when columns in <span class="math inline">\(X\)</span> are correlated in a certain way. Especially when using <code>SDP</code> optimization, it could generate a lot of knockoffs that are exactly the same as the originals.</p>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-12-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>If an original variable and its knockoff are too similar, it essentially makes little difference which one is included in the model, from a goodness of fit point of view.</p>
<p>The problem becomes more severe when we fit models like LASSO using methods like coordinate descent. The result depends in large part on the sequence of variables getting into the model. So if we feed LASSO with <code>cbind(X, Xk)</code>, for every iteration, it’s always <code>X[j]</code> being optimized before <code>Xk[j]</code>. That’s a major reason we see asymmetric test statistics as above, and why <code>Knockoff</code> loses FDR control in these circumstances.</p>
<p>One way to fix that is to randomize the order of variables in <code>cbind(X, Xk)</code> before feeding them to LASSO. The following is a simulation.</p>
<p><img src="figure/knockoff_7.rmd/unnamed-chunk-14-1.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff_7.rmd/unnamed-chunk-14-2.png" width="672" style="display: block; margin: auto;" /><img src="figure/knockoff_7.rmd/unnamed-chunk-14-3.png" width="672" style="display: block; margin: auto;" /></p>
<p>After column randomization, the test statistics for null variables are back to normal and <code>Knockoff</code> controls FDR again. The low power is another issue.</p>
</div>
<div id="session-information" class="section level2">
<h2>Session information</h2>
<!-- Insert the session information into the document -->
<pre class="r"><code>sessionInfo()</code></pre>
<pre><code>R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] lattice_0.20-35 doMC_1.3.5      iterators_1.0.9 foreach_1.4.4  
[5] ggplot2_2.2.1   reshape2_1.4.3  knockoff_0.3.0 

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14     knitr_1.20       magrittr_1.5     munsell_0.4.3   
 [5] colorspace_1.3-2 rlang_0.1.6      stringr_1.3.0    plyr_1.8.4      
 [9] tools_3.4.3      grid_3.4.3       gtable_0.2.0     RSpectra_0.12-0 
[13] git2r_0.21.0     htmltools_0.3.6  yaml_2.1.18      lazyeval_0.2.1  
[17] rprojroot_1.3-2  digest_0.6.15    tibble_1.4.1     Rdsdp_1.0.4-2   
[21] Matrix_1.2-12    codetools_0.2-15 evaluate_0.10.1  rmarkdown_1.9   
[25] labeling_0.3     stringi_1.1.6    pillar_1.0.1     compiler_3.4.3  
[29] scales_0.5.0     backports_1.1.2 </code></pre>
</div>

<hr>
<p>
    This <a href="http://rmarkdown.rstudio.com">R Markdown</a> site was created with <a href="https://github.com/jdblischak/workflowr">workflowr</a>
</p>
<hr>

<!-- To enable disqus, uncomment the section below and provide your disqus_shortname -->

<!-- disqus
  <div id="disqus_thread"></div>
    <script type="text/javascript">
        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
        var disqus_shortname = 'rmarkdown'; // required: replace example with your forum shortname

        /* * * DON'T EDIT BELOW THIS LINE * * */
        (function() {
            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
        })();
    </script>
    <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
    <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
-->


</div>
</div>

</div>

<script>

// Apply Bootstrap table classes to pandoc tables: every <table> that is the
// parent of a <thead> which is in turn the parent of a `tr.header` row.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var pandocTables = headerRows.parent('thead').parent('table');
  pandocTables.addClass('table table-condensed');
}

// Run the styling pass once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Create a <script> element for MathJax and append it to the document's
  // <head>, so the library is fetched at runtime instead of being inlined.
  (function () {
    var mathJaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    var headElement = document.getElementsByTagName("head")[0];

    var loader = document.createElement("script");
    loader.type = "text/javascript";
    loader.src  = mathJaxUrl;

    headElement.appendChild(loader);
  })();
</script>

</body>
</html>