<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">

<head>

<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />


<meta name="author" content="Lei Sun" />

<meta name="date" content="2017-05-09" />

<title>Improvement on Implementation with Rmosek: Regularization</title>

<script src="site_libs/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/readable.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/textmate.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-4.5.0/css/font-awesome.min.css" rel="stylesheet" />

<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
  pre:not([class]) {
    background-color: white;
  }
</style>
<script type="text/javascript">
// Syntax-highlighting bootstrap; a no-op when highlight.js has not
// defined window.hljs.
(function () {
  if (!window.hljs) {
    return;
  }
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  // When the document is already fully loaded, additionally schedule an
  // explicit highlighting pass on the next tick.
  if (document.readyState === "complete") {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>



<style type="text/css">
h1 {
  font-size: 34px;
}
h1.title {
  font-size: 38px;
}
h2 {
  font-size: 30px;
}
h3 {
  font-size: 24px;
}
h4 {
  font-size: 18px;
}
h5 {
  font-size: 16px;
}
h6 {
  font-size: 12px;
}
.table th:not([align]) {
  text-align: left;
}
</style>


</head>

<body>

<style type = "text/css">
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
img {
  max-width:100%;
  height: auto;
}
.tabbed-pane {
  padding-top: 12px;
}
button.code-folding-btn:focus {
  outline: none;
}
</style>


<style type="text/css">
/* Reserve room at the top (and some at the bottom) of the page for the
   bootstrap navbar. */
body {
  padding-top: 51px;
  padding-bottom: 40px;
}
/* Offset the scroll position of anchor links for the fixed navbar: every
   section heading gets 56px of top padding cancelled by an equal negative
   top margin. */
.section h1,
.section h2,
.section h3,
.section h4,
.section h5,
.section h6 {
  padding-top: 56px;
  margin-top: -56px;
}
</style>

<script>
// Manage the active state of the menu based on the current page: every link
// whose href attribute equals the current page's file name gets its parent
// element (and its enclosing li.dropdown, if any) marked with class "active".
$(document).ready(function () {
  // File-name part of the current URL path. Declared with `var` so it stays
  // local to this handler instead of leaking as an implicit global.
  var href = window.location.pathname;
  href = href.slice(href.lastIndexOf('/') + 1);
  // A path ending in "/" has no file name; treat it as the index page.
  if (href === "")
    href = "index.html";
  var menuAnchor = $('a[href="' + href + '"]');

  // mark it active
  menuAnchor.parent().addClass('active');

  // if it's got a parent navbar menu mark it active as well
  menuAnchor.closest('li.dropdown').addClass('active');
});
</script>


<div class="container-fluid main-container">

<!-- tabsets -->
<script>
// Once the DOM is ready, call the global buildTabsets with the id "TOC"
// (presumably provided by site_libs/navigation-1.1/tabsets.js - confirm).
$(function () {
  window.buildTabsets("TOC");
});
</script>

<!-- code folding -->




<script>
// Build the table of contents inside #TOC with the tocify jQuery plugin.
$(function () {

    // A section div flagged toc-ignore hands that flag over to its direct
    // h1-h5 heading children, so that ignoreSelector below matches headings.
    var flaggedSections = $('div.section.toc-ignore');
    flaggedSections.removeClass('toc-ignore');
    flaggedSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');

    // Derive an anchor hash from heading text: drop the characters
    // . \ / ? & ! # and angle brackets, turn each whitespace character into
    // "_", then lowercase the result.
    function makeHash(text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
    }

    // establish options
    var tocSettings = {
      selectors: "h1,h2,h3",
      theme: "bootstrap3",
      context: '.toc-content',
      hashGenerator: makeHash,
      ignoreSelector: ".toc-ignore",
      scrollTo: 0,
      showAndHide: true,
      smoothScroll: true
    };

    // tocify
    var tocWidget = $("#TOC").tocify(tocSettings).data("toc-tocify");
});
</script>

<style type="text/css">

/* Layout rules for the table-of-contents box (#TOC, which also carries the
   class "tocify" in this page) and the main content column next to it. */

/* Vertical spacing around the TOC box. */
#TOC {
  margin: 25px 0px 20px 0px;
}
/* Viewports up to 768px wide: TOC is positioned relatively and spans the
   full width of its container. */
@media (max-width: 768px) {
#TOC {
  position: relative;
  width: 100%;
}
}


/* Horizontal padding of the main content column. */
.toc-content {
  padding-left: 30px;
  padding-right: 40px;
}

/* Maximum page width; this selector is more specific than the plain
   .main-container rule (940px) declared earlier in the page. */
div.main-container {
  max-width: 1200px;
}

/* Default TOC box size. */
div.tocify {
  width: 20%;
  max-width: 260px;
  max-height: 85%;
}

/* Viewports 768px-991px wide: slightly wider TOC box. */
@media (min-width: 768px) and (max-width: 991px) {
  div.tocify {
    width: 25%;
  }
}

/* Viewports up to 767px wide: TOC box takes the full width, uncapped. */
@media (max-width: 767px) {
  div.tocify {
    width: 100%;
    max-width: none;
  }
}

/* Line height of TOC lists and entries. */
.tocify ul, .tocify li {
  line-height: 20px;
}

/* Entries under a sub-header: smaller font, indented via padding. */
.tocify-subheader .tocify-item {
  font-size: 0.90em;
  padding-left: 25px;
  text-indent: 0;
}

/* Square corners on TOC list-group items. */
.tocify .list-group-item {
  border-radius: 0px;
}


</style>

<!-- setup 3col/9col grid for toc_float and main content  -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>

<div class="toc-content col-xs-12 col-sm-8 col-md-9">




<div class="navbar navbar-default  navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar">
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <a class="navbar-brand" href="index.html">truncash</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li>
  <a href="about.html">About</a>
</li>
<li>
  <a href="license.html">License</a>
</li>
      </ul>
      <ul class="nav navbar-nav navbar-right">
        <li>
  <a href="https://github.com/LSun/truncash">
    <span class="fa fa-github"></span>
     
  </a>
</li>
      </ul>
    </div><!--/.nav-collapse -->
  </div><!--/.container -->
</div><!--/.navbar -->

<!-- Add a small amount of space between sections. -->
<style type="text/css">
div.section {
  padding-top: 12px;
}
</style>

<div class="fluid-row" id="header">



<h1 class="title toc-ignore">Improvement on Implementation with <code>Rmosek</code>: Regularization</h1>
<h4 class="author"><em>Lei Sun</em></h4>
<h4 class="date"><em>2017-05-09</em></h4>

</div>


<p><strong>Last updated:</strong> 2018-05-15</p>
<strong>workflowr checks:</strong> <small>(Click a bullet for more information)</small>
<ul>
<li>
<details>
<summary> <strong style="color:blue;">✔</strong> <strong>R Markdown file:</strong> up-to-date </summary>
<p>Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.</p>
</details>
</li>
<li>
<details>
<summary> <strong style="color:blue;">✔</strong> <strong>Repository version:</strong> <a href="https://github.com/LSun/truncash/tree/388e65e06000e313c170a82f3ed57346f6024897" target="_blank">388e65e</a> </summary>
Great! You are using Git for version control. Tracking code development and connecting the code version to the results is critical for reproducibility. The version displayed above was the version of the Git repository at the time these results were generated. <br><br> Note that you need to be careful to ensure that all relevant files for the analysis have been committed to Git prior to generating the results (you can use <code>wflow_publish</code> or <code>wflow_git_commit</code>). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:
<pre><code>
Ignored files:
    Ignored:    .DS_Store
    Ignored:    .Rhistory
    Ignored:    .Rproj.user/
    Ignored:    analysis/.DS_Store
    Ignored:    analysis/BH_robustness_cache/
    Ignored:    analysis/FDR_Null_cache/
    Ignored:    analysis/FDR_null_betahat_cache/
    Ignored:    analysis/Rmosek_cache/
    Ignored:    analysis/StepDown_cache/
    Ignored:    analysis/alternative2_cache/
    Ignored:    analysis/alternative_cache/
    Ignored:    analysis/ash_gd_cache/
    Ignored:    analysis/average_cor_gtex_2_cache/
    Ignored:    analysis/average_cor_gtex_cache/
    Ignored:    analysis/brca_cache/
    Ignored:    analysis/cash_deconv_cache/
    Ignored:    analysis/cash_fdr_1_cache/
    Ignored:    analysis/cash_fdr_2_cache/
    Ignored:    analysis/cash_fdr_3_cache/
    Ignored:    analysis/cash_fdr_4_cache/
    Ignored:    analysis/cash_fdr_5_cache/
    Ignored:    analysis/cash_fdr_6_cache/
    Ignored:    analysis/cash_plots_cache/
    Ignored:    analysis/cash_sim_1_cache/
    Ignored:    analysis/cash_sim_2_cache/
    Ignored:    analysis/cash_sim_3_cache/
    Ignored:    analysis/cash_sim_4_cache/
    Ignored:    analysis/cash_sim_5_cache/
    Ignored:    analysis/cash_sim_6_cache/
    Ignored:    analysis/cash_sim_7_cache/
    Ignored:    analysis/correlated_z_2_cache/
    Ignored:    analysis/correlated_z_3_cache/
    Ignored:    analysis/correlated_z_cache/
    Ignored:    analysis/create_null_cache/
    Ignored:    analysis/cutoff_null_cache/
    Ignored:    analysis/design_matrix_2_cache/
    Ignored:    analysis/design_matrix_cache/
    Ignored:    analysis/diagnostic_ash_cache/
    Ignored:    analysis/diagnostic_correlated_z_2_cache/
    Ignored:    analysis/diagnostic_correlated_z_3_cache/
    Ignored:    analysis/diagnostic_correlated_z_cache/
    Ignored:    analysis/diagnostic_plot_2_cache/
    Ignored:    analysis/diagnostic_plot_cache/
    Ignored:    analysis/efron_leukemia_cache/
    Ignored:    analysis/fitting_normal_cache/
    Ignored:    analysis/gaussian_derivatives_2_cache/
    Ignored:    analysis/gaussian_derivatives_3_cache/
    Ignored:    analysis/gaussian_derivatives_4_cache/
    Ignored:    analysis/gaussian_derivatives_5_cache/
    Ignored:    analysis/gaussian_derivatives_cache/
    Ignored:    analysis/gd-ash_cache/
    Ignored:    analysis/gd_delta_cache/
    Ignored:    analysis/gd_lik_2_cache/
    Ignored:    analysis/gd_lik_cache/
    Ignored:    analysis/gd_w_cache/
    Ignored:    analysis/knockoff_10_cache/
    Ignored:    analysis/knockoff_2_cache/
    Ignored:    analysis/knockoff_3_cache/
    Ignored:    analysis/knockoff_4_cache/
    Ignored:    analysis/knockoff_5_cache/
    Ignored:    analysis/knockoff_6_cache/
    Ignored:    analysis/knockoff_7_cache/
    Ignored:    analysis/knockoff_8_cache/
    Ignored:    analysis/knockoff_9_cache/
    Ignored:    analysis/knockoff_cache/
    Ignored:    analysis/knockoff_var_cache/
    Ignored:    analysis/marginal_z_alternative_cache/
    Ignored:    analysis/marginal_z_cache/
    Ignored:    analysis/mosek_reg_2_cache/
    Ignored:    analysis/mosek_reg_4_cache/
    Ignored:    analysis/mosek_reg_5_cache/
    Ignored:    analysis/mosek_reg_6_cache/
    Ignored:    analysis/mosek_reg_cache/
    Ignored:    analysis/pihat0_null_cache/
    Ignored:    analysis/plot_diagnostic_cache/
    Ignored:    analysis/poster_obayes17_cache/
    Ignored:    analysis/real_data_simulation_2_cache/
    Ignored:    analysis/real_data_simulation_3_cache/
    Ignored:    analysis/real_data_simulation_4_cache/
    Ignored:    analysis/real_data_simulation_5_cache/
    Ignored:    analysis/real_data_simulation_cache/
    Ignored:    analysis/rmosek_primal_dual_2_cache/
    Ignored:    analysis/rmosek_primal_dual_cache/
    Ignored:    analysis/seqgendiff_cache/
    Ignored:    analysis/simulated_correlated_null_2_cache/
    Ignored:    analysis/simulated_correlated_null_3_cache/
    Ignored:    analysis/simulated_correlated_null_cache/
    Ignored:    analysis/simulation_real_se_2_cache/
    Ignored:    analysis/simulation_real_se_cache/
    Ignored:    analysis/smemo_2_cache/
    Ignored:    data/LSI/
    Ignored:    docs/.DS_Store
    Ignored:    docs/figure/.DS_Store
    Ignored:    output/fig/

</code></pre>
Note that any generated files, e.g. HTML, png, CSS, etc., are not included in this status report because it is ok for generated content to have uncommitted changes.
</details>
</li>
</ul>
<details>
<summary> <small><strong>Expand here to see past versions:</strong></small> </summary>
<ul>
<table style="border-collapse:separate; border-spacing:5px;">
<thead>
<tr>
<th style="text-align:left;">
File
</th>
<th style="text-align:left;">
Version
</th>
<th style="text-align:left;">
Author
</th>
<th style="text-align:left;">
Date
</th>
<th style="text-align:left;">
Message
</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;">
html
</td>
<td style="text-align:left;">
<a href="https://cdn.rawgit.com/LSun/truncash/e05bc836b3c74dc6ebca415afb5938675d6c5436/docs/mosek_reg_3.html" target="_blank">e05bc83</a>
</td>
<td style="text-align:left;">
LSun
</td>
<td style="text-align:left;">
2018-05-12
</td>
<td style="text-align:left;">
Update to 1.0
</td>
</tr>
<tr>
<td style="text-align:left;">
rmd
</td>
<td style="text-align:left;">
<a href="https://github.com/LSun/truncash/blob/cc0ab8379469bc3726f1508cd81e4ecd6fef1b1a/analysis/mosek_reg_3.rmd" target="_blank">cc0ab83</a>
</td>
<td style="text-align:left;">
Lei Sun
</td>
<td style="text-align:left;">
2018-05-11
</td>
<td style="text-align:left;">
update
</td>
</tr>
<tr>
<td style="text-align:left;">
html
</td>
<td style="text-align:left;">
<a href="https://cdn.rawgit.com/LSun/truncash/0f36d998db26444c5dd01502ea1af7fbd1129b22/docs/mosek_reg_3.html" target="_blank">0f36d99</a>
</td>
<td style="text-align:left;">
LSun
</td>
<td style="text-align:left;">
2017-12-21
</td>
<td style="text-align:left;">
Build site.
</td>
</tr>
<tr>
<td style="text-align:left;">
html
</td>
<td style="text-align:left;">
<a href="https://cdn.rawgit.com/LSun/truncash/853a484bfacf347e109f6c8fb3ffaab5f4d6cc02/docs/mosek_reg_3.html" target="_blank">853a484</a>
</td>
<td style="text-align:left;">
LSun
</td>
<td style="text-align:left;">
2017-11-07
</td>
<td style="text-align:left;">
Build site.
</td>
</tr>
<tr>
<td style="text-align:left;">
html
</td>
<td style="text-align:left;">
<a href="https://cdn.rawgit.com/LSun/truncash/4cc8ec5aac2895d5043afec62a36c063acd69d09/docs/mosek_reg_3.html" target="_blank">4cc8ec5</a>
</td>
<td style="text-align:left;">
LSun
</td>
<td style="text-align:left;">
2017-05-09
</td>
<td style="text-align:left;">
regularization
</td>
</tr>
<tr>
<td style="text-align:left;">
rmd
</td>
<td style="text-align:left;">
<a href="https://github.com/LSun/truncash/blob/b3932c75f92c779b3bc885e35d7fac2d6f7aad39/analysis/mosek_reg_3.rmd" target="_blank">b3932c7</a>
</td>
<td style="text-align:left;">
LSun
</td>
<td style="text-align:left;">
2017-05-09
</td>
<td style="text-align:left;">
regularization
</td>
</tr>
</tbody>
</table>
</ul>
</details>
<hr />
<div id="introduction" class="section level2">
<h2>Introduction</h2>
<p>The <span class="math inline">\(w\)</span> optimization problem we hope to solve has two constraints. If we do not impose these two constraints, chances are the optimization will be <a href="gaussian_derivatives_3.html">unstable</a>, especially when we don’t know <span class="math inline">\(L\)</span> for sure beforehand. However, these two constraints are hard to convert directly and strictly into convex or even tractable forms.</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{w} &amp; -\sum\limits_{j = 1}^n\log\left(\sum\limits_{l=1}^Lw_l\left(\sum\limits_{k = 1}^K\hat\pi_k  f_{jkl}\right) + \sum\limits_{k = 1}^K\hat\pi_kf_{jk0}\right) + 
\sum\limits_{l = 1}^L\lambda_l^w
\phi
\left(\left|w_l\right|\right)
\\
\text{subject to} &amp; \sum\limits_{l=1}^L w_l \varphi^{(l)}\left(z\right) + \varphi\left(z\right) \geq 0, \forall z\in \mathbb{R}\\
&amp; w_l \text{ decay reasonably fast.}
\end{array}
\]</span></p>
<p>One observation is that without the constraints, the optimization becomes unstable as <span class="math inline">\(\hat w\)</span> gets uncontrollably large. Therefore, a natural idea to replace these two constraints would be to bound, penalize, or regularize <span class="math inline">\(w\)</span>, to prevent it from becoming too large.</p>
<p>On the other hand, as indicated by the <a href="gaussian_derivatives.html">theory</a> and <a href="mosek_reg.html">examples</a> of good fitting by Gaussian derivatives, <span class="math inline">\(w_l\)</span> gets smaller as <span class="math inline">\(l\)</span> increases. Moreover, oftentimes, really small <span class="math inline">\(w_l\)</span> could still make a difference and thus be indispensable. Therefore, although we need to stop <span class="math inline">\(\hat w\)</span> from getting too large, we certainly don’t want to shrink them unnecessarily and unwarrantedly to zero.</p>
<p>Ideally, the goal of <span class="math inline">\(\sum\limits_{l = 1}^L\lambda_l^w \phi \left(\left|w_l\right|\right)\)</span> regularizing <span class="math inline">\(w\)</span> should be to</p>

</div>
<div id="ideas-on-lambda_lw-and-phi" class="section level2">
<h2>Ideas on <span class="math inline">\(\lambda_l^w\)</span> and <span class="math inline">\(\phi\)</span></h2>
<p>Remember that in <a href="gaussian_derivatives.html">theory</a>, if we are writing the random empirical density of the correlated marginally <span class="math inline">\(N\left(0, 1\right)\)</span> random samples as</p>
<p><span class="math display">\[
f_0\left(z\right) = \varphi\left(z\right) + \sum\limits_{l = 1}^\infty W_l\varphi^{(l)}\left(z\right) \  ,
\]</span> then</p>
<p><span class="math display">\[
\text{var}\left(W_l\right) = \frac{\alpha_l}{l!} \  ,
\]</span> where</p>
<p><span class="math display">\[
\alpha_l = \frac{1}{\frac{n\left(n - 1\right)}{2}}\sum\limits_{i &lt; j}\rho_{ij}^l := \bar{\rho_{ij}^l} \  .
\]</span></p>
<p>Therefore, naturally <span class="math inline">\(w_l\)</span> should decay somehow in the order of</p>
<p><span class="math display">\[
\left|w_l\right| \leadsto \sqrt{\text{var}\left(W_l\right)} =
\frac{\sqrt{\bar{\rho_{ij}^l}}}{\sqrt{l!}} \  .
\]</span></p>
<div id="normalization" class="section level3">
<h3>Normalization</h3>
<p>The order of decay suggests that we should work with <a href="mosek_reg_2.html">normalized coefficients</a>, so that</p>
<p><span class="math display">\[
\left|w_l^n\right| = \sqrt{l!}\left|w_l\right| \leadsto 
\sqrt{\bar{\rho_{ij}^l}} \  .
\]</span> This piece of information on the order of magnitude of the normalized <span class="math inline">\(w_l^n\)</span> provides a hint to determine <span class="math inline">\(\lambda_l^w\)</span>, for example,</p>
</div>
<div id="regularizing-even-orders-only" class="section level3">
<h3>Regularizing even orders only</h3>
<p>Odd orders of Gaussian derivatives are generally associated with mean shift and skewness of <span class="math inline">\(f_0\)</span>, so they are generally very small, but if they are indeed not zero, they are important however small they are.</p>
<p>Meanwhile, when <span class="math inline">\(l\)</span> is odd, <span class="math inline">\(\bar{\rho_{ij}^l}\)</span> could be very small, and not in order of <span class="math inline">\(\bar{\rho^l}\)</span> for some <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, so it’s very difficult to come up with a good <span class="math inline">\(\lambda_l^w\)</span> when <span class="math inline">\(l\)</span> is odd.</p>
<p>Therefore, it might be better to leave the odd orders alone and regularize the even orders only.</p>
</div>
<div id="penalty-l_1-and-l_2" class="section level3">
<h3>Penalty: <span class="math inline">\(l_1\)</span> and <span class="math inline">\(l_2\)</span></h3>
<p>Generally speaking, <span class="math inline">\(l_1\)</span> imposes sparsity by shrinking small estimates exactly to zero, which is both good and bad for our case, whereas <span class="math inline">\(l_2\)</span> penalizes large estimates severely but doesn’t force exact sparsity, which could also be good or bad.</p>
<p>We’ll implement both and see what happens.</p>
</div>
<div id="lambda_lw" class="section level3">
<h3><span class="math inline">\(\lambda_l^w\)</span></h3>
<p><span class="math inline">\(\lambda_l^w\)</span> is determined to help ensure</p>
<p><span class="math display">\[
\left|w_l^n\right| = \sqrt{l!}\left|w_l\right| \leadsto 
\sqrt{\bar{\rho_{ij}^l}} \  ,
\]</span></p>
<p>Given <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, for <span class="math inline">\(l_1\)</span> regularization, <span class="math inline">\(\lambda_l^n \sim \lambda / \sqrt{\rho^l}\)</span>; for <span class="math inline">\(l_2\)</span> regularization, <span class="math inline">\(\lambda_l^n \sim \lambda / \rho^l\)</span>.</p>
</div>
<div id="l" class="section level3">
<h3><span class="math inline">\(L\)</span></h3>
<p>So far, <span class="math inline">\(L = 9\)</span> has been able to handle all the examples we’ve tried. Without constraints or regularization, <span class="math inline">\(L = 9\)</span> is usually too large for cases where, say, <span class="math inline">\(L = 4\)</span> is enough, and a larger than necessary <span class="math inline">\(L\)</span> could lead to numerical instability.</p>
<p>Hence, we’ll experiment with <span class="math inline">\(L = 12\)</span>, assuming it’s enough for all the correlation induced distortions in practice, and assuming the regularization could prevent the optimization from going crazy.</p>
</div>
</div>
<div id="summary" class="section level2">
<h2>Summary</h2>
<p>The <span class="math inline">\(w\)</span> optimization problem becomes</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{w, g} &amp; 
-\sum\limits_{j = 1}^n
\log\left(g_j\right) 
+
\text{Regularization}
\\
\text{subject to}
&amp; Aw + a = g \\
&amp; g \geq 0\  ,
\end{array}
\]</span> where <span class="math inline">\(A_{n \times L} = \left[a_1,\ldots, a_L\right]\)</span> and <span class="math inline">\(a\)</span> are computed with normalized Gaussian derivatives and Hermite polynomials.</p>
<p>Given <span class="math inline">\(\lambda &gt; 0\)</span> and <span class="math inline">\(\rho \in \left(0, 1\right)\)</span>, the regularization term takes one of the following two forms.</p>
<div id="l_1-regularization" class="section level3">
<h3><span class="math inline">\(l_1\)</span> regularization</h3>
<p><span class="math display">\[
\begin{array}{l}
\sum\limits_{l = 1}^L\lambda_l^{w^s}\left|w_l^s\right| \  ,\\
\lambda_l^{w^s} = \begin{cases}
0, &amp; l \text{ is odd;} \\
\lambda / \rho^{l/2}, &amp; l \text{ is even.}
\end{cases}
\end{array}
\]</span></p>
</div>
<div id="l_2-regularization" class="section level3">
<h3><span class="math inline">\(l_2\)</span> regularization</h3>
<p><span class="math display">\[
\begin{array}{l}
\sum\limits_{l = 1}^L \lambda_l^{w^s}{w_l^s}^2 \  ,\\
\lambda_l^{w^s} = \begin{cases}
0, &amp; l \text{ is odd;} \\
\lambda / \rho^{l}, &amp; l \text{ is even.}
\end{cases}
\end{array}
\]</span></p>
</div>
</div>
<div id="dual-problem-for-l_1-regularization." class="section level2">
<h2>Dual problem for <span class="math inline">\(l_1\)</span> regularization.</h2>
<p>The primal form is</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{w, g} &amp; 
-\sum\limits_{j = 1}^n
\log\left(g_j\right) 
+
\sum\limits_{l = 1}^L\lambda_l^{w^s}\left|w_l^s\right|
\\
\text{subject to}
&amp; Aw + a = g \\
&amp; g \geq 0\  ,
\end{array}
\]</span> The dual form is</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{v} &amp; 
-\sum\limits_{j = 1}^n
\log\left(v_j\right) 
+ a^Tv - n
\\
\text{subject to}
&amp; -\lambda \leq A^Tv \leq \lambda \\
&amp; v \geq 0\  .
\end{array}
\]</span></p>
</div>
<div id="dual-problem-for-l_2-regularization." class="section level2">
<h2>Dual problem for <span class="math inline">\(l_2\)</span> regularization.</h2>
<p>The primal form is</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{w, g} &amp; 
-\sum\limits_{j = 1}^n
\log\left(g_j\right) 
+
\sum\limits_{l = 1}^L\lambda_l^{w^s}{w_l^s}^2
\\
\text{subject to}
&amp; Aw + a = g \\
&amp; g \geq 0\  ,
\end{array}
\]</span> The dual form is</p>
<p><span class="math display">\[
\begin{array}{rl}
\min\limits_{v} &amp; 
-\sum\limits_{j = 1}^n
\log\left(v_j\right) 
+ a^Tv - n
+ \frac14 v^T \left(A \Lambda^{-1} A^T\right)v
\\
\text{subject to}
&amp; a_j^Tv = 0 \    \text{ if }\lambda_j = 0 \  ,\\
&amp; v \geq 0\  ,
\end{array}
\]</span> where <span class="math inline">\(\Lambda = \begin{bmatrix}\lambda_1 &amp; &amp; \\ &amp; \ddots &amp; \\ &amp; &amp; \lambda_L \end{bmatrix}\)</span>, and <span class="math inline">\({\Lambda^{-1}}_{jj} = 0\)</span> when <span class="math inline">\(\lambda_j = 0\)</span>.</p>
</div>
<div id="choosing-lambda-and-rho" class="section level2">
<h2>Choosing <span class="math inline">\(\lambda\)</span> and <span class="math inline">\(\rho\)</span></h2>
<p>Let’s start with <span class="math inline">\(\rho = 0.9\)</span> and <span class="math inline">\(\lambda = 0.1\)</span>.</p>
</div>

<!-- Adjust MathJax settings so that all math formulae are shown using
TeX fonts only; see
http://docs.mathjax.org/en/latest/configuration.html.  This will make
the presentation more consistent at the cost of the webpage sometimes
taking slightly longer to load. Note that this only works because the
footer is added to webpages before the MathJax javascript. -->
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({
    "HTML-CSS": { availableFonts: ["TeX"] }
  });
</script>

<hr>
<p>
  This reproducible <a href="http://rmarkdown.rstudio.com">R Markdown</a>
  analysis was created with
  <a href="https://github.com/jdblischak/workflowr">workflowr</a> 1.0.1
</p>
<hr>


</div>
</div>

</div>

<script>

// Add bootstrap table styles to pandoc tables. A table qualifies when a
// tr.header row sits directly inside a thead that is itself a direct child
// of that table.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var tableHeads = headerRows.parent('thead');
  tableHeads.parent('table').addClass('table table-condensed');
}
// Apply the styling once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});


</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Create a script element pointing at the MathJax loader and append it
  // to the document's head element at run time.
  (function () {
    var mathjaxTag = document.createElement("script");
    mathjaxTag.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    mathjaxTag.type = "text/javascript";
    var headElement = document.getElementsByTagName("head")[0];
    headElement.appendChild(mathjaxTag);
  })();
</script>

</body>
</html>