Last updated: 2018-03-06

Code version: 193208d

R=5

library(mashr)
Loading required package: ashr
set.seed(2018)
data = simple_sims(500, err_sd = 0.1)

data.miss = data
# select missing rows
missing.row = sample(1:nrow(data$Bhat), nrow(data$Bhat)/4)
missing.ind = matrix(0, nrow = nrow(data$B), ncol=ncol(data$B))
missing.ind[missing.row,] = 1

# set missing rows to NA
missing = which(missing.ind == 1)
data.miss$Bhat[missing] = NA
data.miss$Shat[missing] = NA
missing1 = is.na(data.miss$Bhat[,1])

# set the missing value with large standard deviation
data.large.dev = data.miss
data.large.dev$Bhat[is.na(data.miss$Bhat)] = 0
data.large.dev$Shat[is.na(data.miss$Shat)] = 1000

EE model

# with missing values
mash.data.missing = mash_set_data(Bhat=data.large.dev$Bhat, Shat=data.large.dev$Shat)
U.c = cov_canonical(mash.data.missing)

mash.model.missing = mash(mash.data.missing, U.c, verbose = FALSE)

# delete missing values
data.miss.na = data.miss
data.miss.na$Bhat = na.omit(data.miss$Bhat)
data.miss.na$Shat = na.omit(data.miss$Shat)

mash.data.missing.na = mash_set_data(Bhat=data.miss.na$Bhat, Shat= data.miss.na$Shat)
U.c = cov_canonical(mash.data.missing.na)

mash.model.missing.na = mash(mash.data.missing.na, U.c, verbose=FALSE)
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
               consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(1, 0.204132576568413,
0.736530341806702, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
par(mfrow=c(1,2))
barplot(get_estimated_pi(mash.model.missing), las=2, cex.names = 0.7, main='with missing values')
barplot(get_estimated_pi(mash.model.missing.na), las=2, cex.names = 0.7, main = 'delete missing values')

par(mfrow=c(1,1))

EZ model

# with missing values
mash.data.missing.1 = mash_set_data(Bhat=data.large.dev$Bhat, Shat=data.large.dev$Shat, alpha = 1)
U.c = cov_canonical(mash.data.missing.1)

mash.model.missing.1 = mash(mash.data.missing.1, U.c, verbose = FALSE)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
# delete missing values
mash.data.missing.na.1 = mash_set_data(Bhat=data.miss.na$Bhat, Shat= data.miss.na$Shat, alpha = 1)
U.c = cov_canonical(mash.data.missing.na.1)

mash.model.missing.na.1 = mash(mash.data.missing.na.1, U.c, verbose=FALSE)
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
               consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(1, 0.204132576568412,
0.736530341806701, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
par(mfrow=c(1,2))
barplot(get_estimated_pi(mash.model.missing.1), las=2, cex.names = 0.7, main='with missing values')
barplot(get_estimated_pi(mash.model.missing.na.1), las=2, cex.names = 0.7, main='delete missing values')

par(mfrow=c(1,1))

R=60

set.seed(2018)
data = simple_sims(500, ncond = 60, err_sd = 0.1)

data.miss = data
missing.row = sample(1:nrow(data$Bhat), nrow(data$Bhat)/4)
missing.ind = matrix(0, nrow = nrow(data$B), ncol=ncol(data$B))
missing.ind[missing.row,] = 1

missing = which(missing.ind == 1)
data.miss$Bhat[missing] = NA
data.miss$Shat[missing] = NA
missing1 = is.na(data.miss$Bhat[,1])
# Set the missing value with large standard deviation
data.large.dev = data.miss
data.large.dev$Bhat[is.na(data.miss$Bhat)] = 0
data.large.dev$Shat[is.na(data.miss$Shat)] = 1000

EE model

With missing values

mash.data.missing = mash_set_data(Bhat=data.large.dev$Bhat, Shat=data.large.dev$Shat)
U.c = cov_canonical(mash.data.missing)

mash.model.missing = mash(mash.data.missing, U.c, verbose = FALSE)
barplot(get_estimated_pi(mash.model.missing), las=2, cex.names = 0.7)

The weights learned from the data are correct.

Delete missing values

data.miss.na = data.miss
data.miss.na$Bhat = na.omit(data.miss$Bhat)
data.miss.na$Shat = na.omit(data.miss$Shat)

mash.data.missing.na = mash_set_data(Bhat=data.miss.na$Bhat, Shat= data.miss.na$Shat)
U.c = cov_canonical(mash.data.missing.na)

mash.model.missing.na = mash(mash.data.missing.na, U.c, verbose=FALSE)
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
               consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(0.240920767436896,
0.337563330121546, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
barplot(get_estimated_pi(mash.model.missing.na), las=2, cex.names = 0.7)

The estimated weights are very weird and incorrect. It ignores the null matrix.

EZ model

With missing values

mash.data.missing.1 = mash_set_data(Bhat=data.large.dev$Bhat, Shat=data.large.dev$Shat, alpha = 1)
U.c = cov_canonical(mash.data.missing.1)

mash.model.missing.1 = mash(mash.data.missing.1, U.c, verbose = FALSE)
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
               consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(0.240920767436896,
0.33756333012155, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(mash.model.missing.1), las=2, cex.names = 0.7)

Delete missing values

mash.data.missing.na.1 = mash_set_data(Bhat=data.miss.na$Bhat, Shat= data.miss.na$Shat, alpha = 1)
U.c = cov_canonical(mash.data.missing.na.1)

mash.model.missing.na.1 = mash(mash.data.missing.na.1, U.c, verbose=FALSE)
Warning in REBayes::KWDual(A, rep(1, k), normalize(w), control = control): estimated mixing distribution has some negative values:
               consider reducing rtol
Warning in mixIP(matrix_lik = structure(c(0.240920767436896,
0.33756333012155, : Optimization step yields mixture weights that are
either too small, or negative; weights have been corrected and renormalized
after the optimization.
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
barplot(get_estimated_pi(mash.model.missing.na.1), las=2, cex.names = 0.7)

The estimated weights are different.

Subgroups

The weired covariance structure is caused by the high dimension of R.

If we separate conditions into several groups, the covariance structure will be correct.

We show this for the EZ model R=60.

mash.data.missing.1.sub1 = mash_set_data(Bhat=data.large.dev$Bhat[,1:20], Shat=data.large.dev$Shat[,1:20], alpha = 1)
U.c = cov_canonical(mash.data.missing.1.sub1)
mash.model.missing.1.sub1 = mash(mash.data.missing.1.sub1, U.c, verbose = FALSE)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
mash.data.missing.1.sub2 = mash_set_data(Bhat=data.large.dev$Bhat[,21:40], Shat=data.large.dev$Shat[,21:40], alpha = 1)
U.c = cov_canonical(mash.data.missing.1.sub2)
mash.model.missing.1.sub2 = mash(mash.data.missing.1.sub2, U.c, verbose = FALSE)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
mash.data.missing.1.sub3 = mash_set_data(Bhat=data.large.dev$Bhat[,41:60], Shat=data.large.dev$Shat[,41:60], alpha = 1)
U.c = cov_canonical(mash.data.missing.1.sub3)
mash.model.missing.1.sub3 = mash(mash.data.missing.1.sub3, U.c, verbose = FALSE)
FIXME: 'compute_posterior_matrices' in Rcpp does not transfer EZ to EE
par(mfcol=c(3,1))
barplot(get_estimated_pi(mash.model.missing.1.sub1), las=2, cex.names = 0.7, main='condition 1:20')
barplot(get_estimated_pi(mash.model.missing.1.sub2), las=2, cex.names = 0.7, main='condition 21:40')
barplot(get_estimated_pi(mash.model.missing.1.sub3), las=2, cex.names = 0.7,
main='condition 41:60')

par(mfcol=c(1,1))

Session information

sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] mashr_0.2-6 ashr_2.2-7 

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.15      knitr_1.20        magrittr_1.5     
 [4] REBayes_1.2       MASS_7.3-47       doParallel_1.0.11
 [7] pscl_1.5.2        SQUAREM_2017.10-1 lattice_0.20-35  
[10] foreach_1.4.4     plyr_1.8.4        stringr_1.3.0    
[13] tools_3.4.3       parallel_3.4.3    grid_3.4.3       
[16] rmeta_2.16        git2r_0.20.0      htmltools_0.3.6  
[19] iterators_1.0.9   assertthat_0.2.0  yaml_2.1.17      
[22] rprojroot_1.2     digest_0.6.13     Matrix_1.2-12    
[25] codetools_0.2-15  evaluate_0.10.1   rmarkdown_1.8    
[28] stringi_1.1.6     compiler_3.4.3    Rmosek_8.0.69    
[31] backports_1.1.2   mvtnorm_1.0-7     truncnorm_1.0-8  

This R Markdown site was created with workflowr