Last updated: 2018-09-18
workflowr checks: (Click a bullet for more information) ✔ R Markdown file: up-to-date 
Great! Since the R Markdown file has been committed to the Git repository, you know the exact version of the code that produced these results.
 ✔ Environment: empty 
Great job! The global environment was empty. Objects defined in the global environment can affect the analysis in your R Markdown file in unknown ways. For reproduciblity it’s best to always run the code in an empty environment.
 ✔ Seed: 
set.seed(20180714) 
The command set.seed(20180714) was run prior to running the code in the R Markdown file. Setting a seed ensures that any results that rely on randomness, e.g. subsampling or permutations, are reproducible.
 ✔ Session information: recorded 
Great job! Recording the operating system, R version, and package versions is critical for reproducibility.
 ✔ Repository version: d08b991 
wflow_publish or wflow_git_commit). workflowr only checks the R Markdown file, but you know if there are other scripts or data files that it depends on. Below is the status of the Git repository when the results were generated:
Ignored files:
    Ignored:    .DS_Store
    Ignored:    .Rhistory
    Ignored:    .Rproj.user/
    Ignored:    docs/.DS_Store
    Ignored:    docs/figure/.DS_Store
Untracked files:
    Untracked:  data/greedy19.rds
Unstaged changes:
    Modified:   analysis/nonnegative.Rmd
| File | Version | Author | Date | Message | 
|---|---|---|---|---|
| Rmd | d08b991 | Jason Willwerscheid | 2018-09-18 | wflow_publish(“analysis/scalar_tau.Rmd”) | 
| html | 8f492a4 | Jason Willwerscheid | 2018-09-18 | Build site. | 
| Rmd | dacfb93 | Jason Willwerscheid | 2018-09-18 | wflow_publish(“analysis/scalar_tau.Rmd”) | 
| html | a9b2d31 | Jason Willwerscheid | 2018-09-17 | Build site. | 
| Rmd | fe526ea | Jason Willwerscheid | 2018-09-17 | wflow_publish(“analysis/scalar_tau.Rmd”) | 
In flashr Issue #83, I identify some possible ways to speed up fitting when var_type = "constant" or when var_type = "zero" and S (the standard errors) is stored as a scalar. I also identify ways to reduce memory usage when loadings or factors are not fixed (and they will almost never both have fixed elements).
Here I implement the suggested changes and profile the code on data from the GTEx project. I compare elapsed time and total memory used in five scenarios: 1. var_type = "by_column"; 2. var_type = "constant"; 3. var_type = "zero" (with standard errors fixed); 4. var_type = "constant" with missing data; and 5. var_type = "by_column" with fixed factors. (The fifth scenario was added to help ensure that no bugs were introduced into the code; little improvement is expected.)
First I load the data and generate missing entries.
devtools::load_all("~/GitHub/ebnm")Loading ebnmlibrary("profmem")
gtex <- readRDS(gzcon(url("https://github.com/stephenslab/gtexresults/blob/master/data/MatrixEQTLSumStats.Portable.Z.rds?raw=TRUE")))
set.seed(666)
strong <- gtex$strong.z
strong_missing <- strong
strong_missing[sample(1:length(strong), 
                    floor(0.1*length(strong)), 
                    replace=FALSE)] <- NA
K <- 3 # number of factors to greedily add in scenarios 1-4
# fixed factors for scenario 5:
FF <- cbind(rep(1, 44),
            c(rep(0, 6), rep(NA, 10), rep(0, 28)),
            c(rep(0, 43), 1))Next I load the branch with the proposed changes. For each scenario (except the fifth), I add three factors greedily and then backfit.
system("cd ~/GitHub/flashr; git checkout efficient-tau2")
devtools::load_all("~/GitHub/flashr")Loading flashrdata <- flash_set_data(strong)
data_fixS <- flash_set_data(strong, S = 1)
data_missing <- flash_set_data(strong_missing)
mem_bycol_aft <- profmem::profmem({
  t_bycol_aft <- system.time({
    fl_bycol_after <- flash(data, Kmax=K,
                            var_type="by_column",
                            backfit=TRUE, verbose=FALSE)
  })
})
mem_const_aft <- profmem::profmem({
  t_const_aft <- system.time({
    fl_const_after <- flash(data, Kmax=K,
                            var_type="constant",
                            backfit=TRUE, verbose=FALSE)
  })
})
mem_zero_aft <- profmem::profmem({
  t_zero_aft <- system.time({
    fl_zero_after <- flash(data_fixS, Kmax=K, 
                           var_type="zero",
                           backfit=TRUE, verbose=FALSE)
  })
})
mem_miss_aft <- profmem::profmem({
  t_miss_aft <- system.time({
    fl_miss_after <- flash(data_missing, Kmax=K, 
                           var_type="constant",
                           backfit=TRUE, verbose=FALSE)
  })
})
mem_fixed_aft <- profmem::profmem({
  t_fixed_aft <- system.time({
    fl_fixed_after <- flash_add_fixed_factors(data_missing, FF,
                                              backfit=TRUE,
                                              verbose=FALSE)
  })
})Finally, I run the code from the master branch. (I run this code second so that none of the performance gains can be attributed to caching.)
system("cd ~/GitHub/flashr; git checkout master")
devtools::load_all("~/GitHub/flashr")Loading flashrdata <- flash_set_data(strong)
data_fixS <- flash_set_data(strong, S = 1)
data_missing <- flash_set_data(strong_missing)
mem_bycol_bef <- profmem::profmem({
  t_bycol_bef <- system.time({
    fl_bycol_before <- flash(data, Kmax=K,
                             var_type="by_column",
                             backfit=TRUE, verbose=FALSE)
  })
})
mem_const_bef <- profmem::profmem({
  t_const_bef <- system.time({
    fl_const_before <- flash(data, Kmax=K,
                             var_type="constant",
                             backfit=TRUE, verbose=FALSE)
  })
})
mem_zero_bef <- profmem::profmem({
  t_zero_bef <- system.time({
    fl_zero_before <- flash(data_fixS, Kmax=K, 
                            var_type="zero",
                            backfit=TRUE, verbose=FALSE)
  })
})
mem_miss_bef <- profmem::profmem({
  t_miss_bef <- system.time({
    fl_miss_before <- flash(data_missing, Kmax=K, 
                            var_type="constant",
                            backfit=TRUE, verbose=FALSE)
  })
})
mem_fixed_bef <- profmem::profmem({
  t_fixed_bef <- system.time({
    fl_fixed_before <- flash_add_fixed_factors(data_missing, FF, 
                                               backfit=TRUE,
                                               verbose=FALSE)
  })
})The elapsed time per trial (in seconds) is as follows.
rnames <- c("before", "after")
cnames <- c("by_column", "constant", "zero", "missing", "fixed")
t_table <- t(matrix(c(t_bycol_bef[3], 
                      t_const_bef[3], 
                      t_zero_bef[3], 
                      t_miss_bef[3],
                      t_fixed_bef[3],
                      t_bycol_aft[3], 
                      t_const_aft[3],
                      t_zero_aft[3], 
                      t_miss_aft[3],
                      t_fixed_aft[3]),
                    nrow=5, ncol=2))
rownames(t_table) <- rnames
colnames(t_table) <- cnames
round(t_table, digits = 1)       by_column constant  zero missing fixed
before      56.4     83.8 104.6    57.2  13.9
after       33.3     44.1  56.1    45.9  11.3The memory used per trial (in GB) is as follows.
mem_table <- t(matrix(c(sum(mem_bycol_bef$bytes, na.rm=TRUE),
                        sum(mem_const_bef$bytes, na.rm=TRUE),
                        sum(mem_zero_bef$bytes, na.rm=TRUE),
                        sum(mem_miss_bef$bytes, na.rm=TRUE),
                        sum(mem_fixed_bef$bytes, na.rm=TRUE),
                        sum(mem_bycol_aft$bytes, na.rm=TRUE),
                        sum(mem_const_aft$bytes, na.rm=TRUE),
                        sum(mem_zero_aft$bytes, na.rm=TRUE),
                        sum(mem_miss_aft$bytes, na.rm=TRUE),
                        sum(mem_fixed_aft$bytes, na.rm=TRUE)),
                      nrow=5, ncol=2)) / 1024^3
rownames(mem_table) <- rnames
colnames(mem_table) <- cnames
round(mem_table, digits = 1)       by_column constant zero missing fixed
before      35.6     51.7 67.0    36.4   6.7
after       22.7     29.6 35.4    29.7   6.3Finally, I run a quick check to verify that I am getting the same flash object in each case.
all.equal(fl_bycol_before$objective, fl_bycol_after$objective)[1] TRUEall.equal(fl_const_before$objective, fl_const_after$objective)[1] TRUEall.equal(fl_zero_before$objective, fl_zero_after$objective)[1] TRUEall.equal(fl_miss_before$objective, fl_miss_after$objective)[1] TRUEall.equal(fl_fixed_before$objective, fl_fixed_after$objective)[1] TRUEsessionInfo()R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     
other attached packages:
[1] flashr_0.6-1  profmem_0.5.0 ebnm_0.1-14  
loaded via a namespace (and not attached):
 [1] Rcpp_0.12.18        pillar_1.2.1        plyr_1.8.4         
 [4] compiler_3.4.3      git2r_0.21.0        workflowr_1.0.1    
 [7] R.methodsS3_1.7.1   R.utils_2.6.0       iterators_1.0.9    
[10] tools_3.4.3         testthat_2.0.0      digest_0.6.15      
[13] tibble_1.4.2        gtable_0.2.0        evaluate_0.10.1    
[16] memoise_1.1.0       lattice_0.20-35     rlang_0.2.0        
[19] Matrix_1.2-12       foreach_1.4.4       commonmark_1.4     
[22] yaml_2.1.17         parallel_3.4.3      withr_2.1.1.9000   
[25] stringr_1.3.0       roxygen2_6.0.1.9000 xml2_1.2.0         
[28] knitr_1.20          devtools_1.13.4     rprojroot_1.3-2    
[31] grid_3.4.3          R6_2.2.2            rmarkdown_1.8      
[34] reshape2_1.4.3      ggplot2_2.2.1       ashr_2.2-13        
[37] magrittr_1.5        whisker_0.3-2       scales_0.5.0       
[40] backports_1.1.2     codetools_0.2-15    htmltools_0.3.6    
[43] MASS_7.3-48         softImpute_1.4      colorspace_1.3-2   
[46] stringi_1.1.6       lazyeval_0.2.1      munsell_0.4.3      
[49] doParallel_1.0.11   pscl_1.5.2          truncnorm_1.0-8    
[52] SQUAREM_2017.10-1   R.oo_1.21.0        This reproducible R Markdown analysis was created with workflowr 1.0.1