Last updated: 2018-03-06
Code version: e16e34a
library(flashr); library(R.matlab)
R.matlab v3.6.1 (2016-10-19) successfully loaded. See ?R.matlab for help.
Attaching package: 'R.matlab'
The following objects are masked from 'package:base':
getOption, isOpen
library(denoiseR)
Presidential Address data:
The data contains word counts from the inaugural addresses of 13 US presidents (1940–2009). There are 13 rows (presidents) and 836 columns (words), and each entry indicates the number of times a particular word was used in a particular address. Since both row and column means vary greatly here, we pre-processed the data by centering and scaling both rows and columns, using the biScale function from softImpute.
# Load the Presidents dataset; below it is used only as the source of labels.
data("Presidents")
# Read the MATLAB file and keep just its Yscaled entry as the working matrix.
data <- readMat("../data/PresiAdd.mat")$Yscaled
# Row names of Presidents become the column labels of `data`, and its column
# names become the row labels.
words <- rownames(Presidents)
names <- colnames(Presidents)
rownames(data) <- names
colnames(data) <- words
Flash:
# Wrap the labelled matrix as a flash data object, then fit with both the
# greedy and backfit options switched on.
flash.data <- flash_set_data(data)
fmodel <- flash(flash.data, greedy = TRUE, backfit = TRUE)
fitting factor/loading 1
fitting factor/loading 2
fitting factor/loading 3
fitting factor/loading 4
fitting factor/loading 5
fitting factor/loading 6
# Save the fitted model to disk.
saveRDS(fmodel, '../output/PresiAddVarCol.rds')
# Factor matrix of the fit, with one row per word.
Factors <- flash_get_ldf(fmodel)$f
row.names(Factors) <- words
# Compute the PVE values once and reuse them for both the ordering and the
# plot titles, instead of calling flash_get_pve() on every iteration.
pve <- flash_get_pve(fmodel)
pve.order <- order(pve, decreasing = TRUE)
# Small margins and a 3 x 2 grid; NOTE(review): the grid assumes at most six
# factors are plotted -- confirm if the number of factors can grow.
par(mar = c(1, 1, 1, 1))
par(mfrow = c(3, 2))
# One bar plot per factor, from largest to smallest PVE.
for (i in pve.order) {
  barplot(Factors[, i],
          main = paste0('Factor ', i, ' pve= ', round(pve[i], 3)),
          las = 2,
          # Spelled out in full: the original `names=` only worked through
          # partial matching of barplot's `names.arg`.
          names.arg = '')
}
# Back to a single-panel layout (the margin setting is left as is).
par(mfrow = c(1, 1))
Flash again on the loading matrix
# Run flash a second time, now on the first five columns of the EL matrix
# stored in the first fit, with the same greedy and backfit options.
flash.loading <- flash_set_data(fmodel$EL[, 1:5])
flmodel <- flash(flash.loading, greedy = TRUE, backfit = TRUE)
fitting factor/loading 1
fitting factor/loading 2
# Factor matrix of the second flash fit (the one run on the loadings).
Factors_loading <- flash_get_ldf(flmodel)$f
# Plot its values as bars. `names.arg` is spelled out in full; the original
# `names=` only worked through partial argument matching.
barplot(as.numeric(Factors_loading),
        main = paste0('Factor 1 pve= ', round(flash_get_pve(flmodel), 3)),
        las = 2, cex.names = 0.4, names.arg = '')
The distributions for the first four columns of the loading matrix are consistent with a normal distribution. So they are captured by the error term of the second flash run.
It is hard to interpret each factor; none of them is sparse. For example, for factor 1 in the first flash run,
# List the words whose value in the first factor column is strictly positive.
words[Factors[,1] > 0]
[1] "$1" "0" "Asia "
[4] "European" "Iraq" "Iraqa"
[7] "Iraqi" "Medicare" "Vietnam"
[10] "a" "ability" "about"
[13] "abroad " "accept " "access "
[16] "accomplish " "account" "achievement"
[19] "add " "addition" "address"
[22] "adopt" "affair " "affect "
[25] "afford " "afghanistan" "africa "
[28] "ago " "agree" "aim "
[31] "air " "alliance" "allow"
[34] "alone" "always " "and "
[37] "answer " "approach" "appropriate"
[40] "approve" "army " "assist "
[43] "attempt" "attention " "available "
[46] "average" "avoid" "away "
[49] "back " "bad " "balance"
[52] "balanced" "bank " "basis"
[55] "bear " "before " "beginning "
[58] "being" "big " "bill "
[61] "bipartisan " "border " "branch "
[64] "break" "broad" "building"
[67] "burden " "but " "buy "
[70] "ca" "capacity" "capital"
[73] "case " "century" "chamber"
[76] "chance " "china" "choice "
[79] "choose " "civilian" "clean"
[82] "clearly" "close" "college"
[85] "commit " "committee " "common "
[88] "company" "competition" "competitive"
[91] "comprehensive" "conference " "confidence "
[94] "conflict" "confront" "conservation "
[97] "constitution " "construction " "consumer"
[100] "continued " "contribute " "contribution "
[103] "courage" "coverage" "credit "
[106] "crime" "crisis " "critical"
[109] "current" "deal " "decade "
[112] "decision" "decline" "deep "
[115] "defend " "deficit" "demonstrate"
[118] "depend " "destroy" "determination"
[121] "difference " "different " "difficult "
[124] "direct " "direction " "directly"
[127] "disease" "domestic" "double "
[130] "down " "dream" "drug "
[133] "duty " "early" "earn "
[136] "earth" "easy " "effect "
[139] "efficiency " "efficient " "election"
[142] "eliminate " "emergency " "employee"
[145] "enable " "enact" "energy "
[148] "enforcement" "enough " "ensure "
[151] "enter" "entire " "environment"
[154] "equal" "era " "especially "
[157] "event" "example" "expansion "
[160] "experience " "export " "facility"
[163] "fail " "fall " "fear "
[166] "fellow " "final" "finally"
[169] "finance" "financial " "form "
[172] "foundation " "friend " "fulfill"
[175] "fully" "fundamental" "funding"
[178] "gain " "generation " "get "
[181] "global " "god " "guarantee "
[184] "half " "hand " "happen "
[187] "hard " "hear " "heart"
[190] "hemisphere " "historic" "hold "
[193] "honor" "hospital" "hour "
[196] "house" "how " "idea "
[199] "ideal" "i" "immediate "
[202] "importance " "improvement" "in"
[205] "incentive " "independence " "influence "
[208] "information" "instead" "institution"
[211] "intend " "invest " "investment "
[214] "involve" "issue" "japan"
[217] "key " "kind " "korea"
[220] "launch " "lay " "leadership "
[223] "least" "leave" "legislative"
[226] "liberty" "lie " "like "
[229] "limit" "line " "little "
[232] "living " "loan " "longer "
[235] "look " "lose " "majority"
[238] "matter " "middle " "might"
[241] "minimum" "missile" "moment "
[244] "money" "moral" "mr"
[247] "mutual " "n" "natural"
[250] "nearly " "negotiation" "neighbor"
[253] "no" "north" "nothing"
[256] "obligation " "offer" "office "
[259] "often" "oil " "once "
[262] "one " "open " "operation "
[265] "out " "parent " "particular "
[268] "particularly " "partner" "partnership"
[271] "party" "payment" "peacetime "
[274] "permanent " "person " "personal"
[277] "plant" "play " "pledge "
[280] "police " "poor " "position"
[283] "poverty" "powerful" "practice"
[286] "prepare" "preserve" "press"
[289] "priority" "product" "productive "
[292] "profit " "promise" "proposal"
[295] "proud" "prove" "provision "
[298] "pursue " "put " "quality"
[301] "question" "race " "range"
[304] "rapidly" "ready" "realize"
[307] "reason " "receive" "recent "
[310] "recession " "recognize " "record "
[313] "recovery" "reform " "region "
[316] "regulation " "relationship " "remember"
[319] "remove " "renew" "represent "
[322] "representative " "republic" "request"
[325] "requirement" "reserve" "resolve"
[328] "responsible" "rest " "restore"
[331] "retirement " "return " "revenue"
[334] "review " "revolution " "reward "
[337] "risk " "road " "role "
[340] "rule " "run " "rural"
[343] "sacrifice " "safe " "saving "
[346] "science" "sector " "senate "
[349] "send " "sense" "serious"
[352] "session" "short" "side "
[355] "sign " "significant" "simple "
[358] "simply " "single " "situation "
[361] "skill" "solve" "something "
[364] "soon " "sound" "source "
[367] "south" "space" "speak"
[370] "speaker" "specific" "speed"
[373] "spend" "spending" "stability "
[376] "stable " "stand" "start"
[379] "steady " "stop " "strategic "
[382] "strategy" "street " "strike "
[385] "structure " "struggle" "student"
[388] "subject" "submit " "succeed"
[391] "success" "successful " "sure "
[394] "t" "talk " "taxis"
[397] "taxpayer" "teacher" "technology "
[400] "tell " "term " "terror "
[403] "terrorist " "test " "thank"
[406] "there" "thousand" "threaten"
[409] "thus " "tomorrow" "too "
[412] "tough" "training" "transportation "
[415] "troop" "truly" "trust"
[418] "turn " "understand " "understanding"
[421] "unity" "up" "vast "
[424] "vice " "victory" "view "
[427] "vote " "washington " "waste"
[430] "we" "week " "welcome"
[433] "western" "whatever" "whose"
[436] "why " "win " "word "
[439] "young"
The first factor separates the words above from the others, but it is hard to summarize what these words have in common.
The fifth factor separates the words below from the others.
# List the words whose value in the fifth factor column exceeds 0.1.
words[Factors[,5] > 0.1]
[1] "address" "chance " "difference " "look " "message"
[6] "place" "set " "structure " "truly" "washington "
# Print the R version, platform and package versions used for this run.
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] denoiseR_1.0 R.matlab_3.6.1 flashr_0.5-6
loaded via a namespace (and not attached):
[1] Rcpp_0.12.15 compiler_3.4.3 git2r_0.20.0
[4] plyr_1.8.4 R.utils_2.6.0 R.methodsS3_1.7.1
[7] iterators_1.0.9 tools_3.4.3 digest_0.6.13
[10] evaluate_0.10.1 tibble_1.3.4 gtable_0.2.0
[13] lattice_0.20-35 rlang_0.1.6 Matrix_1.2-12
[16] foreach_1.4.4 yaml_2.1.17 parallel_3.4.3
[19] ebnm_0.1-10 cluster_2.0.6 stringr_1.3.0
[22] knitr_1.20 flashClust_1.01-2 scatterplot3d_0.3-40
[25] rprojroot_1.2 grid_3.4.3 rmarkdown_1.8
[28] irlba_2.3.2 FactoMineR_1.39 ggplot2_2.2.1
[31] ashr_2.2-7 magrittr_1.5 leaps_3.0
[34] backports_1.1.2 scales_0.5.0 codetools_0.2-15
[37] htmltools_0.3.6 MASS_7.3-47 assertthat_0.2.0
[40] softImpute_1.4 colorspace_1.3-2 stringi_1.1.6
[43] lazyeval_0.2.1 pscl_1.5.2 doParallel_1.0.11
[46] munsell_0.4.3 truncnorm_1.0-8 SQUAREM_2017.10-1
[49] R.oo_1.21.0
This R Markdown site was created with workflowr