Last updated: 2018-07-17

workflowr checks: (Click a bullet for more information)
Expand here to see past versions:


Pollen

i = 2

load('../data/unnecessary_in_building/2_Pollen.RData')
X        = as.matrix(Pollen$x)
truth    = as.numeric(as.factor(Pollen$label))
numClust = length(unique(truth))
logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Pollen-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R))
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Pollen-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.7755788
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.8325414
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.7665684

Usoskin

i = 3

load('../data/unnecessary_in_building/3_Usoskin.RData')
X        = as.matrix(Usoskin$X)
truth    = as.numeric(as.factor(as.character(Usoskin$lab1)))
numClust = 4
rm(Usoskin)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

plot(irlba(logX,1)$v[,1]~log(det))

Expand here to see past versions of Usoskin-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

det2 = qr(cbind(rep(1, length(det)), log(det)))
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Usoskin-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=20)
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Usoskin-3.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.6188269
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.8746858
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.6348444
rm(R,X,logX,res1,res2,res3); gc()
          used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
Ncells 2221109 118.7    3972565  212.2         NA   3972565  212.2
Vcells 5410323  41.3  166030754 1266.8      16384 207538345 1583.4

Buettner

i = 4

#read data
load('../data/unnecessary_in_building/4_Buettner.RData')
X        = as.matrix(Buettner$X)
truth    = as.numeric(as.factor(Buettner$label))
numClust = 3
rm(Buettner)

logX = log(X+1)
det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Buettner-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=20)
tsne2 = Rtsne(t(logX), perplexity=20)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Buettner-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.4329975
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.428236
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.4145447
rm(R,X,logX,res1,res2,res3); gc()
          used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
Ncells 2203793 117.7    3972484  212.2         NA   3972484  212.2
Vcells 5419519  41.4  138305840 1055.2      16384 172882188 1319.0

Yan

i = 5

load('../data/unnecessary_in_building/5_Yan.rda')
X        = as.matrix(yan)
truth    = as.character(ann$cell_type1)
truth    = as.numeric(as.factor(truth))
numClust = 6
rm(ann, yan)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Yan-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=20)
tsne2 = Rtsne(t(logX), perplexity=20)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Yan-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.8954618
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.8954618
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.675345
rm(R,X,logX,res1,res2,res3); gc()
          used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
Ncells 2236314 119.5    3972484 212.2         NA   3972484  212.2
Vcells 5358557  40.9  110644672 844.2      16384 172882188 1319.0

Treutlein

i = 6

load('../data/unnecessary_in_building/6_Treutlein.rda')
X        = as.matrix(treutlein)
truth    = as.numeric(colnames(treutlein))
ind      = sort(truth, index.return=TRUE)$ix
X        = X[,ind]
truth    = truth[ind]
numClust = length(unique(truth))
rm(treutlein)
logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(cbind(log(det), rep(1, length(det))))
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Treutlein-1.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=10)
tsne2 = Rtsne(t(logX), perplexity=10)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Treutlein-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.3672819
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.4136064
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.3488583
rm(R,X,logX,res1,res2,res3); gc()
          used  (Mb) gc trigger  (Mb) limit (Mb)  max used   (Mb)
Ncells 2225549 118.9    3972484 212.2         NA   3972484  212.2
Vcells 5350667  40.9   88515737 675.4      16384 172882188 1319.0

Chu (cell type)

i = 7

load('../data/unnecessary_in_building/7_Chu_celltype.Rdata')
X        = as.matrix(Chu_celltype$X)
truth    = as.numeric(as.factor(Chu_celltype$label))
numClust = 7
rm(Chu_celltype)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Chu_celltype-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R))
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Chu_celltype-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.9900038
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.9956408
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.7612027
rm(R,X,logX,res1,res2,res3); gc()
          used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
Ncells 2259138 120.7    3972565  212.2         NA   3972565  212.2
Vcells 5473999  41.8  207597943 1583.9      16384 259497187 1979.9

Chu (timecourse)

i = 8
load('../data/unnecessary_in_building/8_Chu_timecourse.Rdata')
X        = as.matrix(Chu_timecourse$X)
truth    = as.numeric(as.factor(Chu_timecourse$label))
numClust = length(unique(truth))

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Chu_timecourse-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R))
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Chu_timecourse-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1); res1 = SLSL(R, log=F, filter=F, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.7321747
set.seed(1); res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.7276994
set.seed(1); res3 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res3$result, truth)
[1] 0.7145637
rm(R,X,logX,res1,res2,res3); gc()
           used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
Ncells  2260642 120.8    3972565  212.2         NA   3972565  212.2
Vcells 20014295 152.7  199358024 1521.0      16384 259497187 1979.9

Session information

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.5

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] stargazer_5.2.2         abind_1.4-5            
 [3] broom_0.4.5             diceR_0.5.1            
 [5] Rtsne_0.13              fossil_0.3.7           
 [7] shapefiles_0.7          foreign_0.8-70         
 [9] maps_3.3.0              sp_1.2-7               
[11] reshape_0.8.7           dplyr_0.7.6            
[13] ggplot2_3.0.0           irlba_2.3.2            
[15] Matrix_1.2-14           quadprog_1.5-5         
[17] inline_0.3.15           matrixStats_0.53.1     
[19] SCNoisyClustering_0.1.0

loaded via a namespace (and not attached):
 [1] tidyselect_0.2.4  reshape2_1.4.3    purrr_0.2.4      
 [4] lattice_0.20-35   colorspace_1.3-2  htmltools_0.3.6  
 [7] yaml_2.1.19       rlang_0.2.0       R.oo_1.22.0      
[10] pillar_1.2.2      glue_1.2.0        withr_2.1.2      
[13] R.utils_2.6.0     bindrcpp_0.2.2    plyr_1.8.4       
[16] bindr_0.1.1       stringr_1.3.0     munsell_0.4.3    
[19] gtable_0.2.0      workflowr_1.1.1   R.methodsS3_1.7.1
[22] psych_1.8.3.3     evaluate_0.10.1   labeling_0.3     
[25] knitr_1.20        parallel_3.5.1    Rcpp_0.12.16     
[28] scales_0.5.0      backports_1.1.2   mnormt_1.5-5     
[31] digest_0.6.15     stringi_1.1.7     grid_3.5.1       
[34] rprojroot_1.3-2   tools_3.5.1       magrittr_1.5     
[37] lazyeval_0.2.1    tibble_1.4.2      tidyr_0.8.0      
[40] whisker_0.3-2     pkgconfig_2.0.1   assertthat_0.2.0 
[43] rmarkdown_1.9     R6_2.2.2          mclust_5.4       
[46] nlme_3.1-137      git2r_0.21.0      compiler_3.5.1   

This reproducible R Markdown analysis was created with workflowr 1.1.1