Last updated: 2018-07-23

workflowr checks: (Click a bullet for more information)
Expand here to see past versions:


Pollen

i = 2

load('../data/unnecessary_in_building/2_Pollen.RData')
X        = as.matrix(Pollen$x)
truth    = as.numeric(as.factor(Pollen$label))
numClust = length(unique(truth))
rm(Pollen)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(cbind(rep(1,length(det)), det))
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Pollen-1.png:
Version Author Date
5787a2e tk382 2018-07-17
b7e4475 tk382 2018-07-16

# tsne1 = Rtsne(t(R))
# tsne2 = Rtsne(t(logX))

# dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
# ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.7755788
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.7755788
plot(res1$tsne, col=rainbow(numClust)[res1$result])

Expand here to see past versions of Pollen-2.png:
Version Author Date
5787a2e tk382 2018-07-17
b7e4475 tk382 2018-07-16

plot(res2$tsne, col=rainbow(numClust)[res2$result])

Usoskin

i = 3

load('../data/unnecessary_in_building/3_Usoskin.RData')
X        = as.matrix(Usoskin$X)
truth    = as.numeric(as.factor(as.character(Usoskin$lab1)))
numClust = 4
rm(Usoskin)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

plot(irlba(logX,1)$v[,1]~log(det))

Expand here to see past versions of Usoskin-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

det2 = qr(cbind(rep(1, length(det)), log(det)))
R = t(qr.resid(det2, t(logX)))

# pca1 = irlba(R,2); pca2 = irlba(logX,2)
# dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
# ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")
# 
# tsne1 = Rtsne(t(R), perplexity=20)
# tsne2 = Rtsne(t(logX))
# 
# dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
# ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.5574876
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.5215175

Buettner

i = 4

#read data
load('../data/unnecessary_in_building/4_Buettner.RData')
X        = as.matrix(Buettner$X)
truth    = as.numeric(as.factor(Buettner$label))
numClust = 3
rm(Buettner)

logX = log(X+1)
det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Buettner-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=20)
tsne2 = Rtsne(t(logX), perplexity=20)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Buettner-2.png:
Version Author Date
b7e4475 tk382 2018-07-16

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.4665139
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.5680807

Yan

i = 5

load('../data/unnecessary_in_building/5_Yan.rda')
X        = as.matrix(yan)
truth    = as.character(ann$cell_type1)
truth    = as.numeric(as.factor(truth))
numClust = 6
rm(ann, yan)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Yan-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=20)
tsne2 = Rtsne(t(logX), perplexity=20)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Yan-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.6517271
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.8954618

Treutlein

i = 6

load('../data/unnecessary_in_building/6_Treutlein.rda')
X        = as.matrix(treutlein)
truth    = as.numeric(colnames(treutlein))
ind      = sort(truth, index.return=TRUE)$ix
X        = X[,ind]
truth    = truth[ind]
numClust = length(unique(truth))
rm(treutlein)
logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(cbind(log(det), rep(1, length(det))))
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Treutlein-1.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R), perplexity=10)
tsne2 = Rtsne(t(logX), perplexity=10)

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Treutlein-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.326254
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.4426981

Chu (cell type)

i = 7

load('../data/unnecessary_in_building/7_Chu_celltype.Rdata')
X        = as.matrix(Chu_celltype$X)
truth    = as.numeric(as.factor(Chu_celltype$label))
numClust = 7
rm(Chu_celltype)

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Chu_celltype-1.png:
Version Author Date
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R))
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Chu_celltype-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.6712136
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.9556889

Chu (timecourse)

i = 8
load('../data/unnecessary_in_building/8_Chu_timecourse.Rdata')
X        = as.matrix(Chu_timecourse$X)
truth    = as.numeric(as.factor(Chu_timecourse$label))
numClust = length(unique(truth))

logX = log(X+1)

det = colSums(X!=0) / nrow(X)

det2 = qr(det)
R = t(qr.resid(det2, t(logX)))

pca1 = irlba(R,2); pca2 = irlba(logX,2)
dat = data.frame(pc1=c(pca1$v[,1], pca2$v[,1]), detection.rate=rep(det, 2), label=rep(c("After correction", "Before correction"), each=nrow(pca1$v)), true.label=as.factor(rep(truth,2)))
ggplot(dat, aes(x=pc1, y=detection.rate, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("PCA")

Expand here to see past versions of Chu_timecourse-1.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

tsne1 = Rtsne(t(R))
tsne2 = Rtsne(t(logX))

dat = data.frame(v1 = c(tsne1$Y[,1], tsne2$Y[,1]), v2 = c(tsne1$Y[,2], tsne2$Y[,2]), label=rep(c("After correction", "Before correction"), each=nrow(tsne1$Y)), true.label = as.factor(rep(truth, 2)))
ggplot(dat, aes(x=v1, y=v2, col=true.label)) + facet_grid(~label) + geom_point() + ggtitle("tSNE")

Expand here to see past versions of Chu_timecourse-2.png:
Version Author Date
36ebfc2 tk382 2018-07-16
b7e4475 tk382 2018-07-16

set.seed(1)
res1 = SLSL(X, log=T, filter=F, correct_detection_rate = T, numClust = numClust)
adj.rand.index(res1$result, truth)
[1] 0.6992235
set.seed(1)
res2 = SLSL(X, log=T, filter=F, correct_detection_rate = F, numClust = numClust)
adj.rand.index(res2$result, truth)
[1] 0.6997664

Session information

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.5

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] stargazer_5.2.2         abind_1.4-5            
 [3] broom_0.4.5             diceR_0.5.1            
 [5] Rtsne_0.13              fossil_0.3.7           
 [7] shapefiles_0.7          foreign_0.8-70         
 [9] maps_3.3.0              sp_1.2-7               
[11] reshape_0.8.7           dplyr_0.7.6            
[13] ggplot2_3.0.0           irlba_2.3.2            
[15] Matrix_1.2-14           quadprog_1.5-5         
[17] inline_0.3.15           matrixStats_0.53.1     
[19] SCNoisyClustering_0.1.0

loaded via a namespace (and not attached):
 [1] tidyselect_0.2.4  reshape2_1.4.3    purrr_0.2.4      
 [4] lattice_0.20-35   colorspace_1.3-2  htmltools_0.3.6  
 [7] yaml_2.1.19       rlang_0.2.0       R.oo_1.22.0      
[10] pillar_1.2.2      glue_1.2.0        withr_2.1.2      
[13] R.utils_2.6.0     bindrcpp_0.2.2    plyr_1.8.4       
[16] bindr_0.1.1       stringr_1.3.0     munsell_0.4.3    
[19] gtable_0.2.0      workflowr_1.1.1   R.methodsS3_1.7.1
[22] codetools_0.2-15  psych_1.8.3.3     evaluate_0.10.1  
[25] labeling_0.3      knitr_1.20        parallel_3.5.1   
[28] Rcpp_0.12.16      scales_0.5.0      backports_1.1.2  
[31] mnormt_1.5-5      digest_0.6.15     stringi_1.1.7    
[34] grid_3.5.1        rprojroot_1.3-2   tools_3.5.1      
[37] magrittr_1.5      lazyeval_0.2.1    tibble_1.4.2     
[40] tidyr_0.8.0       whisker_0.3-2     pkgconfig_2.0.1  
[43] assertthat_0.2.0  rmarkdown_1.9     R6_2.2.2         
[46] mclust_5.4        nlme_3.1-137      git2r_0.21.0     
[49] compiler_3.5.1   

This reproducible R Markdown analysis was created with workflowr 1.1.1