theme_umap <- function() {
theme_void() +
theme(panel.background = element_rect(fill = "grey", color = "black", size = 2),
legend.position = c(1,0),
legend.justification = c(1.1,0),
legend.direction = "horizontal",
legend.text = element_text(angle = 45, hjust = 1, vjust = 1),
legend.title = element_blank(),
plot.margin = unit(c(1,1,1,1), "mm"))
}
# function to overlay values onto the UMAP embeddings
hex_plots <-
function(data,
dim1,
dim2,
color,
facets = NULL,
CI = 0.05,
fun = "mean",
bins = 200,
divergent_scale = F) {
message("calc CI")
# calculate the quantilles for CI limits onthe color scale.
if (CI) {
lims <- quantile(pull(data, color), probs = c(CI / 2, 1 - CI / 2))
# if user supplied CI leads to 0 values ef no range, set automatically to 1%
if (lims[2] == 0) {
CI <- 0.01
lims <-
quantile(pull(data, color), probs = c(CI / 2, 1 - CI / 2))
}
# if still 0 values still, do not set any CI
if (lims[2] == 0) {
lims <- c(NA, NA)
}
} else {
lims <- c(NA, NA)
}
message("compute plot")
plot <- ggplot(data, aes_string(dim1, dim2)) +
stat_summary_hex(
bins = bins,
fun = fun,
aes(z = !!ensym(color), color = ..value..),
lwd = 0.1
) +
ggtitle(color) +
theme_mrl() +
guides(fill = guide_none()) +
theme_umap()
if (divergent_scale) {
plot <- plot +
scale_fill_gradient2(
midpoint = 0.5,
low = "royalblue",
mid = "grey80",
high = "red4",
limits = lims,
oob = squish
) +
scale_color_gradient2(
midpoint = 0.5,
low = "royalblue",
mid = "grey80",
high = "red4",
limits = lims,
oob = squish
)
} else {
plot <- plot +
scale_fill_viridis(limits = lims, oob = squish) +
scale_color_viridis(limits = lims, oob = squish)
}
if (is.null(facets)) {
return(plot)
} else {
return(plot + facet_wrap(as.formula(facets)))
}
}
ridges_plots <- function(data,dim1,dim2) {
ggplot(data,
aes_string(dim1,
dim2)) +
stat_density_ridges(aes(fill = factor(stat(quantile))),
geom = "density_ridges_gradient", calc_ecdf = TRUE,
quantiles = c(0.01,0.05, 0.1,0.9,0.95,0.99), quantile_lines = TRUE,
color = NA) +
facet_grid(curated_types~., scales = "free_y", space = "free_y") +
scale_fill_manual(labels = c("0.01","0.05", "0.1","0.1-0.9","0.9","0.95","0.99"),
values = c("black","grey80","black","grey80","black","grey80","black"))
}
SingleR_heatmap <- function(singleR_results, meta, k, keepBest = T) {
# Input
# singleR results: a list with 1/ scores matrix (rows = cluster/cells, col = refs), 2/ calls table on each cells (cells as rows)
# metadata table with cell barcodes (or cluster calls) as rownames
# k parameter for cutree, in order to split the dendrogram of cells/clusters
library(ComplexHeatmap)
# add barcodes back to the scores matrix
rownames(singleR_results[[1]]) <- rownames(singleR_results[[2]])
#print(rownames(singleR_results[[1]]))
#print(rownames(meta))
#enforce ordering of cells in the same in scores matrix and metadata
singleR_results <- singleR_results[[1]][rownames(meta),]
# scale scores per cell to % of max score
singleR_results <- singleR_results / matrixStats::rowMaxs(singleR_results)
# Only keep refs that have best score for at least one cells
if(keepBest) {
singleR_results <- singleR_results[,matrixStats::colMaxs(singleR_results) == 1]
}
# Print final dim before plotting
cat("Dimension of matrix to plot after filering",dim(singleR_results))
# hierrachical clustering
row_clust <- hclust(dist(t(singleR_results)))
col_clust <- hclust(dist(singleR_results))
# cut tree and assign color for heatmap annotation
cluster_split <- paste0("cluster ", cutree(col_clust, k = k))
cluster_col <- ggsci::pal_futurama()(k)
names(cluster_col) <- unique(cluster_split)
#plot heatmap
Heatmap(t(singleR_results),
col = c(viridis(1000),"#ff0000"),
show_column_names = F,
bottom_annotation = HeatmapAnnotation(meta),
top_annotation = HeatmapAnnotation("cluster" = cluster_split,
col = list("cluster" =cluster_col)),
cluster_rows = row_clust,
cluster_columns = col_clust,
use_raster = T,
raster_quality = 1)
}
### plotting function draft for singleR objects
plot_singleR_heatmap <- function(singleR_results, scale = T, kmeans = 10, return_scores = F) {
#### usage options
# scale: scale cells to max score fraction. Useful to highlight best score and minimize library size effect
# kmeans: apply kmeans to cluster cells. faster than hierarchical clustering which too slow when dealing with many cells
# TODO: add option to do hierarchical clustering on the kmeans cluster
# TODO: add option to print max score in heatmap given matrix size in not too big
if("SummarizedExperiment" %in% class(singleR_results)) {
# if raw singleR output
# convert to matrix and metadata
scores = as.matrix(singleR_results$scores)
calls = as.data.frame(singleR_results[, colnames(singleR_results) != "scores"])
} else {
# if singleR output converted to list of scores and meta, extract each component
scores = singleR_results$scores
calls = singleR_results$calls
}
# add rownames t score matrix
rownames(scores) <- rownames(calls)
# scale per cell at max value
scores_scaled <- scores / matrixStats::rowMaxs(scores)
#labels of actual best rho scre if printing score on heatmap. Not used rn
labels <- (scores_scaled == 1) * scores %>% round(digits = 2)
labels[labels == 0] <- ""
if(scale) {
mat <- scores_scaled
} else {
mat <- scores
}
if(return_scores) {
return(mat)
}
hm <- ComplexHeatmap::Heatmap(
mat,
col = c(viridis(100), "red"),
km = kmeans,
cluster_rows = F,
show_row_names = F,
use_raster = T
)
hm <- draw(hm)
clustering_results <- ComplexHeatmap::row_order(hm) %>%
enframe("hm_cluster", "id") %>%
unnest()
clustering_results <- data.frame(cell.id = rownames(mat)) %>%
tibble::rowid_to_column("id") %>%
left_join(clustering_results)
}
Introduction
Here we will visualize the results from the initial analyses:
clustering, marker genes, clusters compositional variations.
# import clustering and umap data
seurat <- qread("results/NK_analysis/seurat_objects/analysisNK_Ruchan.qs")
meta <- seurat@meta.data %>%
tibble::rownames_to_column("cell.id") %>%
left_join(
seurat@reductions$umap@cell.embeddings %>% as.data.frame() %>% tibble::rownames_to_column("cell.id")
)
DimPlot(seurat)

technical metrics
umap_settings <- paste0("UMAP_", c(1, 2))
p <- ggplot(meta, aes_string(umap_settings[1], umap_settings[2])) +
geom_hex(bins = 200, fill = "grey")
background_tiles <- ggplot_build(p)$data[[1]] %>%
dplyr::select(x,y) %>%
geom_point(data = ., aes(x,y), color = "grey", size = rel(0.5))
meta %>%
#mutate(percent.rb = log1p(percent.rb)) %>%
hex_plots(umap_settings[1],
umap_settings[2],
"percent.rb",
CI = 0.01,
fun = "mean")

cat("\n\n")
meta %>%
#mutate(percent.rb = log1p(percent.rb)) %>%
hex_plots(umap_settings[1],
umap_settings[2],
"percent.mt",
CI = 0.01,
fun = "mean")

Ruchan’s scoring calls:
meta %>%
group_by(predicted.celltype.l2) %>%
dplyr::filter(n() > 1) %>%
ungroup() %>%
{
ggplot(., aes_string(umap_settings[1], umap_settings[2])) +
geom_hex(aes(fill = predicted.celltype.l2, alpha = log(..ndensity..)),
bins = 200) +
geom_density2d(color = "black", bins = 8) +
scale_alpha(range = c(0.2, 1)) +
theme_mrl() +
theme(panel.background = element_rect(fill = 'grey90', color = NA)) +
scale_fill_manual(values = rev(palette)) +
scale_color_manual(values = rev(palette)) +
geom_label_repel(
data = . %>%
group_by(predicted.celltype.l2) %>%
summarize(
!!umap_settings[1] := median(UQ(sym(umap_settings[1]))),!!umap_settings[2] := median(UQ(sym(umap_settings[2])))
),
aes(fill = predicted.celltype.l2,
label = predicted.celltype.l2),
size = 6
)
}

Clustering
Final calls
meta <- meta %>%
mutate(curated_cluster = seurat_clusters) %>%
mutate(emulsion = orig.ident)
labels <- data.frame(
"1" = "",
"2" = "",
"3" = "",
"4" = "",
"5" = "bright",
"6" = "",
"7" = "",
"8" = "BM resident",
"9" = "proliferating"
) %>% pivot_longer(everything(), names_to = "curated_cluster", values_to = "cluster_label") %>%
mutate(curated_cluster = as.factor(parse_number(curated_cluster)))
meta %>%
left_join(labels) %>%
mutate(cluster_label = fct_reorder(paste(
curated_cluster, cluster_label
), as.numeric(curated_cluster))) %>%
{
ggplot(., aes_string(umap_settings[1], umap_settings[2])) +
geom_hex(aes(fill = cluster_label, alpha = log(..ndensity..)),
bins = 200) +
geom_density2d(
aes(group = cluster_label),
bins = 4,
contour_var = "ndensity",
color = "black",
size = 2
) +
geom_density2d(aes(color = cluster_label),
bins = 4,
contour_var = "ndensity") +
scale_alpha(range = c(0.2, 1)) +
theme_mrl() +
theme(panel.background = element_rect(fill = 'grey90', color = NA)) +
scale_fill_manual(values = palette) +
scale_color_manual(values = palette) +
geom_label_repel(
data = . %>%
group_by(curated_cluster, cluster_label) %>%
summarize(
!!umap_settings[1] := median(UQ(sym(umap_settings[1]))),!!umap_settings[2] := median(UQ(sym(umap_settings[2])))
),
aes(fill = cluster_label,
label = curated_cluster),
size = 6,
point.padding = 30,
min.segment.length = 10,
max.overlaps = 1,
show.legend = FALSE,
fontface = "bold"
) +
theme_void() +
theme(panel.background = element_rect(
fill = "grey",
color = "black",
size = 2
),
legend.title = element_blank(),
legend.text = element_text(size = rel(2), face = "bold"),
#legend.spacing.y = unit(0.5, 'cm'),
aspect.ratio = 0.7) +
guides(alpha = guide_none(),
fill = guide_legend(byrow = TRUE))
}

T cells and NK cells from donor T23491 were emulsioned separately,
providing an experimental control to our in silico partitioning of NK
and T cells. It is reassuring that the NK cells selected without
knowledge of emulsion information all come from the NK emulsion and no T
cells carried over from donor T23491.
meta %>%
dplyr::filter(id == "T23491") %>%
group_by(emulsion) %>%
mutate(emulsion = paste(emulsion, "(", n(), "cells)")) %>%
nest() %>%
mutate(
plot = purrr::map2(
data,
emulsion,
~ ggplot(.x, aes_string(umap_settings[1], umap_settings[2])) +
background_tiles +
geom_bin_2d(bins = 100, aes(color = ..ncount.., fill = ..ncount..)) +
theme_mrl() +
scale_fill_viridis(
trans = "sqrt",
option = "C",
n.breaks = 10
) +
scale_color_viridis(
trans = "sqrt",
option = "C",
n.breaks = 10
) +
theme(
legend.key.height = unit(1, "in"),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
) +
ggtitle(.y)
)
) %>%
pull(plot) %>%
plot_grid(plotlist = ., ncol = 2)

cluster split by sample
p0 <- ggplot(meta, aes(emulsion)) +
geom_bar() +
#scale_y_log10() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank())
p1 <- ggplot(meta, aes(emulsion, fill = curated_cluster, color = condition)) +
geom_bar(position = "fill", size = 1) +
scale_color_manual(values = c("red","black")) +
#scale_y_log10() +
theme_mrl() +
scale_fill_manual(values = palette)
plot_grid(p0,p1, align = "vh", axis = "blrt", ncol = 1)

Basic QC metrics
p0 <- ggplot(meta, aes(curated_cluster)) +
geom_bar() +
geom_label(stat='count', aes(label=..count..), vjust=0.5) +
#scale_y_log10() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank())
p1 <- ggplot(meta, aes(curated_cluster, fill = emulsion, color = condition, alpha = condition)) +
geom_bar(position = "fill", size = 1) +
scale_color_manual(values = c("red","black")) +
scale_fill_manual(values = fct_rev(palette)) +
#scale_y_log10() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank()) +
scale_alpha_discrete(range = c(0.3, 0.8))
p2 <- ggplot(meta, aes(curated_cluster, nCount_RNA)) +
geom_violin(fill = "grey") +
scale_y_log10() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "none")
p3 <- ggplot(meta, aes(curated_cluster, nFeature_RNA)) +
geom_violin(fill = "grey") +
scale_y_log10() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank())
p4 <- ggplot(meta, aes(curated_cluster, percent.mt)) +
geom_violin() +
theme_mrl() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = "none")
p5 <- ggplot(meta, aes(curated_cluster, percent.rb)) +
geom_violin(fill = "grey") +
theme_mrl() +
theme(
legend.position = "none")
plot_grid(p0,p1,p2,p3,p4,p5, align = "vh", axis = "blrt", ncol = 1)

cluster markers
presto_markers <- qread("results/NK_analysis/clustering_typing/clusters_markers_table_harmony_NK_paired_filtered.qs") %>%
dplyr::filter(!grepl("^RP[LS]|^MT-|^AC[0-9]", feature))
write_csv(presto_markers, "results/NK_analysis/clustering_typing/clusters_markers_table_harmony_NK_paired_filtered.csv")
presto_markers <- presto_markers %>%
group_by(feature, group) %>%
summarize(count = n(),
logFC = mean(logFC),
mlogPadj = mean(-log10(padj + 10^-250))) %>%
ungroup()
write_csv(presto_markers, "results/NK_analysis/clustering_typing/clusters_markers_table_harmony_NK_paired_filtered_summarized.csv")
I use AUROC to prioritize marker genes with a very loose pass at AUC
> 0.6. Then I count how many times a gene is deemed a marker across
all paired comparisons between clusters. I retained here a list of a
curated list of genes with high counts, based on biological
knowledge.
Most plots showing gene expression overlayed on UMAPs are saved to
file at figs/markers for the sake of brevity. I highlight
some questions for cluster 1 below.
Manual inspection of marker list:
match_genes <- "CST7|^GNLY|^GZMB|^GZMM|^HOPX|^CD247|^CD7|^GZMA|^GZMH|^HLA|^IGFBP7|^ITGB2|^KLRD1|^KLRF1|^PRF1|^TYROBP|^CD99|^FCER1G|^IFITM2|^IL2RB|^KLRB1|^TRDC|^CCL4|^CCL5|^CD63|^CLIC1|^IL2RG|^LGALS1|^CD300A|^CD160|^CD53|^CD81|^LY6E|^TRBC1|^ZBTB16|^ADAM|^AHR|^AIF1|^BANK1|^BIRC5|^CCR6|^CD14|^CD302|^CD33|^CD36|^CD68|^CD79|^CD86|^CD93|^CLEC|^CTLA|^CTS|^EFHD2|^FGFBP2|^FGL2|^FOXO|^FOXP|^GZM|^HIF1A|^IKZF2|^IL1|^IL4|^IRF|^KLR|^LGALS|^NKG7|^S100A|^SPON2|^VAV|^VIM|^XCL|^ABHD17A|^ADAM|^AKR1C3|^ALCAM|^AREG|^ARAP2|^BACH2|^BTG1|^CAMK4|^CARD11|^CBLB|^CCL|^CXCL|^CCR|^CXCR|^CD7|^CD8|^FOXN3|^FYN|^IL32|^IL2|^IL6|^LEF1|^SELL|^MEF2|^NELL2|^NFKB|^PARP|^TGFB|^CD63|^FCER|^FCGR|^ROR|^STAT|^TRDC|^JUNB|^CD28|^COTL1|^ICOS|^LTB|^CD6|^JUN|^CCR7|^CD3|^CD44|^ITK|^TCF7|^RUNX|^CD5|^IL7R|^BCL2|^THEMIS|^CXCR4|^CD69|^ISG|^LTA|^GNG2|^IL23R|^ID2|^CD4|^TGFB|^IFN|^PDCD|^TNFRSF18|^CD27|^NCAM|^FAU|^IFI|^BCL|^OAS"
genes_to_plot <- presto_markers %>%
dplyr::filter(count > 5) %>%
#dplyr::filter(grepl(match_genes, feature)) %>%
group_by(group) %>%
arrange(-count) %>%
dplyr::select(group, feature) %>%
nest() %>%
mutate(data = purrr::map(data, ~paste(.x$feature, collapse = ", "))) %>%
arrange(as.numeric(group)) %>%
mutate(data = str_wrap(data, 40))
kable(genes_to_plot) %>%
column_spec(2, "7in")
genes_to_plot <- presto_markers %>%
dplyr::filter(count > 4) %>%
dplyr::select(feature) %>%
distinct() %>%
#dplyr::filter(grepl(match_genes, feature)) %>%
pull(feature)
genes_to_plot <- seurat@assays$RNA@data[genes_to_plot,] %>% #marker_genes_to_plot
as.matrix() %>%
t() %>%
as.data.frame() %>%
dplyr::rename_with(~paste0("gene_",.)) %>%
tibble::rownames_to_column("cell.id")
meta %>%
dplyr::select(cell.id,umap_settings[1],umap_settings[2], curated_cluster, source, condition) %>%
left_join(genes_to_plot) %>%
gather("feat", "val", -!matches("^gene_")) %>%
group_by(feat) %>%
nest() %>%
mutate(plots = purrr::map2(data,feat, ~ ggsave(paste0("figs/markers/NK/",.y,"_mean.png"),
hex_plots(.x,
umap_settings[1],
umap_settings[2],
"val",
#"source~condition",
CI = 0.05,
fun = "mean") +
ggtitle(.y) +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
axis.line.y = element_blank(),
panel.grid = element_blank(),
panel.background = element_rect(fill = 'grey', color = NA),
panel.border = element_rect(colour = "black", fill=NA),
#strip.text = element_blank(),
aspect.ratio = 0.7,
legend.position = c(1,0),
legend.justification = c(1,0),
legend.title = element_blank(),
legend.direction = "horizontal",
legend.key.height = unit(0.1, "in"),
legend.text = element_text(angle = 45, hjust = 1, vjust = 1.2)))
)
)
Clustered heatmap of curated markers
Main heatmap
hm_genes <- meta %>%
dplyr::select(cell.id, curated_cluster) %>%
left_join(genes_to_plot) %>%
gather("feat", "val", -!matches("^gene_")) %>%
group_by(feat, curated_cluster) %>%
summarize(avg = mean(val), sd = sd(val))
hm_genes <- hm_genes %>%
dplyr::select(-sd) %>%
mutate(feat = gsub("gene_", "", feat)) %>%
pivot_wider(names_from = "feat",values_from = "avg") %>%
tibble::column_to_rownames("curated_cluster") %>%
as.matrix() %>%
scale()
hm <- ComplexHeatmap::Heatmap(t(hm_genes),
col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
c("royalblue", "grey", "red4")),
row_names_gp = gpar(fontsize = 12, fontface = "bold"),
column_names_gp = gpar(fontsize = 16, fontface = "bold"),
column_names_rot = 45,
column_names_centered = T,
column_split = NULL)
hm <- draw(hm)

ori_col_order <- column_order(hm)
ori_row_order <- row_order(hm)
hm <- ComplexHeatmap::Heatmap(t(hm_genes),
col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
c("royalblue", "grey", "red4")),
row_names_gp = gpar(fontsize = 12, fontface = "bold"),
column_names_gp = gpar(fontsize = 16, fontface = "bold"),
column_names_rot = 0,
column_names_centered = T,
row_split = 4)
hm <- draw(hm)
# %v%
# columnAnnotation(link= anno_mark(at = 1:ncol(hm), side = "bottom", labels = colnames(hm), labels_rot = 90, link_height = unit(20, "mm")))
#
Zoom on parts
i=1
for(cluster in row_order(hm)) {
cat("\n\n#### ", i, "\n\n")
hm_tmp <- ComplexHeatmap::Heatmap(
t(hm_genes)[ori_row_order[ori_row_order %in% cluster], ori_col_order],
col = colorRamp2(quantile(hm_genes, c(0.025, 0.5, 0.975)),
c("royalblue", "grey", "red4")),
row_names_gp = gpar(fontsize = 12, fontface = "bold"),
column_names_gp = gpar(fontsize = 16, fontface = "bold"),
column_names_rot = 45,
column_names_centered = T,
cluster_rows = F,
cluster_columns = F
)
hm_tmp <- draw(hm_tmp)
cat("\n\n")
i <- i+1
}
1

2

3

4

Cell type enrichment across treatment groups
Variations in frequency
Empirical compositions
frequencies <- meta %>%
#dplyr::filter(percent.BCR < 1) %>%
group_by(source, condition, id, curated_cluster, .drop = F) %>%
dplyr::summarize(count = n()) %>%
ungroup() %>%
group_by(source, condition, id) %>%
mutate(freq = count / sum(count)) %>%
left_join(labels) %>%
mutate(cluster_label = fct_reorder(paste(
curated_cluster, "\n", cluster_label
), as.numeric(curated_cluster)))
plots <- frequencies %>%
group_by(curated_cluster, cluster_label) %>%
nest() %>%
mutate(
plot = purrr::map2(
data,
paste("cluster" ,curated_cluster),
~ ggplot(.x,
aes(condition, freq, fill = condition)
) +
geom_boxplot(width = 0.5, outlier.shape = NA, position = position_nudge(-0.3)) +
geom_point(aes(size = count), position = position_nudge(0.2), alpha = 0.5) +
#stat_summary() +
#geom_jitter(fill = "grey", shape = 21, color = "black") +
facet_wrap(.~source, nrow = 1, strip.position = "bottom") +
theme_mrl() +
#scale_y_log10() +
scale_alpha_continuous(range = c(0.5, 1)) +
scale_size_continuous(trans = "log10") +
ggsignif::geom_signif(comparisons = list(c("HD", "MM")),
map_signif_level = F) +
ggtitle(.y) +
theme(legend.position = "none",
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank()) +
scale_fill_jco()
)
) %>%
arrange(curated_cluster) %>%
pull(plot)
plots[["legend"]] <- get_legend(plots[[1]] + theme(legend.position = "left",
legend.text = element_text(size = rel(2)),
legend.title = element_text(size = rel(2))))
plots[[1]] <- plots[[1]] + theme(axis.title.y = element_text(hjust = 0, angle = 90, size = rel(2))) +
ylab("Frequencies")
plot_grid(plotlist = plots, nrow = 3, axis = "lbrt", align = "hv")

sccomp modelling (beta-binomial)
Blood
library(sccomp)
sccomp_Blood <- meta %>%
dplyr::filter(source == "PBMC") %>%
sccomp_glm(
formula_composition = ~condition,
.sample = id,
.cell_group = curated_cluster,
bimodal_mean_variability_association = TRUE)
plots <- plot_summary(sccomp_Blood)
plot_grid(plots[[1]][[1]], plot_grid(plotlist = plots[2:3], nrow = 2), nrow = 1)

Bone marrow
sccomp_BM <- meta %>%
dplyr::filter(source == "BMMC") %>%
sccomp_glm(
formula_composition = ~condition,
.sample = id,
.cell_group = curated_cluster,
bimodal_mean_variability_association = TRUE)
plots <- plot_summary(sccomp_BM)
plot_grid(plots[[1]][[1]], plot_grid(plotlist = plots[2:3], nrow = 2), nrow = 1)

sccomp_joined <- meta %>%
mutate(sample = paste0(id,source)) %>%
group_by(condition) %>%
mutate(donor = paste0("D",unclass(as.factor(id)))) %>%
ungroup() %>%
sccomp_glm(
formula_composition = ~ condition + donor:source + condition:source,
.sample = sample,
.cell_group = curated_cluster,
bimodal_mean_variability_association = TRUE,
cores = 5)
full analysis