wrapper functions for plotting:

theme_umap <- function() {
  theme_void() +
    theme(panel.background = element_rect(fill = "grey", color = "black", size = 2),
          legend.position = c(1,0),
          legend.justification = c(1.1,0),
          legend.direction = "horizontal",
          legend.text = element_text(angle = 45, hjust = 1, vjust = 1),
          legend.title = element_blank(),
          plot.margin = unit(c(1,1,1,1), "mm"))
  
  
}


# function to overlay values onto the UMAP embeddings
hex_plots <-
  function(data,
           dim1,
           dim2,
           color,
           facets = NULL,
           CI = 0.05,
           fun = "mean",
           bins = 200,
           divergent_scale = F) {
    message("calc CI")
    # calculate the quantilles for CI limits onthe color scale.
    if (CI) {
      lims <- quantile(pull(data, color), probs = c(CI / 2, 1 - CI / 2))
      
      # if user supplied CI leads to 0 values ef no range, set automatically to 1%
      if (lims[2] == 0) {
        CI <- 0.01
        lims <-
          quantile(pull(data, color), probs = c(CI / 2, 1 - CI / 2))
      }
      # if still 0 values still, do not set any CI
      if (lims[2] == 0) {
        lims <- c(NA, NA)
      }
      
    } else {
      lims <- c(NA, NA)
    }
    
    message("compute plot")
    plot <- ggplot(data, aes_string(dim1, dim2)) +
      stat_summary_hex(
        bins = bins,
        fun = fun,
        aes(z = !!ensym(color), color = ..value..),
        lwd = 0.1
      ) +
      ggtitle(color) +
      theme_mrl() +
      guides(fill = guide_none()) +
      theme_umap()
    
    
    if (divergent_scale) {
      plot <- plot  +
        scale_fill_gradient2(
          midpoint = 0.5,
          low = "royalblue",
          mid = "grey80",
          high = "red4",
          limits = lims,
          oob = squish
        ) +
        scale_color_gradient2(
          midpoint = 0.5,
          low = "royalblue",
          mid = "grey80",
          high = "red4",
          limits = lims,
          oob = squish
        )
    } else {
      plot <- plot +
        scale_fill_viridis(limits = lims, oob = squish) +
        scale_color_viridis(limits = lims, oob = squish)
    }
    
    if (is.null(facets)) {
      return(plot)
    } else {
      return(plot + facet_wrap(as.formula(facets)))
    }
  }

ridges_plots <- function(data,dim1,dim2) {
  
  ggplot(data, 
       aes_string(dim1, 
                  dim2)) +
  stat_density_ridges(aes(fill = factor(stat(quantile))),
    geom = "density_ridges_gradient", calc_ecdf = TRUE,
    quantiles = c(0.01,0.05, 0.1,0.9,0.95,0.99), quantile_lines = TRUE,
    color = NA) +
  facet_grid(curated_types~., scales = "free_y", space = "free_y") +
  scale_fill_manual(labels = c("0.01","0.05", "0.1","0.1-0.9","0.9","0.95","0.99"),
                    values = c("black","grey80","black","grey80","black","grey80","black"))
}






SingleR_heatmap <- function(singleR_results, meta, k, keepBest = T) {
  
  # Input
  # singleR results: a list with 1/ scores matrix (rows = cluster/cells, col = refs), 2/ calls table on each cells (cells as rows)
  # metadata table with cell barcodes (or cluster calls) as rownames
  # k parameter for cutree, in order to split the dendrogram of cells/clusters
  
  library(ComplexHeatmap)
  
  # add barcodes back to the scores matrix
  rownames(singleR_results[[1]]) <- rownames(singleR_results[[2]])
  
  #print(rownames(singleR_results[[1]]))
  #print(rownames(meta))
  #enforce ordering of cells in the same in scores matrix and metadata
  singleR_results <- singleR_results[[1]][rownames(meta),]
  
  # scale scores per cell to % of max score
  singleR_results <- singleR_results / matrixStats::rowMaxs(singleR_results)
  
  # Only keep refs that have best score for at least one cells
  if(keepBest) {
    singleR_results <- singleR_results[,matrixStats::colMaxs(singleR_results) == 1]
  }
  

  # Print final dim before plotting
  cat("Dimension of matrix to plot after filering",dim(singleR_results))



  # hierrachical clustering
  row_clust <- hclust(dist(t(singleR_results)))

  col_clust <- hclust(dist(singleR_results))

  # cut tree and assign color for heatmap annotation
  cluster_split <- paste0("cluster ", cutree(col_clust, k = k))

  cluster_col <- ggsci::pal_futurama()(k)

  names(cluster_col) <- unique(cluster_split)

  #plot heatmap
  Heatmap(t(singleR_results),
        col = c(viridis(1000),"#ff0000"),
        show_column_names = F,
        bottom_annotation = HeatmapAnnotation(meta),
        top_annotation = HeatmapAnnotation("cluster" = cluster_split, 
                                           col = list("cluster" =cluster_col)),
        cluster_rows = row_clust,
        cluster_columns = col_clust,
        use_raster = T,
        raster_quality = 1)
  
  
  
  
  
  
}

Import data:

# import clustering and umap data
seurat <- qread("results/NK_analysis/seurat_objects/analysisNK_Ruchan.qs") 

adt <- seurat@assays$ADT@data %>% 
  t() %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column("cell.id") %>% 
  dplyr::rename_with(~gsub("-TotalSeqB$","",.))



# import ADT normalized data from my treatment of data
# adt <- qread("../01A_HCvsMM_10x/results/clustering_typing/adt_norm_class.qs") %>%
#   dplyr::select(-c(nCount_ADT:emulsion, adt_cell_type))

meta <- seurat@meta.data %>%
  tibble::rownames_to_column("cell.id") %>%
  left_join(
    seurat@reductions$umap@cell.embeddings %>% as.data.frame() %>% tibble::rownames_to_column("cell.id")
  ) %>%
  left_join(
    adt
  )

meta <- meta %>%
  mutate(curated_cluster = seurat_clusters) %>%
  mutate(emulsion = orig.ident)

umap_settings <- paste0("UMAP_", c(1, 2))

UMAP and clustering

labels <- tribble(
  ~ curated_cluster, ~ cluster_label,
  1, "CD56_dim",
  2, "activated",
  3, "Terminally_diff",
  4, "adaptive_like_1",
  5, "CD56_bright",
  6, "transitional",
  7, "adaptive_like_2",
  8, "BM_resident"
) %>%
  mutate(curated_cluster = as.factor(curated_cluster))

gt::gt(labels)

curated_cluster	cluster_label
1	CD56_dim
2	activated
3	Terminally_diff
4	adaptive_like_1
5	CD56_bright
6	transitional
7	adaptive_like_2
8	BM_resident

main_umap <- meta %>%
  left_join(labels) %>%
  mutate(cluster_label = fct_reorder(paste(
    curated_cluster, cluster_label, sep = ": "
  ), as.numeric(curated_cluster))) %>%
  dplyr::rename(x = umap_settings[1], y = umap_settings[2]) %>%
  {
    ggplot(., aes(x,y)) +
      geom_point(aes(color = cluster_label)) +
      geom_density2d(
        aes(group = cluster_label),
        bins = 4,
        contour_var = "ndensity",
        color = "black",
        size = 2
      ) +
      geom_density2d(aes(color = cluster_label),
                     bins = 4,
                     contour_var = "ndensity") +
      scale_alpha(range = c(0.2, 1)) +
      theme_mrl() +
      theme(panel.background = element_rect(fill = 'grey90', color = NA)) +
      scale_fill_manual(values = alpha(palette, 0.5)) +
      scale_color_manual(values = palette) +
      theme_void() +
      theme(panel.background = element_rect(
        fill = "grey",
        color = "black",
        size = 2
      ),
      legend.title = element_blank(),
      legend.text = element_text(size = rel(2), face = "bold"),
      #legend.spacing.y = unit(0.5, 'cm'),
      aspect.ratio = 0.7) 
  }


main_umap <- main_umap +
  theme(plot.margin = unit(rep(2,4), "mm")) +
  guides(alpha = guide_none(),
         color = guide_legend(
           byrow = TRUE,
           override.aes = list(size = 7, geom = "point", linetype = NA)
         ))

p1 <- meta %>%
  left_join(labels) %>%
  mutate(source = gsub("MC$", "", source)) %>%
  mutate(cluster_label = fct_reorder(paste(
    curated_cluster, cluster_label, sep = ": "
  ), as.numeric(curated_cluster))) %>%
  dplyr::rename(x = umap_settings[1], y = umap_settings[2]) %>%
  {
    ggplot(., aes(x,y)) +
      geom_point(aes(color = cluster_label)) +
      geom_density2d(
        #aes(group = cluster_label),
        bins = 8,
        contour_var = "ndensity",
        color = "black",
        size = 1
      ) +
      scale_alpha(range = c(0.2, 1)) +
      theme_mrl() +
      theme(panel.background = element_rect(fill = 'grey90', color = NA)) +
      scale_fill_manual(values = alpha(palette, 0.5)) +
      scale_color_manual(values = palette) +
      theme_void() +
      theme(panel.background = element_rect(
        fill = "grey",
        color = "black",
        size = 2
      ),
      legend.title = element_blank(),
      legend.text = element_text(size = rel(2), face = "bold"),
      #legend.spacing.y = unit(0.5, 'cm'),
      aspect.ratio = 0.7) 
  }


p1 <- p1 +
  theme(legend.position = "none",
        #plot.margin = unit(rep(1,4), "mm"),
        strip.text.y.left = element_text(size = 20, angle = 0),
        strip.text.x = element_text(size = 20, angle = 0)) +
  facet_grid(condition ~ source, switch = "y")

plot_grid(main_umap + theme(legend.position = "none"), p1, get_legend(main_umap), 
          nrow = 1,
          align = "hv",
          axis = "tblr")

ggsave(file.path(manuscript_figures_path, "main_umaps.pdf"), width = 60, height = 20, units = "cm")

CITE-seq markers overlay

colnames(adt)[-1] %>%
  lapply(function(cite_param) {
    hex_plots(meta %>% dplyr::filter(!is.na(CD16)),
              umap_settings[1],
              umap_settings[2],
              cite_param,
              CI = 0.1) +
      theme(legend.justification = c(1.1,-0.1),
            legend.direction = "vertical",
            legend.text = element_text(
              angle = 0,
              hjust = 1,
              vjust = 0
            ),
      )
  }) %>%
  patchwork::wrap_plots()

#colnames(meta)

Heatmap of curated markers

We use a set of marker genes (mixture of PPA’s and Eve’s curation) to illustrate the transcriptomic differences across clusters

initial heatmap with z-scores

curated_cluster_markers <- list(
  "1" = c("IKZF2",
        "PRF1","FCGR3A","CX3CR1","FCER1G","PTGDS","GZMB","GZMA","TXNIP", "FCGR3A", "CD226"),
  "2" = c("HLA-A", "HLA-B",
        "PRF1","NKG7","GZMB","STAT4"),
  "3" = c("REL","NFKB1","NFKBIA","TNF","RELB"),
  "4" = c("IL32", "ITGB1", "CD52", "CLEC2D", "KLRK1", "KLRC3", "CD3E", "TGFBR3",
        "IL32","GZMH","CCL5","CD52","S100A6","KLRC2","S100A4", "CD3E","KLRC3","IFIT2","PMAIP1","IFIT3","TNF","OASL","CCL3L1","IFIT1","NFKBIZ","CCL4L2"),
  "5" = c("TCF7", "CD44", "IL7R", "SELL", "IL18R1", 
        "GZMK","KLRC1","SELL","XCL2","XCL1","CD44","COTL1","IL7R"),
  "6" = c("PTCH2", "TMEM107", "CROCC"),
  "7" = c("TNF", "IFIT2", "IFIT3", "IFIH1", "SP140", "NFKBIZ", "OASL"),
  "8" = c("EOMES", "ID2", "TOX2", "CD7", "IL2RB", "KLRB1", "CD69", "CCL3", "XCL2",
        "CCL3","XCL1","XCL2","GZMK","CD160","CEBPD","IRF8")
)

curated_cluster_markers %>%
  enframe("cluster", "markers") %>%
  gt::gt()

cluster	markers
1	IKZF2, PRF1, FCGR3A, CX3CR1, FCER1G, PTGDS, GZMB, GZMA, TXNIP, FCGR3A, CD226
2	HLA-A, HLA-B, PRF1, NKG7, GZMB, STAT4
3	REL, NFKB1, NFKBIA, TNF, RELB
4	IL32, ITGB1, CD52, CLEC2D, KLRK1, KLRC3, CD3E, TGFBR3, IL32, GZMH, CCL5, CD52, S100A6, KLRC2, S100A4, CD3E, KLRC3, IFIT2, PMAIP1, IFIT3, TNF, OASL, CCL3L1, IFIT1, NFKBIZ, CCL4L2
5	TCF7, CD44, IL7R, SELL, IL18R1, GZMK, KLRC1, SELL, XCL2, XCL1, CD44, COTL1, IL7R
6	PTCH2, TMEM107, CROCC
7	TNF, IFIT2, IFIT3, IFIH1, SP140, NFKBIZ, OASL
8	EOMES, ID2, TOX2, CD7, IL2RB, KLRB1, CD69, CCL3, XCL2, CCL3, XCL1, XCL2, GZMK, CD160, CEBPD, IRF8

curated_cluster_markers <- do.call(c, curated_cluster_markers) %>% unique()

genes_to_plot <- seurat@assays$RNA@data[curated_cluster_markers,] %>% #marker_genes_to_plot
  as.matrix() %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename_with(~paste0("gene_",.)) %>%
  tibble::rownames_to_column("cell.id")

hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -!matches("^gene_")) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), pct = sum(val > 0) / n() * 100)

pct_genes <- hm_genes %>%
  dplyr::select(-avg) %>%
  mutate(feat = gsub("gene_", "", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "pct") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()

hm_genes <- hm_genes %>%
  dplyr::select(-pct) %>%
  mutate(feat = gsub("gene_", "", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()



hm <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        row_split = 5,
                        column_title_gp = gpar(cex = 0, fontsize = 0),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "z-score")) 



hm <- draw(hm)

ori_col_order <- column_order(hm)
ori_row_order <- row_order(hm)

cl_dend <- dist(hm_genes) %>% hclust()

pdf(file.path(manuscript_figures_path, "hm_genes.pdf"), width = 4, height = 16)

hm

dev.off()

dotplot heatmap

pct_genes <- pct_genes[rownames(hm_genes), colnames(hm_genes)]

#from https://divingintogeneticsandgenomics.rbind.io/post/clustered-dotplot-for-single-cell-rnaseq/

col_fun <- colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4"))

cell_fun <- function(j, i, x, y, w, h, fill) {
  grid.rect(
    x = x,
    y = y,
    width = w,
    height = h,
    gp = gpar(col = NA, fill = NA)
  )
  grid.circle(
    x = x,
    y = y,
    r = pct_genes[i, j] * min(unit.c(w, h)) * 0.7,
    gp = gpar(fill = col_fun(hm_genes[i, j]), col = NA)
  )
}

hm_genes <- t(hm_genes)
pct_genes <- t(pct_genes)

hm <- ComplexHeatmap::Heatmap(hm_genes,
                        col = col_fun,
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        cell_fun = cell_fun,
                        rect_gp = gpar(type = "none"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        row_split = 5,
                        column_title_gp = gpar(cex = 0, fontsize = 0),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "z-score"),
                        border = "black") 



hm <- draw(hm)

hm_genes <- matrix(c(1,20,50))

3 vs 1 differences

We first remove low detection genes:

markers <- qread("results/NK_analysis/clustering_typing/clusters_markers_table_harmony_NK_paired.qs") %>%
  dplyr::filter(group == 3) %>%
  dplyr::filter(groupA == 1 | groupB == 1)


markers %>% 
  ggplot(aes(avgExpr + 10^-4)) +
  geom_histogram() +
  scale_x_log10() +
  theme_mrl() +
  annotation_logticks(sides = "b") +
  geom_vline(xintercept = 0.05)

Then visualize the markers using an MA plot.

markers %>%
  {
    dplyr::filter(., avgExpr > 0.05) %>%
      ggplot(aes(avgExpr, auc)) +
      geom_hex(aes(color = after_scale(fill)), bins = 100) +
      geom_label_repel(
        data = dplyr::filter(., auc > 0.65),
        aes(label = feature),
        force = 10,
        ylim = c(0.7, 1),
        max.overlaps = 50
      ) +
      geom_label_repel(
        data = dplyr::filter(., auc < 0.35),
        aes(label = feature),
        force = 10,
        ylim = c(0, 0.3),
        max.overlaps = 50
      ) +
      scale_x_log10() +
      scale_fill_viridis(trans = "log10") +
      theme_mrl(2) +
      theme(
        panel.border = element_rect(
          color = "black",
          fill = NA,
          linetype = "solid"
        ),
        plot.margin = unit(c(0.05, 0.05, 0.05, 0.05), "npc"),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        legend.position = c(0, 1),
        legend.justification = c(0, 1),
      ) +
      scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
      geom_hline(yintercept = c(0.5, 0.65, 0.35),
                 color = "red") +
      labs(y = expression("AUC\ncluster 1" %<->% "cluster 3"),
           x = "average gene expression") +
      guides(fill = guide_colourbar(
        title = "gene count",
        label.position = "left",
        label.hjust = 1
      )) +
      annotation_logticks(sides = "b")
  }

Cell type enrichment across treatment groups

Variations in frequency

sccomp based inference

sccomp_Blood <- meta %>%
  dplyr::filter(source == "PBMC") %>%
  sccomp_glm(
           formula_composition = ~condition,
           .sample = id,
           .cell_group = curated_cluster,
           bimodal_mean_variability_association = TRUE)

plots_blood <- plot_summary(sccomp_Blood, 0.05)

sccomp_BM <- meta %>%
  dplyr::filter(source == "BMMC") %>%
  sccomp_glm(
           formula_composition = ~condition,
           .sample = id,
           .cell_group = curated_cluster,
           bimodal_mean_variability_association = TRUE)


plots_BM <- plot_summary(sccomp_BM, 0.05)





sccomp_joined <- meta %>%
  mutate(sample = paste0(id,source)) %>%
  group_by(condition) %>%
  mutate(donor = paste0("D",unclass(as.factor(id)))) %>%
  ungroup() %>%
  sccomp_glm(
           formula_composition = ~ condition + donor:source + condition:source,
           .sample = sample,
           .cell_group = curated_cluster,
           bimodal_mean_variability_association = TRUE,
           cores = 5)


plots_blood$boxplot[[1]] + facet_wrap(.~curated_cluster, nrow = 2) +
  scale_y_continuous() +
  ggtitle("Blood")

plots_BM$boxplot[[1]] + facet_wrap(.~curated_cluster, nrow = 2) +
  scale_y_continuous() +
  ggtitle("BM")

merged_metrics <- rbind(mutate(sccomp_Blood, source = "PBMC"),
                       mutate(sccomp_BM, source = "BMMC")) %>%
  dplyr::filter(parameter == "conditionMM") %>%
  dplyr::select(curated_cluster, source, matches("^c_"))

p_val_metrics <- merged_metrics %>%
  dplyr::select(curated_cluster, source, label = c_FDR) %>%
  mutate(start = "HD", end = "MM", y = 0.5, label = case_when(label < 0.001 ~ "***",
                                                              label < 0.01 ~ "**",
                                                              label < 0.05 ~ "*",
                                                              .default = NA))


per_tissue_plots <- merged_metrics %>%
  mutate(c_FDR = ifelse(c_FDR < 0.05, "FDR < 0.05", "non significant")) %>%
  group_by(source) %>%
  nest() %>%
  mutate(plots = map2(data, source, ~ ggplot(.x, aes(
    c_effect, fct_reorder(curated_cluster, c_effect), color = c_FDR
  )) +
    geom_errorbarh(aes(xmin = c_lower, xmax = c_upper)) +
    geom_point() +
    theme_mrl() +
    theme(legend.title = element_blank(), legend.position = c(1,0), legend.justification = c(1,0)) +
    labs(title = .y, x = "log fold change", y = "cluster") +
    scale_color_manual(values = c("red", "black")) +
    geom_vline(xintercept = 0))) %>%
  pull(plots)



tissue_comparison <- merged_metrics %>%
  mutate(c_FDR = ifelse(c_FDR < 0.05, "FDR < 0.05", "non significant")) %>%
  group_by(source) %>%
  nest() %>%
  deframe()

comparison_plot <- left_join(dplyr::rename_with(tissue_comparison[["PBMC"]], ~gsub("c_", "PBMC_", .)),
          dplyr::rename_with(tissue_comparison[["BMMC"]], ~gsub("c_", "BMMC_", .))) %>%
  ggplot(aes(PBMC_effect, BMMC_effect)) +
  geom_abline() +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_errorbarh(aes(xmin = PBMC_lower, xmax = PBMC_upper, color = PBMC_FDR)) +
  geom_errorbar(aes(ymin = BMMC_lower, ymax = BMMC_upper, color = BMMC_FDR)) +
  scale_color_manual(values = c("red", "black")) +
  geom_label(aes(label = curated_cluster)) +
  theme_mrl() +
  theme(legend.position = c(1,0),
        legend.justification = c(1,0),
        legend.title = element_blank()) +
  labs(title = "tissue comparison", x = "PBMC LFC", y = "BMMC LFC")
  


sccomp_results <- plot_grid(plotlist = c(per_tissue_plots, list(comparison_plot)), nrow = 1)

frequencies <- meta %>%
  #dplyr::filter(percent.BCR < 1) %>%
  group_by(source, condition, id, curated_cluster, .drop = F) %>%
  dplyr::summarize(count = n()) %>%
  #pivot_wider(names_from = curated_cluster, values_from = count)
  ungroup() %>%
  group_by(source, condition, id) %>%
  mutate(freq = count / sum(count)) %>%
  left_join(labels) %>%
  mutate(cluster_label = fct_reorder(paste(
    curated_cluster, "\n", cluster_label
  ), as.numeric(curated_cluster))) 

plots <- frequencies %>%
  group_by(curated_cluster, cluster_label) %>%
  nest() %>%
  mutate(
    plot = purrr::map2(
      data,
      paste(curated_cluster),
      ~ ggplot(.x,
        aes(condition, freq)
      ) +
        geom_boxplot(aes(fill = condition), outlier.shape = NA) +
        #geom_boxplot(width = 0.5, outlier.shape = NA, position = position_nudge(-0.3)) +
        #geom_point(aes(size = count), position = position_nudge(0.2), alpha = 0.5) +
        #stat_summary() +
        #geom_jitter(fill = "grey", shape = 21, color = "black") +
        facet_wrap(.~source, nrow = 1, strip.position = "bottom") +
        theme_mrl() +
        scale_y_continuous(limits = c(0, 0.5)) +
        #scale_y_log10() +
        scale_alpha_continuous(range = c(0.5, 1)) +
        scale_size_continuous(trans = "log10") +
        # ggsignif::geom_signif(comparisons = list(c("HD", "MM")),
        #                       map_signif_level = c("***"=0.001, "**"=0.01, "*"=0.05, " " = 1),
        #                       y_position = 0.49,
        #                       margin_top = 0,
        #                       tip_length = 0) +
        ggsignif::geom_signif(data = p_val_metrics %>% dplyr::filter(curated_cluster == .y),
                              aes(xmin = start, xmax = end, annotations = label, y_position = y),
                              map_signif_level = c("***"=0.001, "**"=0.01, "*"=0.05, " " = 1),
                              margin_top = 0,
                              tip_length = 0, manual = TRUE) +
        ggtitle(.y) +
        theme(legend.position = "none",
              axis.text.x = element_blank(),
              axis.text.y = element_blank(),
              axis.title.x = element_blank(),
              axis.ticks.x = element_blank(),
              axis.ticks.y = element_blank(),
              axis.title.y = element_blank(),
              plot.margin = unit(c(0.5,0.2,0.5,-2), "cm")) +
        scale_fill_jco()
      
      
    )
  ) %>%
  arrange(curated_cluster) %>%
  pull(plot) 

legend <- get_legend(
  plots[[1]] + theme(
    legend.position = "left",
    legend.text = element_text(size = rel(2)),
    legend.title = element_text(size = rel(2)),
    legend.justification = c(0, 1)
  )
)

legend <- list(legend)

plots[[1]] <- plots[[1]] + theme(
  axis.title.y = element_text(
    hjust = 0.5,
    vjust = 0.5,
    angle = 90,
    size = rel(2)
  ),
  axis.text.y = element_text(size = 12, face = "bold"),
  axis.ticks.y = element_line(color = "black")
) +
  ylab("Frequencies\n")

freq_plot <- plot_grid(plotlist = c(legend,plots), nrow = 1, axis = "lbrt", align = "hv")

plot_grid(freq_plot, sccomp_results, ncol = 1)

ggsave(file.path(manuscript_figures_path, "frequency.pdf"), width = 35, height = 35, units = "cm")

Custom genesets enrichment analysis

We use transcriptpmic signatures from other publications to characterize NK cells clusters:

genesets_path <- file.path("data", "List Pathways_clusters.xlsx")
genesets <- readxl::excel_sheets(genesets_path) %>%
  lapply(function(tab_name) {
    
    tab <- readxl::read_excel(genesets_path, tab_name)
    
    if(colnames(tab)[1] != "gene") {
      tab <- readxl::read_excel(genesets_path, tab_name, col_names = F) %>%
        dplyr::rename(gene = 1) %>%
        mutate(geneset = tab_name) %>%
        dplyr::select(geneset, gene)
    } 
    else 
      {
      tab <- tab %>%
        dplyr::select(gene, avg_logFC) %>%
        mutate(geneset = ifelse(avg_logFC > 0, paste0(tab_name, "_up"), paste0(tab_name, "_dn"))) %>%
        dplyr::select(geneset, gene)
    }
    
    return(tab)
  }) %>%
  bind_rows()

genesets_cleaned <- genesets %>%
  dplyr::mutate(geneset = gsub(" ", "_", geneset))

Raw genesets

Run module scoring (optional, not run interactively by default).

genesets_list <- genesets_cleaned %>%
  group_by(geneset) %>%
  summarize(gene = list(gene)) %>%
  deframe()

seurat <- AddModuleScore(seurat, genesets_list, name = paste0(names(genesets_list), "xxx"))

geneset_results <- seurat@meta.data %>% dplyr::select(matches("xxx[0-9]*$")) %>%
  dplyr::rename_with(~str_replace(.,"xxx[0-9]*$", "")) %>%
  as.matrix()


qsave(geneset_results, "results/NK_analysis/clustering_typing/custom_NK_genesets.qs")

Regress library size effect on module scores:

geneset_results <- qread("results/NK_analysis/clustering_typing/custom_NK_genesets.qs")

lib_size <- meta %>%
  dplyr::select(cell.id, nCount_RNA) %>%
  deframe()

lib_size <- lib_size[rownames(geneset_results)]

par(mfcol=c(2,ncol(geneset_results)))

# regress library size effect
geneset_results_norm <- apply(geneset_results, 2, function(val, libs) {
  
  
  
  
  res <- residuals(lm(val ~ libs))
  
  
  print(plot(libs, val, main = "raw"))
  print(plot(libs, res, main = "regressed"))
  
  rescale(res)
}, libs = log(lib_size))

# convert to dataframe to joint metadata like cluster calls
genes_to_plot <- geneset_results_norm %>%
  as.data.frame() %>%
  tibble::rownames_to_column("cell.id")

# summarise to cluster level
hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -c(cell.id, curated_cluster)) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), sd = sd(val))


# back to matrix format
hm_genes <- hm_genes %>%
  dplyr::select(-sd) %>%
  dplyr::filter(!grepl("_dn$", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()
  #apply(2, rescale)



hm_raw <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        column_title = "raw genesets, z-score",
                        #row_split = 5,
                        column_title_gp =  gpar(cex = 1, fontsize = 16, fontface = "bold"),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "z-score")) 



#hm <- draw(hm)
#ori_col_order <- column_order(hm)
#ori_row_order <- row_order(hm)

hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -c(cell.id, curated_cluster)) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), sd = sd(val))

hm_genes <- hm_genes %>%
  dplyr::select(-sd) %>%
  dplyr::filter(!grepl("_dn$", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  #scale()
  apply(2, rescale)


row_clust <- t(hm_genes) %>% dist() %>% hclust()

hm2_raw <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        cluster_rows = row_clust,
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        column_title = "raw genesets, [0,1] score",
                        #row_split = 5,
                        row_dend_reorder = row_clust %>% as.dendrogram() %>% order.dendrogram() %>% {.*-1},
                        column_title_gp = gpar(cex = 1, fontsize = 16, fontface = "bold"),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "[0,1] score"))

hm_raw

hm2_raw

#hm <- draw(hm)
#ori_col_order <- column_order(hm)
#ori_row_order <- row_order(hm)

cleaned genesets

Run module scoring (optional, not run interactively by default).

genesets_list <- genesets_cleaned %>%
  dplyr::filter(!grepl("RP[SL]", gene)) %>%
  group_by(geneset) %>%
  summarize(gene = list(gene)) %>%
  deframe()

seurat <- AddModuleScore(seurat, genesets_list, name = paste0(names(genesets_list), "xxx"))

geneset_results <- seurat@meta.data %>% dplyr::select(matches("xxx[0-9]*$")) %>%
  dplyr::rename_with(~str_replace(.,"xxx[0-9]*$", "")) %>%
  as.matrix()

qsave(geneset_results, "results/NK_analysis/clustering_typing/custom_NK_genesets_cleaned.qs")

Regress library size effect on module scores:

geneset_results <- qread("results/NK_analysis/clustering_typing/custom_NK_genesets_cleaned.qs")
lib_size <- meta %>%
  dplyr::select(cell.id, nCount_RNA) %>%
  deframe()

lib_size <- lib_size[rownames(geneset_results)]

par(mfcol=c(2,ncol(geneset_results)))

# regress library size effect
geneset_results_norm_clean <- apply(geneset_results, 2, function(val, libs) {
  
  
  
  
  res <- residuals(lm(val ~ libs))
  
  
  print(plot(libs, val, main = "raw"))
  print(plot(libs, res, main = "regressed"))
  
  rescale(res)
}, libs = log(lib_size))

# convert to dataframe to joint metadata like cluster calls
genes_to_plot <- geneset_results_norm_clean %>%
  as.data.frame() %>%
  tibble::rownames_to_column("cell.id")

# summarise to cluster level
hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -c(cell.id, curated_cluster)) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), sd = sd(val))


# back to matrix format
hm_genes <- hm_genes %>%
  dplyr::select(-sd) %>%
  dplyr::filter(!grepl("_dn$", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()
  #apply(2, rescale)



hm_cleaned <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        column_title = "cleaned (no ribo genes) genesets, z-score",
                        #row_split = 5,
                        column_title_gp =  gpar(cex = 1, fontsize = 16, fontface = "bold"),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "z-score")) 



#hm <- draw(hm)
#ori_col_order <- column_order(hm)
#ori_row_order <- row_order(hm)

hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -c(cell.id, curated_cluster)) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), sd = sd(val))

hm_genes <- hm_genes %>%
  dplyr::select(-sd) %>%
  dplyr::filter(!grepl("_dn$", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  #scale()
  apply(2, rescale)



hm2_cleaned <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 0,
                        column_names_centered = T,
                        column_split = 5,
                        column_title = "cleaned (no ribo genes) genesets, [0,1] score",
                        #row_split = 5,
                        column_title_gp =  gpar(cex = 1, fontsize = 16, fontface = "bold"),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "[0,1] score"))

hm_cleaned

hm2_cleaned

#hm <- draw(hm)
#ori_col_order <- column_order(hm)
#ori_row_order <- row_order(hm)

library(patchwork)


grobs <- lapply(c(hm_cleaned, hm2_cleaned, hm_raw, hm2_raw), function(hm) grid.grabExpr(draw(hm)))

pdf(file.path(manuscript_figures_path, "custom_genesets_hm.pdf"), width = 11, height = 10)
do.call(grid.arrange, grobs)
dev.off()

MELD scores

results_dir <- "results/NK_analysis/phate_meld_output/"

meld_files <- list.files(results_dir, full.names = F, pattern = "LLH.csv")

base_name <- gsub("_NK_MELD_LLH\\.csv$", "", meld_files)

meld_results <- tibble(files = meld_files, dir = results_dir) %>%
  mutate(base_name = gsub("_NK_MELD_LLH\\.csv$", "", files)) %>%
  mutate(results = purrr::map(here(dir, files), ~read_csv(.x) %>%
                                pivot_longer(-index, values_to = "score", names_to = "comparison"))) %>%
  unnest() %>%
  dplyr::select(-c("files", "dir", "base_name")) %>%
  dplyr::filter(comparison %in% c("MM", "BMMC")) %>%
  dplyr::filter(!is.na(score)) %>%
  mutate(comparison = paste0("MELD_", comparison)) %>%
  ungroup() %>%
  pivot_wider(names_from = comparison, values_from = score) %>%
  right_join(meta, by = c("index" = "cell.id"))




p1 <- hex_plots(meld_results,
              umap_settings[1],
              umap_settings[2],
              "MELD_BMMC",
          facets = .~condition,
              CI = 0.1,
          divergent_scale = T) +
      theme(legend.justification = c(1.1,-0.1),
            legend.direction = "vertical",
            aspect.ratio = 0.7,
            legend.text = element_text(
              angle = 0,
              hjust = 1,
              vjust = 0
            ),
      )

p2 <- hex_plots(meld_results,
              umap_settings[1],
              umap_settings[2],
              "MELD_MM",
          facets = .~source,
              CI = 0.1,
          divergent_scale = T) +
      theme(legend.justification = c(1.1,-0.1),
            legend.direction = "vertical",
            aspect.ratio = 0.7,
            legend.text = element_text(
              angle = 0,
              hjust = 1,
              vjust = 0
            ),
      )


plot_grid(p1,p2, ncol = 1)

ggsave(file.path(manuscript_figures_path, "MELD_UMAP.pdf"), width = 10, height = 7)


p1 <- meld_results %>%
  left_join(labels) %>%
  ggplot(aes(reorder(cluster_label, as.numeric(curated_cluster)), MELD_BMMC)) +
  geom_violin(aes(fill = reorder(cluster_label, as.numeric(curated_cluster)))) +
  facet_grid(.~condition) +
  geom_hline(yintercept = 0.5) +
  theme_mrl() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank()) +
  scale_fill_manual(values = palette)

p2 <- meld_results %>%
  left_join(labels) %>%
  ggplot(aes(reorder(cluster_label, as.numeric(curated_cluster)), MELD_MM)) +
  geom_violin(aes(fill = reorder(cluster_label, as.numeric(curated_cluster)))) +
  facet_grid(.~source) +
  geom_hline(yintercept = 0.5) +
  theme_mrl() +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "none") +
  scale_fill_manual(values = palette)


plot_grid(p1,p2, ncol = 1, axis = "bltr", align = "hv")

ggsave(file.path(manuscript_figures_path, "MELD_violins.pdf"), width = 10, height = 7)

trajectory

R slingshot analysis

RuNK <- qs::qread("results/NK_analysis/trajectory_pseudotime/slingshot_pto.qs")

source("src/helpers/slingshot_layout_helpers.R")

sling_meta <- meta %>%
  dplyr::select(cell.id, UMAP_1, UMAP_2, id, condition, source, curated_cluster) %>%
  left_join(sling_cell_data(RuNK), by = c("cell.id" = "id"))

Average pseudotime across clusters, ordered by lineages

pseudotime_tree <- pseudotime_branching(RuNK, graphlayouts::layout_with_stress, "layout_only", offset = 1) +
  theme_graph() +
  theme(text = element_text(family = "Arial"), 
        panel.border = element_rect(color = "black", fill = NA, linetype = "solid"),
        axis.text.x = element_text(),
        axis.ticks.x = element_line(),
        axis.title.x = element_text(),
        legend.title = element_blank()) +
  labs(x = "pseudotime") +
  scale_fill_manual(values = palette)

pseudotime_tree

Main branch

sling_cell_data(RuNK) %>%
  ggplot(aes(cluster, fill = grepl("1", lineage))) +
  geom_bar(position = "fill") +
  ggtitle("Proportion of cells associated with lineage 1 by distance to the principal curves")

PT_hm <- sling_meta %>%
  dplyr::filter(!cluster %in% c(6,8)) %>%
  ggplot(aes(pseudotime, condition)) +
  stat_density(aes(fill = after_stat(scaled)),
               geom = "raster",
               position = "identity") +
  #stat_summary(geom = "point", fun = function(x) exp(mean(log(x)))) +
  theme_mrl() +
  theme(strip.text.y.left = element_text(angle = 0),
        axis.title.y = element_blank(),
        strip.placement = "outside",
        legend.position = "none") +
  scale_x_continuous(expand = expand_scale(), limits = c(15,50), oob = squish)
  #scale_fill_viridis(trans = ggforce::power_trans(2)) +
  


PT_hm_tissue <- PT_hm + 
  facet_grid(reorder(cluster, pseudotime)~ source, switch = "y") +
  ggtitle("density of cells along pseudotime ranks for each cluster")
PT_hm <- PT_hm + facet_grid(reorder(cluster, pseudotime)~ "overall", switch = "y")

PT_hm <- plot_grid(PT_hm_tissue, PT_hm, nrow = 1,
          align = "hv",
          axis = "tblr",
          rel_widths = c(2,1.2))

library(mgcv)
pt_genes <- c(
"GZMA",
"B3GAT1",
"GZMB",
"FCGR3A",
"PRF1",
"OASL",
#"KLRC2",
"KLRK1",
"NCR1",
"CCL3",
"CD69",
"RELB",
#"TCF7",
"IL7R",
"IL18R1",
"SELL", 
"NCAM1", 
"GZMH", 
"IFNGR1"
)
genes_to_plot <- seurat@assays$RNA@data[pt_genes,] %>% #marker_genes_to_plot
  as.matrix() %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column("cell.id") %>%
  pivot_longer(-cell.id, names_to = "gene", values_to = "expr")


sling_pt_genes <- sling_meta %>%
  dplyr::filter(!cluster %in% c(6,8)) %>%
  dplyr::select(cell.id, pseudotime) %>%
  left_join(genes_to_plot) %>%
  group_by(gene) %>%
  nest() %>%
  mutate(pred_grid = purrr::map(data, ~seq(
                                      min(.x$pseudotime),
                                      max(.x$pseudotime),
                                      length.out = 1000
                                    ))) %>%
  mutate(smooth_mean = purrr::map2(data, pred_grid, ~ gam(expr ~ s(pseudotime, bs = "cs"),
                                              data = .x) %>%
                                    predict(newdata = data.frame(pseudotime = .y)) %>%
                                    rescale())) %>%
  dplyr::select(gene, pred_grid, smooth_mean) %>%
  unnest()

pdf(file.path(manuscript_figures_path, "pt_genes.pdf"), width = 11, height = 5)

tidyHeatmap::heatmap(ungroup(sling_pt_genes),
        gene,
        pred_grid,
        smooth_mean,
        palette_value = colorRamp2(1:10/10, viridis(10)),
        cluster_columns = FALSE,
        column_title = NA)
dev.off()

PT_MELD <- sling_meta %>%
  dplyr::filter(!cluster %in% c(6,8)) %>%
  left_join(meld_results %>% dplyr::select(cell.id = index, MELD_MM, MELD_BMMC)) %>%
  ggplot(aes(pseudotime, MELD_MM)) +
  stat_density2d(aes(fill = cluster), contour_var = "ndensity", geom = "polygon", alpha = 0.3, color = "black", bins = 4) +
  stat_summary_xy(aes(group = cluster, label = cluster), geom = "label", .fun.x = "median", .fun.y = "median") +
  theme_mrl() +
  theme(legend.position = c(0,1), legend.justification = c(0,1)) +
  scale_fill_manual(values = setNames(palette[1:8], 1:8)) +
  guides(fill = guide_legend(title = "lineage 1 clusters", override.aes = list(alpha = 0.7)))

PT_distro <- sling_meta %>%
  dplyr::filter(!cluster %in% c(6,8)) %>%
  ggplot(aes(pseudotime)) +
  geom_density(aes(fill = condition), alpha = 0.5) +
  facet_grid(source~.) +
  theme_mrl() +
  theme(legend.position = c(0,1), legend.justification = c(0,1)) +
  scale_fill_jco()


sling_meta %>%
  dplyr::filter(!cluster %in% c(6,8)) %>%
  ggplot(aes(condition,pseudotime, group = id, color = condition)) +
  stat_summary(position = position_dodge(width = 0.1), geom = "label", aes(label = id)) +
  facet_grid(source~cluster) +
  theme_mrl()

Visualize the lineages in low dim space

PT_UMAP <- slingLineages(RuNK) %>%
  enframe("lineage", "cluster") %>%
  unnest() %>%
  mutate(code = T) %>%
  pivot_wider(names_from = "lineage",
              values_from = "code",
              values_fill = F) %>%
  right_join(sling_meta) %>%
  pivot_longer(matches("Lineage[0-9]+"),
               names_to = "Lineage",
               values_to = "belong") %>%
  dplyr::filter(belong) %>%
  group_by(Lineage) %>%
  mutate(groups = ntile(pseudotime, 10)) %>%
  group_by(Lineage, groups) %>%
  summarize(
    pseudotime = median(pseudotime),
    UMAP_1 = median(UMAP_1),
    UMAP_2 = median(UMAP_2)
  ) %>%
  group_by(Lineage) %>%
  mutate(pseudotime = rank(pseudotime)) %>%
  nest() %>%
  mutate(smoothed_coord = purrr::map(data, ~ {
    plot.new()
    list(x = .x$UMAP_1, y = .x$UMAP_2) %>%
      xspline(shape = -0.5, draw = F) %>%
      data.frame()
  })) %>%
  dplyr::select(-data) %>%
  unnest() %>%
  dplyr::rename(UMAP_1 = x, UMAP_2 = y) %>%
  group_by(Lineage) %>%
  mutate(group = ntile(n = 4)) %>%
  ggplot(aes(UMAP_1, UMAP_2)) +
  stat_summary_hex(
    data = sling_meta,
    aes(z = pseudotime, color = after_scale(fill)),
    bins = 100
  ) +
  #scale_fill_manual(values = palette) +
  geom_path(aes(group = paste(Lineage, group)), 
            size = 3,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed"),
    show.legend = F) +
  geom_path(
    aes(color = Lineage, group = paste(Lineage, group)),
    size = 1,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed")
  ) +
  theme_void() +
  theme(panel.border = element_rect(
          color = "black",
          linewidth = 1,
          fill = NA,
          linetype = "solid"
        ), plot.margin = unit(c(0.05,0.05,0.05,0.05), "npc"),
        aspect.ratio = 0.7) +
  scale_fill_viridis_c(limits = c(15,50), oob = squish) +
  guides(color = guide_legend(title = element_blank(), override.aes = list(fill = NA)),
         fill = guide_colorbar(title = "pseudotime")) +
  scale_color_manual(values = c("lightblue", "darkred", "orchid4"))

# combine plots for paper figure


PT_combined <- plot_grid(plot_grid(PT_UMAP, pseudotime_tree, nrow = 1), 
                         PT_hm, 
                         plot_grid(PT_MELD, PT_distro, ggplot(), nrow = 1),
                         ncol = 1)
PT_combined

extrafont::loadfonts()

ggsave(file.path(manuscript_figures_path, "pseudotime_results.pdf"),  width = 20, height = 14)

alternate cluster 8 start

RuNK <- qread("results/NK_analysis/trajectory_pseudotime/slingshot_pto_c8.qs")

sling_meta <- meta %>%
  dplyr::select(cell.id, UMAP_1, UMAP_2, id, condition, source, curated_cluster) %>%
  left_join(sling_cell_data(RuNK), by = c("cell.id" = "id"))


pseudotime_tree <- pseudotime_branching(RuNK, graphlayouts::layout_with_stress, "layout_only", offset = 1) +
  theme_graph() +
  theme(text = element_text(family = "Arial"), 
        panel.border = element_rect(color = "black", fill = NA, linetype = "solid"),
        axis.text.x = element_text(),
        axis.ticks.x = element_line(),
        axis.title.x = element_text(),
        legend.title = element_blank()) +
  labs(x = "pseudotime") +
  scale_fill_manual(values = palette)



PT_UMAP <- slingLineages(RuNK) %>%
  enframe("lineage", "cluster") %>%
  unnest() %>%
  mutate(code = T) %>%
  pivot_wider(names_from = "lineage",
              values_from = "code",
              values_fill = F) %>%
  right_join(sling_meta) %>%
  pivot_longer(matches("Lineage[0-9]+"),
               names_to = "Lineage",
               values_to = "belong") %>%
  dplyr::filter(belong) %>%
  group_by(Lineage) %>%
  mutate(groups = ntile(pseudotime, 10)) %>%
  group_by(Lineage, groups) %>%
  summarize(
    pseudotime = median(pseudotime),
    UMAP_1 = median(UMAP_1),
    UMAP_2 = median(UMAP_2)
  ) %>%
  group_by(Lineage) %>%
  mutate(pseudotime = rank(pseudotime)) %>%
  nest() %>%
  mutate(smoothed_coord = purrr::map(data, ~ {
    plot.new()
    list(x = .x$UMAP_1, y = .x$UMAP_2) %>%
      xspline(shape = -0.5, draw = F) %>%
      data.frame()
  })) %>%
  dplyr::select(-data) %>%
  unnest() %>%
  dplyr::rename(UMAP_1 = x, UMAP_2 = y) %>%
  group_by(Lineage) %>%
  mutate(group = ntile(n = 4)) %>%
  ggplot(aes(UMAP_1, UMAP_2)) +
  stat_summary_hex(
    data = sling_meta,
    aes(z = pseudotime, color = after_scale(fill)),
    bins = 100
  ) +
  #scale_fill_manual(values = palette) +
  geom_path(aes(group = paste(Lineage, group)), 
            size = 3,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed"),
    show.legend = F) +
  geom_path(
    aes(color = Lineage, group = paste(Lineage, group)),
    size = 1,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed")
  ) +
  theme_void() +
  theme(panel.border = element_rect(
          color = "black",
          linewidth = 1,
          fill = NA,
          linetype = "solid"
        ), plot.margin = unit(c(0.05,0.05,0.05,0.05), "npc"),
        aspect.ratio = 0.7) +
  scale_fill_viridis_c(limits = c(15,50), oob = squish) +
  guides(color = guide_legend(title = element_blank(), override.aes = list(fill = NA)),
         fill = guide_colorbar(title = "pseudotime")) +
  scale_color_manual(values = c("lightblue", "darkred", "orchid4"))

plot_grid(pseudotime_tree, PT_UMAP, ncol = 1)

extrafont::loadfonts()
ggsave(file.path(manuscript_figures_path, "slingshot_C8_start.pdf"),  width = 10, height = 14)

alternate pbmc vs bmmc separation

RuNK <- qread("results/NK_analysis/trajectory_pseudotime/slingshot_pto_c5_pbmc.qs")

sling_meta <- meta %>%
  dplyr::select(cell.id, UMAP_1, UMAP_2, id, condition, source, curated_cluster) %>%
  left_join(sling_cell_data(RuNK), by = c("cell.id" = "id"))


pseudotime_tree <- pseudotime_branching(RuNK, graphlayouts::layout_with_stress, "layout_only", offset = 1) +
  theme_graph() +
  theme(text = element_text(family = "Arial"), 
        panel.border = element_rect(color = "black", fill = NA, linetype = "solid"),
        axis.text.x = element_text(),
        axis.ticks.x = element_line(),
        axis.title.x = element_text(),
        legend.title = element_blank()) +
  labs(x = "pseudotime") +
  scale_fill_manual(values = palette)



PT_UMAP <- slingLineages(RuNK) %>%
  enframe("lineage", "cluster") %>%
  unnest() %>%
  mutate(code = T) %>%
  pivot_wider(names_from = "lineage",
              values_from = "code",
              values_fill = F) %>%
  right_join(sling_meta) %>%
  pivot_longer(matches("Lineage[0-9]+"),
               names_to = "Lineage",
               values_to = "belong") %>%
  dplyr::filter(belong) %>%
  group_by(Lineage) %>%
  mutate(groups = ntile(pseudotime, 10)) %>%
  group_by(Lineage, groups) %>%
  summarize(
    pseudotime = median(pseudotime),
    UMAP_1 = median(UMAP_1),
    UMAP_2 = median(UMAP_2)
  ) %>%
  group_by(Lineage) %>%
  mutate(pseudotime = rank(pseudotime)) %>%
  nest() %>%
  mutate(smoothed_coord = purrr::map(data, ~ {
    plot.new()
    list(x = .x$UMAP_1, y = .x$UMAP_2) %>%
      xspline(shape = -0.5, draw = F) %>%
      data.frame()
  })) %>%
  dplyr::select(-data) %>%
  unnest() %>%
  dplyr::rename(UMAP_1 = x, UMAP_2 = y) %>%
  group_by(Lineage) %>%
  mutate(group = ntile(n = 4)) %>%
  ggplot(aes(UMAP_1, UMAP_2)) +
  stat_summary_hex(
    data = sling_meta,
    aes(z = pseudotime, color = after_scale(fill)),
    bins = 100
  ) +
  #scale_fill_manual(values = palette) +
  geom_path(aes(group = paste(Lineage, group)), 
            size = 3,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed"),
    show.legend = F) +
  geom_path(
    aes(color = Lineage, group = paste(Lineage, group)),
    size = 1,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed")
  ) +
  theme_void() +
  theme(panel.border = element_rect(
          color = "black",
          linewidth = 1,
          fill = NA,
          linetype = "solid"
        ), plot.margin = unit(c(0.05,0.05,0.05,0.05), "npc"),
        aspect.ratio = 0.7) +
  scale_fill_viridis_c(limits = c(15,50), oob = squish) +
  guides(color = guide_legend(title = element_blank(), override.aes = list(fill = NA)),
         fill = guide_colorbar(title = "pseudotime")) +
  scale_color_manual(values = c("lightblue", "darkred", "orchid4"))

plot_grid(pseudotime_tree + ggtitle("PBMC only"), PT_UMAP, ncol = 1)

extrafont::loadfonts()
ggsave(file.path(manuscript_figures_path, "slingshot_pbmc.pdf"),  width = 10, height = 14)

RuNK <- qread("results/NK_analysis/trajectory_pseudotime/slingshot_pto_c5_bmmc.qs")

sling_meta <- meta %>%
  dplyr::select(cell.id, UMAP_1, UMAP_2, id, condition, source, curated_cluster) %>%
  left_join(sling_cell_data(RuNK), by = c("cell.id" = "id"))


pseudotime_tree <- pseudotime_branching(RuNK, graphlayouts::layout_with_stress, "layout_only", offset = 1) +
  theme_graph() +
  theme(text = element_text(family = "Arial"), 
        panel.border = element_rect(color = "black", fill = NA, linetype = "solid"),
        axis.text.x = element_text(),
        axis.ticks.x = element_line(),
        axis.title.x = element_text(),
        legend.title = element_blank()) +
  labs(x = "pseudotime") +
  scale_fill_manual(values = palette)



PT_UMAP <- slingLineages(RuNK) %>%
  enframe("lineage", "cluster") %>%
  unnest() %>%
  mutate(code = T) %>%
  pivot_wider(names_from = "lineage",
              values_from = "code",
              values_fill = F) %>%
  right_join(sling_meta) %>%
  pivot_longer(matches("Lineage[0-9]+"),
               names_to = "Lineage",
               values_to = "belong") %>%
  dplyr::filter(belong) %>%
  group_by(Lineage) %>%
  mutate(groups = ntile(pseudotime, 10)) %>%
  group_by(Lineage, groups) %>%
  summarize(
    pseudotime = median(pseudotime),
    UMAP_1 = median(UMAP_1),
    UMAP_2 = median(UMAP_2)
  ) %>%
  group_by(Lineage) %>%
  mutate(pseudotime = rank(pseudotime)) %>%
  nest() %>%
  mutate(smoothed_coord = purrr::map(data, ~ {
    plot.new()
    list(x = .x$UMAP_1, y = .x$UMAP_2) %>%
      xspline(shape = -0.5, draw = F) %>%
      data.frame()
  })) %>%
  dplyr::select(-data) %>%
  unnest() %>%
  dplyr::rename(UMAP_1 = x, UMAP_2 = y) %>%
  group_by(Lineage) %>%
  mutate(group = ntile(n = 4)) %>%
  ggplot(aes(UMAP_1, UMAP_2)) +
  stat_summary_hex(
    data = sling_meta,
    aes(z = pseudotime, color = after_scale(fill)),
    bins = 100
  ) +
  #scale_fill_manual(values = palette) +
  geom_path(aes(group = paste(Lineage, group)), 
            size = 3,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed"),
    show.legend = F) +
  geom_path(
    aes(color = Lineage, group = paste(Lineage, group)),
    size = 1,
    arrow = arrow(angle = 30, length = unit(0.2, "inches"), type = "closed")
  ) +
  theme_void() +
  theme(panel.border = element_rect(
          color = "black",
          linewidth = 1,
          fill = NA,
          linetype = "solid"
        ), plot.margin = unit(c(0.05,0.05,0.05,0.05), "npc"),
        aspect.ratio = 0.7) +
  scale_fill_viridis_c(limits = c(15,50), oob = squish) +
  guides(color = guide_legend(title = element_blank(), override.aes = list(fill = NA)),
         fill = guide_colorbar(title = "pseudotime")) +
  scale_color_manual(values = c("lightblue", "darkred", "orchid4"))

plot_grid(pseudotime_tree + ggtitle("BMMC only"), PT_UMAP, ncol = 1)

extrafont::loadfonts()
ggsave(file.path(manuscript_figures_path, "slingshot_bmmc.pdf"),  width = 10, height = 14)

Exploration of cluster 5 outliers: activated CD56 bright NK cells

sling_cell_data(RuNK) %>%
  dplyr::rename(cell.id = id) %>%
  left_join(meta) %>%
  mutate(pseudotime = rank(pseudotime)) %>%
  dplyr::filter(curated_cluster == 5) %>%
  ggplot(aes(curated_cluster, pseudotime, fill = id)) +
  geom_violin(scale = "width", adjust = 1/2) +
  facet_grid(source ~ condition)

sling_cell_data(RuNK) %>%
  dplyr::rename(cell.id = id) %>%
  left_join(meta) %>%
  ggplot(aes(UMAP_1, UMAP_2)) +
  geom_point(aes(color = rank(pseudotime) > 20000 & curated_cluster == 5), size = 0.5, alpha = 0.5) +
  geom_density2d() +
  theme_umap() +
  facet_grid(source ~ condition)

sling_cell_data(RuNK) %>%
  dplyr::rename(cell.id = id) %>%
  left_join(meta) %>%
  mutate(activated_bright = rank(pseudotime) > 20000 & curated_cluster == 5) %>%
  dplyr::filter(curated_cluster == 5) %>%
  group_by(condition, source,id, activated_bright) %>%
  summarize(count = n()) %>%
  group_by(condition, source) %>%
  mutate(freq = count / sum(count)) %>%
  dplyr::select(-count) %>%
  pivot_wider(names_from = activated_bright, values_from =freq, values_fill = 0) %>%
  dplyr::rename(activated_bright = `TRUE`) %>%
  ggplot(aes(condition, activated_bright*100)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  facet_grid(.~source) +
  #scale_y_log10() +
  ggsignif::geom_signif(comparisons = list(c("HD", "MM"))) +
  theme_mrl()

there is a small subset of CD56 bright NK cells with high NFkB activation and dowmodulation of CD56 bright population markers:

genes_to_plot <- seurat@assays$RNA@data[curated_cluster_markers,] %>% #marker_genes_to_plot
  as.matrix() %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename_with(~paste0("gene_",.)) %>%
  tibble::rownames_to_column("cell.id")

hm_genes <- sling_cell_data(RuNK) %>%
  dplyr::rename(cell.id = id) %>%
  left_join(meta) %>%
  mutate(curated_cluster = ifelse(rank(pseudotime) > 20000 & curated_cluster == 5, 
                                  "5_act", 
                                  curated_cluster)) %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -!matches("^gene_")) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), pct = sum(val > 0) / n() * 100)

pct_genes <- hm_genes %>%
  dplyr::select(-avg) %>%
  mutate(feat = gsub("gene_", "", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "pct") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()

hm_genes <- hm_genes %>%
  dplyr::select(-pct) %>%
  mutate(feat = gsub("gene_", "", feat)) %>%
  pivot_wider(names_from = "feat",values_from = "avg") %>%
  tibble::column_to_rownames("curated_cluster") %>%
  as.matrix() %>%
  scale()



hm <- ComplexHeatmap::Heatmap(t(hm_genes),
                        col = colorRamp2(quantile(hm_genes, c(0.025,0.5,0.975)),
                                         c("royalblue", "grey", "red4")),
                        row_names_gp = gpar(fontsize = 12, fontface = "bold"),
                        column_names_gp = gpar(fontsize = 16, fontface = "bold"),
                        column_names_rot = 45,
                        column_names_centered = F,
                        column_split = 5,
                        row_split = 5,
                        column_title_gp = gpar(cex = 0, fontsize = 0),
                        row_title_gp = gpar(cex = 0, fontsize = 0),
                        heatmap_legend_param = list(title = "z-score")) 



hm <- draw(hm)

ori_col_order <- column_order(hm)
ori_row_order <- row_order(hm)

GSEA from geneset databases

main enriched genesets

gsea_results <- qread("results/NK_analysis/clustering_typing/gsea_results.qs")

comparison_pairs <- vapply(gsea_results, function(x) x[["pair"]], "character")

names(gsea_results) <- comparison_pairs


gsea_results <- gsea_results[c("Cl_3vs1", "Cl_7vs4")] %>%
  enframe("comparison", "results") 


gsea_results <- gsea_results %>% 
  mutate(db = purrr::map(results, names)) %>% 
  unnest() %>%
  dplyr::filter(! db %in% c("ranks", "pair")) %>%
  unnest(results)

KEGG

signif_gs <- gsea_results %>%
  dplyr::filter(padj < 0.05 & db == "kegg") %>%
  mutate(sign = ifelse(NES > 0, "up", "dn"),
         comparison = str_replace(comparison, "Cl_", "cluster "),
         pathway = str_replace(pathway, "KEGG_", "")) %>%
  ggplot(aes(NES, reorder(pathway, NES))) +
  geom_point(aes(size = -log10(padj), color = sign)) +
  geom_text(aes(label = pathway, hjust = ifelse(NES > 0, 1.1, -0.1))) +
  facet_wrap(comparison~db, scales = "free") +
  theme_mrl() +
  theme(aspect.ratio = 0.8,
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        legend.position = c(1,0),
        legend.justification = c(1,0)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue")) +
  geom_vline(xintercept = 0, linetype = "dashed")



volcano_gs <- gsea_results %>%
  dplyr::filter(db == "kegg") %>%
  mutate(sign = ifelse(padj >= 0.05, "n.s.", ifelse(NES > 0, "up", "dn")),
         comparison = str_replace(comparison, "Cl_", "cluster "),
         pathway = str_replace(pathway, "KEGG_", "") %>% str_replace_all("_", " ") %>% str_wrap(15)) %>%
  mutate(annotations = ifelse(rank(-abs(NES)) < 20 & padj < 0.05, pathway, ""), .by = comparison) %>%
  ggplot(aes(NES, -log10(pval), color = sign)) +
  geom_point() +
  geom_label_repel(aes(label = annotations),
                   force = 10,
                   xlim = c(-1.5,1.5),
                   show.legend = FALSE) +
  facet_wrap(comparison~db, scales = "free") +
  theme_mrl() +
  theme(aspect.ratio = 1.2,
        legend.position = c(1,0),
        legend.justification = c(1,0)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue", "n.s." = "grey")) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  guides(color = guide_legend(override.aes = list(geom = "point")))


comparison_gs <- gsea_results %>%
  dplyr::filter(db == "kegg") %>%
  group_by(pathway) %>%
  #dplyr::filter(sum(padj < 0.05) > 0) %>%
  ungroup() %>%
  dplyr::select(pathway, comparison, NES, padj) %>%
  pivot_wider(names_from = comparison, values_from = c(NES, padj)) %>%
  mutate(common = ifelse(padj_Cl_3vs1 < 0.05 & padj_Cl_7vs4 < 0.05,
                         ifelse(NES_Cl_3vs1 > 0 & NES_Cl_7vs4 >0, "up", "dn"),
                         "n.s."),
         pathway = str_replace(pathway, "KEGG_", "") %>% str_replace_all("_", " ") %>% str_wrap(15)) %>%
  mutate(annotations = ifelse(common == "n.s.", "",  pathway)) %>%
  ggplot(aes(NES_Cl_3vs1, NES_Cl_7vs4, color = common)) +
  geom_point()+
  geom_label_repel(aes(label = annotations),
                   force = 50,
                   min.segment.length = 0,
                   max.overlaps = 100,
                   show.legend = FALSE) +
  theme_mrl() +
  theme(aspect.ratio = 1.2,
        legend.position = c(0,1),
        legend.justification = c(0,1)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue", "n.s." = "grey")) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  guides(color = guide_legend(override.aes = list(geom = "point"))) +
  scale_x_continuous(expand = expand_scale(1)) +
  scale_y_continuous(expand = expand_scale(1))
  

plot_grid(signif_gs, plot_grid(volcano_gs, comparison_gs, nrow = 1), ncol = 1)

ggsave(file.path(manuscript_figures_path, "gsea_pair_results.pdf"), width = 20, height = 14)

Reactome

gs_db<-"react"
signif_gs <- gsea_results %>%
  dplyr::filter(padj < 0.05 & db == gs_db) %>%
  mutate(sign = ifelse(NES > 0, "up", "dn"),
         comparison = str_replace(comparison, "Cl_", "cluster "),
         pathway = str_replace(pathway, "REACTOME_", "") %>% str_replace_all("_", " ") %>% str_wrap(60)) %>%
  ggplot(aes(y = reorder(pathway, NES))) +
  geom_point(aes(NES, size = -log10(padj), color = sign)) +
  geom_text(aes(NES*0.95, label = pathway, hjust = ifelse(NES > 0, 1, -0))) +
  facet_wrap(comparison~db, scales = "free") +
  theme_mrl() +
  theme(aspect.ratio = 3,
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        legend.position = c(1,0),
        legend.justification = c(1,0)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue")) +
  geom_vline(xintercept = 0, linetype = "dashed")



volcano_gs <- gsea_results %>%
  dplyr::filter(db == gs_db) %>%
  mutate(sign = ifelse(padj >= 0.05, "n.s.", ifelse(NES > 0, "up", "dn")),
         comparison = str_replace(comparison, "Cl_", "cluster "),
         pathway = str_replace(pathway, "REACTOME_", "") %>% str_replace_all("_", " ") %>% str_wrap(15)) %>%
  mutate(annotations = ifelse(rank(-abs(NES)) < 20 & padj < 0.05, pathway, ""), .by = comparison) %>%
  ggplot(aes(NES, -log10(pval), color = sign)) +
  geom_point() +
  geom_label_repel(aes(label = annotations),
                   force = 10,
                   xlim = c(-1.5,1.5),
                   show.legend = FALSE) +
  facet_wrap(comparison~db, scales = "free") +
  theme_mrl() +
  theme(aspect.ratio = 1.2,
        legend.position = c(1,0),
        legend.justification = c(1,0)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue", "n.s." = "grey")) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  guides(color = guide_legend(override.aes = list(geom = "point")))


comparison_gs <- gsea_results %>%
  dplyr::filter(db == gs_db) %>%
  group_by(pathway) %>%
  #dplyr::filter(sum(padj < 0.05) > 0) %>%
  ungroup() %>%
  dplyr::select(pathway, comparison, NES, padj) %>%
  pivot_wider(names_from = comparison, values_from = c(NES, padj)) %>%
  mutate(common = ifelse(padj_Cl_3vs1 < 0.05 & padj_Cl_7vs4 < 0.05,
                         ifelse(NES_Cl_3vs1 > 0 & NES_Cl_7vs4 >0, "up", "dn"),
                         "n.s."),
         pathway = str_replace(pathway, "REACTOME_", "") %>% str_replace_all("_", " ") %>% str_wrap(15)) %>%
  mutate(annotations = ifelse(common == "n.s.", "",  pathway)) %>%
  ggplot(aes(NES_Cl_3vs1, NES_Cl_7vs4, color = common)) +
  geom_point()+
  geom_label_repel(aes(label = annotations),
                   force = 50,
                   min.segment.length = 0,
                   max.overlaps = 200,
                   show.legend = FALSE) +
  theme_mrl() +
  theme(aspect.ratio = 1.2,
        legend.position = c(0,1),
        legend.justification = c(0,1)) +
  scale_color_manual(values = c("up" = "darkred", dn = "royalblue", "n.s." = "grey")) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  guides(color = guide_legend(override.aes = list(geom = "point"))) +
  scale_x_continuous(expand = expand_scale(1)) +
  scale_y_continuous(expand = expand_scale(1))
  

plot_grid(signif_gs, plot_grid(volcano_gs, comparison_gs, nrow = 1), ncol = 1, rel_heights = c(2,1))

ggsave(file.path(manuscript_figures_path, "gsea_pair_results_reactome.pdf"), width = 20, height = 45)

Leading genes visualization and scoring

In this section, plots are saved directly as pdf in the figs/manuscript folder.

leading edge genes for KEGG

genes_to_plot <- gsea_results %>%
  dplyr::filter(padj < 0.05 & db == "kegg") %>%
  dplyr::select(leadingEdge) %>%
  unnest() %>%
  pull(leadingEdge) %>%
  unique()


genes_to_plot <- seurat@assays$RNA@data[genes_to_plot,] %>% #marker_genes_to_plot
  as.matrix() %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename_with(~paste0("gene_",.)) %>%
  tibble::rownames_to_column("cell.id")

# summarise to cluster level
hm_genes <- meta %>%
  dplyr::select(cell.id, curated_cluster) %>%
  left_join(genes_to_plot) %>%
  gather("feat", "val",  -c(cell.id, curated_cluster)) %>%
  group_by(feat, curated_cluster) %>%
  summarize(avg = mean(val), sd = sd(val))

Leading edge genes heatmaps per geneset

le_sets <- gsea_results %>%
  dplyr::filter(padj < 0.05 & db == "kegg") %>%
  dplyr::select(pathway, leadingEdge) %>%
  group_by(pathway) %>%
  summarize(leadingEdge = list(leadingEdge)) %>%
  mutate(leadingEdge = purrr::map(leadingEdge, ~unique(do.call(c, .)))) %>%
  deframe()


grobs_cluster <- lapply(names(le_sets), function(le) {
  
  hm_genes %>%
    ungroup() %>%
    mutate(feat = str_replace(feat, "gene_", "")) %>%
    dplyr::filter(feat %in% le_sets[[le]]) %>%
    tidyHeatmap::heatmap(
      feat,
      curated_cluster,
      avg,
      scale = "row",
      col = colorRamp2(c(-2, 0, 2),
                       c("royalblue", "grey", "red4")),
      cluster_columns = cl_dend,
      column_split = 5,
      column_title = gsub("^KEGG_","",le),
      column_title_gp = gpar(fontsize = 10),
      row_title = NA,
      row_names_gp = gpar(fontsize = 6, fontface = "bold"),
      column_names_gp = gpar(fontsize = 12, fontface = "bold"),
      column_names_rot = 45,
      column_names_centered = F,
      heatmap_legend_param = list(title = "z-score")
    ) %>%
    as_ComplexHeatmap() 
  
  
  
})

names(grobs_cluster) <- names(le_sets)

pdf(file.path(manuscript_figures_path, "leadingEdge_hm.pdf"), width = 30, height = 30)
grid.arrange(arrangeGrob(grobs = lapply(grobs_cluster, function(x) grid.grabExpr(draw(x))),
                         nrow = 4, ncol = 7))
dev.off()

per cell score on leading edges genes

Calculate per cell score with permutations and then summarise at sample level for heatmap of HC vs MM

gs_results <- AddModuleScore(seurat, le_sets, name = paste0(names(le_sets), "___GS"))@meta.data %>%
  as.data.frame() %>%
  dplyr::select(matches("___GS[0-9]+$")) 


if(identical(meta$cell.id, rownames(gs_results))) {
  
  groupings <- meta %>%
  dplyr::select(condition, source, id)

  gs_results <- aggregate(gs_results, groupings, "mean")
  
}



# per patient

kegg_hm <- gs_results %>%
  dplyr::rename_with( ~ gsub("^KEGG_|___GS[0-9]+$", "", .)) %>%
  pivot_longer(-c(condition, source, id),
               values_to = "score",
               names_to = "pathway") %>%
  group_by(source, pathway) %>%
  mutate(score = rescale(score)) %>%
  ungroup() %>%
  group_by(source) %>%
  nest() %>%
  mutate(hm = purrr::map2(
    data,
    source,
    ~ heatmap(
      .x,
      pathway,
      id,
      score,
      #scale = "row",
      column_title = .y
    ) %>% add_tile(condition)
  )) %>%
  pull(hm)



pdf(file.path(manuscript_figures_path, "patient_hm_kegg.pdf"), width = 30, height = 15)
lapply(kegg_hm, function(hm) hm %>%
    as_ComplexHeatmap() %>%
    draw(heatmap_legend_side = "left", annotation_legend_side = "left") %>%
    grid.grabExpr()) %>%
    arrangeGrob(grobs = ., nrow = 1, ncol = 2) %>%
    grid.arrange()
dev.off()




# disease level summary

kegg_hm <- gs_results %>%
  dplyr::rename_with( ~ gsub("^KEGG_|___GS[0-9]+$", "", .)) %>%
  pivot_longer(-c(condition, source, id),
               values_to = "score",
               names_to = "pathway") %>%
  group_by(pathway) %>%
  mutate(score = scale(score)) %>%
  ungroup() %>%
  group_by(source, condition, pathway) %>%
  summarise(score = mean(score)) %>%
  ungroup() %>%
  mutate(condition = paste(source, condition)) %>%
  group_by(source) %>%
  heatmap(pathway,
          condition,
          score,
          col = colorRamp2(c(-1, 0, 1),
                       c("royalblue", "grey", "red4")))



pdf(file.path(manuscript_figures_path, "disease_hm_kegg.pdf"), width = 10, height = 10)
kegg_hm %>%
    as_ComplexHeatmap() %>%
    draw(heatmap_legend_side = "left", annotation_legend_side = "left") %>%
    grid.grabExpr() %>%
    arrangeGrob(nrow = 1, ncol = 2) %>%
    grid.arrange()
dev.off()

genes_to_plot_disease <- genes_to_plot %>%
  dplyr::rename_with(~gsub("^gene_","", .)) %>%
  tibble::column_to_rownames("cell.id")

if(identical(meta$cell.id, rownames(genes_to_plot_disease))) {
  
  groupings <- meta %>%
  dplyr::select(condition, source, id)

  genes_to_plot_disease <- aggregate(genes_to_plot_disease, groupings, "mean")
  
}

genes_to_plot_disease <- genes_to_plot_disease %>%
  pivot_longer(-c(condition, source, id),
               values_to = "expr",
               names_to = "gene") %>%
  group_by(gene) %>%
  mutate(expr = scale(expr)) %>%
  ungroup() %>%
  group_by(source, condition, gene) %>%
  summarise(expr = mean(expr)) %>%
  ungroup() %>%
  mutate(condition = paste(source, condition))


grobs <- lapply(names(le_sets), function(le) {
  
  hm <- genes_to_plot_disease %>%
    ungroup() %>%
    dplyr::filter(gene %in% le_sets[[le]]) %>%
    group_by(source) %>%
    tidyHeatmap::heatmap(
      gene,
      condition,
      expr,
      col = colorRamp2(c(-1, 0, 1),
                       c("royalblue", "grey", "red4")),
      cluster_columns = FALSE,
      column_title = gsub("^KEGG_","",le),
      column_title_gp = gpar(fontsize = 0),
      row_title = NA,
      row_names_gp = gpar(fontsize = 6, fontface = "bold"),
      column_names_gp = gpar(fontsize = 12, fontface = "bold"),
      column_names_rot = 45,
      column_names_centered = F
    ) %>%
    as_ComplexHeatmap() 
  
  #cat("test")
  
    draw(grobs_cluster[[le]] + hm) %>%
    grid.grabExpr()
  
  
  
})


pdf(file.path(manuscript_figures_path, "disease_leadingEdge_hm.pdf"), width = 30, height = 30)
grid.arrange(arrangeGrob(grobs = grobs, nrow = 4, ncol = 7))
dev.off()

Table outputs

Generate excel results table for the GSEA analysis:

library(openxlsx)

xl_tables <- gsea_results %>%
  arrange(NES) %>%
  dplyr::filter(db %in% c("kegg", "react")) %>%
  unite("sheet", comparison, db) %>%
  group_by(sheet) %>%
  nest() %>%
  deframe() 

xl_tables[["common_pathways"]] <- gsea_results %>%
  dplyr::filter(db %in% c("kegg", "react")) %>%
  arrange(NES) %>%
  group_by(pathway) %>%
  dplyr::filter(sum(padj < 0.05) > 0) %>%
  ungroup() %>%
  dplyr::select(db, pathway) %>%
  distinct() %>%
  group_by(db) %>%
  mutate(rank = rank(pathway)) %>%
  pivot_wider(names_from = db, values_from = pathway) %>%
  arrange(rank) %>%
  dplyr::select(-rank)


xl_tables %>%
  write.xlsx(file.path(manuscript_figures_path, "gsea_results_2.xlsx"))

NK exploratory analysis and manuscript figures generation

PPA

2024-12-11