R/dist_calc_seq.R
dist_calc_seq.Rd
Calculate distances between sequential samples in ps_extra/phyloseq object
dist_calc_seq(
data,
dist,
group,
seq,
unequal = "warn",
start_value = NaN,
return = "data",
var_name = paste0(dist, "_DistFromLast")
)
psExtra object, e.g. output from tax_transform()
name of distance to calculate between pairs of sequential samples
name of variable in phyloseq sample_data used to define groups of samples
name of variable in phyloseq sample_data used to define order of samples within groups
"error" or "warn" or "ignore" if groups of samples, defined by group argument, are of unequal size
value returned for the first sample in each group, which has no preceding sample in the group's sequence, and so has no obvious value
format of return object: "data" returns psExtra with sorted samples and additional variable. "vector" returns only named vector of sequential distances.
name of variable created in psExtra if return arg = "data"
psExtra object sorted and with new sequential distance variable or a named vector of that variable
library(ggplot2)
library(dplyr)
data("dietswap", package = "microbiome")
pseq <- dietswap %>%
tax_transform("identity", rank = "Genus") %>%
dist_calc_seq(
dist = "aitchison", group = "subject", seq = "timepoint",
# group sizes are unequal because some subjects are missing a timepoint
unequal = "ignore"
)
#> Warning: ps_split may drop different taxa per group with ps_filter
pseq %>%
samdat_tbl() %>%
dplyr::select(1, subject, timepoint, dplyr::last_col())
#> # A tibble: 222 × 4
#> .sample_name subject timepoint aitchison_DistFromLast
#> <chr> <fct> <int> <dbl>
#> 1 Sample-32 azh 1 NaN
#> 2 Sample-109 azh 2 9.27
#> 3 Sample-119 azh 3 11.2
#> 4 Sample-130 azh 4 9.19
#> 5 Sample-140 azh 5 5.12
#> 6 Sample-41 azh 6 5.30
#> 7 Sample-61 azl 1 NaN
#> 8 Sample-93 azl 2 7.99
#> 9 Sample-106 azl 3 8.44
#> 10 Sample-78 azl 4 8.24
#> # ℹ 212 more rows
# ggplot heatmap - unsorted
pseq %>%
samdat_tbl() %>%
filter(timepoint != 1) %>%
ggplot(aes(x = timepoint, y = subject)) +
geom_tile(aes(fill = aitchison_DistFromLast)) +
scale_fill_viridis_c(na.value = NA, name = "dist") +
theme_minimal(base_line_size = NA) +
scale_y_discrete(limits = rev(levels(samdat_tbl(pseq)$subject)))
# ComplexHeatmap plotting with clustering #
library(tidyr)
library(ComplexHeatmap)
# make data matrix
heatmat <- pseq %>%
samdat_tbl() %>%
filter(timepoint != 1) %>%
pivot_wider(
id_cols = subject,
names_from = timepoint, names_prefix = "t",
values_from = aitchison_DistFromLast
) %>%
tibble::column_to_rownames("subject")
heatmat <- as.matrix.data.frame(heatmat)
heatmap <- Heatmap(
name = "dist",
matrix = heatmat, col = viridisLite::viridis(12), na_col = "white",
cluster_columns = FALSE,
cluster_rows = hclust(dist(heatmat), method = "ward.D"),
width = unit(1.5, "in"), rect_gp = gpar(col = "black"),
row_names_side = "left", row_names_gp = gpar(fontsize = 8)
)
heatmap
# comparison with subject tracking on PCA
pca <- pseq %>%
# already sorted data
dist_calc("aitchison") %>%
ord_calc("PCoA") %>%
ord_plot(alpha = 0.1, shape = "nationality", size = 2) %>%
add_paths(
mapping = aes(colour = subject, alpha = timepoint, size = timepoint),
id_var = "subject", id_values = c(
"eve", "hsf", # low variation
"vem", # medium
"ufm", # high variation
"pku" # starts high
)
) +
scale_alpha_continuous(range = c(0.3, 1), breaks = c(2, 4, 6)) +
scale_size_continuous(range = c(1, 2), breaks = c(2, 4, 6))
heatmap
pca