My data set up is as follows:
#data
topclones<- structure(list(CTaa_beta = c("CASSEGTSGGASTQYF", "CASSEGTSGGASTQYF",
"CSVEDPSSGSYEQYF", "CASSVAGPNTEAFF", "CSARDPETYEQYF", "CASSVAGPNTEAFF",
"CASSLGGLGTSTDTQYF", "CASSLRQGPSYEQYF", "CASSLGEYYGYTF", "CASSFGYTGELFF",
"CASSVGNRGGTDTQYF", "CASSELAGGQETQYF", "CSVPAGYTDTQYF", "CASSPGTAVQGYTF",
"CASSNWGGRGHTDTQYF", "CASRDSDGLAYQPQHF", "CSVPGTSTSGEQFF", "CASKPGTTSNQPQHF",
"CASSYPTSGANVLTF", "CSASTGADYNEQFF", "CSARVPTSGDYNEQFF", "CASSYPTSGANVLTF",
"CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF",
"CASSLEGRERYEQFF", "CASSVGLFSTDTQYF", "CASSLRGGPYNEQFF", "CASSLLAGGNNEQFF",
"CASSPLQGPSQPQHF", "CATSGRGDEVGELFF", "CSARAGESGRAMEQFF", "CASSLRQGPSYEQYF",
"CASTPAVRDGNYEQYF", "CASSPSTGYNEQFF", "CASSSGGLDEQYF", "CASSQDRGTGANVLTF",
"CASSFGTENTGELFF", "CATSGRGDEVGELFF", "CAWSVQSGGHEQYF", "CASSPGTAVQGYTF",
"CSARGGKENSPLHF", "CASSDSGGAYNEQFF", "CASSQDSGSGANVLTF", "CASSAGLAGGYEQYF",
"CASSSPGTTNEKLFF", "CASSLRGGPYNEQFF", "CASSQAKGGGETQYF"), Group = structure(c(6L,
7L, 3L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 4L, 2L, 1L, 2L, 2L, 6L, 2L,
4L, 7L, 2L, 7L, 6L, 7L, 7L, 4L, 4L, 4L, 7L, 6L, 4L, 7L, 6L, 1L,
3L, 4L, 2L, 4L, 1L, 7L, 7L, 2L, 3L, 6L, 2L, 6L, 4L, 4L, 7L, 7L
), levels = c("HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
"PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"), n = c(441L,
292L, 345L, 303L, 268L, 264L, 242L, 200L, 218L, 211L, 163L, 242L,
166L, 225L, 223L, 59L, 209L, 125L, 53L, 177L, 48L, 44L, 46L,
45L, 99L, 96L, 94L, 41L, 39L, 89L, 40L, 38L, 97L, 97L, 85L, 128L,
80L, 88L, 36L, 35L, 117L, 84L, 30L, 106L, 29L, 67L, 66L, 29L,
29L), Total_n_per_group = c(2770L, 2899L, 7226L, 6400L, 7081L,
7226L, 7081L, 6400L, 7081L, 7081L, 6400L, 9999L, 7081L, 9999L,
9999L, 2770L, 9999L, 6400L, 2899L, 9999L, 2899L, 2770L, 2899L,
2899L, 6400L, 6400L, 6400L, 2899L, 2770L, 6400L, 2899L, 2770L,
7081L, 7226L, 6400L, 9999L, 6400L, 7081L, 2899L, 2899L, 9999L,
7226L, 2770L, 9999L, 2770L, 6400L, 6400L, 2899L, 2899L), Percent = c(15.92,
10.07, 4.77, 4.73, 3.78, 3.65, 3.42, 3.12, 3.08, 2.98, 2.55,
2.42, 2.34, 2.25, 2.23, 2.13, 2.09, 1.95, 1.83, 1.77, 1.66, 1.59,
1.59, 1.55, 1.55, 1.5, 1.47, 1.41, 1.41, 1.39, 1.38, 1.37, 1.37,
1.34, 1.33, 1.28, 1.25, 1.24, 1.24, 1.21, 1.17, 1.16, 1.08, 1.06,
1.05, 1.05, 1.03, 1, 1)), row.names = c(NA, -49L), class = c("tbl_df",
"tbl", "data.frame"))
Essentially I have 41 unique values of "CTaa_beta" that I would plot as a bar graph, where variable "Group" is on the x-axis, variable "Percent" is on the y-axis, and variable "CTaa_beta" is the 'fill' condition in the barplot. Something like this:
#plot
topclones %>%
ggplot(aes(x=Group, y=Percent, fill = CTaa_beta)) +
geom_bar(position = position_dodge(), stat = "identity") +
geom_text(aes(label = Percent), vjust = -.5, position = position_dodge(1)) +
scale_fill_manual(values = rep(unname(alphabet2()),2), name = "CDR3b") +
xlab(NULL) +
ylab("Frequency of CDR3b (% total cells per group)") +
theme_classic() +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "bottom")
Code above results in the following plot:
As you can see, I have 41 distinct values of CTaa_beta that I need to assign colours for. I repeated the alphabet2 colour palette so that I could plot all 41, however this results in some duplicate colour assignment.
As a way to remedy this, I was suggested to use a combination of colour+pattern to make my bar graph (one of the questions I posted earlier). I have since then been following the accepted answer from here in order to make a plot where some values of CTaa_beta get assigned a colour palette and the others get assigned the same colour palette + pattern.
My data manipulation is as follows:
#assign colours to first 26 unique CTaa_beta and the remaining also get assigned the same colours
topclones$Colour<- ifelse(topclones$CTaa_beta %in% unique(topclones$CTaa_beta)[1:26], unname(alphabet2()), unname(alphabet2()))
#assign stripe column such that the first 26 unique CTaa_beta do not get a stripe but the remainder do get a stripe
topclones$Stripe <- ifelse(topclones$CTaa_beta %in% unique(topclones$CTaa_beta)[1:26], "none", "stripe")
Now, to plot this:
#plot
topclones %>%
ggplot(aes(x=Group, y=Percent, fill = CTaa_beta)) +
geom_col_pattern(color = "black", alpha = 0.7, aes(pattern = Stripe)) +
scale_pattern_identity() +
scale_fill_manual(values = topclones$Colour, name = "CDR3b") +
xlab(NULL) +
ylab("Frequency of CDR3b (% total cells per group)") +
theme_classic() +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "bottom")
Results in:
Essentially, I wanted to produce a plot where the first 26 unique values of CTaa_beta are plotted like a regular bar plot with the colours from alphabet2, and the remaining values also get plotted with the colours from alphabet2 but also contain a stripe pattern as a way to distinguish the values of CTaa_beta. Any suggestions to graph it in this manner would be greatly welcome!
My data set up is as follows:
#data
topclones<- structure(list(CTaa_beta = c("CASSEGTSGGASTQYF", "CASSEGTSGGASTQYF",
"CSVEDPSSGSYEQYF", "CASSVAGPNTEAFF", "CSARDPETYEQYF", "CASSVAGPNTEAFF",
"CASSLGGLGTSTDTQYF", "CASSLRQGPSYEQYF", "CASSLGEYYGYTF", "CASSFGYTGELFF",
"CASSVGNRGGTDTQYF", "CASSELAGGQETQYF", "CSVPAGYTDTQYF", "CASSPGTAVQGYTF",
"CASSNWGGRGHTDTQYF", "CASRDSDGLAYQPQHF", "CSVPGTSTSGEQFF", "CASKPGTTSNQPQHF",
"CASSYPTSGANVLTF", "CSASTGADYNEQFF", "CSARVPTSGDYNEQFF", "CASSYPTSGANVLTF",
"CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF",
"CASSLEGRERYEQFF", "CASSVGLFSTDTQYF", "CASSLRGGPYNEQFF", "CASSLLAGGNNEQFF",
"CASSPLQGPSQPQHF", "CATSGRGDEVGELFF", "CSARAGESGRAMEQFF", "CASSLRQGPSYEQYF",
"CASTPAVRDGNYEQYF", "CASSPSTGYNEQFF", "CASSSGGLDEQYF", "CASSQDRGTGANVLTF",
"CASSFGTENTGELFF", "CATSGRGDEVGELFF", "CAWSVQSGGHEQYF", "CASSPGTAVQGYTF",
"CSARGGKENSPLHF", "CASSDSGGAYNEQFF", "CASSQDSGSGANVLTF", "CASSAGLAGGYEQYF",
"CASSSPGTTNEKLFF", "CASSLRGGPYNEQFF", "CASSQAKGGGETQYF"), Group = structure(c(6L,
7L, 3L, 4L, 1L, 3L, 1L, 4L, 1L, 1L, 4L, 2L, 1L, 2L, 2L, 6L, 2L,
4L, 7L, 2L, 7L, 6L, 7L, 7L, 4L, 4L, 4L, 7L, 6L, 4L, 7L, 6L, 1L,
3L, 4L, 2L, 4L, 1L, 7L, 7L, 2L, 3L, 6L, 2L, 6L, 4L, 4L, 7L, 7L
), levels = c("HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
"PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"), n = c(441L,
292L, 345L, 303L, 268L, 264L, 242L, 200L, 218L, 211L, 163L, 242L,
166L, 225L, 223L, 59L, 209L, 125L, 53L, 177L, 48L, 44L, 46L,
45L, 99L, 96L, 94L, 41L, 39L, 89L, 40L, 38L, 97L, 97L, 85L, 128L,
80L, 88L, 36L, 35L, 117L, 84L, 30L, 106L, 29L, 67L, 66L, 29L,
29L), Total_n_per_group = c(2770L, 2899L, 7226L, 6400L, 7081L,
7226L, 7081L, 6400L, 7081L, 7081L, 6400L, 9999L, 7081L, 9999L,
9999L, 2770L, 9999L, 6400L, 2899L, 9999L, 2899L, 2770L, 2899L,
2899L, 6400L, 6400L, 6400L, 2899L, 2770L, 6400L, 2899L, 2770L,
7081L, 7226L, 6400L, 9999L, 6400L, 7081L, 2899L, 2899L, 9999L,
7226L, 2770L, 9999L, 2770L, 6400L, 6400L, 2899L, 2899L), Percent = c(15.92,
10.07, 4.77, 4.73, 3.78, 3.65, 3.42, 3.12, 3.08, 2.98, 2.55,
2.42, 2.34, 2.25, 2.23, 2.13, 2.09, 1.95, 1.83, 1.77, 1.66, 1.59,
1.59, 1.55, 1.55, 1.5, 1.47, 1.41, 1.41, 1.39, 1.38, 1.37, 1.37,
1.34, 1.33, 1.28, 1.25, 1.24, 1.24, 1.21, 1.17, 1.16, 1.08, 1.06,
1.05, 1.05, 1.03, 1, 1)), row.names = c(NA, -49L), class = c("tbl_df",
"tbl", "data.frame"))
Essentially I have 41 unique values of "CTaa_beta" that I would plot as a bar graph, where variable "Group" is on the x-axis, variable "Percent" is on the y-axis, and variable "CTaa_beta" is the 'fill' condition in the barplot. Something like this:
#plot
topclones %>%
ggplot(aes(x=Group, y=Percent, fill = CTaa_beta)) +
geom_bar(position = position_dodge(), stat = "identity") +
geom_text(aes(label = Percent), vjust = -.5, position = position_dodge(1)) +
scale_fill_manual(values = rep(unname(alphabet2()),2), name = "CDR3b") +
xlab(NULL) +
ylab("Frequency of CDR3b (% total cells per group)") +
theme_classic() +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "bottom")
Code above results in the following plot:
As you can see, I have 41 distinct values of CTaa_beta that I need to assign colours for. I repeated the alphabet2 colour palette so that I could plot all 41, however this results in some duplicate colour assignment.
As a way to remedy this, I was suggested to use a combination of colour+pattern to make my bar graph (one of the questions I posted earlier). I have since then been following the accepted answer from here in order to make a plot where some values of CTaa_beta get assigned a colour palette and the others get assigned the same colour palette + pattern.
My data manipulation is as follows:
#assign colours to first 26 unique CTaa_beta and the remaining also get assigned the same colours
topclones$Colour<- ifelse(topclones$CTaa_beta %in% unique(topclones$CTaa_beta)[1:26], unname(alphabet2()), unname(alphabet2()))
#assign stripe column such that the first 26 unique CTaa_beta do not get a stripe but the remainder do get a stripe
topclones$Stripe <- ifelse(topclones$CTaa_beta %in% unique(topclones$CTaa_beta)[1:26], "none", "stripe")
Now, to plot this:
#plot
topclones %>%
ggplot(aes(x=Group, y=Percent, fill = CTaa_beta)) +
geom_col_pattern(color = "black", alpha = 0.7, aes(pattern = Stripe)) +
scale_pattern_identity() +
scale_fill_manual(values = topclones$Colour, name = "CDR3b") +
xlab(NULL) +
ylab("Frequency of CDR3b (% total cells per group)") +
theme_classic() +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "bottom")
Results in:
Essentially, I wanted to produce a plot where the first 26 unique values of CTaa_beta are plotted like a regular bar plot with the colours from alphabet2, and the remaining values also get plotted with the colours from alphabet2 but also contain a stripe pattern as a way to distinguish the values of CTaa_beta. Any suggestions to graph it in this manner would be greatly welcome!
You can combine the fill and pattern aesthetics in the usual way using `scale_xxx_manual` and assigning the same "name" and "values". And to group-sort the legend (no stripe / stripe), we need to convert the CTaa_beta values from character to factor:
library(ggplot2)
library(ggpattern)
library(pals)
topclones <- mutate(topclones, CTaa_beta=factor(CTaa_beta,
levels=unique(CTaa_beta)))
fill_vals <- setNames(topclones$Colour, topclones$CTaa_beta)
patt_vals <- setNames(topclones$Stripe, topclones$CTaa_beta)
topclones %>%
ggplot(aes(x=Group, y=Percent,
fill = CTaa_beta,
pattern = CTaa_beta)) +
geom_col_pattern(color = "black", alpha = 0.7,
pattern_key_scale_factor=0.5) + # RStudio issue
theme_minimal() +
scale_fill_manual(name = "CDR3b", values = fill_vals) +
scale_pattern_manual(name = "CDR3b", values = patt_vals) +
xlab(NULL) +
ylab("Frequency of CDR3b (% total cells per group)") +
theme_classic() +
theme(legend.position = "bottom")
This solution is more complex than I would like, but it is relatively generalisable so it should be able to be applied to other datasets.
Ensuring that so many values are distinguishable in a single plot is difficult, ill-advised even. You could make an argument that displaying your data in a table would be better. That aside, the following is a potential workflow to apply to your data. There are three main ideas behind the design process:
ggplot2
does not natively support individual legends, so individual scaled plots for each Group will be patchwork
ed together. There are multiple workarounds to achieve individual legends, but I find patchwork
to be the easiest to understandgeom_col()
scales the bar widths relative to the number of bars, which makes the bars in groups with fewer values wider. This visually give the impression that these values are larger/more important even though the data are discrete. You can use preserve = "single"
to ensure equal bar widths, but scaling each plot width will also achieve thisFirst, load required packages, generate unique pattern/colour combinations for each duplicated CTaa_beta value, and assign colourramps to each Group:
library(dplyr)
library(scales)
library(colorspace)
library(ggplot2)
library(ggpattern)
library(patchwork)
# Remove unused factor level
topclones$Group <- droplevels(topclones$Group)
# Create df for defining unique patterns for each duplicated value in CTaa_beta
p_style <- topclones |>
mutate(grp_n = n(), .by = Group) |>
filter(n() > 1, .by = CTaa_beta) |>
select(CTaa_beta, Group, grp_n) |>
mutate(
pattern = c("stripe", "crosshatch")[(dense_rank(CTaa_beta) %% 2) + 1],
spacing = 0.075 * (min(grp_n) / grp_n),
key_scale = grp_n * (0.075 / min(grp_n)),
angle = seq(0, 80, length.out = n_distinct(CTaa_beta))
[match(CTaa_beta, unique(CTaa_beta))]
) |>
mutate(
pattern_colour = c("black", "white")[(dense_rank(CTaa_beta) %% 2) + 1],
.by = pattern
)
# Sort topclones, join pattern style df
topclones <- topclones |>
arrange(Group, CTaa_beta) |>
left_join(p_style, by = join_by(Group, CTaa_beta))
# Generate min and max colour hues for each Group based on hue_pal()
colour1 <- darken(hue_pal()(length(unique(topclones$Group))), 0.5)
colour2 <- lighten(hue_pal()(length(unique(topclones$Group))), 0.5)
# Initialise colours column, loop through groups, assign colourramps
topclones$colours <- NA
ct <- 0
for (i in unique(topclones$Group)) {
ct <- ct + 1
n <- length(unique(topclones$CTaa_beta[topclones$Group == i]))
groups <- unique(topclones$CTaa_beta[topclones$Group == i])
group_colours <-
setNames(colorRampPalette(c(colour1[ct], colour2[ct]))(n), groups)
topclones$colours[topclones$Group == i] <-
group_colours[topclones$CTaa_beta[topclones$Group == i]]
}
Examine one of the groups with CTaa_beta values duplicated in other groups:
data.frame(filter(topclones, Group == "axSpA SFMC"))
# CTaa_beta Group n Total_n_per_group Percent grp_n pattern spacing key_scale angle pattern_colour colours
# 1 CASSLRQGPSYEQYF axSpA SFMC 97 7226 1.34 4 crosshatch 0.075 0.075 22.85714 black #005B17
# 2 CASSPGTAVQGYTF axSpA SFMC 84 7226 1.16 4 stripe 0.075 0.075 34.28571 black #1F8A35
# 3 CASSVAGPNTEAFF axSpA SFMC 264 7226 3.65 4 crosshatch 0.075 0.075 11.42857 white #3EBA53
# 4 CSVEDPSSGSYEQYF axSpA SFMC 345 7226 4.77 NA <NA> NA NA NA <NA> #5EEA72
The pattern, spacing, key_scale, angle, and pattern_colour variables are from the p_style df, and colours from the for()
loop. Defining pattern parameters is as much an art as it is a science. You could manually assign pattern parameters in the p_style df. However, to ensure the patterns are the same across each plot, the spacing and key_scale values must be derived from the grp_n values.
Now create a list of plots, one for each Group, derive maximum y-axis value for 'tidy' axis breaks, and return vector for scaling each plot width:
# Generate list of plots for each Group
p_tmp <- lapply(unique(topclones$Group), \(x) {
tmp <- topclones |> filter(Group == x)
ggplot(tmp, aes(CTaa_beta, Percent)) +
geom_col_pattern(
aes(pattern = CTaa_beta,
fill = CTaa_beta,
pattern_angle = CTaa_beta,
pattern_spacing = CTaa_beta,
pattern_key_scale_factor = CTaa_beta,
pattern_fill = CTaa_beta),
pattern_colour = NA,
width = 1) +
scale_pattern_manual(name = "", values = tmp$pattern) +
scale_fill_manual(name = "", values = tmp$colours) +
scale_pattern_angle_manual(name = "", values = tmp$angle) +
scale_pattern_spacing_manual(name = "", values = tmp$spacing) +
scale_pattern_key_scale_factor_manual(name = "", values = tmp$key_scale) +
scale_pattern_fill_manual(name = "", values = tmp$pattern_colour) +
labs(x = x)
})
# Get rounded up max summed Percent for y-axis (in case of > 1 value per Group)
max_y <- ceiling(
max(aggregate(Percent ~ Group + CTaa_beta,
data = topclones,
FUN = sum)$Percent)
)
# Get vector of Group lengths for plot widths
pw <- topclones |>
summarise(n = n_distinct(CTaa_beta), .by = Group) |>
pull(n)
Finally, combine all plots from p_tmp. There is an issue with ggpattern
regarding long axes labels if the plot window is too small to accommodate them. Also, plotting pattern geoms can be very slow. To get around this, you can create a plot object in your environment and ggsave()
it to view it. I have include the ggave()
used to save the example plot below as the result is dependent on the width and height parameters (note that the example has been scaled to reduce its size).
p <- wrap_plots(p_tmp,
nrow = 1,
axes = "collect_y") +
plot_layout(widths = unit(pw, rep("cm", length(pw))),
heights = unit(rep(16, length(pw)), rep("cm", length(pw)))) &
geom_text(aes(label = Percent), vjust = -.5, position = position_dodge(1)) &
scale_y_continuous(limits = c(0, max_y),
breaks = seq(0, max_y, length.out = 5)) &
guides(pattern = guide_legend(nrow = 8),
fill = guide_legend(nrow = 8),
pattern_angle = guide_legend(nrow = 8),
pattern_spacing = guide_legend(nrow = 8),
pattern_colour = guide_legend(nrow = 8)) &
ylab("Frequency of CDR3b (% total cells per group)") &
coord_cartesian(expand = FALSE,
clip = "off") &
theme_classic() &
theme(axis.text = element_text(size = 18),
axis.title = element_text(size = 18),
axis.title.x = element_text(vjust = -1),
axis.ticks.x = element_blank(),
legend.text = element_text(size = 10),
legend.position = "bottom",
legend.justification = c(0.5, 0),
legend.key.spacing.y = unit(0, "mm"),
axis.text.x = element_blank())
ggsave("CTaa_beta_plot.jpg",
p,
width = 21,
height = 9.5,
dpi = 150)
Rather than add to what is already a long answer, posting a separate answer here as an alternative to my previous solution to this question. This IMHO represents your data, and the connection between the duplicated CTaa_beta values across groups, in a less visually 'cluttered'/much clearer way.
It involves using ggplot2::geom_point()
in place of patterns. Aside from less clutter, points do not require scaling the parameters for individual plot/legend pairings.
First, load required packages, generate unique point shapes for each duplicated CTaa_beta value, and assign colourramps to each Group:
library(dplyr)
library(scales)
library(colorspace)
library(ggplot2)
library(patchwork)
# Remove unused factor level
topclones$Group <- droplevels(topclones$Group)
# Create tibble defining unique point shapes for each duplicated value in CTaa_beta
set.seed(42)
styles <- topclones |>
filter(n() > 1, .by = CTaa_beta) |>
select(CTaa_beta, Group) |>
mutate(point_shape = sample(0:25, n_distinct(CTaa_beta)),
.by = CTaa_beta)
# Sort topclones, join styles to topclones
topclones <- topclones |>
arrange(Group, CTaa_beta) |>
left_join(styles, by = join_by(Group, CTaa_beta))
# Generate min and max colour hues for each Group based on hue_pal()
colour1 <- darken(hue_pal()(length(unique(topclones$Group))), 0.5)
colour2 <- lighten(hue_pal()(length(unique(topclones$Group))), 0.5)
# Initialise colours column, loop through groups, assign colourramps
topclones$colours <- NA
ct <- 0
for (i in unique(topclones$Group)) {
ct <- ct + 1
n <- length(unique(topclones$CTaa_beta[topclones$Group == i]))
groups <- unique(topclones$CTaa_beta[topclones$Group == i])
group_colours <-
setNames(colorRampPalette(c(colour1[ct], colour2[ct]))(n), groups)
topclones$colours[topclones$Group == i] <-
group_colours[topclones$CTaa_beta[topclones$Group == i]]
}
Examine one of the groups with CTaa_beta values duplicated in other groups:
data.frame(filter(topclones, Group == "axSpA SFMC"))
# CTaa_beta Group n Total_n_per_group Percent point_shape colours
# 1 CASSLRQGPSYEQYF axSpA SFMC 97 7226 1.34 0 #005B17
# 2 CASSPGTAVQGYTF axSpA SFMC 84 7226 1.16 24 #1F8A35
# 3 CASSVAGPNTEAFF axSpA SFMC 264 7226 3.65 4 #3EBA53
# 4 CSVEDPSSGSYEQYF axSpA SFMC 345 7226 4.77 NA #5EEA72
The point_shape values correspond to R point shape codes, and the colours values are from the for()
loop.
Now create a list of plots, one for each Group, derive maximum y-axis value for 'tidy' axis breaks, and return vector for scaling the width of each plot:
# Generate list of plots for each Group
p_tmp <- lapply(unique(topclones$Group), \(x) {
tmp <- topclones |> filter(Group == x)
ggplot(tmp) +
geom_col(aes(CTaa_beta, Percent, fill = CTaa_beta),
width = 1) +
geom_point(aes(CTaa_beta, 0.25, shape = CTaa_beta),
size = 2,
stroke = 1,
colour = "white") +
labs(x = x) +
scale_fill_manual(name = "", values = tmp$colours) +
scale_shape_manual(name = "", values = tmp$point_shape)
})
# Get rounded up max summed Percent for y-axis (in case of > 1 CTaa_beta value per Group)
max_y <- ceiling(max(aggregate(Percent ~ Group + CTaa_beta,
data = topclones,
FUN = sum)$Percent))
# Get vector of Group lengths for plot widths
pw <- topclones |>
summarise(n = n_distinct(CTaa_beta), .by = Group) |>
pull(n)
Finally, combine all plots from p_tmp. As there are NA values in the point_shape column, ggplot()
will throw warnings letting you know the corresponding points were not plotted. To avoid the warnings, you can write the plot object to your environment and wrap ggsave()
inside suppressWarnings()
.
I have included the ggave()
used to save the example plot below as the result is dependent on the width and height parameters (note that the example has been scaled to reduce its size).
p <- wrap_plots(p_tmp,
nrow = 1,
axes = "collect_y") +
plot_layout(widths = unit(pw, rep("cm", length(pw))),
heights = unit(rep(16, length(pw)), rep("cm", length(pw)))) &
geom_text(aes(CTaa_beta, Percent, label = Percent),
vjust = -.5,
position = position_dodge(1)) &
scale_y_continuous(limits = c(0, max_y),
breaks = seq(0, max_y, length.out = 5)) &
guides(fill = guide_legend(nrow = 8),
shape = guide_legend(override.aes = list(size = 1, stroke = 0.75))) &
ylab("Frequency of CDR3b (% total cells per group)") &
coord_cartesian(expand = FALSE,
clip = "off") &
theme_classic() &
theme(axis.text = element_text(size = 18),
axis.title = element_text(size = 18),
axis.title.x = element_text(vjust = -1),
axis.ticks.x = element_blank(),
legend.text = element_text(size = 10),
legend.position = "bottom",
legend.justification = c(0.5, 0),
legend.key.spacing.y = unit(0, "mm"),
axis.text.x = element_blank())
suppressWarnings(
ggsave("C:/test/gradient_colours_by_group_no_thatching.jpg",
p,
width = 21,
height = 9.5,
dpi = 150)
)
An argument could be made for a separate point shape legend, but this would add more clutter to an already complex plot. Further, I believe (hope?) the target audience for this plot will be able to infer the point shape/CTaa_beta relationship.