一、前言
上篇博文《Funky heatmap - 从初探到进阶 (初级版)》我们简单的探索了一下funky heatmap是怎么出图的。但很多时候我们总会觉得它自动生成的图这不满意那有点缺陷的,总在想能不能给他美化一下。这篇我们就来讲讲具体该怎么对图的细节进行自定义。
二、进阶版介绍
我就不多说废话了,直接开始。
在进行修改前,我们需要创建一个新的Rscript文件,用于存放我们修改过的函数:
# 这里放修改好的代码
# 代码1:
# 代码2:
# ...
# 最后我们需要把修改好的函数加载到我们的环境中,让funkyheatmap包执行我们修改的函数而不是它默认的函数
assignInNamespace("function name", function_name, ns = "funkyheatmap")
我们在这个新的Rscript中每做一次修改,你都把这个Rscript运行一遍,更新函数。
接着我们来了解它这个funky_heatmap()函数是怎么工作的。我们按CTRL + 鼠标左键,点击funky_heatmap()函数去查看它的代码:
function (data, column_info = NULL, row_info = NULL, column_groups = NULL,
row_groups = NULL, palettes = NULL, legends = NULL, position_args = position_arguments(),
scale_column = TRUE, add_abc = TRUE, col_annot_offset, col_annot_angle,
expand)
{
data <- verify_data(data)
column_info <- verify_column_info(column_info, data)
row_info <- verify_row_info(row_info, data)
column_groups <- verify_column_groups(column_groups, column_info)
row_groups <- verify_row_groups(row_groups, row_info)
palettes <- verify_palettes(palettes, column_info, data)
legends <- verify_legends(legends, palettes, column_info,
data)
if (!missing(col_annot_offset)) {
warning("Argument `col_annot_offset` is deprecated. Use `position_arguments(col_annot_offset = ...)` instead.")
position_args$col_annot_offset <- col_annot_offset
}
if (!missing(col_annot_angle)) {
warning("Argument `col_annot_angle` is deprecated. Use `position_arguments(col_annot_angle = ...)` instead.")
position_args$col_annot_angle <- col_annot_angle
}
if (!missing(expand)) {
warning("Argument `expand` is deprecated. Use `position_arguments(expand_* = ...)` instead.")
for (name in names(expand)) {
position_args[[paste0("expand_", name)]] <- expand[[name]]
}
}
geom_positions <- calculate_geom_positions(data, column_info,
row_info, column_groups, row_groups, palettes, position_args,
scale_column, add_abc)
main_plot <- compose_ggplot(geom_positions, position_args)
geom_legend_funs <- list(funkyrect = create_funkyrect_legend,
circle = create_circle_legend, rect = create_rect_legend,
pie = create_pie_legend, text = create_text_legend)
legend_plots <- list()
for (legend in legends) {
if (legend$enabled) {
legend_fun <- geom_legend_funs[[legend$geom]]
legend_args <- legend
legend_args$geom <- NULL
legend_args$enabled <- NULL
legend_args$palette <- NULL
legend_args$position_args <- position_args
legend_plot <- do.call(legend_fun, legend_args)
legend_plots <- c(legend_plots, list(legend_plot))
}
}
if (length(legend_plots) == 0) {
return(main_plot)
}
legend_widths <- map_dbl(legend_plots, ~.x$width)
legend_heights <- map_dbl(legend_plots, ~.x$height)
heights <- main_plot$height
width <- main_plot$width
heights <- c(heights, 0.1, max(legend_heights))
width <- max(width, sum(legend_widths))
out <- patchwork::wrap_plots(main_plot, patchwork::plot_spacer(),
patchwork::wrap_plots(legend_plots, nrow = 1, widths = legend_widths),
ncol = 1, heights = heights)
out$width <- width
out$height <- sum(heights)
out
}
这里包含了多个子函数,比如verify_column_info(), verify_row_info(), verify_column_groups()等等,这些我们都不管,因为这些都不是作图的,可以理解为作者对你输入数据的检查,如果哪里出问题它会给出error信息。funky_heatmap()函数的作图核心函数在于compose_ggplot(),这是一个封装好了的基于ggplot的函数,我们这次自定义作图需要做出修改的就是这个函数。
同样地,点进这个函数查看其代码:
function (geom_positions, position_args)
{
g <- ggplot() + coord_equal(expand = FALSE) + scale_alpha_identity() +
scale_colour_identity() + scale_fill_identity() + scale_size_identity() +
scale_linewidth_identity() + scale_linetype_identity() +
cowplot::theme_nothing()
row_pos <- (geom_positions$row_pos %||% tibble(colour_background = logical(0))) %>%
filter(.data$colour_background)
if (nrow(row_pos) > 0) {
g <- g + geom_rect(aes(xmin = min(geom_positions$column_pos$xmin) -
0.25, xmax = max(geom_positions$column_pos$xmax) +
0.25, ymin = .data$ymin - (geom_positions$viz_params$row_space/2),
ymax = .data$ymax + (geom_positions$viz_params$row_space/2)),
row_pos, fill = "#DDDDDD")
}
if (nrow(geom_positions$segment_data %||% tibble()) > 0) {
geom_positions$segment_data <- geom_positions$segment_data %>%
add_column_if_missing(size = 0.5, colour = "black",
linetype = "solid")
g <- g + geom_segment(aes(x = .data$x, xend = .data$xend,
y = .data$y, yend = .data$yend, linewidth = .data$size,
colour = .data$colour, linetype = .data$linetype),
geom_positions$segment_data)
}
if (nrow(geom_positions$rect_data %||% tibble()) > 0) {
geom_positions$rect_data <- geom_positions$rect_data %>%
add_column_if_missing(alpha = 1, border = TRUE,
border_colour = "black") %>% mutate(border_colour = ifelse(.data$border,
.data$border_colour, NA_character_))
g <- g + geom_rect(aes(xmin = .data$xmin, xmax = .data$xmax,
ymin = .data$ymin, ymax = .data$ymax, fill = .data$colour,
colour = .data$border_colour, alpha = .data$alpha),
geom_positions$rect_data, linewidth = 0.25)
}
if (nrow(geom_positions$circle_data %||% tibble()) > 0) {
g <- g + ggforce::geom_circle(aes(x0 = .data$x0, y0 = .data$y0,
fill = .data$colour, r = .data$r), geom_positions$circle_data,
linewidth = 0.25)
}
if (nrow(geom_positions$funkyrect_data %||% tibble()) >
0) {
g <- g + geom_rounded_rect(aes(xmin = .data$xmin, xmax = .data$xmax,
ymin = .data$ymin, ymax = .data$ymax, radius = .data$corner_size,
fill = .data$colour), geom_positions$funkyrect_data,
size = 0.25, colour = "black")
}
if (nrow(geom_positions$pie_data %||% tibble()) > 0) {
g <- g + ggforce::geom_arc_bar(aes(x0 = .data$x0, y0 = .data$y0,
r0 = .data$r0, r = .data$r, start = .data$rad_start,
end = .data$rad_end, fill = .data$colour), data = geom_positions$pie_data,
linewidth = 0.25)
}
if (nrow(geom_positions$img_data %||% tibble()) > 0) {
if (!requireNamespace("magick", quietly = TRUE)) {
cli_alert_warning("Package `magick` is required to draw images. Skipping columns with geom == \"image\".")
}
else {
for (r in seq_len(nrow(geom_positions$img_data))) {
image <- geom_positions$img_data[[r, "path"]]
if (!inherits(image, "magick-image")) {
if (is.character(image)) {
assert_that(file.exists(image), msg = paste0("Image '",
image, "' does not exist."))
}
image <- magick::image_read(image)
}
g <- g + cowplot::draw_image(image = image,
x = geom_positions$img_data[[r, "xmin"]],
y = geom_positions$img_data[[r, "ymin"]],
width = geom_positions$img_data[[r, "width"]],
height = geom_positions$img_data[[r, "height"]])
}
}
}
if (nrow(geom_positions$text_data %||% tibble()) > 0) {
geom_positions$text_data <- geom_positions$text_data %>%
add_column_if_missing(hjust = 0.5, vjust = 0.5,
size = 4, fontface = "plain", colour = "black",
lineheight = 1, angle = 0) %>% mutate(angle2 = .data$angle/360 *
2 * pi, cosa = cos(.data$angle2) %>% round(2), sina = sin(.data$angle2) %>%
round(2), alphax = ifelse(.data$cosa < 0, 1 - .data$hjust,
.data$hjust) * abs(.data$cosa) + ifelse(.data$sina >
0, 1 - .data$vjust, .data$vjust) * abs(.data$sina),
alphay = ifelse(.data$sina < 0, 1 - .data$hjust,
.data$hjust) * abs(.data$sina) + ifelse(.data$cosa <
0, 1 - .data$vjust, .data$vjust) * abs(.data$cosa),
x = (1 - .data$alphax) * .data$xmin + .data$alphax *
.data$xmax, y = (1 - .data$alphay) * .data$ymin +
.data$alphay * .data$ymax) %>% filter(.data$label_value !=
"")
g <- g + geom_text(aes(x = .data$x, y = .data$y, label = .data$label_value,
colour = .data$colour, hjust = .data$hjust, vjust = .data$vjust,
size = .data$size, fontface = .data$fontface, angle = .data$angle),
data = geom_positions$text_data)
}
if (is.null(geom_positions$bounds)) {
geom_positions$bounds <- compute_bounds(row_pos = geom_positions$row_pos,
column_pos = geom_positions$column_pos, segment_data = geom_positions$segment_data,
rect_data = geom_positions$rect_data, circle_data = geom_positions$circle_data,
funkyrect_data = geom_positions$funkyrect_data,
pie_data = geom_positions$pie_data, text_data = geom_positions$text_data)
}
minimum_x <- geom_positions$bounds$minimum_x - (position_args$expand_xmin %||%
0)
maximum_x <- geom_positions$bounds$maximum_x + (position_args$expand_xmax %||%
0)
minimum_y <- geom_positions$bounds$minimum_y - (position_args$expand_ymin %||%
0)
maximum_y <- geom_positions$bounds$maximum_y + (position_args$expand_ymax %||%
0)
g <- g + expand_limits(x = c(minimum_x, maximum_x), y = c(minimum_y,
maximum_y))
g$minimum_x <- minimum_x
g$maximum_x <- maximum_x
g$minimum_y <- minimum_y
g$maximum_y <- maximum_y
g$width <- (maximum_x - minimum_x)/4
g$height <- (maximum_y - minimum_y)/4
g
}
我们发现作者用ggplot做了很多sub-plot,然后通过cowplot进行整合。整体过一遍这个代码,我的思路就是:我要自定义图的哪部分,就去修改对应部分的作图代码。我自定义的需求是:1. 像示意图那样,把column的label移动到图的下方,90度显示。2. 还想修改一些细节,比如,隐藏坐标轴的ticks。那么我们就来找对应的代码部分:
1. 这段是修改背景那个灰白相间的表格图的代码:
if (nrow(row_pos) > 0) {
g <- g + geom_rect(aes(xmin = min(geom_positions$column_pos$xmin) -
0.25, xmax = max(geom_positions$column_pos$xmax) +
0.25, ymin = .data$ymin - (geom_positions$viz_params$row_space/2),
ymax = .data$ymax + (geom_positions$viz_params$row_space/2)),
row_pos, fill = "#DDDDDD")
}
2. 这段是修改坐标轴,各种分割线的代码:
if (nrow(geom_positions$segment_data %||% tibble()) > 0) {
geom_positions$segment_data <- geom_positions$segment_data %>%
add_column_if_missing(size = 0.5, colour = "black",
linetype = "solid")
g <- g + geom_segment(aes(x = .data$x, xend = .data$xend,
y = .data$y, yend = .data$yend, linewidth = .data$size,
colour = .data$colour, linetype = .data$linetype),
geom_positions$segment_data)
}
3. 这段是修改图上rect的代码,目前测试下来可以修改分组的色块和条形图:
if (nrow(geom_positions$rect_data %||% tibble()) > 0) {
geom_positions$rect_data <- geom_positions$rect_data %>%
add_column_if_missing(alpha = 1, border = TRUE,
border_colour = "black") %>% mutate(border_colour = ifelse(.data$border,
.data$border_colour, NA_character_))
g <- g + geom_rect(aes(xmin = .data$xmin, xmax = .data$xmax,
ymin = .data$ymin, ymax = .data$ymax, fill = .data$colour,
colour = .data$border_colour, alpha = .data$alpha),
geom_positions$rect_data, linewidth = 0.25)
}
4. 后面几个代码分别是处理geom设定为circle, funkyrect, pie和img的代码:
if (nrow(geom_positions$circle_data %||% tibble()) > 0) {
g <- g + ggforce::geom_circle(aes(x0 = .data$x0, y0 = .data$y0,
fill = .data$colour, r = .data$r), geom_positions$circle_data,
linewidth = 0.25)
}
if (nrow(geom_positions$funkyrect_data %||% tibble()) >
0) {
g <- g + geom_rounded_rect(aes(xmin = .data$xmin, xmax = .data$xmax,
ymin = .data$ymin, ymax = .data$ymax, radius = .data$corner_size,
fill = .data$colour), geom_positions$funkyrect_data,
size = 0.25, colour = "black")
}
if (nrow(geom_positions$pie_data %||% tibble()) > 0) {
g <- g + ggforce::geom_arc_bar(aes(x0 = .data$x0, y0 = .data$y0,
r0 = .data$r0, r = .data$r, start = .data$rad_start,
end = .data$rad_end, fill = .data$colour), data = geom_positions$pie_data,
linewidth = 0.25)
}
if (nrow(geom_positions$img_data %||% tibble()) > 0) {
if (!requireNamespace("magick", quietly = TRUE)) {
cli_alert_warning("Package `magick` is required to draw images. Skipping columns with geom == \"image\".")
}
else {
for (r in seq_len(nrow(geom_positions$img_data))) {
image <- geom_positions$img_data[[r, "path"]]
if (!inherits(image, "magick-image")) {
if (is.character(image)) {
assert_that(file.exists(image), msg = paste0("Image '",
image, "' does not exist."))
}
image <- magick::image_read(image)
}
g <- g + cowplot::draw_image(image = image,
x = geom_positions$img_data[[r, "xmin"]],
y = geom_positions$img_data[[r, "ymin"]],
width = geom_positions$img_data[[r, "width"]],
height = geom_positions$img_data[[r, "height"]])
}
}
}
5. 下面是设定图上所有文本信息的代码:
if (nrow(geom_positions$text_data %||% tibble()) > 0) {
geom_positions$text_data <- geom_positions$text_data %>%
add_column_if_missing(hjust = 0.5, vjust = 0.5,
size = 4, fontface = "plain", colour = "black",
lineheight = 1, angle = 0) %>% mutate(angle2 = .data$angle/360 *
2 * pi, cosa = cos(.data$angle2) %>% round(2), sina = sin(.data$angle2) %>%
round(2), alphax = ifelse(.data$cosa < 0, 1 - .data$hjust,
.data$hjust) * abs(.data$cosa) + ifelse(.data$sina >
0, 1 - .data$vjust, .data$vjust) * abs(.data$sina),
alphay = ifelse(.data$sina < 0, 1 - .data$hjust,
.data$hjust) * abs(.data$sina) + ifelse(.data$cosa <
0, 1 - .data$vjust, .data$vjust) * abs(.data$cosa),
x = (1 - .data$alphax) * .data$xmin + .data$alphax *
.data$xmax, y = (1 - .data$alphay) * .data$ymin +
.data$alphay * .data$ymax) %>% filter(.data$label_value !=
"")
g <- g + geom_text(aes(x = .data$x, y = .data$y, label = .data$label_value,
colour = .data$colour, hjust = .data$hjust, vjust = .data$vjust,
size = .data$size, fontface = .data$fontface, angle = .data$angle),
data = geom_positions$text_data)
}
好了,接下来你就只需要去修改你图的对应部分的代码,就能实现自定义了。具体如何实现呢?我这里以我自己的需求为例,首先,我想先隐藏ticks。我们定位到geom_positions$segment_data这块:我们先看看这个segment_data长啥样:
我们发现这里面存放了包括坐标信息和颜色之类的属性信息。这里坐标信息我们就不要动他,我们就看后面的那些信息有哪些有区别的,在我的案例中我发现linetype这一列包含了dashed和solid两种类型。其中dashed line主要用来分隔各个column的,那这个solid呢?我这里就假设它指代的就是所谓的坐标轴的ticks,于是我对这个geom_positions$segment_data进行了过滤处理,只保留linetype为dashed的行。我们先把compos_plot所有的代码复制到上面新创建的Rscript中,然后对segment_data那部分代码进行修改:
# 我们还需要把compos_plot中用到的两个自定义函数一并复制进来,不需要对它进行修改,只是每次需要把它读到环境中去。
add_column_if_missing<-function (df, ...)
{
column_values <- list(...)
for (column_name in names(column_values)) {
default_val <- rep(column_values[[column_name]], nrow(df))
if (column_name %in% colnames(df)) {
df[[column_name]] <- ifelse(is.na(df[[column_name]]),
default_val, df[[column_name]])
}
else {
df[[column_name]] <- default_val
}
}
df
}
compute_bounds<-function (row_pos, column_pos, segment_data, rect_data, circle_data,
funkyrect_data, pie_data, text_data)
{
suppressWarnings({
minimum_x <- min(column_pos$xmin, segment_data$x, segment_data$xend,
rect_data$xmin, circle_data$x - circle_data$r, funkyrect_data$x -
funkyrect_data$r, pie_data$xmin, text_data$xmin,
na.rm = TRUE)
maximum_x <- max(column_pos$xmax, segment_data$x, segment_data$xend,
rect_data$xmax, circle_data$x + circle_data$r, funkyrect_data$x +
funkyrect_data$r, pie_data$xmax, text_data$xmax,
na.rm = TRUE)
minimum_y <- min(row_pos$ymin, segment_data$y, segment_data$yend,
rect_data$ymin, circle_data$y - circle_data$r, funkyrect_data$y -
funkyrect_data$r, pie_data$ymin, text_data$ymin,
na.rm = TRUE)
maximum_y <- max(row_pos$ymax, segment_data$y, segment_data$yend,
rect_data$ymax, circle_data$y + circle_data$r, funkyrect_data$y +
funkyrect_data$r, pie_data$ymax, text_data$ymax,
na.rm = TRUE)
})
list(minimum_x = minimum_x, maximum_x = maximum_x, minimum_y = minimum_y,
maximum_y = maximum_y)
}
compose_ggplot <- function (geom_positions, position_args)
{
# 代码...
if (nrow(geom_positions$segment_data %||% tibble()) > 0) {
# 这里修改了源代码,把源代码中ticks隐藏了,也就是lintype为solid的给过滤掉了。
# 过滤 linetype 为 'dashed' 的行
filtered_segment_data <- geom_positions$segment_data %>%
filter(linetype == "dashed")
# 检查过滤后的数据行数
if (nrow(filtered_segment_data) > 0) {
# 如果存在符合条件的数据,则继续添加列并绘图
filtered_segment_data <- filtered_segment_data %>%
add_column_if_missing(size = 0.5, colour = "black", linetype = "solid")
g <- g + geom_segment(aes(x = .data$x, xend = .data$xend,
y = .data$y, yend = .data$yend, linewidth = .data$size,
colour = .data$colour, linetype = .data$linetype),
filtered_segment_data)
}
}
# 代码...
}
修改好后我们先运行一下看看情况:
我们发现果然奏效了!每个column的label下面的ticks消失了。成功!
接着,我的需求是修改文字,我希望每个column的label放到图下面来,然后把角度也改一改。这个时候我们就要来修改text_data这部分的内容了。同样地,我们先观察一下text_data这个数据框:
我们可以看到这是一张非常宽的表,里面包含了非常多的信息。我们通过观察发现,geom列似乎可以作为突破口。因为里面包含所有文字信息的类型,通过unique()函数检查这个列发现只有两种值:text和NA。text值对应的就是我们产生的图中除了column label和分组文字以外的所有文本。而我们耀想对column label进行处理的话那我们只要对geom == NA的行进行修改。但NA对应的行除了label以外还有分组的文本,怎么进一步区分呢?再次观察发现,这两者之间在hjust这一列还有区别,分组文本的hjust不为0,而column label的hjust为0,那这就是我们进一步做过滤的条件了。简而言之就是,我们对text_data进行分割,分成三部分,第一部分就是geom == 'text'的那部分,在这里我们不做任何处理(当然你要改颜色,字体之类的这里就可以进行修改了);第二部分就是column label,我们把它向下平移;第三部分就是分组文本,我们也不做处理。以下是修改后的代码(我这里就只粘贴text_data处理的那段代码):
if (nrow(geom_positions$text_data %||% tibble()) > 0) {
# 1. 处理 geom 不为空的行
geom_text_non_na <- geom_positions$text_data %>%
filter(!is.na(.data$geom)) %>% # 筛选 geom 列不为空的行
add_column_if_missing(hjust = 0.5, vjust = 0.5,
size = 4, fontface = "plain", colour = "black",
lineheight = 1, angle = 0) %>%
mutate(
angle2 = .data$angle / 360 * 2 * pi,
cosa = cos(.data$angle2) %>% round(2),
sina = sin(.data$angle2) %>% round(2),
alphax = ifelse(.data$cosa < 0, 1 - .data$hjust, .data$hjust) * abs(.data$cosa) +
ifelse(.data$sina > 0, 1 - .data$vjust, .data$vjust) * abs(.data$sina),
alphay = ifelse(.data$sina < 0, 1 - .data$hjust, .data$hjust) * abs(.data$sina) +
ifelse(.data$cosa < 0, 1 - .data$vjust, .data$vjust) * abs(.data$cosa),
x = (1 - .data$alphax) * .data$xmin + .data$alphax * .data$xmax,
y = (1 - .data$alphay) * .data$ymin + .data$alphay * .data$ymax
) %>%
filter(.data$label_value != "")
# 2. 处理 geom 为空且 hjust == 0 的行
geom_text_na_hjust_0 <- geom_positions$text_data %>%
filter(is.na(.data$geom) & .data$hjust == 0) %>% # 筛选 geom 为空且 hjust == 0 的行
mutate(
hjust = 1,
vjust = 0.5,
size = 4,
fontface = "plain",
colour = "black",
angle = 90, # 修改角度
angle2 = angle / 360 * 2 * pi,
label_value = ifelse(is.na(.data$label_value), paste0("Default_", row_number()), .data$label_value),
x = (.data$xmin + .data$xmax) / 2, # 示例逻辑,可以根据需求调整
y = (.data$ymin + .data$ymax) / 2 - 8 # 向下移动
)
# 3. 处理 geom 为空且 hjust != 0 的行
geom_text_na_hjust_not_0 <- geom_positions$text_data %>%
filter(is.na(.data$geom) & .data$hjust != 0) %>% # 筛选 geom 为空且 hjust != 0 的行
mutate(
hjust = .data$hjust, # 保留原始 hjust
vjust = 0.3,
size = 4,
fontface = "plain",
colour = "black",
angle = 0,
angle2 = angle / 360 * 2 * pi,
label_value = ifelse(is.na(.data$label_value), paste0("Default_", row_number()), .data$label_value),
x = (.data$xmin + .data$xmax) / 2, # 示例逻辑,可以根据需求调整
y = (.data$ymin + .data$ymax) / 2 # 向上移动
)
# 合并三部分数据
geom_positions$text_data <- bind_rows(geom_text_non_na, geom_text_na_hjust_0, geom_text_na_hjust_not_0)
# 绘图
g <- g + geom_text(aes(x = .data$x, y = .data$y, label = .data$label_value,
colour = .data$colour, hjust = .data$hjust, vjust = .data$vjust,
size = .data$size, fontface = .data$fontface, angle = .data$angle),
data = geom_positions$text_data)
}
替换原来那段代码后再次运行一下我们放函数的Rscript看下结果:
很好,我们第二个需求也实现了。这里要注意里面几个参数:
# 第二步,也就是处理 geom 为空且 hjust == 0 的行,这里是处理我们column label的,这里几个东西因人而异,需要自己不断调试,找到适合自己的参数:
hjust = 1
y = (.data$ymin + .data$ymax) / 2 - 8 # 向下移动
这两个参数是控制垂直位置的,每个人的需求可能不一样,所以这里你可以自行设置。只需要修改1和8这两个值就行
到这里,我们的需求就全部完成了。
其实我本来还想写个自定义图例的教程,想想算了,一个是它本身的图例其实也还不错,完全可以用。另一个原因是我写累了(捂脸笑)。。。
三、总结
以上便是进阶版的全部内容。总结起来就是一个逻辑,根据作图需要的那几个数据框的规律进行判断,然后对其中的行进行分组,以达到对不同元素进行个性化修改的目的。如果各位对此方法还有疑问,也欢迎留言提问,我会一一给大家解答!