blob: 66349c8333be5f0f72d4a81592a1a12d1f7b8220 [file] [log] [blame]
#' Collapse repeated rows to multirow cell
#'
#' @description Collapse identical adjacent values in a column into a single
#' multirow cell. This feature is similar to `group_rows`. However, unlike
#' `group_rows`, it analyzes existing columns, finds the rows that can be
#' grouped together, and turns them into multirow cells. Note that if you want
#' to use `column_spec` to specify column styles, you should use `column_spec`
#' before `collapse_rows`.
#'
#' @param kable_input Output of `knitr::kable()` with `format` specified
#' @param columns A numeric value or vector indicating in which column(s) rows
#' need to be collapsed.
#' @param valign Select from "top", "middle" (default), "bottom". The reason
#' why "top" is not the default is that the multirow package on CRAN
#' win-builder is not up to date.
#' @param latex_hline Option controlling the behavior of adding hlines to the
#' table. Choose from `major`, `full`, `none`, `custom` and `linespace`. We
#' changed the default from `full` to `major` in version 1.2.
#' @param custom_latex_hline Numeric column positions whose collapsed rows will
#' be separated by hlines.
#' @param row_group_label_position Option controlling positions of row group
#' labels. Choose from `identity`, `stack`.
#' @param row_group_label_fonts A list of arguments that can be supplied to
#' the `group_rows` function to format the row group label when
#' `row_group_label_position` is `stack`.
#' @param headers_to_remove Numeric column positions where headers should be
#' removed when they are stacked.
#' @param target If multiple columns are selected for collapsing and a target
#' column is specified, this target column will be used to collapse the other
#' columns based on the groups of this target column. Must be a single
#' numeric column position.
#' @param col_names T/F. A LaTeX specific option. If you set `col.names` to
#' `NULL` in your `kable` call, you need to set this option to false to let
#' everything work properly.
#' @param longtable_clean_cut T/F with default T. Multirow cells are sometimes
#' displayed incorrectly around a page break. This option forces groups to cut
#' before the end of a page. If you have a group that is longer than 1 page,
#' you need to turn off this option.
#'
#' @return The modified table for "html" and "latex" inputs. For any other
#' (or missing) format a warning is issued and `kable_input` is returned
#' unchanged.
#'
#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
#' x <- knitr::kable(dt, "html")
#' collapse_rows(x)
#'
#' @export
collapse_rows <- function(kable_input, columns = NULL,
                          valign = c("middle", "top", "bottom"),
                          latex_hline = c("major", "full", "none", "custom",
                                          "linespace"),
                          row_group_label_position = c("identity", "stack"),
                          custom_latex_hline = NULL,
                          row_group_label_fonts = NULL,
                          headers_to_remove = NULL,
                          target = NULL,
                          col_names = TRUE,
                          longtable_clean_cut = TRUE) {
  kable_format <- attr(kable_input, "format")
  # A missing format attribute is treated like an unsupported format so the
  # caller gets the explanatory warning instead of a zero-length `if` error.
  if (is.null(kable_format) || !kable_format %in% c("html", "latex")) {
    warning("Please specify format in kable. kableExtra can customize either ",
            "HTML or LaTeX outputs. See https://haozhu233.github.io/kableExtra/ ",
            "for details.")
    return(kable_input)
  }
  valign <- match.arg(valign)
  # target is used downstream as a single column position.
  if (!is.null(target) && (length(target) != 1 || !is.numeric(target))) {
    stop("target can only be a length 1 integer")
  }
  if (kable_format == "html") {
    return(collapse_rows_html(kable_input, columns, valign, target))
  }
  # Only "latex" can reach this point; the LaTeX-only options are validated
  # here so that HTML callers are not affected by them.
  latex_hline <- match.arg(latex_hline)
  row_group_label_position <- match.arg(row_group_label_position,
                                        c("identity", "stack"))
  collapse_rows_latex(kable_input, columns, latex_hline, valign,
                      row_group_label_position, row_group_label_fonts,
                      custom_latex_hline, headers_to_remove, target,
                      col_names, longtable_clean_cut)
}
# HTML backend of collapse_rows().
#
# Parses the table, computes a rowspan matrix with collapse_row_matrix(), and
# then walks every body row: cells whose span is 0 are removed, cells whose
# span is greater than 1 receive a `rowspan` attribute plus a vertical-align
# style. The span matrix is stored on the result as the `collapse_matrix`
# attribute.
#
# kable_input: HTML kable; columns: column positions to collapse (NULL = all);
# valign: value written into the `vertical-align` CSS property;
# target: optional single column position that defines the groups.
collapse_rows_html <- function(kable_input, columns, valign, target) {
  kable_attrs <- attributes(kable_input)
  kable_xml <- kable_as_xml(kable_input)
  kable_tbody <- xml_tpart(kable_xml, "tbody")
  # html_table() may return a tibble; coerce to a plain data.frame so that
  # column extraction downstream yields atomic vectors.
  kable_dt <- as.data.frame(
    rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]],
    stringsAsFactors = FALSE
  )
  if (is.null(columns)) {
    columns <- seq_len(ncol(kable_dt))
  }
  if (!is.null(target)) {
    if (!target %in% columns) {
      stop("target has to be within the range of columns")
    }
  }
  if (!is.null(kable_attrs$header_above)) {
    # The first `header_above` parsed rows are header rows; the last of them
    # carries the real column names.
    kable_dt_col_names <- unlist(kable_dt[kable_attrs$header_above, ])
    kable_dt <- kable_dt[-(1:kable_attrs$header_above), ]
    names(kable_dt) <- kable_dt_col_names
  }

  collapse_matrix <- collapse_row_matrix(kable_dt, columns, target = target)

  for (i in seq_len(nrow(collapse_matrix))) {
    matrix_row <- collapse_matrix[i, ]
    names(matrix_row) <- names(collapse_matrix)
    target_row <- xml_child(kable_tbody, i)
    # Cells already removed from this row shift the position of every cell to
    # their right, so the column index is corrected by this count.
    row_node_rm_count <- 0
    for (j in seq_along(matrix_row)) {
      span <- matrix_row[[j]]
      collapsing_col <- as.numeric(sub("x", "", names(matrix_row)[j])) -
        row_node_rm_count
      target_cell <- xml_child(target_row, collapsing_col)
      if (span == 0) {
        xml_remove(target_cell)
        row_node_rm_count <- row_node_rm_count + 1
      } else if (span != 1) {
        xml_attr(target_cell, "rowspan") <- span
        xml_attr(target_cell, "style") <- paste0(
          xml_attr(target_cell, "style"),
          "vertical-align: ", valign, " !important;")
      }
    }
  }

  out <- as_kable_xml(kable_xml)
  kable_attrs$collapse_matrix <- collapse_matrix
  attributes(out) <- kable_attrs
  if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
  return(out)
}
# Expand a vector of run lengths into a grouping factor.
#
# Element i of `x` is the length of run i; the result repeats the run index i
# `x[i]` times, e.g. c(2, 1) -> factor(c(1, 1, 2)). An empty `x` yields an
# empty factor.
split_factor <- function(x) {
  factor(rep(seq_along(x), times = x))
}
# Build the span matrix used to collapse repeated values.
#
# For every column in `columns` the run lengths of consecutive equal values
# are computed with rle(). Each run of length n becomes n entries: the run
# length itself plus n - 1 zeros. With `html = TRUE` the run length is placed
# on the first row of the run, otherwise on the last row.
#
# When `target` is given, runs are computed separately inside each run of the
# target column, so no collapsed cell crosses a target group boundary.
#
# Returns a data.frame with one column per collapsed column, named
# "x<column>".
collapse_row_matrix <- function(kable_dt, columns, html = TRUE, target = NULL) {
  column_block <- if (html) {
    function(x) c(x, rep(0, x - 1))
  } else {
    function(x) c(rep(0, x - 1), x)
  }

  mapping_matrix <- list()
  if (is.null(target)) {
    for (i in columns) {
      # `[[` always returns the column vector, including for tibbles where
      # `[, i]` would return a one-column table that rle() rejects.
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        rle(kable_dt[[i]])$lengths, column_block))
    }
  } else {
    target_group <- split_factor(rle(kable_dt[[target]])$lengths)
    for (i in columns) {
      column_split <- split(kable_dt[[i]], target_group)
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        column_split, function(sp) {
          lapply(rle(sp)$lengths, column_block)
        }))
    }
  }
  data.frame(mapping_matrix)
}
# LaTeX backend of collapse_rows().
#
# Works on the table source as text: every body row stored in
# `table_info$contents` is rebuilt with collapsed cells (multirow cells, or
# blanked cells in "stack" mode) and substituted back into the table with
# sub(). The strings built here are used as regex patterns / replacements,
# which is why backslashes and braces are escaped.
#
# Depending on `latex_hline`, a rule (or extra line space) is appended after
# each row except the last. For longtables with `longtable_clean_cut`, page
# break hints are added so that groups are not split across pages.
#
# Returns a "knitr_kable" object with the updated `kable_meta` attribute; in
# "stack" mode the result is additionally passed through
# collapse_rows_latex_stack().
collapse_rows_latex <- function(kable_input, columns, latex_hline, valign,
                                row_group_label_position, row_group_label_fonts,
                                custom_latex_hline, headers_to_remove, target,
                                col_names, longtable_clean_cut) {
  table_info <- magic_mirror(kable_input)
  out <- solve_enc(kable_input)
  # Existing \addlinespace commands are dropped before rows are rewritten.
  out <- gsub("\\\\addlinespace\n", "", out)

  # Optional (regex-escaped) position argument for \multirow.
  valign <- switch(
    valign,
    top = "\\[t\\]",
    middle = "",
    bottom = "\\[b\\]"
  )

  if (is.null(columns)) {
    columns <- seq_len(table_info$ncol)
  }

  contents <- table_info$contents
  kable_dt <- kable_dt_latex(contents, col_names)

  # "rev" marks the first row of each run, the other marks the last row.
  collapse_matrix_rev <- collapse_row_matrix(kable_dt, columns, html = TRUE,
                                             target = target)
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE,
                                         target = target)
  n_rows <- nrow(collapse_matrix)

  new_kable_dt <- kable_dt
  for (j in seq_along(columns)) {
    column_align <- table_info$align_vector_origin[columns[j]]
    column_width <- ifelse(
      is.null(table_info$column_width[[paste0("column_", columns[j])]]),
      "*", table_info$column_width[paste0("column_", columns[j])])
    for (i in seq_len(n_rows)) {
      if (row_group_label_position == "stack") {
        if (columns[j] < ncol(collapse_matrix) ||
            collapse_matrix_rev[i, j] == 0) {
          new_kable_dt[i, columns[j]] <- ""
        }
      } else {
        new_kable_dt[i, columns[j]] <- collapse_new_dt_item(
          kable_dt[i, columns[j]], collapse_matrix[i, j], column_width,
          align = column_align, valign = valign
        )
      }
    }
  }

  # Span matrix over all columns; columns that are not collapsed count as a
  # run boundary on every row.
  midrule_matrix <- collapse_row_matrix(kable_dt, seq(1, table_info$ncol),
                                        html = FALSE, target = target)
  midrule_matrix[setdiff(seq(1, table_info$ncol), columns)] <- 1

  # Rebuild the search patterns for the original rows: append the escaped
  # row terminator (and \hline when booktabs is off) to all but the first and
  # last entries.
  ex_bottom <- length(contents) - 1
  contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\")
  if (!table_info$booktabs) {
    contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline")
  }

  new_contents <- character(n_rows)
  if (row_group_label_position == "stack") {
    if (is.null(headers_to_remove)) headers_to_remove <- head(columns, -1)
    table_info$colnames[headers_to_remove] <- ""
    new_header <- paste(table_info$colnames, collapse = " & ")
    out <- sub(contents[1], new_header, out)
    table_info$contents[1] <- new_header
  }
  if (latex_hline == "custom" && is.null(custom_latex_hline)) {
    if (row_group_label_position == "stack") {
      custom_latex_hline <- 1:2
    } else {
      custom_latex_hline <- 1
    }
  }

  for (i in seq_len(n_rows)) {
    new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
    table_info$contents[i + 1] <- new_contents[i]
    if (i != n_rows) {
      # Columns whose run ends on this row, and whether that is all of them.
      run_ends <- as.numeric(midrule_matrix[i, ]) > 0
      rule_cols <- which(run_ends)
      all_runs_end <- sum(run_ends) == ncol(midrule_matrix)
      row_midrule <- switch(
        latex_hline,
        "none" = "",
        "full" = if (all_runs_end) {
          midline_groups(rule_cols, table_info$booktabs)
        } else {
          midline_groups(rule_cols, FALSE)
        },
        "major" = if (all_runs_end) {
          midline_groups(rule_cols, table_info$booktabs)
        } else {
          ""
        },
        "custom" = if (
          sum(as.numeric(midrule_matrix[i, custom_latex_hline])) > 0
        ) {
          midline_groups(rule_cols, table_info$booktabs)
        } else {
          ""
        },
        "linespace" = if (all_runs_end) {
          "\\\\addlinespace\n"
        } else {
          ""
        }
      )
      new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
    }
    out <- sub(contents[i + 1], new_contents[i], out, perl = TRUE)
  }

  if (table_info$tabular == "longtable" && longtable_clean_cut) {
    if (max(collapse_matrix) > 50) {
      warning("It seems that you have a group larger than 50 rows and span ",
              "over a page. You probably want to set longtable_clean_cut to ",
              "be FALSE.")
    }
    if (latex_hline == "full") {
      warning("kableExtra 1.2 adds a clean_cut feature to provide better page",
              " breaking in collapse_rows. It only works when latex_hline = ",
              "'major'. It looks like you have longtable_clean_cut = T while ",
              "latex_hline = 'full'. Please change either one of them.")
    }
    # Forbid a page break after every row, then allow one after each cmidrule.
    out <- gsub("\\\\\\\\($|\n)", "\\\\\\\\\\\\nopagebreak\\1", out)
    out <- gsub("(\\\\cmidrule[{][^}]*[}])", "\\1\\\\pagebreak[0]", out)
  }

  out <- structure(out, format = "latex", class = "knitr_kable")
  table_info$collapse_rows <- TRUE
  table_info$collapse_matrix <- collapse_matrix
  attr(out, "kable_meta") <- table_info
  if (row_group_label_position == "stack") {
    group_row_index_list <- collapse_rows_index(kable_dt, head(columns, -1))
    out <- collapse_rows_latex_stack(out, group_row_index_list,
                                     row_group_label_fonts)
  }
  return(out)
}
# Turn LaTeX row strings into a character data.frame of cells.
#
# x: character vector with one table row per element, cells separated by
#    " & ". When `col_names` is TRUE the first element is dropped before
#    splitting.
# Returns a data.frame with one row per remaining element of `x`.
kable_dt_latex <- function(x, col_names) {
  body_rows <- if (col_names) x[-1] else x
  cells <- str_split(body_rows, " & ")
  data.frame(do.call(rbind, cells), stringsAsFactors = FALSE)
}
# Build the replacement text for a single cell of a collapsed column.
#
# x: original cell text; span: number of rows the cell covers (0 gives an
# empty cell, 1 gives `x` unchanged); width: multirow width, "\\*" when NULL;
# align: "l", "c" or "r", selecting the alignment command placed before the
# text; valign: pre-escaped optional argument inserted right after
# \multirow.
#
# The result is escaped for use as a regex replacement string and passes a
# negative row count (-span) to \multirow.
collapse_new_dt_item <- function(x, span, width = NULL, align, valign) {
  if (span == 0) {
    return("")
  }
  if (span == 1) {
    return(x)
  }

  cell_width <- ifelse(is.null(width), "\\*", width)
  align_cmd <- switch(
    align,
    "l" = "\\\\raggedright\\\\arraybackslash ",
    "c" = "\\\\centering\\\\arraybackslash ",
    "r" = "\\\\raggedleft\\\\arraybackslash "
  )
  paste0(
    "\\\\multirow", valign,
    "\\{", -span, "\\}",
    "\\{", cell_width, "\\}",
    "\\{", align_cmd, x, "\\}"
  )
}
# Build partial horizontal rules covering the given column positions.
#
# x: increasing column positions that need a rule. Consecutive positions are
#    merged into "from-to" ranges, one rule per range.
# booktabs: TRUE emits \cmidrule, FALSE emits \cline.
#
# Returns a single string with the rules separated by newlines (backslashes
# are doubled because the result is used as a regex replacement). An empty
# `x` returns "" instead of a malformed "{NA-}" range.
midline_groups <- function(x, booktabs = TRUE) {
  if (length(x) == 0) {
    return("")
  }
  # A gap larger than 1 between neighbours closes one range and opens the next.
  breaks <- which(diff(x) > 1)
  range_starts <- x[c(1, breaks + 1)]
  range_ends <- x[c(breaks, length(x))]
  ranges <- paste0(range_starts, "-", range_ends)
  rule_cmd <- if (booktabs) "\\\\cmidrule{" else "\\\\cline{"
  paste0(rule_cmd, ranges, "}", collapse = "\n")
}
# Return the (regex-replacement escaped) \addlinespace command.
#
# `x` is accepted for signature compatibility but does not influence the
# result; the range computations that used to be performed on it were never
# used and have been removed.
linespace_groups <- function(x) {
  "\\\\addlinespace"
}
# Compute run-length indexes for the selected columns.
#
# For each entry of `columns`, consecutive equal values of that column of
# `kable_dt` are counted with rle(). The result is a list with one element
# per column: a vector of run lengths named by the run's value.
collapse_rows_index <- function(kable_dt, columns) {
  lapply(columns, function(col) {
    runs <- rle(kable_dt[, col])
    run_lengths <- runs$lengths
    names(run_lengths) <- runs$values
    run_lengths
  })
}
# Add stacked row group labels to a LaTeX table.
#
# Calls group_rows() once per element of `group_row_index_list`, feeding the
# output of one call into the next.
#
# kable_input: the table to decorate.
# group_row_index_list: list of named run-length vectors, one per grouping
#   level (as produced by collapse_rows_index()).
# row_group_label_fonts: optional list; element i holds extra group_rows()
#   arguments for level i and overrides the defaults below.
#
# Default fonts: level 1 bold, level 2 italic, level 3 and deeper plain.
# An empty `group_row_index_list` returns `kable_input` unchanged.
collapse_rows_latex_stack <- function(kable_input, group_row_index_list,
                                      row_group_label_fonts) {
  # Copy every named element of `updated_list` over `default_list`.
  merge_lists <- function(default_list, updated_list) {
    for (x in names(updated_list)) {
      default_list[[x]] <- updated_list[[x]]
    }
    default_list
  }

  default_font_list <- list(
    list(bold = TRUE, italic = FALSE),
    list(bold = FALSE, italic = TRUE),
    list(bold = FALSE, italic = FALSE)
  )
  n_default_fonts <- length(default_font_list)
  n_supplied_fonts <- length(row_group_label_fonts)

  out <- kable_input
  for (i in seq_along(group_row_index_list)) {
    # Levels beyond the last default reuse the last default font.
    group_row_args <- default_font_list[[min(i, n_default_fonts)]]
    if (i <= n_supplied_fonts) {
      group_row_args <- merge_lists(group_row_args, row_group_label_fonts[[i]])
    }
    group_row_args <- merge_lists(
      list(kable_input = out, index = group_row_index_list[[i]]),
      group_row_args)
    out <- do.call(group_rows, group_row_args)
  }
  out
}