#' Collapse repeated rows to multirow cell
#'
#' @description Collapse same values in columns into multirow cells. This
#' feature does similar things with `group_rows`. However, unlike `group_rows`,
#' it analyzes existing columns, finds out rows that can be grouped together,
#' and makes them multirow cells. Note that if you want to use `column_spec` to
#' specify column styles, you should use `column_spec` before `collapse_rows`.
#'
#' @param kable_input Output of `knitr::kable()` with `format` specified
#' @param columns A numeric value or vector indicating in which column(s) rows
#' need to be collapsed.
#' @param valign Select from "top", "middle"(default), "bottom". The reason why
#' "top" is not default is that the multirow package on CRAN win-builder is
#' not up to date.
#' @param latex_hline Option controlling the behavior of adding hlines to table.
#' Choose from `major`, `full`, `none`, `custom` and `linespace`. We changed
#' the default from `full` to `major` in version 1.2.
#' @param custom_latex_hline Numeric column positions whose collapsed rows will
#' be separated by hlines.
#' @param row_group_label_position Option controlling positions of row group
#' labels. Choose from `identity`, `stack`.
#' @param row_group_label_fonts A list of arguments that can be supplied to
#' group_rows function to format the row group label when
#' `row_group_label_position` is `stack`
#' @param headers_to_remove Numeric column positions where headers should be
#' removed when they are stacked.
#' @param target If multiple columns are selected to do collapsing and a target
#' column is specified, this target column will be used to collapse other
#' columns based on the groups of this target column. Must be a single
#' numeric column position.
#' @param col_names T/F. A LaTeX specific option. If you set `col.names` be
#' `NULL` in your `kable` call, you need to set this option false to let
#' everything work properly.
#' @param longtable_clean_cut T/F with default T. Multirow cell sometimes are
#' displayed incorrectly around pagebreak. This option forces groups to cut
#' before the end of a page. If you have a group that is longer than 1 page,
#' you need to turn off this option.
#'
#' @return The modified table for `html` and `latex` inputs. For any other
#' (or missing) format a warning is raised and `kable_input` is returned
#' unchanged.
#'
#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
#' x <- knitr::kable(dt, "html")
#' collapse_rows(x)
#'
#' @export
collapse_rows <- function(kable_input, columns = NULL,
                          valign = c("middle", "top", "bottom"),
                          latex_hline = c("major", "full", "none", "custom",
                                          "linespace"),
                          row_group_label_position = c("identity", "stack"),
                          custom_latex_hline = NULL,
                          row_group_label_fonts = NULL,
                          headers_to_remove = NULL,
                          target = NULL,
                          col_names = TRUE,
                          longtable_clean_cut = TRUE) {
  kable_format <- attr(kable_input, "format")
  # A missing format attribute is treated like an unsupported format: warn and
  # hand the input back instead of failing on a zero-length `if` condition.
  if (is.null(kable_format) || !kable_format %in% c("html", "latex")) {
    warning("Please specify format in kable. kableExtra can customize either ",
            "HTML or LaTeX outputs. See https://haozhu233.github.io/kableExtra/ ",
            "for details.")
    return(kable_input)
  }
  valign <- match.arg(valign)
  # `target` is used as a single column position by both backends.
  if (!is.null(target) && (length(target) != 1 || !is.numeric(target))) {
    stop("target can only be a length 1 integer")
  }
  if (kable_format == "html") {
    return(collapse_rows_html(kable_input, columns, valign, target))
  }
  if (kable_format == "latex") {
    latex_hline <- match.arg(latex_hline)
    row_group_label_position <- match.arg(row_group_label_position,
                                          c("identity", "stack"))
    return(collapse_rows_latex(kable_input, columns, latex_hline, valign,
      row_group_label_position, row_group_label_fonts, custom_latex_hline,
      headers_to_remove, target, col_names, longtable_clean_cut))
  }
}
| |
# HTML backend of collapse_rows().
#
# Parses the table, computes the row-span matrix with collapse_row_matrix()
# and then edits the <tbody> in place: cells with span 0 are removed, cells
# with a span greater than 1 get a `rowspan` attribute plus a vertical-align
# style. The span matrix is stored on the result as the `collapse_matrix`
# attribute.
#
# kable_input: the HTML kable; columns: column positions to collapse (NULL
# means all); valign: value written into the `vertical-align` style; target:
# optional column position passed on to collapse_row_matrix().
collapse_rows_html <- function(kable_input, columns, valign, target) {
  kable_attrs <- attributes(kable_input)
  kable_xml <- kable_as_xml(kable_input)
  kable_tbody <- xml_tpart(kable_xml, "tbody")

  kable_dt <- rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]]
  if (is.null(columns)) {
    columns <- seq_len(ncol(kable_dt))
  }
  if (!is.null(target) && !target %in% columns) {
    stop("target has to be within the range of columns")
  }
  if (!is.null(kable_attrs$header_above)) {
    # The parsed table carries the extra header rows as data rows: the last of
    # them holds the real column names, and all of them are dropped.
    kable_dt_col_names <- unlist(kable_dt[kable_attrs$header_above, ])
    # drop = FALSE keeps a one-column table a data frame.
    kable_dt <- kable_dt[-(1:kable_attrs$header_above), , drop = FALSE]
    names(kable_dt) <- kable_dt_col_names
  }
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, target = target)
  span_names <- names(collapse_matrix)

  for (i in seq_len(nrow(collapse_matrix))) {
    target_row <- xml_child(kable_tbody, i)
    # Removing a cell shifts the position of every later cell in this row.
    row_node_rm_count <- 0
    for (j in seq_along(span_names)) {
      span <- collapse_matrix[i, j]
      # Span columns are named "x<column position>".
      collapsing_col <- as.numeric(sub("x", "", span_names[j])) -
        row_node_rm_count
      target_cell <- xml_child(target_row, collapsing_col)
      if (span == 0) {
        xml_remove(target_cell)
        row_node_rm_count <- row_node_rm_count + 1
      } else if (span != 1) {
        xml_attr(target_cell, "rowspan") <- span
        xml_attr(target_cell, "style") <- paste0(
          xml_attr(target_cell, "style"),
          "vertical-align: ", valign, " !important;")
      }
    }
  }

  out <- as_kable_xml(kable_xml)
  kable_attrs$collapse_matrix <- collapse_matrix
  attributes(out) <- kable_attrs
  if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
  out
}
| |
# Expand a vector of run lengths into a grouping factor: the i-th run
# contributes x[i] copies of i. An empty `x` yields an empty factor.
split_factor <- function(x) {
  factor(rep(seq_along(x), times = x))
}
| |
# Compute the row-span matrix used to collapse repeated values.
#
# For every column in `columns`, consecutive equal values form a run. Each run
# of length n is encoded as n entries: the run length itself in one cell and 0
# in the others. With `html = TRUE` the length sits in the first row of the
# run, otherwise in the last row. When `target` is given, runs are computed
# separately inside each run of the target column, so no run crosses a target
# group boundary.
#
# Returns a data frame with one column per collapsed column, named
# "x<column>".
collapse_row_matrix <- function(kable_dt, columns, html = TRUE, target = NULL) {
  column_block <- if (html) {
    function(x) c(x, rep(0, x - 1))
  } else {
    function(x) c(rep(0, x - 1), x)
  }
  mapping_matrix <- list()
  # Columns are extracted with `[[` so that a plain vector reaches rle() for
  # data frames and for tibbles alike (`[, i]` keeps a tibble a tibble).
  if (is.null(target)) {
    for (i in columns) {
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        rle(kable_dt[[i]])$lengths, column_block))
    }
  } else {
    target_group <- split_factor(rle(kable_dt[[target]])$lengths)
    for (i in columns) {
      column_split <- split(kable_dt[[i]], target_group)
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        column_split, function(sp) {
          lapply(rle(sp)$lengths, column_block)
        }))
    }
  }

  data.frame(mapping_matrix)
}
| |
# LaTeX backend of collapse_rows().
#
# Reads the table metadata via magic_mirror(), rebuilds every body row with
# collapsed cells (multirow cells for the "identity" label position, blanked
# cells for "stack"), appends the row terminator plus the horizontal rule
# selected by `latex_hline`, and substitutes each rebuilt row into the table
# text. The rebuilt rows are used as replacement text in sub(), which is why
# the LaTeX backslashes and braces in the literals below are escaped.
#
# Returns a "knitr_kable" object with format "latex" whose `kable_meta`
# attribute holds the updated metadata, including `collapse_matrix`.
collapse_rows_latex <- function(kable_input, columns, latex_hline, valign,
                                row_group_label_position, row_group_label_fonts,
                                custom_latex_hline, headers_to_remove, target,
                                col_names, longtable_clean_cut) {
  table_info <- magic_mirror(kable_input)
  out <- solve_enc(kable_input)
  out <- gsub("\\\\addlinespace\n", "", out)

  # Optional positional argument of \multirow, already escaped.
  valign <- switch(
    valign,
    top = "\\[t\\]",
    middle = "",
    bottom = "\\[b\\]"
  )

  all_columns <- seq_len(table_info$ncol)
  if (is.null(columns)) {
    columns <- all_columns
  }

  contents <- table_info$contents
  kable_dt <- kable_dt_latex(contents, col_names)

  # Two encodings of the same runs: span in the first row of a run (`_rev`)
  # and span in the last row of a run.
  collapse_matrix_rev <- collapse_row_matrix(kable_dt, columns, html = TRUE,
                                             target)
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE,
                                         target)
  n_rows <- nrow(collapse_matrix)

  new_kable_dt <- kable_dt
  for (j in seq_along(columns)) {
    column_align <- table_info$align_vector_origin[columns[j]]
    width_spec <- table_info$column_width[[paste0("column_", columns[j])]]
    column_width <- if (is.null(width_spec)) "*" else width_spec
    for (i in seq_len(n_rows)) {
      if (row_group_label_position == "stack") {
        if (columns[j] < ncol(collapse_matrix) ||
            collapse_matrix_rev[i, j] == 0) {
          new_kable_dt[i, columns[j]] <- ""
        }
      } else {
        new_kable_dt[i, columns[j]] <- collapse_new_dt_item(
          kable_dt[i, columns[j]], collapse_matrix[i, j], column_width,
          align = column_align, valign = valign
        )
      }
    }
  }

  # Rule placement looks at every column; columns that are not collapsed
  # always count as a boundary.
  midrule_matrix <- collapse_row_matrix(kable_dt, all_columns,
                                        html = FALSE, target)
  midrule_matrix[setdiff(all_columns, columns)] <- 1

  # Turn the original rows into patterns that include their row terminator
  # (and the \hline that follows it when booktabs is off).
  ex_bottom <- length(contents) - 1
  contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\")
  if (!table_info$booktabs) {
    contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline")
  }

  new_contents <- character(n_rows)
  if (row_group_label_position == "stack") {
    if (is.null(headers_to_remove)) headers_to_remove <- head(columns, -1)
    table_info$colnames[headers_to_remove] <- ""
    new_header <- paste(table_info$colnames, collapse = " & ")
    out <- sub(contents[1], new_header, out)
    table_info$contents[1] <- new_header
  }
  if (latex_hline == "custom" && is.null(custom_latex_hline)) {
    if (row_group_label_position == "stack") {
      custom_latex_hline <- 1:2
    } else {
      custom_latex_hline <- 1
    }
  }
  for (i in seq_len(n_rows)) {
    new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
    table_info$contents[i + 1] <- new_contents[i]
    if (i != n_rows) {
      rule_flags <- as.numeric(midrule_matrix[i, ]) > 0
      rule_columns <- which(rule_flags)
      # TRUE when this row ends a run in every column.
      all_columns_break <- sum(rule_flags) == ncol(midrule_matrix)
      row_midrule <- switch(
        latex_hline,
        "none" = "",
        "full" = if (all_columns_break) {
          midline_groups(rule_columns, table_info$booktabs)
        } else {
          midline_groups(rule_columns, FALSE)
        },
        "major" = if (all_columns_break) {
          midline_groups(rule_columns, table_info$booktabs)
        } else {
          ""
        },
        "custom" = if (
          sum(as.numeric(midrule_matrix[i, custom_latex_hline])) > 0
        ) {
          midline_groups(rule_columns, table_info$booktabs)
        } else {
          ""
        },
        "linespace" = if (all_columns_break) {
          "\\\\addlinespace\n"
        } else {
          ""
        }
      )
      new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
    }
    out <- sub(contents[i + 1], new_contents[i], out, perl = TRUE)
  }

  if (table_info$tabular == "longtable" && longtable_clean_cut) {
    if (max(collapse_matrix) > 50) {
      warning("It seems that you have a group larger than 50 rows and span ",
              "over a page. You probably want to set longtable_clean_cut to ",
              "be FALSE.")
    }
    if (latex_hline == "full") {
      warning("kableExtra 1.2 adds a clean_cut feature to provide better page",
              " breaking in collapse_rows. It only works when latex_hline = ",
              "'major'. It looks like you have longtable_clean_cut = T while ",
              "latex_hline = 'full'. Please change either one of them.")
    }
    # Forbid page breaks after every row end, then allow them again right
    # after each \cmidrule.
    out <- gsub("\\\\\\\\($|\n)", "\\\\\\\\\\\\nopagebreak\\1", out)
    out <- gsub("(\\\\cmidrule[{][^}]*[}])", "\\1\\\\pagebreak[0]", out)
  }
  out <- structure(out, format = "latex", class = "knitr_kable")
  table_info$collapse_rows <- TRUE
  table_info$collapse_matrix <- collapse_matrix
  attr(out, "kable_meta") <- table_info
  if (row_group_label_position == "stack") {
    group_row_index_list <- collapse_rows_index(kable_dt, head(columns, -1))
    out <- collapse_rows_latex_stack(out, group_row_index_list,
                                     row_group_label_fonts)
  }
  out
}
| |
# Split LaTeX table rows into a data frame of character cells.
#
# x: character vector of rows whose cells are separated by " & ".
# col_names: when TRUE the first element of `x` is the header row and is
# discarded before splitting.
kable_dt_latex <- function(x, col_names) {
  body_rows <- if (col_names) x[-1] else x
  cell_matrix <- do.call(rbind, str_split(body_rows, " & "))
  data.frame(cell_matrix, stringsAsFactors = FALSE)
}
| |
# Build the replacement text for one cell of a collapsed column.
#
# span == 0 gives an empty cell, span == 1 leaves `x` untouched, and any other
# span wraps `x` in an escaped \multirow command with a negative row count,
# the given width (an escaped "*" when `width` is NULL) and an alignment
# command chosen from `align` ("l", "c" or "r"; other values add none).
collapse_new_dt_item <- function(x, span, width = NULL, align, valign) {
  if (span == 0) {
    ""
  } else if (span == 1) {
    x
  } else {
    multirow_head <- paste0("\\\\multirow", valign, "\\{", -span, "\\}\\{")
    width_spec <- ifelse(is.null(width), "\\*", width)
    align_command <- switch(
      align,
      "l" = "\\\\raggedright\\\\arraybackslash ",
      "c" = "\\\\centering\\\\arraybackslash ",
      "r" = "\\\\raggedleft\\\\arraybackslash "
    )
    paste0(multirow_head, width_spec, "\\}\\{", align_command, x, "\\}")
  }
}
| |
# Build partial horizontal rules covering the column positions in `x`.
#
# `x` is an increasing vector of column positions. Each run of consecutive
# positions becomes one "<first>-<last>" range, emitted as an escaped
# \cmidrule (booktabs = TRUE) or \cline (booktabs = FALSE). The rules are
# joined with newlines. An empty `x` yields "" so that no rule is emitted.
midline_groups <- function(x, booktabs = TRUE) {
  if (length(x) == 0) {
    return("")
  }
  # Positions in `x` after which the next column is not adjacent.
  run_breaks <- which(diff(x) > 1)
  range_starts <- x[c(1, run_breaks + 1)]
  range_ends <- x[c(run_breaks, length(x))]
  rule_command <- if (booktabs) "\\\\cmidrule{" else "\\\\cline{"
  paste0(rule_command, range_starts, "-", range_ends, "}", collapse = "\n")
}
| |
# Return the escaped \addlinespace command. `x` is accepted for signature
# compatibility but does not influence the result.
linespace_groups <- function(x) {
  "\\\\addlinespace"
}
| |
| |
# For each requested column, return the lengths of its runs of consecutive
# equal values as a vector named by the run values. The result is a list with
# one such vector per entry of `columns`.
collapse_rows_index <- function(kable_dt, columns) {
  run_lengths_by_value <- function(column_values) {
    runs <- rle(column_values)
    stats::setNames(runs$lengths, runs$values)
  }
  lapply(columns, function(column) {
    run_lengths_by_value(kable_dt[, column])
  })
}
| |
| |
# Add stacked row-group labels to a LaTeX table.
#
# Calls group_rows() once per element of `group_row_index_list`, feeding the
# result of one call into the next. The font arguments of level i start from
# the built-in defaults (bold, then italic, then plain for every further
# level) and are overridden by `row_group_label_fonts[[i]]` when supplied.
# With an empty index list the table is returned unchanged.
collapse_rows_latex_stack <- function(kable_input, group_row_index_list,
                                      row_group_label_fonts) {
  # Copy every named element of `updated_list` over `default_list`.
  merge_lists <- function(default_list, updated_list) {
    for (x in names(updated_list)) {
      default_list[[x]] <- updated_list[[x]]
    }
    default_list
  }
  default_font_list <- list(
    list(bold = TRUE, italic = FALSE),
    list(bold = FALSE, italic = TRUE),
    list(bold = FALSE, italic = FALSE)
  )
  n_default_fonts <- length(default_font_list)
  n_supplied_fonts <- length(row_group_label_fonts)
  out <- kable_input
  for (i in seq_along(group_row_index_list)) {
    # Levels beyond the last default reuse the last default.
    group_row_args <- default_font_list[[min(i, n_default_fonts)]]
    if (i <= n_supplied_fonts) {
      group_row_args <- merge_lists(group_row_args, row_group_label_fonts[[i]])
    }
    group_row_args <- merge_lists(
      list(kable_input = out, index = group_row_index_list[[i]]),
      group_row_args)
    out <- do.call(group_rows, group_row_args)
  }
  out
}