#' Collapse repeated rows to multirow cell
#'
#' @description Collapse same values in columns into multirow cells. This
#' feature does similar things with `group_rows`. However, unlike `group_rows`,
#' it analyzes existing columns, finds out rows that can be grouped together,
#' and makes them multirow cells. Note that if you want to use `column_spec` to
#' specify column styles, you should use `column_spec` before `collapse_rows`.
#'
#' @param kable_input Output of `knitr::kable()` with `format` specified
#' @param columns Numeric column positions where rows need to be collapsed.
#' When `NULL` (the default), every column of the table is considered.
#'
#' @return The modified table for `"html"` and `"latex"` input. For any other
#' (or missing) format a message is emitted and `kable_input` is returned
#' unchanged.
#'
#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
#' x <- knitr::kable(dt, "html")
#' collapse_rows(x)
#'
#' @export
collapse_rows <- function(kable_input, columns = NULL) {
  kable_format <- attr(kable_input, "format")
  # `attr()` returns NULL when no format attribute is present; testing NULL
  # with `%in%` yields a zero-length condition that makes `if` fail, so the
  # missing-format case is checked explicitly first.
  if (is.null(kable_format) || !kable_format %in% c("html", "latex")) {
    message("Currently generic markdown table using pandoc is not supported.")
    return(kable_input)
  }
  if (kable_format == "html") {
    return(collapse_rows_html(kable_input, columns))
  }
  # Only "latex" can reach this point.
  collapse_rows_latex(kable_input, columns)
}
| 33 | |
# Collapse repeated cells of an HTML kable into `rowspan` cells.
#
# The table is parsed twice: once as an XML tree that is edited in place, and
# once as a data frame used to find runs of identical values per column.
# For every body row, cells whose span is 0 are removed, cells whose span is
# greater than 1 receive a `rowspan` attribute plus a vertical-align style,
# and cells with span 1 are left untouched.
#
# kable_input: HTML kable output.
# columns: numeric column positions to collapse; NULL means all columns.
# Returns the edited table with the attributes of `kable_input` restored.
collapse_rows_html <- function(kable_input, columns) {
  kable_attrs <- attributes(kable_input)
  kable_xml <- read_xml(as.character(kable_input), options = "COMPACT")
  kable_tbody <- xml_tpart(kable_xml, "tbody")

  # Coerce to a plain data frame so that `[ , i]` indexing downstream yields a
  # vector even if the parser hands back a tibble.
  parsed_tables <- rvest::html_table(
    xml2::read_html(as.character(kable_input))
  )
  kable_dt <- as.data.frame(parsed_tables[[1]], stringsAsFactors = FALSE)
  if (is.null(columns)) {
    columns <- seq_len(ncol(kable_dt))
  }
  collapse_matrix <- collapse_row_matrix(kable_dt, columns)

  for (i in seq_len(nrow(collapse_matrix))) {
    # Named numeric vector ("x<col>" -> span) for the current body row.
    spans <- unlist(collapse_matrix[i, , drop = FALSE])
    target_row <- xml_child(kable_tbody, i)
    # Removing a cell shifts the position of every later cell in this row.
    row_node_rm_count <- 0
    for (j in seq_along(spans)) {
      collapsing_col <- as.numeric(sub("x", "", names(spans)[j])) -
        row_node_rm_count
      target_cell <- xml_child(target_row, collapsing_col)
      if (spans[j] == 0) {
        xml_remove(target_cell)
        row_node_rm_count <- row_node_rm_count + 1
      } else if (spans[j] != 1) {
        xml_attr(target_cell, "rowspan") <- as.character(spans[j])
        # A cell without a style attribute reports NA; pasting that would
        # write the literal text "NA" into the style.
        cell_style <- xml_attr(target_cell, "style")
        if (is.na(cell_style)) {
          cell_style <- ""
        }
        xml_attr(target_cell, "style") <- paste0(
          cell_style, "vertical-align: middle !important;"
        )
      }
    }
  }

  out <- as_kable_xml(kable_xml)
  attributes(out) <- kable_attrs
  return(out)
}
| 71 | |
# Build the span matrix used to collapse repeated values.
#
# For each requested column, consecutive identical values form a run. A run of
# length n is encoded as n in one row and 0 in the remaining n - 1 rows: with
# `html = TRUE` the n sits on the first row of the run, otherwise on the last.
#
# kable_dt: table data (data frame; matrix-style `[ , i]` indexing is used).
# columns: column positions to analyse.
# html: whether to place the run length on the first row of each run.
# Returns a data frame with one column named "x<position>" per entry of
# `columns` and one row per table row.
collapse_row_matrix <- function(kable_dt, columns, html = TRUE) {
  column_block <- if (html) {
    function(x) c(x, rep(0, x - 1))
  } else {
    function(x) c(rep(0, x - 1), x)
  }
  mapping_matrix <- list()
  for (i in columns) {
    # `drop = TRUE` forces a vector even for tibble input, and `as.vector()`
    # turns factors into plain character so `rle()` accepts them.
    col_values <- as.vector(kable_dt[, i, drop = TRUE])
    run_lengths <- rle(col_values)$lengths
    mapping_matrix[[paste0("x", i)]] <- unlist(
      lapply(run_lengths, column_block)
    )
  }
  data.frame(mapping_matrix)
}
| 86 | |
# Collapse repeated cells of a LaTeX kable into \multirow cells.
#
# Row strings from `table_info$contents` are split into cells, the cells of
# the selected columns are rewritten through `collapse_new_dt_item()`, and
# each original row is then substituted in the table text by its rewritten
# version. Every row except the last is followed by partial rules produced by
# `midline_groups()`.
#
# kable_input: LaTeX kable output.
# columns: numeric column positions to collapse; NULL means all columns.
# Returns a "knitr_kable" object with format "latex" whose `kable_meta`
# attribute is the table info with `collapse_rows` set to TRUE.
collapse_rows_latex <- function(kable_input, columns) {
  table_info <- magic_mirror(kable_input)
  if (is.null(columns)) {
    columns <- seq_len(table_info$ncol)
  }

  out <- as.character(kable_input)
  contents <- table_info$contents
  kable_dt <- kable_dt_latex(contents)
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE)
  n_rows <- nrow(collapse_matrix)

  new_kable_dt <- kable_dt
  for (j in seq_len(ncol(collapse_matrix))) {
    # Column j of `collapse_matrix` describes table column `columns[j]`; the
    # cell must be read and written at that position, not at position j.
    col_pos <- columns[j]
    column_align <- table_info$align_vector_origin[col_pos]
    width_spec <- table_info$column_width[[paste0("column_", col_pos)]]
    column_width <- if (is.null(width_spec)) "*" else width_spec
    for (i in seq_len(n_rows)) {
      new_kable_dt[i, col_pos] <- collapse_new_dt_item(
        kable_dt[i, col_pos], collapse_matrix[i, j], column_width,
        align = column_align
      )
    }
  }

  # Columns that are not being collapsed always get a rule under every row.
  midrule_matrix <- collapse_row_matrix(kable_dt, seq(1, table_info$ncol),
                                        html = FALSE)
  midrule_matrix[setdiff(seq(1, table_info$ncol), columns)] <- 1

  # Extend the search pattern of every row except the last so that the row
  # terminator (and \hline without booktabs) is replaced along with the row.
  # `seq_len(n)[-1]` is empty for a single-row table, where `2:n` would count
  # backwards and touch the wrong elements.
  ex_bottom <- length(contents) - 1
  inner_rows <- seq_len(ex_bottom)[-1]
  contents[inner_rows] <- paste0(contents[inner_rows], "\\\\\\\\")
  if (!table_info$booktabs) {
    contents[inner_rows] <- paste0(contents[inner_rows], "\n\\\\hline")
  }

  new_contents <- character(n_rows)
  for (i in seq_len(n_rows)) {
    new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
    if (i != n_rows) {
      row_midrule <- midline_groups(
        which(as.numeric(midrule_matrix[i, ]) > 0),
        table_info$booktabs
      )
      new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
    }
    out <- sub(contents[i + 1], new_contents[i], out)
  }
  out <- gsub("\\\\addlinespace\n", "", out)

  out <- structure(out, format = "latex", class = "knitr_kable")
  table_info$collapse_rows <- TRUE
  attr(out, "kable_meta") <- table_info
  return(out)
}
| 136 | |
# Turn LaTeX row strings into a data frame of cell strings.
#
# The first element of `x` is dropped (presumably the header row -- confirm
# against the caller), each remaining string is split on " & ", and the pieces
# are stacked row-wise. Strings are kept as character, not factors.
kable_dt_latex <- function(x) {
  body_rows <- x[-1]
  cells_per_row <- str_split(body_rows, " & ")
  cell_matrix <- do.call(rbind, cells_per_row)
  data.frame(cell_matrix, stringsAsFactors = FALSE)
}
| 140 | |
# Render one table cell according to its span.
#
# x: cell text.
# span: 0 blanks the cell, 1 leaves it unchanged, any other value wraps the
#   text in an escaped \multirow command using the negated span.
# width: width argument for \multirow; NULL selects the escaped "*".
# align: "l", "c" or "r" adds the matching alignment command in front of the
#   text; any other value adds none.
# Returns a single string. Backslashes and braces in the generated command
# are emitted in escaped form.
collapse_new_dt_item <- function(x, span, width = NULL, align) {
  if (span == 0) {
    return("")
  }
  if (span == 1) {
    return(x)
  }

  width_text <- ifelse(is.null(width), "\\*", width)
  # `switch()` yields NULL for an unmatched `align`, which `paste0()` drops.
  align_cmd <- switch(
    align,
    "l" = "\\\\raggedright\\\\arraybackslash ",
    "c" = "\\\\centering\\\\arraybackslash ",
    "r" = "\\\\raggedleft\\\\arraybackslash "
  )
  opening <- paste0("\\\\multirow\\{", -span, "\\}\\{")
  paste0(opening, width_text, "\\}\\{", align_cmd, x, "\\}")
}
Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 156 | |
# Build partial horizontal rules covering runs of consecutive columns.
#
# x: increasing column positions that need a rule below the current row.
# booktabs: TRUE emits \cmidrule commands, FALSE emits \cline commands.
# Returns one string with one command per run of consecutive positions,
# separated by newlines (backslashes are emitted in escaped form), or "" when
# `x` is empty.
midline_groups <- function(x, booktabs = TRUE) {
  # Without any position there is nothing to draw; building a range from an
  # empty vector would otherwise produce an invalid "NA-" range.
  if (length(x) == 0) {
    return("")
  }
  gap_after <- diff(x) > 1
  # A run starts at the first position and after every gap; it ends before
  # every gap and at the last position.
  run_starts <- x[c(TRUE, gap_after)]
  run_ends <- x[c(gap_after, TRUE)]
  ranges <- paste0(run_starts, "-", run_ends)
  rule_cmd <- if (booktabs) "\\\\cmidrule{" else "\\\\cline{"
  paste0(rule_cmd, ranges, "}", collapse = "\n")
}