| Hao Zhu | 3166f06 | 2017-06-26 07:51:46 -1000 | [diff] [blame] | 1 | #' Collapse repeated rows to multirow cell | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 2 | #' | 
| Hao Zhu | 8a160b1 | 2017-06-26 13:41:35 -1000 | [diff] [blame] | 3 | #' @description Collapse same values in columns into multirow cells. This | 
 | 4 | #' feature does similar things with `group_rows`. However, unlike `group_rows`, | 
 | 5 | #' it analyzes existing columns, finds out rows that can be grouped together, | 
 | 6 | #' and make them multirow cells. Note that if you want to use `column_spec` to | 
 | 7 | #' specify column styles, you should use `column_spec` before `collapse_rows`. | 
 | 8 | #' | 
 | 9 | #' @param kable_input Output of `knitr::kable()` with `format` specified | 
 | 10 | #' @param columns Numeric column positions where rows need to be collapsed. | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 11 | #' @param latex_hline Option controlling the behavior of adding hlines to table. | 
 | 12 | #' Choose from `full`, `major`, `none`. | 
| Hao Zhu | 8a160b1 | 2017-06-26 13:41:35 -1000 | [diff] [blame] | 13 | #' | 
| Hao Zhu | 5a7689e | 2017-06-26 15:37:24 -1000 | [diff] [blame] | 14 | #' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b")) | 
 | 15 | #' x <- knitr::kable(dt, "html") | 
 | 16 | #' collapse_rows(x) | 
 | 17 | #' | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 18 | #' @export | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 19 | collapse_rows <- function(kable_input, columns = NULL, | 
 | 20 |                           latex_hline = c("full", "major", "none")) { | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 21 |   # if (is.null(columns)) { | 
 | 22 |   #   stop("Please specify numeric positions of columns you want to collapse.") | 
 | 23 |   # } | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 24 |   kable_format <- attr(kable_input, "format") | 
 | 25 |   if (!kable_format %in% c("html", "latex")) { | 
 | 26 |     message("Currently generic markdown table using pandoc is not supported.") | 
 | 27 |     return(kable_input) | 
 | 28 |   } | 
 | 29 |   if (kable_format == "html") { | 
 | 30 |     return(collapse_rows_html(kable_input, columns)) | 
 | 31 |   } | 
 | 32 |   if (kable_format == "latex") { | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 33 |     latex_hline <- match.arg(latex_hline, c("full", "major", "none")) | 
 | 34 |     return(collapse_rows_latex(kable_input, columns, latex_hline)) | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 35 |   } | 
 | 36 | } | 
 | 37 |  | 
 | 38 | collapse_rows_html <- function(kable_input, columns) { | 
 | 39 |   kable_attrs <- attributes(kable_input) | 
| Hao Zhu | 558c72f | 2017-07-24 15:12:00 -0400 | [diff] [blame] | 40 |   kable_xml <- read_kable_as_xml(kable_input) | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 41 |   kable_tbody <- xml_tpart(kable_xml, "tbody") | 
 | 42 |  | 
 | 43 |   kable_dt <- rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]] | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 44 |   if (is.null(columns)) { | 
 | 45 |     columns <- seq(1, ncol(kable_dt)) | 
 | 46 |   } | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 47 |   kable_dt$row_id <- rownames(kable_dt) | 
 | 48 |   collapse_matrix <- collapse_row_matrix(kable_dt, columns) | 
 | 49 |  | 
 | 50 |   for (i in 1:nrow(collapse_matrix)) { | 
 | 51 |     matrix_row <- collapse_matrix[i, ] | 
| Hao Zhu | 38cdcdb | 2017-06-27 09:08:30 -1000 | [diff] [blame] | 52 |     names(matrix_row) <- names(collapse_matrix) | 
| Hao Zhu | 3166f06 | 2017-06-26 07:51:46 -1000 | [diff] [blame] | 53 |     target_row <- xml_child(kable_tbody, i) | 
 | 54 |     row_node_rm_count <- 0 | 
 | 55 |     for (j in 1:length(matrix_row)) { | 
 | 56 |       collapsing_col <- as.numeric(sub("x", "", names(matrix_row)[j])) - | 
 | 57 |         row_node_rm_count | 
 | 58 |       target_cell <- xml_child(target_row, collapsing_col) | 
 | 59 |       if (matrix_row[j] == 0) { | 
 | 60 |         xml_remove(target_cell) | 
 | 61 |         row_node_rm_count <- row_node_rm_count + 1 | 
 | 62 |       } else if (matrix_row[j] != 1) { | 
 | 63 |         xml_attr(target_cell, "rowspan") <- matrix_row[j] | 
 | 64 |         xml_attr(target_cell, "style") <- paste0( | 
 | 65 |           xml_attr(target_cell, "style"), | 
 | 66 |           "vertical-align: middle !important;") | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 67 |       } | 
 | 68 |     } | 
 | 69 |   } | 
 | 70 |  | 
| Hao Zhu | f2dfd14 | 2017-07-24 14:43:28 -0400 | [diff] [blame] | 71 |   out <- as_kable_xml(kable_xml) | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 72 |   attributes(out) <- kable_attrs | 
| Hao Zhu | f210083 | 2018-01-11 16:20:29 -0500 | [diff] [blame] | 73 |   if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out)) | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 74 |   return(out) | 
 | 75 | } | 
 | 76 |  | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 77 | collapse_row_matrix <- function(kable_dt, columns, html = T)  { | 
 | 78 |   if (html) { | 
 | 79 |     column_block <- function(x) c(x, rep(0, x - 1)) | 
 | 80 |   } else { | 
 | 81 |     column_block <- function(x) c(rep(0, x - 1), x) | 
 | 82 |   } | 
 | 83 |   mapping_matrix <- list() | 
 | 84 |   for (i in columns) { | 
 | 85 |     mapping_matrix[[paste0("x", i)]] <- unlist(lapply( | 
 | 86 |       rle(kable_dt[, i])$length, column_block)) | 
 | 87 |   } | 
 | 88 |   mapping_matrix <- data.frame(mapping_matrix) | 
 | 89 |   return(mapping_matrix) | 
 | 90 | } | 
 | 91 |  | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 92 | collapse_rows_latex <- function(kable_input, columns, latex_hline) { | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 93 |   table_info <- magic_mirror(kable_input) | 
| Hao Zhu | 064990d | 2017-10-17 18:08:42 -0400 | [diff] [blame] | 94 |   out <- enc2utf8(as.character(kable_input)) | 
 | 95 |  | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 96 |   if (is.null(columns)) { | 
 | 97 |     columns <- seq(1, table_info$ncol) | 
 | 98 |   } | 
| Hao Zhu | 064990d | 2017-10-17 18:08:42 -0400 | [diff] [blame] | 99 |  | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 100 |   contents <- table_info$contents | 
 | 101 |   kable_dt <- kable_dt_latex(contents) | 
| Hao Zhu | 01b15b8 | 2018-01-12 17:48:21 -0500 | [diff] [blame] | 102 |   collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE) | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 103 |  | 
 | 104 |   new_kable_dt <- kable_dt | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 105 |   for (j in seq(1:ncol(collapse_matrix))) { | 
 | 106 |     column_align <- table_info$align_vector_origin[columns[j]] | 
 | 107 |     column_width <- ifelse( | 
 | 108 |       is.null(table_info$column_width[[paste0("column_", columns[j])]]), | 
 | 109 |       "*", table_info$column_width[paste0("column_", columns[j])]) | 
 | 110 |     for (i in seq(1:nrow(collapse_matrix))) { | 
 | 111 |       new_kable_dt[i, j] <- collapse_new_dt_item( | 
 | 112 |         kable_dt[i, j], collapse_matrix[i, j], column_width, align = column_align | 
 | 113 |       ) | 
 | 114 |     } | 
 | 115 |   } | 
| Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 116 |  | 
 | 117 |   midrule_matrix <- collapse_row_matrix(kable_dt, seq(1, table_info$ncol), | 
 | 118 |                                         html = F) | 
 | 119 |   midrule_matrix[setdiff(seq(1, table_info$ncol), columns)] <- 1 | 
 | 120 |  | 
 | 121 |   ex_bottom <- length(contents) - 1 | 
 | 122 |   contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\") | 
 | 123 |   if (!table_info$booktabs) { | 
 | 124 |     contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline") | 
 | 125 |   } | 
| Hao Zhu | 01b15b8 | 2018-01-12 17:48:21 -0500 | [diff] [blame] | 126 |  | 
 | 127 |   new_contents <- c() | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 128 |   for (i in seq(1:nrow(collapse_matrix))) { | 
 | 129 |     new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ") | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 130 |     table_info$contents[i + 1] <- new_contents[i] | 
| Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 131 |     if (i != nrow(collapse_matrix)) { | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 132 |       row_midrule <- switch( | 
 | 133 |         latex_hline, | 
 | 134 |         "none" = "", | 
 | 135 |         "full" = midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0), | 
 | 136 |                                 table_info$booktabs), | 
 | 137 |         "major" = ifelse( | 
 | 138 |           sum(as.numeric(midrule_matrix[i, ]) > 0) == ncol(midrule_matrix), | 
 | 139 |           midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0), | 
 | 140 |                          table_info$booktabs), | 
 | 141 |           "" | 
 | 142 |         ) | 
 | 143 |       ) | 
| Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 144 |       new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule) | 
 | 145 |     } | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 146 |     out <- sub(contents[i + 1], new_contents[i], out) | 
 | 147 |   } | 
| Hao Zhu | 8f20299 | 2017-07-15 02:20:18 -0400 | [diff] [blame] | 148 |   out <- gsub("\\\\addlinespace\n", "", out) | 
| Hao Zhu | f4b3529 | 2017-06-25 22:38:37 -1000 | [diff] [blame] | 149 |  | 
 | 150 |   out <- structure(out, format = "latex", class = "knitr_kable") | 
 | 151 |   table_info$collapse_rows <- TRUE | 
 | 152 |   attr(out, "kable_meta") <- table_info | 
 | 153 |   return(out) | 
 | 154 | } | 
 | 155 |  | 
 | 156 | kable_dt_latex <- function(x) { | 
 | 157 |   data.frame(do.call(rbind, str_split(x[-1], " & ")), stringsAsFactors = FALSE) | 
 | 158 | } | 
 | 159 |  | 
 | 160 | collapse_new_dt_item <- function(x, span, width = NULL, align) { | 
 | 161 |   if (span == 0) return("") | 
 | 162 |   if (span == 1) return(x) | 
 | 163 |   out <- paste0( | 
 | 164 |     "\\\\multirow\\{", -span, "\\}\\{", | 
 | 165 |     ifelse(is.null(width), "\\*", width), | 
 | 166 |     "\\}\\{", | 
 | 167 |     switch(align, | 
 | 168 |            "l" = "\\\\raggedright\\\\arraybackslash ", | 
 | 169 |            "c" = "\\\\centering\\\\arraybackslash ", | 
 | 170 |            "r" = "\\\\raggedleft\\\\arraybackslash "), | 
 | 171 |     x, "\\}" | 
 | 172 |   ) | 
 | 173 |   return(out) | 
| Hao Zhu | 2a87e8e | 2017-06-14 15:49:33 -0400 | [diff] [blame] | 174 | } | 
| Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 175 |  | 
 | 176 | midline_groups <- function(x, booktabs = T) { | 
 | 177 |   diffs <- c(1, diff(x)) | 
 | 178 |   start_indexes <- c(1, which(diffs > 1)) | 
| Hao Zhu | 12b0ade | 2018-01-13 16:19:58 -0500 | [diff] [blame^] | 179 |   end_indexes <- c(start_indexes - 1, length(x)) | 
| Hao Zhu | 654c91f | 2017-07-03 14:03:34 -0400 | [diff] [blame] | 180 |   ranges <- paste0(x[start_indexes], "-", x[end_indexes]) | 
 | 181 |   if (booktabs) { | 
 | 182 |     out <- paste0("\\\\cmidrule{", ranges, "}") | 
 | 183 |   } else { | 
 | 184 |     out <- paste0("\\\\cline{", ranges, "}") | 
 | 185 |   } | 
 | 186 |   out <- paste0(out, collapse = "\n") | 
 | 187 |   return(out) | 
 | 188 | } |