blob: 06d59d5567e0f1dabca1ea8b7987b4cc49331a4b [file] [log] [blame]
#' Collapse repeated rows to multirow cell
#'
#' @description Collapse same values in columns into multirow cells. This
#' feature does similar things to `group_rows`. However, unlike `group_rows`,
#' it analyzes existing columns, finds out which rows can be grouped together,
#' and makes them multirow cells. Note that if you want to use `column_spec` to
#' specify column styles, you should use `column_spec` before `collapse_rows`.
#'
#' @param kable_input Output of `knitr::kable()` with `format` specified
#' @param columns A numeric value or vector indicating in which column(s) rows
#' need to be collapsed.
#' @param valign Select from "top", "middle"(default), "bottom". The reason why
#' "top" is not default is that the multirow package on CRAN win-builder is
#' not up to date.
#' @param latex_hline Option controlling the behavior of adding hlines to table.
#' Choose from `major`, `full`, `none`, `custom` and `linespace`. We changed the
#' default from `full` to `major` in version 1.2.
#' @param custom_latex_hline Numeric column positions whose collapsed rows will
#' be separated by hlines.
#' @param row_group_label_position Option controlling positions of row group
#' labels. Choose from `identity`, `stack`.
#' @param row_group_label_fonts A list of arguments that can be supplied to
#' group_rows function to format the row group label when
#' `row_group_label_position` is `stack`
#' @param headers_to_remove Numeric column positions where headers should be
#' removed when they are stacked.
#' @param target If multiple columns are selected to do collapsing and a target
#' column is specified, this target column will be used to collapse other
#' columns based on the groups of this target column. Must be a single number.
#' @param col_names T/F. A LaTeX specific option. If you set `col.names` to
#' `NULL` in your `kable` call, you need to set this option to false to let
#' everything work properly.
#' @param longtable_clean_cut T/F with default T. Multirow cells are sometimes
#' displayed incorrectly around a pagebreak. This option forces groups to cut
#' before the end of a page. If you have a group that is longer than 1 page,
#' you need to turn off this option.
#'
#' @return The modified kable object. If `kable_input` has no `html` or
#' `latex` format, it is returned unchanged with a warning.
#'
#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
#' x <- knitr::kable(dt, "html")
#' collapse_rows(x)
#'
#' @export
collapse_rows <- function(kable_input, columns = NULL,
                          valign = c("middle", "top", "bottom"),
                          latex_hline = c("major", "full", "none", "custom",
                                          "linespace"),
                          row_group_label_position = c("identity", "stack"),
                          custom_latex_hline = NULL,
                          row_group_label_fonts = NULL,
                          headers_to_remove = NULL,
                          target = NULL,
                          col_names = TRUE,
                          longtable_clean_cut = TRUE) {
  kable_format <- attr(kable_input, "format")
  # A missing format attribute is NULL; `NULL %in% ...` is logical(0), which
  # would crash the `if`, so it is tested explicitly.
  if (is.null(kable_format) || !kable_format %in% c("html", "latex")) {
    warning("Please specify format in kable. kableExtra can customize either ",
            "HTML or LaTeX outputs. See https://haozhu233.github.io/kableExtra/ ",
            "for details.")
    return(kable_input)
  }
  valign <- match.arg(valign)
  # `target` is used as a single column position by both back ends.
  if (!is.null(target) && (length(target) != 1 || !is.numeric(target))) {
    stop("target can only be a length 1 integer")
  }
  if (kable_format == "html") {
    return(collapse_rows_html(kable_input, columns, valign, target))
  }
  # Only "latex" can reach this point.
  latex_hline <- match.arg(latex_hline)
  row_group_label_position <- match.arg(row_group_label_position,
                                        c("identity", "stack"))
  collapse_rows_latex(kable_input, columns, latex_hline, valign,
                      row_group_label_position, row_group_label_fonts,
                      custom_latex_hline, headers_to_remove, target,
                      col_names, longtable_clean_cut)
}
78
# HTML back end of collapse_rows().
#
# Reads the table cells with rvest, computes per-cell row spans with
# collapse_row_matrix(), then edits the <tbody> rows: a span of 0 removes the
# cell, a span greater than 1 sets `rowspan` and appends a vertical-align rule
# to the cell style. The span matrix is stored on the result as the
# `collapse_matrix` attribute and the "kableExtra" class is added if absent.
#
# kable_as_xml(), xml_tpart() and as_kable_xml() are helpers defined elsewhere;
# presumably they parse the kable, pick a table part and serialize it back.
collapse_rows_html <- function(kable_input, columns, valign, target) {
  kable_attrs <- attributes(kable_input)
  kable_xml <- kable_as_xml(kable_input)
  kable_tbody <- xml_tpart(kable_xml, "tbody")

  kable_dt <- rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]]
  # html_table() may hand back a tibble (newer rvest); `[, i]` on a tibble is
  # not a plain vector, so normalise to a base data.frame first.
  kable_dt <- as.data.frame(kable_dt, stringsAsFactors = FALSE)
  if (is.null(columns)) {
    columns <- seq_len(ncol(kable_dt))
  }
  if (!is.null(target)) {
    if (!target %in% columns) {
      stop("target has to be within the range of columns")
    }
  }
  if (!is.null(kable_attrs$header_above)) {
    # Extra header rows are parsed as data rows: the last of them holds the
    # real column names, and all of them are dropped from the data.
    kable_dt_col_names <- unlist(kable_dt[kable_attrs$header_above, ])
    kable_dt <- kable_dt[-(1:kable_attrs$header_above), ]
    names(kable_dt) <- kable_dt_col_names
  }
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, target = target)

  span_names <- names(collapse_matrix)
  for (i in seq_len(nrow(collapse_matrix))) {
    target_row <- xml_child(kable_tbody, i)
    # Cells already removed from this row shift later cells to the left.
    row_node_rm_count <- 0
    for (j in seq_along(span_names)) {
      span <- collapse_matrix[i, j]
      collapsing_col <- as.numeric(sub("x", "", span_names[j])) -
        row_node_rm_count
      target_cell <- xml_child(target_row, collapsing_col)
      if (span == 0) {
        xml_remove(target_cell)
        row_node_rm_count <- row_node_rm_count + 1
      } else if (span != 1) {
        xml_attr(target_cell, "rowspan") <- span
        # default = "" avoids pasting a literal "NA" when the cell has no
        # style attribute yet.
        xml_attr(target_cell, "style") <- paste0(
          xml_attr(target_cell, "style", default = ""),
          "vertical-align: ", valign, " !important;")
      }
    }
  }

  out <- as_kable_xml(kable_xml)
  kable_attrs$collapse_matrix <- collapse_matrix
  attributes(out) <- kable_attrs
  if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
  return(out)
}
127
# Turn a vector of run lengths into a grouping factor: run i contributes
# x[i] copies of i (e.g. c(2, 3) -> 1 1 2 2 2). Empty input yields an empty
# factor.
split_factor <- function(x) {
  factor(rep(seq_along(x), times = x))
}
132
# Compute row spans for the requested columns.
#
# For every run of identical consecutive values in a column, one cell carries
# the run length and the remaining cells of the run carry 0. With html = TRUE
# the length sits on the first row of the run, otherwise on the last row.
# When `target` is given, runs are computed separately inside each run of the
# target column, so no span crosses a target-group boundary.
#
# Returns a data.frame with one column per entry of `columns`, named
# "x<column>".
collapse_row_matrix <- function(kable_dt, columns, html = TRUE, target = NULL) {
  column_block <- if (html) {
    function(x) c(x, rep(0, x - 1))
  } else {
    function(x) c(rep(0, x - 1), x)
  }
  mapping_matrix <- list()
  if (is.null(target)) {
    for (i in columns) {
      # `[[i]]` always yields the column vector, for data.frames and tibbles.
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        rle(kable_dt[[i]])$lengths, column_block))
    }
  } else {
    target_group <- split_factor(rle(kable_dt[[target]])$lengths)
    for (i in columns) {
      column_split <- split(kable_dt[[i]], target_group)
      mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
        column_split, function(sp) {
          lapply(rle(sp)$lengths, column_block)
        }))
    }
  }

  data.frame(mapping_matrix)
}
159
# LaTeX back end of collapse_rows().
#
# Rebuilds every body row of the table: collapsed cells become \multirow
# cells (or are blanked when labels are stacked), a rule chosen by
# `latex_hline` is appended after each row but the last, and each rebuilt row
# is substituted into the table text with sub(). For longtables with
# `longtable_clean_cut`, \nopagebreak / \pagebreak[0] hints are added. In
# "stack" mode the group labels are inserted afterwards through
# collapse_rows_latex_stack().
#
# magic_mirror() and solve_enc() are helpers defined elsewhere; presumably
# they return the table metadata and the encoding-normalised table text.
# String literals are regex-escaped because they are used as sub()/gsub()
# patterns and replacements.
collapse_rows_latex <- function(kable_input, columns, latex_hline, valign,
                                row_group_label_position, row_group_label_fonts,
                                custom_latex_hline, headers_to_remove, target,
                                col_names, longtable_clean_cut) {
  table_info <- magic_mirror(kable_input)
  out <- solve_enc(kable_input)
  # Existing \addlinespace commands are dropped before rows are rewritten.
  out <- gsub("\\\\addlinespace\n", "", out)

  valign <- switch(
    valign,
    top = "\\[t\\]",
    middle = "",
    bottom = "\\[b\\]"
  )

  if (is.null(columns)) {
    columns <- seq_len(table_info$ncol)
  }

  contents <- table_info$contents
  kable_dt <- kable_dt_latex(contents, col_names)

  # Span on the first row of each run (rev) and on the last row of each run.
  collapse_matrix_rev <- collapse_row_matrix(kable_dt, columns, html = TRUE,
                                             target = target)
  collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE,
                                         target = target)

  stacked <- row_group_label_position == "stack"
  n_rows <- nrow(collapse_matrix)

  new_kable_dt <- kable_dt
  for (j in seq_along(columns)) {
    column_align <- table_info$align_vector_origin[columns[j]]
    column_width <- table_info$column_width[[paste0("column_", columns[j])]]
    if (is.null(column_width)) {
      column_width <- "\\*"
    }
    for (i in seq_len(n_rows)) {
      if (stacked) {
        if (columns[j] < ncol(collapse_matrix) ||
            collapse_matrix_rev[i, j] == 0) {
          new_kable_dt[i, columns[j]] <- ""
        }
      } else {
        new_kable_dt[i, columns[j]] <- collapse_new_dt_item(
          kable_dt[i, columns[j]], collapse_matrix[i, j], column_width,
          align = column_align, valign = valign
        )
      }
    }
  }

  # Columns that are not collapsed always count as "active" for rules.
  all_columns <- seq_len(table_info$ncol)
  midrule_matrix <- collapse_row_matrix(kable_dt, all_columns,
                                        html = FALSE, target = target)
  midrule_matrix[setdiff(all_columns, columns)] <- 1

  # Re-attach the row terminators to the patterns used for substitution.
  ex_bottom <- length(contents) - 1
  contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\")
  if (!table_info$booktabs) {
    contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline")
  }

  new_contents <- character(n_rows)
  if (stacked) {
    if (is.null(headers_to_remove)) headers_to_remove <- head(columns, -1)
    table_info$colnames[headers_to_remove] <- ""
    new_header <- paste(table_info$colnames, collapse = " & ")
    out <- sub(contents[1], new_header, out)
    table_info$contents[1] <- new_header
  }
  if (latex_hline == "custom" && is.null(custom_latex_hline)) {
    custom_latex_hline <- if (stacked) 1:2 else 1
  }
  for (i in seq_len(n_rows)) {
    new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
    table_info$contents[i + 1] <- new_contents[i]
    if (i != n_rows) {
      active_cols <- which(as.numeric(midrule_matrix[i, ]) > 0)
      all_active <- length(active_cols) == ncol(midrule_matrix)
      row_midrule <- switch(
        latex_hline,
        "none" = "",
        "full" = if (all_active) {
          midline_groups(active_cols, table_info$booktabs)
        } else {
          midline_groups(active_cols, FALSE)
        },
        "major" = if (all_active) {
          midline_groups(active_cols, table_info$booktabs)
        } else {
          ""
        },
        "custom" = if (
          sum(as.numeric(midrule_matrix[i, custom_latex_hline])) > 0
        ) {
          midline_groups(active_cols, table_info$booktabs)
        } else {
          ""
        },
        "linespace" = if (all_active) {
          "\\\\addlinespace\n"
        } else {
          ""
        }
      )
      new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
    }
    out <- sub(contents[i + 1], new_contents[i], out, perl = TRUE)
  }

  if (table_info$tabular == "longtable" && longtable_clean_cut) {
    if (max(collapse_matrix) > 50) {
      warning("It seems that you have a group larger than 50 rows and span ",
              "over a page. You probably want to set longtable_clean_cut to ",
              "be FALSE.")
    }
    if (latex_hline == "full") {
      warning("kableExtra 1.2 adds a clean_cut feature to provide better page",
              " breaking in collapse_rows. It only works when latex_hline = ",
              "'major'. It looks like you have longtable_clean_cut = T while ",
              "latex_hline = 'full'. Please change either one of them.")
    }
    # Forbid a break after every row end, then allow one after each cmidrule.
    out <- gsub("\\\\\\\\($|\n)", "\\\\\\\\\\\\nopagebreak\\1", out)
    out <- gsub("(\\\\cmidrule[{][^}]*[}])", "\\1\\\\pagebreak[0]", out)
  }
  out <- structure(out, format = "latex", class = "knitr_kable")
  table_info$collapse_rows <- TRUE
  table_info$collapse_matrix <- collapse_matrix
  attr(out, "kable_meta") <- table_info
  if (stacked) {
    group_row_index_list <- collapse_rows_index(kable_dt, head(columns, -1))
    out <- collapse_rows_latex_stack(out, group_row_index_list,
                                     row_group_label_fonts)
  }
  return(out)
}
294
# Split LaTeX row strings into a character data.frame, one column per cell.
# Cells are separated by " & "; when `col_names` is TRUE the first element of
# `x` is treated as the header row and left out.
kable_dt_latex <- function(x, col_names) {
  body_rows <- if (col_names) x[-1] else x
  cells <- str_split(body_rows, " & ")
  data.frame(do.call(rbind, cells), stringsAsFactors = FALSE)
}
301
# Build the replacement text for one cell of a collapsed column.
# span 0 -> empty cell, span 1 -> the cell unchanged, otherwise a
# \multirow cell spanning `span` rows upwards (negative count) with the
# given width, horizontal alignment command and vertical alignment option.
# The text is regex-escaped because callers use it as a sub() replacement.
collapse_new_dt_item <- function(x, span, width = NULL, align, valign) {
  if (span == 0) {
    return("")
  }
  if (span == 1) {
    return(x)
  }
  cell_width <- if (is.null(width)) "\\*" else width
  align_cmd <- switch(
    align,
    "l" = "\\\\raggedright\\\\arraybackslash ",
    "c" = "\\\\centering\\\\arraybackslash ",
    "r" = "\\\\raggedleft\\\\arraybackslash "
  )
  paste0(
    "\\\\multirow", valign, "\\{", -span, "\\}\\{",
    cell_width,
    "\\}\\{",
    align_cmd,
    x, "\\}"
  )
}
Hao Zhu654c91f2017-07-03 14:03:34 -0400317
# Turn a sorted vector of column positions into partial horizontal rules,
# one per run of consecutive positions, joined by newlines.
# Uses \cmidrule when `booktabs` is TRUE and \cline otherwise. Backslashes
# are doubled because the result is used as a sub() replacement.
# Returns "" when there are no positions, so no malformed rule is emitted.
midline_groups <- function(x, booktabs = TRUE) {
  if (length(x) == 0) {
    return("")
  }
  # A gap larger than 1 between neighbours starts a new run.
  run_starts <- c(1, which(diff(x) > 1) + 1)
  run_ends <- c(run_starts[-1] - 1, length(x))
  ranges <- paste0(x[run_starts], "-", x[run_ends])
  rule <- if (booktabs) "\\\\cmidrule{" else "\\\\cline{"
  paste0(rule, ranges, "}", collapse = "\n")
}
georgeguieaeb0cd2018-03-30 17:39:46 -0500331
# Return the (regex-escaped) \addlinespace command.
# `x` is accepted for signature compatibility but does not affect the result.
linespace_groups <- function(x) {
  "\\\\addlinespace"
}
341
georgeguieaeb0cd2018-03-30 17:39:46 -0500342
# For each requested column, return the lengths of its runs of identical
# consecutive values as a vector named by the run values. The result is a
# list with one such vector per entry of `columns`.
collapse_rows_index <- function(kable_dt, columns) {
  run_lengths_of <- function(values) {
    runs <- rle(values)
    counts <- runs$lengths
    names(counts) <- runs$values
    counts
  }
  lapply(columns, function(col) run_lengths_of(kable_dt[, col]))
}
355
356
# Insert stacked row-group labels into a LaTeX kable.
#
# Calls group_rows() once per element of `group_row_index_list`, feeding the
# output of one call into the next. Each level gets a default font (bold,
# then italic, then plain for all deeper levels) which can be overridden by
# the matching element of `row_group_label_fonts`. Arguments supplied there
# also override `index` and `escape`. An empty index list returns the input
# unchanged.
collapse_rows_latex_stack <- function(kable_input, group_row_index_list,
                                      row_group_label_fonts) {
  # Entries of `updated_list` replace same-named entries of `default_list`.
  merge_lists <- function(default_list, updated_list) {
    for (x in names(updated_list)) {
      default_list[[x]] <- updated_list[[x]]
    }
    default_list
  }
  default_font_list <- list(
    list(bold = TRUE, italic = FALSE),
    list(bold = FALSE, italic = TRUE),
    list(bold = FALSE, italic = FALSE)
  )
  n_default_fonts <- length(default_font_list)
  n_supplied_fonts <- length(row_group_label_fonts)
  out <- kable_input
  # seq_along() so that an empty list performs no iteration at all.
  for (i in seq_along(group_row_index_list)) {
    group_row_args <- default_font_list[[min(i, n_default_fonts)]]
    if (i <= n_supplied_fonts) {
      group_row_args <- merge_lists(group_row_args, row_group_label_fonts[[i]])
    }
    group_row_args <- merge_lists(
      list(kable_input = out, index = group_row_index_list[[i]], escape = FALSE),
      group_row_args)
    out <- do.call(group_rows, group_row_args)
  }
  out
}