blob: e520b40522234504c00fa142f544252e61870b50 [file] [log] [blame]
Hao Zhu3166f062017-06-26 07:51:46 -10001#' Collapse repeated rows to multirow cell
Hao Zhu2a87e8e2017-06-14 15:49:33 -04002#'
Hao Zhu8a160b12017-06-26 13:41:35 -10003#' @description Collapse same values in columns into multirow cells. This
4#' feature does similar things with `group_rows`. However, unlike `group_rows`,
5#' it analyzes existing columns, finds out rows that can be grouped together,
6#' and make them multirow cells. Note that if you want to use `column_spec` to
7#' specify column styles, you should use `column_spec` before `collapse_rows`.
8#'
9#' @param kable_input Output of `knitr::kable()` with `format` specified
Hao Zhu5e2528e2020-08-03 09:16:34 -040010#' @param columns A numeric value or vector indicating in which column(s) rows
Jakob Richteraebd8292018-10-31 16:27:29 +010011#' need to be collapsed.
Hao Zhuec169362018-05-21 01:05:29 -040012#' @param valign Select from "top", "middle"(default), "bottom". The reason why
13#' "top" is not default is that the multirow package on CRAN win-builder is
14#' not up to date.
Hao Zhu12b0ade2018-01-13 16:19:58 -050015#' @param latex_hline Option controlling the behavior of adding hlines to table.
georgeguieaeb0cd2018-03-30 17:39:46 -050016#' Choose from `full`, `major`, `none`, `custom`.
17#' @param custom_latex_hline Numeric column positions whose collapsed rows will
18#' be separated by hlines.
19#' @param row_group_label_position Option controlling positions of row group
20#' labels. Choose from `identity`, `stack`.
21#' @param row_group_label_fonts A list of arguments that can be supplied to
22#' group_rows function to format the row group label when
23#' `row_group_label_position` is `stack`
24#' @param headers_to_remove Numeric column positions where headers should be
25#' removed when they are stacked.
Hao Zhu8a160b12017-06-26 13:41:35 -100026#'
Hao Zhu5a7689e2017-06-26 15:37:24 -100027#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
28#' x <- knitr::kable(dt, "html")
29#' collapse_rows(x)
30#'
Hao Zhuf4b35292017-06-25 22:38:37 -100031#' @export
Hao Zhu12b0ade2018-01-13 16:19:58 -050032collapse_rows <- function(kable_input, columns = NULL,
Hao Zhuec169362018-05-21 01:05:29 -040033 valign = c("middle", "top", "bottom"),
georgeguieaeb0cd2018-03-30 17:39:46 -050034 latex_hline = c("full", "major", "none", "custom"),
35 row_group_label_position = c('identity', 'stack'),
36 custom_latex_hline = NULL,
37 row_group_label_fonts = NULL,
Hao Zhu5e2528e2020-08-03 09:16:34 -040038 headers_to_remove = NULL,
39 target = NULL) {
Hao Zhu2a87e8e2017-06-14 15:49:33 -040040 kable_format <- attr(kable_input, "format")
41 if (!kable_format %in% c("html", "latex")) {
Hao Zhu401ebd82018-01-14 17:10:20 -050042 warning("Please specify format in kable. kableExtra can customize either ",
43 "HTML or LaTeX outputs. See https://haozhu233.github.io/kableExtra/ ",
44 "for details.")
Hao Zhu2a87e8e2017-06-14 15:49:33 -040045 return(kable_input)
46 }
Hao Zhuec169362018-05-21 01:05:29 -040047 valign <- match.arg(valign, c("middle", "top", "bottom"))
Hao Zhu5e2528e2020-08-03 09:16:34 -040048 if (!is.null(target)) {
49 if (length(target) > 1 && is.integer(target)) {
50 stop("target can only be a length 1 integer")
51 }
52 }
Hao Zhu2a87e8e2017-06-14 15:49:33 -040053 if (kable_format == "html") {
Hao Zhu5e2528e2020-08-03 09:16:34 -040054 return(collapse_rows_html(kable_input, columns, valign, target))
Hao Zhu2a87e8e2017-06-14 15:49:33 -040055 }
56 if (kable_format == "latex") {
georgeguieaeb0cd2018-03-30 17:39:46 -050057 latex_hline <- match.arg(latex_hline, c("full", "major", "none", "custom"))
58 row_group_label_position <- match.arg(row_group_label_position,
59 c('identity', 'stack'))
Hao Zhu5dd3e282018-05-20 18:39:48 -040060 return(collapse_rows_latex(kable_input, columns, latex_hline, valign,
georgeguieaeb0cd2018-03-30 17:39:46 -050061 row_group_label_position, row_group_label_fonts, custom_latex_hline,
62 headers_to_remove))
Hao Zhu2a87e8e2017-06-14 15:49:33 -040063 }
64}
65
Hao Zhu5e2528e2020-08-03 09:16:34 -040066collapse_rows_html <- function(kable_input, columns, valign, target) {
Hao Zhu2a87e8e2017-06-14 15:49:33 -040067 kable_attrs <- attributes(kable_input)
Hao Zhu5e2528e2020-08-03 09:16:34 -040068 kable_xml <- kable_as_xml(kable_input)
Hao Zhu2a87e8e2017-06-14 15:49:33 -040069 kable_tbody <- xml_tpart(kable_xml, "tbody")
70
71 kable_dt <- rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]]
Hao Zhuf4b35292017-06-25 22:38:37 -100072 if (is.null(columns)) {
73 columns <- seq(1, ncol(kable_dt))
74 }
Hao Zhu5e2528e2020-08-03 09:16:34 -040075 if (!is.null(target)) {
76 if (!target %in% columns) {
77 stop("target has to be within the range of columns")
78 }
79 }
Hao Zhu23456762018-03-26 12:30:10 -040080 if (!is.null(kable_attrs$header_above)) {
81 kable_dt_col_names <- unlist(kable_dt[kable_attrs$header_above, ])
82 kable_dt <- kable_dt[-(1:kable_attrs$header_above),]
83 names(kable_dt) <- kable_dt_col_names
84 }
Hao Zhu5e2528e2020-08-03 09:16:34 -040085 # kable_dt$row_id <- seq(nrow(kable_dt))
86 collapse_matrix <- collapse_row_matrix(kable_dt, columns, target = target)
Hao Zhu2a87e8e2017-06-14 15:49:33 -040087
88 for (i in 1:nrow(collapse_matrix)) {
89 matrix_row <- collapse_matrix[i, ]
Hao Zhu38cdcdb2017-06-27 09:08:30 -100090 names(matrix_row) <- names(collapse_matrix)
Hao Zhu3166f062017-06-26 07:51:46 -100091 target_row <- xml_child(kable_tbody, i)
92 row_node_rm_count <- 0
93 for (j in 1:length(matrix_row)) {
94 collapsing_col <- as.numeric(sub("x", "", names(matrix_row)[j])) -
95 row_node_rm_count
96 target_cell <- xml_child(target_row, collapsing_col)
97 if (matrix_row[j] == 0) {
98 xml_remove(target_cell)
99 row_node_rm_count <- row_node_rm_count + 1
100 } else if (matrix_row[j] != 1) {
101 xml_attr(target_cell, "rowspan") <- matrix_row[j]
102 xml_attr(target_cell, "style") <- paste0(
103 xml_attr(target_cell, "style"),
Hao Zhu5dd3e282018-05-20 18:39:48 -0400104 "vertical-align: ", valign, " !important;")
Hao Zhu2a87e8e2017-06-14 15:49:33 -0400105 }
106 }
107 }
108
Hao Zhuf2dfd142017-07-24 14:43:28 -0400109 out <- as_kable_xml(kable_xml)
Hao Zhu2a87e8e2017-06-14 15:49:33 -0400110 attributes(out) <- kable_attrs
Hao Zhuf2100832018-01-11 16:20:29 -0500111 if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
Hao Zhu2a87e8e2017-06-14 15:49:33 -0400112 return(out)
113}
114
Hao Zhu5e2528e2020-08-03 09:16:34 -0400115split_factor <- function(x) {
116 group_idx <- seq(1, length(x))
117 return(factor(unlist(lapply(group_idx, function(i) {rep(i, x[i])}))))
118}
119
120collapse_row_matrix <- function(kable_dt, columns, html = T, target = NULL) {
Hao Zhuf4b35292017-06-25 22:38:37 -1000121 if (html) {
122 column_block <- function(x) c(x, rep(0, x - 1))
123 } else {
124 column_block <- function(x) c(rep(0, x - 1), x)
125 }
126 mapping_matrix <- list()
Hao Zhu5e2528e2020-08-03 09:16:34 -0400127 if (is.null(target)) {
128 for (i in columns) {
129 mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
130 rle(kable_dt[, i])$lengths, column_block))
131 }
132 } else {
133 target_group = split_factor(rle(kable_dt[, target])$lengths)
134 for (i in columns) {
135 column_split = split(kable_dt[, i], target_group)
136 mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
137 column_split, function(sp) {
138 lapply(rle(sp)$length, column_block)
139 }))
140 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000141 }
Hao Zhu5e2528e2020-08-03 09:16:34 -0400142
Hao Zhuf4b35292017-06-25 22:38:37 -1000143 mapping_matrix <- data.frame(mapping_matrix)
144 return(mapping_matrix)
145}
146
Hao Zhu5dd3e282018-05-20 18:39:48 -0400147collapse_rows_latex <- function(kable_input, columns, latex_hline, valign,
georgeguieaeb0cd2018-03-30 17:39:46 -0500148 row_group_label_position, row_group_label_fonts,
149 custom_latex_hline, headers_to_remove) {
Hao Zhuf4b35292017-06-25 22:38:37 -1000150 table_info <- magic_mirror(kable_input)
Hao Zhu3fc0e882018-04-03 16:06:41 -0400151 out <- solve_enc(kable_input)
Hao Zhu064990d2017-10-17 18:08:42 -0400152
Hao Zhu5dd3e282018-05-20 18:39:48 -0400153 valign <- switch(
154 valign,
155 top = "\\[t\\]",
156 middle = "",
157 bottom = "\\[b\\]"
158 )
159
Hao Zhuf4b35292017-06-25 22:38:37 -1000160 if (is.null(columns)) {
161 columns <- seq(1, table_info$ncol)
162 }
Hao Zhu064990d2017-10-17 18:08:42 -0400163
Hao Zhuf4b35292017-06-25 22:38:37 -1000164 contents <- table_info$contents
165 kable_dt <- kable_dt_latex(contents)
georgeguieaeb0cd2018-03-30 17:39:46 -0500166
167 collapse_matrix_rev <- collapse_row_matrix(kable_dt, columns, html = TRUE)
Hao Zhu01b15b82018-01-12 17:48:21 -0500168 collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE)
Hao Zhuf4b35292017-06-25 22:38:37 -1000169
170 new_kable_dt <- kable_dt
Jakob Richteraebd8292018-10-31 16:27:29 +0100171 for (j in seq_along(columns)) {
Hao Zhuf4b35292017-06-25 22:38:37 -1000172 column_align <- table_info$align_vector_origin[columns[j]]
173 column_width <- ifelse(
174 is.null(table_info$column_width[[paste0("column_", columns[j])]]),
175 "*", table_info$column_width[paste0("column_", columns[j])])
176 for (i in seq(1:nrow(collapse_matrix))) {
georgeguieaeb0cd2018-03-30 17:39:46 -0500177 if(row_group_label_position == 'stack'){
Jakob Richteraebd8292018-10-31 16:27:29 +0100178 if(columns[j] < ncol(collapse_matrix) || collapse_matrix_rev[i, j] == 0){
179 new_kable_dt[i, columns[j]] <- ''
georgeguieaeb0cd2018-03-30 17:39:46 -0500180 }
181 } else {
Jakob Richteraebd8292018-10-31 16:27:29 +0100182 new_kable_dt[i, columns[j]] <- collapse_new_dt_item(
183 kable_dt[i, columns[j]], collapse_matrix[i, j], column_width,
Hao Zhu5dd3e282018-05-20 18:39:48 -0400184 align = column_align, valign = valign
georgeguieaeb0cd2018-03-30 17:39:46 -0500185 )
186 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000187 }
188 }
Hao Zhu654c91f2017-07-03 14:03:34 -0400189
190 midrule_matrix <- collapse_row_matrix(kable_dt, seq(1, table_info$ncol),
191 html = F)
192 midrule_matrix[setdiff(seq(1, table_info$ncol), columns)] <- 1
193
194 ex_bottom <- length(contents) - 1
195 contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\")
196 if (!table_info$booktabs) {
197 contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline")
198 }
Hao Zhu01b15b82018-01-12 17:48:21 -0500199
200 new_contents <- c()
georgeguieaeb0cd2018-03-30 17:39:46 -0500201 if(row_group_label_position == 'stack'){
202 if(is.null(headers_to_remove)) headers_to_remove <- head(columns, -1)
203 table_info$colnames[headers_to_remove] <- ''
204 new_header <- paste(table_info$colnames, collapse = ' & ')
205 out <- sub(contents[1], new_header, out)
206 table_info$contents[1] <- new_header
207 }
208 if(latex_hline == 'custom' & is.null(custom_latex_hline)){
209 if(row_group_label_position == 'stack'){
210 custom_latex_hline = 1:2
211 } else {
212 custom_latex_hline = 1
213 }
214 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000215 for (i in seq(1:nrow(collapse_matrix))) {
216 new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
Hao Zhu12b0ade2018-01-13 16:19:58 -0500217 table_info$contents[i + 1] <- new_contents[i]
Hao Zhu654c91f2017-07-03 14:03:34 -0400218 if (i != nrow(collapse_matrix)) {
Hao Zhu12b0ade2018-01-13 16:19:58 -0500219 row_midrule <- switch(
220 latex_hline,
221 "none" = "",
222 "full" = midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
223 table_info$booktabs),
224 "major" = ifelse(
225 sum(as.numeric(midrule_matrix[i, ]) > 0) == ncol(midrule_matrix),
226 midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
227 table_info$booktabs),
228 ""
georgeguieaeb0cd2018-03-30 17:39:46 -0500229 ),
230 "custom" = ifelse(
231 sum(as.numeric(midrule_matrix[i, custom_latex_hline])) > 0,
232 midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
233 table_info$booktabs),
234 ""
235 )
Hao Zhu12b0ade2018-01-13 16:19:58 -0500236 )
Hao Zhu654c91f2017-07-03 14:03:34 -0400237 new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
238 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000239 out <- sub(contents[i + 1], new_contents[i], out)
240 }
Hao Zhu8f202992017-07-15 02:20:18 -0400241 out <- gsub("\\\\addlinespace\n", "", out)
Hao Zhuf4b35292017-06-25 22:38:37 -1000242
243 out <- structure(out, format = "latex", class = "knitr_kable")
244 table_info$collapse_rows <- TRUE
245 attr(out, "kable_meta") <- table_info
georgeguieaeb0cd2018-03-30 17:39:46 -0500246 if(row_group_label_position == 'stack'){
247 group_row_index_list <- collapse_rows_index(kable_dt, head(columns, -1))
248 out <- collapse_rows_latex_stack(out, group_row_index_list, row_group_label_fonts)
249 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000250 return(out)
251}
252
253kable_dt_latex <- function(x) {
254 data.frame(do.call(rbind, str_split(x[-1], " & ")), stringsAsFactors = FALSE)
255}
256
Hao Zhu5dd3e282018-05-20 18:39:48 -0400257collapse_new_dt_item <- function(x, span, width = NULL, align, valign) {
Hao Zhuf4b35292017-06-25 22:38:37 -1000258 if (span == 0) return("")
259 if (span == 1) return(x)
260 out <- paste0(
Hao Zhu5dd3e282018-05-20 18:39:48 -0400261 "\\\\multirow", valign, "\\{", -span, "\\}\\{",
Hao Zhuf4b35292017-06-25 22:38:37 -1000262 ifelse(is.null(width), "\\*", width),
263 "\\}\\{",
264 switch(align,
265 "l" = "\\\\raggedright\\\\arraybackslash ",
266 "c" = "\\\\centering\\\\arraybackslash ",
267 "r" = "\\\\raggedleft\\\\arraybackslash "),
268 x, "\\}"
269 )
270 return(out)
Hao Zhu2a87e8e2017-06-14 15:49:33 -0400271}
Hao Zhu654c91f2017-07-03 14:03:34 -0400272
273midline_groups <- function(x, booktabs = T) {
274 diffs <- c(1, diff(x))
275 start_indexes <- c(1, which(diffs > 1))
Hao Zhu12b0ade2018-01-13 16:19:58 -0500276 end_indexes <- c(start_indexes - 1, length(x))
Hao Zhu654c91f2017-07-03 14:03:34 -0400277 ranges <- paste0(x[start_indexes], "-", x[end_indexes])
278 if (booktabs) {
279 out <- paste0("\\\\cmidrule{", ranges, "}")
280 } else {
281 out <- paste0("\\\\cline{", ranges, "}")
282 }
283 out <- paste0(out, collapse = "\n")
284 return(out)
285}
georgeguieaeb0cd2018-03-30 17:39:46 -0500286
287
288collapse_rows_index <- function(kable_dt, columns) {
289 format_to_row_index <- function(x){
290 x = rle(x)
291 out = x$lengths
292 names(out) = x$values
293 out
294 }
295 group_rows_index_list <- lapply(columns, function(x) {
296 format_to_row_index(kable_dt[, x])
297 })
298 return(group_rows_index_list)
299}
300
301
302collapse_rows_latex_stack <- function(kable_input, group_row_index_list,
303 row_group_label_fonts){
304 merge_lists <- function(default_list, updated_list){
305 for(x in names(updated_list)){
306 default_list[[x]] <- updated_list[[x]]
307 }
308 return(default_list)
309 }
310 default_font_list <- list(
311 list(bold = T, italic = F),
312 list(bold = F, italic = T),
313 list(bold = F, italic = F)
314 )
315 n_default_fonts = length(default_font_list)
316 n_supplied_fonts = length(row_group_label_fonts)
317 group_row_font_list <- list()
318 out <- kable_input
319 for(i in 1:length(group_row_index_list)){
320 if(i > n_default_fonts){
321 group_row_args <- default_font_list[[n_default_fonts]]
322 } else {
323 group_row_args <- default_font_list[[i]]
324 }
325 if(i <= n_supplied_fonts){
326 group_row_args <- merge_lists(group_row_args, row_group_label_fonts[[i]])
327 }
328 group_row_args <- merge_lists(
329 list(kable_input = out, index = group_row_index_list[[i]]),
330 group_row_args)
331 out <- do.call(group_rows, group_row_args)
332 }
333 return(out)
334}