blob: 8d42b71fa02ac6293eaefc62807b77472b4343d3 [file] [log] [blame]
Hao Zhu3166f062017-06-26 07:51:46 -10001#' Collapse repeated rows to multirow cell
Hao Zhu2a87e8e2017-06-14 15:49:33 -04002#'
Hao Zhu8a160b12017-06-26 13:41:35 -10003#' @description Collapse same values in columns into multirow cells. This
4#' feature does similar things with `group_rows`. However, unlike `group_rows`,
5#' it analyzes existing columns, finds out rows that can be grouped together,
6#' and make them multirow cells. Note that if you want to use `column_spec` to
7#' specify column styles, you should use `column_spec` before `collapse_rows`.
8#'
9#' @param kable_input Output of `knitr::kable()` with `format` specified
10#' @param columns Numeric column positions where rows need to be collapsed.
Hao Zhu12b0ade2018-01-13 16:19:58 -050011#' @param latex_hline Option controlling the behavior of adding hlines to table.
georgeguieaeb0cd2018-03-30 17:39:46 -050012#' Choose from `full`, `major`, `none`, `custom`.
13#' @param custom_latex_hline Numeric column positions whose collapsed rows will
14#' be separated by hlines.
15#' @param row_group_label_position Option controlling positions of row group
16#' labels. Choose from `identity`, `stack`.
17#' @param row_group_label_fonts A list of arguments that can be supplied to
18#' group_rows function to format the row group label when
19#' `row_group_label_position` is `stack`
20#' @param headers_to_remove Numeric column positions where headers should be
21#' removed when they are stacked.
Hao Zhu8a160b12017-06-26 13:41:35 -100022#'
Hao Zhu5a7689e2017-06-26 15:37:24 -100023#' @examples dt <- data.frame(a = c(1, 1, 2, 2), b = c("a", "a", "a", "b"))
24#' x <- knitr::kable(dt, "html")
25#' collapse_rows(x)
26#'
Hao Zhuf4b35292017-06-25 22:38:37 -100027#' @export
Hao Zhu12b0ade2018-01-13 16:19:58 -050028collapse_rows <- function(kable_input, columns = NULL,
georgeguieaeb0cd2018-03-30 17:39:46 -050029 latex_hline = c("full", "major", "none", "custom"),
30 row_group_label_position = c('identity', 'stack'),
31 custom_latex_hline = NULL,
32 row_group_label_fonts = NULL,
33 headers_to_remove = NULL) {
Hao Zhuf4b35292017-06-25 22:38:37 -100034 # if (is.null(columns)) {
35 # stop("Please specify numeric positions of columns you want to collapse.")
36 # }
Hao Zhu2a87e8e2017-06-14 15:49:33 -040037 kable_format <- attr(kable_input, "format")
38 if (!kable_format %in% c("html", "latex")) {
Hao Zhu401ebd82018-01-14 17:10:20 -050039 warning("Please specify format in kable. kableExtra can customize either ",
40 "HTML or LaTeX outputs. See https://haozhu233.github.io/kableExtra/ ",
41 "for details.")
Hao Zhu2a87e8e2017-06-14 15:49:33 -040042 return(kable_input)
43 }
44 if (kable_format == "html") {
45 return(collapse_rows_html(kable_input, columns))
46 }
47 if (kable_format == "latex") {
georgeguieaeb0cd2018-03-30 17:39:46 -050048 latex_hline <- match.arg(latex_hline, c("full", "major", "none", "custom"))
49 row_group_label_position <- match.arg(row_group_label_position,
50 c('identity', 'stack'))
51 return(collapse_rows_latex(kable_input, columns, latex_hline,
52 row_group_label_position, row_group_label_fonts, custom_latex_hline,
53 headers_to_remove))
Hao Zhu2a87e8e2017-06-14 15:49:33 -040054 }
55}
56
57collapse_rows_html <- function(kable_input, columns) {
58 kable_attrs <- attributes(kable_input)
Hao Zhu558c72f2017-07-24 15:12:00 -040059 kable_xml <- read_kable_as_xml(kable_input)
Hao Zhu2a87e8e2017-06-14 15:49:33 -040060 kable_tbody <- xml_tpart(kable_xml, "tbody")
61
62 kable_dt <- rvest::html_table(xml2::read_html(as.character(kable_input)))[[1]]
Hao Zhuf4b35292017-06-25 22:38:37 -100063 if (is.null(columns)) {
64 columns <- seq(1, ncol(kable_dt))
65 }
Hao Zhu23456762018-03-26 12:30:10 -040066 if (!is.null(kable_attrs$header_above)) {
67 kable_dt_col_names <- unlist(kable_dt[kable_attrs$header_above, ])
68 kable_dt <- kable_dt[-(1:kable_attrs$header_above),]
69 names(kable_dt) <- kable_dt_col_names
70 }
71 kable_dt$row_id <- seq(nrow(kable_dt))
Hao Zhu2a87e8e2017-06-14 15:49:33 -040072 collapse_matrix <- collapse_row_matrix(kable_dt, columns)
73
74 for (i in 1:nrow(collapse_matrix)) {
75 matrix_row <- collapse_matrix[i, ]
Hao Zhu38cdcdb2017-06-27 09:08:30 -100076 names(matrix_row) <- names(collapse_matrix)
Hao Zhu3166f062017-06-26 07:51:46 -100077 target_row <- xml_child(kable_tbody, i)
78 row_node_rm_count <- 0
79 for (j in 1:length(matrix_row)) {
80 collapsing_col <- as.numeric(sub("x", "", names(matrix_row)[j])) -
81 row_node_rm_count
82 target_cell <- xml_child(target_row, collapsing_col)
83 if (matrix_row[j] == 0) {
84 xml_remove(target_cell)
85 row_node_rm_count <- row_node_rm_count + 1
86 } else if (matrix_row[j] != 1) {
87 xml_attr(target_cell, "rowspan") <- matrix_row[j]
88 xml_attr(target_cell, "style") <- paste0(
89 xml_attr(target_cell, "style"),
90 "vertical-align: middle !important;")
Hao Zhu2a87e8e2017-06-14 15:49:33 -040091 }
92 }
93 }
94
Hao Zhuf2dfd142017-07-24 14:43:28 -040095 out <- as_kable_xml(kable_xml)
Hao Zhu2a87e8e2017-06-14 15:49:33 -040096 attributes(out) <- kable_attrs
Hao Zhuf2100832018-01-11 16:20:29 -050097 if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
Hao Zhu2a87e8e2017-06-14 15:49:33 -040098 return(out)
99}
100
Hao Zhuf4b35292017-06-25 22:38:37 -1000101collapse_row_matrix <- function(kable_dt, columns, html = T) {
102 if (html) {
103 column_block <- function(x) c(x, rep(0, x - 1))
104 } else {
105 column_block <- function(x) c(rep(0, x - 1), x)
106 }
107 mapping_matrix <- list()
108 for (i in columns) {
109 mapping_matrix[[paste0("x", i)]] <- unlist(lapply(
110 rle(kable_dt[, i])$length, column_block))
111 }
112 mapping_matrix <- data.frame(mapping_matrix)
113 return(mapping_matrix)
114}
115
georgeguieaeb0cd2018-03-30 17:39:46 -0500116collapse_rows_latex <- function(kable_input, columns, latex_hline,
117 row_group_label_position, row_group_label_fonts,
118 custom_latex_hline, headers_to_remove) {
Hao Zhuf4b35292017-06-25 22:38:37 -1000119 table_info <- magic_mirror(kable_input)
Hao Zhu3fc0e882018-04-03 16:06:41 -0400120 out <- solve_enc(kable_input)
Hao Zhu064990d2017-10-17 18:08:42 -0400121
Hao Zhuf4b35292017-06-25 22:38:37 -1000122 if (is.null(columns)) {
123 columns <- seq(1, table_info$ncol)
124 }
Hao Zhu064990d2017-10-17 18:08:42 -0400125
Hao Zhuf4b35292017-06-25 22:38:37 -1000126 contents <- table_info$contents
127 kable_dt <- kable_dt_latex(contents)
georgeguieaeb0cd2018-03-30 17:39:46 -0500128
129 collapse_matrix_rev <- collapse_row_matrix(kable_dt, columns, html = TRUE)
Hao Zhu01b15b82018-01-12 17:48:21 -0500130 collapse_matrix <- collapse_row_matrix(kable_dt, columns, html = FALSE)
Hao Zhuf4b35292017-06-25 22:38:37 -1000131
132 new_kable_dt <- kable_dt
Hao Zhuf4b35292017-06-25 22:38:37 -1000133 for (j in seq(1:ncol(collapse_matrix))) {
134 column_align <- table_info$align_vector_origin[columns[j]]
135 column_width <- ifelse(
136 is.null(table_info$column_width[[paste0("column_", columns[j])]]),
137 "*", table_info$column_width[paste0("column_", columns[j])])
138 for (i in seq(1:nrow(collapse_matrix))) {
georgeguieaeb0cd2018-03-30 17:39:46 -0500139 if(row_group_label_position == 'stack'){
140 if(j < ncol(collapse_matrix)|(collapse_matrix_rev[i, j] == 0)){
141 new_kable_dt[i, j] <- ''
142 }
143 } else {
144 new_kable_dt[i, j] <- collapse_new_dt_item(
145 kable_dt[i, j], collapse_matrix[i, j], column_width, align = column_align
146 )
147 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000148 }
149 }
Hao Zhu654c91f2017-07-03 14:03:34 -0400150
151 midrule_matrix <- collapse_row_matrix(kable_dt, seq(1, table_info$ncol),
152 html = F)
153 midrule_matrix[setdiff(seq(1, table_info$ncol), columns)] <- 1
154
155 ex_bottom <- length(contents) - 1
156 contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\\\\\\\\")
157 if (!table_info$booktabs) {
158 contents[2:ex_bottom] <- paste0(contents[2:ex_bottom], "\n\\\\hline")
159 }
Hao Zhu01b15b82018-01-12 17:48:21 -0500160
161 new_contents <- c()
georgeguieaeb0cd2018-03-30 17:39:46 -0500162 if(row_group_label_position == 'stack'){
163 if(is.null(headers_to_remove)) headers_to_remove <- head(columns, -1)
164 table_info$colnames[headers_to_remove] <- ''
165 new_header <- paste(table_info$colnames, collapse = ' & ')
166 out <- sub(contents[1], new_header, out)
167 table_info$contents[1] <- new_header
168 }
169 if(latex_hline == 'custom' & is.null(custom_latex_hline)){
170 if(row_group_label_position == 'stack'){
171 custom_latex_hline = 1:2
172 } else {
173 custom_latex_hline = 1
174 }
175 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000176 for (i in seq(1:nrow(collapse_matrix))) {
177 new_contents[i] <- paste0(new_kable_dt[i, ], collapse = " & ")
Hao Zhu12b0ade2018-01-13 16:19:58 -0500178 table_info$contents[i + 1] <- new_contents[i]
Hao Zhu654c91f2017-07-03 14:03:34 -0400179 if (i != nrow(collapse_matrix)) {
Hao Zhu12b0ade2018-01-13 16:19:58 -0500180 row_midrule <- switch(
181 latex_hline,
182 "none" = "",
183 "full" = midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
184 table_info$booktabs),
185 "major" = ifelse(
186 sum(as.numeric(midrule_matrix[i, ]) > 0) == ncol(midrule_matrix),
187 midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
188 table_info$booktabs),
189 ""
georgeguieaeb0cd2018-03-30 17:39:46 -0500190 ),
191 "custom" = ifelse(
192 sum(as.numeric(midrule_matrix[i, custom_latex_hline])) > 0,
193 midline_groups(which(as.numeric(midrule_matrix[i, ]) > 0),
194 table_info$booktabs),
195 ""
196 )
Hao Zhu12b0ade2018-01-13 16:19:58 -0500197 )
Hao Zhu654c91f2017-07-03 14:03:34 -0400198 new_contents[i] <- paste0(new_contents[i], "\\\\\\\\\n", row_midrule)
199 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000200 out <- sub(contents[i + 1], new_contents[i], out)
201 }
Hao Zhu8f202992017-07-15 02:20:18 -0400202 out <- gsub("\\\\addlinespace\n", "", out)
Hao Zhuf4b35292017-06-25 22:38:37 -1000203
204 out <- structure(out, format = "latex", class = "knitr_kable")
205 table_info$collapse_rows <- TRUE
206 attr(out, "kable_meta") <- table_info
georgeguieaeb0cd2018-03-30 17:39:46 -0500207 if(row_group_label_position == 'stack'){
208 group_row_index_list <- collapse_rows_index(kable_dt, head(columns, -1))
209 out <- collapse_rows_latex_stack(out, group_row_index_list, row_group_label_fonts)
210 }
Hao Zhuf4b35292017-06-25 22:38:37 -1000211 return(out)
212}
213
214kable_dt_latex <- function(x) {
215 data.frame(do.call(rbind, str_split(x[-1], " & ")), stringsAsFactors = FALSE)
216}
217
218collapse_new_dt_item <- function(x, span, width = NULL, align) {
219 if (span == 0) return("")
220 if (span == 1) return(x)
221 out <- paste0(
222 "\\\\multirow\\{", -span, "\\}\\{",
223 ifelse(is.null(width), "\\*", width),
224 "\\}\\{",
225 switch(align,
226 "l" = "\\\\raggedright\\\\arraybackslash ",
227 "c" = "\\\\centering\\\\arraybackslash ",
228 "r" = "\\\\raggedleft\\\\arraybackslash "),
229 x, "\\}"
230 )
231 return(out)
Hao Zhu2a87e8e2017-06-14 15:49:33 -0400232}
Hao Zhu654c91f2017-07-03 14:03:34 -0400233
234midline_groups <- function(x, booktabs = T) {
235 diffs <- c(1, diff(x))
236 start_indexes <- c(1, which(diffs > 1))
Hao Zhu12b0ade2018-01-13 16:19:58 -0500237 end_indexes <- c(start_indexes - 1, length(x))
Hao Zhu654c91f2017-07-03 14:03:34 -0400238 ranges <- paste0(x[start_indexes], "-", x[end_indexes])
239 if (booktabs) {
240 out <- paste0("\\\\cmidrule{", ranges, "}")
241 } else {
242 out <- paste0("\\\\cline{", ranges, "}")
243 }
244 out <- paste0(out, collapse = "\n")
245 return(out)
246}
georgeguieaeb0cd2018-03-30 17:39:46 -0500247
248
249collapse_rows_index <- function(kable_dt, columns) {
250 format_to_row_index <- function(x){
251 x = rle(x)
252 out = x$lengths
253 names(out) = x$values
254 out
255 }
256 group_rows_index_list <- lapply(columns, function(x) {
257 format_to_row_index(kable_dt[, x])
258 })
259 return(group_rows_index_list)
260}
261
262
263collapse_rows_latex_stack <- function(kable_input, group_row_index_list,
264 row_group_label_fonts){
265 merge_lists <- function(default_list, updated_list){
266 for(x in names(updated_list)){
267 default_list[[x]] <- updated_list[[x]]
268 }
269 return(default_list)
270 }
271 default_font_list <- list(
272 list(bold = T, italic = F),
273 list(bold = F, italic = T),
274 list(bold = F, italic = F)
275 )
276 n_default_fonts = length(default_font_list)
277 n_supplied_fonts = length(row_group_label_fonts)
278 group_row_font_list <- list()
279 out <- kable_input
280 for(i in 1:length(group_row_index_list)){
281 if(i > n_default_fonts){
282 group_row_args <- default_font_list[[n_default_fonts]]
283 } else {
284 group_row_args <- default_font_list[[i]]
285 }
286 if(i <= n_supplied_fonts){
287 group_row_args <- merge_lists(group_row_args, row_group_label_fonts[[i]])
288 }
289 group_row_args <- merge_lists(
290 list(kable_input = out, index = group_row_index_list[[i]]),
291 group_row_args)
292 out <- do.call(group_rows, group_row_args)
293 }
294 return(out)
295}