Merge pull request #318 from dmurdoch/protect_latex
Protect LaTeX math in the HTML format
diff --git a/DESCRIPTION b/DESCRIPTION
index 201ba3d..478b688 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -45,7 +45,8 @@
rstudioapi,
glue,
tools,
- webshot
+ webshot,
+ digest
Suggests:
testthat,
magick,
@@ -53,5 +54,5 @@
dplyr
VignetteBuilder: knitr
Encoding: UTF-8
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
index ab0fb47..b247396 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,6 +37,7 @@
export(usepackage_latex)
export(xtable2kable)
import(htmltools)
+importFrom(digest,digest)
importFrom(glue,glue)
importFrom(grDevices,col2rgb)
importFrom(knitr,asis_output)
@@ -57,6 +58,7 @@
importFrom(rvest,html_table)
importFrom(scales,rescale)
importFrom(stats,ave)
+importFrom(stringr,fixed)
importFrom(stringr,str_count)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
diff --git a/R/kableExtra-package.R b/R/kableExtra-package.R
index a06fbcf..ea2a1da 100644
--- a/R/kableExtra-package.R
+++ b/R/kableExtra-package.R
@@ -55,7 +55,7 @@
#' vanilla rmarkdown. For customized rmarkdown templates, it is recommended to
#' load related LaTeX packages manually.
#'
-#' @importFrom stringr str_count str_split str_match str_detect str_match_all
+#' @importFrom stringr fixed str_count str_split str_match str_detect str_match_all
#' str_extract str_replace_all str_trim str_extract_all str_sub str_replace
#' @importFrom xml2 read_xml xml_attr xml_has_attr xml_attr<- read_html
#' xml_child xml_children xml_name xml_add_sibling xml_add_child xml_text
@@ -75,6 +75,7 @@
#' @importFrom glue glue
#' @importFrom tools file_ext file_path_sans_ext
#' @importFrom webshot webshot
+#' @importFrom digest digest
#' @import htmltools
#' @name kableExtra-package
#' @aliases kableExtra
diff --git a/R/kable_styling.R b/R/kable_styling.R
index 0a43f74..bf33f82 100644
--- a/R/kable_styling.R
+++ b/R/kable_styling.R
@@ -47,11 +47,19 @@
#' @param latex_table_env LaTeX option. A character string to define customized
#' table environment such as tabu or tabularx.You shouldn't expect all features
#' could be supported in self-defined environments.
+#' @param protect_latex If `TRUE`, LaTeX code embedded between dollar signs
+#' will be protected from HTML escaping.
#'
#' @details For LaTeX, if you use other than English environment
#' - all tables are converted to 'UTF-8'. If you use, for example, Hungarian
#' characters on a Windows machine, make sure to use
#' Sys.setlocale("LC_ALL","Hungarian") to avoid unexpected conversions.
+#' - `protect_latex = TRUE` has no effect on LaTeX output.
+#'
+#' For HTML,
+#' - `protect_latex = TRUE` is for including complicated math in HTML output.
+#' The LaTeX code itself must not contain dollar signs, even escaped ones.
+#' Pandoc's rules for recognizing embedded LaTeX are used.
#'
#' @examples x_html <- knitr::kable(head(mtcars), "html")
#' kable_styling(x_html, "striped", position = "left", font_size = 7)
@@ -71,7 +79,8 @@
repeat_header_method = c("append", "replace"),
repeat_header_continued = FALSE,
stripe_color = "gray!6",
- latex_table_env = NULL) {
+ latex_table_env = NULL,
+ protect_latex = TRUE) {
if (length(bootstrap_options) == 1 && bootstrap_options == "basic") {
bootstrap_options <- getOption("kable_styling_bootstrap_options", "basic")
@@ -104,7 +113,8 @@
bootstrap_options = bootstrap_options,
full_width = full_width,
position = position,
- font_size = font_size))
+ font_size = font_size,
+ protect_latex = protect_latex))
}
if (kable_format == "latex") {
if (is.null(full_width)) {
@@ -125,13 +135,44 @@
}
}
+# Pull embedded LaTeX math out of a kable so that later HTML/XML processing
+# cannot escape or mangle it.
+#
+# Every `$...$` or `$$...$$` span found in `kable_input` is swapped for a
+# placeholder of the form "latex<digest of the span>". The original spans are
+# stored, keyed by placeholder, in the `extracted_latex` attribute of the
+# returned object so that `replace_latex_in_kable()` can restore them.
+#
+# @param kable_input A kable character string; its attributes are preserved.
+# @return `kable_input` with math spans replaced by placeholders and an extra
+#   `extracted_latex` attribute (a named character vector, possibly empty).
+extract_latex_from_kable <- function(kable_input) {
+  kable_attrs <- attributes(kable_input)
+  # The opening delimiter must not be preceded by a backslash. The previous
+  # pattern used `\e`, which in ICU regex is the ESC control character
+  # (U+001B), so an escaped `\$` was still treated as a math delimiter.
+  regexp <- paste0("(?<!\\\\)", # Not escaped
+                   "([$]{1}(?![ ])[^$]+(?<![$\\\\ ])[$]{1}", # $...$
+                   "|[$]{2}(?![ ])[^$]+(?<![$\\\\ ])[$]{2})", # $$...$$
+                   "(?!\\d)") # Not followed by digit
+  latex <- character()
+  # Replace one span per iteration; placeholders contain no dollar signs, so
+  # each pass removes a match and the loop terminates.
+  while (str_detect(kable_input, regexp)) {
+    block <- str_extract(kable_input, regexp)
+    name <- paste0("latex", digest(block))
+    latex[name] <- block
+    kable_input <- str_replace(kable_input, regexp, name)
+  }
+  kable_attrs$extracted_latex <- latex
+  # The stringr calls drop attributes, so put the saved ones back.
+  attributes(kable_input) <- kable_attrs
+  kable_input
+}
+
+# Put previously extracted LaTeX spans back into a kable.
+#
+# @param kable_input A kable character string containing placeholders.
+# @param latex Named character vector: names are the placeholders, values are
+#   the LaTeX text to substitute for them.
+# @return `kable_input` with every placeholder replaced by its LaTeX text and
+#   its original attributes restored.
+replace_latex_in_kable <- function(kable_input, latex) {
+  saved_attrs <- attributes(kable_input)
+  placeholders <- names(latex)
+  for (i in seq_along(placeholders)) {
+    key <- placeholders[i]
+    # fixed() makes the placeholder a literal match rather than a regex.
+    kable_input <- str_replace_all(kable_input, fixed(key), latex[key])
+  }
+  attributes(kable_input) <- saved_attrs
+  kable_input
+}
+
# htmlTable Styling ------------
htmlTable_styling <- function(kable_input,
bootstrap_options = "basic",
full_width = T,
position = c("center", "left", "right",
"float_left", "float_right"),
- font_size = NULL) {
+ font_size = NULL,
+ protect_latex = TRUE) {
+ if (protect_latex) {
+ kable_input <- extract_latex_from_kable(kable_input)
+ }
kable_attrs <- attributes(kable_input)
kable_xml <- read_kable_as_xml(kable_input)
@@ -189,6 +230,10 @@
}
out <- as_kable_xml(kable_xml)
+ if (protect_latex) {
+ out <- replace_latex_in_kable(out, kable_attrs$extracted_latex)
+ kable_attrs$extracted_latex <- NULL
+ }
attributes(out) <- kable_attrs
if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
return(out)
diff --git a/man/kable_styling.Rd b/man/kable_styling.Rd
index 8db8475..a3932fc 100644
--- a/man/kable_styling.Rd
+++ b/man/kable_styling.Rd
@@ -6,7 +6,11 @@
\usage{
kable_styling(kable_input, bootstrap_options = "basic",
latex_options = "basic", full_width = NULL, position = "center",
- font_size = NULL, row_label_position = "l", ...)
+ font_size = NULL, row_label_position = "l",
+ repeat_header_text = "\\\\textit{(continued)}",
+ repeat_header_method = c("append", "replace"),
+ repeat_header_continued = FALSE, stripe_color = "gray!6",
+ latex_table_env = NULL, protect_latex = TRUE)
}
\arguments{
\item{kable_input}{Output of \code{knitr::kable()} with \code{format} specified}
@@ -45,11 +49,25 @@
\item{font_size}{A numeric input for table font size}
-\item{row_label_position}{A character string determining the justification of the row
-labels in a table. Possible values inclued \code{l} for left, \code{c} for center, and \code{r} for
-right. The default value is \code{l} for left justifcation.}
+\item{row_label_position}{A character string determining the justification
+of the row labels in a table. Possible values include \code{l} for left, \code{c} for
+center, and \code{r} for right. The default value is \code{l} for left justification.}
-\item{...}{extra options for HTML or LaTeX. See \code{details}.}
+\item{repeat_header_text}{LaTeX option. A text string you want to append on
+or replace the caption.}
+
+\item{repeat_header_method}{LaTeX option, can either be \code{append}(default) or
+\code{replace}}
+
+\item{stripe_color}{LaTeX option allowing users to pick a different color
+for their strip lines. This option is not available in HTML}
+
+\item{latex_table_env}{LaTeX option. A character string to define customized
+table environment such as tabu or tabularx. You shouldn't expect all features
+to be supported in self-defined environments.}
+
+\item{protect_latex}{If \code{TRUE}, LaTeX code embedded between dollar signs
+will be protected from HTML escaping.}
}
\description{
This function provides a cleaner approach to modify the style
@@ -58,22 +76,19 @@
in some customized template being loaded.
}
\details{
-For LaTeX, extra options includes:
-\itemize{
-\item \code{repeat_header_method} can either be \code{append}(default) or \code{replace}
-\item \code{repeat_header_text} is just a text string you want to append on or
-replace the caption.
-\item \code{stripe_color} allows users to pick a different color for their strip lines.
-\item \code{latex_table_env} character string to define customized table environment
-such as tabu or tabularx.You shouldn't expect all features could be
-supported in self-defined environments.
-}
-
For LaTeX, if you use other than English environment
\itemize{
\item all tables are converted to 'UTF-8'. If you use, for example, Hungarian
characters on a Windows machine, make sure to use
Sys.setlocale("LC_ALL","Hungarian") to avoid unexpected conversions.
+\item \code{protect_latex = TRUE} has no effect on LaTeX output.
+}
+
+For HTML,
+\itemize{
+\item \code{protect_latex = TRUE} is for including complicated math in HTML output.
+The LaTeX code itself must not contain dollar signs, even escaped ones.
+Pandoc's rules for recognizing embedded LaTeX are used.
}
}
\examples{