Merge pull request #318 from dmurdoch/protect_latex

Protect LaTeX math in the HTML format
diff --git a/DESCRIPTION b/DESCRIPTION
index 201ba3d..478b688 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -45,7 +45,8 @@
     rstudioapi,
     glue,
     tools,
-    webshot
+    webshot,
+    digest
 Suggests:
     testthat,
     magick,
@@ -53,5 +54,5 @@
     dplyr
 VignetteBuilder: knitr
 Encoding: UTF-8
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
index ab0fb47..b247396 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,6 +37,7 @@
 export(usepackage_latex)
 export(xtable2kable)
 import(htmltools)
+importFrom(digest,digest)
 importFrom(glue,glue)
 importFrom(grDevices,col2rgb)
 importFrom(knitr,asis_output)
@@ -57,6 +58,7 @@
 importFrom(rvest,html_table)
 importFrom(scales,rescale)
 importFrom(stats,ave)
+importFrom(stringr,fixed)
 importFrom(stringr,str_count)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
diff --git a/R/kableExtra-package.R b/R/kableExtra-package.R
index a06fbcf..ea2a1da 100644
--- a/R/kableExtra-package.R
+++ b/R/kableExtra-package.R
@@ -55,7 +55,7 @@
 #' vanilla rmarkdown. For customized rmarkdown templates, it is recommended to
 #' load related LaTeX packages manually.
 #'
-#' @importFrom stringr str_count str_split str_match str_detect str_match_all
+#' @importFrom stringr fixed str_count str_split str_match str_detect str_match_all
 #' str_extract str_replace_all str_trim str_extract_all str_sub str_replace
 #' @importFrom xml2 read_xml xml_attr xml_has_attr xml_attr<- read_html
 #' xml_child xml_children xml_name xml_add_sibling xml_add_child xml_text
@@ -75,6 +75,7 @@
 #' @importFrom glue glue
 #' @importFrom tools file_ext file_path_sans_ext
 #' @importFrom webshot webshot
+#' @importFrom digest digest
 #' @import htmltools
 #' @name kableExtra-package
 #' @aliases kableExtra
diff --git a/R/kable_styling.R b/R/kable_styling.R
index 0a43f74..bf33f82 100644
--- a/R/kable_styling.R
+++ b/R/kable_styling.R
@@ -47,11 +47,19 @@
 #' @param latex_table_env LaTeX option. A character string to define customized
 #' table environment such as tabu or tabularx.You shouldn't expect all features
 #' could be supported in self-defined environments.
+#' @param protect_latex If `TRUE`, LaTeX code embedded between dollar signs
+#' will be protected from HTML escaping.
 #'
 #' @details  For LaTeX, if you use other than English environment
 #' - all tables are converted to 'UTF-8'. If you use, for example, Hungarian
 #' characters on a Windows machine, make sure to use
 #' Sys.setlocale("LC_ALL","Hungarian") to avoid unexpected conversions.
+#' - `protect_latex = TRUE` has no effect.
+#'
+#' For HTML,
+#' - `protect_latex = TRUE` is for including complicated math in HTML output.
+#' The LaTeX code must not contain dollar signs, even escaped ones.
+#' Pandoc's rules for recognizing embedded LaTeX are used.
 #'
 #' @examples x_html <- knitr::kable(head(mtcars), "html")
 #' kable_styling(x_html, "striped", position = "left", font_size = 7)
@@ -71,7 +79,8 @@
                           repeat_header_method = c("append", "replace"),
                           repeat_header_continued = FALSE,
                           stripe_color = "gray!6",
-                          latex_table_env = NULL) {
+                          latex_table_env = NULL,
+                          protect_latex = TRUE) {
 
   if (length(bootstrap_options) == 1 && bootstrap_options == "basic") {
     bootstrap_options <- getOption("kable_styling_bootstrap_options", "basic")
@@ -104,7 +113,8 @@
                              bootstrap_options = bootstrap_options,
                              full_width = full_width,
                              position = position,
-                             font_size = font_size))
+                             font_size = font_size,
+                             protect_latex = protect_latex))
   }
   if (kable_format == "latex") {
     if (is.null(full_width)) {
@@ -125,13 +135,44 @@
   }
 }
 
+extract_latex_from_kable <- function(kable_input) {  # math -> placeholders
+  kable_attrs <- attributes(kable_input)  # saved; restored after the edits
+  regexp <- paste0("(?<!\\\\)",   # Not escaped: no backslash before the $
+                   "([$]{1}(?![ ])[^$]+(?<![$\\\\ ])[$]{1}", # $...$
+                   "|[$]{2}(?![ ])[^$]+(?<![$\\\\ ])[$]{2})", # $$...$$
+                   "(?!\\d)")        # Not followed by digit
+  latex <- character()
+  while (str_detect(kable_input, regexp)) {  # one math span per iteration
+    block <- str_extract(kable_input, regexp)
+    name <- paste0("latex", digest(block))  # placeholder key for this span
+    latex[name] <- block
+    kable_input <- str_replace(kable_input, regexp, name)
+  }
+  kable_attrs$extracted_latex <- latex  # named vector: placeholder -> LaTeX
+  attributes(kable_input) <- kable_attrs
+  kable_input
+}
+
+replace_latex_in_kable <- function(kable_input, latex) {
+  restored <- kable_input  # `latex`: named vector, placeholder -> LaTeX
+  for (key in names(latex)) {  # fixed(): match the placeholder literally
+    restored <- str_replace_all(restored, fixed(key), latex[key])
+  }
+  attributes(restored) <- attributes(kable_input)  # keep kable attributes
+  restored
+}
+
 # htmlTable Styling ------------
 htmlTable_styling <- function(kable_input,
                               bootstrap_options = "basic",
                               full_width = T,
                               position = c("center", "left", "right",
                                            "float_left", "float_right"),
-                              font_size = NULL) {
+                              font_size = NULL,
+                              protect_latex = TRUE) {
+  if (protect_latex) {
+    kable_input <- extract_latex_from_kable(kable_input)
+  }
   kable_attrs <- attributes(kable_input)
   kable_xml <- read_kable_as_xml(kable_input)
 
@@ -189,6 +230,10 @@
   }
 
   out <- as_kable_xml(kable_xml)
+  if (protect_latex) {
+    out <- replace_latex_in_kable(out, kable_attrs$extracted_latex)
+    kable_attrs$extracted_latex <- NULL
+  }
   attributes(out) <- kable_attrs
   if (!"kableExtra" %in% class(out)) class(out) <- c("kableExtra", class(out))
   return(out)
diff --git a/man/kable_styling.Rd b/man/kable_styling.Rd
index 8db8475..a3932fc 100644
--- a/man/kable_styling.Rd
+++ b/man/kable_styling.Rd
@@ -6,7 +6,11 @@
 \usage{
 kable_styling(kable_input, bootstrap_options = "basic",
   latex_options = "basic", full_width = NULL, position = "center",
-  font_size = NULL, row_label_position = "l", ...)
+  font_size = NULL, row_label_position = "l",
+  repeat_header_text = "\\\\textit{(continued)}",
+  repeat_header_method = c("append", "replace"),
+  repeat_header_continued = FALSE, stripe_color = "gray!6",
+  latex_table_env = NULL, protect_latex = TRUE)
 }
 \arguments{
 \item{kable_input}{Output of \code{knitr::kable()} with \code{format} specified}
@@ -45,11 +49,25 @@
 
 \item{font_size}{A numeric input for table font size}
 
-\item{row_label_position}{A character string determining the justification of the row
-labels in a table.  Possible values inclued \code{l} for left, \code{c} for center, and \code{r} for
-right.  The default value is \code{l} for left justifcation.}
+\item{row_label_position}{A character string determining the justification
+of the row labels in a table.  Possible values inclued \code{l} for left, \code{c} for
+center, and \code{r} for right.  The default value is \code{l} for left justifcation.}
 
-\item{...}{extra options for HTML or LaTeX. See \code{details}.}
+\item{repeat_header_text}{LaTeX option. A text string you want to append on
+or replace the caption.}
+
+\item{repeat_header_method}{LaTeX option, can either be \code{append}(default) or
+\code{replace}}
+
+\item{stripe_color}{LaTeX option allowing users to pick a different color
+for their strip lines. This option is not available in HTML}
+
+\item{latex_table_env}{LaTeX option. A character string to define customized
+table environment such as tabu or tabularx.You shouldn't expect all features
+could be supported in self-defined environments.}
+
+\item{protect_latex}{If \code{TRUE}, LaTeX code embedded between dollar signs
+will be protected from HTML escaping.}
 }
 \description{
 This function provides a cleaner approach to modify the style
@@ -58,22 +76,19 @@
 in some customized template being loaded.
 }
 \details{
-For LaTeX, extra options includes:
-\itemize{
-\item \code{repeat_header_method} can either be \code{append}(default) or \code{replace}
-\item \code{repeat_header_text} is just a text string you want to append on or
-replace the caption.
-\item \code{stripe_color} allows users to pick a different color for their strip lines.
-\item \code{latex_table_env} character string to define customized table environment
-such as tabu or tabularx.You shouldn't expect all features could be
-supported in self-defined environments.
-}
-
 For LaTeX, if you use other than English environment
 \itemize{
 \item all tables are converted to 'UTF-8'. If you use, for example, Hungarian
 characters on a Windows machine, make sure to use
 Sys.setlocale("LC_ALL","Hungarian") to avoid unexpected conversions.
+\item \code{protect_latex = TRUE} has no effect.
+}
+
+For HTML,
+\itemize{
+\item \code{protect_latex = TRUE} is for including complicated math in HTML output.
+The LaTeX code must not contain dollar signs, even escaped ones.
+Pandoc's rules for recognizing embedded LaTeX are used.
 }
 }
 \examples{