Add R scripts to plot performance charts

Change-Id: I20d33a47722f515897c79e9e58c49f6391e48eac
diff --git a/charts/performance.tsv b/charts/performance.tsv
new file mode 100644
index 0000000..cebdfa8
--- /dev/null
+++ b/charts/performance.tsv
@@ -0,0 +1,28 @@
+Tool	Version	Sen	Tok	Model	Tokens/ms effi1x - 100 runs	Tokens/ms effi10x - 100 runs
+KorAP-Tokenizer					72.90	199.28
+Datok		x	x	datok	837.89	2478.71
+		x	x	matok	1371.19	2976.80
+BlingFire	0.1.8		x	wbd.bin	431.92	1697.73
+		x		sbd.bin	417.10	1908.87
+Cutter	2.5	x	x		0.38	
+JTok	2.1.19				31.19	117.22
+OpenNLP			x	Simple	290.71	1330.23
+			x	Tokenizer	74.65	145.08
+		x		SentenceD	247.84	853.01
+SoMaJo		x	x	P=1	8.15	8.41
+		x	x	P=8	27.32	39.91
+SpaCy			x	Tokenizer	19.73	44.40
+		x		Sentencizer	16.94	
+		x		Statistical	4.90	
+		x		Dependency	2.24	
+Stanford			x		75.47	156.24
+		x	x	T,S,M	46.95	91.56
+Syntok		x		segmenter	59.66	61.07
+			x	tokenizer	103.90	108.40
+Waste	2.0.20-1	x	x		141.07	144.95
+Elephant			x		8.57	8.68
+TreeTagger			x		69.92	72.98
+Deep-EOS		x		bi-lstm-de	0.25	
+		x		cnn-de	0.27	
+		x		lstm-de	0.29	
+NNsplit		x			0.90	
diff --git a/charts/performance_chart.R b/charts/performance_chart.R
new file mode 100755
index 0000000..10f2d36
--- /dev/null
+++ b/charts/performance_chart.R
@@ -0,0 +1,35 @@
+#!/usr/bin/env Rscript
+# Plot tokenizer / sentence-splitter throughput (tokens per ms) from
+# performance.tsv as a horizontal dodged bar chart with one bar per test
+# corpus size, and save it as tok_perf.png, tok_perf.pdf and tok_perf.svg.
+# Input and output paths are relative to the current working directory.
+library(tidyverse)
+library(idsThemeR) # install_git("https://korap.ids-mannheim.de/gerrit/IDS-Mannheim/idsThemeR")
+library(extrafont)
+
+df <- read_tsv("performance.tsv")
+df %>%
+  # The Tool name is only given on the first row of each tool's group.
+  fill(Tool) %>%
+  # Columns 6 and 7 hold the two throughput measurements; order bars by the larger one.
+  mutate(order_by = pmax(.[[6]], .[[7]], na.rm = TRUE)) %>%
+  filter(Tool != "wc", !is.na(order_by)) %>%
+  pivot_longer(cols = c(7, 6)) %>%
+  # Reduce the long column headers to short legend labels such as "10 × Effi".
+  mutate(name = str_replace_all(name, ".*[^0-9]([0-9]+)x.*", "\\1 × Effi")) %>%
+  mutate(tool = paste0(Tool, if_else(is.na(Model), "", paste0(" (", Model, ")")))) %>%
+  # Side effect: stores the long-format table as global df2 (not used further in this script).
+  { df2 <<- . } %>%
+  mutate(Tool = factor(tool) %>% fct_reorder(order_by)) %>%
+  ggplot(aes(x = Tool, y = value, fill = name)) + # forcats::fct_rev(name) to reorder x1 and x10
+  geom_col(position = "dodge") +
+  # Labels follow the aesthetics, not the flipped axes: y carries the throughput.
+  labs(x = NULL, y = "Tokens / ms", fill = "test corpus") +
+  coord_flip() +
+  theme_ids(style = "light") +
+  theme(legend.position = c(0.8, 0.2)) +
+  scale_fill_ids(palette = "ids")
+
+ggsave("tok_perf.png", width = 70 * .pt, height = 50 * .pt, units = "mm", dpi = 600)
+ggsave("tok_perf.pdf", device = cairo_pdf, width = 70 * .pt, height = 50 * .pt, units = "mm", dpi = 600)
+ggsave("tok_perf.svg", width = 70 * .pt, height = 50 * .pt, units = "mm", dpi = 600)
diff --git a/charts/single_bar_perf_chart.R b/charts/single_bar_perf_chart.R
new file mode 100644
index 0000000..c1e9757
--- /dev/null
+++ b/charts/single_bar_perf_chart.R
@@ -0,0 +1,23 @@
+# Plot a single horizontal bar per tool/model showing the throughput stored in
+# column 7 of performance.tsv (read relative to the current working directory).
+library(tidyverse) # provides read_tsv, the dplyr/tidyr verbs, forcats and ggplot2
+
+df <- read_tsv("performance.tsv")
+# Original header of column 7; not used below, where the axis label is hard-coded.
+ylabel <- colnames(df)[7]
+colnames(df)[7] <- "perf"
+df %>%
+  # The Tool name is only given on the first row of each tool's group.
+  fill(Tool) %>%
+  filter(Tool != "wc", !is.na(perf)) %>%
+  arrange(desc(perf)) %>%
+  mutate(tool = paste0(Tool, if_else(is.na(Model), "", paste0(" (", Model, ")")))) %>%
+  mutate(Tool = factor(tool) %>% fct_reorder(perf)) %>%
+  ggplot(aes(x = Tool, y = perf)) +
+  geom_col() +
+  # Labels follow the aesthetics, so after coord_flip() ylab titles the horizontal axis.
+  ylab("Tokens/ms") +
+  xlab(NULL) +
+  coord_flip() # +
+#  geom_text(aes(label = perf), position = position_stack(vjust = 0.5), hjust = 0.25)
+# ggsave("/tmp/tok_perf.png", width = 70 * .pt, height = 40 * .pt, units = "mm", dpi = 600)
diff --git a/charts/tok_performance_charts.Rproj b/charts/tok_performance_charts.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/charts/tok_performance_charts.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX