Add English CA based on Wikipedia for comparison
Change-Id: Ic164e21dc25640a4de1d53561c5fc2e64d4d503c
diff --git a/R/report.Rmd b/R/report.Rmd
index 6320eda..363916c 100644
--- a/R/report.Rmd
+++ b/R/report.Rmd
@@ -107,6 +107,27 @@
take_ca_icc %>% show_table()
```
+### For comparison based on English Wikipedia
+#### (Snapshot from 2015 with 2.4 billion words, see [here](https://www.ids-mannheim.de/digspra/kl/projekte/korpora/verfuegbarkeit/))
+
+```{r take_wpe, echo=TRUE}
+if (file.exists("../data/take_ca_wpe")) { # reuse the stored result if present
+  take_ca_wpe <- readRDS("../data/take_ca_wpe")
+} else { # no stored result: run the collocation analysis via the server
+  wpe <- new("KorAPConnection", "https://korap.ids-mannheim.de/instance/english", verbose = TRUE)
+  take_ca_wpe <-
+    collocationAnalysis(
+      wpe,
+      "focus({[tt/l=take]} [tt/p=NN])",
+      leftContextSize = 0,
+      rightContextSize = 1,
+      minOccur = 5,
+      addExamples = TRUE # spelled out: T is an ordinary, reassignable variable
+    )
+}
+take_ca_wpe %>% show_table()
+```
+
## German: *nehmen*
```{r nehmen_icc, echo=TRUE}
diff --git a/data/take_ca_wpe b/data/take_ca_wpe
new file mode 100644
index 0000000..11e97b6
--- /dev/null
+++ b/data/take_ca_wpe
Binary files differ