Add example for regional frequency distribution

Change-Id: I5cc2f31baae86ccba1eb73a4b0bb76d0fbfaba1c
diff --git a/.gitignore b/.gitignore
index 5b6a065..f51ef92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
+cache/
diff --git a/examples/geo/data/regions.rds b/examples/geo/data/regions.rds
new file mode 100644
index 0000000..9b59d92
--- /dev/null
+++ b/examples/geo/data/regions.rds
Binary files differ
diff --git a/examples/geo/regional.R b/examples/geo/regional.R
new file mode 100644
index 0000000..59e7e7f
--- /dev/null
+++ b/examples/geo/regional.R
@@ -0,0 +1,80 @@
+#!/usr/bin/Rscript
+library(RKorAPClient)
+library(ggplot2)
+library(raster)
+library(broom)
+
+mapfile <- "examples/geo/data/cache/map.rds"  # cache file for the tidied map data; read and written by fetchMaps()
+
+fetchAndPrepareMap <- function(map, pick) {  # download GADM map `map`; pick > 0 keeps only entry `pick`
+  cat("Downloading GADM map data for ", map, "\n")
+  con <- url(sprintf("https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_%s_sp.rds", map))
+  on.exit(close(con), add = TRUE)  # close explicitly, also on error, instead of leaving it to the gc
+  sp <- readRDS(con)
+  if (pick > 0) sp@polygons <- sp@polygons[pick]  # a pick of 0 keeps the complete map
+  if (pick > 0) sp@data <- sp@data[pick, ]  # keep the data row that belongs to the picked polygon
+  sp
+}
+
+fetchMaps <- function(maps, picks) {  # tidied maps as one data frame; an existing `mapfile` wins over maps/picks
+  if (file.exists(mapfile)) {
+    df <- readRDS(mapfile)
+  } else {
+    cat("Downloading and caching GADM map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
+    df <- broom::tidy(Reduce(bind, Map(fetchAndPrepareMap, maps, picks)))  # Map() always yields a plain list
+    dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
+    saveRDS(df, mapfile)
+  }
+  df$grp <- floor(as.numeric(as.character(df$group)))  # integer part of the group id, used as row index by updatePlot()
+  df
+}
+
+map <- fetchMaps(c("DEU_1", "AUT_1", "CHE_1", "LUX_0", "BEL_3", "ITA_1"), c(0,0,0,0,34,17))  # pick 0 = keep the whole map, n > 0 = keep only entry n of that map
+
+geoDistrib <- function(query, kco = new("KorAPConnection", verbose=TRUE)) {  # per region: corpus size, hits, relative frequency; returns the last plot invisibly
+  regions <- readRDS("examples/geo/data/regions.rds")
+  regions$freq <- NA
+  plot <- NULL
+  vc <- ""  # prefix prepended to every region's virtual corpus query
+  for (i in seq_len(nrow(regions))) {  # seq_len(): 1:nrow() would visit rows 1 and 0 of an empty table
+    if (is.na(regions$query[i])) next  # regions without a query keep freq = NA
+    cat(as.character(regions$land[i]), "\n")
+    regions$total[i] <- corpusStats(kco, vc=paste0(vc, regions$query[i]))@tokens
+    if (regions$total[i] == 0) {
+      regions$afreq[i] <- 0
+      regions$freq[i] <- NA  # no tokens in this region, so no relative frequency
+    } else {
+      regions$afreq[i] <- corpusQuery(kco, query, vc=paste0(vc, regions$query[i]))@totalResults
+      regions$freq[i] <- regions$afreq[i] / regions$total[i]
+    }
+    cat(regions$afreq[i], regions$total[i], regions$freq[i], "\n")
+    plot <- updatePlot(query, plot, map, regions)  # redraw after every region so the map fills in step by step
+    cat("\n\n")
+  }
+  invisible(plot)
+}
+
+# Plot `map`, filling every polygon with `laender$freq` of its region row (`map$grp`) scaled by 10^6.
+# `regionsPlot` is not used (kept so existing callers still work); the new plot is printed and returned.
+updatePlot <- function(query, regionsPlot, map, laender) {
+  # Vectorised lookup: one indexing operation instead of one sapply() call per map row.
+  map$ipm <- laender$freq[map$grp] * 10^6
+  regionsPlot <- ggplot(map) +
+    geom_polygon(aes(x=long, y=lat, group=group, fill=ipm), colour= "black", size=.1) +
+    # Blank out axes, grid, border and background so that only the map itself is drawn.
+    theme(axis.line = element_blank(), axis.line.x = element_blank(), axis.line.y = element_blank(),
+          axis.text.x = element_blank(), axis.text.y = element_blank(),
+          axis.title.x = element_blank(), axis.title.y = element_blank(),
+          axis.ticks = element_blank(),
+          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+          panel.border = element_blank(), panel.background = element_blank()) +
+    coord_equal(ratio=1.5) +
+    labs(title = sprintf("Regional distribution of “%s”", query))
+  print(regionsPlot)
+  regionsPlot
+}
+
+#geoDistrib("wegen dem [tt/p=NN]")
+geoDistrib("heuer")  # example run; the commented-out calls around it are alternative queries
+#geoDistrib("Sonnabend")
+#geoDistrib("eh")