blob: 59e7e7f837093a8e724e05f9ef6ae13161ed2ea5 [file] [log] [blame]
Marc Kupietz451980d2019-09-23 23:45:10 +02001#!/usr/bin/Rscript
2library(RKorAPClient)
3library(ggplot2)
4library(raster)
5library(broom)
6
7mapfile <- "examples/geo/data/cache/map.rds"
8
9fetchAndPrepareMap <- function(map, pick) {
10 cat("Downloading GADM map data for ", map, "\n")
11 sp <- readRDS(url(sprintf("https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_%s_sp.rds", map)))
12 if (pick > 0) {
13 sp@polygons <- sp@polygons[pick]
14 sp@data <- sp@data[pick,]
15 }
16 sp
17}
18
19fetchMaps <- function(maps, picks) {
20 if (file.exists(mapfile)) {
21 df <- readRDS(mapfile)
22 } else {
23 cat("Downloading and caching GADM map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
24 df <- broom::tidy(Reduce(bind, mapply(fetchAndPrepareMap, maps, picks)))
25 dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
26 saveRDS(df, mapfile)
27 }
28 df$grp <- floor(as.numeric(as.character(df$group)))
29 df
30}
31
32map <- fetchMaps(c("DEU_1", "AUT_1", "CHE_1", "LUX_0", "BEL_3", "ITA_1"), c(0,0,0,0,34,17))
33
34geoDistrib <- function(query, kco = new("KorAPConnection", verbose=TRUE)) {
35 regions <- readRDS("examples/geo/data/regions.rds")
36 regions$freq <- NA
37 plot <- NULL
38 vc <- ""
39 for (i in 1:nrow(regions)) {
40 if (!is.na(regions[i,]$query)) {
41 cat(as.character(regions[i,]$land), "\n")
42 regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens
43 if (regions[i,]$total == 0) {
44 regions[i,]$afreq <- 0
45 regions[i,]$freq <- NA
46 } else {
47 regions[i,]$afreq <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query))@totalResults
48 regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total
49 }
50 cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n")
51 plot <- updatePlot(query, plot, map, regions)
52 cat("\n\n")
53 }
54 }
55}
56
57updatePlot <- function(query, regionsPlot, map, laender) {
58 map$ipm <- sapply(map$grp, function(grp) laender$freq[grp] * 10^6)
59 regionsPlot <- ggplot(map) +
60 geom_polygon(aes(x=long, y=lat, group=group, fill=ipm), colour= "black", size=.1) +
61 theme(axis.line.x = element_blank(),
62 axis.line.y = element_blank(),
63 panel.grid.major = element_blank(),
64 panel.grid.minor = element_blank(),
65 panel.border = element_blank(),
66 panel.background = element_blank(),
67 axis.line=element_blank(),axis.text.x=element_blank(),
68 axis.text.y=element_blank(),axis.ticks=element_blank(),
69 axis.title.x=element_blank(),
70 axis.title.y=element_blank()) +
71 coord_equal(ratio=1.5) +
72 labs(title = sprintf("Regional distribution of ā€œ%sā€", query))
73 print(regionsPlot)
74 regionsPlot
75}
76
77#geoDistrib("wegen dem [tt/p=NN]")
78geoDistrib("heuer")
79#geoDistrib("Sonnabend")
80#geoDistrib("eh")