blob: 60ea5c7e0b2fc41254786c2d2ab180b2ddb10055 [file] [log] [blame]
Marc Kupietz451980d2019-09-23 23:45:10 +02001#!/usr/bin/Rscript
2library(RKorAPClient)
3library(ggplot2)
4library(raster)
5library(broom)
Marc Kupietz9402dec2019-09-28 22:29:30 +02006library(plotly)
7library(htmlwidgets)
Marc Kupietz451980d2019-09-23 23:45:10 +02008
Marc Kupietze457d992019-09-29 18:17:05 +02009devAskNewPage(ask = FALSE)
10mapfile <- "demo/data/cache/map-v2.rds"
Marc Kupietz451980d2019-09-23 23:45:10 +020011
12fetchAndPrepareMap <- function(map, pick) {
13 cat("Downloading GADM map data for ", map, "\n")
14 sp <- readRDS(url(sprintf("https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_%s_sp.rds", map)))
15 if (pick > 0) {
16 sp@polygons <- sp@polygons[pick]
17 sp@data <- sp@data[pick,]
18 }
19 sp
20}
21
22fetchMaps <- function(maps, picks) {
23 if (file.exists(mapfile)) {
24 df <- readRDS(mapfile)
25 } else {
26 cat("Downloading and caching GADM map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
27 df <- broom::tidy(Reduce(bind, mapply(fetchAndPrepareMap, maps, picks)))
28 dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
29 saveRDS(df, mapfile)
30 }
31 df$grp <- floor(as.numeric(as.character(df$group)))
32 df
33}
34
Marc Kupietzb1be8b42019-09-28 17:57:31 +020035map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0))
Marc Kupietz451980d2019-09-23 23:45:10 +020036
37geoDistrib <- function(query, kco = new("KorAPConnection", verbose=TRUE)) {
Marc Kupietze457d992019-09-29 18:17:05 +020038 regions <- readRDS("demo/data/regions.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020039 regions$freq <- NA
Marc Kupietz9402dec2019-09-28 22:29:30 +020040 regions$url <- NA
Marc Kupietz451980d2019-09-23 23:45:10 +020041 plot <- NULL
42 vc <- ""
43 for (i in 1:nrow(regions)) {
44 if (!is.na(regions[i,]$query)) {
Marc Kupietzb1be8b42019-09-28 17:57:31 +020045 cat(as.character(regions[i,]$region), "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020046 regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens
47 if (regions[i,]$total == 0) {
48 regions[i,]$afreq <- 0
49 regions[i,]$freq <- NA
50 } else {
Marc Kupietz9402dec2019-09-28 22:29:30 +020051 kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query))
52 regions[i,]$afreq <- kqo@totalResults
Marc Kupietz451980d2019-09-23 23:45:10 +020053 regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total
Marc Kupietz9402dec2019-09-28 22:29:30 +020054 regions[i,]$url <- kqo@webUIRequestUrl
Marc Kupietz451980d2019-09-23 23:45:10 +020055 }
56 cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n")
Marc Kupietz9402dec2019-09-28 22:29:30 +020057 plot <- updatePlot(query, map, regions)
Marc Kupietz451980d2019-09-23 23:45:10 +020058 cat("\n\n")
59 }
60 }
Marc Kupietz9402dec2019-09-28 22:29:30 +020061 pp <- ggplotly(plot)
62 for (i in 1:nrow(regions)) {
63 j <- grep(paste0(regions$region[i], "\""), pp$x$data, perl=TRUE)
64 pp$x$data[[j]]$customdata <- regions[i,]$url
65 }
66 ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].data.customdata; window.open(url, 'korap') })}")
67 print(ppp)
68 pp
Marc Kupietz451980d2019-09-23 23:45:10 +020069}
70
Marc Kupietz9402dec2019-09-28 22:29:30 +020071updatePlot <- function(query, map, regions) {
72 map$ipm <- sapply(map$grp, function(grp) regions$freq[grp] * 10^6)
73 map$region <- sapply(map$grp, function(grp) regions$region[grp])
74 map$url <- sapply(map$grp, function(grp) regions$url[grp])
Marc Kupietz451980d2019-09-23 23:45:10 +020075 regionsPlot <- ggplot(map) +
Marc Kupietz69cc54a2019-09-30 12:06:54 +020076 geom_polygon(aes(x=long, y=lat, group=group, fill=ipm, hack=region), colour= "black", size=.1) +
Marc Kupietz451980d2019-09-23 23:45:10 +020077 theme(axis.line.x = element_blank(),
78 axis.line.y = element_blank(),
79 panel.grid.major = element_blank(),
80 panel.grid.minor = element_blank(),
81 panel.border = element_blank(),
82 panel.background = element_blank(),
83 axis.line=element_blank(),axis.text.x=element_blank(),
84 axis.text.y=element_blank(),axis.ticks=element_blank(),
85 axis.title.x=element_blank(),
86 axis.title.y=element_blank()) +
87 coord_equal(ratio=1.5) +
Marc Kupietze457d992019-09-29 18:17:05 +020088 labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))
Marc Kupietz451980d2019-09-23 23:45:10 +020089 print(regionsPlot)
90 regionsPlot
91}
92
93#geoDistrib("wegen dem [tt/p=NN]")
94geoDistrib("heuer")
95#geoDistrib("Sonnabend")
96#geoDistrib("eh")