blob: 6c05a2bf2a271389d3a1b39c0419089e8d40b55b [file] [log] [blame]
Marc Kupietz451980d2019-09-23 23:45:10 +02001#!/usr/bin/Rscript
2library(RKorAPClient)
3library(ggplot2)
Marc Kupietz4d8824c2025-08-17 12:37:24 +02004library(sf)
Marc Kupietz35eecca2022-09-07 10:45:42 +02005# library(R.cache)
Marc Kupietz451980d2019-09-23 23:45:10 +02006
Marc Kupietze457d992019-09-29 18:17:05 +02007devAskNewPage(ask = FALSE)
Marc Kupietz35eecca2022-09-07 10:45:42 +02008
Marc Kupietz4d8824c2025-08-17 12:37:24 +02009mapfile <- file.path(tempdir(), "map-sf-v1.rds")
Marc Kupietz35eecca2022-09-07 10:45:42 +020010
11# Caching data in the user's home filespace by default
12# is not allowed to package demos by CRAN policies ...
13#
14# mapfile <- file.path(R.cache::getCachePath(), "map-v2.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020015
16fetchAndPrepareMap <- function(map, pick) {
17 cat("Downloading GADM map data for ", map, "\n")
Marc Kupietz6b27e2f2025-08-17 12:18:58 +020018 sp <- readRDS(url(sprintf("https://geodata.ucdavis.edu/gadm/gadm3.6/Rsp/gadm36_%s_sp.rds", map)))
Marc Kupietz4d8824c2025-08-17 12:37:24 +020019 sfobj <- sf::st_as_sf(sp)
Marc Kupietz451980d2019-09-23 23:45:10 +020020 if (pick > 0) {
Marc Kupietz4d8824c2025-08-17 12:37:24 +020021 sfobj <- sfobj[pick, ]
Marc Kupietz451980d2019-09-23 23:45:10 +020022 }
Marc Kupietz4d8824c2025-08-17 12:37:24 +020023 # Keep only geometry to standardize columns across layers
24 sfobj <- sfobj["geometry"]
25 sfobj
Marc Kupietz451980d2019-09-23 23:45:10 +020026}
27
28fetchMaps <- function(maps, picks) {
29 if (file.exists(mapfile)) {
30 df <- readRDS(mapfile)
31 } else {
32 cat("Downloading and caching GADM map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
Marc Kupietz4d8824c2025-08-17 12:37:24 +020033 # Fetch individual sf layers and row-bind
34 sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE)
35 df <- do.call(rbind, sflist)
36 # Create a stable group index compatible with original regions index logic
37 df$grp <- seq_len(nrow(df))
Marc Kupietz451980d2019-09-23 23:45:10 +020038 dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
39 saveRDS(df, mapfile)
40 }
Marc Kupietz4d8824c2025-08-17 12:37:24 +020041 # If cache is from an older version (non-sf tidy data), refresh
42 if (!inherits(df, "sf")) {
43 cat("Cached map is in outdated format; re-downloading as sf...\n")
44 sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE)
45 df <- do.call(rbind, sflist)
46 df$grp <- seq_len(nrow(df))
47 dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
48 saveRDS(df, mapfile)
49 } else if (is.null(df$grp)) {
50 df$grp <- seq_len(nrow(df))
51 }
Marc Kupietz451980d2019-09-23 23:45:10 +020052 df
53}
54
Marc Kupietzb1be8b42019-09-28 17:57:31 +020055map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0))
Marc Kupietz451980d2019-09-23 23:45:10 +020056
Marc Kupietz617266d2025-02-27 10:43:07 +010057geoDistrib <- function(query, kco = KorAPConnection(verbose=TRUE)) {
Marc Kupietze457d992019-09-29 18:17:05 +020058 regions <- readRDS("demo/data/regions.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020059 regions$freq <- NA
Marc Kupietz9402dec2019-09-28 22:29:30 +020060 regions$url <- NA
Marc Kupietz451980d2019-09-23 23:45:10 +020061 plot <- NULL
62 vc <- ""
63 for (i in 1:nrow(regions)) {
64 if (!is.na(regions[i,]$query)) {
Marc Kupietzb1be8b42019-09-28 17:57:31 +020065 cat(as.character(regions[i,]$region), "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020066 regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens
67 if (regions[i,]$total == 0) {
68 regions[i,]$afreq <- 0
69 regions[i,]$freq <- NA
70 } else {
Marc Kupietz9402dec2019-09-28 22:29:30 +020071 kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query))
72 regions[i,]$afreq <- kqo@totalResults
Marc Kupietz451980d2019-09-23 23:45:10 +020073 regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total
Marc Kupietz9402dec2019-09-28 22:29:30 +020074 regions[i,]$url <- kqo@webUIRequestUrl
Marc Kupietz451980d2019-09-23 23:45:10 +020075 }
76 cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020077 cat("\n\n")
78 }
79 }
Marc Kupietz3da02eb2019-10-04 09:15:00 +020080 plot <- updatePlot(query, map, regions)
Marc Kupietz5fb892e2021-03-05 08:18:25 +010081 print(plot)
82 plot
Marc Kupietz451980d2019-09-23 23:45:10 +020083}
84
Marc Kupietz9402dec2019-09-28 22:29:30 +020085updatePlot <- function(query, map, regions) {
86 map$ipm <- sapply(map$grp, function(grp) regions$freq[grp] * 10^6)
87 map$region <- sapply(map$grp, function(grp) regions$region[grp])
88 map$url <- sapply(map$grp, function(grp) regions$url[grp])
Marc Kupietz451980d2019-09-23 23:45:10 +020089 regionsPlot <- ggplot(map) +
Marc Kupietz4d8824c2025-08-17 12:37:24 +020090 geom_sf(aes(fill = ipm), colour = "black", linewidth = .1) +
Marc Kupietz451980d2019-09-23 23:45:10 +020091 theme(axis.line.x = element_blank(),
92 axis.line.y = element_blank(),
93 panel.grid.major = element_blank(),
94 panel.grid.minor = element_blank(),
95 panel.border = element_blank(),
96 panel.background = element_blank(),
97 axis.line=element_blank(),axis.text.x=element_blank(),
98 axis.text.y=element_blank(),axis.ticks=element_blank(),
99 axis.title.x=element_blank(),
100 axis.title.y=element_blank()) +
Marc Kupietz4d8824c2025-08-17 12:37:24 +0200101 coord_sf() +
Marc Kupietze457d992019-09-29 18:17:05 +0200102 labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))
Marc Kupietz451980d2019-09-23 23:45:10 +0200103 print(regionsPlot)
104 regionsPlot
105}
106
107#geoDistrib("wegen dem [tt/p=NN]")
108geoDistrib("heuer")
109#geoDistrib("Sonnabend")
110#geoDistrib("eh")