blob: b079265c14e6f1157b37bc4cfc00995988ef5e21 [file] [log] [blame]
Marc Kupietz451980d2019-09-23 23:45:10 +02001#!/usr/bin/Rscript
2library(RKorAPClient)
3library(ggplot2)
Marc Kupietz4d8824c2025-08-17 12:37:24 +02004library(sf)
Marc Kupietz35eecca2022-09-07 10:45:42 +02005# library(R.cache)
Marc Kupietz451980d2019-09-23 23:45:10 +02006
Marc Kupietze457d992019-09-29 18:17:05 +02007devAskNewPage(ask = FALSE)
Marc Kupietz35eecca2022-09-07 10:45:42 +02008
Marc Kupietz0b7c50d2025-08-17 18:09:20 +02009# --- CRAN-compliant caching -----------------------------------------------
10# Default to tempdir() during demos/tests. Allow users to opt-in to a
11# persistent cache in a per-user directory via either
12# options(RKorAPClient.regional.cache = "user")
13# or environment variable
14# RKORAPCLIENT_CACHE=user
15# Any value among {"user","persistent","cache","true","1","yes"}
16# enables persistent caching. Everything else uses tempdir().
17get_cache_dir <- function() {
18 mode <- tolower(getOption(
19 "RKorAPClient.regional.cache",
20 Sys.getenv("RKORAPCLIENT_CACHE", "temp")
21 ))
22 if (mode %in% c("user", "persistent", "cache", "true", "1", "yes")) {
23 d <- tools::R_user_dir("RKorAPClient", which = "cache")
24 } else {
25 d <- tempdir()
26 }
27 dir.create(d, recursive = TRUE, showWarnings = FALSE)
28 d
29}
30
31mapfile <- file.path(get_cache_dir(), "map-gadm41-sf-v1.rds")
Marc Kupietz35eecca2022-09-07 10:45:42 +020032
33# Caching data in the user's home filespace by default
34# is not allowed to package demos by CRAN policies ...
35#
36# mapfile <- file.path(R.cache::getCachePath(), "map-v2.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020037
38fetchAndPrepareMap <- function(map, pick) {
Marc Kupietz0e180202025-08-17 14:09:20 +020039 cat("Downloading GADM 4.1 map data for ", map, "\n")
40 parts <- strsplit(map, "_")[[1]]
41 iso <- parts[1]
42 level <- as.integer(parts[2])
43 json_url <- sprintf("https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_%s_%d.json", iso, level)
44 sfobj <- tryCatch({
45 suppressWarnings(sf::st_read(json_url, quiet = TRUE))
46 }, error = function(e) {
47 stop(sprintf("Failed to read %s: %s", json_url, conditionMessage(e)))
48 })
Marc Kupietz451980d2019-09-23 23:45:10 +020049 if (pick > 0) {
Marc Kupietz4d8824c2025-08-17 12:37:24 +020050 sfobj <- sfobj[pick, ]
Marc Kupietz451980d2019-09-23 23:45:10 +020051 }
Marc Kupietz4d8824c2025-08-17 12:37:24 +020052 # Keep only geometry to standardize columns across layers
53 sfobj <- sfobj["geometry"]
54 sfobj
Marc Kupietz451980d2019-09-23 23:45:10 +020055}
56
57fetchMaps <- function(maps, picks) {
58 if (file.exists(mapfile)) {
59 df <- readRDS(mapfile)
Marc Kupietz0b7c50d2025-08-17 18:09:20 +020060 cat("Using cached map from:", mapfile, "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020061 } else {
Marc Kupietz0e180202025-08-17 14:09:20 +020062 cat("Downloading and caching GADM 4.1 map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
Marc Kupietz4d8824c2025-08-17 12:37:24 +020063 # Fetch individual sf layers and row-bind
64 sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE)
65 df <- do.call(rbind, sflist)
66 # Create a stable group index compatible with original regions index logic
67 df$grp <- seq_len(nrow(df))
Marc Kupietz451980d2019-09-23 23:45:10 +020068 saveRDS(df, mapfile)
Marc Kupietz0b7c50d2025-08-17 18:09:20 +020069 cat("Saved map cache to:", mapfile, "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020070 }
Marc Kupietz4d8824c2025-08-17 12:37:24 +020071 # If cache is from an older version (non-sf tidy data), refresh
72 if (!inherits(df, "sf")) {
73 cat("Cached map is in outdated format; re-downloading as sf...\n")
74 sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE)
75 df <- do.call(rbind, sflist)
76 df$grp <- seq_len(nrow(df))
Marc Kupietz4d8824c2025-08-17 12:37:24 +020077 saveRDS(df, mapfile)
Marc Kupietz0b7c50d2025-08-17 18:09:20 +020078 cat("Saved map cache to:", mapfile, "\n")
Marc Kupietz4d8824c2025-08-17 12:37:24 +020079 } else if (is.null(df$grp)) {
80 df$grp <- seq_len(nrow(df))
81 }
Marc Kupietz451980d2019-09-23 23:45:10 +020082 df
83}
84
Marc Kupietzb1be8b42019-09-28 17:57:31 +020085map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0))
Marc Kupietz451980d2019-09-23 23:45:10 +020086
Marc Kupietz617266d2025-02-27 10:43:07 +010087geoDistrib <- function(query, kco = KorAPConnection(verbose=TRUE)) {
Marc Kupietze457d992019-09-29 18:17:05 +020088 regions <- readRDS("demo/data/regions.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020089 regions$freq <- NA
Marc Kupietz9402dec2019-09-28 22:29:30 +020090 regions$url <- NA
Marc Kupietz451980d2019-09-23 23:45:10 +020091 plot <- NULL
92 vc <- ""
93 for (i in 1:nrow(regions)) {
94 if (!is.na(regions[i,]$query)) {
Marc Kupietzb1be8b42019-09-28 17:57:31 +020095 cat(as.character(regions[i,]$region), "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +020096 regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens
97 if (regions[i,]$total == 0) {
98 regions[i,]$afreq <- 0
99 regions[i,]$freq <- NA
100 } else {
Marc Kupietz9402dec2019-09-28 22:29:30 +0200101 kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query))
102 regions[i,]$afreq <- kqo@totalResults
Marc Kupietz451980d2019-09-23 23:45:10 +0200103 regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total
Marc Kupietz9402dec2019-09-28 22:29:30 +0200104 regions[i,]$url <- kqo@webUIRequestUrl
Marc Kupietz451980d2019-09-23 23:45:10 +0200105 }
106 cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n")
Marc Kupietz451980d2019-09-23 23:45:10 +0200107 cat("\n\n")
108 }
109 }
Marc Kupietz3da02eb2019-10-04 09:15:00 +0200110 plot <- updatePlot(query, map, regions)
Marc Kupietz5fb892e2021-03-05 08:18:25 +0100111 print(plot)
112 plot
Marc Kupietz451980d2019-09-23 23:45:10 +0200113}
114
Marc Kupietz9402dec2019-09-28 22:29:30 +0200115updatePlot <- function(query, map, regions) {
116 map$ipm <- sapply(map$grp, function(grp) regions$freq[grp] * 10^6)
117 map$region <- sapply(map$grp, function(grp) regions$region[grp])
118 map$url <- sapply(map$grp, function(grp) regions$url[grp])
Marc Kupietz451980d2019-09-23 23:45:10 +0200119 regionsPlot <- ggplot(map) +
Marc Kupietz4d8824c2025-08-17 12:37:24 +0200120 geom_sf(aes(fill = ipm), colour = "black", linewidth = .1) +
Marc Kupietz451980d2019-09-23 23:45:10 +0200121 theme(axis.line.x = element_blank(),
122 axis.line.y = element_blank(),
123 panel.grid.major = element_blank(),
124 panel.grid.minor = element_blank(),
125 panel.border = element_blank(),
126 panel.background = element_blank(),
127 axis.line=element_blank(),axis.text.x=element_blank(),
128 axis.text.y=element_blank(),axis.ticks=element_blank(),
129 axis.title.x=element_blank(),
130 axis.title.y=element_blank()) +
Marc Kupietz4d8824c2025-08-17 12:37:24 +0200131 coord_sf() +
Marc Kupietze457d992019-09-29 18:17:05 +0200132 labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))
Marc Kupietz451980d2019-09-23 23:45:10 +0200133 print(regionsPlot)
134 regionsPlot
135}
136
137#geoDistrib("wegen dem [tt/p=NN]")
138geoDistrib("heuer")
139#geoDistrib("Sonnabend")
140#geoDistrib("eh")