blob: dae7ff50a97e0977a3e719edb95cc205c89ab7b5 [file] [log] [blame]
Marc Kupietz451980d2019-09-23 23:45:10 +02001#!/usr/bin/Rscript
2library(RKorAPClient)
3library(ggplot2)
Marc Kupietz4d8824c2025-08-17 12:37:24 +02004library(sf)
Marc Kupietz35eecca2022-09-07 10:45:42 +02005# library(R.cache)
Marc Kupietz451980d2019-09-23 23:45:10 +02006
# Do not prompt the user before a new plot page is started.
devAskNewPage(ask = FALSE)

# Location of the cached map data: a file inside this session's temporary
# directory.
mapfile <- file.path(tempdir(), "map-gadm41-sf-v1.rds")

# CRAN policies do not allow package demos to cache data in the user's home
# filespace by default, which is why the R.cache based location below stays
# disabled:
#
# mapfile <- file.path(R.cache::getCachePath(), "map-v2.rds")
Marc Kupietz451980d2019-09-23 23:45:10 +020015
#' Download one GADM 4.1 layer and reduce it to its geometry column
#'
#' @param map Map specification of the form "<ISO>_<level>", e.g. "DEU_1".
#'   The part before the underscore and the integer after it are inserted
#'   into the GADM 4.1 JSON download URL.
#' @param pick Row index of the single feature to keep. Values <= 0 keep all
#'   features of the layer.
#' @return An sf object that contains only the "geometry" column.
fetchAndPrepareMap <- function(map, pick) {
  stopifnot(is.character(map), length(map) == 1L)
  parts <- strsplit(map, "_")[[1]]
  level <- if (length(parts) == 2L) {
    suppressWarnings(as.integer(parts[2]))
  } else {
    NA_integer_
  }
  # Reject malformed specifications before any network access; otherwise the
  # URL would silently be built with "NA" as the level.
  if (is.na(level) || !nzchar(parts[1])) {
    stop(
      sprintf(
        "Invalid map specification \"%s\": expected \"<ISO>_<level>\", e.g. \"DEU_1\"",
        map
      ),
      call. = FALSE
    )
  }
  iso <- parts[1]

  cat("Downloading GADM 4.1 map data for ", map, "\n")
  json_url <- sprintf(
    "https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_%s_%d.json",
    iso, level
  )
  sfobj <- tryCatch(
    suppressWarnings(sf::st_read(json_url, quiet = TRUE)),
    error = function(e) {
      stop(
        sprintf("Failed to read %s: %s", json_url, conditionMessage(e)),
        call. = FALSE
      )
    }
  )

  if (pick > 0) {
    # An out-of-range index would otherwise produce a row without geometry.
    if (pick > nrow(sfobj)) {
      stop(
        sprintf(
          "Cannot pick feature %d of %s: layer has only %d features",
          as.integer(pick), map, nrow(sfobj)
        ),
        call. = FALSE
      )
    }
    sfobj <- sfobj[pick, ]
  }

  # Keep only geometry to standardize columns across layers
  sfobj["geometry"]
}
34
#' Return the combined map, using the cache file `mapfile` when possible
#'
#' The cache is reused as-is whenever it holds an sf object; it is not keyed
#' on `maps`/`picks`.
#'
#' @param maps Character vector of map specifications passed element-wise to
#'   fetchAndPrepareMap().
#' @param picks Vector of feature indices, parallel to `maps`, passed
#'   element-wise to fetchAndPrepareMap().
#' @return An sf object with one row per retained feature and an integer
#'   column `grp` holding the row index.
fetchMaps <- function(maps, picks) {
  # Download every requested layer, row-bind them, number the rows and
  # (re)write the cache file.
  downloadAndCache <- function() {
    sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE)
    df <- do.call(rbind, sflist)
    # Create a stable group index compatible with original regions index logic
    df$grp <- seq_len(nrow(df))
    dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
    saveRDS(df, mapfile)
    df
  }

  if (!file.exists(mapfile)) {
    cat("Downloading and caching GADM 4.1 map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
    return(downloadAndCache())
  }

  # A truncated or otherwise unreadable cache file is treated as a cache miss
  # instead of aborting the whole script.
  df <- tryCatch(
    readRDS(mapfile),
    error = function(e) {
      warning(
        sprintf(
          "Could not read cached map %s (%s); downloading again.",
          mapfile, conditionMessage(e)
        ),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(df)) {
    return(downloadAndCache())
  }

  # If cache is from an older version (non-sf tidy data), refresh
  if (!inherits(df, "sf")) {
    cat("Cached map is in outdated format; re-downloading as sf...\n")
    return(downloadAndCache())
  }

  # Exact name test: `$` would partially match a differently named column.
  if (!"grp" %in% names(df)) {
    df$grp <- seq_len(nrow(df))
  }
  df
}
61
# Build the base map. `maps` and `picks` are parallel vectors: a pick of 0
# keeps the whole layer, a positive pick keeps only that row of the layer.
map <- fetchMaps(
  maps = c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"),
  picks = c(0, 0, 0, 0, 34, 17, 0)
)
Marc Kupietz451980d2019-09-23 23:45:10 +020063
#' Query the relative frequency of an expression per region and plot it
#'
#' For every row of the regions table that has a non-NA `query`, the size of
#' the corresponding virtual corpus and the number of hits for `query` in it
#' are retrieved. The results are stored in the columns `total`, `afreq`,
#' `freq` (= afreq / total) and `url`, and the table is then handed to
#' updatePlot() together with the global `map`.
#'
#' @param query Query string passed to corpusQuery().
#' @param kco Connection object used for all requests.
#' @param vc Prefix prepended to every region's virtual corpus definition;
#'   the default "" uses the region definitions unchanged.
#' @param regionsFile Path of the RDS file holding the regions table. The
#'   default is relative to the current working directory.
#'   NOTE(review): the table is presumably expected to already contain the
#'   columns `region`, `query`, `total` and `afreq` -- confirm.
#' @return The plot object returned by updatePlot().
geoDistrib <- function(query, kco = KorAPConnection(verbose = TRUE),
                       vc = "", regionsFile = "demo/data/regions.rds") {
  regions <- readRDS(regionsFile)
  regions$freq <- NA
  regions$url <- NA

  # seq_len() keeps the loop from running on an empty regions table.
  for (i in seq_len(nrow(regions))) {
    if (!is.na(regions$query[i])) {
      cat(as.character(regions$region[i]), "\n")
      regionVc <- paste0(vc, regions$query[i])
      regions$total[i] <- corpusStats(kco, vc = regionVc)@tokens
      if (regions$total[i] == 0) {
        # Empty virtual corpus: no hits possible, relative frequency undefined.
        regions$afreq[i] <- 0
        regions$freq[i] <- NA
      } else {
        kqo <- corpusQuery(kco, query, vc = regionVc)
        regions$afreq[i] <- kqo@totalResults
        regions$freq[i] <- regions$afreq[i] / regions$total[i]
        regions$url[i] <- kqo@webUIRequestUrl
      }
      cat(regions$afreq[i], regions$total[i], regions$freq[i], "\n")
      cat("\n\n")
    }
  }

  plot <- updatePlot(query, map, regions)
  print(plot)
  plot
}
91
#' Draw the map with regions filled by instances per million
#'
#' @param query Query string; only used for the plot title.
#' @param map sf object whose `grp` column holds, for each feature, the row
#'   index of the matching entry in `regions`.
#' @param regions Table with the columns `freq`, `region` and `url`.
#' @return The ggplot object; it is also printed as a side effect.
updatePlot <- function(query, map, regions) {
  # Vectorized lookups: one indexing operation per column, which also yields
  # zero-length vectors (not an empty list) for a map without features.
  idx <- map$grp
  map$ipm <- regions$freq[idx] * 10^6
  map$region <- regions$region[idx]
  map$url <- regions$url[idx]

  regionsPlot <- ggplot(map) +
    geom_sf(aes(fill = ipm), colour = "black", linewidth = .1) +
    # Strip all axes, grid lines and panel decoration so only the map remains.
    theme(
      axis.line.x = element_blank(),
      axis.line.y = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.line = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank()
    ) +
    coord_sf() +
    labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))

  print(regionsPlot)
  regionsPlot
}
113
# Run the demo for one query. The commented-out calls are alternative
# example queries; enable one of them to plot a different expression.
# geoDistrib("wegen dem [tt/p=NN]")
geoDistrib("heuer")
# geoDistrib("Sonnabend")
# geoDistrib("eh")