Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 1 | #!/usr/bin/Rscript |
| 2 | library(RKorAPClient) |
| 3 | library(ggplot2) |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 4 | library(sf) |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 5 | # library(R.cache) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 6 | |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 7 | devAskNewPage(ask = FALSE) |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 8 | |
Marc Kupietz | 0e18020 | 2025-08-17 14:09:20 +0200 | [diff] [blame^] | 9 | mapfile <- file.path(tempdir(), "map-gadm41-sf-v1.rds") |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 10 | |
| 11 | # Caching data in the user's home filespace by default |
| 12 | # is not allowed to package demos by CRAN policies ... |
| 13 | # |
| 14 | # mapfile <- file.path(R.cache::getCachePath(), "map-v2.rds") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 15 | |
| 16 | fetchAndPrepareMap <- function(map, pick) { |
Marc Kupietz | 0e18020 | 2025-08-17 14:09:20 +0200 | [diff] [blame^] | 17 | cat("Downloading GADM 4.1 map data for ", map, "\n") |
| 18 | parts <- strsplit(map, "_")[[1]] |
| 19 | iso <- parts[1] |
| 20 | level <- as.integer(parts[2]) |
| 21 | json_url <- sprintf("https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_%s_%d.json", iso, level) |
| 22 | sfobj <- tryCatch({ |
| 23 | suppressWarnings(sf::st_read(json_url, quiet = TRUE)) |
| 24 | }, error = function(e) { |
| 25 | stop(sprintf("Failed to read %s: %s", json_url, conditionMessage(e))) |
| 26 | }) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 27 | if (pick > 0) { |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 28 | sfobj <- sfobj[pick, ] |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 29 | } |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 30 | # Keep only geometry to standardize columns across layers |
| 31 | sfobj <- sfobj["geometry"] |
| 32 | sfobj |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 33 | } |
| 34 | |
| 35 | fetchMaps <- function(maps, picks) { |
| 36 | if (file.exists(mapfile)) { |
| 37 | df <- readRDS(mapfile) |
| 38 | } else { |
Marc Kupietz | 0e18020 | 2025-08-17 14:09:20 +0200 | [diff] [blame^] | 39 | cat("Downloading and caching GADM 4.1 map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n") |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 40 | # Fetch individual sf layers and row-bind |
| 41 | sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE) |
| 42 | df <- do.call(rbind, sflist) |
| 43 | # Create a stable group index compatible with original regions index logic |
| 44 | df$grp <- seq_len(nrow(df)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 45 | dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE) |
| 46 | saveRDS(df, mapfile) |
| 47 | } |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 48 | # If cache is from an older version (non-sf tidy data), refresh |
| 49 | if (!inherits(df, "sf")) { |
| 50 | cat("Cached map is in outdated format; re-downloading as sf...\n") |
| 51 | sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE) |
| 52 | df <- do.call(rbind, sflist) |
| 53 | df$grp <- seq_len(nrow(df)) |
| 54 | dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE) |
| 55 | saveRDS(df, mapfile) |
| 56 | } else if (is.null(df$grp)) { |
| 57 | df$grp <- seq_len(nrow(df)) |
| 58 | } |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 59 | df |
| 60 | } |
| 61 | |
Marc Kupietz | b1be8b4 | 2019-09-28 17:57:31 +0200 | [diff] [blame] | 62 | map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 63 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 64 | geoDistrib <- function(query, kco = KorAPConnection(verbose=TRUE)) { |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 65 | regions <- readRDS("demo/data/regions.rds") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 66 | regions$freq <- NA |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 67 | regions$url <- NA |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 68 | plot <- NULL |
| 69 | vc <- "" |
| 70 | for (i in 1:nrow(regions)) { |
| 71 | if (!is.na(regions[i,]$query)) { |
Marc Kupietz | b1be8b4 | 2019-09-28 17:57:31 +0200 | [diff] [blame] | 72 | cat(as.character(regions[i,]$region), "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 73 | regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens |
| 74 | if (regions[i,]$total == 0) { |
| 75 | regions[i,]$afreq <- 0 |
| 76 | regions[i,]$freq <- NA |
| 77 | } else { |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 78 | kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query)) |
| 79 | regions[i,]$afreq <- kqo@totalResults |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 80 | regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 81 | regions[i,]$url <- kqo@webUIRequestUrl |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 82 | } |
| 83 | cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 84 | cat("\n\n") |
| 85 | } |
| 86 | } |
Marc Kupietz | 3da02eb | 2019-10-04 09:15:00 +0200 | [diff] [blame] | 87 | plot <- updatePlot(query, map, regions) |
Marc Kupietz | 5fb892e | 2021-03-05 08:18:25 +0100 | [diff] [blame] | 88 | print(plot) |
| 89 | plot |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 90 | } |
| 91 | |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 92 | updatePlot <- function(query, map, regions) { |
| 93 | map$ipm <- sapply(map$grp, function(grp) regions$freq[grp] * 10^6) |
| 94 | map$region <- sapply(map$grp, function(grp) regions$region[grp]) |
| 95 | map$url <- sapply(map$grp, function(grp) regions$url[grp]) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 96 | regionsPlot <- ggplot(map) + |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 97 | geom_sf(aes(fill = ipm), colour = "black", linewidth = .1) + |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 98 | theme(axis.line.x = element_blank(), |
| 99 | axis.line.y = element_blank(), |
| 100 | panel.grid.major = element_blank(), |
| 101 | panel.grid.minor = element_blank(), |
| 102 | panel.border = element_blank(), |
| 103 | panel.background = element_blank(), |
| 104 | axis.line=element_blank(),axis.text.x=element_blank(), |
| 105 | axis.text.y=element_blank(),axis.ticks=element_blank(), |
| 106 | axis.title.x=element_blank(), |
| 107 | axis.title.y=element_blank()) + |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 108 | coord_sf() + |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 109 | labs(title = sprintf("Regional distribution of \u201c%s\u201d", query)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 110 | print(regionsPlot) |
| 111 | regionsPlot |
| 112 | } |
| 113 | |
| 114 | #geoDistrib("wegen dem [tt/p=NN]") |
| 115 | geoDistrib("heuer") |
| 116 | #geoDistrib("Sonnabend") |
| 117 | #geoDistrib("eh") |