Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 1 | #!/usr/bin/Rscript |
| 2 | library(RKorAPClient) |
| 3 | library(ggplot2) |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 4 | library(sf) |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 5 | # library(R.cache) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 6 | |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 7 | devAskNewPage(ask = FALSE) |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 8 | |
Marc Kupietz | 0b7c50d | 2025-08-17 18:09:20 +0200 | [diff] [blame] | 9 | # --- CRAN-compliant caching ----------------------------------------------- |
| 10 | # Default to tempdir() during demos/tests. Allow users to opt-in to a |
| 11 | # persistent cache in a per-user directory via either |
| 12 | # options(RKorAPClient.regional.cache = "user") |
| 13 | # or environment variable |
| 14 | # RKORAPCLIENT_CACHE=user |
| 15 | # Any value among {"user","persistent","cache","true","1","yes"} |
| 16 | # enables persistent caching. Everything else uses tempdir(). |
| 17 | get_cache_dir <- function() { |
| 18 | mode <- tolower(getOption( |
| 19 | "RKorAPClient.regional.cache", |
| 20 | Sys.getenv("RKORAPCLIENT_CACHE", "temp") |
| 21 | )) |
| 22 | if (mode %in% c("user", "persistent", "cache", "true", "1", "yes")) { |
| 23 | d <- tools::R_user_dir("RKorAPClient", which = "cache") |
| 24 | } else { |
| 25 | d <- tempdir() |
| 26 | } |
| 27 | dir.create(d, recursive = TRUE, showWarnings = FALSE) |
| 28 | d |
| 29 | } |
| 30 | |
| 31 | mapfile <- file.path(get_cache_dir(), "map-gadm41-sf-v1.rds") |
Marc Kupietz | 35eecca | 2022-09-07 10:45:42 +0200 | [diff] [blame] | 32 | |
| 33 | # Caching data in the user's home filespace by default |
| 34 | # is not allowed to package demos by CRAN policies ... |
| 35 | # |
| 36 | # mapfile <- file.path(R.cache::getCachePath(), "map-v2.rds") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 37 | |
| 38 | fetchAndPrepareMap <- function(map, pick) { |
Marc Kupietz | 0e18020 | 2025-08-17 14:09:20 +0200 | [diff] [blame] | 39 | cat("Downloading GADM 4.1 map data for ", map, "\n") |
| 40 | parts <- strsplit(map, "_")[[1]] |
| 41 | iso <- parts[1] |
| 42 | level <- as.integer(parts[2]) |
| 43 | json_url <- sprintf("https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_%s_%d.json", iso, level) |
| 44 | sfobj <- tryCatch({ |
| 45 | suppressWarnings(sf::st_read(json_url, quiet = TRUE)) |
| 46 | }, error = function(e) { |
| 47 | stop(sprintf("Failed to read %s: %s", json_url, conditionMessage(e))) |
| 48 | }) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 49 | if (pick > 0) { |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 50 | sfobj <- sfobj[pick, ] |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 51 | } |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 52 | # Keep only geometry to standardize columns across layers |
| 53 | sfobj <- sfobj["geometry"] |
| 54 | sfobj |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 55 | } |
| 56 | |
| 57 | fetchMaps <- function(maps, picks) { |
| 58 | if (file.exists(mapfile)) { |
| 59 | df <- readRDS(mapfile) |
Marc Kupietz | 0b7c50d | 2025-08-17 18:09:20 +0200 | [diff] [blame] | 60 | cat("Using cached map from:", mapfile, "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 61 | } else { |
Marc Kupietz | 0e18020 | 2025-08-17 14:09:20 +0200 | [diff] [blame] | 62 | cat("Downloading and caching GADM 4.1 map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n") |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 63 | # Fetch individual sf layers and row-bind |
| 64 | sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE) |
| 65 | df <- do.call(rbind, sflist) |
| 66 | # Create a stable group index compatible with original regions index logic |
| 67 | df$grp <- seq_len(nrow(df)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 68 | saveRDS(df, mapfile) |
Marc Kupietz | 0b7c50d | 2025-08-17 18:09:20 +0200 | [diff] [blame] | 69 | cat("Saved map cache to:", mapfile, "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 70 | } |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 71 | # If cache is from an older version (non-sf tidy data), refresh |
| 72 | if (!inherits(df, "sf")) { |
| 73 | cat("Cached map is in outdated format; re-downloading as sf...\n") |
| 74 | sflist <- mapply(fetchAndPrepareMap, maps, picks, SIMPLIFY = FALSE) |
| 75 | df <- do.call(rbind, sflist) |
| 76 | df$grp <- seq_len(nrow(df)) |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 77 | saveRDS(df, mapfile) |
Marc Kupietz | 0b7c50d | 2025-08-17 18:09:20 +0200 | [diff] [blame] | 78 | cat("Saved map cache to:", mapfile, "\n") |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 79 | } else if (is.null(df$grp)) { |
| 80 | df$grp <- seq_len(nrow(df)) |
| 81 | } |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 82 | df |
| 83 | } |
| 84 | |
Marc Kupietz | b1be8b4 | 2019-09-28 17:57:31 +0200 | [diff] [blame] | 85 | map <- fetchMaps(c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"), c(0, 0, 0, 0, 34, 17, 0)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 86 | |
Marc Kupietz | 617266d | 2025-02-27 10:43:07 +0100 | [diff] [blame] | 87 | geoDistrib <- function(query, kco = KorAPConnection(verbose=TRUE)) { |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 88 | regions <- readRDS("demo/data/regions.rds") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 89 | regions$freq <- NA |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 90 | regions$url <- NA |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 91 | plot <- NULL |
| 92 | vc <- "" |
| 93 | for (i in 1:nrow(regions)) { |
| 94 | if (!is.na(regions[i,]$query)) { |
Marc Kupietz | b1be8b4 | 2019-09-28 17:57:31 +0200 | [diff] [blame] | 95 | cat(as.character(regions[i,]$region), "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 96 | regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens |
| 97 | if (regions[i,]$total == 0) { |
| 98 | regions[i,]$afreq <- 0 |
| 99 | regions[i,]$freq <- NA |
| 100 | } else { |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 101 | kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query)) |
| 102 | regions[i,]$afreq <- kqo@totalResults |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 103 | regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 104 | regions[i,]$url <- kqo@webUIRequestUrl |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 105 | } |
| 106 | cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n") |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 107 | cat("\n\n") |
| 108 | } |
| 109 | } |
Marc Kupietz | 3da02eb | 2019-10-04 09:15:00 +0200 | [diff] [blame] | 110 | plot <- updatePlot(query, map, regions) |
Marc Kupietz | 5fb892e | 2021-03-05 08:18:25 +0100 | [diff] [blame] | 111 | print(plot) |
| 112 | plot |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 113 | } |
| 114 | |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 115 | updatePlot <- function(query, map, regions) { |
| 116 | map$ipm <- sapply(map$grp, function(grp) regions$freq[grp] * 10^6) |
| 117 | map$region <- sapply(map$grp, function(grp) regions$region[grp]) |
| 118 | map$url <- sapply(map$grp, function(grp) regions$url[grp]) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 119 | regionsPlot <- ggplot(map) + |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 120 | geom_sf(aes(fill = ipm), colour = "black", linewidth = .1) + |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 121 | theme(axis.line.x = element_blank(), |
| 122 | axis.line.y = element_blank(), |
| 123 | panel.grid.major = element_blank(), |
| 124 | panel.grid.minor = element_blank(), |
| 125 | panel.border = element_blank(), |
| 126 | panel.background = element_blank(), |
| 127 | axis.line=element_blank(),axis.text.x=element_blank(), |
| 128 | axis.text.y=element_blank(),axis.ticks=element_blank(), |
| 129 | axis.title.x=element_blank(), |
| 130 | axis.title.y=element_blank()) + |
Marc Kupietz | 4d8824c | 2025-08-17 12:37:24 +0200 | [diff] [blame] | 131 | coord_sf() + |
Marc Kupietz | e457d99 | 2019-09-29 18:17:05 +0200 | [diff] [blame] | 132 | labs(title = sprintf("Regional distribution of \u201c%s\u201d", query)) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 133 | print(regionsPlot) |
| 134 | regionsPlot |
| 135 | } |
| 136 | |
Marc Kupietz | 9b4295a | 2025-08-17 18:10:24 +0200 | [diff] [blame^] | 137 | # --- Alternative: Highcharter interactive map version --- |
| 138 | geoDistrib_hc <- function(query, kco = KorAPConnection(verbose=TRUE)) { |
| 139 | # Ensure optional dependencies are present |
| 140 | for (pkg in c("highcharter", "geojsonsf", "viridisLite", "htmlwidgets")) { |
| 141 | if (!requireNamespace(pkg, quietly = TRUE)) |
| 142 | stop(sprintf("Package '%s' is required for the highcharter demo. Install with install.packages('%s')", pkg, pkg)) |
| 143 | } |
| 144 | |
| 145 | regions <- readRDS("demo/data/regions.rds") |
| 146 | regions$freq <- NA |
| 147 | regions$url <- NA |
| 148 | vc <- "" |
| 149 | for (i in 1:nrow(regions)) { |
| 150 | if (!is.na(regions[i,]$query)) { |
| 151 | cat(as.character(regions[i,]$region), "\n") |
| 152 | regions[i,]$total <- corpusStats(kco, vc=paste0(vc, regions[i,]$query))@tokens |
| 153 | if (regions[i,]$total == 0) { |
| 154 | regions[i,]$afreq <- 0 |
| 155 | regions[i,]$freq <- NA |
| 156 | } else { |
| 157 | kqo <- corpusQuery(kco, query, vc=paste0(vc, regions[i,]$query)) |
| 158 | regions[i,]$afreq <- kqo@totalResults |
| 159 | regions[i,]$freq <- regions[i,]$afreq / regions[i,]$total |
| 160 | regions[i,]$url <- kqo@webUIRequestUrl |
| 161 | } |
| 162 | cat(regions[i,]$afreq, regions[i,]$total, regions[i,]$freq, "\n") |
| 163 | cat("\n\n") |
| 164 | } |
| 165 | } |
| 166 | plot <- updatePlotHC(query, map, regions) |
| 167 | if (interactive()) { |
| 168 | print(plot) |
| 169 | } else { |
| 170 | outfile <- file.path(tempdir(), "regional_map_highcharter.html") |
| 171 | htmlwidgets::saveWidget(plot, outfile, selfcontained = TRUE) |
| 172 | cat("Saved interactive map to:", outfile, "\n") |
| 173 | } |
| 174 | plot |
| 175 | } |
| 176 | |
| 177 | updatePlotHC <- function(query, map, regions) { |
| 178 | # Build data to join to map via 'grp' |
| 179 | df <- data.frame( |
| 180 | grp = map$grp, |
| 181 | ipm = sapply(map$grp, function(grp) regions$freq[grp] * 10^6), |
| 182 | region = sapply(map$grp, function(grp) regions$region[grp]), |
| 183 | url = sapply(map$grp, function(grp) regions$url[grp]) |
| 184 | ) |
| 185 | |
| 186 | gj <- jsonlite::fromJSON(geojsonsf::sf_geojson(map), simplifyVector = FALSE) |
| 187 | |
| 188 | hc <- highcharter::highchart(type = "map") |> |
| 189 | highcharter::hc_add_series_map( |
| 190 | map = gj, |
| 191 | df = df, |
| 192 | value = "ipm", |
| 193 | joinBy = "grp", |
| 194 | name = "ipm", |
| 195 | borderColor = "#000000", |
| 196 | borderWidth = 0.2, |
| 197 | nullColor = "#eeeeee" |
| 198 | ) |> |
| 199 | highcharter::hc_colorAxis(stops = highcharter::color_stops(200, colors = viridisLite::viridis(200))) |> |
| 200 | highcharter::hc_title(text = sprintf('Regional distribution of "%s"', query)) |> |
| 201 | highcharter::hc_mapNavigation(enabled = TRUE) |> |
| 202 | highcharter::hc_tooltip(useHTML = TRUE, pointFormat = "<b>{point.region}</b><br/>{point.value:.1f} per Mio") |> |
| 203 | highcharter::hc_plotOptions( |
| 204 | series = list( |
| 205 | cursor = "pointer", |
| 206 | point = list(events = list(click = htmlwidgets::JS("function(){ if (this.url){ window.open(this.url, '_blank'); }}"))) |
| 207 | )) |
| 208 | |
| 209 | # Set the raw Highcharts map projection via options (works across versions) |
| 210 | hc$x$hc_opts$mapView <- list(projection = list(name = "WebMercator")) |
| 211 | hc |
| 212 | } |
| 213 | |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 214 | #geoDistrib("wegen dem [tt/p=NN]") |
| 215 | geoDistrib("heuer") |
| 216 | #geoDistrib("Sonnabend") |
| 217 | #geoDistrib("eh") |
Marc Kupietz | 9b4295a | 2025-08-17 18:10:24 +0200 | [diff] [blame^] | 218 | # To use the Highcharter version, call: |
| 219 | geoDistrib_hc("heuer") |