Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 1 | #!/usr/bin/Rscript |
| 2 | library(RKorAPClient) |
| 3 | library(ggplot2) |
| 4 | library(raster) |
| 5 | library(broom) |
Marc Kupietz | 9402dec | 2019-09-28 22:29:30 +0200 | [diff] [blame] | 6 | library(plotly) |
| 7 | library(htmlwidgets) |
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 8 | |
# Do not prompt the user before a new page of graphics output is started.
devAskNewPage(FALSE)
# Path of the RDS file in which the tidied map data is cached.
mapfile <- "demo/data/cache/map-v2.rds"
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 11 | |
# Download one GADM 3.6 map layer and optionally reduce it to a single entry.
#
# map:  layer identifier interpolated into the download URL, e.g. "DEU_1".
# pick: index of the polygon (and the matching data row) to keep; any value
#       that is not greater than 0 keeps the complete layer.
#
# Returns the object read from the downloaded RDS file; when pick > 0 its
# @polygons and @data slots are subset to that single index.
fetchAndPrepareMap <- function(map, pick) {
  cat("Downloading GADM map data for ", map, "\n")
  con <- url(sprintf("https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_%s_sp.rds", map))
  # readRDS() does not dispose of a connection object it is handed, so close
  # it here -- also when the download fails -- instead of leaving it to the
  # garbage collector.
  on.exit(close(con), add = TRUE)
  sp <- readRDS(con)
  if (pick > 0) {
    sp@polygons <- sp@polygons[pick]
    # drop = FALSE keeps @data a data frame even if it has a single column.
    sp@data <- sp@data[pick, , drop = FALSE]
  }
  sp
}
| 21 | |
# Return the tidied map data frame, creating the cache file on first use.
#
# maps:  GADM layer identifiers, handed element-wise to fetchAndPrepareMap().
# picks: per-layer pick indices, handed element-wise to fetchAndPrepareMap().
#
# If the file named by the global `mapfile` exists it is read and the
# arguments are not used; otherwise all layers are downloaded, bound
# together, tidied and saved to `mapfile`.
# The returned data frame gets an extra column `grp`: the integer part of the
# numeric value of `group`.
fetchMaps <- function(maps, picks) {
  cacheMissing <- !file.exists(mapfile)
  if (cacheMissing) {
    cat("Downloading and caching GADM map data.\nPlease note that the GADM map data is licensed for academic use and other non-commercial use, only.\nSee https://gadm.org/license.html\n")
    layers <- mapply(fetchAndPrepareMap, maps, picks)
    combined <- Reduce(bind, layers)
    df <- broom::tidy(combined)
    dir.create(dirname(mapfile), recursive = TRUE, showWarnings = FALSE)
    saveRDS(df, mapfile)
  } else {
    df <- readRDS(mapfile)
  }
  # Go through character first so that a factor yields its labels, not its
  # internal level codes.
  groupLabels <- as.character(df$group)
  df$grp <- floor(as.numeric(groupLabels))
  df
}
| 34 | |
# Map data shared by the plotting code below.
# A pick of 0 keeps the whole layer (fetchAndPrepareMap only subsets when
# pick > 0); 34 and 17 keep one entry of BEL_3 and ITA_1 respectively --
# presumably the German-speaking parts, TODO confirm.
map <- fetchMaps(
  maps = c("DEU_1", "AUT_0", "CHE_0", "LUX_0", "BEL_3", "ITA_1", "LIE_0"),
  picks = c(0, 0, 0, 0, 34, 17, 0)
)
Marc Kupietz | 451980d | 2019-09-23 23:45:10 +0200 | [diff] [blame] | 36 | |
# Plot the regional distribution of a KorAP query.
#
# query: query string passed to corpusQuery() and shown in the plot title.
# kco:   KorAP connection; by default a new verbose KorAPConnection.
#
# Reads the region table from "demo/data/regions.rds". For every region with
# a non-NA `query` (used as virtual corpus definition) it stores
#   total - token count of the region's virtual corpus,
#   afreq - number of hits for `query` in it,
#   freq  - afreq / total (NA when total is 0),
#   url   - web UI URL of the corpus query,
# and redraws the map after each region via updatePlot(), using the global
# `map`.
#
# Finally the plot is converted with ggplotly(), each region's URL is
# attached to its traces as customdata, and a version with a click handler
# that opens this URL is printed.
#
# Returns the plotly object WITHOUT the click handler (only the printed
# widget has it).
geoDistrib <- function(query, kco = new("KorAPConnection", verbose=TRUE)) {
  regions <- readRDS("demo/data/regions.rds")
  regions$freq <- NA
  regions$url <- NA
  plot <- NULL
  vc <- ""
  # seq_len() instead of 1:nrow() so that an empty table yields no iterations.
  for (i in seq_len(nrow(regions))) {
    if (is.na(regions$query[i])) {
      next
    }
    regionVc <- paste0(vc, regions$query[i])
    cat(as.character(regions$region[i]), "\n")
    total <- corpusStats(kco, vc = regionVc)@tokens
    if (total == 0) {
      # Empty virtual corpus: no hits possible, relative frequency undefined.
      afreq <- 0
      freq <- NA
    } else {
      kqo <- corpusQuery(kco, query, vc = regionVc)
      afreq <- kqo@totalResults
      freq <- afreq / total
      regions[i, "url"] <- kqo@webUIRequestUrl
    }
    # Cell-wise assignment also works if the column is not yet present.
    regions[i, "total"] <- total
    regions[i, "afreq"] <- afreq
    regions[i, "freq"] <- freq
    cat(afreq, total, freq, "\n")
    # Redraw after every region so the map fills in progressively.
    plot <- updatePlot(query, map, regions)
    cat("\n\n")
  }
  if (is.null(plot)) {
    # No region carried a query: still draw the (empty) map so that
    # ggplotly() receives a plot instead of NULL.
    plot <- updatePlot(query, map, regions)
  }
  pp <- ggplotly(plot)
  for (i in seq_len(nrow(regions))) {
    # Find the traces whose data mention this region name directly before a
    # closing double quote.
    matches <- grep(paste0(regions$region[i], "\""), pp$x$data, perl = TRUE)
    # A region may match no trace or several; [[ ]] with such an index would
    # fail or index recursively, so handle the matches one at a time.
    for (j in matches) {
      pp$x$data[[j]]$customdata <- regions$url[i]
    }
  }
  ppp <- onRender(pp, "function(el, x) { el.on('plotly_click', function(d) { var url=d.points[0].data.customdata; window.open(url, 'korap') })}")
  print(ppp)
  pp
}
| 70 | |
# Draw the map with regions filled by frequency and return the ggplot object.
#
# query:   query string, used only for the plot title.
# map:     map data frame with columns long, lat, group and grp, where grp
#          is used as row index into `regions`.
# regions: region table with columns freq, region and url.
#
# The plot is printed as a side effect and also returned.
updatePlot <- function(query, map, regions) {
  # Look up the per-region values for all map rows at once via grp.
  regionIndex <- map$grp
  map$ipm <- regions$freq[regionIndex] * 10^6  # instances per million
  map$region <- regions$region[regionIndex]
  map$url <- regions$url[regionIndex]
  regionsPlot <- ggplot(map) +
    # `hack` is not a real ggplot2 aesthetic; presumably it only serves to
    # carry the region name into the plotly conversion -- TODO confirm.
    geom_polygon(aes(x=long, y=lat, group=group, fill=ipm, hack=region), colour= "black", size=.1) +
    # Strip axes, grid and background so that only the map remains.
    theme(axis.line.x = element_blank(),
          axis.line.y = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          axis.line=element_blank(),axis.text.x=element_blank(),
          axis.text.y=element_blank(),axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) +
    coord_equal(ratio=1.5) +
    labs(title = sprintf("Regional distribution of \u201c%s\u201d", query))
  print(regionsPlot)
  regionsPlot
}
| 92 | |
# Example queries; the commented-out calls are alternatives to the active one.
# geoDistrib("wegen dem [tt/p=NN]")
geoDistrib(query = "heuer")
# geoDistrib("Sonnabend")
# geoDistrib("eh")