Marc Kupietz | 5bbc9db | 2019-08-30 16:30:45 +0200 | [diff] [blame] | 1 | #' @import jsonlite |
| 2 | #' @import curl |
| 3 | |
| 4 | library(jsonlite) |
| 5 | |
# Metadata field names used as the default `fields` argument of KorAPQuery().
defaultFields <- c(
  "corpusSigle",
  "textSigle",
  "pubDate",
  "pubPlace",
  "availability",
  "textClass"
)
| 8 | |
#' Fetch statistics for a virtual corpus
#'
#' Builds a `statistics?cq=...` URL below `apiurl` and parses the JSON
#' response with `fromJSON()`.
#'
#' NOTE(review): `apiurl` is a free variable that is not defined in this
#' file; it must exist in an enclosing environment when this is called.
#'
#' @param vc Virtual-corpus definition. It is percent-encoded (reserved
#'   characters included) before being used as the `cq` parameter.
#' @return Whatever `fromJSON()` returns for the statistics URL.
derekoStats <- function(vc='') {
  encodedVc <- URLencode(vc, reserved = TRUE)
  statsUrl <- paste0(apiurl, 'statistics?cq=', encodedVc)
  fromJSON(statsUrl)
}
| 13 | |
#' Prepare a KorAP query and fetch its first hit
#'
#' Builds the query string, sends a `count=1` search request to the API and
#' returns the parsed response, extended with the information needed to
#' fetch the remaining pages later.
#'
#' @param con List-like connection object; its `KorAPUrl` and `apiUrl`
#'   elements are used as URL prefixes.
#' @param query Query string; percent-encoded (reserved characters included).
#' @param vc Optional virtual-corpus definition. When it is not a single
#'   non-empty string, no `cq` parameter is sent.
#' @param ql Query language, appended verbatim as the `ql` parameter.
#' @param fields Character vector of metadata fields to request for each
#'   match.
#' @return The parsed JSON response of the `count=1` request with these
#'   additional elements: `fields`, `requestUrl` (API search URL without
#'   paging parameters), `request` (the bare query string) and
#'   `webUIRequestUrl` (`con$KorAPUrl` followed by the query string).
KorAPQuery <- function(con, query, vc="", ql="poliqarp", fields=defaultFields) {
  # isTRUE() treats NULL, NA and "" alike: no virtual corpus restriction.
  vcParam <- if (isTRUE(vc != '')) {
    paste0('&cq=', URLencode(vc, reserved = TRUE))
  } else {
    ''
  }
  request <- paste0('?q=', URLencode(query, reserved = TRUE), vcParam,
                    '&ql=', ql)
  webUIRequestUrl <- paste0(con$KorAPUrl, request)
  # Request exactly the fields the caller asked for; they are also stored on
  # the result so that later paging can select the same columns.
  requestUrl <- paste0(con$apiUrl, 'search', request,
                       '&fields=', paste(fields, collapse = ","),
                       '&access-rewrite-disabled=true')
  result <- fromJSON(paste0(requestUrl, '&count=1'))

  result$fields <- fields
  result$requestUrl <- requestUrl
  result$request <- request
  result$webUIRequestUrl <- webUIRequestUrl
  result
}
| 30 | |
#' Fetch all matches of a query page by page
#'
#' Repeatedly requests pages of 50 matches from `query$requestUrl` and
#' combines them into one data frame restricted to `query$fields`.
#'
#' @param query Result of `KorAPQuery()`; `meta$totalResults`, `requestUrl`
#'   and `fields` are read from it.
#' @param verbose If `TRUE`, print one progress line per retrieved page.
#' @return A data frame with one row per match and one column per requested
#'   field. Fields missing from a response are filled with `NA`. A `pubDate`
#'   column, if requested, is converted to `Date`; all other columns are
#'   converted to factors. An empty `data.frame()` is returned when the
#'   query has no results.
KorAPFectAll <- function(query, verbose=FALSE) {
  if (query$meta$totalResults == 0) {
    return(data.frame())
  }

  pages <- list()
  offset <- 0
  expectedResults <- NULL

  repeat {
    # Format the offset explicitly: pasting a double such as 100000 would
    # otherwise put "1e+05" into the URL.
    res <- fromJSON(paste0(query$requestUrl, '&count=50&offset=',
                           format(offset, scientific = FALSE, trim = TRUE)))
    if (res$meta$totalResults == 0) {
      return(data.frame())
    }
    # A page without matches cannot advance the download; stop instead of
    # failing on the column handling below or looping forever.
    if (NROW(res$matches) == 0) {
      break
    }

    for (field in setdiff(query$fields, colnames(res$matches))) {
      res$matches[[field]] <- NA
    }
    currentMatches <- res$matches[query$fields]

    # Only treat pubDate specially when it was actually requested.
    factorCols <- setdiff(colnames(currentMatches), "pubDate")
    currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
    if ("pubDate" %in% colnames(currentMatches)) {
      currentMatches$pubDate <- as.Date(currentMatches$pubDate,
                                        format = "%Y-%m-%d")
    }

    if (is.null(expectedResults)) {
      expectedResults <- res$meta$totalResults
    }
    # Collect pages and bind once at the end rather than rbind()-ing the
    # growing result on every iteration.
    pages[[length(pages) + 1]] <- currentMatches

    if (verbose) {
      cat(paste0("Retrieved page: ", length(pages), "/",
                 ceiling(expectedResults / res$meta$itemsPerPage), ': ',
                 res$meta$benchmark, '\n'))
    }

    offset <- offset + res$meta$itemsPerPage
    if (offset >= expectedResults) {
      break
    }
  }

  if (length(pages) == 0) {
    return(data.frame())
  }
  do.call(rbind, pages)
}