blob: 38fbd7d420d6a7e5478b6a2fbc0572c9bbc7d62a [file] [log] [blame]
#' @import jsonlite
#' @import curl
3
4library(jsonlite)
5
# Metadata field names used as the default for the `fields` argument of
# KorAPQuery().
defaultFields <- c(
  "corpusSigle",
  "textSigle",
  "pubDate",
  "pubPlace",
  "availability",
  "textClass"
)
8
#' Fetch statistics for a virtual corpus from the `statistics` endpoint
#'
#' Builds `<apiurl>statistics?cq=<vc>` and parses the JSON response.
#' NOTE(review): `apiurl` is a free variable that is not defined in this part
#' of the file; it has to be available in an enclosing environment.
#'
#' @param vc Virtual corpus definition; percent-encoded (reserved characters
#'   included) and sent as the `cq` parameter. Defaults to the empty string.
#' @return Whatever `fromJSON()` returns for the response.
derekoStats <- function(vc = "") {
  statsUrl <- paste0(
    apiurl,
    "statistics?cq=",
    URLencode(vc, reserved = TRUE)
  )
  fromJSON(statsUrl)
}
13
#' Send a query to the search endpoint and return the first-hit response
#'
#' The request is issued with `count=1`; the returned object carries the
#' request URL (without `count`) so that further pages can be fetched later.
#'
#' @param con List-like connection object; `con$KorAPUrl` and `con$apiUrl`
#'   are read to build the web UI URL and the API URL respectively.
#' @param query Query string; percent-encoded and sent as `q`.
#' @param vc Virtual corpus definition; percent-encoded and sent as `cq`.
#'   The parameter is omitted when `vc` is empty, `NULL` or `NA`.
#' @param ql Query language identifier sent as `ql`.
#' @param fields Character vector of metadata field names to request; they
#'   are joined with commas and sent as `fields`.
#' @return The parsed JSON response, extended with the elements `fields`,
#'   `requestUrl`, `request` and `webUIRequestUrl`.
KorAPQuery <- function(con, query, vc = "", ql = "poliqarp",
                       fields = defaultFields) {
  # Scalar decision: plain if/else instead of the vectorised ifelse().
  hasVc <- length(vc) == 1L && !is.na(vc) && nzchar(vc)
  vcParam <- if (hasVc) {
    paste0("&cq=", URLencode(vc, reserved = TRUE))
  } else {
    ""
  }

  request <- paste0(
    "?q=", URLencode(query, reserved = TRUE),
    vcParam,
    "&ql=", ql
  )
  webUIRequestUrl <- paste0(con$KorAPUrl, request)

  # Request the fields the caller asked for (previously `defaultFields` was
  # hard-coded here, so a custom `fields` argument was never sent).
  requestUrl <- paste0(
    con$apiUrl, "search", request,
    "&fields=", paste(fields, collapse = ","),
    "&access-rewrite-disabled=true"
  )

  result <- fromJSON(paste0(requestUrl, "&count=1"))

  result$fields <- fields
  result$requestUrl <- requestUrl
  result$request <- request
  result$webUIRequestUrl <- webUIRequestUrl
  result
}
30
#' Fetch all matches of a query page by page
#'
#' Repeatedly requests `query$requestUrl` with `count=50` and an increasing
#' `offset` until the number of results reported by the first page has been
#' covered, and combines the pages into one data frame.
#'
#' @param query Query object as returned by `KorAPQuery()`; the elements
#'   `meta$totalResults`, `requestUrl` and `fields` are read.
#' @return A data frame with one column per entry of `query$fields`. A
#'   `pubDate` column, if requested, is converted to `Date`; all other
#'   columns are factors. Fields absent from a response page are filled
#'   with `NA`. An empty data frame is returned when there are no results.
KorAPFectAll <- function(query) {
  if (query$meta$totalResults == 0) {
    return(data.frame())
  }

  page <- 1
  results <- 0
  expectedResults <- NULL
  # Pages are collected in a list and bound once at the end; calling rbind()
  # on a growing data frame inside the loop copies it on every iteration.
  pages <- list()

  repeat {
    res <- fromJSON(paste0(query$requestUrl, "&count=50&offset=", results))
    if (res$meta$totalResults == 0) {
      return(data.frame())
    }

    itemsPerPage <- res$meta$itemsPerPage
    if (length(itemsPerPage) != 1L || is.na(itemsPerPage) ||
        itemsPerPage <= 0) {
      # Without a positive page size the offset would never advance.
      stop("Response does not report a positive 'itemsPerPage'",
           call. = FALSE)
    }

    # Make sure every requested field exists as a column.
    for (field in query$fields) {
      if (!field %in% colnames(res$matches)) {
        res$matches[, field] <- NA
      }
    }
    currentMatches <- res$matches[query$fields]

    # Everything except pubDate becomes a factor; pubDate is only converted
    # when it was actually requested.
    factorCols <- setdiff(colnames(currentMatches), "pubDate")
    currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
    if ("pubDate" %in% colnames(currentMatches)) {
      currentMatches$pubDate <- as.Date(currentMatches$pubDate,
                                        format = "%Y-%m-%d")
    }

    if (is.null(expectedResults)) {
      # The total reported by the first page decides when to stop.
      expectedResults <- res$meta$totalResults
    }
    pages[[length(pages) + 1L]] <- currentMatches

    print(paste0("Retrieved page: ", page, "/",
                 ceiling(expectedResults / itemsPerPage)))
    page <- page + 1
    results <- results + itemsPerPage
    if (results >= expectedResults) {
      break
    }
  }

  do.call(rbind, pages)
}