blob: 6898a37d49dc7bb1f6b5902fc11251beb1676119 [file] [log] [blame]
#' @import jsonlite
#' @import curl
3
4library(jsonlite)
5
# Metadata fields used as the default value of the `fields` argument of
# KorAPQuery().
defaultFields <- c(
  "corpusSigle",
  "textSigle",
  "pubDate",
  "pubPlace",
  "availability",
  "textClass"
)
8
#' Fetch statistics for a virtual corpus
#'
#' Builds the URL `<apiurl>statistics?cq=<vc>` and parses the JSON response
#' with `fromJSON()`.
#'
#' NOTE(review): `apiurl` is a free variable that is not defined in the
#' visible part of this file; it must exist in an enclosing environment when
#' this function is called.
#'
#' @param vc Virtual corpus definition string. It is URL-encoded (including
#'   reserved characters) before being appended to the request.
#' @return The parsed JSON response as returned by `fromJSON()`.
derekoStats <- function(vc = "") {
  encodedVc <- URLencode(vc, reserved = TRUE)
  statsUrl <- paste0(apiurl, "statistics?cq=", encodedVc)
  fromJSON(statsUrl)
}
13
#' Send a query and prepare a result object for paging
#'
#' Issues a search request with `count=1` to obtain the result metadata and
#' annotates the parsed response with the URLs and bookkeeping elements that
#' `KorAPFetchAll()` and `KorAPFetchNext()` read.
#'
#' @param con List-like connection object providing `KorAPUrl` (base URL used
#'   to build `webUIRequestUrl`) and `apiUrl` (base URL used to build
#'   `requestUrl`).
#' @param query Query string; URL-encoded (including reserved characters).
#' @param vc Virtual corpus definition string. When empty (or `NULL`/`NA`) no
#'   `cq` parameter is added to the request.
#' @param ql Query language identifier appended as the `ql` parameter.
#' @param fields Character vector of metadata fields to request for each
#'   match.
#' @return The parsed JSON response with these elements added: `fields`,
#'   `requestUrl`, `request`, `webUIRequestUrl`, `nextStartIndex` (set to 0)
#'   and `hasMoreMatches` (`TRUE` when `meta$totalResults > 0`).
KorAPQuery <- function(con, query, vc = "", ql = "poliqarp",
                       fields = defaultFields) {
  # Scalar condition, so use plain if/else instead of ifelse().
  hasVc <- length(vc) == 1 && !is.na(vc) && nzchar(vc)
  vcParam <- if (hasVc) {
    paste0("&cq=", URLencode(vc, reserved = TRUE))
  } else {
    ""
  }
  request <- paste0("?q=", URLencode(query, reserved = TRUE),
                    vcParam,
                    "&ql=", ql)
  webUIRequestUrl <- paste0(con$KorAPUrl, request)
  # Request exactly the fields the caller asked for; the fetch functions
  # later select these same columns from the returned matches.
  requestUrl <- paste0(con$apiUrl, "search", request,
                       "&fields=", paste(fields, collapse = ","),
                       "&access-rewrite-disabled=true")
  result <- fromJSON(paste0(requestUrl, "&count=1"))

  result$fields <- fields
  result$requestUrl <- requestUrl
  result$request <- request
  result$webUIRequestUrl <- webUIRequestUrl
  result$nextStartIndex <- 0
  result$hasMoreMatches <- (result$meta$totalResults > 0)
  result
}
32
#' Fetch all matches of a query, page by page
#'
#' Repeatedly requests pages of up to 50 matches from `query$requestUrl`,
#' advancing the offset by the page size reported in the response, until the
#' number of results reported by the first page has been covered.
#'
#' @param query Result object as produced by `KorAPQuery()`; the elements
#'   `meta$totalResults`, `requestUrl` and `fields` are read.
#' @param verbose If `TRUE`, a progress line is printed with `cat()` after
#'   each page.
#' @return A data frame with one row per match and one column per entry of
#'   `query$fields`. A `pubDate` column, if requested, is converted to `Date`;
#'   all other columns are converted to factors. Fields missing from a
#'   response are filled with `NA`. An empty data frame is returned when
#'   there are no results.
KorAPFetchAll <- function(query, verbose = FALSE) {
  if (query$meta$totalResults == 0) {
    return(data.frame())
  }

  dateField <- "pubDate"
  pages <- list()
  page <- 1
  results <- 0
  expectedResults <- NULL

  repeat {
    res <- fromJSON(paste0(query$requestUrl, "&count=50&offset=", results))
    if (res$meta$totalResults == 0) {
      return(data.frame())
    }
    pageSize <- res$meta$itemsPerPage
    # An empty page or a non-positive page size would otherwise make the
    # loop fail or never terminate; stop with what has been collected.
    if (length(res$matches) == 0 || is.null(pageSize) || pageSize < 1) {
      break
    }
    for (field in query$fields) {
      if (!field %in% colnames(res$matches)) {
        res$matches[, field] <- NA
      }
    }
    currentMatches <- res$matches[query$fields]
    # Only treat pubDate specially when it was actually requested, so that
    # custom field lists without pubDate work as well.
    factorCols <- setdiff(colnames(currentMatches), dateField)
    if (length(factorCols) > 0) {
      currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
    }
    if (dateField %in% colnames(currentMatches)) {
      currentMatches[[dateField]] <- as.Date(currentMatches[[dateField]],
                                             format = "%Y-%m-%d")
    }
    # Collect pages in a list and bind once at the end instead of growing a
    # data frame with rbind() on every iteration.
    pages[[length(pages) + 1]] <- currentMatches
    if (is.null(expectedResults)) {
      expectedResults <- res$meta$totalResults
    }
    if (verbose) {
      cat(paste0("Retrieved page: ", page, "/",
                 ceiling(expectedResults / pageSize), ": ",
                 res$meta$benchmark, "\n"))
    }
    page <- page + 1
    results <- results + pageSize
    if (results >= expectedResults) {
      break
    }
  }

  if (length(pages) == 0) {
    return(data.frame())
  }
  do.call(rbind, pages)
}
Marc Kupietzcb725f82019-08-30 18:04:57 +020068
#' Fetch the next page of matches for a query
#'
#' Requests up to 50 matches starting at `offset` and returns a new result
#' object whose `collectedMatches` holds the matches gathered so far.
#'
#' @param query Result object as produced by `KorAPQuery()` or by a previous
#'   call of this function; the elements `meta$totalResults`,
#'   `nextStartIndex`, `requestUrl`, `request`, `webUIRequestUrl`, `fields`
#'   and `collectedMatches` are read.
#' @param offset Start index of the page to fetch; defaults to
#'   `query$nextStartIndex`. With `offset == 0` the collected matches are
#'   restarted from this page, otherwise the page is appended to
#'   `query$collectedMatches`.
#' @param verbose If `TRUE`, a progress line is printed with `cat()`.
#' @return When `query$nextStartIndex` has reached `meta$totalResults`, or
#'   the fetched page contains no matches, `query` itself with
#'   `hasMoreMatches` set to `FALSE`. Otherwise the parsed response with
#'   `collectedMatches`, `nextStartIndex`, `fields`, `requestUrl`, `request`,
#'   `webUIRequestUrl` and `hasMoreMatches` set.
KorAPFetchNext <- function(query, offset = query$nextStartIndex,
                           verbose = FALSE) {
  if (query$nextStartIndex >= query$meta$totalResults) {
    query$hasMoreMatches <- FALSE
    return(query)
  }

  res <- fromJSON(paste0(query$requestUrl, "&count=50&offset=", offset))
  # An empty page cannot be turned into a data frame below; report that
  # nothing more is available instead of failing.
  if (length(res$matches) == 0) {
    query$hasMoreMatches <- FALSE
    return(query)
  }
  for (field in query$fields) {
    if (!field %in% colnames(res$matches)) {
      res$matches[, field] <- NA
    }
  }
  currentMatches <- res$matches[query$fields]
  # Only treat pubDate specially when it was actually requested, so that
  # custom field lists without pubDate work as well.
  dateField <- "pubDate"
  factorCols <- setdiff(colnames(currentMatches), dateField)
  if (length(factorCols) > 0) {
    currentMatches[factorCols] <- lapply(currentMatches[factorCols], factor)
  }
  if (dateField %in% colnames(currentMatches)) {
    currentMatches[[dateField]] <- as.Date(currentMatches[[dateField]],
                                           format = "%Y-%m-%d")
  }
  if (offset == 0) {
    res$collectedMatches <- currentMatches
  } else {
    res$collectedMatches <- rbind(query$collectedMatches, currentMatches)
  }
  if (verbose) {
    cat(paste0("Retrieved page in ", res$meta$benchmark, "\n"))
  }
  # Carry the request bookkeeping over to the new result object.
  res$nextStartIndex <- res$meta$startIndex + res$meta$itemsPerPage
  res$fields <- query$fields
  res$requestUrl <- query$requestUrl
  res$request <- query$request
  res$webUIRequestUrl <- query$webUIRequestUrl
  res$hasMoreMatches <- (res$meta$totalResults > res$nextStartIndex)

  res
}