Build fetch API URL using httr2
This also prevents errors with numbers getting converted to scientific
notation via paste0.
Resolves #25
Change-Id: I92d94fa27119471d5659ac4db9a34e4571794489
diff --git a/NEWS.md b/NEWS.md
index 1f48bf3..198d152 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,6 @@
# Unpublished 1.0.0.9000
+- fixed bug with fetching result pages whose offset R formats in scientific notation (e.g. 100,000 = 1e+05), see [issue #25](https://github.com/KorAP/RKorAPClient/issues/25)
- timed out corpus queries are no longer cached (see [issue #7](https://github.com/KorAP/RKorAPClient/issues/7))
- improved error handling
- improved logging
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 04d859a..340bd4c 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -474,8 +474,19 @@
# Calculate the actual offset in tokens
currentOffset <- current_offset_page * maxResultsPerPage
- # Build the query with the appropriate count and offset
- query <- paste0(kqo@requestUrl, "&count=", min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage), "&offset=", currentOffset, "&cutoff=true")
+ # Build the query with the appropriate count and offset using httr2
+ count_param <- min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage)
+
+ # Parse existing URL to preserve all query parameters
+ parsed_url <- httr2::url_parse(kqo@requestUrl)
+ existing_query <- parsed_url$query
+
+ # Add/update count and offset parameters
+ existing_query$count <- count_param
+ existing_query$offset <- currentOffset
+
+ # Rebuild the URL with all parameters
+ query <- httr2::url_modify(kqo@requestUrl, query = existing_query)
res <- apiCall(kqo@korapConnection, query)
if (length(res$matches) == 0) {
break
diff --git a/tests/testthat/test-corpusQuery.R b/tests/testthat/test-corpusQuery.R
index e5289e0..d9e9575 100644
--- a/tests/testthat/test-corpusQuery.R
+++ b/tests/testthat/test-corpusQuery.R
@@ -103,6 +103,14 @@
expect_output(print(q), "Ameisenplage.*pubDate since 2014")
})
+test_that("fetchNext with offset 1000000 (= 1e+06) works", {
+ skip_if_offline()
+ kco <- KorAPConnection(accessToken = NULL, verbose = TRUE, cache = FALSE)
+ q <- corpusQuery(kco, "<base/s=t>")
+ q <- fetchNext(q, offset = 1000000)
+ expect_gt(nrow(q@collectedMatches), 10)
+})
+
test_that("Query from KorAP URL returns as many results as corresponding direct query", {
skip_if_offline()
kco <- KorAPConnection(accessToken = NULL)