Blame - R/KorAPQuery.R - KorAP/RKorAPClient

blob: 92a534895856a17ec5d88d088838f65634d1d475 [file] [log] [blame]

Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	1	#' KorAPQuery class (internal)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	2	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	3	#' Internal class for query state management. Users work with `corpusQuery()`, `fetchAll()`, and `fetchNext()` instead.
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	4	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	5	#' @keywords internal
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	6	#' @include KorAPConnection.R
Marc Kupietz	6dfeed9	2025-06-03 11:58:06 +0200	[diff] [blame]	7	#' @include logging.R
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	8	#' @import httr2
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	9	#'
Marc Kupietz	a6e4ee6	2021-03-05 09:00:15 +0100	[diff] [blame]	10	#' @include RKorAPClient-package.R
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	11
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	12	#' @export
				13	KorAPQuery <- setClass("KorAPQuery", slots = c(
Marc Kupietz	b897218	2019-09-20 21:33:46 +0200	[diff] [blame]	14	"korapConnection",
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	15	"request",
				16	"vc",
				17	"totalResults",
				18	"nextStartIndex",
				19	"fields",
				20	"requestUrl",
				21	"webUIRequestUrl",
				22	"apiResponse",
				23	"collectedMatches",
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	24	"hasMoreMatches"
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	25	))
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	26
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	27	#' Initialize KorAPQuery object
				28	#' @keywords internal
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	29	#' @param .Object …
Marc Kupietz	b897218	2019-09-20 21:33:46 +0200	[diff] [blame]	30	#' @param korapConnection KorAPConnection object
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	31	#' @param request query part of the request URL
				32	#' @param vc definition of a virtual corpus
				33	#' @param totalResults number of hits the query has yielded
				34	#' @param nextStartIndex at what index to start the next fetch of query results
				35	#' @param fields what data / metadata fields should be collected
				36	#' @param requestUrl complete URL of the API request
				37	#' @param webUIRequestUrl URL of a web frontend request corresponding to the API request
				38	#' @param apiResponse data-frame representation of the JSON response of the API request
Marc Kupietz	7776dec	2019-09-27 16:59:02 +0200	[diff] [blame]	39	#' @param hasMoreMatches logical that signals if more query results can be fetched
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	40	#' @param collectedMatches matches already fetched from the KorAP-API-server
Marc Kupietz	97a1bca	2019-10-04 22:52:09 +0200	[diff] [blame]	41	#'
				42	#' @importFrom tibble tibble
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	43	#' @export
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	44	setMethod(
				45	"initialize", "KorAPQuery",
				46	function(.Object, korapConnection = NULL, request = NULL, vc = "", totalResults = 0, nextStartIndex = 0, fields = c(
				47	"corpusSigle", "textSigle", "pubDate", "pubPlace",
				48	"availability", "textClass", "snippet", "tokens"
				49	),
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	50	requestUrl = "", webUIRequestUrl = "", apiResponse = NULL, hasMoreMatches = FALSE, collectedMatches = NULL) {
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	51	.Object <- callNextMethod()
				52	.Object@korapConnection <- korapConnection
				53	.Object@request <- request
				54	.Object@vc <- vc
				55	.Object@totalResults <- totalResults
				56	.Object@nextStartIndex <- nextStartIndex
				57	.Object@fields <- fields
				58	.Object@requestUrl <- requestUrl
				59	.Object@webUIRequestUrl <- webUIRequestUrl
				60	.Object@apiResponse <- apiResponse
				61	.Object@hasMoreMatches <- hasMoreMatches
				62	.Object@collectedMatches <- collectedMatches
				63	.Object
				64	}
				65	)
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	66
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	67	setGeneric("corpusQuery", function(kco, ...) standardGeneric("corpusQuery"))
				68	setGeneric("fetchAll", function(kqo, ...) standardGeneric("fetchAll"))
				69	setGeneric("fetchNext", function(kqo, ...) standardGeneric("fetchNext"))
				70	setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest"))
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	71	setGeneric("fetchAnnotations", function(kqo, ...) standardGeneric("fetchAnnotations"))
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	72	setGeneric("frequencyQuery", function(kco, ...) standardGeneric("frequencyQuery"))
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	73
				74	maxResultsPerPage <- 50
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	75
Marc Kupietz	4de53ec	2019-10-04 09:12:00 +0200	[diff] [blame]	76	## quiets concerns of R CMD check re: the .'s that appear in pipelines
Marc Kupietz	ef1ef4a	2025-02-19 12:12:40 +0100	[diff] [blame]	77	utils::globalVariables(c("."))
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	78
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	79	#' Search corpus for query terms
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	80	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	81	#' `corpusQuery` performs a corpus query via a connection to a KorAP-API-server
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	82	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	83	#' @family corpus search functions
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	84	#' @aliases corpusQuery
				85	#'
				86	#' @importFrom urltools url_encode
				87	#' @importFrom purrr pmap
Marc Kupietz	ea34b81	2025-06-25 15:49:00 +0200	[diff] [blame]	88	#' @importFrom dplyr bind_rows group_by
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	89	#'
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	90	#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	91	#' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	92	#' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	93	#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in `KorAPConnection`) to provide all necessary information for the query.
Marc Kupietz	132f005	2023-04-16 14:23:05 +0200	[diff] [blame]	94	#' @param metadataOnly logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE.
				95	#' If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize
				96	#' your RKorAPClient application as explained in the
				97	#' [authorization section](https://github.com/KorAP/RKorAPClient#authorization)
				98	#' of the RKorAPClient Readme on GitHub and set the `metadataOnly` parameter to
				99	#' `FALSE`.
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	100	#' @param ql string to choose the query language (see [section on Query Parameters](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters) in the Kustvakt-Wiki for possible values.
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	101	#' @param fields character vector specifying which metadata fields to retrieve for each match.
				102	#' Available fields depend on the corpus. For DeReKo (German Reference Corpus), possible fields include:
				103	#' \describe{
				104	#' \item{Text identification:}{`textSigle`, `docSigle`, `corpusSigle` - hierarchical text identifiers}
				105	#' \item{Publication info:}{`author`, `editor`, `title`, `docTitle`, `corpusTitle` - authorship and titles}
				106	#' \item{Temporal data:}{`pubDate`, `creationDate` - when text was published/created}
				107	#' \item{Publication details:}{`pubPlace`, `publisher`, `reference` - where/how published}
				108	#' \item{Text classification:}{`textClass`, `textType`, `textTypeArt`, `textDomain`, `textColumn` - topic domain, genre, text type and column}
				109	#' \item{Adminstrative and technical info:}{`corpusEditor`, `availability`, `language`, `foundries` - access rights and annotations}
				110	#' \item{Content data:}{`snippet`, `tokens`, `tokenSource`, `externalLink` - actual text content, tokenization, and link to source text}
				111	#' \item{System data:}{`indexCreationDate`, `indexLastModified` - corpus indexing info}
				112	#' }
				113	#' Use `c("textSigle", "pubDate", "author")` to retrieve multiple fields.
				114	#' Default fields provide basic text identification and publication metadata. The actual text content (`snippet` and `tokens`) are activated by default if `metadataOnly` is set to `FALSE`.
Marc Kupietz	43a6ade	2020-02-18 17:01:44 +0100	[diff] [blame]	115	#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).
Marc Kupietz	25aebc3	2019-09-16 18:40:50 +0200	[diff] [blame]	116	#' @param verbose print some info
Marc Kupietz	4de53ec	2019-10-04 09:12:00 +0200	[diff] [blame]	117	#' @param as.df return result as data frame instead of as S4 object?
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	118	#' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations. Defaults to `TRUE`, iff `query` and `vc` have different lengths
Marc Kupietz	d9b2fd7	2023-04-17 19:08:50 +0200	[diff] [blame]	119	#' @param context string that specifies the size of the left and the right context returned in `snippet`
				120	#' (provided that `metadataOnly` is set to `false` and that the necessary access right are met).
				121	#' The format of the context size specifcation (e.g. `3-token,3-token`) is described in the [Service: Search GET documentation of the Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET).
				122	#' If the parameter is not set, the default context size secification of the KorAP server instance will be used.
				123	#' Note that you cannot overrule the maximum context size set in the KorAP server instance,
				124	#' as this is typically legally motivated.
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	125	#' @return Depending on the `as.df` parameter, a tibble or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]).
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	126	#' A corresponding URL to be used within a web browser is contained in `@webUIRequestUrl`
				127	#' Please make sure to check `$collection$rewrites` to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	128	#'
				129	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	130	#' \dontrun{
				131	#'
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	132	#' # Fetch basic metadata for "Ameisenplage"
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	133	#' KorAPConnection() \|>
				134	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	135	#' fetchAll()
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	136	#'
				137	#' # Fetch specific metadata fields for bibliographic analysis
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	138	#' query <- KorAPConnection() \|>
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	139	#' corpusQuery("Ameisenplage",
				140	#' fields = c("textSigle", "author", "title", "pubDate", "pubPlace", "textType"))
				141	#' results <- fetchAll(query)
				142	#' results@collectedMatches
Marc Kupietz	657d8e7	2020-02-25 18:31:50 +0100	[diff] [blame]	143	#' }
Marc Kupietz	3c531f6	2019-09-13 12:17:24 +0200	[diff] [blame]	144	#'
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	145	#' \dontrun{
				146	#'
Marc Kupietz	603491f	2019-09-18 14:01:02 +0200	[diff] [blame]	147	#' # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
				148	#' # and show the number of query hits (but don't fetch them).
Marc Kupietz	69cc54a	2019-09-30 12:06:54 +0200	[diff] [blame]	149	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	150	#' KorAPConnection(verbose = TRUE) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	151	#' corpusQuery(
				152	#' KorAPUrl =
				153	#' "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp"
				154	#' )
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	155	#' }
				156	#'
				157	#' \dontrun{
Marc Kupietz	3c531f6	2019-09-13 12:17:24 +0200	[diff] [blame]	158	#'
Marc Kupietz	603491f	2019-09-18 14:01:02 +0200	[diff] [blame]	159	#' # Plot the time/frequency curve of "Ameisenplage"
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	160	#' KorAPConnection(verbose = TRUE) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	161	#' {
				162	#' . ->> kco
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	163	#' } \|>
				164	#' corpusQuery("Ameisenplage") \|>
				165	#' fetchAll() \|>
				166	#' slot("collectedMatches") \|>
				167	#' mutate(year = lubridate::year(pubDate)) \|>
				168	#' dplyr::select(year) \|>
				169	#' group_by(year) \|>
				170	#' summarise(Count = dplyr::n()) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	171	#' mutate(Freq = mapply(function(f, y) {
				172	#' f / corpusStats(kco, paste("pubDate in", y))@tokens
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	173	#' }, Count, year)) \|>
				174	#' dplyr::select(-Count) \|>
				175	#' complete(year = min(year):max(year), fill = list(Freq = 0)) \|>
Marc Kupietz	69cc54a	2019-09-30 12:06:54 +0200	[diff] [blame]	176	#' plot(type = "l")
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	177	#' }
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	178	#' @seealso [KorAPConnection()], [fetchNext()], [fetchRest()], [fetchAll()], [corpusStats()]
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	179	#'
				180	#' @references
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	181	#' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026>
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	182	#'
				183	#' @export
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	184	setMethod(
				185	"corpusQuery", "KorAPConnection",
				186	function(kco,
				187	query = if (missing(KorAPUrl)) {
				188	stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
				189	} else {
				190	httr2::url_parse(KorAPUrl)$query$q
				191	},
				192	vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
				193	KorAPUrl,
				194	metadataOnly = TRUE,
				195	ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql,
				196	fields = c(
				197	"corpusSigle",
				198	"textSigle",
				199	"pubDate",
				200	"pubPlace",
				201	"availability",
				202	"textClass",
				203	"snippet",
				204	"tokens"
				205	),
				206	accessRewriteFatal = TRUE,
				207	verbose = kco@verbose,
				208	expand = length(vc) != length(query),
				209	as.df = FALSE,
				210	context = NULL) {
				211	if (length(query) > 1 \|\| length(vc) > 1) {
				212	grid <- if (expand) expand_grid(query = query, vc = vc) else tibble(query = query, vc = vc)
Marc Kupietz	6ef61a8	2025-05-29 16:07:03 +0200	[diff] [blame]	213
				214	# Initialize timing variables for ETA calculation
				215	total_queries <- nrow(grid)
				216	current_query <- 0
				217	start_time <- Sys.time()
				218
Marc Kupietz	6ef61a8	2025-05-29 16:07:03 +0200	[diff] [blame]	219	results <- purrr::pmap(grid, function(query, vc, ...) {
				220	current_query <<- current_query + 1
				221
				222	# Execute the single query directly (avoiding recursive call)
				223	contentFields <- c("snippet", "tokens")
				224	query_fields <- fields
				225	if (metadataOnly) {
				226	query_fields <- query_fields[!query_fields %in% contentFields]
				227	}
				228	if (!"textSigle" %in% query_fields) {
				229	query_fields <- c(query_fields, "textSigle")
				230	}
				231	request <-
				232	paste0(
				233	"?q=",
				234	url_encode(enc2utf8(query)),
				235	ifelse(!metadataOnly && !is.null(context) && context != "", paste0("&context=", url_encode(enc2utf8(context))), ""),
				236	ifelse(vc != "", paste0("&cq=", url_encode(enc2utf8(vc))), ""),
				237	ifelse(!metadataOnly, "&show-tokens=true", ""),
				238	"&ql=", ql
				239	)
				240	webUIRequestUrl <- paste0(kco@KorAPUrl, request)
				241	requestUrl <- paste0(
				242	kco@apiUrl,
				243	"search",
				244	request,
				245	"&fields=",
				246	paste(query_fields, collapse = ","),
				247	if (metadataOnly) "&access-rewrite-disabled=true" else ""
				248	)
				249
				250	# Show individual query progress
				251	log_info(verbose, "\rSearching \"", query, "\" in \"", vc, "\"", sep = "")
				252	res <- apiCall(kco, paste0(requestUrl, "&count=0"))
				253	if (is.null(res)) {
				254	log_info(verbose, ": API call failed\n")
				255	totalResults <- 0
				256	} else {
				257	totalResults <- as.integer(res$meta$totalResults)
				258	log_info(verbose, ": ", totalResults, " hits")
				259	if (!is.null(res$meta$cached)) {
				260	log_info(verbose, " [cached]")
				261	} else if (!is.null(res$meta$benchmark)) {
				262	if (is.character(res$meta$benchmark) && grepl("s$", res$meta$benchmark)) {
				263	time_value <- as.numeric(sub("s$", "", res$meta$benchmark))
				264	formatted_time <- paste0(round(time_value, 2), "s")
				265	log_info(verbose, ", took ", formatted_time)
				266	} else {
				267	log_info(verbose, ", took ", res$meta$benchmark)
				268	}
				269	}
Marc Kupietz	365660e	2025-06-25 15:09:55 +0200	[diff] [blame]	270
				271	# Calculate and display ETA information on the same line if verbose and we have more than one query
				272	if (verbose && total_queries > 1) {
				273	eta_info <- calculate_eta(current_query, total_queries, start_time)
				274	if (eta_info != "") {
				275	elapsed_time <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
				276	avg_time_per_query <- elapsed_time / current_query
				277
				278	# Add ETA info to the same line - remove the leading ". " for cleaner formatting
				279	clean_eta_info <- sub("^\\. ", ". ", eta_info)
				280	log_info(verbose, clean_eta_info)
				281	}
				282	}
				283
Marc Kupietz	6ef61a8	2025-05-29 16:07:03 +0200	[diff] [blame]	284	log_info(verbose, "\n")
				285	}
				286
				287	result <- data.frame(
				288	query = query,
				289	totalResults = totalResults,
				290	vc = vc,
				291	webUIRequestUrl = webUIRequestUrl,
				292	stringsAsFactors = FALSE
				293	)
				294
Marc Kupietz	6ef61a8	2025-05-29 16:07:03 +0200	[diff] [blame]	295	return(result)
				296	})
				297
				298	results %>% bind_rows()
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	299	} else {
Marc Kupietz	2078bde	2023-08-27 16:46:15 +0200	[diff] [blame]	300	contentFields <- c("snippet", "tokens")
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	301	if (metadataOnly) {
				302	fields <- fields[!fields %in% contentFields]
				303	}
Marc Kupietz	80dc643	2025-02-07 16:57:40 +0100	[diff] [blame]	304	if (!"textSigle" %in% fields) {
				305	fields <- c(fields, "textSigle")
				306	}
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	307	request <-
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	308	paste0(
				309	"?q=",
				310	url_encode(enc2utf8(query)),
				311	ifelse(!metadataOnly && !is.null(context) && context != "", paste0("&context=", url_encode(enc2utf8(context))), ""),
				312	ifelse(vc != "", paste0("&cq=", url_encode(enc2utf8(vc))), ""),
				313	ifelse(!metadataOnly, "&show-tokens=true", ""),
				314	"&ql=", ql
				315	)
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	316	webUIRequestUrl <- paste0(kco@KorAPUrl, request)
				317	requestUrl <- paste0(
				318	kco@apiUrl,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	319	"search",
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	320	request,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	321	"&fields=",
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	322	paste(fields, collapse = ","),
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	323	if (metadataOnly) "&access-rewrite-disabled=true" else ""
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	324	)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	325	log_info(verbose, "\rSearching \"", query, "\" in \"", vc, "\"",
				326	sep =
				327	""
				328	)
				329	res <- apiCall(kco, paste0(requestUrl, "&count=0"))
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	330	if (is.null(res)) {
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	331	message("API call failed.")
				332	totalResults <- 0
				333	} else {
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	334	totalResults <- as.integer(res$meta$totalResults)
Marc Kupietz	a47d150	2023-04-18 15:26:47 +0200	[diff] [blame]	335	log_info(verbose, ": ", totalResults, " hits")
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	336	if (!is.null(res$meta$cached)) {
Marc Kupietz	a47d150	2023-04-18 15:26:47 +0200	[diff] [blame]	337	log_info(verbose, " [cached]\n")
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	338	} else if (!is.null(res$meta$benchmark)) {
Marc Kupietz	2baf5c5	2025-09-05 16:41:11 +0200	[diff] [blame]	339	# Round the benchmark time to 2 decimal places for better readability.
				340	# Be robust to locales using comma as decimal separator (e.g., "0,12s").
Marc Kupietz	7638ca4	2025-05-25 13:18:16 +0200	[diff] [blame]	341	if (is.character(res$meta$benchmark) && grepl("s$", res$meta$benchmark)) {
Marc Kupietz	2baf5c5	2025-09-05 16:41:11 +0200	[diff] [blame]	342	bench_str <- sub("s$", "", res$meta$benchmark)
				343	bench_num <- suppressWarnings(as.numeric(gsub(",", ".", bench_str)))
				344	if (!is.na(bench_num)) {
				345	formatted_time <- paste0(round(bench_num, 2), "s")
				346	} else {
				347	formatted_time <- res$meta$benchmark
				348	}
Marc Kupietz	7638ca4	2025-05-25 13:18:16 +0200	[diff] [blame]	349	log_info(verbose, ", took ", formatted_time, "\n", sep = "")
				350	} else {
				351	# Fallback if the format is different than expected
				352	log_info(verbose, ", took ", res$meta$benchmark, "\n", sep = "")
				353	}
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	354	} else {
				355	log_info(verbose, "\n")
				356	}
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	357	}
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	358	if (as.df) {
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	359	data.frame(
				360	query = query,
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	361	totalResults = totalResults,
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	362	vc = vc,
				363	webUIRequestUrl = webUIRequestUrl,
				364	stringsAsFactors = FALSE
				365	)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	366	} else {
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	367	KorAPQuery(
				368	korapConnection = kco,
				369	nextStartIndex = 0,
				370	fields = fields,
				371	requestUrl = requestUrl,
				372	request = request,
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	373	totalResults = totalResults,
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	374	vc = vc,
				375	apiResponse = res,
				376	webUIRequestUrl = webUIRequestUrl,
Marc Kupietz	a467572	2022-02-23 23:55:15 +0100	[diff] [blame]	377	hasMoreMatches = (totalResults > 0),
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	378	)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	379	}
Marc Kupietz	a96537f	2019-11-09 23:07:44 +0100	[diff] [blame]	380	}
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	381	}
				382	)
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	383
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	384	#' @importFrom purrr map
				385	repair_data_strcuture <- function(x) {
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	386	if (is.list(x)) {
				387	as.character(purrr::map(x, ~ if (length(.x) > 1) {
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	388	paste(.x, collapse = " ")
				389	} else {
				390	.x
				391	}))
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	392	} else {
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	393	ifelse(is.na(x), "", x)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	394	}
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	395	}
				396
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	397	#' Fetch the next bunch of results of a KorAP query.
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	398	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	399	#' `fetchNext` fetches the next bunch of results of a KorAP query.
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	400	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	401	#' @family corpus search functions
				402	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	403	#' @param kqo object obtained from [corpusQuery()]
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	404	#' @param offset start offset for query results to fetch
				405	#' @param maxFetch maximum number of query results to fetch
Marc Kupietz	25aebc3	2019-09-16 18:40:50 +0200	[diff] [blame]	406	#' @param verbose print progress information if true
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	407	#' @param randomizePageOrder fetch result pages in pseudo random order if true. Use [set.seed()] to set seed for reproducible results.
				408	#' @return The `kqo` input object with updated slots `collectedMatches`, `apiResponse`, `nextStartIndex`, `hasMoreMatches`
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	409	#'
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	410	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	411	#' \dontrun{
				412	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	413	#' q <- KorAPConnection() \|>
				414	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	415	#' fetchNext()
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	416	#' q@collectedMatches
Marc Kupietz	657d8e7	2020-02-25 18:31:50 +0100	[diff] [blame]	417	#' }
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	418	#'
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	419	#' @references
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	420	#' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026>
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	421	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	422	#' @aliases fetchNext
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	423	#' @importFrom dplyr rowwise mutate bind_rows select summarise n select
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	424	#' @importFrom tibble enframe add_column
				425	#' @importFrom stringr word
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	426	#' @importFrom tidyr unnest unchop pivot_wider
				427	#' @importFrom purrr map
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	428	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	429	setMethod("fetchNext", "KorAPQuery", function(kqo,
				430	offset = kqo@nextStartIndex,
				431	maxFetch = maxResultsPerPage,
				432	verbose = kqo@korapConnection@verbose,
				433	randomizePageOrder = FALSE) {
Marc Kupietz	a7a8f1b	2024-12-18 15:56:19 +0100	[diff] [blame]	434	# https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	435	results <- key <- name <- tmp_positions <- 0
Marc Kupietz	a7a8f1b	2024-12-18 15:56:19 +0100	[diff] [blame]	436
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	437	if (kqo@totalResults == 0 \|\| offset >= kqo@totalResults) {
				438	return(kqo)
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	439	}
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	440	use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	441	# Calculate the initial page number (not used directly - keeping for reference)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	442	collectedMatches <- kqo@collectedMatches
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	443
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	444	# Track start time for ETA calculation
				445	start_time <- Sys.time()
				446
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	447	# For randomized page order, generate a list of randomized page indices
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	448	if (randomizePageOrder) {
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	449	# Calculate how many pages we need to fetch based on maxFetch
				450	total_pages_to_fetch <- if (!is.na(maxFetch)) {
				451	# Either limited by maxFetch or total results, whichever is smaller
				452	min(ceiling(maxFetch / maxResultsPerPage), ceiling(kqo@totalResults / maxResultsPerPage))
				453	} else {
				454	# All pages
				455	ceiling(kqo@totalResults / maxResultsPerPage)
				456	}
				457
				458	# Generate randomized page indices (0-based for API)
				459	pages <- sample.int(ceiling(kqo@totalResults / maxResultsPerPage), total_pages_to_fetch) - 1
				460	page_index <- 1 # Index to track which page in the randomized list we're on
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	461	}
				462
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	463	if (is.null(collectedMatches)) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	464	collectedMatches <- data.frame()
				465	}
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	466
				467	# Initialize the page counter properly based on nextStartIndex and any previously fetched results
				468	# We add 1 to make it 1-based for display purposes since users expect page numbers to start from 1
				469	# For first call, this will be 1, for subsequent calls, it will reflect our actual position
				470	current_page_number <- ceiling(offset / maxResultsPerPage) + 1
				471
				472	# For sequential fetches, keep track of which global page we're on
				473	# This is important for correctly showing page numbers in subsequent fetchNext calls
				474	page_count_start <- current_page_number
				475
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	476	repeat {
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	477	# Determine which page to fetch next
				478	if (randomizePageOrder) {
				479	# In randomized mode, get the page from our randomized list using the page_index
				480	# Make sure we don't exceed the array bounds
				481	if (page_index > length(pages)) {
				482	break # No more pages to fetch in randomized mode
				483	}
				484	current_offset_page <- pages[page_index]
				485	# For display purposes in randomized mode, show which page out of the total we're fetching
				486	display_page_number <- page_index
				487	} else {
				488	# In sequential mode, use the current_page_number to calculate the offset
				489	current_offset_page <- (current_page_number - 1)
				490	display_page_number <- current_page_number
				491	}
				492
				493	# Calculate the actual offset in tokens
				494	currentOffset <- current_offset_page * maxResultsPerPage
				495
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	496	# Build the query with the appropriate count and offset using httr2
				497	count_param <- min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage)
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	498
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	499	# Parse existing URL to preserve all query parameters
				500	parsed_url <- httr2::url_parse(kqo@requestUrl)
				501	existing_query <- parsed_url$query
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	502
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	503	# Add/update count and offset parameters
				504	existing_query$count <- count_param
				505	existing_query$offset <- currentOffset
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	506
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	507	# Rebuild the URL with all parameters
				508	query <- httr2::url_modify(kqo@requestUrl, query = existing_query)
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	509	res <- apiCall(kqo@korapConnection, query)
				510	if (length(res$matches) == 0) {
				511	break
				512	}
				513
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	514	if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) \|\| as.numeric(use_korap_api) >= 1.0)) {
Marc Kupietz	16ccf11	2025-01-26 13:25:27 +0100	[diff] [blame]	515	log_info(verbose, "Using fields API: ")
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	516	currentMatches <- res$matches$fields %>%
				517	purrr::map(~ mutate(.x, value = repair_data_strcuture(value))) %>%
				518	tibble::enframe() %>%
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	519	tidyr::unnest(cols = value) %>%
				520	tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	521	dplyr::select(-name)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	522	if ("snippet" %in% colnames(res$matches)) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	523	currentMatches$snippet <- res$matches$snippet
				524	}
Marc Kupietz	3cd2c6c	2025-01-08 20:35:39 +0100	[diff] [blame]	525	if ("tokens" %in% colnames(res$matches)) {
				526	currentMatches$tokens <- res$matches$tokens
				527	}
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	528	} else {
				529	currentMatches <- res$matches
				530	}
				531
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	532	for (field in kqo@fields) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	533	if (!field %in% colnames(currentMatches)) {
				534	currentMatches[, field] <- NA
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	535	}
				536	}
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	537	currentMatches <- currentMatches %>%
				538	select(kqo@fields) %>%
				539	mutate(
Marc Kupietz	ff712a9	2025-07-18 09:07:23 +0200	[diff] [blame]	540	matchID = res$matches$matchID,
Marc Kupietz	0447da0	2025-01-08 20:51:09 +0100	[diff] [blame]	541	tmp_positions = gsub(".-p(\\d+)-(\\d+).", "\\1 \\2", res$matches$matchID),
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	542	matchStart = as.integer(stringr::word(tmp_positions, 1)),
				543	matchEnd = as.integer(stringr::word(tmp_positions, 2)) - 1
				544	) %>%
				545	select(-tmp_positions)
				546
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	547	if (!is.list(collectedMatches)) {
				548	collectedMatches <- currentMatches
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	549	} else {
Marc Kupietz	2078bde	2023-08-27 16:46:15 +0200	[diff] [blame]	550	collectedMatches <- bind_rows(collectedMatches, currentMatches)
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	551	}
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	552
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	553	# Get the actual items per page from the API response
				554	# We now consistently use maxResultsPerPage instead
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	555
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	556	# Calculate total pages consistently using fixed maxResultsPerPage
				557	# This ensures consistent page counting across the function
				558	total_pages <- ceiling(kqo@totalResults / maxResultsPerPage)
				559
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	560	# Calculate ETA using the centralized function from logging.R
				561	current_page <- if (randomizePageOrder) page_index else display_page_number
				562	total_pages_to_fetch <- if (!is.na(maxFetch)) {
				563	# Account for offset - we can only fetch from the remaining results after offset
				564	remaining_results_after_offset <- max(0, kqo@totalResults - offset)
				565	min(ceiling(maxFetch / maxResultsPerPage), ceiling(remaining_results_after_offset / maxResultsPerPage))
				566	} else {
				567	total_pages
				568	}
Marc Kupietz	365660e	2025-06-25 15:09:55 +0200	[diff] [blame]	569
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	570	eta_info <- calculate_eta(current_page, total_pages_to_fetch, start_time)
Marc Kupietz	365660e	2025-06-25 15:09:55 +0200	[diff] [blame]	571
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	572	# Extract timing information for display
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	573	time_per_page <- NA
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	574	if (!is.null(res$meta$benchmark) && is.character(res$meta$benchmark)) {
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	575	time_per_page <- suppressWarnings(as.numeric(sub("s", "", res$meta$benchmark)))
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	576	}
				577
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	578	# Create the page display string with proper formatting
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	579
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	580	# For global page tracking, calculate the absolute page number
				581	actual_display_number <- if (randomizePageOrder) {
				582	current_offset_page + 1 # In randomized mode, this is the actual page (0-based + 1)
				583	} else {
				584	# In sequential mode, the absolute page number is the actual offset page + 1 (to make it 1-based)
				585	current_offset_page + 1
				586	}
				587
				588	# For subsequent calls to fetchNext, we need to calculate the correct page numbers
				589	# based on the current batch being fetched
				590
				591	# For each call to fetchNext, we want to show 1/2, 2/2 (not 3/4, 4/4)
				592	# Simply count from 1 within the current batch
				593
				594	# The relative page number is simply the current position in this batch
				595	if (randomizePageOrder) {
				596	relative_page_number <- page_index # In randomized mode, we start from 1 in each batch
				597	} else {
				598	relative_page_number <- display_page_number - (page_count_start - 1)
				599	}
				600
				601	# How many pages will we fetch in this batch?
Marc Kupietz	021663d	2025-06-18 17:49:22 +0200	[diff] [blame]	602	# If maxFetch is specified, calculate the total pages for this fetch operation
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	603	pages_in_this_batch <- if (!is.na(maxFetch)) {
Marc Kupietz	021663d	2025-06-18 17:49:22 +0200	[diff] [blame]	604	# Account for offset - we can only fetch from the remaining results after offset
				605	remaining_results_after_offset <- max(0, kqo@totalResults - offset)
				606	min(ceiling(maxFetch / maxResultsPerPage), ceiling(remaining_results_after_offset / maxResultsPerPage))
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	607	} else {
				608	# Otherwise fetch all remaining pages
				609	total_pages - page_count_start + 1
				610	}
				611
				612	# The total pages to be shown in this batch
				613	batch_total_pages <- pages_in_this_batch
				614
				615	page_display <- paste0(
				616	"Retrieved page ",
				617	sprintf(paste0("%", nchar(batch_total_pages), "d"), relative_page_number),
				618	"/",
				619	sprintf("%d", batch_total_pages)
				620	)
				621
				622	# If randomized, also show which actual page we fetched
				623	if (randomizePageOrder) {
				624	# Determine the maximum width needed for page numbers (based on total pages)
				625	# This ensures consistent alignment
				626	max_page_width <- nchar(as.character(total_pages))
				627	# Add the actual page number that was fetched (0-based + 1 for display) with proper padding
Marc Kupietz	7638ca4	2025-05-25 13:18:16 +0200	[diff] [blame]	628	page_display <- paste0(
				629	page_display,
				630	sprintf(" (actual page %*d)", max_page_width, current_offset_page + 1)
				631	)
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	632	}
				633	# Always show the absolute page number and total pages (for clarity)
				634	else {
				635	# Show the absolute page number (out of total possible pages)
				636	page_display <- paste0(page_display, sprintf(
				637	" (page %d of %d total)",
				638	actual_display_number, total_pages
				639	))
				640	}
				641
				642	# Add caching or timing information
				643	if (!is.null(res$meta$cached)) {
				644	page_display <- paste0(page_display, " [cached]")
				645	} else {
				646	page_display <- paste0(
				647	page_display,
				648	" in ",
				649	if (!is.na(time_per_page)) sprintf("%4.1f", time_per_page) else "?",
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	650	"s",
				651	eta_info
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	652	)
				653	}
				654
				655	log_info(verbose, paste0(page_display, "\n"))
				656
				657	# Increment the appropriate counter based on mode
				658	if (randomizePageOrder) {
				659	page_index <- page_index + 1
				660	} else {
				661	current_page_number <- current_page_number + 1
				662	}
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	663	results <- results + res$meta$itemsPerPage
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	664	if (nrow(collectedMatches) >= kqo@totalResults \|\| (!is.na(maxFetch) && results >= maxFetch)) {
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	665	break
				666	}
				667	}
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	668	nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, kqo@totalResults)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	669	KorAPQuery(
				670	nextStartIndex = nextStartIndex,
Marc Kupietz	d0d3e9b	2019-09-24 17:36:03 +0200	[diff] [blame]	671	korapConnection = kqo@korapConnection,
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	672	fields = kqo@fields,
				673	requestUrl = kqo@requestUrl,
				674	request = kqo@request,
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	675	totalResults = kqo@totalResults,
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	676	vc = kqo@vc,
				677	webUIRequestUrl = kqo@webUIRequestUrl,
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	678	hasMoreMatches = (kqo@totalResults > nextStartIndex),
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	679	apiResponse = res,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	680	collectedMatches = collectedMatches
				681	)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	682	})
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	683
				684	#' Fetch all results of a KorAP query.
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	685	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	686	#' `fetchAll` fetches all results of a KorAP query.
Marc Kupietz	a6e4ee6	2021-03-05 09:00:15 +0100	[diff] [blame]	687	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	688	#' @family corpus search functions
Marc Kupietz	dc880ac	2025-06-24 20:34:43 +0200	[diff] [blame]	689	#' @param kqo object obtained from [corpusQuery()]
				690	#' @param verbose print progress information if true
				691	#' @param ... further arguments passed to [fetchNext()]
				692	#' @return The updated `kqo` object with all results in `@collectedMatches`
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	693	#'
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	694	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	695	#' \dontrun{
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	696	#' # Fetch all metadata of every query hit for "Ameisenplage" and show a summary
				697	#' q <- KorAPConnection() \|>
				698	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	699	#' fetchAll()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	700	#' q@collectedMatches
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	701	#'
				702	#' # Fetch also all KWICs
				703	#' q <- KorAPConnection() \|> auth() \|>
				704	#' corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
				705	#' fetchAll()
				706	#' q@collectedMatches
				707	#'
				708	#' # Retrieve title and text sigle metadata of all texts published on 1958-03-12
				709	#' q <- KorAPConnection() \|>
				710	#' corpusQuery("<base/s=t>", # this matches each text once
				711	#' vc = "pubDate in 1958-03-12",
				712	#' fields = c("textSigle", "title"),
				713	#' ) \|>
				714	#' fetchAll()
				715	#' q@collectedMatches
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	716	#' }
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	717	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	718	#' @aliases fetchAll
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	719	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	720	setMethod("fetchAll", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) {
				721	return(fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose, ...))
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	722	})
				723
				724	#' Fetches the remaining results of a KorAP query.
				725	#'
Marc Kupietz	dc880ac	2025-06-24 20:34:43 +0200	[diff] [blame]	726	#' @param kqo object obtained from [corpusQuery()]
				727	#' @param verbose print progress information if true
				728	#' @param ... further arguments passed to [fetchNext()]
				729	#' @return The updated `kqo` object with remaining results in `@collectedMatches`
				730	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	731	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	732	#' \dontrun{
				733	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	734	#' q <- KorAPConnection() \|>
				735	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	736	#' fetchRest()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	737	#' q@collectedMatches
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	738	#' }
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	739	#'
				740	#' @aliases fetchRest
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	741	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	742	setMethod("fetchRest", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) {
				743	return(fetchNext(kqo, maxFetch = NA, verbose = verbose, ...))
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	744	})
				745
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	746	#'
				747	#' Parse XML annotations into linguistic layers
				748	#'
				749	#' Internal helper function to extract linguistic annotations (lemma, POS, morphology)
				750	#' from XML annotation snippets returned by the KorAP API.
				751	#'
				752	#' @param xml_snippet XML string containing annotation data
				753	#' @return Named list with vectors for 'token', 'lemma', 'pos', and 'morph'
				754	#' @keywords internal
				755	parse_xml_annotations <- function(xml_snippet) {
				756	if (is.null(xml_snippet) \|\| is.na(xml_snippet) \|\| xml_snippet == "") {
				757	return(list(token = character(0), lemma = character(0), pos = character(0), morph = character(0)))
				758	}
				759
				760	# Extract content within <span class="match">...</span> using a more robust approach
				761	if (grepl('<span class="match">', xml_snippet)) {
				762	# Find the start of match span
				763	start_pos <- regexpr('<span class="match">', xml_snippet)
				764	if (start_pos > 0) {
				765	# Find the end by counting nested spans
				766	content_start <- start_pos + attr(start_pos, "match.length")
				767	remaining <- substr(xml_snippet, content_start, nchar(xml_snippet))
				768
				769	# Simple approach: extract everything until we hit context-right or end
				770	if (grepl('<span class="context-right">', remaining)) {
				771	content_to_parse <- gsub('(.?)<span class="context-right">.', '\\1', remaining)
				772	} else {
				773	# Find the closing </span> that matches our opening span
				774	# For now, use a simpler approach - take everything until the last </span> sequence
				775	content_to_parse <- gsub('(.)</span>\\s$', '\\1', remaining)
				776	}
				777	} else {
				778	content_to_parse <- xml_snippet
				779	}
				780	} else {
				781	content_to_parse <- xml_snippet
				782	}
				783
				784	# Initialize result vectors
				785	tokens <- character(0)
				786	lemmas <- character(0)
				787	pos_tags <- character(0)
				788	morph_tags <- character(0)
				789
				790	# Split the content by </span> and process each meaningful part
				791	parts <- unlist(strsplit(content_to_parse, '</span>'))
				792
				793	for (part in parts) {
				794	part <- trimws(part)
				795	if (nchar(part) == 0) next
				796
				797	# Look for parts that have title attributes and end with text
				798	if (grepl('<span[^>]*title=', part)) {
				799	# Extract the text content (everything after the last >)
				800	text_content <- gsub('.>([^<])$', '\\1', part)
				801	text_content <- trimws(text_content)
				802
				803	if (nchar(text_content) > 0 && !grepl('^<', text_content)) {
				804	tokens <- c(tokens, text_content)
				805
				806	# Extract all title attributes from this part
				807	title_pattern <- 'title="([^"]*)"'
				808	title_matches <- gregexpr(title_pattern, part)
				809
				810	lemma <- NA
				811	pos_tag <- NA
				812	morph_tag <- NA
				813
				814	if (title_matches[[1]][1] != -1) {
				815	all_titles <- regmatches(part, title_matches)[[1]]
				816	for (title_match in all_titles) {
				817	title_content <- gsub(title_pattern, '\\1', title_match)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	818
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	819	# Split by spaces and process each annotation
				820	annotations <- unlist(strsplit(title_content, "\\s+"))
				821	for (annotation in annotations) {
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	822	if (grepl('^[^/]+/l:', annotation)) {
				823	lemma <- gsub('^[^/]+/l:(.*)$', '\\1', annotation)
				824	} else if (grepl('^[^/]+/p:', annotation)) {
				825	pos_tag <- gsub('^[^/]+/p:(.*)$', '\\1', annotation)
				826	} else if (grepl('^[^/]+/m:', annotation)) {
				827	morph_tag <- gsub('^[^/]+/m:(.*)$', '\\1', annotation)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	828	}
				829	}
				830	}
				831	}
				832
				833	lemmas <- c(lemmas, lemma)
				834	pos_tags <- c(pos_tags, pos_tag)
				835	morph_tags <- c(morph_tags, morph_tag)
				836	}
				837	}
				838	}
				839
				840	# If no tokens found with the splitting approach, try a different method
				841	if (length(tokens) == 0) {
				842	# Look for the innermost spans that contain actual text
				843	innermost_pattern <- '<span[^>]title="([^"])"[^>]*>([^<]+)</span>'
				844	innermost_matches <- gregexpr(innermost_pattern, content_to_parse, perl = TRUE)
				845
				846	if (innermost_matches[[1]][1] != -1) {
				847	matches <- regmatches(content_to_parse, innermost_matches)[[1]]
				848
				849	for (match in matches) {
				850	title <- gsub(innermost_pattern, '\\1', match, perl = TRUE)
				851	text <- gsub(innermost_pattern, '\\2', match, perl = TRUE)
				852	text <- trimws(text)
				853
				854	if (nchar(text) > 0) {
				855	tokens <- c(tokens, text)
				856
				857	# Parse space-separated annotations in title
				858	lemma <- NA
				859	pos_tag <- NA
				860	morph_tag <- NA
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	861
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	862	annotations <- unlist(strsplit(title, "\\s+"))
				863	for (annotation in annotations) {
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	864	if (grepl('^[^/]+/l:', annotation)) {
				865	lemma <- gsub('^[^/]+/l:(.*)$', '\\1', annotation)
				866	} else if (grepl('^[^/]+/p:', annotation)) {
				867	pos_tag <- gsub('^[^/]+/p:(.*)$', '\\1', annotation)
				868	} else if (grepl('^[^/]+/m:', annotation)) {
				869	morph_tag <- gsub('^[^/]+/m:(.*)$', '\\1', annotation)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	870	}
				871	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	872
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	873	lemmas <- c(lemmas, lemma)
				874	pos_tags <- c(pos_tags, pos_tag)
				875	morph_tags <- c(morph_tags, morph_tag)
				876	}
				877	}
				878	}
				879	}
				880
				881	# Ensure all vectors have the same length
				882	max_length <- max(length(tokens), length(lemmas), length(pos_tags), length(morph_tags))
				883	if (max_length > 0) {
				884	tokens <- c(tokens, rep(NA, max_length - length(tokens)))
				885	lemmas <- c(lemmas, rep(NA, max_length - length(lemmas)))
				886	pos_tags <- c(pos_tags, rep(NA, max_length - length(pos_tags)))
				887	morph_tags <- c(morph_tags, rep(NA, max_length - length(morph_tags)))
				888	}
				889
				890	return(list(
				891	token = tokens,
				892	lemma = lemmas,
				893	pos = pos_tags,
				894	morph = morph_tags
				895	))
				896	}
				897
				898	#'
				899	#' Parse XML annotations into linguistic layers with left/match/right structure
				900	#'
				901	#' Internal helper function to extract linguistic annotations (lemma, POS, morphology)
				902	#' from XML annotation snippets returned by the KorAP API, split into left context,
				903	#' match, and right context sections like the tokens field.
				904	#'
				905	#' @param xml_snippet XML string containing annotation data
				906	#' @return Named list with nested structure containing left/match/right for 'atokens', 'lemma', 'pos', and 'morph'
				907	#' @keywords internal
				908	parse_xml_annotations_structured <- function(xml_snippet) {
				909	if (is.null(xml_snippet) \|\| is.na(xml_snippet) \|\| xml_snippet == "") {
				910	empty_result <- list(left = character(0), match = character(0), right = character(0))
				911	return(list(
				912	atokens = empty_result,
				913	lemma = empty_result,
				914	pos = empty_result,
				915	morph = empty_result
				916	))
				917	}
				918
				919	# Helper function to extract annotations from a span section
				920	extract_annotations_from_section <- function(section_content) {
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	921	# Remove any <mark>...</mark> tags that may interrupt token boundaries
				922	section_no_marks <- gsub('</?mark[^>]*>', '', section_content, perl = TRUE)
				923	# Normalize separators between adjacent top-level spans so splitting is robust.
				924	# Replace any punctuation/entity/space run between one-or-more closing spans and the next opening span
				925	# with a single space, preserving all closing spans.
				926	section_norm <- gsub('((?:</span>)+)[[:space:]](?:&[^;]+;\|[[:punct:]]\|[[:space:]])[[:space:]]*(<span)', '\\1 \\2', section_no_marks, perl = TRUE)
				927	# Handle both spaced tokens and nested single tokens by scanning innermost spans with direct text
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	928	tokens <- character(0)
				929	lemmas <- character(0)
				930	pos_tags <- character(0)
				931	morph_tags <- character(0)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	932
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	933	pat_token <- '<span[^>]title="([^"])"[^>]*>([^<]+)</span>'
				934	mm <- gregexpr(pat_token, section_norm, perl = TRUE)
				935	if (mm[[1]][1] != -1) {
				936	starts <- mm[[1]]
				937	lens <- attr(mm[[1]], 'match.length')
				938	for (k in seq_along(starts)) {
				939	s <- starts[k]
				940	e <- s + lens[k] - 1
				941	fragment <- substr(section_norm, s, e)
				942	text_content <- sub(pat_token, '\\2', fragment, perl = TRUE)
				943	text_content <- trimws(text_content)
				944	title_content <- sub(pat_token, '\\1', fragment, perl = TRUE)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	945
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	946	if (nchar(text_content) == 0) next
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	947
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	948	lemma <- NA
				949	pos_tag <- NA
				950	morph_features <- character(0)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	951
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	952	# parse inner title
				953	ann <- unlist(strsplit(title_content, "[[:space:]]+"))
				954	for (a in ann) {
				955	if (grepl('/l:', a)) {
				956	lemma <- sub('.?/l:(.)$', '\\1', a, perl = TRUE)
				957	} else if (grepl('/p:', a)) {
				958	pos_tag <- sub('.?/p:(.)$', '\\1', a, perl = TRUE)
				959	} else if (grepl('/m:', a)) {
				960	morph_features <- c(morph_features, sub('.?/m:(.)$', '\\1', a, perl = TRUE))
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	961	}
				962	}
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	963
				964	# If lemma missing, look back in nearby context for the nearest title containing l:
				965	if (is.na(lemma) \|\| nchar(lemma) == 0) {
				966	ctx_start <- max(1, s - 500)
				967	context <- substr(section_norm, ctx_start, s - 1)
				968	tmm <- gregexpr('title="([^"]*)"', context, perl = TRUE)
				969	if (tmm[[1]][1] != -1) {
				970	ctx_titles <- regmatches(context, tmm)[[1]]
				971	for (ti in rev(ctx_titles)) {
				972	cont <- sub('title="([^"]*)"', '\\1', ti, perl = TRUE)
				973	if (grepl('/l:', cont)) {
				974	lemma <- sub('.?/l:([^ ]+).', '\\1', cont, perl = TRUE)
				975	break
				976	}
				977	}
				978	}
				979	}
				980
				981	# If POS missing, keep NA; morphological features may also appear in outer titles
				982	if (length(morph_features) == 0) {
				983	ctx_start <- max(1, s - 500)
				984	context <- substr(section_norm, ctx_start, s - 1)
				985	tmm <- gregexpr('title="([^"]*)"', context, perl = TRUE)
				986	if (tmm[[1]][1] != -1) {
				987	ctx_titles <- regmatches(context, tmm)[[1]]
				988	for (ti in rev(ctx_titles)) {
				989	cont <- sub('title="([^"]*)"', '\\1', ti, perl = TRUE)
				990	if (grepl('/m:', cont)) {
				991	mparts <- unlist(strsplit(cont, "[[:space:]]+"))
				992	for (mp in mparts) if (grepl('/m:', mp)) morph_features <- c(morph_features, sub('.?/m:(.)$', '\\1', mp, perl = TRUE))
				993	break
				994	}
				995	}
				996	}
				997	}
				998
				999	tokens <- c(tokens, text_content)
				1000	lemmas <- c(lemmas, if (!is.null(lemma)) lemma else NA)
				1001	pos_tags <- c(pos_tags, if (!is.null(pos_tag)) pos_tag else NA)
				1002	morph_tags <- c(morph_tags, if (length(morph_features) > 0) paste(morph_features, collapse = "\|") else NA)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1003	}
				1004	}
				1005
				1006	# Ensure all vectors have the same length
				1007	max_length <- max(length(tokens), length(lemmas), length(pos_tags), length(morph_tags))
				1008	if (max_length > 0) {
				1009	tokens <- c(tokens, rep(NA, max_length - length(tokens)))
				1010	lemmas <- c(lemmas, rep(NA, max_length - length(lemmas)))
				1011	pos_tags <- c(pos_tags, rep(NA, max_length - length(pos_tags)))
				1012	morph_tags <- c(morph_tags, rep(NA, max_length - length(morph_tags)))
				1013	}
				1014
				1015	return(list(
				1016	tokens = tokens,
				1017	lemmas = lemmas,
				1018	pos_tags = pos_tags,
				1019	morph_tags = morph_tags
				1020	))
				1021	}
				1022
				1023	# Split the XML into three parts: left context, match content, and right context
				1024	# The structure is: <span class="match">...left...<mark>...match...</mark>...right...</span>
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1025
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1026	# First extract the content within the match span using DOTALL modifier
				1027	match_span_pattern <- '(?s)<span class="match">(.?)</span>\\s<span class="context-right">'
				1028	match_span_match <- regexpr(match_span_pattern, xml_snippet, perl = TRUE)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1029
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1030	if (match_span_match == -1) {
				1031	# Try alternative pattern if no context-right
				1032	match_span_pattern <- '(?s)<span class="match">(.?)</span>\\s$'
				1033	match_span_match <- regexpr(match_span_pattern, xml_snippet, perl = TRUE)
				1034	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1035
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1036	if (match_span_match > 0) {
				1037	match_span_content <- gsub(match_span_pattern, '\\1', xml_snippet, perl = TRUE)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1038
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1039	# Now find the <mark> and </mark> positions within this content
				1040	mark_start <- regexpr('<mark[^>]*>', match_span_content, perl = TRUE)
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1041	# Use the LAST closing </mark> to cover multi-part matches
				1042	mark_end_gre <- gregexpr('</mark>', match_span_content, perl = TRUE)
				1043	mark_end_positions <- mark_end_gre[[1]]
				1044	mark_end <- if (!is.null(mark_end_positions) && length(mark_end_positions) > 0 && mark_end_positions[1] != -1)
				1045	mark_end_positions[length(mark_end_positions)] else -1
				1046	mark_end_len <- if (mark_end != -1) attr(mark_end_gre[[1]], "match.length")[length(mark_end_positions)] else 0
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1047
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1048	if (mark_start > 0 && mark_end > 0) {
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1049	# Left context: everything before first <mark>
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1050	left_content <- substr(match_span_content, 1, mark_start - 1)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1051
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1052	# Match content: everything between first <mark> and last </mark>
				1053	match_content <- substr(match_span_content, mark_start, mark_end + mark_end_len - 1)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1054
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1055	# Right context: everything after last </mark>
				1056	right_content_start <- mark_end + mark_end_len
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1057	right_content <- substr(match_span_content, right_content_start, nchar(match_span_content))
				1058	} else {
				1059	# No mark tags found, treat entire match span as match content
				1060	left_content <- ""
				1061	match_content <- match_span_content
				1062	right_content <- ""
				1063	}
				1064	} else {
				1065	# No match span found, treat entire content as match
				1066	left_content <- ""
				1067	match_content <- xml_snippet
				1068	right_content <- ""
				1069	}
				1070
				1071	# Process each section
				1072	left_annotations <- extract_annotations_from_section(left_content)
				1073	match_annotations <- extract_annotations_from_section(match_content)
				1074	right_annotations <- extract_annotations_from_section(right_content)
				1075
				1076	return(list(
				1077	atokens = list(
				1078	left = left_annotations$tokens,
				1079	match = match_annotations$tokens,
				1080	right = right_annotations$tokens
				1081	),
				1082	lemma = list(
				1083	left = left_annotations$lemmas,
				1084	match = match_annotations$lemmas,
				1085	right = right_annotations$lemmas
				1086	),
				1087	pos = list(
				1088	left = left_annotations$pos_tags,
				1089	match = match_annotations$pos_tags,
				1090	right = right_annotations$pos_tags
				1091	),
				1092	morph = list(
				1093	left = left_annotations$morph_tags,
				1094	match = match_annotations$morph_tags,
				1095	right = right_annotations$morph_tags
				1096	)
				1097	))
				1098	}
				1099
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1100	#' Fetch annotations for all collected matches
				1101	#'
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1102	#' `r lifecycle::badge("experimental")`
				1103	#'
				1104	#' `fetchAnnotations` fetches annotations (only token annotations, for now)
				1105	#' for all matches in the `@collectedMatches` slot
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1106	#' of a KorAPQuery object and adds annotation columns directly to the `@collectedMatches`
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1107	#' data frame. The method uses the `matchID` from collected matches.
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1108	#'
				1109	#' Important: For copyright-restricted corpora, users must be authorized via [auth()]
				1110	#' and the initial corpus query must have `metadataOnly = FALSE` to ensure snippets are
				1111	#' available for annotation parsing.
				1112	#'
				1113	#' The method parses XML snippet annotations and adds linguistic columns to the data frame:
				1114	#' - `pos`: data frame with `left`, `match`, `right` columns, each containing list vectors of part-of-speech tags
				1115	#' - `lemma`: data frame with `left`, `match`, `right` columns, each containing list vectors of lemmas
				1116	#' - `morph`: data frame with `left`, `match`, `right` columns, each containing list vectors of morphological tags
				1117	#' - `atokens`: data frame with `left`, `match`, `right` columns, each containing list vectors of token text (from annotations)
				1118	#' - `annotation_snippet`: original XML snippet from the annotation API
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1119	#'
				1120	#' @family corpus search functions
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1121	#' @concept Annotations
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1122	#' @aliases fetchAnnotations
				1123	#'
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1124	#' @param kqo object obtained from [corpusQuery()] with collected matches. Note: the original corpus query should have `metadataOnly = FALSE` for annotation parsing to work.
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1125	#' @param foundry string specifying the foundry to use for annotations (default: "tt" for Tree-Tagger)
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1126	#' @param overwrite logical; if TRUE, re-fetch and replace any existing
				1127	#' annotation columns. If FALSE (default), only add missing annotation layers
				1128	#' and preserve already fetched ones (e.g., keep POS/lemma from a previous
				1129	#' foundry while adding morph from another).
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1130	#' @param verbose print progress information if true
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1131	#' @return The updated `kqo` object with annotation columns
				1132	#' like `pos`, `lemma`, `morph` (and `atokens` and `annotation_snippet`)
				1133	#' in the `@collectedMatches` slot. Each column is a data frame
				1134	#' with `left`, `match`, and `right` columns containing list vectors of annotations
				1135	#' for the left context, matched tokens, and right context, respectively.
				1136	#' The original XML snippet for each match is also stored in `annotation_snippet`.
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1137	#'
				1138	#' @examples
				1139	#' \dontrun{
				1140	#'
				1141	#' # Fetch annotations for matches using Tree-Tagger foundry
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1142	#' # Note: Authorization required for copyright-restricted corpora
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1143	#' q <- KorAPConnection() \|>
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1144	#' auth() \|>
				1145	#' corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1146	#' fetchNext(maxFetch = 10) \|>
				1147	#' fetchAnnotations()
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1148	#'
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1149	#' # Access linguistic annotations for match i:
				1150	#' pos_tags <- q@collectedMatches$pos # Data frame with left/match/right columns for POS tags
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1151	#' lemmas <- q@collectedMatches$lemma # Data frame with left/match/right columns for lemmas
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1152	#' morphology <- q@collectedMatches$morph # Data frame with left/match/right columns for morphological tags
				1153	#' atokens <- q@collectedMatches$atokens # Data frame with left/match/right columns for annotation token text
				1154	#' raw_snippet <- q@collectedMatches$annotation_snippet[[i]] # Original XML snippet for match i
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1155	#'
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1156	#' # Access specific components:
				1157	#' match_pos <- q@collectedMatches$pos$match[[i]] # POS tags for the matched tokens in match i
				1158	#' left_lemmas <- q@collectedMatches$lemma$left[[i]] # Lemmas for the left context in match i
				1159	#' right_tokens <- q@collectedMatches$atokens$right[[i]] # Token text for the right context in match i
				1160	#'
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1161	#' # Use a different foundry (e.g., MarMoT)
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1162	#' q <- KorAPConnection() \|>
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1163	#' auth() \|>
				1164	#' corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1165	#' fetchNext(maxFetch = 10) \|>
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	1166	#' fetchAnnotations(foundry = "marmot")
				1167	#' q@collectedMatches$pos$left[1] # POS tags for the left context of the first match
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1168	#' }
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1169	#' @export
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1170	setMethod("fetchAnnotations", "KorAPQuery", function(kqo, foundry = "tt", overwrite = FALSE, verbose = kqo@korapConnection@verbose) {
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1171	if (is.null(kqo@collectedMatches) \|\| nrow(kqo@collectedMatches) == 0) {
				1172	warning("No collected matches found. Please run fetchNext() or fetchAll() first.")
				1173	return(kqo)
				1174	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1175
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1176	df <- kqo@collectedMatches
				1177	kco <- kqo@korapConnection
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1178
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1179	# Initialize annotation columns as data frames (like tokens field)
				1180	# Create the structure more explicitly to avoid assignment issues
				1181	nrows <- nrow(df)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1182
Marc Kupietz	03d2b1a	2025-07-19 09:14:45 +0200	[diff] [blame]	1183	# Pre-compute the empty character vector list to avoid repeated computation
				1184	empty_char_list <- I(replicate(nrows, character(0), simplify = FALSE))
				1185
				1186	# Helper function to create annotation data frame structure
				1187	create_annotation_df <- function(empty_list) {
				1188	data.frame(
				1189	left = empty_list,
				1190	match = empty_list,
				1191	right = empty_list,
				1192	stringsAsFactors = FALSE
				1193	)
				1194	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1195
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1196	# Track which annotation columns already existed to decide overwrite behavior
				1197	existing_types <- list(
				1198	pos = "pos" %in% colnames(df),
				1199	lemma = "lemma" %in% colnames(df),
				1200	morph = "morph" %in% colnames(df),
				1201	atokens = "atokens" %in% colnames(df),
				1202	annotation_snippet = "annotation_snippet" %in% colnames(df)
				1203	)
				1204
				1205	# Initialize annotation columns using the helper function
Marc Kupietz	03d2b1a	2025-07-19 09:14:45 +0200	[diff] [blame]	1206	annotation_types <- c("pos", "lemma", "morph", "atokens")
				1207	for (type in annotation_types) {
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1208	if (overwrite \|\| !existing_types[[type]]) {
				1209	df[[type]] <- create_annotation_df(empty_char_list)
				1210	}
Marc Kupietz	03d2b1a	2025-07-19 09:14:45 +0200	[diff] [blame]	1211	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1212
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1213	if (overwrite \|\| !existing_types$annotation_snippet) {
				1214	df$annotation_snippet <- replicate(nrows, NA, simplify = FALSE)
				1215	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1216
Marc Kupietz	e8c0fef	2025-07-18 19:59:04 +0200	[diff] [blame]	1217	# Initialize timing for ETA calculation
				1218	start_time <- Sys.time()
				1219	if (verbose) {
				1220	log_info(verbose, paste("Starting to fetch annotations for", nrows, "matches\n"))
				1221	}
				1222
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1223	# Helper to decide if existing annotation row is effectively empty
				1224	is_empty_annotation_row <- function(ann_df, row_index) {
				1225	if (is.null(ann_df) \|\| nrow(ann_df) < row_index) return(TRUE)
				1226	left_val <- ann_df$left[[row_index]]
				1227	match_val <- ann_df$match[[row_index]]
				1228	right_val <- ann_df$right[[row_index]]
				1229	all(
				1230	(is.null(left_val) \|\| (length(left_val) == 0) \|\| all(is.na(left_val))),
				1231	(is.null(match_val) \|\| (length(match_val) == 0) \|\| all(is.na(match_val))),
				1232	(is.null(right_val) \|\| (length(right_val) == 0) \|\| all(is.na(right_val)))
				1233	)
				1234	}
				1235
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1236	for (i in seq_len(nrow(df))) {
Marc Kupietz	e8c0fef	2025-07-18 19:59:04 +0200	[diff] [blame]	1237	# ETA logging
				1238	if (verbose && i > 1) {
				1239	eta_info <- calculate_eta(i, nrows, start_time)
				1240	log_info(verbose, paste("Fetching annotations for match", i, "of", nrows, eta_info, "\n"))
				1241	}
Marc Kupietz	ff712a9	2025-07-18 09:07:23 +0200	[diff] [blame]	1242	# Use matchID if available, otherwise fall back to constructing from matchStart/matchEnd
				1243	if ("matchID" %in% colnames(df) && !is.na(df$matchID[i])) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1244	# matchID format: "match-match-A00/JUN/39609-p202-203" or encrypted format like
				1245	# "match-DNB10/CSL/80400-p2343-2344x_MinDOhu_P6dd2MMZJyyus_7MairdKnr1LxY07Cya-Ow"
				1246	# Extract document path and position, handling both regular and encrypted formats
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1247
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1248	# More flexible regex to extract the document path with position and encryption
				1249	# Look for pattern: match-(...)-p(\d+)-(\d+)(.) where (.) is the encrypted part
				1250	# We need to capture the entire path including the encrypted suffix
				1251	match_result <- regexpr("match-(.+?-p\\d+-\\d+.*)", df$matchID[i], perl = TRUE)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1252
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1253	if (match_result > 0) {
				1254	# Extract the complete path including encryption (everything after "match-")
				1255	doc_path_with_pos_and_encryption <- gsub("^match-(.+)$", "\\1", df$matchID[i], perl = TRUE)
				1256	# Convert the dash before position to slash, but keep everything after the position
				1257	match_path <- gsub("-p(\\d+-\\d+.*)", "/p\\1", doc_path_with_pos_and_encryption)
Marc Kupietz	2512130	2025-07-19 08:45:43 +0200	[diff] [blame]	1258	# Use httr2 to construct URL safely
				1259	base_url <- paste0(kco@apiUrl, "corpus/", match_path)
				1260	req <- httr2::url_modify(base_url, query = list(foundry = foundry))
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1261	} else {
Marc Kupietz	2512130	2025-07-19 08:45:43 +0200	[diff] [blame]	1262	# If regex fails, fall back to the old method with httr2
				1263	# Format numbers to avoid scientific notation
				1264	match_start <- format(df$matchStart[i], scientific = FALSE)
				1265	match_end <- format(df$matchEnd[i], scientific = FALSE)
				1266	base_url <- paste0(kco@apiUrl, "corpus/", df$textSigle[i], "/", "p", match_start, "-", match_end)
				1267	req <- httr2::url_modify(base_url, query = list(foundry = foundry))
Marc Kupietz	ff712a9	2025-07-18 09:07:23 +0200	[diff] [blame]	1268	}
				1269	} else {
Marc Kupietz	2512130	2025-07-19 08:45:43 +0200	[diff] [blame]	1270	# Fallback to the old method with httr2
				1271	# Format numbers to avoid scientific notation
				1272	match_start <- format(df$matchStart[i], scientific = FALSE)
				1273	match_end <- format(df$matchEnd[i], scientific = FALSE)
				1274	base_url <- paste0(kco@apiUrl, "corpus/", df$textSigle[i], "/", "p", match_start, "-", match_end)
				1275	req <- httr2::url_modify(base_url, query = list(foundry = foundry))
Marc Kupietz	ff712a9	2025-07-18 09:07:23 +0200	[diff] [blame]	1276	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1277
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1278	tryCatch({
				1279	res <- apiCall(kco, req)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1280
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1281	if (!is.null(res)) {
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1282	# Store the raw annotation snippet (respect overwrite flag)
				1283	if (overwrite \|\| !existing_types$annotation_snippet \|\| is.null(df$annotation_snippet[[i]]) \|\| is.na(df$annotation_snippet[[i]])) {
				1284	df$annotation_snippet[[i]] <- if (is.list(res) && "snippet" %in% names(res)) res$snippet else NA
				1285	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1286
				1287	# Parse XML annotations if snippet is available
				1288	if (is.list(res) && "snippet" %in% names(res)) {
				1289	parsed_annotations <- parse_xml_annotations_structured(res$snippet)
				1290
				1291	# Store the parsed linguistic data in data frame format (like tokens)
				1292	# Use individual assignment to avoid data frame mismatch errors
				1293	tryCatch({
				1294	# Assign POS annotations
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1295	if (overwrite \|\| !existing_types$pos \|\| is_empty_annotation_row(df$pos, i)) {
				1296	df$pos$left[i] <- list(parsed_annotations$pos$left)
				1297	df$pos$match[i] <- list(parsed_annotations$pos$match)
				1298	df$pos$right[i] <- list(parsed_annotations$pos$right)
				1299	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1300
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1301	# Assign lemma annotations
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1302	if (overwrite \|\| !existing_types$lemma \|\| is_empty_annotation_row(df$lemma, i)) {
				1303	df$lemma$left[i] <- list(parsed_annotations$lemma$left)
				1304	df$lemma$match[i] <- list(parsed_annotations$lemma$match)
				1305	df$lemma$right[i] <- list(parsed_annotations$lemma$right)
				1306	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1307
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1308	# Assign morphology annotations
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1309	if (overwrite \|\| !existing_types$morph \|\| is_empty_annotation_row(df$morph, i)) {
				1310	df$morph$left[i] <- list(parsed_annotations$morph$left)
				1311	df$morph$match[i] <- list(parsed_annotations$morph$match)
				1312	df$morph$right[i] <- list(parsed_annotations$morph$right)
				1313	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1314
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1315	# Assign token annotations
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1316	if (overwrite \|\| !existing_types$atokens \|\| is_empty_annotation_row(df$atokens, i)) {
				1317	df$atokens$left[i] <- list(parsed_annotations$atokens$left)
				1318	df$atokens$match[i] <- list(parsed_annotations$atokens$match)
				1319	df$atokens$right[i] <- list(parsed_annotations$atokens$right)
				1320	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1321	}, error = function(assign_error) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1322	# Set empty character vectors on assignment error using list assignment
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1323	if (overwrite \|\| !existing_types$pos) {
				1324	df$pos$left[i] <<- list(character(0))
				1325	df$pos$match[i] <<- list(character(0))
				1326	df$pos$right[i] <<- list(character(0))
				1327	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1328
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1329	if (overwrite \|\| !existing_types$lemma) {
				1330	df$lemma$left[i] <<- list(character(0))
				1331	df$lemma$match[i] <<- list(character(0))
				1332	df$lemma$right[i] <<- list(character(0))
				1333	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1334
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1335	if (overwrite \|\| !existing_types$morph) {
				1336	df$morph$left[i] <<- list(character(0))
				1337	df$morph$match[i] <<- list(character(0))
				1338	df$morph$right[i] <<- list(character(0))
				1339	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1340
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1341	if (overwrite \|\| !existing_types$atokens) {
				1342	df$atokens$left[i] <<- list(character(0))
				1343	df$atokens$match[i] <<- list(character(0))
				1344	df$atokens$right[i] <<- list(character(0))
				1345	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1346	})
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1347	} else {
				1348	# No snippet available, store empty vectors
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1349	if (overwrite \|\| !existing_types$pos) {
				1350	df$pos$left[i] <- list(character(0))
				1351	df$pos$match[i] <- list(character(0))
				1352	df$pos$right[i] <- list(character(0))
				1353	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1354
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1355	if (overwrite \|\| !existing_types$lemma) {
				1356	df$lemma$left[i] <- list(character(0))
				1357	df$lemma$match[i] <- list(character(0))
				1358	df$lemma$right[i] <- list(character(0))
				1359	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1360
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1361	if (overwrite \|\| !existing_types$morph) {
				1362	df$morph$left[i] <- list(character(0))
				1363	df$morph$match[i] <- list(character(0))
				1364	df$morph$right[i] <- list(character(0))
				1365	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1366
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1367	if (overwrite \|\| !existing_types$atokens) {
				1368	df$atokens$left[i] <- list(character(0))
				1369	df$atokens$match[i] <- list(character(0))
				1370	df$atokens$right[i] <- list(character(0))
				1371	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1372	}
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1373	} else {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1374	# Store NAs for failed requests
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1375	if (overwrite \|\| !existing_types$pos) {
				1376	df$pos$left[i] <- list(NA)
				1377	df$pos$match[i] <- list(NA)
				1378	df$pos$right[i] <- list(NA)
				1379	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1380
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1381	if (overwrite \|\| !existing_types$lemma) {
				1382	df$lemma$left[i] <- list(NA)
				1383	df$lemma$match[i] <- list(NA)
				1384	df$lemma$right[i] <- list(NA)
				1385	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1386
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1387	if (overwrite \|\| !existing_types$morph) {
				1388	df$morph$left[i] <- list(NA)
				1389	df$morph$match[i] <- list(NA)
				1390	df$morph$right[i] <- list(NA)
				1391	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1392
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1393	if (overwrite \|\| !existing_types$atokens) {
				1394	df$atokens$left[i] <- list(NA)
				1395	df$atokens$match[i] <- list(NA)
				1396	df$atokens$right[i] <- list(NA)
				1397	}
				1398	if (overwrite \|\| !existing_types$annotation_snippet) {
				1399	df$annotation_snippet[[i]] <- NA
				1400	}
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1401	}
				1402	}, error = function(e) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1403	# Store NAs for failed requests
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1404	if (overwrite \|\| !existing_types$pos) {
				1405	df$pos$left[i] <- list(NA)
				1406	df$pos$match[i] <- list(NA)
				1407	df$pos$right[i] <- list(NA)
				1408	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1409
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1410	if (overwrite \|\| !existing_types$lemma) {
				1411	df$lemma$left[i] <- list(NA)
				1412	df$lemma$match[i] <- list(NA)
				1413	df$lemma$right[i] <- list(NA)
				1414	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1415
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1416	if (overwrite \|\| !existing_types$morph) {
				1417	df$morph$left[i] <- list(NA)
				1418	df$morph$match[i] <- list(NA)
				1419	df$morph$right[i] <- list(NA)
				1420	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1421
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	1422	if (overwrite \|\| !existing_types$atokens) {
				1423	df$atokens$left[i] <- list(NA)
				1424	df$atokens$match[i] <- list(NA)
				1425	df$atokens$right[i] <- list(NA)
				1426	}
				1427	if (overwrite \|\| !existing_types$annotation_snippet) {
				1428	df$annotation_snippet[[i]] <- NA
				1429	}
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1430	})
				1431	}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1432
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1433	# Validate data frame structure before assignment
				1434	if (nrow(df) != nrow(kqo@collectedMatches)) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1435	}
				1436
				1437	# Update the collectedMatches with annotation data
				1438	tryCatch({
				1439	kqo@collectedMatches <- df
				1440	}, error = function(assign_error) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1441	# Try a safer approach: add columns individually
				1442	tryCatch({
				1443	kqo@collectedMatches$pos <- df$pos
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1444	kqo@collectedMatches$lemma <- df$lemma
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1445	kqo@collectedMatches$morph <- df$morph
				1446	kqo@collectedMatches$atokens <- df$atokens
				1447	kqo@collectedMatches$annotation_snippet <- df$annotation_snippet
				1448	}, error = function(col_error) {
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1449	warning("Failed to add annotation data to collectedMatches")
				1450	})
				1451	})
				1452
Marc Kupietz	e8c0fef	2025-07-18 19:59:04 +0200	[diff] [blame]	1453	if (verbose) {
				1454	elapsed_time <- Sys.time() - start_time
				1455	log_info(verbose, paste("Finished fetching annotations for", nrows, "matches in", format_duration(as.numeric(elapsed_time, units = "secs")), "\n"))
				1456	}
				1457
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1458	return(kqo)
				1459	})
				1460
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1461	#' Query frequencies of search expressions in virtual corpora
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1462	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	1463	#' `frequencyQuery` combines [corpusQuery()], [corpusStats()] and
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1464	#' [ci()] to compute a tibble with the absolute and relative frequencies and
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1465	#' confidence intervals of one ore multiple search terms across one or multiple
				1466	#' virtual corpora.
				1467	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	1468	#' @family frequency analysis
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1469	#' @aliases frequencyQuery
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1470	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	1471	#' \dontrun{
				1472	#'
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1473	#' KorAPConnection(verbose = TRUE) \|>
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1474	#' frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	1475	#' }
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1476	#'
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1477	# @inheritParams corpusQuery
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	1478	#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1479	#' @param query corpus query string(s.) (can be a vector). The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
				1480	#' @param vc virtual corpus definition(s) (can be a vector)
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	1481	#' @param conf.level confidence level of the returned confidence interval (passed through [ci()] to [prop.test()]).
				1482	#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If `as.alternatives` is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1483	#' @param ... further arguments passed to or from other methods (see [corpusQuery()]), most notably `expand`, a logical that decides if `query` and `vc` parameters are expanded to all of their combinations. It defaults to `TRUE`, if `query` and `vc` have different lengths, and to `FALSE` otherwise.
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1484	#' @export
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	1485	#'
				1486	#' @return A tibble, with each row containing the following result columns for query and vc combinations:
				1487	#' - query: the query string used for the frequency analysis.
				1488	#' - totalResults: absolute frequency of query matches in the vc.
				1489	#' - vc: virtual corpus used for the query.
				1490	#' - webUIRequestUrl: URL of the corresponding web UI request with respect to query and vc.
				1491	#' - total: total number of words in vc.
				1492	#' - f: relative frequency of query matches in the vc.
				1493	#' - conf.low: lower bound of the confidence interval for the relative frequency, given `conf.level`.
				1494	#' - conf.high: upper bound of the confidence interval for the relative frequency, given `conf.level`.
				1495
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1496	setMethod(
				1497	"frequencyQuery", "KorAPConnection",
Marc Kupietz	71d6e05	2019-11-22 18:42:10 +0100	[diff] [blame]	1498	function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1499	(if (as.alternatives) {
				1500	corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) \|>
Marc Kupietz	ea34b81	2025-06-25 15:49:00 +0200	[diff] [blame]	1501	group_by(vc) \|>
Marc Kupietz	71d6e05	2019-11-22 18:42:10 +0100	[diff] [blame]	1502	mutate(total = sum(totalResults))
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1503	} else {
				1504	corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...) \|>
				1505	mutate(total = corpusStats(kco, vc = vc, as.df = TRUE)$tokens)
Marc Kupietz	ea34b81	2025-06-25 15:49:00 +0200	[diff] [blame]	1506	}) \|>
Marc Kupietz	0c29cea	2019-10-09 08:44:36 +0200	[diff] [blame]	1507	ci(conf.level = conf.level)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1508	}
				1509	)
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1510
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1511	#' buildWebUIRequestUrlFromString
				1512	#'
				1513	#' @rdname KorAPQuery-class
				1514	#' @importFrom urltools url_encode
				1515	#' @export
				1516	buildWebUIRequestUrlFromString <- function(KorAPUrl,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1517	query,
				1518	vc = "",
				1519	ql = "poliqarp") {
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1520	if ("KorAPConnection" %in% class(KorAPUrl)) {
				1521	KorAPUrl <- KorAPUrl@KorAPUrl
				1522	}
				1523
				1524	request <-
				1525	paste0(
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1526	"?q=",
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1527	urltools::url_encode(enc2utf8(as.character(query))),
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1528	ifelse(vc != "",
				1529	paste0("&cq=", urltools::url_encode(enc2utf8(vc))),
				1530	""
				1531	),
				1532	"&ql=",
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1533	ql
				1534	)
				1535	paste0(KorAPUrl, request)
				1536	}
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1537
				1538	#' buildWebUIRequestUrl
				1539	#'
				1540	#' @rdname KorAPQuery-class
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1541	#' @importFrom httr2 url_parse
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1542	#' @export
				1543	buildWebUIRequestUrl <- function(kco,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1544	query = if (missing(KorAPUrl)) {
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1545	stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1546	} else {
				1547	httr2::url_parse(KorAPUrl)$query$q
				1548	},
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1549	vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1550	KorAPUrl,
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1551	ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql) {
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1552	buildWebUIRequestUrlFromString(kco@KorAPUrl, query, vc, ql)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1553	}
				1554
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1555	#' format()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1556	#' @rdname KorAPQuery-class
				1557	#' @param x KorAPQuery object
				1558	#' @param ... further arguments passed to or from other methods
Marc Kupietz	b73ca0f	2025-01-28 20:45:01 +0100	[diff] [blame]	1559	#' @importFrom urltools param_get url_decode
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1560	#' @export
				1561	format.KorAPQuery <- function(x, ...) {
				1562	cat("<KorAPQuery>\n")
				1563	q <- x
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1564	param <- urltools::param_get(q@request) \|> lapply(urltools::url_decode)
Marc Kupietz	b73ca0f	2025-01-28 20:45:01 +0100	[diff] [blame]	1565	cat(" Query: ", param$q, "\n")
				1566	if (!is.null(param$cq) && param$cq != "") {
				1567	cat(" Virtual corpus: ", param$cq, "\n")
				1568	}
				1569	if (!is.null(q@collectedMatches)) {
				1570	cat("==============================================================================================================", "\n")
				1571	print(summary(q@collectedMatches))
				1572	cat("==============================================================================================================", "\n")
				1573	}
				1574	cat(" Total results: ", q@totalResults, "\n")
				1575	cat(" Fetched results: ", q@nextStartIndex, "\n")
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1576	if (!is.null(q@collectedMatches) && "pos" %in% colnames(q@collectedMatches)) {
				1577	successful_annotations <- sum(!is.na(q@collectedMatches$annotation_snippet))
				1578	parsed_annotations <- sum(!is.na(q@collectedMatches$pos))
				1579	cat(" Annotations: ", successful_annotations, " of ", nrow(q@collectedMatches), " matches")
				1580	if (parsed_annotations > 0) {
				1581	cat(" (", parsed_annotations, " with parsed linguistic data)")
				1582	}
				1583	cat("\n")
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1584	}
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1585	}
				1586
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1587	#' show()
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1588	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1589	#' @rdname KorAPQuery-class
				1590	#' @param object KorAPQuery object
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1591	#' @export
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1592	setMethod("show", "KorAPQuery", function(object) {
				1593	format(object)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1594	invisible(object)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1595	})