Blame - R/KorAPQuery.R - KorAP/RKorAPClient

blob: 7e4d9cdbfa954b02fbecba7a2bd830126b7e39b9 [file] [log] [blame]

Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	1	#' KorAPQuery class (internal)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	2	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	3	#' Internal class for query state management. Users work with `corpusQuery()`, `fetchAll()`, and `fetchNext()` instead.
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	4	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	5	#' @keywords internal
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	6	#' @include KorAPConnection.R
Marc Kupietz	6dfeed9	2025-06-03 11:58:06 +0200	[diff] [blame]	7	#' @include logging.R
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	8	#' @import httr2
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	9	#'
Marc Kupietz	a6e4ee6	2021-03-05 09:00:15 +0100	[diff] [blame]	10	#' @include RKorAPClient-package.R
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	11
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	12	#' @export
				13	KorAPQuery <- setClass("KorAPQuery", slots = c(
Marc Kupietz	b897218	2019-09-20 21:33:46 +0200	[diff] [blame]	14	"korapConnection",
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	15	"request",
				16	"vc",
				17	"totalResults",
				18	"nextStartIndex",
				19	"fields",
				20	"requestUrl",
				21	"webUIRequestUrl",
				22	"apiResponse",
				23	"collectedMatches",
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	24	"hasMoreMatches"
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	25	))
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	26
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	27	#' Initialize KorAPQuery object
				28	#' @keywords internal
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	29	#' @param .Object …
Marc Kupietz	b897218	2019-09-20 21:33:46 +0200	[diff] [blame]	30	#' @param korapConnection KorAPConnection object
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	31	#' @param request query part of the request URL
				32	#' @param vc definition of a virtual corpus
				33	#' @param totalResults number of hits the query has yielded
				34	#' @param nextStartIndex at what index to start the next fetch of query results
				35	#' @param fields what data / metadata fields should be collected
				36	#' @param requestUrl complete URL of the API request
				37	#' @param webUIRequestUrl URL of a web frontend request corresponding to the API request
				38	#' @param apiResponse data-frame representation of the JSON response of the API request
Marc Kupietz	7776dec	2019-09-27 16:59:02 +0200	[diff] [blame]	39	#' @param hasMoreMatches logical that signals if more query results can be fetched
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	40	#' @param collectedMatches matches already fetched from the KorAP-API-server
Marc Kupietz	97a1bca	2019-10-04 22:52:09 +0200	[diff] [blame]	41	#'
				42	#' @importFrom tibble tibble
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	43	#' @export
setMethod(
  "initialize", "KorAPQuery",
  function(.Object,
           korapConnection = NULL,
           request = NULL,
           vc = "",
           totalResults = 0,
           nextStartIndex = 0,
           fields = c(
             "corpusSigle", "textSigle", "pubDate", "pubPlace",
             "availability", "textClass", "snippet", "tokens"
           ),
           requestUrl = "",
           webUIRequestUrl = "",
           apiResponse = NULL,
           hasMoreMatches = FALSE,
           collectedMatches = NULL) {
    # Let the inherited initializer build the object first, then store every
    # constructor argument in the slot of the same name.
    .Object <- callNextMethod()

    slot_values <- list(
      korapConnection = korapConnection,
      request = request,
      vc = vc,
      totalResults = totalResults,
      nextStartIndex = nextStartIndex,
      fields = fields,
      requestUrl = requestUrl,
      webUIRequestUrl = webUIRequestUrl,
      apiResponse = apiResponse,
      hasMoreMatches = hasMoreMatches,
      collectedMatches = collectedMatches
    )
    for (slot_name in names(slot_values)) {
      # `[[` returns NULL for NULL-valued entries, so NULL defaults are
      # written to the slot just like any other value.
      slot(.Object, slot_name) <- slot_values[[slot_name]]
    }

    .Object
  }
)
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	66
# S4 generics of the query API. Each one dispatches on its single named
# argument (`kco` or `kqo`) and forwards everything else through `...`.
setGeneric(name = "corpusQuery", def = function(kco, ...) standardGeneric("corpusQuery"))
setGeneric(name = "fetchAll", def = function(kqo, ...) standardGeneric("fetchAll"))
setGeneric(name = "fetchNext", def = function(kqo, ...) standardGeneric("fetchNext"))
setGeneric(name = "fetchRest", def = function(kqo, ...) standardGeneric("fetchRest"))
setGeneric(name = "fetchAnnotations", def = function(kqo, ...) standardGeneric("fetchAnnotations"))
setGeneric(name = "frequencyQuery", def = function(kco, ...) standardGeneric("frequencyQuery"))

# Page size used when result pages are fetched.
maxResultsPerPage <- 50

## quiets concerns of R CMD check re: the .'s that appear in pipelines
utils::globalVariables(".")
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	78
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	79	#' Search corpus for query terms
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	80	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	81	#' `corpusQuery` performs a corpus query via a connection to a KorAP-API-server
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	82	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	83	#' @family corpus search functions
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	84	#' @aliases corpusQuery
				85	#'
				86	#' @importFrom urltools url_encode
				87	#' @importFrom purrr pmap
Marc Kupietz	ea34b81	2025-06-25 15:49:00 +0200	[diff] [blame]	88	#' @importFrom dplyr bind_rows group_by
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	89	#'
Marc Kupietz	617266d	2025-02-27 10:43:07 +0100	[diff] [blame]	90	#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`)
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	91	#' @param query string that contains the corpus query. The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	92	#' @param vc string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	93	#' @param KorAPUrl instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in `KorAPConnection`) to provide all necessary information for the query.
Marc Kupietz	132f005	2023-04-16 14:23:05 +0200	[diff] [blame]	94	#' @param metadataOnly logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE.
				95	#' If you want your corpus queries to return not only metadata, but also KWICS, you need to authorize
				96	#' your RKorAPClient application as explained in the
				97	#' [authorization section](https://github.com/KorAP/RKorAPClient#authorization)
				98	#' of the RKorAPClient Readme on GitHub and set the `metadataOnly` parameter to
				99	#' `FALSE`.
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	100	#' @param ql string to choose the query language (see [section on Query Parameters](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters) in the Kustvakt-Wiki for possible values).
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	101	#' @param fields character vector specifying which metadata fields to retrieve for each match.
				102	#' Available fields depend on the corpus. For DeReKo (German Reference Corpus), possible fields include:
				103	#' \describe{
				104	#' \item{Text identification:}{`textSigle`, `docSigle`, `corpusSigle` - hierarchical text identifiers}
				105	#' \item{Publication info:}{`author`, `editor`, `title`, `docTitle`, `corpusTitle` - authorship and titles}
				106	#' \item{Temporal data:}{`pubDate`, `creationDate` - when text was published/created}
				107	#' \item{Publication details:}{`pubPlace`, `publisher`, `reference` - where/how published}
				108	#' \item{Text classification:}{`textClass`, `textType`, `textTypeArt`, `textDomain`, `textColumn` - topic domain, genre, text type and column}
				109	#' \item{Administrative and technical info:}{`corpusEditor`, `availability`, `language`, `foundries` - access rights and annotations}
				110	#' \item{Content data:}{`snippet`, `tokens`, `tokenSource`, `externalLink` - actual text content, tokenization, and link to source text}
				111	#' \item{System data:}{`indexCreationDate`, `indexLastModified` - corpus indexing info}
				112	#' }
				113	#' Use `c("textSigle", "pubDate", "author")` to retrieve multiple fields.
				114	#' Default fields provide basic text identification and publication metadata. The actual text content (`snippet` and `tokens`) are activated by default if `metadataOnly` is set to `FALSE`.
Marc Kupietz	43a6ade	2020-02-18 17:01:44 +0100	[diff] [blame]	115	#' @param accessRewriteFatal abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).
Marc Kupietz	25aebc3	2019-09-16 18:40:50 +0200	[diff] [blame]	116	#' @param verbose print some info
Marc Kupietz	4de53ec	2019-10-04 09:12:00 +0200	[diff] [blame]	117	#' @param as.df return result as data frame instead of as S4 object?
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	118	#' @param expand logical that decides if `query` and `vc` parameters are expanded to all of their combinations. Defaults to `TRUE`, iff `query` and `vc` have different lengths
Marc Kupietz	d9b2fd7	2023-04-17 19:08:50 +0200	[diff] [blame]	119	#' @param context string that specifies the size of the left and the right context returned in `snippet`
				120	#' (provided that `metadataOnly` is set to `FALSE` and that the necessary access rights are met).
				121	#' The format of the context size specification (e.g. `3-token,3-token`) is described in the [Service: Search GET documentation of the Kustvakt Wiki](https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET).
				122	#' If the parameter is not set, the default context size specification of the KorAP server instance will be used.
				123	#' Note that you cannot overrule the maximum context size set in the KorAP server instance,
				124	#' as this is typically legally motivated.
Marc Kupietz	ad8d2ed	2025-04-05 15:37:38 +0200	[diff] [blame]	125	#' @return Depending on the `as.df` parameter, a tibble or a [KorAPQuery()] object that, among other information, contains the total number of results in `@totalResults`. The resulting object can be used to fetch all query results (with [fetchAll()]) or the next page of results (with [fetchNext()]).
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	126	#' A corresponding URL to be used within a web browser is contained in `@webUIRequestUrl`
				127	#' Please make sure to check `$collection$rewrites` to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	128	#'
				129	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	130	#' \dontrun{
				131	#'
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	132	#' # Fetch basic metadata for "Ameisenplage"
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	133	#' KorAPConnection() \|>
				134	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	135	#' fetchAll()
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	136	#'
				137	#' # Fetch specific metadata fields for bibliographic analysis
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	138	#' query <- KorAPConnection() \|>
Marc Kupietz	1623fe8	2025-06-24 16:31:46 +0200	[diff] [blame]	139	#' corpusQuery("Ameisenplage",
				140	#' fields = c("textSigle", "author", "title", "pubDate", "pubPlace", "textType"))
				141	#' results <- fetchAll(query)
				142	#' results@collectedMatches
Marc Kupietz	657d8e7	2020-02-25 18:31:50 +0100	[diff] [blame]	143	#' }
Marc Kupietz	3c531f6	2019-09-13 12:17:24 +0200	[diff] [blame]	144	#'
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	145	#' \dontrun{
				146	#'
Marc Kupietz	603491f	2019-09-18 14:01:02 +0200	[diff] [blame]	147	#' # Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
				148	#' # and show the number of query hits (but don't fetch them).
Marc Kupietz	69cc54a	2019-09-30 12:06:54 +0200	[diff] [blame]	149	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	150	#' KorAPConnection(verbose = TRUE) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	151	#' corpusQuery(
				152	#' KorAPUrl =
				153	#' "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp"
				154	#' )
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	155	#' }
				156	#'
				157	#' \dontrun{
Marc Kupietz	3c531f6	2019-09-13 12:17:24 +0200	[diff] [blame]	158	#'
Marc Kupietz	603491f	2019-09-18 14:01:02 +0200	[diff] [blame]	159	#' # Plot the time/frequency curve of "Ameisenplage"
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	160	#' KorAPConnection(verbose = TRUE) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	161	#' {
				162	#' . ->> kco
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	163	#' } \|>
				164	#' corpusQuery("Ameisenplage") \|>
				165	#' fetchAll() \|>
				166	#' slot("collectedMatches") \|>
				167	#' mutate(year = lubridate::year(pubDate)) \|>
				168	#' dplyr::select(year) \|>
				169	#' group_by(year) \|>
				170	#' summarise(Count = dplyr::n()) \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	171	#' mutate(Freq = mapply(function(f, y) {
				172	#' f / corpusStats(kco, paste("pubDate in", y))@tokens
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	173	#' }, Count, year)) \|>
				174	#' dplyr::select(-Count) \|>
				175	#' complete(year = min(year):max(year), fill = list(Freq = 0)) \|>
Marc Kupietz	69cc54a	2019-09-30 12:06:54 +0200	[diff] [blame]	176	#' plot(type = "l")
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	177	#' }
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	178	#' @seealso [KorAPConnection()], [fetchNext()], [fetchRest()], [fetchAll()], [corpusStats()]
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	179	#'
				180	#' @references
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	181	#' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026>
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	182	#'
				183	#' @export
setMethod(
  "corpusQuery", "KorAPConnection",
  function(kco,
           query = if (missing(KorAPUrl)) {
             stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
           } else {
             httr2::url_parse(KorAPUrl)$query$q
           },
           vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
           KorAPUrl,
           metadataOnly = TRUE,
           ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql,
           fields = c(
             "corpusSigle",
             "textSigle",
             "pubDate",
             "pubPlace",
             "availability",
             "textClass",
             "snippet",
             "tokens"
           ),
           accessRewriteFatal = TRUE,
           verbose = kco@verbose,
           expand = length(vc) != length(query),
           as.df = FALSE,
           context = NULL) {
    # Counts the hits of one or several query / virtual corpus combinations.
    #
    # Returns
    #   * for several queries and/or vcs: one data frame with a row per
    #     combination (columns query, totalResults, vc, webUIRequestUrl),
    #     regardless of `as.df`;
    #   * for a single query: the same one-row data frame if `as.df` is TRUE,
    #     otherwise a KorAPQuery object.
    #
    # NOTE: `accessRewriteFatal` is accepted but not evaluated in this method.

    # Evaluate `query` first so that the error raised by its default (neither
    # `query` nor `KorAPUrl` given) surfaces before anything else happens.
    force(query)

    # A KorAPUrl without `cq` / `ql` parameter yields NULL for these defaults.
    # Fall back to the values used when no KorAPUrl is given; this has to
    # happen before the lazily evaluated `expand` default compares lengths.
    if (length(vc) == 0) {
      vc <- ""
    }
    if (length(ql) == 0) {
      ql <- "poliqarp"
    }

    # Content fields are dropped for metadata-only queries; textSigle is
    # always requested.
    contentFields <- c("snippet", "tokens")
    if (metadataOnly) {
      fields <- fields[!fields %in% contentFields]
    }
    if (!"textSigle" %in% fields) {
      fields <- c(fields, "textSigle")
    }

    # Builds the query string shared by web UI and API plus both full URLs
    # for a single (scalar) query / vc pair.
    build_urls <- function(query, vc) {
      context_param <- if (!metadataOnly && !is.null(context) && nzchar(context)) {
        paste0("&context=", url_encode(enc2utf8(context)))
      } else {
        ""
      }
      vc_param <- if (nzchar(vc)) paste0("&cq=", url_encode(enc2utf8(vc))) else ""
      tokens_param <- if (!metadataOnly) "&show-tokens=true" else ""
      request <- paste0(
        "?q=", url_encode(enc2utf8(query)),
        context_param,
        vc_param,
        tokens_param,
        "&ql=", ql
      )
      list(
        request = request,
        webUIRequestUrl = paste0(kco@KorAPUrl, request),
        requestUrl = paste0(
          kco@apiUrl,
          "search",
          request,
          "&fields=",
          paste(fields, collapse = ","),
          if (metadataOnly) "&access-rewrite-disabled=true" else ""
        )
      )
    }

    # Rounds a benchmark string such as "0.123456 s" to two decimals for
    # display. Anything that does not look like "<number>s" is passed through
    # unchanged instead of being turned into "NAs".
    format_benchmark <- function(benchmark) {
      if (is.character(benchmark) && length(benchmark) == 1 && grepl("s$", benchmark)) {
        time_value <- suppressWarnings(as.numeric(sub("s$", "", benchmark)))
        if (!is.na(time_value)) {
          return(paste0(round(time_value, 2), "s"))
        }
      }
      benchmark
    }

    if (length(query) > 1 || length(vc) > 1) {
      grid <- if (expand) expand_grid(query = query, vc = vc) else tibble(query = query, vc = vc)

      # Timing information for the ETA display
      total_queries <- nrow(grid)
      start_time <- Sys.time()

      # The running index is passed along explicitly so that the callback
      # does not have to modify a counter in the enclosing scope.
      results <- purrr::pmap(
        list(
          query = grid$query,
          vc = grid$vc,
          current_query = seq_len(total_queries)
        ),
        function(query, vc, current_query) {
          urls <- build_urls(query, vc)

          # Show individual query progress
          log_info(verbose, "\rSearching \"", query, "\" in \"", vc, "\"", sep = "")
          # count=0: only the number of hits is requested here
          res <- apiCall(kco, paste0(urls$requestUrl, "&count=0"))
          if (is.null(res)) {
            log_info(verbose, ": API call failed\n")
            totalResults <- 0
          } else {
            totalResults <- as.integer(res$meta$totalResults)
            log_info(verbose, ": ", totalResults, " hits")
            if (!is.null(res$meta$cached)) {
              log_info(verbose, " [cached]")
            } else if (!is.null(res$meta$benchmark)) {
              log_info(verbose, ", took ", format_benchmark(res$meta$benchmark))
            }

            # Append the ETA information to the same output line
            if (verbose && total_queries > 1) {
              eta_info <- calculate_eta(current_query, total_queries, start_time)
              if (eta_info != "") {
                log_info(verbose, eta_info)
              }
            }

            log_info(verbose, "\n")
          }

          data.frame(
            query = query,
            totalResults = totalResults,
            vc = vc,
            webUIRequestUrl = urls$webUIRequestUrl,
            stringsAsFactors = FALSE
          )
        }
      )

      bind_rows(results)
    } else {
      urls <- build_urls(query, vc)
      log_info(verbose, "\rSearching \"", query, "\" in \"", vc, "\"", sep = "")
      # count=0: only the number of hits is requested here
      res <- apiCall(kco, paste0(urls$requestUrl, "&count=0"))
      if (is.null(res)) {
        message("API call failed.")
        totalResults <- 0
      } else {
        totalResults <- as.integer(res$meta$totalResults)
        log_info(verbose, ": ", totalResults, " hits")
        if (!is.null(res$meta$cached)) {
          log_info(verbose, " [cached]\n")
        } else if (!is.null(res$meta$benchmark)) {
          log_info(verbose, ", took ", format_benchmark(res$meta$benchmark), "\n", sep = "")
        } else {
          log_info(verbose, "\n")
        }
      }

      if (as.df) {
        data.frame(
          query = query,
          totalResults = totalResults,
          vc = vc,
          webUIRequestUrl = urls$webUIRequestUrl,
          stringsAsFactors = FALSE
        )
      } else {
        KorAPQuery(
          korapConnection = kco,
          nextStartIndex = 0,
          fields = fields,
          requestUrl = urls$requestUrl,
          request = urls$request,
          totalResults = totalResults,
          vc = vc,
          apiResponse = res,
          webUIRequestUrl = urls$webUIRequestUrl,
          hasMoreMatches = (totalResults > 0)
        )
      }
    }
  }
)
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	378
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	379	#' @importFrom purrr map
				380	repair_data_strcuture <- function(x) {
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	381	if (is.list(x)) {
				382	as.character(purrr::map(x, ~ if (length(.x) > 1) {
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	383	paste(.x, collapse = " ")
				384	} else {
				385	.x
				386	}))
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	387	} else {
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	388	ifelse(is.na(x), "", x)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	389	}
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	390	}
				391
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	392	#' Fetch the next bunch of results of a KorAP query.
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	393	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	394	#' `fetchNext` fetches the next bunch of results of a KorAP query.
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	395	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	396	#' @family corpus search functions
				397	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	398	#' @param kqo object obtained from [corpusQuery()]
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	399	#' @param offset start offset for query results to fetch
				400	#' @param maxFetch maximum number of query results to fetch
Marc Kupietz	25aebc3	2019-09-16 18:40:50 +0200	[diff] [blame]	401	#' @param verbose print progress information if true
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	402	#' @param randomizePageOrder fetch result pages in pseudo random order if true. Use [set.seed()] to set seed for reproducible results.
				403	#' @return The `kqo` input object with updated slots `collectedMatches`, `apiResponse`, `nextStartIndex`, `hasMoreMatches`
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	404	#'
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	405	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	406	#' \dontrun{
				407	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	408	#' q <- KorAPConnection() \|>
				409	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	410	#' fetchNext()
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	411	#' q@collectedMatches
Marc Kupietz	657d8e7	2020-02-25 18:31:50 +0100	[diff] [blame]	412	#' }
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	413	#'
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	414	#' @references
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	415	#' <https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026>
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	416	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	417	#' @aliases fetchNext
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	418	#' @importFrom dplyr rowwise mutate bind_rows select summarise n select
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	419	#' @importFrom tibble enframe add_column
				420	#' @importFrom stringr word
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	421	#' @importFrom tidyr unnest unchop pivot_wider
				422	#' @importFrom purrr map
Marc Kupietz	632cbd4	2019-09-06 16:04:51 +0200	[diff] [blame]	423	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	424	setMethod("fetchNext", "KorAPQuery", function(kqo,
				425	offset = kqo@nextStartIndex,
				426	maxFetch = maxResultsPerPage,
				427	verbose = kqo@korapConnection@verbose,
				428	randomizePageOrder = FALSE) {
Marc Kupietz	a7a8f1b	2024-12-18 15:56:19 +0100	[diff] [blame]	429	# https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	430	results <- key <- name <- tmp_positions <- 0
Marc Kupietz	a7a8f1b	2024-12-18 15:56:19 +0100	[diff] [blame]	431
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	432	if (kqo@totalResults == 0 \|\| offset >= kqo@totalResults) {
				433	return(kqo)
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	434	}
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	435	use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	436	# Calculate the initial page number (not used directly - keeping for reference)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	437	collectedMatches <- kqo@collectedMatches
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	438
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	439	# Track start time for ETA calculation
				440	start_time <- Sys.time()
				441
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	442	# For randomized page order, generate a list of randomized page indices
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	443	if (randomizePageOrder) {
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	444	# Calculate how many pages we need to fetch based on maxFetch
				445	total_pages_to_fetch <- if (!is.na(maxFetch)) {
				446	# Either limited by maxFetch or total results, whichever is smaller
				447	min(ceiling(maxFetch / maxResultsPerPage), ceiling(kqo@totalResults / maxResultsPerPage))
				448	} else {
				449	# All pages
				450	ceiling(kqo@totalResults / maxResultsPerPage)
				451	}
				452
				453	# Generate randomized page indices (0-based for API)
				454	pages <- sample.int(ceiling(kqo@totalResults / maxResultsPerPage), total_pages_to_fetch) - 1
				455	page_index <- 1 # Index to track which page in the randomized list we're on
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	456	}
				457
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	458	if (is.null(collectedMatches)) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	459	collectedMatches <- data.frame()
				460	}
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	461
				462	# Initialize the page counter properly based on nextStartIndex and any previously fetched results
				463	# We add 1 to make it 1-based for display purposes since users expect page numbers to start from 1
				464	# For first call, this will be 1, for subsequent calls, it will reflect our actual position
				465	current_page_number <- ceiling(offset / maxResultsPerPage) + 1
				466
				467	# For sequential fetches, keep track of which global page we're on
				468	# This is important for correctly showing page numbers in subsequent fetchNext calls
				469	page_count_start <- current_page_number
				470
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	471	repeat {
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	472	# Determine which page to fetch next
				473	if (randomizePageOrder) {
				474	# In randomized mode, get the page from our randomized list using the page_index
				475	# Make sure we don't exceed the array bounds
				476	if (page_index > length(pages)) {
				477	break # No more pages to fetch in randomized mode
				478	}
				479	current_offset_page <- pages[page_index]
				480	# For display purposes in randomized mode, show which page out of the total we're fetching
				481	display_page_number <- page_index
				482	} else {
				483	# In sequential mode, use the current_page_number to calculate the offset
				484	current_offset_page <- (current_page_number - 1)
				485	display_page_number <- current_page_number
				486	}
				487
				488	# Calculate the actual offset in tokens
				489	currentOffset <- current_offset_page * maxResultsPerPage
				490
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	491	# Build the query with the appropriate count and offset using httr2
				492	count_param <- min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage)
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	493
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	494	# Parse existing URL to preserve all query parameters
				495	parsed_url <- httr2::url_parse(kqo@requestUrl)
				496	existing_query <- parsed_url$query
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	497
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	498	# Add/update count and offset parameters
				499	existing_query$count <- count_param
				500	existing_query$offset <- currentOffset
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	501
Marc Kupietz	ef0e939	2025-06-18 12:21:49 +0200	[diff] [blame]	502	# Rebuild the URL with all parameters
				503	query <- httr2::url_modify(kqo@requestUrl, query = existing_query)
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	504	res <- apiCall(kqo@korapConnection, query)
				505	if (length(res$matches) == 0) {
				506	break
				507	}
				508
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	509	if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) \|\| as.numeric(use_korap_api) >= 1.0)) {
Marc Kupietz	16ccf11	2025-01-26 13:25:27 +0100	[diff] [blame]	510	log_info(verbose, "Using fields API: ")
Marc Kupietz	05a6079	2024-12-07 16:23:31 +0100	[diff] [blame]	511	currentMatches <- res$matches$fields %>%
				512	purrr::map(~ mutate(.x, value = repair_data_strcuture(value))) %>%
				513	tibble::enframe() %>%
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	514	tidyr::unnest(cols = value) %>%
				515	tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	516	dplyr::select(-name)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	517	if ("snippet" %in% colnames(res$matches)) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	518	currentMatches$snippet <- res$matches$snippet
				519	}
Marc Kupietz	3cd2c6c	2025-01-08 20:35:39 +0100	[diff] [blame]	520	if ("tokens" %in% colnames(res$matches)) {
				521	currentMatches$tokens <- res$matches$tokens
				522	}
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	523	} else {
				524	currentMatches <- res$matches
				525	}
				526
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	527	for (field in kqo@fields) {
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	528	if (!field %in% colnames(currentMatches)) {
				529	currentMatches[, field] <- NA
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	530	}
				531	}
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	532	currentMatches <- currentMatches %>%
				533	select(kqo@fields) %>%
				534	mutate(
Marc Kupietz	ff712a9	2025-07-18 09:07:23 +0200	[diff] [blame]	535	matchID = res$matches$matchID,
Marc Kupietz	0447da0	2025-01-08 20:51:09 +0100	[diff] [blame]	536	tmp_positions = gsub(".-p(\\d+)-(\\d+).", "\\1 \\2", res$matches$matchID),
Marc Kupietz	f488112	2024-12-17 14:55:39 +0100	[diff] [blame]	537	matchStart = as.integer(stringr::word(tmp_positions, 1)),
				538	matchEnd = as.integer(stringr::word(tmp_positions, 2)) - 1
				539	) %>%
				540	select(-tmp_positions)
				541
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	542	if (!is.list(collectedMatches)) {
				543	collectedMatches <- currentMatches
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	544	} else {
Marc Kupietz	2078bde	2023-08-27 16:46:15 +0200	[diff] [blame]	545	collectedMatches <- bind_rows(collectedMatches, currentMatches)
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	546	}
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	547
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	548	# Get the actual items per page from the API response
				549	# We now consistently use maxResultsPerPage instead
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	550
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	551	# Calculate total pages consistently using fixed maxResultsPerPage
				552	# This ensures consistent page counting across the function
				553	total_pages <- ceiling(kqo@totalResults / maxResultsPerPage)
				554
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	555	# Calculate ETA using the centralized function from logging.R
				556	current_page <- if (randomizePageOrder) page_index else display_page_number
				557	total_pages_to_fetch <- if (!is.na(maxFetch)) {
				558	# Account for offset - we can only fetch from the remaining results after offset
				559	remaining_results_after_offset <- max(0, kqo@totalResults - offset)
				560	min(ceiling(maxFetch / maxResultsPerPage), ceiling(remaining_results_after_offset / maxResultsPerPage))
				561	} else {
				562	total_pages
				563	}
Marc Kupietz	365660e	2025-06-25 15:09:55 +0200	[diff] [blame]	564
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	565	eta_info <- calculate_eta(current_page, total_pages_to_fetch, start_time)
Marc Kupietz	365660e	2025-06-25 15:09:55 +0200	[diff] [blame]	566
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	567	# Extract timing information for display
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	568	time_per_page <- NA
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	569	if (!is.null(res$meta$benchmark) && is.character(res$meta$benchmark)) {
Marc Kupietz	ae9b617	2025-05-02 15:50:01 +0200	[diff] [blame]	570	time_per_page <- suppressWarnings(as.numeric(sub("s", "", res$meta$benchmark)))
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	571	}
				572
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	573	# Create the page display string with proper formatting
Marc Kupietz	acbaab0	2025-05-01 10:56:35 +0200	[diff] [blame]	574
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	575	# For global page tracking, calculate the absolute page number
				576	actual_display_number <- if (randomizePageOrder) {
				577	current_offset_page + 1 # In randomized mode, this is the actual page (0-based + 1)
				578	} else {
				579	# In sequential mode, the absolute page number is the actual offset page + 1 (to make it 1-based)
				580	current_offset_page + 1
				581	}
				582
				583	# For subsequent calls to fetchNext, we need to calculate the correct page numbers
				584	# based on the current batch being fetched
				585
				586	# For each call to fetchNext, we want to show 1/2, 2/2 (not 3/4, 4/4)
				587	# Simply count from 1 within the current batch
				588
				589	# The relative page number is simply the current position in this batch
				590	if (randomizePageOrder) {
				591	relative_page_number <- page_index # In randomized mode, we start from 1 in each batch
				592	} else {
				593	relative_page_number <- display_page_number - (page_count_start - 1)
				594	}
				595
				596	# How many pages will we fetch in this batch?
Marc Kupietz	021663d	2025-06-18 17:49:22 +0200	[diff] [blame]	597	# If maxFetch is specified, calculate the total pages for this fetch operation
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	598	pages_in_this_batch <- if (!is.na(maxFetch)) {
Marc Kupietz	021663d	2025-06-18 17:49:22 +0200	[diff] [blame]	599	# Account for offset - we can only fetch from the remaining results after offset
				600	remaining_results_after_offset <- max(0, kqo@totalResults - offset)
				601	min(ceiling(maxFetch / maxResultsPerPage), ceiling(remaining_results_after_offset / maxResultsPerPage))
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	602	} else {
				603	# Otherwise fetch all remaining pages
				604	total_pages - page_count_start + 1
				605	}
				606
				607	# The total pages to be shown in this batch
				608	batch_total_pages <- pages_in_this_batch
				609
				610	page_display <- paste0(
				611	"Retrieved page ",
				612	sprintf(paste0("%", nchar(batch_total_pages), "d"), relative_page_number),
				613	"/",
				614	sprintf("%d", batch_total_pages)
				615	)
				616
				617	# If randomized, also show which actual page we fetched
				618	if (randomizePageOrder) {
				619	# Determine the maximum width needed for page numbers (based on total pages)
				620	# This ensures consistent alignment
				621	max_page_width <- nchar(as.character(total_pages))
				622	# Add the actual page number that was fetched (0-based + 1 for display) with proper padding
Marc Kupietz	7638ca4	2025-05-25 13:18:16 +0200	[diff] [blame]	623	page_display <- paste0(
				624	page_display,
				625	sprintf(" (actual page %*d)", max_page_width, current_offset_page + 1)
				626	)
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	627	}
				628	# Always show the absolute page number and total pages (for clarity)
				629	else {
				630	# Show the absolute page number (out of total possible pages)
				631	page_display <- paste0(page_display, sprintf(
				632	" (page %d of %d total)",
				633	actual_display_number, total_pages
				634	))
				635	}
				636
				637	# Add caching or timing information
				638	if (!is.null(res$meta$cached)) {
				639	page_display <- paste0(page_display, " [cached]")
				640	} else {
				641	page_display <- paste0(
				642	page_display,
				643	" in ",
				644	if (!is.na(time_per_page)) sprintf("%4.1f", time_per_page) else "?",
Marc Kupietz	24799fd	2025-06-25 14:15:36 +0200	[diff] [blame]	645	"s",
				646	eta_info
Marc Kupietz	623d712	2025-05-25 12:46:12 +0200	[diff] [blame]	647	)
				648	}
				649
				650	log_info(verbose, paste0(page_display, "\n"))
				651
				652	# Increment the appropriate counter based on mode
				653	if (randomizePageOrder) {
				654	page_index <- page_index + 1
				655	} else {
				656	current_page_number <- current_page_number + 1
				657	}
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	658	results <- results + res$meta$itemsPerPage
Marc Kupietz	e8bd49b	2024-06-28 07:24:44 +0200	[diff] [blame]	659	if (nrow(collectedMatches) >= kqo@totalResults \|\| (!is.na(maxFetch) && results >= maxFetch)) {
Marc Kupietz	5bbc9db	2019-08-30 16:30:45 +0200	[diff] [blame]	660	break
				661	}
				662	}
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	663	nextStartIndex <- min(res$meta$startIndex + res$meta$itemsPerPage, kqo@totalResults)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	664	KorAPQuery(
				665	nextStartIndex = nextStartIndex,
Marc Kupietz	d0d3e9b	2019-09-24 17:36:03 +0200	[diff] [blame]	666	korapConnection = kqo@korapConnection,
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	667	fields = kqo@fields,
				668	requestUrl = kqo@requestUrl,
				669	request = kqo@request,
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	670	totalResults = kqo@totalResults,
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	671	vc = kqo@vc,
				672	webUIRequestUrl = kqo@webUIRequestUrl,
Marc Kupietz	6817095	2021-06-30 09:37:21 +0200	[diff] [blame]	673	hasMoreMatches = (kqo@totalResults > nextStartIndex),
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	674	apiResponse = res,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	675	collectedMatches = collectedMatches
				676	)
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	677	})
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	678
				679	#' Fetch all results of a KorAP query.
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	680	#'
Marc Kupietz	67edcb5	2021-09-20 21:54:24 +0200	[diff] [blame]	681	#' `fetchAll` fetches all results of a KorAP query.
Marc Kupietz	a6e4ee6	2021-03-05 09:00:15 +0100	[diff] [blame]	682	#'
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	683	#' @family corpus search functions
Marc Kupietz	dc880ac	2025-06-24 20:34:43 +0200	[diff] [blame]	684	#' @param kqo object obtained from [corpusQuery()]
				685	#' @param verbose print progress information if true
				686	#' @param ... further arguments passed to [fetchNext()]
				687	#' @return The updated `kqo` object with all results in `@collectedMatches`
Marc Kupietz	a8c40f4	2025-06-24 15:49:52 +0200	[diff] [blame]	688	#'
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	689	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	690	#' \dontrun{
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	691	#' # Fetch all metadata of every query hit for "Ameisenplage" and show a summary
				692	#' q <- KorAPConnection() \|>
				693	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	694	#' fetchAll()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	695	#' q@collectedMatches
Marc Kupietz	ecc8670	2025-06-24 12:12:51 +0200	[diff] [blame]	696	#'
				697	#' # Fetch also all KWICs
				698	#' q <- KorAPConnection() \|> auth() \|>
				699	#' corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
				700	#' fetchAll()
				701	#' q@collectedMatches
				702	#'
				703	#' # Retrieve title and text sigle metadata of all texts published on 1958-03-12
				704	#' q <- KorAPConnection() \|>
				705	#' corpusQuery("<base/s=t>", # this matches each text once
				706	#' vc = "pubDate in 1958-03-12",
				707	#' fields = c("textSigle", "title"),
				708	#' ) \|>
				709	#' fetchAll()
				710	#' q@collectedMatches
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	711	#' }
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	712	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	713	#' @aliases fetchAll
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	714	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	715	setMethod("fetchAll", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) {
				716	return(fetchNext(kqo, offset = 0, maxFetch = NA, verbose = verbose, ...))
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	717	})
				718
				719	#' Fetches the remaining results of a KorAP query.
				720	#'
Marc Kupietz	dc880ac	2025-06-24 20:34:43 +0200	[diff] [blame]	721	#' @param kqo object obtained from [corpusQuery()]
				722	#' @param verbose print progress information if true
				723	#' @param ... further arguments passed to [fetchNext()]
				724	#' @return The updated `kqo` object with remaining results in `@collectedMatches`
				725	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	726	#' @examples
Marc Kupietz	6ae7605	2021-09-21 10:34:00 +0200	[diff] [blame]	727	#' \dontrun{
				728	#'
Marc Kupietz	d352642	2025-06-25 09:16:15 +0200	[diff] [blame]	729	#' q <- KorAPConnection() \|>
				730	#' corpusQuery("Ameisenplage") \|>
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	731	#' fetchRest()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	732	#' q@collectedMatches
Marc Kupietz	05b2277	2020-02-18 21:58:42 +0100	[diff] [blame]	733	#' }
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	734	#'
				735	#' @aliases fetchRest
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	736	#' @export
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	737	setMethod("fetchRest", "KorAPQuery", function(kqo, verbose = kqo@korapConnection@verbose, ...) {
				738	return(fetchNext(kqo, maxFetch = NA, verbose = verbose, ...))
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	739	})
				740
#' Parse XML annotations into linguistic layers
#'
#' Internal helper function to extract linguistic annotations (lemma, POS,
#' morphology) from XML annotation snippets returned by the KorAP API.
#'
#' The content of the `<span class="match">` element (or the whole snippet if
#' there is none) is split at closing `</span>` tags. Every piece that carries
#' a `title` attribute and ends in plain text yields one token; its
#' annotations are read from the space-separated `foundry/layer:value` entries
#' of all `title` attributes in that piece (`l` = lemma, `p` = POS,
#' `m` = morphology). If this yields no token at all, innermost
#' `<span title="...">text</span>` elements are scanned instead.
#'
#' @param xml_snippet XML string containing annotation data
#' @return Named list with vectors for 'token', 'lemma', 'pos', and 'morph'.
#'   The four vectors have the same length; layers missing for a token are
#'   `NA`. For `NULL`, `NA` or empty input all four are `character(0)`.
#' @keywords internal
parse_xml_annotations <- function(xml_snippet) {
  if (is.null(xml_snippet) || is.na(xml_snippet) || xml_snippet == "") {
    return(list(token = character(0), lemma = character(0), pos = character(0), morph = character(0)))
  }

  # Resolve the lemma/POS/morph values from one or more title attribute
  # contents. Entries are visited in order, so a later entry for the same
  # layer overrides an earlier one.
  parse_title_annotations <- function(title_contents) {
    layers <- list(lemma = NA, pos = NA, morph = NA)
    for (annotation in unlist(strsplit(title_contents, "\\s+"))) {
      if (grepl("^[^/]+/l:", annotation)) {
        layers$lemma <- gsub("^[^/]+/l:(.*)$", "\\1", annotation)
      } else if (grepl("^[^/]+/p:", annotation)) {
        layers$pos <- gsub("^[^/]+/p:(.*)$", "\\1", annotation)
      } else if (grepl("^[^/]+/m:", annotation)) {
        layers$morph <- gsub("^[^/]+/m:(.*)$", "\\1", annotation)
      }
    }
    layers
  }

  # Restrict parsing to the content of <span class="match">...</span>
  match_open <- regexpr('<span class="match">', xml_snippet, fixed = TRUE)
  if (match_open > 0) {
    content_start <- match_open + attr(match_open, "match.length")
    remaining <- substr(xml_snippet, content_start, nchar(xml_snippet))

    if (grepl('<span class="context-right">', remaining, fixed = TRUE)) {
      # Keep everything up to the right context span
      content_to_parse <- gsub('(.*?)<span class="context-right">.*', "\\1", remaining)
    } else {
      # No right context: drop the trailing </span> that closes the match span
      content_to_parse <- gsub("(.*)</span>\\s*$", "\\1", remaining)
    }
  } else {
    content_to_parse <- xml_snippet
  }

  tokens <- character(0)
  lemmas <- character(0)
  pos_tags <- character(0)
  morph_tags <- character(0)

  # First pass: split at </span> and treat each piece that has a title
  # attribute and trailing text as one token
  title_pattern <- 'title="([^"]*)"'
  for (part in unlist(strsplit(content_to_parse, "</span>"))) {
    part <- trimws(part)
    if (nchar(part) == 0 || !grepl("<span[^>]*title=", part)) next

    # The token text is whatever follows the last '>' of the piece
    text_content <- trimws(gsub(".*>([^<]*)$", "\\1", part))
    if (nchar(text_content) == 0 || grepl("^<", text_content)) next

    titles <- regmatches(part, gregexpr(title_pattern, part))[[1]]
    layers <- parse_title_annotations(gsub(title_pattern, "\\1", titles))

    tokens <- c(tokens, text_content)
    lemmas <- c(lemmas, layers$lemma)
    pos_tags <- c(pos_tags, layers$pos)
    morph_tags <- c(morph_tags, layers$morph)
  }

  # Fallback: if splitting found nothing, scan innermost spans that hold
  # the text directly and read only their own title attribute
  if (length(tokens) == 0) {
    innermost_pattern <- '<span[^>]*title="([^"]*)"[^>]*>([^<]+)</span>'
    innermost <- regmatches(
      content_to_parse,
      gregexpr(innermost_pattern, content_to_parse, perl = TRUE)
    )[[1]]

    for (element in innermost) {
      text <- trimws(gsub(innermost_pattern, "\\2", element, perl = TRUE))
      if (nchar(text) == 0) next

      layers <- parse_title_annotations(gsub(innermost_pattern, "\\1", element, perl = TRUE))

      tokens <- c(tokens, text)
      lemmas <- c(lemmas, layers$lemma)
      pos_tags <- c(pos_tags, layers$pos)
      morph_tags <- c(morph_tags, layers$morph)
    }
  }

  # All four vectors are extended together, so they already have equal length
  list(
    token = tokens,
    lemma = lemmas,
    pos = pos_tags,
    morph = morph_tags
  )
}
				892
				893	#'
				894	#' Parse XML annotations into linguistic layers with left/match/right structure
				895	#'
				896	#' Internal helper function to extract linguistic annotations (lemma, POS, morphology)
				897	#' from XML annotation snippets returned by the KorAP API, split into left context,
				898	#' match, and right context sections like the tokens field.
				899	#'
				900	#' @param xml_snippet XML string containing annotation data
				901	#' @return Named list with nested structure containing left/match/right for 'atokens', 'lemma', 'pos', and 'morph'
				902	#' @keywords internal
				903	parse_xml_annotations_structured <- function(xml_snippet) {
				904	if (is.null(xml_snippet) \|\| is.na(xml_snippet) \|\| xml_snippet == "") {
				905	empty_result <- list(left = character(0), match = character(0), right = character(0))
				906	return(list(
				907	atokens = empty_result,
				908	lemma = empty_result,
				909	pos = empty_result,
				910	morph = empty_result
				911	))
				912	}
				913
				914	# Helper function to extract annotations from a span section
				915	extract_annotations_from_section <- function(section_content) {
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	916	# Remove any <mark>...</mark> tags that may interrupt token boundaries
				917	section_no_marks <- gsub('</?mark[^>]*>', '', section_content, perl = TRUE)
				918	# Normalize separators between adjacent top-level spans so splitting is robust.
				919	# Replace any punctuation/entity/space run between one-or-more closing spans and the next opening span
				920	# with a single space, preserving all closing spans.
				921	section_norm <- gsub('((?:</span>)+)[[:space:]](?:&[^;]+;\|[[:punct:]]\|[[:space:]])[[:space:]]*(<span)', '\\1 \\2', section_no_marks, perl = TRUE)
				922	# Handle both spaced tokens and nested single tokens by scanning innermost spans with direct text
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	923	tokens <- character(0)
				924	lemmas <- character(0)
				925	pos_tags <- character(0)
				926	morph_tags <- character(0)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	927
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	928	pat_token <- '<span[^>]title="([^"])"[^>]*>([^<]+)</span>'
				929	mm <- gregexpr(pat_token, section_norm, perl = TRUE)
				930	if (mm[[1]][1] != -1) {
				931	starts <- mm[[1]]
				932	lens <- attr(mm[[1]], 'match.length')
				933	for (k in seq_along(starts)) {
				934	s <- starts[k]
				935	e <- s + lens[k] - 1
				936	fragment <- substr(section_norm, s, e)
				937	text_content <- sub(pat_token, '\\2', fragment, perl = TRUE)
				938	text_content <- trimws(text_content)
				939	title_content <- sub(pat_token, '\\1', fragment, perl = TRUE)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	940
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	941	if (nchar(text_content) == 0) next
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	942
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	943	lemma <- NA
				944	pos_tag <- NA
				945	morph_features <- character(0)
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	946
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	947	# parse inner title
				948	ann <- unlist(strsplit(title_content, "[[:space:]]+"))
				949	for (a in ann) {
				950	if (grepl('/l:', a)) {
				951	lemma <- sub('.?/l:(.)$', '\\1', a, perl = TRUE)
				952	} else if (grepl('/p:', a)) {
				953	pos_tag <- sub('.?/p:(.)$', '\\1', a, perl = TRUE)
				954	} else if (grepl('/m:', a)) {
				955	morph_features <- c(morph_features, sub('.?/m:(.)$', '\\1', a, perl = TRUE))
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	956	}
				957	}
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	958
				959	# If lemma missing, look back in nearby context for the nearest title containing l:
				960	if (is.na(lemma) \|\| nchar(lemma) == 0) {
				961	ctx_start <- max(1, s - 500)
				962	context <- substr(section_norm, ctx_start, s - 1)
				963	tmm <- gregexpr('title="([^"]*)"', context, perl = TRUE)
				964	if (tmm[[1]][1] != -1) {
				965	ctx_titles <- regmatches(context, tmm)[[1]]
				966	for (ti in rev(ctx_titles)) {
				967	cont <- sub('title="([^"]*)"', '\\1', ti, perl = TRUE)
				968	if (grepl('/l:', cont)) {
				969	lemma <- sub('.?/l:([^ ]+).', '\\1', cont, perl = TRUE)
				970	break
				971	}
				972	}
				973	}
				974	}
				975
				976	# If POS missing, keep NA; morphological features may also appear in outer titles
				977	if (length(morph_features) == 0) {
				978	ctx_start <- max(1, s - 500)
				979	context <- substr(section_norm, ctx_start, s - 1)
				980	tmm <- gregexpr('title="([^"]*)"', context, perl = TRUE)
				981	if (tmm[[1]][1] != -1) {
				982	ctx_titles <- regmatches(context, tmm)[[1]]
				983	for (ti in rev(ctx_titles)) {
				984	cont <- sub('title="([^"]*)"', '\\1', ti, perl = TRUE)
				985	if (grepl('/m:', cont)) {
				986	mparts <- unlist(strsplit(cont, "[[:space:]]+"))
				987	for (mp in mparts) if (grepl('/m:', mp)) morph_features <- c(morph_features, sub('.?/m:(.)$', '\\1', mp, perl = TRUE))
				988	break
				989	}
				990	}
				991	}
				992	}
				993
				994	tokens <- c(tokens, text_content)
				995	lemmas <- c(lemmas, if (!is.null(lemma)) lemma else NA)
				996	pos_tags <- c(pos_tags, if (!is.null(pos_tag)) pos_tag else NA)
				997	morph_tags <- c(morph_tags, if (length(morph_features) > 0) paste(morph_features, collapse = "\|") else NA)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	998	}
				999	}
				1000
				1001	# Ensure all vectors have the same length
				1002	max_length <- max(length(tokens), length(lemmas), length(pos_tags), length(morph_tags))
				1003	if (max_length > 0) {
				1004	tokens <- c(tokens, rep(NA, max_length - length(tokens)))
				1005	lemmas <- c(lemmas, rep(NA, max_length - length(lemmas)))
				1006	pos_tags <- c(pos_tags, rep(NA, max_length - length(pos_tags)))
				1007	morph_tags <- c(morph_tags, rep(NA, max_length - length(morph_tags)))
				1008	}
				1009
				1010	return(list(
				1011	tokens = tokens,
				1012	lemmas = lemmas,
				1013	pos_tags = pos_tags,
				1014	morph_tags = morph_tags
				1015	))
				1016	}
				1017
				1018	# Split the XML into three parts: left context, match content, and right context
				1019	# The structure is: <span class="match">...left...<mark>...match...</mark>...right...</span>
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1020
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1021	# First extract the content within the match span using DOTALL modifier
				1022	match_span_pattern <- '(?s)<span class="match">(.?)</span>\\s<span class="context-right">'
				1023	match_span_match <- regexpr(match_span_pattern, xml_snippet, perl = TRUE)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1024
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1025	if (match_span_match == -1) {
				1026	# Try alternative pattern if no context-right
				1027	match_span_pattern <- '(?s)<span class="match">(.?)</span>\\s$'
				1028	match_span_match <- regexpr(match_span_pattern, xml_snippet, perl = TRUE)
				1029	}
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1030
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1031	if (match_span_match > 0) {
				1032	match_span_content <- gsub(match_span_pattern, '\\1', xml_snippet, perl = TRUE)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1033
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1034	# Now find the <mark> and </mark> positions within this content
				1035	mark_start <- regexpr('<mark[^>]*>', match_span_content, perl = TRUE)
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1036	# Use the LAST closing </mark> to cover multi-part matches
				1037	mark_end_gre <- gregexpr('</mark>', match_span_content, perl = TRUE)
				1038	mark_end_positions <- mark_end_gre[[1]]
				1039	mark_end <- if (!is.null(mark_end_positions) && length(mark_end_positions) > 0 && mark_end_positions[1] != -1)
				1040	mark_end_positions[length(mark_end_positions)] else -1
				1041	mark_end_len <- if (mark_end != -1) attr(mark_end_gre[[1]], "match.length")[length(mark_end_positions)] else 0
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1042
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1043	if (mark_start > 0 && mark_end > 0) {
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1044	# Left context: everything before first <mark>
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1045	left_content <- substr(match_span_content, 1, mark_start - 1)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1046
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1047	# Match content: everything between first <mark> and last </mark>
				1048	match_content <- substr(match_span_content, mark_start, mark_end + mark_end_len - 1)
Marc Kupietz	c643a12	2025-07-18 18:18:36 +0200	[diff] [blame]	1049
Marc Kupietz	560b591	2025-09-01 17:36:13 +0200	[diff] [blame]	1050	# Right context: everything after last </mark>
				1051	right_content_start <- mark_end + mark_end_len
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1052	right_content <- substr(match_span_content, right_content_start, nchar(match_span_content))
				1053	} else {
				1054	# No mark tags found, treat entire match span as match content
				1055	left_content <- ""
				1056	match_content <- match_span_content
				1057	right_content <- ""
				1058	}
				1059	} else {
				1060	# No match span found, treat entire content as match
				1061	left_content <- ""
				1062	match_content <- xml_snippet
				1063	right_content <- ""
				1064	}
				1065
				1066	# Process each section
				1067	left_annotations <- extract_annotations_from_section(left_content)
				1068	match_annotations <- extract_annotations_from_section(match_content)
				1069	right_annotations <- extract_annotations_from_section(right_content)
				1070
				1071	return(list(
				1072	atokens = list(
				1073	left = left_annotations$tokens,
				1074	match = match_annotations$tokens,
				1075	right = right_annotations$tokens
				1076	),
				1077	lemma = list(
				1078	left = left_annotations$lemmas,
				1079	match = match_annotations$lemmas,
				1080	right = right_annotations$lemmas
				1081	),
				1082	pos = list(
				1083	left = left_annotations$pos_tags,
				1084	match = match_annotations$pos_tags,
				1085	right = right_annotations$pos_tags
				1086	),
				1087	morph = list(
				1088	left = left_annotations$morph_tags,
				1089	match = match_annotations$morph_tags,
				1090	right = right_annotations$morph_tags
				1091	)
				1092	))
				1093	}
				1094
#' Fetch annotations for all collected matches
#'
#' `r lifecycle::badge("experimental")`
#'
#' `fetchAnnotations` fetches annotations (only token annotations, for now)
#' for all matches in the `@collectedMatches` slot
#' of a KorAPQuery object and adds annotation columns directly to the `@collectedMatches`
#' data frame. The method uses the `matchID` from collected matches and falls
#' back to `textSigle`, `matchStart` and `matchEnd` if no usable `matchID` is
#' available.
#'
#' Important: For copyright-restricted corpora, users must be authorized via [auth()]
#' and the initial corpus query must have `metadataOnly = FALSE` to ensure snippets are
#' available for annotation parsing.
#'
#' The method parses XML snippet annotations and adds linguistic columns to the data frame:
#' - `pos`: data frame with `left`, `match`, `right` columns, each containing list vectors of part-of-speech tags
#' - `lemma`: data frame with `left`, `match`, `right` columns, each containing list vectors of lemmas
#' - `morph`: data frame with `left`, `match`, `right` columns, each containing list vectors of morphological tags
#' - `atokens`: data frame with `left`, `match`, `right` columns, each containing list vectors of token text (from annotations)
#' - `annotation_snippet`: original XML snippet from the annotation API
#'
#' Rows whose annotation request fails (the API call returns `NULL` or raises
#' an error) get `NA` entries in every annotation column that this call is
#' allowed to replace. Rows whose response carries no snippet get empty
#' character vectors instead.
#'
#' @family corpus search functions
#' @concept Annotations
#' @aliases fetchAnnotations
#'
#' @param kqo object obtained from [corpusQuery()] with collected matches. Note: the original corpus query should have `metadataOnly = FALSE` for annotation parsing to work.
#' @param foundry string specifying the foundry to use for annotations (default: "tt" for Tree-Tagger)
#' @param overwrite logical; if TRUE, re-fetch and replace any existing
#'   annotation columns. If FALSE (default), only add missing annotation layers
#'   and preserve already fetched ones (e.g., keep POS/lemma from a previous
#'   foundry while adding morph from another).
#' @param verbose print progress information if true
#' @return The updated `kqo` object with annotation columns
#' like `pos`, `lemma`, `morph` (and `atokens` and `annotation_snippet`)
#' in the `@collectedMatches` slot. Each column is a data frame
#' with `left`, `match`, and `right` columns containing list vectors of annotations
#' for the left context, matched tokens, and right context, respectively.
#' The original XML snippet for each match is also stored in `annotation_snippet`.
#'
#' @examples
#' \dontrun{
#'
#' # Fetch annotations for matches using Tree-Tagger foundry
#' # Note: Authorization required for copyright-restricted corpora
#' q <- KorAPConnection() |>
#'   auth() |>
#'   corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
#'   fetchNext(maxFetch = 10) |>
#'   fetchAnnotations()
#'
#' # Access linguistic annotations for match i:
#' i <- 1
#' pos_tags <- q@collectedMatches$pos # Data frame with left/match/right columns for POS tags
#' lemmas <- q@collectedMatches$lemma # Data frame with left/match/right columns for lemmas
#' morphology <- q@collectedMatches$morph # Data frame with left/match/right columns for morphological tags
#' atokens <- q@collectedMatches$atokens # Data frame with left/match/right columns for annotation token text
#' raw_snippet <- q@collectedMatches$annotation_snippet[[i]] # Original XML snippet for match i
#'
#' # Access specific components:
#' match_pos <- q@collectedMatches$pos$match[[i]] # POS tags for the matched tokens in match i
#' left_lemmas <- q@collectedMatches$lemma$left[[i]] # Lemmas for the left context in match i
#' right_tokens <- q@collectedMatches$atokens$right[[i]] # Token text for the right context in match i
#'
#' # Use a different foundry (e.g., MarMoT)
#' q <- KorAPConnection() |>
#'   auth() |>
#'   corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
#'   fetchNext(maxFetch = 10) |>
#'   fetchAnnotations(foundry = "marmot")
#' q@collectedMatches$pos$left[1] # POS tags for the left context of the first match
#' }
#' @export
setMethod("fetchAnnotations", "KorAPQuery", function(kqo, foundry = "tt", overwrite = FALSE, verbose = kqo@korapConnection@verbose) {
  if (is.null(kqo@collectedMatches) || nrow(kqo@collectedMatches) == 0) {
    warning("No collected matches found. Please run fetchNext() or fetchAll() first.")
    return(kqo)
  }

  df <- kqo@collectedMatches
  kco <- kqo@korapConnection
  nrows <- nrow(df)

  annotation_types <- c("pos", "lemma", "morph", "atokens")
  tracked_columns <- c(annotation_types, "annotation_snippet")

  # Remember which annotation columns existed before this call; together with
  # `overwrite` this decides which columns may be (re)written.
  existing_types <- stats::setNames(
    as.list(tracked_columns %in% colnames(df)),
    tracked_columns
  )

  # TRUE if this call may unconditionally write to the given column.
  replaceable <- function(column) {
    overwrite || !existing_types[[column]]
  }

  # --- Local helpers (all return updated copies, none mutates outer state) ---

  # Build a left/match/right value triple holding the same value three times.
  same_on_all_sides <- function(value) {
    list(left = value, match = value, right = value)
  }

  # Write one row of a left/match/right annotation data frame.
  set_annotation_row <- function(ann_df, row_index, values) {
    ann_df$left[row_index] <- list(values$left)
    ann_df$match[row_index] <- list(values$match)
    ann_df$right[row_index] <- list(values$right)
    ann_df
  }

  # Put `value` into row `row_index` of every replaceable annotation column
  # (and NA into the snippet column if requested and replaceable).
  fill_replaceable <- function(matches, row_index, value, include_snippet = FALSE) {
    for (type in annotation_types) {
      if (replaceable(type)) {
        matches[[type]] <- set_annotation_row(
          matches[[type]], row_index, same_on_all_sides(value)
        )
      }
    }
    if (include_snippet && replaceable("annotation_snippet")) {
      matches$annotation_snippet[[row_index]] <- NA
    }
    matches
  }

  # Helper to decide if existing annotation row is effectively empty
  is_empty_annotation_row <- function(ann_df, row_index) {
    if (is.null(ann_df) || nrow(ann_df) < row_index) {
      return(TRUE)
    }
    side_is_empty <- function(side_val) {
      is.null(side_val) || length(side_val) == 0 || all(is.na(side_val))
    }
    side_is_empty(ann_df$left[[row_index]]) &&
      side_is_empty(ann_df$match[[row_index]]) &&
      side_is_empty(ann_df$right[[row_index]])
  }

  # TRUE if no usable snippet is stored yet (NULL, zero-length or NA). The
  # length guard keeps `||` from failing on zero-length input in R >= 4.3.
  snippet_missing <- function(snippet) {
    is.null(snippet) || length(snippet) == 0 || all(is.na(snippet))
  }

  # Construct the annotation request URL for match `row_index`.
  build_annotation_url <- function(matches, row_index) {
    match_id <- if ("matchID" %in% colnames(matches)) matches$matchID[row_index] else NA

    # matchID format: "match-match-A00/JUN/39609-p202-203" or encrypted format like
    # "match-DNB10/CSL/80400-p2343-2344x_MinDOhu_P6dd2MMZJyyus_7MairdKnr1LxY07Cya-Ow"
    id_is_usable <- !is.na(match_id) &&
      regexpr("match-(.+?-p\\d+-\\d+.*)", match_id, perl = TRUE) > 0

    if (id_is_usable) {
      # Everything after "match-", including a possible encrypted suffix
      doc_path_with_pos_and_encryption <- gsub("^match-(.+)$", "\\1", match_id, perl = TRUE)
      # Convert the dash before the position to a slash, keep the rest
      match_path <- gsub("-p(\\d+-\\d+.*)", "/p\\1", doc_path_with_pos_and_encryption)
    } else {
      # Fall back to textSigle + positions; format numbers to avoid
      # scientific notation in the URL
      match_start <- format(matches$matchStart[row_index], scientific = FALSE)
      match_end <- format(matches$matchEnd[row_index], scientific = FALSE)
      match_path <- paste0(matches$textSigle[row_index], "/", "p", match_start, "-", match_end)
    }

    base_url <- paste0(kco@apiUrl, "corpus/", match_path)
    httr2::url_modify(base_url, query = list(foundry = foundry))
  }

  # Store the parsed annotations of one match; on an assignment error the
  # replaceable layers of that row are reset to empty character vectors.
  store_parsed <- function(matches, row_index, parsed) {
    tryCatch(
      {
        for (type in annotation_types) {
          if (replaceable(type) || is_empty_annotation_row(matches[[type]], row_index)) {
            matches[[type]] <- set_annotation_row(matches[[type]], row_index, parsed[[type]])
          }
        }
        matches
      },
      error = function(assign_error) {
        fill_replaceable(matches, row_index, character(0))
      }
    )
  }

  # Merge one API response into row `row_index` and return the updated matches.
  annotate_row <- function(matches, row_index, res) {
    if (is.null(res)) {
      # Store NAs for failed requests
      return(fill_replaceable(matches, row_index, NA, include_snippet = TRUE))
    }

    has_snippet <- is.list(res) && "snippet" %in% names(res)

    # Store the raw annotation snippet (respect overwrite flag)
    if (replaceable("annotation_snippet") ||
      snippet_missing(matches$annotation_snippet[[row_index]])) {
      matches$annotation_snippet[[row_index]] <- if (has_snippet) res$snippet else NA
    }

    if (!has_snippet) {
      # No snippet available, store empty vectors
      return(fill_replaceable(matches, row_index, character(0)))
    }

    store_parsed(matches, row_index, parse_xml_annotations_structured(res$snippet))
  }

  # --- Initialize annotation columns as data frames (like tokens field) ---

  empty_char_list <- I(replicate(nrows, character(0), simplify = FALSE))
  for (type in annotation_types) {
    if (replaceable(type)) {
      df[[type]] <- data.frame(
        left = empty_char_list,
        match = empty_char_list,
        right = empty_char_list,
        stringsAsFactors = FALSE
      )
    }
  }

  if (replaceable("annotation_snippet")) {
    df$annotation_snippet <- replicate(nrows, NA, simplify = FALSE)
  }

  # Initialize timing for ETA calculation
  start_time <- Sys.time()
  if (verbose) {
    log_info(verbose, paste("Starting to fetch annotations for", nrows, "matches\n"))
  }

  for (i in seq_len(nrows)) {
    # ETA logging
    if (verbose && i > 1) {
      eta_info <- calculate_eta(i, nrows, start_time)
      log_info(verbose, paste("Fetching annotations for match", i, "of", nrows, eta_info, "\n"))
    }

    req <- build_annotation_url(df, i)

    # The handler returns the NA-filled data frame instead of assigning inside
    # its own scope, so failures are really recorded in `df`.
    df <- tryCatch(
      {
        res <- apiCall(kco, req)
        annotate_row(df, i, res)
      },
      error = function(e) {
        log_info(verbose, paste(
          "Failed to fetch annotations for match", i, ":", conditionMessage(e), "\n"
        ))
        fill_replaceable(df, i, NA, include_snippet = TRUE)
      }
    )
  }

  # Update the collectedMatches with annotation data
  tryCatch(
    {
      kqo@collectedMatches <- df
    },
    error = function(assign_error) {
      warning(
        "Failed to add annotation data to collectedMatches: ",
        conditionMessage(assign_error)
      )
    }
  )

  if (verbose) {
    elapsed_time <- Sys.time() - start_time
    log_info(verbose, paste("Finished fetching annotations for", nrows, "matches in", format_duration(as.numeric(elapsed_time, units = "secs")), "\n"))
  }

  return(kqo)
})
				1455
#' Query frequencies of search expressions in virtual corpora
#'
#' `frequencyQuery` combines [corpusQuery()], [corpusStats()] and
#' [ci()] to compute a tibble with the absolute and relative frequencies and
#' confidence intervals of one or multiple search terms across one or multiple
#' virtual corpora.
#'
#' @family frequency analysis
#' @aliases frequencyQuery
#' @examples
#' \dontrun{
#'
#' KorAPConnection(verbose = TRUE) |>
#'   frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
#' }
#'
# @inheritParams corpusQuery
#' @param kco [KorAPConnection()] object (obtained e.g. from `KorAPConnection()`)
#' @param query corpus query string(s) (can be a vector). The query language depends on the `ql` parameter. Either `query` must be provided or `KorAPUrl`.
#' @param vc virtual corpus definition(s) (can be a vector)
#' @param conf.level confidence level of the returned confidence interval (passed through [ci()] to [prop.test()]).
#' @param as.alternatives LOGICAL that specifies if the query terms should be treated as alternatives. If `as.alternatives` is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.
#' @param ... further arguments passed to or from other methods (see [corpusQuery()]), most notably `expand`, a logical that decides if `query` and `vc` parameters are expanded to all of their combinations. It defaults to `TRUE`, if `query` and `vc` have different lengths, and to `FALSE` otherwise.
#' @export
#'
#' @return A tibble, with each row containing the following result columns for query and vc combinations:
#'   - query: the query string used for the frequency analysis.
#'   - totalResults: absolute frequency of query matches in the vc.
#'   - vc: virtual corpus used for the query.
#'   - webUIRequestUrl: URL of the corresponding web UI request with respect to query and vc.
#'   - total: total number of words in vc.
#'   - f: relative frequency of query matches in the vc.
#'   - conf.low:  lower bound of the confidence interval for the relative frequency, given `conf.level`.
#'   - conf.high: upper bound of the confidence interval for the relative frequency, given `conf.level`.
setMethod(
  "frequencyQuery", "KorAPConnection",
  function(kco, query, vc = "", conf.level = 0.95, as.alternatives = FALSE, ...) {
    hits_with_total <- if (as.alternatives) {
      # Alternatives: the denominator is the sum of all hits within each vc.
      hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)
      hits_by_vc <- group_by(hits, vc)
      mutate(hits_by_vc, total = sum(totalResults))
    } else {
      # Default: the denominator is the token count reported by corpusStats();
      # inside mutate() `vc` is looked up in the query result first.
      hits <- corpusQuery(kco, query, vc, metadataOnly = TRUE, as.df = TRUE, ...)
      mutate(hits, total = corpusStats(kco, vc = vc, as.df = TRUE)$tokens)
    }

    # Add relative frequencies and confidence intervals.
    ci(hits_with_total, conf.level = conf.level)
  }
)
Marc Kupietz	3f57528	2019-10-04 14:46:04 +0200	[diff] [blame]	1505
#' buildWebUIRequestUrlFromString
#'
#' @rdname KorAPQuery-class
#' @importFrom urltools url_encode
#' @export
buildWebUIRequestUrlFromString <- function(KorAPUrl,
                                           query,
                                           vc = "",
                                           ql = "poliqarp") {
  # Build the web UI request URL "<KorAPUrl>?q=<query>[&cq=<vc>]&ql=<ql>".
  # `KorAPUrl` may be a URL string or a KorAPConnection object, in which case
  # its `KorAPUrl` slot is used. inherits() also recognizes subclasses of
  # KorAPConnection, which a comparison against class() would miss.
  if (inherits(KorAPUrl, "KorAPConnection")) {
    KorAPUrl <- KorAPUrl@KorAPUrl
  }

  encoded_query <- urltools::url_encode(enc2utf8(as.character(query)))

  # ifelse() (not if/else) on purpose: `vc` may be a vector, and the "&cq="
  # part is only emitted for its non-empty elements.
  vc_part <- ifelse(vc != "",
    paste0("&cq=", urltools::url_encode(enc2utf8(vc))),
    ""
  )

  request <- paste0("?q=", encoded_query, vc_part, "&ql=", ql)
  paste0(KorAPUrl, request)
}
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1532
				1533	#' buildWebUIRequestUrl
				1534	#'
				1535	#' @rdname KorAPQuery-class
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1536	#' @importFrom httr2 url_parse
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1537	#' @export
				1538	buildWebUIRequestUrl <- function(kco,
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1539	query = if (missing(KorAPUrl)) {
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1540	stop("At least one of the parameters query and KorAPUrl must be specified.", call. = FALSE)
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1541	} else {
				1542	httr2::url_parse(KorAPUrl)$query$q
				1543	},
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1544	vc = if (missing(KorAPUrl)) "" else httr2::url_parse(KorAPUrl)$query$cq,
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1545	KorAPUrl,
Marc Kupietz	f912959	2025-01-26 19:17:54 +0100	[diff] [blame]	1546	ql = if (missing(KorAPUrl)) "poliqarp" else httr2::url_parse(KorAPUrl)$query$ql) {
Marc Kupietz	38a9d68	2024-12-06 16:17:09 +0100	[diff] [blame]	1547	buildWebUIRequestUrlFromString(kco@KorAPUrl, query, vc, ql)
Marc Kupietz	dbd431a	2021-08-29 12:17:45 +0200	[diff] [blame]	1548	}
				1549
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1550	#' format()
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1551	#' @rdname KorAPQuery-class
				1552	#' @param x KorAPQuery object
				1553	#' @param ... further arguments passed to or from other methods
Marc Kupietz	b73ca0f	2025-01-28 20:45:01 +0100	[diff] [blame]	1554	#' @importFrom urltools param_get url_decode
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1555	#' @export
				1556	format.KorAPQuery <- function(x, ...) {
				1557	cat("<KorAPQuery>\n")
				1558	q <- x
Marc Kupietz	d885122	2025-05-01 10:57:19 +0200	[diff] [blame]	1559	param <- urltools::param_get(q@request) \|> lapply(urltools::url_decode)
Marc Kupietz	b73ca0f	2025-01-28 20:45:01 +0100	[diff] [blame]	1560	cat(" Query: ", param$q, "\n")
				1561	if (!is.null(param$cq) && param$cq != "") {
				1562	cat(" Virtual corpus: ", param$cq, "\n")
				1563	}
				1564	if (!is.null(q@collectedMatches)) {
				1565	cat("==============================================================================================================", "\n")
				1566	print(summary(q@collectedMatches))
				1567	cat("==============================================================================================================", "\n")
				1568	}
				1569	cat(" Total results: ", q@totalResults, "\n")
				1570	cat(" Fetched results: ", q@nextStartIndex, "\n")
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1571	if (!is.null(q@collectedMatches) && "pos" %in% colnames(q@collectedMatches)) {
				1572	successful_annotations <- sum(!is.na(q@collectedMatches$annotation_snippet))
				1573	parsed_annotations <- sum(!is.na(q@collectedMatches$pos))
				1574	cat(" Annotations: ", successful_annotations, " of ", nrow(q@collectedMatches), " matches")
				1575	if (parsed_annotations > 0) {
				1576	cat(" (", parsed_annotations, " with parsed linguistic data)")
				1577	}
				1578	cat("\n")
Marc Kupietz	e52b295	2025-07-17 16:53:02 +0200	[diff] [blame]	1579	}
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1580	}
				1581
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1582	#' show()
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1583	#'
Marc Kupietz	e95108e	2019-09-18 13:23:58 +0200	[diff] [blame]	1584	#' @rdname KorAPQuery-class
				1585	#' @param object KorAPQuery object
Marc Kupietz	62da2b5	2019-09-12 17:43:34 +0200	[diff] [blame]	1586	#' @export
setMethod(
  f = "show",
  signature = "KorAPQuery",
  definition = function(object) {
    # Auto-printing a KorAPQuery delegates to format(), which writes the
    # overview to the console; the object itself is returned invisibly.
    format(object)
    invisible(object)
  }
)