Blame - man/fetchAnnotations-KorAPQuery-method.Rd - KorAP/RKorAPClient

blob: 99f20e44d7a00fa0659283977305114201a845cd [file] [log] [blame]

Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	1	% Generated by roxygen2: do not edit by hand
				2	% Please edit documentation in R/KorAPQuery.R
				3	\name{fetchAnnotations,KorAPQuery-method}
				4	\alias{fetchAnnotations,KorAPQuery-method}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	5	\title{Fetch annotations for all collected matches}
				6	\usage{
Marc Kupietz	0af7593	2025-09-09 18:14:16 +0200	[diff] [blame]	7	\S4method{fetchAnnotations}{KorAPQuery}(
				8	kqo,
				9	foundry = "tt",
				10	overwrite = FALSE,
				11	verbose = kqo@korapConnection@verbose
				12	)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	13	}
				14	\arguments{
				15	\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}} with collected matches. Note: the original corpus query should have \code{metadataOnly = FALSE} for annotation parsing to work.}
				16
				17	\item{foundry}{string specifying the foundry to use for annotations (default: "tt" for TreeTagger)}
				18
Marc Kupietz	93787d5	2025-09-03 13:33:25 +0200	[diff] [blame]	19	\item{overwrite}{logical; if TRUE, re-fetch and replace any existing
				20	annotation columns. If FALSE (default), only add missing annotation layers
				21	and preserve already fetched ones (e.g., keep POS/lemma from a previous
				22	foundry while adding morph from another).}
				23
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	24	\item{verbose}{logical; if TRUE, print progress information}
				25	}
				26	\value{
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	27	The updated \code{kqo} object with annotation columns
				28	like \code{pos}, \code{lemma}, \code{morph} (and \code{atokens} and \code{annotation_snippet})
				29	in the \verb{@collectedMatches} slot. Each of the \code{pos}, \code{lemma}, \code{morph} and \code{atokens} columns is a data frame
				30	with \code{left}, \code{match}, and \code{right} columns containing list vectors of annotations
				31	for the left context, matched tokens, and right context, respectively.
				32	The original XML snippet for each match is also stored in \code{annotation_snippet}.
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	33	}
				34	\description{
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	35	\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	36	}
				37	\details{
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	38	\strong{\code{fetchAnnotations}} fetches annotations (only token annotations, for now)
				39	for all matches in the \verb{@collectedMatches} slot
				40	of a KorAPQuery object and adds annotation columns directly to the \verb{@collectedMatches}
				41	data frame. The method uses the \code{matchID} from collected matches.
				42
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	43	\strong{Important}: For copyright-restricted corpora, users must be authorized via \code{\link[=auth]{auth()}}
				44	and the initial corpus query must have \code{metadataOnly = FALSE} to ensure snippets are
				45	available for annotation parsing.
				46
				47	The method parses XML snippet annotations and adds linguistic columns to the data frame:
				48	\itemize{
				49	\item \code{pos}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of part-of-speech tags
				50	\item \code{lemma}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of lemmas
				51	\item \code{morph}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of morphological tags
				52	\item \code{atokens}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of token text (from annotations)
				53	\item \code{annotation_snippet}: original XML snippet from the annotation API
				54	}
				55	}
				56	\examples{
				57	\dontrun{
				58
				59	# Fetch annotations for matches using the TreeTagger foundry
				60	# Note: Authorization required for copyright-restricted corpora
				61	q <- KorAPConnection() \|>
				62	auth() \|>
				63	corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
				64	fetchNext(maxFetch = 10) \|>
				65	fetchAnnotations()
				66
				67	# Access linguistic annotations for all matches (below, i denotes the row index of a single match, e.g. 1):
Marc Kupietz	6aa5a0d	2025-09-08 17:51:47 +0200	[diff] [blame]	68	pos_tags <- q@collectedMatches$pos
				69	# Data frame with left/match/right columns for POS tags
				70	lemmas <- q@collectedMatches$lemma
				71	# Data frame with left/match/right columns for lemmas
				72	morphology <- q@collectedMatches$morph
				73	# Data frame with left/match/right columns for morphological tags
				74	atokens <- q@collectedMatches$atokens
				75	# Data frame with left/match/right columns for annotation token text
Marc Kupietz	6aa5a0d	2025-09-08 17:51:47 +0200	[diff] [blame]	76	# Original XML snippet for match i
Marc Kupietz	0af7593	2025-09-09 18:14:16 +0200	[diff] [blame]	77	raw_snippet <- q@collectedMatches$annotation_snippet[[i]]
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	78
				79	# Access specific components:
Marc Kupietz	6aa5a0d	2025-09-08 17:51:47 +0200	[diff] [blame]	80	# POS tags for the matched tokens in match i
Marc Kupietz	0af7593	2025-09-09 18:14:16 +0200	[diff] [blame]	81	match_pos <- q@collectedMatches$pos$match[[i]]
Marc Kupietz	6aa5a0d	2025-09-08 17:51:47 +0200	[diff] [blame]	82	# Lemmas for the left context in match i
Marc Kupietz	0af7593	2025-09-09 18:14:16 +0200	[diff] [blame]	83	left_lemmas <- q@collectedMatches$lemma$left[[i]]
				84	# Token text for the right context in match i
Marc Kupietz	6aa5a0d	2025-09-08 17:51:47 +0200	[diff] [blame]	85	right_tokens <- q@collectedMatches$atokens$right[[i]]
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	86
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	87	# Use a different foundry (e.g., MarMoT)
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	88	q <- KorAPConnection() \|>
				89	auth() \|>
				90	corpusQuery("Ameisenplage", metadataOnly = FALSE) \|>
				91	fetchNext(maxFetch = 10) \|>
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	92	fetchAnnotations(foundry = "marmot")
				93	q@collectedMatches$pos$left[1] # POS tags for the left context of the first match
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	94	}
				95	}
				96	\seealso{
				97	Other corpus search functions:
				98	\code{\link{corpusQuery,KorAPConnection-method}},
				99	\code{\link{fetchAll,KorAPQuery-method}},
				100	\code{\link{fetchNext,KorAPQuery-method}}
				101	}
Marc Kupietz	89f796e	2025-07-19 09:05:06 +0200	[diff] [blame]	102	\concept{Annotations}
Marc Kupietz	a29f3d4	2025-07-18 10:14:43 +0200	[diff] [blame]	103	\concept{corpus search functions}