% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/KorAPQuery.R
\name{fetchAnnotations,KorAPQuery-method}
\alias{fetchAnnotations,KorAPQuery-method}
\alias{fetchAnnotations}
\title{Fetch annotations for all collected matches}
\usage{
\S4method{fetchAnnotations}{KorAPQuery}(kqo, foundry = "tt", overwrite = FALSE, verbose = kqo@korapConnection@verbose)
}
\arguments{
\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}} with collected matches. Note: the original corpus query should have \code{metadataOnly = FALSE} for annotation parsing to work.}

\item{foundry}{string specifying the foundry to use for annotations (default: \code{"tt"} for TreeTagger)}

\item{overwrite}{logical; if \code{TRUE}, re-fetch and replace any existing
annotation columns. If \code{FALSE} (default), only add missing annotation layers
and preserve already fetched ones (e.g., keep POS/lemma from a previous
foundry while adding morph from another).}

\item{verbose}{logical; if \code{TRUE}, print progress information}
}
\value{
The updated \code{kqo} object with annotation columns
like \code{pos}, \code{lemma}, \code{morph} (and \code{atokens} and \code{annotation_snippet})
in the \verb{@collectedMatches} slot. Each of \code{pos}, \code{lemma}, \code{morph}, and \code{atokens} is a data frame
with \code{left}, \code{match}, and \code{right} columns containing list vectors of annotations
for the left context, matched tokens, and right context, respectively.
The original XML snippet for each match is stored in \code{annotation_snippet}.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
}
\details{
\strong{\code{fetchAnnotations}} fetches annotations (only token annotations, for now)
for all matches in the \verb{@collectedMatches} slot
of a KorAPQuery object and adds annotation columns directly to the \verb{@collectedMatches}
data frame. The method uses the \code{matchID} from collected matches.

\strong{Important}: For copyright-restricted corpora, users must be authorized via \code{\link[=auth]{auth()}}
and the initial corpus query must have \code{metadataOnly = FALSE} to ensure snippets are
available for annotation parsing.

The method parses XML snippet annotations and adds linguistic columns to the data frame:
\itemize{
\item \code{pos}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of part-of-speech tags
\item \code{lemma}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of lemmas
\item \code{morph}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of morphological tags
\item \code{atokens}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of token text (from annotations)
\item \code{annotation_snippet}: original XML snippet from the annotation API
}
}
\examples{
\dontrun{

# Fetch annotations for matches using the TreeTagger foundry
# Note: Authorization required for copyright-restricted corpora
q <- KorAPConnection() |>
  auth() |>
  corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
  fetchNext(maxFetch = 10) |>
  fetchAnnotations()

# Access the annotation columns (below, i is the row index of a match, e.g. 1):
pos_tags <- q@collectedMatches$pos # Data frame with left/match/right columns for POS tags
lemmas <- q@collectedMatches$lemma # Data frame with left/match/right columns for lemmas
morphology <- q@collectedMatches$morph # Data frame with left/match/right columns for morphological tags
atokens <- q@collectedMatches$atokens # Data frame with left/match/right columns for annotation token text
raw_snippet <- q@collectedMatches$annotation_snippet[[i]] # Original XML snippet for match i

# Access specific components:
match_pos <- q@collectedMatches$pos$match[[i]] # POS tags for the matched tokens in match i
left_lemmas <- q@collectedMatches$lemma$left[[i]] # Lemmas for the left context in match i
right_tokens <- q@collectedMatches$atokens$right[[i]] # Token text for the right context in match i

# Use a different foundry (e.g., MarMoT)
q <- KorAPConnection() |>
  auth() |>
  corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
  fetchNext(maxFetch = 10) |>
  fetchAnnotations(foundry = "marmot")
q@collectedMatches$pos$left[1] # POS tags for the left context of the first match
}
}
\seealso{
Other corpus search functions:
\code{\link{corpusQuery,KorAPConnection-method}},
\code{\link{fetchAll,KorAPQuery-method}},
\code{\link{fetchNext,KorAPQuery-method}}
}
\concept{Annotations}
\concept{corpus search functions}