blob: 99f20e44d7a00fa0659283977305114201a845cd [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/KorAPQuery.R
\name{fetchAnnotations,KorAPQuery-method}
\alias{fetchAnnotations,KorAPQuery-method}
\title{Fetch annotations for all collected matches}
\usage{
\S4method{fetchAnnotations}{KorAPQuery}(
  kqo,
  foundry = "tt",
  overwrite = FALSE,
  verbose = kqo@korapConnection@verbose
)
}
\arguments{
\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}} with collected matches. Note: the original corpus query must have been run with \code{metadataOnly = FALSE} for annotation parsing to work.}

\item{foundry}{string specifying the foundry to use for annotations (default: \code{"tt"} for TreeTagger)}

\item{overwrite}{logical; if \code{TRUE}, re-fetch and replace any existing
annotation columns. If \code{FALSE} (default), only add missing annotation layers
and preserve already fetched ones (e.g., keep POS/lemma from a previous
foundry while adding morph from another).}

\item{verbose}{logical; if \code{TRUE}, print progress information (default: the \code{verbose} setting of the KorAP connection stored in \code{kqo})}
}
\value{
The updated \code{kqo} object with annotation columns
such as \code{pos}, \code{lemma}, \code{morph} (as well as \code{atokens} and \code{annotation_snippet})
in the \verb{@collectedMatches} slot. Each of these columns except \code{annotation_snippet} is a data frame
with \code{left}, \code{match}, and \code{right} columns containing list vectors of annotations
for the left context, matched tokens, and right context, respectively.
The original XML snippet for each match is stored in \code{annotation_snippet}.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
}
\details{
\strong{\code{fetchAnnotations}} fetches annotations (only token annotations, for now)
for all matches in the \verb{@collectedMatches} slot
of a KorAPQuery object and adds annotation columns directly to the \verb{@collectedMatches}
data frame. The method uses the \code{matchID} from collected matches.

\strong{Important}: For copyright-restricted corpora, users must be authorized via \code{\link[=auth]{auth()}},
and the initial corpus query must have \code{metadataOnly = FALSE} to ensure snippets are
available for annotation parsing.

The method parses XML snippet annotations and adds the following linguistic columns to the data frame:
\itemize{
\item \code{pos}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of part-of-speech tags
\item \code{lemma}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of lemmas
\item \code{morph}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of morphological tags
\item \code{atokens}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of token text (from annotations)
\item \code{annotation_snippet}: original XML snippet from the annotation API
}
}
\examples{
\dontrun{

# Fetch annotations for matches using the default TreeTagger foundry
# Note: Authorization required for copyright-restricted corpora
q <- KorAPConnection() |>
  auth() |>
  corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
  fetchNext(maxFetch = 10) |>
  fetchAnnotations()

# Access linguistic annotations for all collected matches:
pos_tags <- q@collectedMatches$pos
# Data frame with left/match/right columns for POS tags
lemmas <- q@collectedMatches$lemma
# Data frame with left/match/right columns for lemmas
morphology <- q@collectedMatches$morph
# Data frame with left/match/right columns for morphological tags
atokens <- q@collectedMatches$atokens
# Data frame with left/match/right columns for annotation token text

# Pick a single match by its row index i
i <- 1
# Original XML snippet for match i
raw_snippet <- q@collectedMatches$annotation_snippet[[i]]

# Access specific components:
# POS tags for the matched tokens in match i
match_pos <- q@collectedMatches$pos$match[[i]]
# Lemmas for the left context in match i
left_lemmas <- q@collectedMatches$lemma$left[[i]]
# Token text for the right context in match i
right_tokens <- q@collectedMatches$atokens$right[[i]]

# Use a different foundry (e.g., MarMoT)
q <- KorAPConnection() |>
  auth() |>
  corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
  fetchNext(maxFetch = 10) |>
  fetchAnnotations(foundry = "marmot")
q@collectedMatches$pos$left[[1]] # POS tags for the left context of the first match
}
}
\seealso{
Other corpus search functions:
\code{\link{corpusQuery,KorAPConnection-method}},
\code{\link{fetchAll,KorAPQuery-method}},
\code{\link{fetchNext,KorAPQuery-method}}
}
\concept{Annotations}
\concept{corpus search functions}