blob: b2dc3bff6ac9f5ced5ac52ff1f042d177eaf82d6 [file] [log] [blame]
Marc Kupietza29f3d42025-07-18 10:14:43 +02001% Generated by roxygen2: do not edit by hand
2% Please edit documentation in R/KorAPQuery.R
3\name{fetchAnnotations,KorAPQuery-method}
4\alias{fetchAnnotations,KorAPQuery-method}
5\alias{fetchAnnotations}
6\title{Fetch annotations for all collected matches}
7\usage{
8\S4method{fetchAnnotations}{KorAPQuery}(kqo, foundry = "tt", verbose = kqo@korapConnection@verbose)
9}
10\arguments{
11\item{kqo}{object obtained from \code{\link[=corpusQuery]{corpusQuery()}} with collected matches. Note: the original corpus query should have \code{metadataOnly = FALSE} for annotation parsing to work.}
12
13\item{foundry}{string specifying the foundry to use for annotations (default: \code{"tt"} for TreeTagger)}
14
15\item{verbose}{logical; if \code{TRUE}, print progress information}
16}
17\value{
18The updated \code{kqo} object with annotation columns added to \verb{@collectedMatches}
19}
20\description{
21\strong{\code{fetchAnnotations}} fetches annotations for all matches in the \verb{@collectedMatches} slot
22of a KorAPQuery object and adds annotation columns directly to the \verb{@collectedMatches}
23data frame. The method automatically uses the \code{matchID} from collected matches when
24available for safer and more reliable annotation retrieval, falling back to constructing
25URLs from \code{matchStart} and \code{matchEnd} if necessary.
26}
27\details{
28\strong{Important}: For copyright-restricted corpora, users must be authorized via \code{\link[=auth]{auth()}}
29and the initial corpus query must have \code{metadataOnly = FALSE} to ensure snippets are
30available for annotation parsing.
31
32The method parses XML snippet annotations and adds linguistic columns to the data frame:
33\itemize{
34\item \code{pos}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of part-of-speech tags
35\item \code{lemma}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of lemmas
36\item \code{morph}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of morphological tags
37\item \code{atokens}: data frame with \code{left}, \code{match}, \code{right} columns, each containing list vectors of token text (from annotations)
38\item \code{annotation_snippet}: original XML snippet from the annotation API
39}
40}
41\examples{
42\dontrun{
43
44# Fetch annotations for matches using the TreeTagger foundry
45# Note: Authorization required for copyright-restricted corpora
46q <- KorAPConnection() |>
47 auth() |>
48 corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
49 fetchNext(maxFetch = 10) |>
50 fetchAnnotations()
51
52# Access linguistic annotations (each column covers all matches; i below is the index of one match, e.g. i <- 1):
53pos_tags <- q@collectedMatches$pos # Data frame with left/match/right columns for POS tags
54lemmas <- q@collectedMatches$lemma # Data frame with left/match/right columns for lemmas
55morphology <- q@collectedMatches$morph # Data frame with left/match/right columns for morphological tags
56atokens <- q@collectedMatches$atokens # Data frame with left/match/right columns for annotation token text
57raw_snippet <- q@collectedMatches$annotation_snippet[[i]] # Original XML snippet for match i
58
59# Access specific components:
60match_pos <- q@collectedMatches$pos$match[[i]] # POS tags for the matched tokens in match i
61left_lemmas <- q@collectedMatches$lemma$left[[i]] # Lemmas for the left context in match i
62right_tokens <- q@collectedMatches$atokens$right[[i]] # Token text for the right context in match i
63
64# Use a different foundry (e.g., mate-parser)
65q <- KorAPConnection() |>
66 auth() |>
67 corpusQuery("Ameisenplage", metadataOnly = FALSE) |>
68 fetchNext(maxFetch = 10) |>
69 fetchAnnotations(foundry = "mate")
70q@collectedMatches
71}
72}
73\seealso{
74Other corpus search functions:
75\code{\link{corpusQuery,KorAPConnection-method}},
76\code{\link{fetchAll,KorAPQuery-method}},
77\code{\link{fetchNext,KorAPQuery-method}}
78}
79\concept{corpus search functions}