# blob: 46f604f4ae3a5e8ad1128e7a4dd9da0f08090692 (repository web-viewer header; not part of the test code)
# Integration test: fetchAnnotations() adds structured `atokens`/`pos` columns
# to the collected matches of a query. Needs network access (see
# skip_if_offline()) and is skipped when the query yields no matches.
test_that("fetchAnnotations works with valid matches", {
  skip_if_offline()

  kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
  q <- kco %>%
    corpusQuery(
      "Test",
      "pubDate since 2014",
      metadataOnly = FALSE,
      fields = c("textSigle", "snippet")
    ) %>%
    fetchNext(maxFetch = 2)

  # Skip test if no matches found
  skip_if(
    is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0,
    "No matches found for test query"
  )

  # Structured annotation columns must be absent before annotating
  expect_false("atokens" %in% colnames(q@collectedMatches))
  expect_false("pos" %in% colnames(q@collectedMatches))

  # matchID must be present and non-missing in collectedMatches
  expect_true("matchID" %in% colnames(q@collectedMatches))
  expect_true(all(!is.na(q@collectedMatches$matchID)))

  # fetchAnnotations with the default foundry
  q_with_annotations <- fetchAnnotations(q, verbose = FALSE)
  annotated <- q_with_annotations@collectedMatches

  # Structured annotation columns are now present
  expect_true("atokens" %in% colnames(annotated))
  expect_true("pos" %in% colnames(annotated))

  # Each structured column has left/match/right components
  context_parts <- c("left", "match", "right")
  expect_true(all(context_parts %in% names(annotated$atokens)))
  expect_true(all(context_parts %in% names(annotated$pos)))

  # fetchAnnotations with an explicitly requested foundry
  q_with_tt <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
  tt_matches <- q_with_tt@collectedMatches
  expect_true("atokens" %in% colnames(tt_matches))
  expect_true("pos" %in% colnames(tt_matches))

  # TRUE when the first match has at least one element in any of the
  # left/match/right components of the given annotation column.
  first_match_has_content <- function(annotation) {
    parts <- list(
      annotation$left[[1]],
      annotation$match[[1]],
      annotation$right[[1]]
    )
    any(lengths(parts) > 0)
  }

  # Annotations must contain actual content (regression test for URL
  # construction). The row count is asserted instead of used as a guard so
  # that an unexpectedly empty result fails the test rather than silently
  # skipping the content checks below.
  expect_gt(nrow(tt_matches), 0)
  expect_true(first_match_has_content(tt_matches$atokens))
  expect_true(first_match_has_content(tt_matches$pos))
})
47
# fetchAnnotations() on a query without collected matches must warn and hand
# back the query object unchanged.
# NOTE(review): every other test in this file calls skip_if_offline() before
# constructing a KorAPConnection; this one does not. Confirm that the
# constructor works without network access, otherwise add the skip here too.
test_that("fetchAnnotations handles empty matches gracefully", {
  kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)

  # Build a query object that has no collected matches
  q <- KorAPQuery(
    korapConnection = kco,
    collectedMatches = NULL
  )

  # The call must emit the warning; the assignment inside expect_warning()
  # captures the returned object for the identity check below.
  expect_warning(
    result <- fetchAnnotations(q, verbose = FALSE),
    "No collected matches found"
  )
  expect_identical(result, q)
})
64
# Integration test: fetchAnnotations() must only add columns to
# collectedMatches and leave the other inspected slots of the query untouched.
# Needs network access (see skip_if_offline()) and is skipped when the query
# yields no matches.
test_that("fetchAnnotations preserves original object structure", {
  skip_if_offline()

  kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
  q <- kco %>%
    corpusQuery(
      "Test",
      "pubDate since 2014",
      metadataOnly = FALSE,
      fields = c("textSigle", "snippet")
    ) %>%
    fetchNext(maxFetch = 1)

  # Skip test if no matches found
  skip_if(
    is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0,
    "No matches found for test query"
  )

  q_original <- q
  q_with_annotations <- fetchAnnotations(q, verbose = FALSE)

  # Slots other than collectedMatches are carried over unchanged
  expect_identical(
    q_with_annotations@korapConnection,
    q_original@korapConnection
  )
  expect_identical(q_with_annotations@request, q_original@request)
  expect_identical(q_with_annotations@vc, q_original@vc)
  expect_identical(q_with_annotations@totalResults, q_original@totalResults)

  before <- q_original@collectedMatches
  after <- q_with_annotations@collectedMatches

  # Same rows, strictly more columns. expect_equal()/expect_gt() report the
  # actual counts on failure, unlike expect_true() on a comparison.
  expect_equal(nrow(after), nrow(before))
  expect_gt(ncol(after), ncol(before))

  # Every original column is still there
  expect_true(all(colnames(before) %in% colnames(after)))

  # The new annotation columns are present
  expect_true("atokens" %in% colnames(after))
  expect_true("pos" %in% colnames(after))
})
97
# Integration test: the `atokens` and `pos` columns produced by
# fetchAnnotations() are data frames with left/match/right list columns, and
# the first match carries consistent token and POS counts. Needs network
# access (see skip_if_offline()) and is skipped when the query yields no
# matches.
test_that("fetchAnnotations returns structured left/match/right format", {
  skip_if_offline()

  kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
  q <- kco %>%
    corpusQuery(
      "Test",
      "pubDate since 2014",
      metadataOnly = FALSE,
      fields = c("textSigle", "snippet")
    ) %>%
    fetchNext(maxFetch = 1)

  # Skip test if no matches found
  skip_if(
    is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0,
    "No matches found for test query"
  )

  q_with_annotations <- fetchAnnotations(q, foundry = "tt", verbose = FALSE)
  annotated <- q_with_annotations@collectedMatches

  # Structured annotation columns exist
  expect_true("atokens" %in% colnames(annotated))
  expect_true("pos" %in% colnames(annotated))

  atokens <- annotated$atokens
  pos <- annotated$pos

  # Each annotation column is itself a data frame ...
  expect_s3_class(atokens, "data.frame")
  expect_s3_class(pos, "data.frame")

  # ... with left/match/right components ...
  context_parts <- c("left", "match", "right")
  expect_true(all(context_parts %in% names(atokens)))
  expect_true(all(context_parts %in% names(pos)))

  # ... each of which is a list column. `info` names the failing component.
  for (part in context_parts) {
    expect_true(is.list(atokens[[part]]), info = paste("atokens", part))
    expect_true(is.list(pos[[part]]), info = paste("pos", part))
  }

  # The first match must hold actual data. The row count is asserted instead
  # of used as a guard so that an unexpectedly empty result fails the test
  # rather than silently skipping the checks below.
  expect_gt(nrow(annotated), 0)

  # Number of elements of the first match across left, match and right
  first_match_size <- function(annotation) {
    length(annotation$left[[1]]) +
      length(annotation$match[[1]]) +
      length(annotation$right[[1]])
  }

  total_tokens <- first_match_size(atokens)
  total_pos <- first_match_size(pos)

  # At least one of left/match/right has content
  expect_gt(total_tokens, 0)
  expect_gt(total_pos, 0)

  # Token count should match POS count
  expect_equal(total_tokens, total_pos)

  # Match tokens should not be empty (since we found a match)
  expect_gt(length(atokens$match[[1]]), 0)
  expect_gt(length(pos$match[[1]]), 0)
})
150
# Integration test: fetched matches keep their matchID, and the matchStart /
# matchEnd columns agree with the positions encoded in that ID. Needs network
# access (see skip_if_offline()) and is skipped when the query yields no
# matches.
test_that("matchID is preserved in collectedMatches", {
  skip_if_offline()

  kco <- KorAPConnection(verbose = FALSE, cache = FALSE, accessToken = NULL)
  q <- kco %>%
    corpusQuery(
      "Test",
      "pubDate since 2014",
      metadataOnly = FALSE,
      fields = c("textSigle", "snippet")
    ) %>%
    fetchNext(maxFetch = 1)

  # Skip test if no matches found
  skip_if(
    is.null(q@collectedMatches) || nrow(q@collectedMatches) == 0,
    "No matches found for test query"
  )

  matches <- q@collectedMatches

  # matchID is present and never missing
  expect_true("matchID" %in% colnames(matches))
  expect_true(all(!is.na(matches$matchID)))

  # matchID format: must contain a "-p<digits>-<digits>" position part
  expect_true(all(grepl("-p\\d+-\\d+", matches$matchID)))

  # Pull both position numbers out of every ID in one vectorised pass.
  position_pattern <- ".*-p(\\d+)-(\\d+).*"
  expected_start <- as.integer(sub(position_pattern, "\\1", matches$matchID))
  # matchEnd is expected to be one less than the second number in the ID
  # (presumably the ID's end offset is exclusive; confirm against the API).
  expected_end <- as.integer(sub(position_pattern, "\\2", matches$matchID)) - 1

  # matchStart and matchEnd must agree with the IDs for all rows at once
  expect_equal(matches$matchStart, expected_start)
  expect_equal(matches$matchEnd, expected_end)
})