# Queries association scores for a single node/collocate pair and checks that
# every score column in the result equals the value recomputed from the
# frequency columns of that same result.
test_that("collocationScoreQuery works", {
  skip_if_offline()
  kco <- new("KorAPConnection", accessToken = NULL, cache = TRUE, verbose = TRUE)
  df <- collocationScoreQuery(
    kco,
    "Ameisenplage",
    "heimgesucht",
    leftContextSize = 0,
    rightContextSize = 1
  )

  # Apply an association measure to the frequency columns of the result.
  recompute <- function(measure) {
    measure(df$O1, df$O2, df$O, df$N, df$E, df$w)
  }

  expect_gt(df$logDice, 1)
  expect_equal(df$ll, recompute(ll))
  expect_equal(df$pmi, recompute(pmi))
  expect_equal(df$mi2, recompute(mi2))
  expect_equal(df$mi3, recompute(mi3))
  expect_equal(df$logDice, recompute(logDice))
})
| 12 | |
Marc Kupietz | 581a29b | 2021-09-04 20:51:04 +0200 | [diff] [blame] | 13 | |
# Runs a collocation analysis without an access token and checks that a
# warning mentioning the access token is raised while the result is still
# usable: observed frequency exceeds the expected one and logDice is above -1.
test_that("collocationAnalysis works and warns about missing token", {
  skip_if_offline()
  kco <- new("KorAPConnection", accessToken = NULL, verbose = TRUE)
  expect_warning(
    # The assignment happens inside expect_warning() so the result stays
    # available for the expectations below.
    # No trailing comma after the last argument: it would pass an extra,
    # empty argument to collocationAnalysis().
    df <- collocationAnalysis(
      kco,
      "focus([tt/p=ADJA] {Newstickeritis})",
      leftContextSize = 1,
      rightContextSize = 0
    ),
    "access token"
  )
  expect_gt(df$O, df$E)
  expect_gt(df$logDice, -1)
})
| 32 | |
# Analyses a node string that is not expected to occur in the two requested
# virtual corpora; the call must only warn about the access token and return
# a result with zero rows.
test_that("collocationAnalysis on unaccounted strings does not error out", {
  skip_if_offline()
  kco <- new("KorAPConnection", accessToken = NULL, verbose = TRUE)
  virtual_corpora <- c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/")
  expect_warning(
    df <- collocationAnalysis(
      kco,
      "XXXXXXXXAmeisenplage",
      vc = virtual_corpora,
      maxRecurse = 2
    ),
    "access token"
  )
  testthat::expect_equal(nrow(df), 0)
})
Marc Kupietz | d6314b6 | 2021-12-22 12:49:09 +0100 | [diff] [blame] | 42 | |
# removeWithinSpan() should strip the surrounding contains(<span>, ...) wrapper
# for the given span and leave only the inner query.
test_that("removeWithinSpanWorks", {
  wrapped_query <-
    "contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))"
  inner_query <- "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)"

  expect_equal(removeWithinSpan(wrapped_query, "base/s=s"), inner_query)
})
Marc Kupietz | dbdbb1f | 2025-02-19 10:33:06 +0100 | [diff] [blame^] | 48 | |
| 49 | |
# Merges two single-row result tables for the same collocate ("Anspruch"),
# one found in the left context and one in the right context, and checks the
# merged frequencies and the combined query string.
test_that("mergeDuplicateCollocatesWorksAsExpected", {
  # Row for the collocate in the left context (leftContextSize 1, right 0).
  ldf <- tibble(
    node = "focus(in [tt/p=NN] {[tt/l=nehmen]})",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]})",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=Anspruch%20focus%28in%20%5btt%2fp%3dNN%5d%20%7b%5btt%2fl%3dnehmen%5d%7d%29&ql=poliqarp",
    w = 1,
    leftContextSize = 1,
    rightContextSize = 0,
    N = 23578528381.5,
    O = 0.5,
    O1 = 1168410.5,
    O2 = 1296870.5,
    E = 64.2651265093014,
    pmi = 11.9173498777957,
    mi2 = 29.8406639214616,
    mi3 = 47.7639779651274,
    logDice = 11.6899933757298,
    ll = 3717716.74208791
  )

  # Row for the same collocate in the right context (left 0, right 1).
  rdf <- tibble(
    node = "focus({[tt/l=nehmen] in} [tt/p=NN])",
    collocate = "Anspruch",
    label = "",
    vc = "",
    query = "focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch",
    webUIRequestUrl =
      "https://korap.ids-mannheim.de/?q=focus%28%7b%5btt%2fl%3dnehmen%5d%20in%7d%20%5btt%2fp%3dNN%5d%29%20Anspruch&ql=poliqarp",
    w = 1,
    leftContextSize = 0,
    rightContextSize = 1,
    N = 23578528381.5,
    O = 0.5,
    O1 = 17077.5,
    O2 = 1296870.5,
    E = 0.939299756346416,
    pmi = 7.99469408391783,
    mi2 = 15.8990457079122,
    mi3 = 23.8033973319065,
    logDice = 2.57887487309409,
    ll = 2181.35986032019
  )

  merged <- mergeDuplicateCollocates(ldf, rdf, smoothingConstant = 0.5)

  # Expected O equals 0.5 + 0.5 - 0.5 and expected O1 equals
  # 1168410.5 + 17077.5 - 0.5; presumably the smoothing constant is meant to
  # be counted only once after merging -- confirm against
  # mergeDuplicateCollocates(). O2 is expected to stay at the shared value.
  expect_equal(merged$O, 0.5)
  expect_equal(merged$O1, 1185487.5)
  expect_equal(merged$O2, 1296870.5)

  # The merged query is expected to be both input queries joined by " | ".
  expect_equal(
    merged$query,
    "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch"
  )
})