CA: add measure based ranks

Change-Id: I84dacfe9f61e9b65268b241bc8adf82c4cc4adc3

commit: 28a29845b877fb28ee76196b67f585fd89344877 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Sat Oct 18 12:25:09 2025 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Sat Oct 18 14:07:27 2025 +0200
tree: 3e0b032275070a5fe2cfccc40a6ee04da49fa0f2
parent: b2862d4cd478b6ec38f89cb38472dd4b416b3224 [diff] [blame]
diff --git a/tests/testthat/test-collocations.R b/tests/testthat/test-collocations.R
index 914558b..52a2a66 100644
--- a/tests/testthat/test-collocations.R
+++ b/tests/testthat/test-collocations.R

@@ -127,7 +127,28 @@
     "winner_logDice",
     "winner_logDice_value",
     "runner_up_logDice",
-    "runner_up_logDice_value"
+    "runner_up_logDice_value",
+    "max_delta_logDice",
+    "winner_rank_logDice",
+    "winner_rank_logDice_value",
+    "runner_up_rank_logDice",
+    "runner_up_rank_logDice_value",
+    "loser_rank_logDice",
+    "loser_rank_logDice_value",
+    "max_delta_rank_logDice",
+    "winner_rank_pmi",
+    "winner_rank_pmi_value",
+    "runner_up_rank_pmi",
+    "runner_up_rank_pmi_value",
+    "loser_rank_pmi",
+    "loser_rank_pmi_value",
+    "max_delta_rank_pmi",
+    "rank_A_logDice",
+    "rank_B_logDice",
+    "rank_A_pmi",
+    "rank_B_pmi",
+    "delta_rank_logDice",
+    "delta_rank_pmi"
   ) %in% colnames(enriched)))
 
   expect_true(all(enriched$winner_logDice == "B"))
@@ -164,6 +185,82 @@
   expect_equal(enriched$max_delta_logDice[1], 4)
 })
 
+test_that("add_multi_vc_comparisons computes rank deltas", {
+  base_tbl <- tidyr::expand_grid( # fixture: every label x collocate pair (9 rows)
+    label = c("A", "B", "C"),
+    collocate = c("c1", "c2", "c3")
+  ) |>
+    dplyr::mutate(
+      node = "n", # all columns except vc and logDice are constant across rows
+      vc = paste0("vc", label), # one vc string per label: vcA, vcB, vcC
+      N = 100,
+      O = 10,
+      O1 = 50,
+      O2 = 40,
+      E = 5,
+      w = 2,
+      leftContextSize = 1,
+      rightContextSize = 1,
+      frequency = 10,
+      logDice = dplyr::case_when( # the only measure that differs between rows
+        label == "A" & collocate == "c1" ~ 9, # c1 scores highest under label A
+        label == "A" & collocate == "c2" ~ 6,
+        label == "A" & collocate == "c3" ~ 3,
+        label == "B" & collocate == "c1" ~ 7,
+        label == "B" & collocate == "c2" ~ 9,
+        label == "B" & collocate == "c3" ~ 5,
+        label == "C" & collocate == "c1" ~ 4, # c1 scores lowest under label C
+        label == "C" & collocate == "c2" ~ 6,
+        label == "C" & collocate == "c3" ~ 8,
+        TRUE ~ 0 # fallback; not reached because all 9 pairs are listed above
+      )
+    )
+
+  enriched <- RKorAPClient:::add_multi_vc_comparisons(base_tbl, "logDice", 0.9)
+  target_row <- enriched |> # all assertions below inspect a single c1 row
+    dplyr::filter(collocate == "c1") |>
+    dplyr::slice_head(n = 1)
+
+  expect_equal(target_row$rank_A_logDice, 1) # c1 logDice inputs: A = 9, B = 7, C = 4
+  expect_equal(target_row$rank_B_logDice, 2)
+  expect_equal(target_row$rank_C_logDice, 3)
+  expect_equal(target_row$winner_rank_logDice, "A") # winner = label with rank 1
+  expect_equal(target_row$winner_rank_logDice_value, 1)
+  expect_equal(target_row$runner_up_rank_logDice, "B")
+  expect_equal(target_row$runner_up_rank_logDice_value, 2)
+  expect_equal(target_row$loser_rank_logDice, "C") # loser = label with rank 3
+  expect_equal(target_row$loser_rank_logDice_value, 3)
+  expect_equal(target_row$max_delta_rank_logDice, 2) # i.e. 3 - 1 for this fixture
+})
+
+test_that("add_multi_vc_comparisons imputes missing ranks for max delta", {
+  sample_result <- tibble::tibble( # fixture: one collocate under two labels
+    node = c("n", "n"),
+    collocate = c("c", "c"),
+    vc = c("vc1", "vc2"),
+    label = c("A", "B"),
+    N = c(100, 100),
+    O = c(10, 10),
+    O1 = c(50, 50),
+    O2 = c(30, 30),
+    E = c(5, 5),
+    w = c(2, 2),
+    leftContextSize = c(1, 1),
+    rightContextSize = c(1, 1),
+    frequency = c(10, 10),
+    logDice = c(5, NA) # label B has no logDice value
+  )
+
+  enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result, "logDice", 0.9)
+
+  expect_equal(enriched$rank_A_logDice[1], 1)
+  expect_true(is.na(enriched$rank_B_logDice[1])) # per-label rank column stays NA
+  expect_equal(enriched$winner_rank_logDice[1], "A")
+  expect_equal(enriched$loser_rank_logDice[1], "B") # B still reported as loser
+  expect_equal(enriched$loser_rank_logDice_value[1], 2) # B's missing rank shows as 2
+  expect_equal(enriched$max_delta_rank_logDice[1], 1) # i.e. 2 - 1 with that value
+})
+
 # New tests for improved coverage of collocationAnalysis.R helper functions
 
 test_that("synsemanticStopwords returns German stopwords", {
commit	28a29845b877fb28ee76196b67f585fd89344877	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Sat Oct 18 12:25:09 2025 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Sat Oct 18 14:07:27 2025 +0200
tree	3e0b032275070a5fe2cfccc40a6ee04da49fa0f2
parent	b2862d4cd478b6ec38f89cb38472dd4b416b3224 [diff] [blame]