CA: add winner columns

Change-Id: Ia6ec4821d08352f2044e8a7101880d75419d995d
diff --git a/R/collocationAnalysis.R b/R/collocationAnalysis.R
index 4f58396..56d7923 100644
--- a/R/collocationAnalysis.R
+++ b/R/collocationAnalysis.R
@@ -306,6 +306,8 @@
   non_score_cols <- c("N", "O", "O1", "O2", "E", "w", "leftContextSize", "rightContextSize", "frequency")
   score_cols <- setdiff(numeric_cols, non_score_cols)
 
+  score_cols <- setdiff(score_cols, "rankWithinLabel")
+
   if (length(score_cols) == 0) {
     return(result)
   }
@@ -331,7 +333,9 @@
       values_fn = dplyr::first
     )
 
-  labels <- make.names(unique(result$label))
+  raw_labels <- unique(result$label)
+  labels <- make.names(raw_labels)
+  label_map <- stats::setNames(raw_labels, labels)
 
   if (length(labels) == 2) {
     fill_scores <- function(x, y) {
@@ -364,6 +368,8 @@
         next
       }
       filled <- fill_scores(comparison[[left_col]], comparison[[right_col]])
+      comparison[[left_col]] <- filled$x
+      comparison[[right_col]] <- filled$y
       comparison[[paste0("delta_", col)]] <- filled$x - filled$y
     }
 
@@ -371,10 +377,86 @@
     right_rank <- paste0("rankWithinLabel_", right_label)
     if (all(c(left_rank, right_rank) %in% names(comparison))) {
       filled_rank <- fill_ranks(comparison[[left_rank]], comparison[[right_rank]])
+      comparison[[left_rank]] <- filled_rank$x
+      comparison[[right_rank]] <- filled_rank$y
       comparison[["delta_rankWithinLabel"]] <- filled_rank$x - filled_rank$y
     }
   }
 
+  for (col in score_cols) {
+    value_cols <- paste0(col, "_", labels)
+    existing <- value_cols %in% names(comparison)
+    if (!any(existing)) {
+      next
+    }
+    value_cols <- value_cols[existing]
+    safe_labels <- labels[existing]
+
+    score_values <- comparison[, value_cols, drop = FALSE]
+
+    winner_label_col <- paste0("winner_", col)
+    winner_value_col <- paste0("winner_", col, "_value")
+    runner_label_col <- paste0("runner_up_", col)
+    runner_value_col <- paste0("runner_up_", col, "_value")
+
+    if (nrow(score_values) == 0) {
+      comparison[[winner_label_col]] <- character(0)
+      comparison[[winner_value_col]] <- numeric(0)
+      comparison[[runner_label_col]] <- character(0)
+      comparison[[runner_value_col]] <- numeric(0)
+      next
+    }
+
+    score_matrix <- as.matrix(score_values)
+
+    winner_labels <- apply(score_matrix, 1, function(row) {
+      row <- as.numeric(row)
+      valid <- which(!is.na(row))
+      if (length(valid) == 0) {
+        return(NA_character_)
+      }
+      ord <- valid[order(row[valid], decreasing = TRUE)]
+      unname(label_map[safe_labels[ord[1]]])
+    })
+    winner_labels <- unname(as.character(winner_labels))
+
+    winner_values <- apply(score_matrix, 1, function(row) {
+      row <- as.numeric(row)
+      if (all(is.na(row))) {
+        return(NA_real_)
+      }
+      max(row, na.rm = TRUE)
+    })
+    winner_values <- unname(as.numeric(winner_values))
+
+    runner_labels <- apply(score_matrix, 1, function(row) {
+      row <- as.numeric(row)
+      valid <- which(!is.na(row))
+      if (length(valid) < 2) {
+        return(NA_character_)
+      }
+      ord <- valid[order(row[valid], decreasing = TRUE)]
+      unname(label_map[safe_labels[ord[2]]])
+    })
+    runner_labels <- unname(as.character(runner_labels))
+
+    runner_values <- apply(score_matrix, 1, function(row) {
+      row <- as.numeric(row)
+      valid <- which(!is.na(row))
+      if (length(valid) < 2) {
+        return(NA_real_)
+      }
+      ord <- valid[order(row[valid], decreasing = TRUE)]
+      row[ord[2]]
+    })
+    runner_values <- unname(as.numeric(runner_values))
+
+    comparison[[winner_label_col]] <- winner_labels
+    comparison[[winner_value_col]] <- winner_values
+    comparison[[runner_label_col]] <- runner_labels
+    comparison[[runner_value_col]] <- runner_values
+  }
+
   dplyr::left_join(result, comparison, by = c("node", "collocate"))
 }
 
diff --git a/tests/testthat/test-collocations.R b/tests/testthat/test-collocations.R
index 448e061..bd86cdf 100644
--- a/tests/testthat/test-collocations.R
+++ b/tests/testthat/test-collocations.R
@@ -102,6 +102,39 @@
   expect_equal(merged$query, "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch")
 })
 
+test_that("add_multi_vc_comparisons adds favorite columns", {
+  sample_result <- tibble::tibble(
+    node = c("n", "n"),
+    collocate = c("c", "c"),
+    vc = c("vc1", "vc2"),
+    label = c("A", "B"),
+    N = c(100, 100),
+    O = c(10, 20),
+    O1 = c(50, 50),
+    O2 = c(30, 30),
+    E = c(5, 5),
+    w = c(2, 2),
+    leftContextSize = c(1, 1),
+    rightContextSize = c(1, 1),
+    frequency = c(10, 20),
+    logDice = c(5, 7),
+    pmi = c(2, 3)
+  )
+
+  enriched <- RKorAPClient:::add_multi_vc_comparisons(sample_result, "logDice", 0.9)
+
+  expect_true(all(c(
+    "winner_logDice",
+    "winner_logDice_value",
+    "runner_up_logDice",
+    "runner_up_logDice_value"
+  ) %in% colnames(enriched)))
+
+  expect_true(all(enriched$winner_logDice == "B"))
+  expect_true(all(enriched$runner_up_logDice == "A"))
+  expect_true(all(enriched$winner_logDice_value >= enriched$runner_up_logDice_value))
+})
+
 # New tests for improved coverage of collocationAnalysis.R helper functions
 
 test_that("synsemanticStopwords returns German stopwords", {