CA: fix score threshold in recursive CA
Change-Id: I439a29559fab741bbb80c23591d1c64e99d1b0b6
diff --git a/R/collocationAnalysis.R b/R/collocationAnalysis.R
index ab8d2f8f..4344d1e 100644
--- a/R/collocationAnalysis.R
+++ b/R/collocationAnalysis.R
@@ -38,8 +38,8 @@
#' @param expand if TRUE, `node` and `vc` parameters are expanded to all of their combinations
#' @param maxRecurse apply collocation analysis recursively `maxRecurse` times
#' @param addExamples If TRUE, examples for instances of collocations will be added in a column `example`. This makes a difference in particular if `node` is given as a lemma query.
-#' @param thresholdScore association score function (see \code{\link{association-score-functions}}) to use for computing the threshold that is applied for recursive collocation analysis calls
-#' @param threshold minimum value of `thresholdScore` function call to apply collocation analysis recursively
+#' @param thresholdScore association score function (see \code{\link{association-score-functions}}) whose value is compared against `threshold` in recursive collocation analysis calls (only applied when \code{maxRecurse > 0})
+#' @param threshold minimum value that the `thresholdScore` function must yield for collocation analysis to be applied recursively (only applied when \code{maxRecurse > 0})
#' @param localStopwords vector of stopwords that will not be considered as collocates in the current function call, but that will not be passed to recursive calls
#' @param collocateFilterRegex allow only collocates matching the regular expression
#' @param queryMissingScores if TRUE, attempt to retrieve corpus-based association scores for vc/collocate combinations that would otherwise be imputed, by re-querying the KorAP backend without applying the collocate frequency threshold
@@ -289,10 +289,19 @@
missingScoreQuantile = missingScoreQuantile,
collocateFilterRegex = collocateFilterRegex,
queryMissingScores = queryMissingScores,
+ thresholdScore = thresholdScore,
+ threshold = threshold,
vcLabel = vcLabel
) |>
- bind_rows(result) |>
- filter(logDice >= 2) |>
+ bind_rows(result)
+
+ if (threshold_col %in% names(result)) {
+ threshold_values <- result[[threshold_col]]
+ keep_idx <- is.na(threshold_values) | threshold_values >= threshold
+ result <- result[keep_idx, , drop = FALSE]
+ }
+
+ result <- result |>
filter(O >= minOccur) |>
dplyr::arrange(dplyr::desc(logDice))
}