CA: document how missing scores are imputed (roxygen + inline comments)

Change-Id: I0268e5ad26d91300e425588c9f07eb32d45171d1
diff --git a/R/collocationAnalysis.R b/R/collocationAnalysis.R
index 69d2208..f373104 100644
--- a/R/collocationAnalysis.R
+++ b/R/collocationAnalysis.R
@@ -42,7 +42,7 @@
 #' @param threshold              minimum value of `thresholdScore` function call to apply collocation analysis recursively
 #' @param localStopwords         vector of stopwords that will not be considered as collocates in the current function call, but that will not be passed to recursive calls
 #' @param collocateFilterRegex   allow only collocates matching the regular expression
-#' @param missingScoreQuantile   lower quantile (evaluated per association measure) that anchors the adaptive floor used for imputing missing scores between virtual corpora
+#' @param missingScoreQuantile   lower quantile (evaluated per association measure) that anchors the adaptive floor used for imputing missing scores between virtual corpora; a robust spread is subtracted from this anchor so the imputed values stay below the weakest observed scores
 #' @param vcLabel optional label override for the current virtual corpus (used internally when named VC collections are expanded)
 #' @param ...                    more arguments will be passed to [collocationScoreQuery()]
 #' @inheritParams collocationScoreQuery,KorAPConnection-method
@@ -331,12 +331,14 @@
   }
 
   compute_score_floor <- function(values) {
+    # Estimate a conservative floor so missing scores can be imputed without favoring any label
     finite_values <- values[is.finite(values)]
     if (length(finite_values) == 0) {
       return(0)
     }
 
     prob <- min(max(missingScoreQuantile, 0), 0.5)
+    # Use a lower quantile as the anchor to stay near the weakest attested scores
     q_val <- suppressWarnings(stats::quantile(finite_values,
       probs = prob,
       names = FALSE,
@@ -368,6 +370,7 @@
       spread <- max(abs(q_val), abs(min_val), 1e-06)
     }
 
+    # Step away from the anchor by a robust spread estimate to avoid ties with real scores
     candidate <- q_val - spread
     if (!is.finite(candidate)) {
       candidate <- min_val
diff --git a/man/collocationAnalysis-KorAPConnection-method.Rd b/man/collocationAnalysis-KorAPConnection-method.Rd
index 9c97275..a912430 100644
--- a/man/collocationAnalysis-KorAPConnection-method.Rd
+++ b/man/collocationAnalysis-KorAPConnection-method.Rd
@@ -75,7 +75,7 @@
 
 \item{collocateFilterRegex}{allow only collocates matching the regular expression}
 
-\item{missingScoreQuantile}{lower quantile (evaluated per association measure) that anchors the adaptive floor used for imputing missing scores between virtual corpora}
+\item{missingScoreQuantile}{lower quantile (evaluated per association measure) that anchors the adaptive floor used for imputing missing scores between virtual corpora; a robust spread is subtracted from this anchor so the imputed values stay below the weakest observed scores}
 
 \item{vcLabel}{optional label override for the current virtual corpus (used internally when named VC collections are expanded)}