CA: add percentile ranks and documentation

Change-Id: I3a0c6aab970db5f4685b03164b20e0489e03799f

commit: 130a2a29eb3b593878dc2ac410e8a2b716d64e01 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Sat Oct 18 16:09:23 2025 +0200
committer: Marc Kupietz <kupietz@ids-mannheim.de> Sun Oct 19 15:02:21 2025 +0200
tree: 3a50e5d226336ac30a7011ceec44a0e74e543919
parent: 9894a37ab8a660bca79e94c5480da5428e357e4f [diff] [blame]
diff --git a/man/collocationAnalysis-KorAPConnection-method.Rd b/man/collocationAnalysis-KorAPConnection-method.Rd
index 6b13db7..9c97275 100644
--- a/man/collocationAnalysis-KorAPConnection-method.Rd
+++ b/man/collocationAnalysis-KorAPConnection-method.Rd

@@ -27,6 +27,8 @@
   threshold = 2,
   localStopwords = c(),
   collocateFilterRegex = "^[:alnum:]+-?[:alnum:]*$",
+  missingScoreQuantile = 0.05,
+  vcLabel = NA_character_,
   ...
 )
 }
@@ -73,10 +75,26 @@
 
 \item{collocateFilterRegex}{allow only collocates matching the regular expression}
 
+\item{missingScoreQuantile}{lower quantile (evaluated per association measure) that anchors the adaptive floor used for imputing missing scores between virtual corpora}
+
+\item{vcLabel}{optional label override for the current virtual corpus (used internally when named VC collections are expanded)}
+
 \item{...}{more arguments will be passed to \code{\link[=collocationScoreQuery]{collocationScoreQuery()}}}
 }
 \value{
-Tibble with top collocates, association scores, corresponding URLs for web user interface queries, etc.
+A tibble where each row represents a candidate collocate for the requested node.
+Columns include (depending on the selected association measures):
+
+\itemize{
+\item \code{node}, \code{collocate}, \code{vc}, \code{label}: identifiers for the query node, collocate, virtual corpus, and optional label.
+\item Frequency and contingency information such as \code{frequency}, \code{O}, \code{O1}, \code{O2}, \code{E}, \code{leftContextSize}, \code{rightContextSize}, and \code{w}.
+\item Association measures (e.g. \code{logDice}, \code{ll}, \code{mi}, ...), one column per requested scorer.
+\item Per-label association scores produced by multi-VC comparisons, named with the pattern \code{<measure>_<label>}.
+\item Ranks per label/measure with the pattern \code{rank_<label>_<measure>} (1 is best) and the corresponding percentile ranks \code{percentile_rank_<label>_<measure>}.
+\item Pairwise contrasts for two-label comparisons, e.g. \code{delta_<measure>}, \code{delta_rank_<measure>}, and \code{delta_percentile_rank_<measure>}.
+\item Summary columns comparing the labels per measure (\code{winner_*}, \code{runner_up_*}, \code{loser_*}, and \code{max_delta_*}).
+\item Optional helper columns such as \code{query}, \code{example}, or \code{url} when example retrieval is requested.
+}
 }
 \description{
 Performs a collocation analysis for the given node (or query)
commit	130a2a29eb3b593878dc2ac410e8a2b716d64e01	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Sat Oct 18 16:09:23 2025 +0200
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Sun Oct 19 15:02:21 2025 +0200
tree	3a50e5d226336ac30a7011ceec44a0e74e543919
parent	9894a37ab8a660bca79e94c5480da5428e357e4f [diff] [blame]