Add full collocation analysis (client side only implementation)
Resolves #2
Change-Id: Ib01d89a72b44ff06816b21532b7ea709a4e837b0
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index b3a0edf..e5cf68a 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd
@@ -5,6 +5,8 @@
\alias{KorAPQuery-class}
\alias{KorAPQuery}
\alias{initialize,KorAPQuery-method}
+\alias{corpusQuery,KorAPConnection-method}
+\alias{corpusQuery}
\alias{fetchNext,KorAPQuery-method}
\alias{fetchNext}
\alias{fetchAll,KorAPQuery-method}
@@ -13,10 +15,9 @@
\alias{fetchRest}
\alias{frequencyQuery,KorAPConnection-method}
\alias{frequencyQuery}
+\alias{buildWebUIRequestUrl}
\alias{format.KorAPQuery}
\alias{show,KorAPQuery-method}
-\alias{collocationScoreQuery,KorAPConnection-method}
-\alias{collocationScoreQuery}
\title{Class KorAPQuery}
\usage{
\S4method{initialize}{KorAPQuery}(
@@ -35,16 +36,34 @@
collectedMatches = NULL
)
+\S4method{corpusQuery}{KorAPConnection}(
+ kco,
+ query = if (missing(KorAPUrl))
+ stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
+ FALSE) else httr::parse_url(KorAPUrl)$query$q,
+ vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
+ KorAPUrl,
+ metadataOnly = TRUE,
+ ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
+ fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
+ "textClass", "snippet"),
+ accessRewriteFatal = TRUE,
+ verbose = kco@verbose,
+ expand = length(vc) != length(query),
+ as.df = FALSE
+)
+
\S4method{fetchNext}{KorAPQuery}(
kqo,
offset = kqo@nextStartIndex,
maxFetch = maxResultsPerPage,
- verbose = kqo@korapConnection@verbose
+ verbose = kqo@korapConnection@verbose,
+ randomizePageOrder = FALSE
)
-\S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose)
+\S4method{fetchAll}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
-\S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose)
+\S4method{fetchRest}{KorAPQuery}(kqo, verbose = kqo@korapConnection@verbose, ...)
\S4method{frequencyQuery}{KorAPConnection}(
kco,
@@ -55,22 +74,23 @@
...
)
+buildWebUIRequestUrl(
+ kco,
+ query = if (missing(KorAPUrl))
+ stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
+ FALSE) else httr::parse_url(KorAPUrl)$query$q,
+ vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
+ KorAPUrl,
+ metadataOnly = TRUE,
+ ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
+ fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
+ "textClass", "snippet"),
+ accessRewriteFatal = TRUE
+)
+
\method{format}{KorAPQuery}(x, ...)
\S4method{show}{KorAPQuery}(object)
-
-\S4method{collocationScoreQuery}{KorAPConnection}(
- kco,
- node,
- collocate,
- vc = "",
- lemmatizeNodeQuery = FALSE,
- lemmatizeCollocateQuery = FALSE,
- leftContextSize = 5,
- rightContextSize = 5,
- scoreFunctions = defaultAssociationScoreFunctions(),
- smoothingConstant = 0.5
-)
}
\arguments{
\item{.Object}{…}
@@ -85,7 +105,7 @@
\item{nextStartIndex}{at what index to start the next fetch of query results}
-\item{fields}{what data / metadata fields should be collected}
+\item{fields}{(meta)data fields that will be fetched for every match.}
\item{requestUrl}{complete URL of the API request}
@@ -97,67 +117,95 @@
\item{collectedMatches}{matches already fetched from the KorAP-API-server}
+\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})}
+
+\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
+
+\item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.}
+
+\item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE, unless the connection is authorized (currently not possible).}
+
+\item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values).}
+
+\item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).}
+
+\item{verbose}{print progress information if true}
+
+\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations}
+
+\item{as.df}{return result as data frame instead of as S4 object?}
+
\item{kqo}{object obtained from \code{\link{corpusQuery}}}
\item{offset}{start offset for query results to fetch}
\item{maxFetch}{maximum number of query results to fetch}
-\item{verbose}{print progress information if true}
+\item{randomizePageOrder}{fetch result pages in pseudo random order if true. Use \code{\link{set.seed}} to set seed for reproducible results.}
-\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}}
-
-\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
+\item{...}{further arguments passed to or from other methods}
\item{conf.level}{confidence level of the returned confidence interval (passed through \code{\link{ci}} to \code{\link{prop.test}}).}
\item{as.alternatives}{LOGICAL that specifies if the query terms should be treated as alternatives. If \code{as.alternatives} is TRUE, the sum over all query hits, instead of the respective vc token sizes is used as total for the calculation of relative frequencies.}
-\item{...}{further arguments passed to or from other methods}
-
\item{x}{KorAPQuery object}
\item{object}{KorAPQuery object}
-
-\item{node}{target word}
-
-\item{collocate}{collocate of target word}
-
-\item{lemmatizeNodeQuery}{logical, set to TRUE if node query should be lemmatized, i.e. x -> [tt/l=x]}
-
-\item{lemmatizeCollocateQuery}{logical, set to TRUE if collocate query should be lemmatized, i.e. x -> [tt/l=x]}
-
-\item{leftContextSize}{size of the left context window}
-
-\item{rightContextSize}{size of the right context window}
-
-\item{scoreFunctions}{named list of score functions of the form function(O1, O2, O, N, E, window_size), see e.g. \link{pmi}}
-
-\item{smoothingConstant}{smoothing constant will be added to all observed values}
}
\value{
-The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
+Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
+A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}.
+Please make sure to check \code{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
-tibble with query KorAP web request URL, all observed values and association scores
+The \code{kqo} input object with updated slots \code{collectedMatches}, \code{apiResponse}, \code{nextStartIndex}, \code{hasMoreMatches}
}
\description{
This class provides methods to perform different kinds of queries on the KorAP API server.
\code{KorAPQuery} objects, which are typically created by the \code{\link{corpusQuery}} method,
represent the current state of a query to a KorAP server.
+\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server
+
\bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
-\bold{\code{fetchAll}} fetches allf results of a KorAP query.
+\bold{\code{fetchAll}} fetches all results of a KorAP query.
\bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
\code{\link{ci}} to compute a table with the relative frequencies and
confidence intervals of one ore multiple search terms across one or multiple
virtual corpora.
-
-\bold{\code{collocationScoreQuery}} computes various collocation association scores
-based on \code{\link{frequencyQuery}}s for a target word and a collocate.
}
\examples{
+# Fetch metadata of every query hit for "Ameisenplage" and show a summary
+\donttest{
+new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
+}
+
+# Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
+# and show the number of query hits (but don't fetch them).
+
+new("KorAPConnection", verbose = TRUE) \%>\%
+ corpusQuery(KorAPUrl =
+ "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp")
+
+# Plot the time/frequency curve of "Ameisenplage"
+\donttest{
+new("KorAPConnection", verbose=TRUE) \%>\%
+ { . ->> kco } \%>\%
+ corpusQuery("Ameisenplage") \%>\%
+ fetchAll() \%>\%
+ slot("collectedMatches") \%>\%
+ mutate(year = lubridate::year(pubDate)) \%>\%
+ dplyr::select(year) \%>\%
+ group_by(year) \%>\%
+ summarise(Count = dplyr::n()) \%>\%
+ mutate(Freq = mapply(function(f, y)
+ f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\%
+ dplyr::select(-Count) \%>\%
+ complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\%
+ plot(type = "l")
+}
\donttest{q <- new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchNext()
q@collectedMatches
}
@@ -177,29 +225,12 @@
frequencyQuery(c("Mücke", "Schnake"), paste0("pubDate in ", 2000:2003))
}
-\donttest{
-new("KorAPConnection", verbose = TRUE) \%>\%
- collocationScoreQuery("Grund", "triftiger")
-}
-
-\donttest{
-new("KorAPConnection", verbose = TRUE) \%>\%
-collocationScoreQuery("Grund", c("guter", "triftiger"),
- scoreFunctions = list(localMI = function(O1, O2, O, N, E, window_size) { O * log2(O/E) }) )
-}
-
-\donttest{
-library(highcharter)
-library(tidyr)
-new("KorAPConnection", verbose = TRUE) \%>\%
- collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
- lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
- pivot_longer(14:last_col(), names_to = "measure", values_to = "score") \%>\%
- hchart(type="spline", hcaes(label, score, group=measure)) \%>\%
- hc_add_onclick_korap_search()
-}
-
}
\references{
\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
+
+\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
+}
+\seealso{
+\code{\link{KorAPConnection}}, \code{\link{fetchNext}}, \code{\link{fetchRest}}, \code{\link{fetchAll}}, \code{\link{corpusStats}}
}
diff --git a/man/association-score-functions.Rd b/man/association-score-functions.Rd
index 7517561..fda531a 100644
--- a/man/association-score-functions.Rd
+++ b/man/association-score-functions.Rd
@@ -78,4 +78,11 @@
Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
Free PDF available from \url{http://purl.org/stefan.evert/PUB/Evert2004phd.pdf}
}
+\seealso{
+Other collocation analysis functions:
+\code{\link{collocationAnalysis,KorAPConnection-method}},
+\code{\link{collocationScoreQuery,KorAPConnection-method}},
+\code{\link{synsemanticStopwords}()}
+}
\concept{association-score-functions}
+\concept{collocation analysis functions}
diff --git a/man/collocationAnalysis-KorAPConnection-method.Rd b/man/collocationAnalysis-KorAPConnection-method.Rd
new file mode 100644
index 0000000..bcfe99e
--- /dev/null
+++ b/man/collocationAnalysis-KorAPConnection-method.Rd
@@ -0,0 +1,107 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/collocationAnalysis.R
+\name{collocationAnalysis,KorAPConnection-method}
+\alias{collocationAnalysis,KorAPConnection-method}
+\alias{collocationAnalysis}
+\title{Collocation analysis}
+\usage{
+\S4method{collocationAnalysis}{KorAPConnection}(
+ kco,
+ node,
+ vc = "",
+ lemmatizeNodeQuery = FALSE,
+ minOccur = 5,
+ leftContextSize = 5,
+ rightContextSize = 5,
+ topCollocatesLimit = 200,
+ searchHitsSampleLimit = 20000,
+ ignoreCollocateCase = FALSE,
+ withinSpan = ifelse(exactFrequencies, "base/s=s", ""),
+ exactFrequencies = TRUE,
+ stopwords = RKorAPClient::synsemanticStopwords(),
+ seed = 7,
+ expand = length(vc) != length(node),
+ ...
+)
+}
+\arguments{
+\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})}
+
+\item{node}{target word}
+
+\item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
+
+\item{lemmatizeNodeQuery}{if TRUE, node query will be lemmatized, i.e. x -> [tt/l=x]}
+
+\item{minOccur}{minimum absolute number of observed co-occurrences to consider a collocate candidate}
+
+\item{leftContextSize}{size of the left context window}
+
+\item{rightContextSize}{size of the right context window}
+
+\item{topCollocatesLimit}{limit analysis to the n most frequent collocates in the search hits sample}
+
+\item{searchHitsSampleLimit}{limit the size of the search hits sample}
+
+\item{ignoreCollocateCase}{logical, set to TRUE if collocate case should be ignored}
+
+\item{withinSpan}{KorAP span specification for collocations to be searched within}
+
+\item{exactFrequencies}{if FALSE, extrapolate observed co-occurrence frequencies from frequencies in search hits sample, otherwise retrieve exact co-occurrence frequencies}
+
+\item{stopwords}{vector of stopwords not to be considered as collocates}
+
+\item{seed}{seed for random page collecting order}
+
+\item{expand}{if TRUE, \code{node} and \code{vc} parameters are expanded to all of their combinations}
+
+\item{...}{more arguments will be passed to \code{\link{collocationScoreQuery}}}
+}
+\value{
+Tibble with top collocates, association scores, corresponding URLs for web user interface queries, etc.
+}
+\description{
+\Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
+
+Performs a collocation analysis for the given node (or query)
+in the given virtual corpus.
+}
+\details{
+The collocation analysis is currently implemented on the client side, as some of the
+functionality is not yet provided by the KorAP backend. Mainly for this reason
+it is very slow (several minutes, up to hours), but on the other hand very flexible.
+You can, for example, perform the analysis in arbitrary virtual corpora, use complex node queries,
+and look for expression-internal collocates using the focus function (see examples and demo).
+
+To increase speed at the cost of accuracy and possible false negatives,
+you can decrease searchHitsSampleLimit and/or topCollocatesLimit and/or set exactFrequencies to FALSE.
+
+Note that the analysis currently does not use the tokenization provided by the backend, i.e. the corpus itself, but an improvised one.
+This can also lead to false negatives and to frequencies that differ from corresponding ones acquired via the web
+user interface.
+}
+\examples{
+\donttest{
+ # Find top collocates of "Packung" inside and outside the sports domain.
+ new("KorAPConnection", verbose = TRUE) \%>\%
+ collocationAnalysis("Packung", vc=c("textClass=sport", "textClass!=sport"),
+ leftContextSize=1, rightContextSize=1, topCollocatesLimit=20) \%>\%
+ dplyr::filter(logDice >= 5)
+}
+
+\donttest{
+# Identify the most prominent light verb construction with "in ... setzen".
+# Note that, currently, the use of the focus function disallows exactFrequencies = TRUE.
+new("KorAPConnection", verbose = TRUE) \%>\%
+ collocationAnalysis("focus(in [tt/p=NN] {[tt/l=setzen]})",
+ leftContextSize=1, rightContextSize=0, exactFrequencies=FALSE, topCollocatesLimit=20)
+}
+
+}
+\seealso{
+Other collocation analysis functions:
+\code{\link{association-score-functions}},
+\code{\link{collocationScoreQuery,KorAPConnection-method}},
+\code{\link{synsemanticStopwords}()}
+}
+\concept{collocation analysis functions}
diff --git a/man/collocationScoreQuery-KorAPConnection-method.Rd b/man/collocationScoreQuery-KorAPConnection-method.Rd
new file mode 100644
index 0000000..605caf4
--- /dev/null
+++ b/man/collocationScoreQuery-KorAPConnection-method.Rd
@@ -0,0 +1,88 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/collocationScoreQuery.R
+\name{collocationScoreQuery,KorAPConnection-method}
+\alias{collocationScoreQuery,KorAPConnection-method}
+\alias{collocationScoreQuery}
+\title{Query frequencies of a node and a collocate and calculate collocation association scores}
+\usage{
+\S4method{collocationScoreQuery}{KorAPConnection}(
+ kco,
+ node,
+ collocate,
+ vc = "",
+ lemmatizeNodeQuery = FALSE,
+ lemmatizeCollocateQuery = FALSE,
+ leftContextSize = 5,
+ rightContextSize = 5,
+ scoreFunctions = defaultAssociationScoreFunctions(),
+ smoothingConstant = 0.5,
+ observed = NA,
+ ignoreCollocateCase = FALSE,
+ withinSpan = "base/s=s"
+)
+}
+\arguments{
+\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")})}
+
+\item{node}{target word}
+
+\item{collocate}{collocate of target word}
+
+\item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
+
+\item{lemmatizeNodeQuery}{logical, set to TRUE if node query should be lemmatized, i.e. x -> [tt/l=x]}
+
+\item{lemmatizeCollocateQuery}{logical, set to TRUE if collocate query should be lemmatized, i.e. x -> [tt/l=x]}
+
+\item{leftContextSize}{size of the left context window}
+
+\item{rightContextSize}{size of the right context window}
+
+\item{scoreFunctions}{named list of score functions of the form function(O1, O2, O, N, E, window_size), see e.g. \link{pmi}}
+
+\item{smoothingConstant}{smoothing constant will be added to all observed values}
+
+\item{observed}{if collocation frequencies are already known (or estimated from a sample) they can be passed as a vector here, otherwise: NA}
+
+\item{ignoreCollocateCase}{logical, set to TRUE if collocate case should be ignored}
+
+\item{withinSpan}{KorAP span specification for collocations to be searched within}
+}
+\value{
+tibble with query KorAP web request URL, all observed values and association scores
+}
+\description{
+Computes various collocation association scores
+based on \code{\link{frequencyQuery}}s for a target word and a collocate.
+}
+\examples{
+\donttest{
+new("KorAPConnection", verbose = TRUE) \%>\%
+ collocationScoreQuery("Grund", "triftiger")
+}
+
+\donttest{
+new("KorAPConnection", verbose = TRUE) \%>\%
+collocationScoreQuery("Grund", c("guter", "triftiger"),
+ scoreFunctions = list(localMI = function(O1, O2, O, N, E, window_size) { O * log2(O/E) }) )
+}
+
+\donttest{
+library(highcharter)
+library(tidyr)
+new("KorAPConnection", verbose = TRUE) \%>\%
+ collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
+ lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
+ pivot_longer(14:last_col(), names_to = "measure", values_to = "score") \%>\%
+ hchart(type="spline", hcaes(label, score, group=measure)) \%>\%
+ hc_add_onclick_korap_search()
+}
+
+}
+\seealso{
+Other collocation analysis functions:
+\code{\link{association-score-functions}},
+\code{\link{collocationAnalysis,KorAPConnection-method}},
+\code{\link{synsemanticStopwords}()}
+}
+\concept{collocation analysis functions}
diff --git a/man/corpusQuery-KorAPConnection-method.Rd b/man/corpusQuery-KorAPConnection-method.Rd
deleted file mode 100644
index 0170ba3..0000000
--- a/man/corpusQuery-KorAPConnection-method.Rd
+++ /dev/null
@@ -1,92 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/KorAPQuery.R
-\name{corpusQuery,KorAPConnection-method}
-\alias{corpusQuery,KorAPConnection-method}
-\alias{corpusQuery}
-\title{\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server}
-\usage{
-\S4method{corpusQuery}{KorAPConnection}(
- kco,
- query = if (missing(KorAPUrl))
- stop("At least one of the parameters query and KorAPUrl must be specified.", call. =
- FALSE) else httr::parse_url(KorAPUrl)$query$q,
- vc = if (missing(KorAPUrl)) "" else httr::parse_url(KorAPUrl)$query$cq,
- KorAPUrl,
- metadataOnly = TRUE,
- ql = if (missing(KorAPUrl)) "poliqarp" else httr::parse_url(KorAPUrl)$query$ql,
- fields = c("corpusSigle", "textSigle", "pubDate", "pubPlace", "availability",
- "textClass", "snippet"),
- accessRewriteFatal = TRUE,
- verbose = kco@verbose,
- expand = length(vc) != length(query),
- as.df = FALSE
-)
-}
-\arguments{
-\item{kco}{\code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}}
-
-\item{query}{string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.}
-
-\item{vc}{string describing the virtual corpus in which the query should be performed. An empty string (default) means the whole corpus, as far as it is license-wise accessible.}
-
-\item{KorAPUrl}{instead of providing the query and vc string parameters, you can also simply copy a KorAP query URL from your browser and use it here (and in \code{KorAPConnection}) to provide all necessary information for the query.}
-
-\item{metadataOnly}{logical that determines whether queries should return only metadata without any snippets. This can also be useful to prevent access rewrites. Note that the default value is TRUE, unless the connection is authorized (currently not possible).}
-
-\item{ql}{string to choose the query language (see \href{https://github.com/KorAP/Kustvakt/wiki/Service:-Search-GET#user-content-parameters}{section on Query Parameters} in the Kustvakt-Wiki for possible values.}
-
-\item{fields}{(meta)data fields that will be fetched for every match.}
-
-\item{accessRewriteFatal}{abort if query or given vc had to be rewritten due to insufficient rights (not yet implemented).}
-
-\item{verbose}{print some info}
-
-\item{expand}{logical that decides if \code{query} and \code{vc} parameters are expanded to all of their combinations}
-
-\item{as.df}{return result as data frame instead of as S4 object?}
-}
-\value{
-Depending on the \code{as.df} parameter, a table or a \code{\link{KorAPQuery}} object that, among other information, contains the total number of results in \code{@totalResults}. The resulting object can be used to fetch all query results (with \code{\link{fetchAll}}) or the next page of results (with \code{\link{fetchNext}}).
-A corresponding URL to be used within a web browser is contained in \code{@webUIRequestUrl}
-Please make sure to check \code{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
-}
-\description{
-\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server
-}
-\examples{
-# Fetch metadata of every query hit for "Ameisenplage" and show a summary
-\donttest{
-new("KorAPConnection") \%>\% corpusQuery("Ameisenplage") \%>\% fetchAll()
-}
-
-# Use the copy of a KorAP-web-frontend URL for an API query of "Ameise" in a virtual corpus
-# and show the number of query hits (but don't fetch them).
-
-new("KorAPConnection", verbose = TRUE) \%>\%
- corpusQuery(KorAPUrl =
- "https://korap.ids-mannheim.de/?q=Ameise&cq=pubDate+since+2017&ql=poliqarp")
-
-# Plot the time/frequency curve of "Ameisenplage"
-\donttest{
-new("KorAPConnection", verbose=TRUE) \%>\%
- { . ->> kco } \%>\%
- corpusQuery("Ameisenplage") \%>\%
- fetchAll() \%>\%
- slot("collectedMatches") \%>\%
- mutate(year = lubridate::year(pubDate)) \%>\%
- dplyr::select(year) \%>\%
- group_by(year) \%>\%
- summarise(Count = dplyr::n()) \%>\%
- mutate(Freq = mapply(function(f, y)
- f / corpusStats(kco, paste("pubDate in", y))@tokens, Count, year)) \%>\%
- dplyr::select(-Count) \%>\%
- complete(year = min(year):max(year), fill = list(Freq = 0)) \%>\%
- plot(type = "l")
-}
-}
-\references{
-\url{https://ids-pub.bsz-bw.de/frontdoor/index/index/docId/9026}
-}
-\seealso{
-\code{\link{KorAPConnection}}, \code{\link{fetchNext}}, \code{\link{fetchRest}}, \code{\link{fetchAll}}, \code{\link{corpusStats}}
-}
diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg
new file mode 100644
index 0000000..d1d060e
--- /dev/null
+++ b/man/figures/lifecycle-experimental.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="136" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="136" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h53v20H0z"/><path fill="#fe7d37" d="M53 0h83v20H53z"/><path fill="url(#b)" d="M0 0h136v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><text x="275" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">lifecycle</text><text x="275" y="140" transform="scale(.1)" textLength="430">lifecycle</text><text x="935" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="730">experimental</text><text x="935" y="140" transform="scale(.1)" textLength="730">experimental</text></g> </svg>
\ No newline at end of file
diff --git a/man/synsemanticStopwords.Rd b/man/synsemanticStopwords.Rd
new file mode 100644
index 0000000..54b82ad
--- /dev/null
+++ b/man/synsemanticStopwords.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/collocationAnalysis.R
+\name{synsemanticStopwords}
+\alias{synsemanticStopwords}
+\title{Preliminary synsemantic stopwords function}
+\usage{
+synsemanticStopwords(...)
+}
+\arguments{
+\item{...}{future arguments for language detection}
+}
+\value{
+Vector of synsemantic stopwords.
+}
+\description{
+\Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
+
+Preliminary synsemantic stopwords function to be used in collocation analysis.
+}
+\details{
+Currently only suitable for German. See stopwords package for other languages.
+}
+\seealso{
+Other collocation analysis functions:
+\code{\link{association-score-functions}},
+\code{\link{collocationAnalysis,KorAPConnection-method}},
+\code{\link{collocationScoreQuery,KorAPConnection-method}}
+}
+\concept{collocation analysis functions}