Restructure documentation Change-Id: I2640e68972cb7c5ed67032a3b575a2aec056f592

commit: a6e4ee6c9271f0c6acc03e5c544e89e6831d7f51 [log] [tgz]
author: Marc Kupietz <kupietz@ids-mannheim.de> Fri Mar 05 09:00:15 2021 +0100
committer: Marc Kupietz <kupietz@ids-mannheim.de> Fri Mar 05 09:34:50 2021 +0100
tree: b024ed506b1bd0d7d541b7087f0540dfbe07a6f6
parent: 5a61f01306e65b032015849638f690cb29022c20 [diff]
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index 46eb322..b3a0edf 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd

@@ -141,11 +141,14 @@
 tibble with query KorAP web request URL, all observed values and association scores
 }
 \description{
-\code{KorAPQuery} objects represent the current state of a query to a KorAP server.
-New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
+This class provides methods to perform different kinds of queries on the KorAP API server.
+\code{KorAPQuery} objects, which are typically created by the \code{\link{corpusQuery}} method,
+represent the current state of a query to a KorAP server.
 
 \bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
 
+\bold{\code{fetchAll}} fetches all results of a KorAP query.
+
 \bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
 \code{\link{ci}} to compute a table with the relative frequencies and
 confidence intervals of one or multiple search terms across one or multiple

diff --git a/man/RKorAPClient-package.Rd b/man/RKorAPClient-package.Rd
new file mode 100644
index 0000000..863b1a6
--- /dev/null
+++ b/man/RKorAPClient-package.Rd

@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RKorAPClient-package.R
+\docType{package}
+\name{RKorAPClient-package}
+\alias{RKorAPClient}
+\alias{RKorAPClient-package}
+\title{RKorAPClient: 'KorAP' Web Service Client Package}
+\description{
+
+A client package that makes the 'KorAP' web service API accessible from R.
+  The corpus analysis platform 'KorAP' has been developed as a scientific tool to make
+  potentially large, stratified and multiply annotated corpora, such as the 'German Reference Corpus DeReKo'
+  or the 'Corpus of the Contemporary Romanian Language CoRoLa', accessible for linguists to let them verify
+  hypotheses and to find interesting patterns in real language use.
+  The 'RKorAPClient' package provides access to 'KorAP' and the corpora behind it for user-created R code,
+  as a programmatic alternative to the 'KorAP' web user-interface.
+  You can learn more about 'KorAP' and use it directly on 'DeReKo' at <https://korap.ids-mannheim.de/>.
+}
+\references{
+Kupietz, Marc / Diewald, Nils / Margaretha, Eliza (2020): RKorAPClient: An R package for accessing the German Reference Corpus DeReKo via KorAP. In: Calzolari, Nicoletta, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis (eds.): \href{http://www.lrec-conf.org/proceedings/lrec2020/LREC-2020.pdf}{Proceedings of The 12th Language Resources and Evaluation Conference (LREC 2020)}. Marseille: European Language Resources Association (ELRA), 7017-7023. <http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.867.pdf>
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/KorAP/RKorAPClient/}
+  \item \url{https://korap.ids-mannheim.de/}
+  \item \url{https://www1.ids-mannheim.de/kl/projekte/korap.html}
+  \item Report bugs at \url{https://github.com/KorAP/RKorAPClient/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Marc Kupietz \email{kupietz@ids-mannheim.de}
+
+Other contributors:
+\itemize{
+  \item Nils Diewald \email{diewald@ids-mannheim.de} [contributor]
+  \item Leibniz Institute for the German Language [copyright holder, funder]
+}
+
+}
+\keyword{internal}

diff --git a/man/RKorAPClient.Rd b/man/RKorAPClient.Rd
deleted file mode 100644
index 3a72acc..0000000
--- a/man/RKorAPClient.Rd
+++ /dev/null

@@ -1,13 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/RKorAPClient.R
-\docType{package}
-\name{RKorAPClient}
-\alias{RKorAPClient}
-\title{\code{RKorapClient} package}
-\description{
-R package to access the \href{https://github.com/KorAP/}{KorAP} web service API.
-}
-\details{
-See the README.md on
-\href{https://github.com/KorAP/RKorAPClient/}{github}
-}

diff --git a/man/association-score-functions.Rd b/man/association-score-functions.Rd
new file mode 100644
index 0000000..6591f1b
--- /dev/null
+++ b/man/association-score-functions.Rd

@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/association-scores.R
+\name{association-score-functions}
+\alias{association-score-functions}
+\alias{defaultAssociationScoreFunctions}
+\alias{pmi}
+\alias{mi2}
+\alias{mi3}
+\alias{logDice}
+\alias{ll}
+\title{Association score functions}
+\usage{
+defaultAssociationScoreFunctions()
+
+pmi(O1, O2, O, N, E, window_size)
+
+mi2(O1, O2, O, N, E, window_size)
+
+mi3(O1, O2, O, N, E, window_size)
+
+logDice(O1, O2, O, N, E, window_size)
+
+ll(O1, O2, O, N, E, window_size)
+}
+\arguments{
+\item{O1}{observed absolute frequency of node}
+
+\item{O2}{observed absolute frequency of collocate}
+
+\item{O}{observed absolute frequency of collocation}
+
+\item{N}{corpus size}
+
+\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
+
+\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
+}
+\value{
+association score
+}
+\description{
+Functions to calculate different collocation association scores between
+a node (target word) and words in a window around it.
+The functions are primarily used by \code{\link{collocationScoreQuery}}.
+}
+\examples{
+\donttest{
+new("KorAPConnection", verbose = TRUE) \%>\%
+collocationScoreQuery("Perlen", c("verziertes", "Säue"),
+  scoreFunctions = append(defaultAssociationScoreFunctions(),
+     list(localMI = function(O1, O2, O, N, E, window_size) {
+                       O * log2(O/E)
+                    })))
+}
+
+}
+\references{
+Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.
+
+Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.
+
+Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. <http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf>.
+
+Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61-74.
+
+Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
+Free PDF available from <http://purl.org/stefan.evert/PUB/Evert2004phd.pdf>
+}
+\concept{association-score-functions}

diff --git a/man/ci.Rd b/man/ci.Rd
deleted file mode 100644
index cb51cad..0000000
--- a/man/ci.Rd
+++ /dev/null

@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ci.R
-\name{ci}
-\alias{ci}
-\title{Add confidence interval and relative frequency variables}
-\usage{
-ci(df, x = totalResults, N = total, conf.level = 0.95)
-}
-\arguments{
-\item{df}{table with columns for absolute and total frequencies.}
-
-\item{x}{column with the observed absolute frequency.}
-
-\item{N}{column with the total frequencies}
-
-\item{conf.level}{confidence level of the returned confidence interval. Must
-be a single number between 0 and 1.}
-}
-\description{
-Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
-1. relative frequency (\code{f})
-2. lower bound of a confidence interval (\code{ci.low})
-3. upper bound of a confidence interval
-}
-\examples{
-\donttest{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
-  bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
-  mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
-  ci() \%>\%
-  ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
-    geom_point() + geom_line() + geom_ribbon(alpha=.3)
-}
-}
-\seealso{
-\code{ci} is already included in \code{\link{frequencyQuery}}
-}

diff --git a/man/corpusQuery-KorAPConnection-method.Rd b/man/corpusQuery-KorAPConnection-method.Rd
index d3284a6..0170ba3 100644
--- a/man/corpusQuery-KorAPConnection-method.Rd
+++ b/man/corpusQuery-KorAPConnection-method.Rd

@@ -3,7 +3,7 @@
 \name{corpusQuery,KorAPConnection-method}
 \alias{corpusQuery,KorAPConnection-method}
 \alias{corpusQuery}
-\title{Method corpusQuery}
+\title{\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server}
 \usage{
 \S4method{corpusQuery}{KorAPConnection}(
   kco,
@@ -51,7 +51,7 @@
 Please make sure to check \code{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
 }
 \description{
-Perform a corpus query via a connection to a KorAP-API-server.
+\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server
 }
 \examples{
 # Fetch metadata of every query hit for "Ameisenplage" and show a summary

diff --git a/man/defaultAssociationScoreFunctions.Rd b/man/defaultAssociationScoreFunctions.Rd
deleted file mode 100644
index 09a87d8..0000000
--- a/man/defaultAssociationScoreFunctions.Rd
+++ /dev/null

@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{defaultAssociationScoreFunctions}
-\alias{defaultAssociationScoreFunctions}
-\title{Default association score functions}
-\usage{
-defaultAssociationScoreFunctions()
-}
-\value{
-list of default association score functions
-}
-\description{
-Default association score functions
-}
-\examples{
-\donttest{
-new("KorAPConnection", verbose = TRUE) \%>\%
-collocationScoreQuery("Perlen", c("verziertes", "Säue"),
-  scoreFunctions = append(associationScoreFunctions(),
-     list(localMI = function(O1, O2, O, N, E, window_size) {
-                       O * log2(O/E)
-                    })))
-}
-
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}

diff --git a/man/geom_freq_by_year_ci.Rd b/man/geom_freq_by_year_ci.Rd
deleted file mode 100644
index ef53526..0000000
--- a/man/geom_freq_by_year_ci.Rd
+++ /dev/null

@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{geom_freq_by_year_ci}
-\alias{geom_freq_by_year_ci}
-\title{Experimental: Plot frequency by year graphs with confidence intervals}
-\usage{
-geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
-}
-\arguments{
-\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
-
-\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.}
-}
-\description{
-Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
-\bold{Warning:} This function may be moved to a new package.
-}
-\examples{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-\donttest{
-expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
-            year = (2005:2011)) \%>\%
-  cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
-                            paste0(.$condition," & pubDate in ", .$year)))  \%>\%
-  ipm() \%>\%
-  ggplot(aes(year, ipm, fill = condition, color = condition)) +
-  geom_freq_by_year_ci()
-}
-}

diff --git a/man/ggplotly.Rd b/man/ggplotly.Rd
deleted file mode 100644
index 9b09c25..0000000
--- a/man/ggplotly.Rd
+++ /dev/null

@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{ggplotly}
-\alias{ggplotly}
-\title{Experimental: Convert ggplot2 to plotly with hyperlinks to KorAP queries}
-\usage{
-ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...)
-}
-\arguments{
-\item{p}{a ggplot object.}
-
-\item{tooltip}{a character vector specifying which aesthetic mappings to show
-in the tooltip. If you want hyperlinks to KorAP queries you need to include
-\code{"url"} here.}
-
-\item{...}{Other arguments passed to \code{plotly::ggplotly}}
-}
-\description{
-\code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
-object with hyperlinks from data points to corresponding KorAP queries.
-\bold{Warning:} This function may be moved to a new package.
-}
-\examples{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-\donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
-\donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
-g <- expand_grid(condition, year) \%>\%
-  cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
-                       paste0(.$condition," & pubDate in ", .$year)))  \%>\%
-  ipm() \%>\%
-  ggplot(aes(year, ipm, fill = condition, color = condition)) +
-  ##  theme_light(base_size = 20) +
-  geom_freq_by_year_ci()
-p <- ggplotly(g)
-print(p)
-## saveWidget(p, paste0(tmpdir(), "heuschrecke.html")
-
-
-}

diff --git a/man/hc_add_onclick_korap_search.Rd b/man/hc_add_onclick_korap_search.Rd
deleted file mode 100644
index 4e3ac06..0000000
--- a/man/hc_add_onclick_korap_search.Rd
+++ /dev/null

@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/highcharter-helper.R
-\name{hc_add_onclick_korap_search}
-\alias{hc_add_onclick_korap_search}
-\title{Add KorAP search click events to highchart}
-\usage{
-hc_add_onclick_korap_search(hc)
-}
-\arguments{
-\item{hc}{highchart}
-}
-\description{
-Adds on-click events to data points of highcarts that were constructed with
-\ref{frequencyQuery} or ref \ref{collocationScoreQuery}. Clicks on data points
-then launch KorAP web UI queries for the given query term and virtual corpus in
-a separate frame.
-}
-\examples{
-\donttest{
-library(highcharter)
-new("KorAPConnection", verbose = TRUE) \%>\%
-  collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
-                        lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
-                         pivot_longer(c("O", "E")) \%>\%
-  hchart(type="spline", hcaes(label, score, group=name)) \%>\%
-  hc_add_onclick_korap_search()
-}
-
-}

diff --git a/man/hc_freq_by_year_ci.Rd b/man/highcharter-helpers.Rd
similarity index 67%
rename from man/hc_freq_by_year_ci.Rd
rename to man/highcharter-helpers.Rd
index a0702e4..5dac932 100644
--- a/man/hc_freq_by_year_ci.Rd
+++ b/man/highcharter-helpers.Rd

@@ -1,8 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/highcharter-helper.R
-\name{hc_freq_by_year_ci}
+\name{highcharter-helpers}
+\alias{highcharter-helpers}
 \alias{hc_freq_by_year_ci}
-\title{Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter}
+\alias{hc_add_onclick_korap_search}
+\title{Helper functions for producing highcharts}
 \usage{
 hc_freq_by_year_ci(
   df,
@@ -11,6 +13,8 @@
   smooth = FALSE,
   ...
 )
+
+hc_add_onclick_korap_search(hc)
 }
 \arguments{
 \item{df}{data frame like the value of a \code{\link{frequencyQuery}}}
@@ -22,10 +26,17 @@
 \item{smooth}{boolean decides whether the graph is smoothed using the highcharts plot types spline and areasplinerange.}
 
 \item{...}{additional arguments passed to \code{\link{hc_add_series}}}
+
+\item{hc}{highchart}
 }
 \description{
 Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
 \bold{Warning:} This function may be moved to a new package.
+
+Adds on-click events to data points of highcharts that were constructed with
+\code{\link{frequencyQuery}} or \code{\link{collocationScoreQuery}}. Clicks on data points
+then launch KorAP web UI queries for the given query term and virtual corpus in
+a separate tab.
 }
 \examples{
 \donttest{year <- c(1990:2018)}\dontshow{year <- c(2013:2013)}
@@ -50,4 +61,16 @@
   hc_freq_by_year_ci()
 }
 
+\donttest{
+library(highcharter)
+library(tidyr)
+
+new("KorAPConnection", verbose = TRUE) \%>\%
+  collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
+                        lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
+                         pivot_longer(c("O", "E")) \%>\%
+  hchart(type="spline", hcaes(label, value, group=name)) \%>\%
+  hc_add_onclick_korap_search()
+}
+
 }

diff --git a/man/ipm.Rd b/man/ipm.Rd
deleted file mode 100644
index e4b5aec..0000000
--- a/man/ipm.Rd
+++ /dev/null

@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{ipm}
-\alias{ipm}
-\title{Convert corpus frequency table to instances per million.}
-\usage{
-ipm(df)
-}
-\arguments{
-\item{df}{table returned from \code{\link{frequencyQuery}}}
-}
-\value{
-original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
-}
-\description{
-Convenience function for converting frequency tables to instances per
-million.
-}
-\details{
-Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} ads a \code{column ipm}
-und multiplies conf.low and \code{conf.high} with 10^6.
-}
-\examples{
-\donttest{
-new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
-}
-}

diff --git a/man/ll.Rd b/man/ll.Rd
deleted file mode 100644
index 95f2107..0000000
--- a/man/ll.Rd
+++ /dev/null

@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{ll}
-\alias{ll}
-\title{Log likelihood}
-\usage{
-ll(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Log likelihood
-}
-\references{
-Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61-74.
-
-Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
-Free PDF available from <http://purl.org/stefan.evert/PUB/Evert2004phd.pdf>
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}

diff --git a/man/logDice.Rd b/man/logDice.Rd
deleted file mode 100644
index 9ecdc24..0000000
--- a/man/logDice.Rd
+++ /dev/null

@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{logDice}
-\alias{logDice}
-\title{log-Dice coefficient}
-\usage{
-logDice(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-log-Dice coefficient
-}
-\examples{
-
-}
-\references{
-Rychlý, Pavel (2008): <a href="http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf">A lexicographer-friendly association score.</a> In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9.
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}

diff --git a/man/mi2.Rd b/man/mi2.Rd
deleted file mode 100644
index 4cbbab4..0000000
--- a/man/mi2.Rd
+++ /dev/null

@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{mi2}
-\alias{mi2}
-\title{Pointwise mutual information squared}
-\usage{
-mi2(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Pointwise mutual information squared
-}
-\details{
-Also referenced to as mutual dependency (MD)
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}

diff --git a/man/mi3.Rd b/man/mi3.Rd
deleted file mode 100644
index 7c8815e..0000000
--- a/man/mi3.Rd
+++ /dev/null

@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{mi3}
-\alias{mi3}
-\title{Pointwise mutual information cubed}
-\usage{
-mi3(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Pointwise mutual information cubed
-}
-\details{
-Also referenced to as log-frequency biased mutual dependency (LFMD)
-}
-\references{
-Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.
-
-Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}

diff --git a/man/misc-functions.Rd b/man/misc-functions.Rd
new file mode 100644
index 0000000..b0d9c0d
--- /dev/null
+++ b/man/misc-functions.Rd

@@ -0,0 +1,139 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ci.R, R/misc.R
+\name{ci}
+\alias{ci}
+\alias{misc-functions}
+\alias{ipm}
+\alias{percent}
+\alias{queryStringToLabel}
+\alias{geom_freq_by_year_ci}
+\alias{ggplotly}
+\title{Add confidence interval and relative frequency variables}
+\usage{
+ci(df, x = totalResults, N = total, conf.level = 0.95)
+
+ipm(df)
+
+percent(df)
+
+queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
+
+geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
+
+ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...)
+}
+\arguments{
+\item{df}{table returned from \code{\link{frequencyQuery}}}
+
+\item{x}{column with the observed absolute frequency.}
+
+\item{N}{column with the total frequencies}
+
+\item{conf.level}{confidence level of the returned confidence interval. Must
+be a single number between 0 and 1.}
+
+\item{data}{string or vector of query or vc definition strings}
+
+\item{pubDateOnly}{discard all but the publication date}
+
+\item{excludePubDate}{discard publication date constraints}
+
+\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
+
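+\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point (for \code{geom_freq_by_year_ci}) or to \code{plotly::ggplotly} (for \code{ggplotly})}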
+
+\item{p}{a ggplot object.}
+
+\item{tooltip}{a character vector specifying which aesthetic mappings to show
+in the tooltip. If you want hyperlinks to KorAP queries you need to include
+\code{"url"} here.}
+}
+\value{
+original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
+
+original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
+
+string or vector of strings with clipped off common prefixes and suffixes
+}
+\description{
+Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
+1. relative frequency (\code{f})
+2. lower bound of a confidence interval (\code{conf.low})
+3. upper bound of a confidence interval (\code{conf.high})
+
+Convenience function for converting frequency tables to instances per
+million.
+
+Convenience function for converting frequency tables of alternative variants
+(generated with \code{as.alternatives=TRUE}) to percent.
+
+Converts a vector of query or vc strings to typically appropriate legend labels
+by clipping off prefixes and suffixes that are common to all query strings.
+
+Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
+\bold{Warning:} This function may be moved to a new package.
+
+\code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
+object with hyperlinks from data points to corresponding KorAP queries.
+\bold{Warning:} This function may be moved to a new package.
+}
+\details{
+Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
+and multiplies \code{conf.low} and \code{conf.high} by 10^6.
+}
+\examples{
+\donttest{
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
+  bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
+  mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
+  ci() \%>\%
+  ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
+    geom_point() + geom_line() + geom_ribbon(alpha=.3)
+}
+\donttest{
+new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
+}
+\donttest{
+new("KorAPConnection") \%>\%
+    frequencyQuery(c("Tollpatsch", "Tolpatsch"),
+    vc=paste0("pubDate in ", 2000:2002),
+    as.alternatives = TRUE) \%>\%
+  percent()
+}
+queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
+queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
+queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
+
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+\donttest{
+expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+            year = (2005:2011)) \%>\%
+  cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
+                            paste0(.$condition," & pubDate in ", .$year)))  \%>\%
+  ipm() \%>\%
+  ggplot(aes(year, ipm, fill = condition, color = condition)) +
+  geom_freq_by_year_ci()
+}
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+\donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
+\donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
+g <- expand_grid(condition, year) \%>\%
+  cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
+                       paste0(.$condition," & pubDate in ", .$year)))  \%>\%
+  ipm() \%>\%
+  ggplot(aes(year, ipm, fill = condition, color = condition)) +
+  ##  theme_light(base_size = 20) +
+  geom_freq_by_year_ci()
+p <- ggplotly(g)
+print(p)
+## saveWidget(p, file.path(tempdir(), "heuschrecke.html"))
+
+
+}
+\seealso{
+\code{ci} is already included in \code{\link{frequencyQuery}}
+}

diff --git a/man/percent.Rd b/man/percent.Rd
deleted file mode 100644
index 1e40204..0000000
--- a/man/percent.Rd
+++ /dev/null

@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{percent}
-\alias{percent}
-\title{Convert corpus frequency table of alternatives to percent}
-\usage{
-percent(df)
-}
-\arguments{
-\item{df}{table returned from \code{\link{frequencyQuery}}}
-}
-\value{
-original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
-}
-\description{
-Convenience function for converting frequency tables of alternative variants
-(generated with \code{as.alternatives=TRUE}) to percent.
-}
-\examples{
-\donttest{
-new("KorAPConnection") \%>\%
-    frequencyQuery(c("Tollpatsch", "Tolpatsch"),
-    vc=paste0("pubDate in ", 2000:2002),
-    as.alternatives = TRUE) \%>\%
-  percent()
-}
-}

diff --git a/man/pmi.Rd b/man/pmi.Rd
deleted file mode 100644
index 015e00d..0000000
--- a/man/pmi.Rd
+++ /dev/null

@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{pmi}
-\alias{pmi}
-\title{Pointwise mutual information}
-\usage{
-pmi(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\value{
-association score
-}
-\description{
-Pointwise mutual information
-}
-\seealso{
-Other association-score-functions: 
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()}
-}
-\concept{association-score-functions}

diff --git a/man/queryStringToLabel.Rd b/man/queryStringToLabel.Rd
deleted file mode 100644
index 8dedafe..0000000
--- a/man/queryStringToLabel.Rd
+++ /dev/null

@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{queryStringToLabel}
-\alias{queryStringToLabel}
-\title{Convert query or vc strings to plot labels}
-\usage{
-queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
-}
-\arguments{
-\item{data}{string or vector of query or vc definition strings}
-
-\item{pubDateOnly}{discard all but the publication date}
-
-\item{excludePubDate}{discard publication date constraints}
-}
-\value{
-string or vector of strings with clipped off common prefixes and suffixes
-}
-\description{
-Converts a vector of query or vc strings to typically appropriate legend labels
-by clipping off prefixes and suffixes that are common to all query strings.
-}
-\examples{
-queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
-queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
-queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
-
-}
commit	a6e4ee6c9271f0c6acc03e5c544e89e6831d7f51	[log] [tgz]
author	Marc Kupietz <kupietz@ids-mannheim.de>	Fri Mar 05 09:00:15 2021 +0100
committer	Marc Kupietz <kupietz@ids-mannheim.de>	Fri Mar 05 09:34:50 2021 +0100
tree	b024ed506b1bd0d7d541b7087f0540dfbe07a6f6
parent	5a61f01306e65b032015849638f690cb29022c20 [diff]