Restructure documentation
Change-Id: I2640e68972cb7c5ed67032a3b575a2aec056f592
diff --git a/.gitignore b/.gitignore
index f51ef92..9df09e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
.RData
.Ruserdata
cache/
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index 776dc04..45b8220 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,6 +24,10 @@
Depends: R (>= 3.5.0)
Language: en-US
License: BSD_2_clause + file LICENSE
+URL: https://github.com/KorAP/RKorAPClient/,
+ https://korap.ids-mannheim.de/,
+ https://www1.ids-mannheim.de/kl/projekte/korap.html
+BugReports: https://github.com/KorAP/RKorAPClient/issues
Encoding: UTF-8
LazyData: false
RoxygenNote: 7.1.1
@@ -50,7 +54,7 @@
Collate:
'KorAPConnection.R'
'KorAPCorpusStats.R'
- 'RKorAPClient.R'
+ 'RKorAPClient-package.R'
'KorAPQuery.R'
'association-scores.R'
'ci.R'
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index fa84839..b06dea1 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -1,12 +1,13 @@
#' Class KorAPQuery
#'
-#' \code{KorAPQuery} objects represent the current state of a query to a KorAP server.
-#' New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
+#' This class provides methods to perform different kinds of queries on the KorAP API server.
+#' \code{KorAPQuery} objects, which are typically created by the \code{\link{corpusQuery}} method,
+#' represent the current state of a query to a KorAP server.
#'
#' @include KorAPConnection.R
#' @import httr
#'
-#' @include RKorAPClient.R
+#' @include RKorAPClient-package.R
#' @export
KorAPQuery <- setClass("KorAPQuery", slots = c(
@@ -66,17 +67,13 @@
setGeneric("fetchRest", function(kqo, ...) standardGeneric("fetchRest") )
setGeneric("frequencyQuery", function(kco, ...) standardGeneric("frequencyQuery") )
setGeneric("collocationScoreQuery", function(kco, ...) standardGeneric("collocationScoreQuery") )
-setGeneric("collocationScoreQueryNew", function(kco, ...) standardGeneric("collocationScoreQueryNew") )
-
maxResultsPerPage <- 50
## quiets concerns of R CMD check re: the .'s that appear in pipelines
if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
-#' Method corpusQuery
-#'
-#' Perform a corpus query via a connection to a KorAP-API-server.
+#' \bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server
#'
#' @param kco \code{\link{KorAPConnection}} object (obtained e.g. from \code{new("KorAPConnection")}
#' @param query string that contains the corpus query. The query language depends on the \code{ql} parameter. Either \code{query} must be provided or \code{KorAPUrl}.
@@ -293,6 +290,8 @@
#' Fetch all results of a KorAP query.
#'
+#' \bold{\code{fetchAll}} fetches all results of a KorAP query.
+#'
#' @examples
#' \donttest{
#' q <- new("KorAPConnection") %>% corpusQuery("Ameisenplage") %>% fetchAll()
diff --git a/R/RKorAPClient-package.R b/R/RKorAPClient-package.R
new file mode 100644
index 0000000..5c66076
--- /dev/null
+++ b/R/RKorAPClient-package.R
@@ -0,0 +1,7 @@
+#' @references
+#' Kupietz, Marc / Diewald, Nils / Margaretha, Eliza (2020): RKorAPClient: An R package for accessing the German Reference Corpus DeReKo via KorAP. In: Calzolari, Nicoletta, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis (eds.): \href{http://www.lrec-conf.org/proceedings/lrec2020/LREC-2020.pdf}{Proceedings of The 12th Language Resources and Evaluation Conference (LREC 2020)}. Marseille: European Language Resources Association (ELRA), 7017-7023. <http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.867.pdf>
+#'
+#' @keywords internal
+"_PACKAGE"
+#' [1] "_PACKAGE"
+#'
diff --git a/R/RKorAPClient.R b/R/RKorAPClient.R
deleted file mode 100644
index 92fc66e..0000000
--- a/R/RKorAPClient.R
+++ /dev/null
@@ -1,10 +0,0 @@
-#' \code{RKorapClient} package
-#'
-#' R package to access the \href{https://github.com/KorAP/}{KorAP} web service API.
-#'
-#' See the README.md on
-#' \href{https://github.com/KorAP/RKorAPClient/}{github}
-#'
-#' @docType package
-#' @name RKorAPClient
-NULL
diff --git a/R/association-scores.R b/R/association-scores.R
index 494477e..2d9b41e 100644
--- a/R/association-scores.R
+++ b/R/association-scores.R
@@ -1,8 +1,23 @@
-#' Default association score functions
+#' Association score functions
#'
-#' @family association-score-functions
+#' @param O1 observed absolute frequency of node
+#' @param O2 observed absolute frequency of collocate
+#' @param O observed absolute frequency of collocation
+#' @param N corpus size
+#' @param E expected absolute frequency of collocation (already adjusted to window size)
+#' @param window_size total window size around node (left neighbour count + right neighbour count)
#'
-#' @return list of default association score functions
+#' @return association score
+#' @name association-score-functions
+#' @description
+#' Functions to calculate different collocation association scores between
+#' a node (target word) and words in a window around it.
+#' The functions are primarily used by \code{\link{collocationScoreQuery}}.
+NULL
+#' NULL
+
+#' @rdname association-score-functions
+#'
#' @export
#'
#' @examples
@@ -19,47 +34,26 @@
list(pmi=pmi, mi2=mi2, mi3=mi3, logDice=logDice, ll=ll)
}
-#' Pointwise mutual information
+#' @rdname association-score-functions
#'
-#' @family association-score-functions
-#'
-#' @param O1 observed absolute frequency of node
-#' @param O2 observed absolute frequency of collocate
-#' @param O observed absolute frequency of collocation
-#' @param N corpus size
-#' @param E expected absolute frequency of collocation (already adjusted to window size)
-#' @param window_size total window size around node (left neighbour count + right neighbour count)
-#'
-#' @return association score
#' @export
#'
-
pmi <- function(O1, O2, O, N, E, window_size) {
log2(O / E)
}
-#' Pointwise mutual information squared
+#' @rdname association-score-functions
#'
-#' @family association-score-functions
-#'
-#' @details
-#' Also referenced to as mutual dependency (MD)
-#'
-#' @inheritParams pmi
#' @export
#'
mi2 <- function(O1, O2, O, N, E, window_size) {
log2(O ^ 2 / E)
}
-#' Pointwise mutual information cubed
-#'
+#' @rdname association-score-functions
#' @family association-score-functions
#'
-#' @details
-#' Also referenced to as log-frequency biased mutual dependency (LFMD)
#'
-#' @inheritParams pmi
#' @export
#'
#' @references
@@ -71,16 +65,12 @@
log2(O ^ 3 / E)
}
-#' log-Dice coefficient
-#'
+#' @rdname association-score-functions
#' @family association-score-functions
-#' @inheritParams pmi
#' @export
#'
-#' @examples
-#'
#' @references
-#' Rychlý, Pavel (2008): <a href="http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf">A lexicographer-friendly association score.</a> In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9.
+#' Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. <http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf>.
#'
logDice <- function(O1, O2, O, N, E, window_size) {
@@ -90,14 +80,13 @@
#' Log likelihood
#'
+#' @rdname association-score-functions
#' @family association-score-functions
#'
#' @export
#'
#' @importFrom dplyr if_else
#'
-#' @inheritParams pmi
-#'
#' @references
#' Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61-74.
#'
diff --git a/R/ci.R b/R/ci.R
index 64f7c0a..80c45f1 100644
--- a/R/ci.R
+++ b/R/ci.R
@@ -15,6 +15,8 @@
#' @param conf.level confidence level of the returned confidence interval. Must
#' be a single number between 0 and 1.
#'
+#' @rdname misc-functions
+#'
#' @export
#' @importFrom stats prop.test
#' @importFrom tibble remove_rownames
diff --git a/R/highcharter-helper.R b/R/highcharter-helper.R
index 2b9a557..929ac0a 100644
--- a/R/highcharter-helper.R
+++ b/R/highcharter-helper.R
@@ -1,8 +1,17 @@
+#' Helper functions for producing highcharts
+#'
+#' @param hc highchart
+#'
+#' @name highcharter-helpers
+NULL
+#' NULL
+
#' Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter
#'
#' Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
#' \bold{Warning:} This function may be moved to a new package.
#'
+#' @rdname highcharter-helpers
#' @import highcharter
#' @importFrom tibble add_column
#' @export
@@ -148,12 +157,11 @@
#'
#' @description
#' Adds on-click events to data points of highcarts that were constructed with
-#' \ref{frequencyQuery} or ref \ref{collocationScoreQuery}. Clicks on data points
+#' \code{\link{frequencyQuery}} or \code{\link{collocationScoreQuery}}. Clicks on data points
#' then launch KorAP web UI queries for the given query term and virtual corpus in
-#' a separate frame.
+#' a separate tab.
#'
-#' @param hc highchart
-#'
+#' @rdname highcharter-helpers
#' @export
#'
#' @examples
diff --git a/R/misc.R b/R/misc.R
index a39ef21..4529d7a 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -1,3 +1,8 @@
+#' Misc functions
+#'
+#' @name misc-functions
+NULL
+#' NULL
#' Convert corpus frequency table to instances per million.
#'
@@ -12,6 +17,7 @@
#' @return original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
#' @export
#'
+#' @rdname misc-functions
#' @importFrom dplyr .data
#'
#' @examples
@@ -35,6 +41,7 @@
#'
#' @importFrom dplyr .data
#'
+#' @rdname misc-functions
#' @examples
#' \donttest{
#' new("KorAPConnection") %>%
@@ -58,6 +65,8 @@
#' @param excludePubDate discard publication date constraints
#' @return string or vector of strings with clipped off common prefixes and suffixes
#'
+#' @rdname misc-functions
+#'
#' @examples
#' queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
#' queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
@@ -97,6 +106,8 @@
#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
#' @param ... Other arguments passed to geom_ribbon, geom_line, and geom_click_point.
#'
+#' @rdname misc-functions
+#'
#' @examples
#' library(ggplot2)
#' kco <- new("KorAPConnection", verbose=TRUE)
@@ -175,6 +186,8 @@
#' \code{"url"} here.
#' @param ... Other arguments passed to \code{plotly::ggplotly}
#'
+#' @rdname misc-functions
+#'
#' @examples
#' library(ggplot2)
#' kco <- new("KorAPConnection", verbose=TRUE)
diff --git a/RKorAPClient.Rproj b/RKorAPClient.Rproj
index 2611ff5..6b153a0 100644
--- a/RKorAPClient.Rproj
+++ b/RKorAPClient.Rproj
@@ -18,5 +18,6 @@
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
+PackageCheckArgs: --no-examples
PackageRoxygenize: rd,collate,namespace
DisableExecuteRprofile: Yes
diff --git a/man/KorAPQuery-class.Rd b/man/KorAPQuery-class.Rd
index 46eb322..b3a0edf 100644
--- a/man/KorAPQuery-class.Rd
+++ b/man/KorAPQuery-class.Rd
@@ -141,11 +141,14 @@
tibble with query KorAP web request URL, all observed values and association scores
}
\description{
-\code{KorAPQuery} objects represent the current state of a query to a KorAP server.
-New \code{KorAPQuery} objects are typically created by the \code{\link{corpusQuery}} method.
+This class provides methods to perform different kinds of queries on the KorAP API server.
+\code{KorAPQuery} objects, which are typically created by the \code{\link{corpusQuery}} method,
+represent the current state of a query to a KorAP server.
\bold{\code{fetchNext}} fetches the next bunch of results of a KorAP query.
+\bold{\code{fetchAll}} fetches all results of a KorAP query.
+
\bold{\code{frequencyQuery}} combines \code{\link{corpusQuery}}, \code{\link{corpusStats}} and
\code{\link{ci}} to compute a table with the relative frequencies and
confidence intervals of one ore multiple search terms across one or multiple
diff --git a/man/RKorAPClient-package.Rd b/man/RKorAPClient-package.Rd
new file mode 100644
index 0000000..863b1a6
--- /dev/null
+++ b/man/RKorAPClient-package.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RKorAPClient-package.R
+\docType{package}
+\name{RKorAPClient-package}
+\alias{RKorAPClient}
+\alias{RKorAPClient-package}
+\title{RKorAPClient: 'KorAP' Web Service Client Package}
+\description{
+
+A client package that makes the 'KorAP' web service API accessible from R.
+ The corpus analysis platform 'KorAP' has been developed as a scientific tool to make
+ potentially large, stratified and multiply annotated corpora, such as the 'German Reference Corpus DeReKo'
+ or the 'Corpus of the Contemporary Romanian Language CoRoLa', accessible for linguists to let them verify
+ hypotheses and to find interesting patterns in real language use.
+ The 'RKorAPClient' package provides access to 'KorAP' and the corpora behind it for user-created R code,
+ as a programmatic alternative to the 'KorAP' web user-interface.
+ You can learn more about 'KorAP' and use it directly on 'DeReKo' at <https://korap.ids-mannheim.de/>.
+}
+\references{
+Kupietz, Marc / Diewald, Nils / Margaretha, Eliza (2020): RKorAPClient: An R package for accessing the German Reference Corpus DeReKo via KorAP. In: Calzolari, Nicoletta, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis (eds.): \href{http://www.lrec-conf.org/proceedings/lrec2020/LREC-2020.pdf}{Proceedings of The 12th Language Resources and Evaluation Conference (LREC 2020)}. Marseille: European Language Resources Association (ELRA), 7017-7023. <http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.867.pdf>
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://github.com/KorAP/RKorAPClient/}
+ \item \url{https://korap.ids-mannheim.de/}
+ \item \url{https://www1.ids-mannheim.de/kl/projekte/korap.html}
+ \item Report bugs at \url{https://github.com/KorAP/RKorAPClient/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Marc Kupietz \email{kupietz@ids-mannheim.de}
+
+Other contributors:
+\itemize{
+ \item Nils Diewald \email{diewald@ids-mannheim.de} [contributor]
+ \item Leibniz Institute for the German Language [copyright holder, funder]
+}
+
+}
+\keyword{internal}
diff --git a/man/RKorAPClient.Rd b/man/RKorAPClient.Rd
deleted file mode 100644
index 3a72acc..0000000
--- a/man/RKorAPClient.Rd
+++ /dev/null
@@ -1,13 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/RKorAPClient.R
-\docType{package}
-\name{RKorAPClient}
-\alias{RKorAPClient}
-\title{\code{RKorapClient} package}
-\description{
-R package to access the \href{https://github.com/KorAP/}{KorAP} web service API.
-}
-\details{
-See the README.md on
-\href{https://github.com/KorAP/RKorAPClient/}{github}
-}
diff --git a/man/association-score-functions.Rd b/man/association-score-functions.Rd
new file mode 100644
index 0000000..6591f1b
--- /dev/null
+++ b/man/association-score-functions.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/association-scores.R
+\name{association-score-functions}
+\alias{association-score-functions}
+\alias{defaultAssociationScoreFunctions}
+\alias{pmi}
+\alias{mi2}
+\alias{mi3}
+\alias{logDice}
+\alias{ll}
+\title{Association score functions}
+\usage{
+defaultAssociationScoreFunctions()
+
+pmi(O1, O2, O, N, E, window_size)
+
+mi2(O1, O2, O, N, E, window_size)
+
+mi3(O1, O2, O, N, E, window_size)
+
+logDice(O1, O2, O, N, E, window_size)
+
+ll(O1, O2, O, N, E, window_size)
+}
+\arguments{
+\item{O1}{observed absolute frequency of node}
+
+\item{O2}{observed absolute frequency of collocate}
+
+\item{O}{observed absolute frequency of collocation}
+
+\item{N}{corpus size}
+
+\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
+
+\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
+}
+\value{
+association score
+}
+\description{
+Functions to calculate different collocation association scores between
+a node (target word) and words in a window around it.
+The functions are primarily used by \code{\link{collocationScoreQuery}}.
+}
+\examples{
+\donttest{
+new("KorAPConnection", verbose = TRUE) \%>\%
+collocationScoreQuery("Perlen", c("verziertes", "Säue"),
+ scoreFunctions = append(defaultAssociationScoreFunctions(),
+ list(localMI = function(O1, O2, O, N, E, window_size) {
+ O * log2(O/E)
+ })))
+}
+
+}
+\references{
+Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.
+
+Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.
+
+Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. <http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf>.
+
+Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61-74.
+
+Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
+Free PDF available from <http://purl.org/stefan.evert/PUB/Evert2004phd.pdf>
+}
+\concept{association-score-functions}
diff --git a/man/ci.Rd b/man/ci.Rd
deleted file mode 100644
index cb51cad..0000000
--- a/man/ci.Rd
+++ /dev/null
@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ci.R
-\name{ci}
-\alias{ci}
-\title{Add confidence interval and relative frequency variables}
-\usage{
-ci(df, x = totalResults, N = total, conf.level = 0.95)
-}
-\arguments{
-\item{df}{table with columns for absolute and total frequencies.}
-
-\item{x}{column with the observed absolute frequency.}
-
-\item{N}{column with the total frequencies}
-
-\item{conf.level}{confidence level of the returned confidence interval. Must
-be a single number between 0 and 1.}
-}
-\description{
-Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
-1. relative frequency (\code{f})
-2. lower bound of a confidence interval (\code{ci.low})
-3. upper bound of a confidence interval
-}
-\examples{
-\donttest{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
- bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
- mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
- ci() \%>\%
- ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
- geom_point() + geom_line() + geom_ribbon(alpha=.3)
-}
-}
-\seealso{
-\code{ci} is already included in \code{\link{frequencyQuery}}
-}
diff --git a/man/corpusQuery-KorAPConnection-method.Rd b/man/corpusQuery-KorAPConnection-method.Rd
index d3284a6..0170ba3 100644
--- a/man/corpusQuery-KorAPConnection-method.Rd
+++ b/man/corpusQuery-KorAPConnection-method.Rd
@@ -3,7 +3,7 @@
\name{corpusQuery,KorAPConnection-method}
\alias{corpusQuery,KorAPConnection-method}
\alias{corpusQuery}
-\title{Method corpusQuery}
+\title{\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server}
\usage{
\S4method{corpusQuery}{KorAPConnection}(
kco,
@@ -51,7 +51,7 @@
Please make sure to check \code{$collection$rewrites} to see if any unforeseen access rewrites of the query's virtual corpus had to be performed.
}
\description{
-Perform a corpus query via a connection to a KorAP-API-server.
+\bold{\code{corpusQuery}} performs a corpus query via a connection to a KorAP-API-server
}
\examples{
# Fetch metadata of every query hit for "Ameisenplage" and show a summary
diff --git a/man/defaultAssociationScoreFunctions.Rd b/man/defaultAssociationScoreFunctions.Rd
deleted file mode 100644
index 09a87d8..0000000
--- a/man/defaultAssociationScoreFunctions.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{defaultAssociationScoreFunctions}
-\alias{defaultAssociationScoreFunctions}
-\title{Default association score functions}
-\usage{
-defaultAssociationScoreFunctions()
-}
-\value{
-list of default association score functions
-}
-\description{
-Default association score functions
-}
-\examples{
-\donttest{
-new("KorAPConnection", verbose = TRUE) \%>\%
-collocationScoreQuery("Perlen", c("verziertes", "Säue"),
- scoreFunctions = append(associationScoreFunctions(),
- list(localMI = function(O1, O2, O, N, E, window_size) {
- O * log2(O/E)
- })))
-}
-
-}
-\seealso{
-Other association-score-functions:
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}
diff --git a/man/geom_freq_by_year_ci.Rd b/man/geom_freq_by_year_ci.Rd
deleted file mode 100644
index ef53526..0000000
--- a/man/geom_freq_by_year_ci.Rd
+++ /dev/null
@@ -1,30 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{geom_freq_by_year_ci}
-\alias{geom_freq_by_year_ci}
-\title{Experimental: Plot frequency by year graphs with confidence intervals}
-\usage{
-geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
-}
-\arguments{
-\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
-
-\item{...}{Other arguments passed to geom_ribbon, geom_line, and geom_click_point.}
-}
-\description{
-Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
-\bold{Warning:} This function may be moved to a new package.
-}
-\examples{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-\donttest{
-expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
- year = (2005:2011)) \%>\%
- cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
- paste0(.$condition," & pubDate in ", .$year))) \%>\%
- ipm() \%>\%
- ggplot(aes(year, ipm, fill = condition, color = condition)) +
- geom_freq_by_year_ci()
-}
-}
diff --git a/man/ggplotly.Rd b/man/ggplotly.Rd
deleted file mode 100644
index 9b09c25..0000000
--- a/man/ggplotly.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{ggplotly}
-\alias{ggplotly}
-\title{Experimental: Convert ggplot2 to plotly with hyperlinks to KorAP queries}
-\usage{
-ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...)
-}
-\arguments{
-\item{p}{a ggplot object.}
-
-\item{tooltip}{a character vector specifying which aesthetic mappings to show
-in the tooltip. If you want hyperlinks to KorAP queries you need to include
-\code{"url"} here.}
-
-\item{...}{Other arguments passed to \code{plotly::ggplotly}}
-}
-\description{
-\code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
-object with hyperlinks from data points to corresponding KorAP queries.
-\bold{Warning:} This function may be moved to a new package.
-}
-\examples{
-library(ggplot2)
-kco <- new("KorAPConnection", verbose=TRUE)
-\donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
-\donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
-g <- expand_grid(condition, year) \%>\%
- cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
- paste0(.$condition," & pubDate in ", .$year))) \%>\%
- ipm() \%>\%
- ggplot(aes(year, ipm, fill = condition, color = condition)) +
- ## theme_light(base_size = 20) +
- geom_freq_by_year_ci()
-p <- ggplotly(g)
-print(p)
-## saveWidget(p, paste0(tmpdir(), "heuschrecke.html")
-
-
-}
diff --git a/man/hc_add_onclick_korap_search.Rd b/man/hc_add_onclick_korap_search.Rd
deleted file mode 100644
index 4e3ac06..0000000
--- a/man/hc_add_onclick_korap_search.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/highcharter-helper.R
-\name{hc_add_onclick_korap_search}
-\alias{hc_add_onclick_korap_search}
-\title{Add KorAP search click events to highchart}
-\usage{
-hc_add_onclick_korap_search(hc)
-}
-\arguments{
-\item{hc}{highchart}
-}
-\description{
-Adds on-click events to data points of highcarts that were constructed with
-\ref{frequencyQuery} or ref \ref{collocationScoreQuery}. Clicks on data points
-then launch KorAP web UI queries for the given query term and virtual corpus in
-a separate frame.
-}
-\examples{
-\donttest{
-library(highcharter)
-new("KorAPConnection", verbose = TRUE) \%>\%
- collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
- lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
- pivot_longer(c("O", "E")) \%>\%
- hchart(type="spline", hcaes(label, score, group=name)) \%>\%
- hc_add_onclick_korap_search()
-}
-
-}
diff --git a/man/hc_freq_by_year_ci.Rd b/man/highcharter-helpers.Rd
similarity index 67%
rename from man/hc_freq_by_year_ci.Rd
rename to man/highcharter-helpers.Rd
index a0702e4..5dac932 100644
--- a/man/hc_freq_by_year_ci.Rd
+++ b/man/highcharter-helpers.Rd
@@ -1,8 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/highcharter-helper.R
-\name{hc_freq_by_year_ci}
+\name{highcharter-helpers}
+\alias{highcharter-helpers}
\alias{hc_freq_by_year_ci}
-\title{Experimental: Plot interactive frequency by year graphs with confidence intervals using highcharter}
+\alias{hc_add_onclick_korap_search}
+\title{Helper functions for producing highcharts}
\usage{
hc_freq_by_year_ci(
df,
@@ -11,6 +13,8 @@
smooth = FALSE,
...
)
+
+hc_add_onclick_korap_search(hc)
}
\arguments{
\item{df}{data frame like the value of a \code{\link{frequencyQuery}}}
@@ -22,10 +26,17 @@
\item{smooth}{boolean decides whether the graph is smoothed using the highcharts plot types spline and areasplinerange.}
\item{...}{additional arguments passed to \code{\link{hc_add_series}}}
+
+\item{hc}{highchart}
}
\description{
Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using highcharter.
\bold{Warning:} This function may be moved to a new package.
+
+Adds on-click events to data points of highcarts that were constructed with
+\code{\link{frequencyQuery}} or \code{\link{collocationScoreQuery}}. Clicks on data points
+then launch KorAP web UI queries for the given query term and virtual corpus in
+a separate tab.
}
\examples{
\donttest{year <- c(1990:2018)}\dontshow{year <- c(2013:2013)}
@@ -50,4 +61,16 @@
hc_freq_by_year_ci()
}
+\donttest{
+library(highcharter)
+library(tidyr)
+
+new("KorAPConnection", verbose = TRUE) \%>\%
+ collocationScoreQuery("Team", "agil", vc = paste("pubDate in", c(2014:2018)),
+ lemmatizeNodeQuery = TRUE, lemmatizeCollocateQuery = TRUE) \%>\%
+ pivot_longer(c("O", "E")) \%>\%
+ hchart(type="spline", hcaes(label, value, group=name)) \%>\%
+ hc_add_onclick_korap_search()
+}
+
}
diff --git a/man/ipm.Rd b/man/ipm.Rd
deleted file mode 100644
index e4b5aec..0000000
--- a/man/ipm.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{ipm}
-\alias{ipm}
-\title{Convert corpus frequency table to instances per million.}
-\usage{
-ipm(df)
-}
-\arguments{
-\item{df}{table returned from \code{\link{frequencyQuery}}}
-}
-\value{
-original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
-}
-\description{
-Convenience function for converting frequency tables to instances per
-million.
-}
-\details{
-Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} ads a \code{column ipm}
-und multiplies conf.low and \code{conf.high} with 10^6.
-}
-\examples{
-\donttest{
-new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
-}
-}
diff --git a/man/ll.Rd b/man/ll.Rd
deleted file mode 100644
index 95f2107..0000000
--- a/man/ll.Rd
+++ /dev/null
@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{ll}
-\alias{ll}
-\title{Log likelihood}
-\usage{
-ll(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Log likelihood
-}
-\references{
-Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61-74.
-
-Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
-Free PDF available from <http://purl.org/stefan.evert/PUB/Evert2004phd.pdf>
-}
-\seealso{
-Other association-score-functions:
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}
diff --git a/man/logDice.Rd b/man/logDice.Rd
deleted file mode 100644
index 9ecdc24..0000000
--- a/man/logDice.Rd
+++ /dev/null
@@ -1,39 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{logDice}
-\alias{logDice}
-\title{log-Dice coefficient}
-\usage{
-logDice(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-log-Dice coefficient
-}
-\examples{
-
-}
-\references{
-Rychlý, Pavel (2008): <a href="http://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf">A lexicographer-friendly association score.</a> In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9.
-}
-\seealso{
-Other association-score-functions:
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}
diff --git a/man/mi2.Rd b/man/mi2.Rd
deleted file mode 100644
index 4cbbab4..0000000
--- a/man/mi2.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{mi2}
-\alias{mi2}
-\title{Pointwise mutual information squared}
-\usage{
-mi2(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Pointwise mutual information squared
-}
-\details{
-Also referenced to as mutual dependency (MD)
-}
-\seealso{
-Other association-score-functions:
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi3}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}
diff --git a/man/mi3.Rd b/man/mi3.Rd
deleted file mode 100644
index 7c8815e..0000000
--- a/man/mi3.Rd
+++ /dev/null
@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{mi3}
-\alias{mi3}
-\title{Pointwise mutual information cubed}
-\usage{
-mi3(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\description{
-Pointwise mutual information cubed
-}
-\details{
-Also referenced to as log-frequency biased mutual dependency (LFMD)
-}
-\references{
-Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.
-
-Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.
-}
-\seealso{
-Other association-score-functions:
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{pmi}()}
-}
-\concept{association-score-functions}
diff --git a/man/misc-functions.Rd b/man/misc-functions.Rd
new file mode 100644
index 0000000..b0d9c0d
--- /dev/null
+++ b/man/misc-functions.Rd
@@ -0,0 +1,139 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ci.R, R/misc.R
+\name{ci}
+\alias{ci}
+\alias{misc-functions}
+\alias{ipm}
+\alias{percent}
+\alias{queryStringToLabel}
+\alias{geom_freq_by_year_ci}
+\alias{ggplotly}
+\title{Add confidence interval and relative frequency variables}
+\usage{
+ci(df, x = totalResults, N = total, conf.level = 0.95)
+
+ipm(df)
+
+percent(df)
+
+queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
+
+geom_freq_by_year_ci(mapping = aes(ymin = conf.low, ymax = conf.high), ...)
+
+ggplotly(p = ggplot2::last_plot(), tooltip = c("x", "y", "colour", "url"), ...)
+}
+\arguments{
+\item{df}{table returned from \code{\link{frequencyQuery}}}
+
+\item{x}{column with the observed absolute frequency.}
+
+\item{N}{column with the total frequencies}
+
+\item{conf.level}{confidence level of the returned confidence interval. Must
+be a single number between 0 and 1.}
+
+\item{data}{string or vector of query or vc definition strings}
+
+\item{pubDateOnly}{discard all but the publication date}
+
+\item{excludePubDate}{discard publication date constraints}
+
+\item{mapping}{Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.}
+
+\item{...}{Other arguments passed to \code{plotly::ggplotly}}
+
+\item{p}{a ggplot object.}
+
+\item{tooltip}{a character vector specifying which aesthetic mappings to show
+in the tooltip. If you want hyperlinks to KorAP queries you need to include
+\code{"url"} here.}
+}
+\value{
+original table with additional column \code{ipm} and converted columns \code{conf.low} and \code{conf.high}
+
+original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
+
+string or vector of strings with clipped off common prefixes and suffixes
+}
+\description{
+Using \code{\link{prop.test}}, \code{ci} adds three columns to a data frame:
+1. relative frequency (\code{f})
+2. lower bound of a confidence interval (\code{conf.low})
+3. upper bound of a confidence interval (\code{conf.high})
+
+Convenience function for converting frequency tables to instances per
+million.
+
+Convenience function for converting frequency tables of alternative variants
+(generated with \code{as.alternatives=TRUE}) to percent.
+
+Converts a vector of query or vc strings to typically appropriate legend labels
+by clipping off prefixes and suffixes that are common to all query strings.
+
+Experimental convenience function for plotting typical frequency by year graphs with confidence intervals using ggplot2.
+\bold{Warning:} This function may be moved to a new package.
+
+\code{RKorAPClient::ggplotly} converts a \code{ggplot2::ggplot()} object to a plotly
+object with hyperlinks from data points to corresponding KorAP queries.
+\bold{Warning:} This function may be moved to a new package.
+}
+\details{
+Given a table with columns \code{f}, \code{conf.low}, and \code{conf.high}, \code{ipm} adds a column \code{ipm}
+and multiplies \code{conf.low} and \code{conf.high} by 10^6.
+}
+\examples{
+\donttest{
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+expand_grid(year=2015:2018, alternatives=c("Hate Speech", "Hatespeech")) \%>\%
+ bind_cols(corpusQuery(kco, .$alternatives, sprintf("pubDate in \%d", .$year))) \%>\%
+ mutate(total=corpusStats(kco, vc=vc)$tokens) \%>\%
+ ci() \%>\%
+ ggplot(aes(x=year, y=f, fill=query, color=query, ymin=conf.low, ymax=conf.high)) +
+ geom_point() + geom_line() + geom_ribbon(alpha=.3)
+}
+\donttest{
+new("KorAPConnection") \%>\% frequencyQuery("Test", paste0("pubDate in ", 2000:2002)) \%>\% ipm()
+}
+\donttest{
+new("KorAPConnection") \%>\%
+ frequencyQuery(c("Tollpatsch", "Tolpatsch"),
+ vc=paste0("pubDate in ", 2000:2002),
+ as.alternatives = TRUE) \%>\%
+ percent()
+}
+queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
+queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
+queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
+
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+\donttest{
+expand_grid(condition = c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/"),
+ year = (2005:2011)) \%>\%
+ cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
+ paste0(.$condition," & pubDate in ", .$year))) \%>\%
+ ipm() \%>\%
+ ggplot(aes(year, ipm, fill = condition, color = condition)) +
+ geom_freq_by_year_ci()
+}
+library(ggplot2)
+kco <- new("KorAPConnection", verbose=TRUE)
+\donttest{year <- (2003:2011)}\dontshow{year <- c(2005)}
+\donttest{condition <- c("textDomain = /Wirtschaft.*/", "textDomain != /Wirtschaft.*/")}\dontshow{condition <- c("textDomain = /Wirtschaft.*/")}
+g <- expand_grid(condition, year) \%>\%
+ cbind(frequencyQuery(kco, "[tt/l=Heuschrecke]",
+ paste0(.$condition," & pubDate in ", .$year))) \%>\%
+ ipm() \%>\%
+ ggplot(aes(year, ipm, fill = condition, color = condition)) +
+ ## theme_light(base_size = 20) +
+ geom_freq_by_year_ci()
+p <- ggplotly(g)
+print(p)
+## saveWidget(p, file.path(tempdir(), "heuschrecke.html"))
+
+
+}
+\seealso{
+\code{ci} is already included in \code{\link{frequencyQuery}}
+}
diff --git a/man/percent.Rd b/man/percent.Rd
deleted file mode 100644
index 1e40204..0000000
--- a/man/percent.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{percent}
-\alias{percent}
-\title{Convert corpus frequency table of alternatives to percent}
-\usage{
-percent(df)
-}
-\arguments{
-\item{df}{table returned from \code{\link{frequencyQuery}}}
-}
-\value{
-original table with converted columns \code{f}, \code{conf.low} and \code{conf.high}
-}
-\description{
-Convenience function for converting frequency tables of alternative variants
-(generated with \code{as.alternatives=TRUE}) to percent.
-}
-\examples{
-\donttest{
-new("KorAPConnection") \%>\%
- frequencyQuery(c("Tollpatsch", "Tolpatsch"),
- vc=paste0("pubDate in ", 2000:2002),
- as.alternatives = TRUE) \%>\%
- percent()
-}
-}
diff --git a/man/pmi.Rd b/man/pmi.Rd
deleted file mode 100644
index 015e00d..0000000
--- a/man/pmi.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/association-scores.R
-\name{pmi}
-\alias{pmi}
-\title{Pointwise mutual information}
-\usage{
-pmi(O1, O2, O, N, E, window_size)
-}
-\arguments{
-\item{O1}{observed absolute frequency of node}
-
-\item{O2}{observed absolute frequency of collocate}
-
-\item{O}{observed absolute frequency of collocation}
-
-\item{N}{corpus size}
-
-\item{E}{expected absolute frequency of collocation (already adjusted to window size)}
-
-\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
-}
-\value{
-association score
-}
-\description{
-Pointwise mutual information
-}
-\seealso{
-Other association-score-functions:
-\code{\link{defaultAssociationScoreFunctions}()},
-\code{\link{ll}()},
-\code{\link{logDice}()},
-\code{\link{mi2}()},
-\code{\link{mi3}()}
-}
-\concept{association-score-functions}
diff --git a/man/queryStringToLabel.Rd b/man/queryStringToLabel.Rd
deleted file mode 100644
index 8dedafe..0000000
--- a/man/queryStringToLabel.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/misc.R
-\name{queryStringToLabel}
-\alias{queryStringToLabel}
-\title{Convert query or vc strings to plot labels}
-\usage{
-queryStringToLabel(data, pubDateOnly = FALSE, excludePubDate = FALSE)
-}
-\arguments{
-\item{data}{string or vector of query or vc definition strings}
-
-\item{pubDateOnly}{discard all but the publication date}
-
-\item{excludePubDate}{discard publication date constraints}
-}
-\value{
-string or vector of strings with clipped off common prefixes and suffixes
-}
-\description{
-Converts a vector of query or vc strings to typically appropriate legend labels
-by clipping off prefixes and suffixes that are common to all query strings.
-}
-\examples{
-queryStringToLabel(paste("textType = /Zeit.*/ & pubDate in", c(2010:2019)))
-queryStringToLabel(c("[marmot/m=mood:subj]", "[marmot/m=mood:ind]"))
-queryStringToLabel(c("wegen dem [tt/p=NN]", "wegen des [tt/p=NN]"))
-
-}