Add mergeDuplicateCollocates function
Change-Id: Ib9d458dc233167c20c9fdedf2f30656d693c32ff
diff --git a/NAMESPACE b/NAMESPACE
index 47165ab..42fe68e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,7 @@
export(ipm)
export(ll)
export(logDice)
+export(mergeDuplicateCollocates)
export(mi2)
export(mi3)
export(mutate)
@@ -60,6 +61,7 @@
importFrom(broom,tidy)
importFrom(curl,has_internet)
importFrom(dplyr,.data)
+importFrom(dplyr,across)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
@@ -68,7 +70,9 @@
importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,enquo)
+importFrom(dplyr,everything)
importFrom(dplyr,filter)
+importFrom(dplyr,first)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,mutate)
@@ -81,6 +85,7 @@
importFrom(dplyr,starts_with)
importFrom(dplyr,summarise)
importFrom(dplyr,tibble)
+importFrom(dplyr,ungroup)
importFrom(ggplot2,GeomPoint)
importFrom(ggplot2,aes)
importFrom(ggplot2,element_text)
@@ -92,6 +97,7 @@
importFrom(ggplot2,layer)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,theme)
+importFrom(httr,build_url)
importFrom(httr,parse_url)
importFrom(jsonlite,fromJSON)
importFrom(lubridate,year)
diff --git a/R/collocationScoreQuery.R b/R/collocationScoreQuery.R
index 22a9f9f..0855e18 100644
--- a/R/collocationScoreQuery.R
+++ b/R/collocationScoreQuery.R
@@ -180,3 +180,41 @@
else
w
}
+
+#' Merge duplicate collocate rows and re-calculate association scores and urls
+#'
+#' @param ... tibbles with collocate rows returned from [collocationAnalysis()]
+#' @return tibble with unique collocate rows
+#'
+#' @importFrom dplyr bind_rows group_by summarise ungroup mutate across first everything
+#' @importFrom httr parse_url build_url
+#' @export
+mergeDuplicateCollocates <- function(...) {
+  combined_df <- bind_rows(...)
+  if (nrow(combined_df) == 0) return(combined_df) # nothing to merge, no URL
+  # Base web UI URL: the first row's request URL with its query string removed.
+  korapUrl <- parse_url(combined_df$webUIRequestUrl[1])
+  korapUrl$query <- ""
+  korapUrl <- build_url(korapUrl)
+  # summarise() runs in order: later entries see merged O, O1, w, E and query.
+  combined_df %>%
+    group_by(collocate, O2, N) %>%
+    summarise(
+      O = sum(O),
+      O1 = sum(O1),
+      leftContextSize = sum(leftContextSize),
+      rightContextSize = sum(rightContextSize),
+      w = sum(w),
+      E = w * O1 * first(O2) / first(N),
+      logDice = logDice(O1, first(O2), O, first(N), E = E, w),
+      pmi = pmi(O1, first(O2), O, first(N), E = E, w),
+      mi2 = mi2(O1, first(O2), O, first(N), E = E, w),
+      mi3 = mi3(O1, first(O2), O, first(N), E = E, w),
+      ll = RKorAPClient::ll(O1, first(O2), O, first(N), E = E, w),
+      query = paste(query, collapse = " | "),
+      webUIRequestUrl = buildWebUIRequestUrlFromString(korapUrl, query = query, vc = first(vc)),
+      across(everything(), first), # remaining columns: keep first value per group
+      .groups = "drop" # return an ungrouped tibble without the grouping message
+    )
+}
+
diff --git a/man/mergeDuplicateCollocates.Rd b/man/mergeDuplicateCollocates.Rd
new file mode 100644
index 0000000..ba69bf4
--- /dev/null
+++ b/man/mergeDuplicateCollocates.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/collocationScoreQuery.R
+\name{mergeDuplicateCollocates}
+\alias{mergeDuplicateCollocates}
+\title{Merge duplicate collocate rows and re-calculate association scores and urls}
+\usage{
+mergeDuplicateCollocates(...)
+}
+\arguments{
+\item{...}{tibbles with collocate rows returned from \code{\link[=collocationAnalysis]{collocationAnalysis()}}}
+}
+\value{
+tibble with unique collocate rows
+}
+\description{
+Merge duplicate collocate rows and re-calculate association scores and urls
+}