Add mergeDuplicateCollocates function

Change-Id: Ib9d458dc233167c20c9fdedf2f30656d693c32ff
diff --git a/NAMESPACE b/NAMESPACE
index 47165ab..42fe68e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,7 @@
 export(ipm)
 export(ll)
 export(logDice)
+export(mergeDuplicateCollocates)
 export(mi2)
 export(mi3)
 export(mutate)
@@ -60,6 +61,7 @@
 importFrom(broom,tidy)
 importFrom(curl,has_internet)
 importFrom(dplyr,.data)
+importFrom(dplyr,across)
 importFrom(dplyr,anti_join)
 importFrom(dplyr,arrange)
 importFrom(dplyr,as_tibble)
@@ -68,7 +70,9 @@
 importFrom(dplyr,case_when)
 importFrom(dplyr,desc)
 importFrom(dplyr,enquo)
+importFrom(dplyr,everything)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,if_else)
 importFrom(dplyr,mutate)
@@ -81,6 +85,7 @@
 importFrom(dplyr,starts_with)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tibble)
+importFrom(dplyr,ungroup)
 importFrom(ggplot2,GeomPoint)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,element_text)
@@ -92,6 +97,7 @@
 importFrom(ggplot2,layer)
 importFrom(ggplot2,scale_x_continuous)
 importFrom(ggplot2,theme)
+importFrom(httr,build_url)
 importFrom(httr,parse_url)
 importFrom(jsonlite,fromJSON)
 importFrom(lubridate,year)
diff --git a/R/collocationScoreQuery.R b/R/collocationScoreQuery.R
index 22a9f9f..0855e18 100644
--- a/R/collocationScoreQuery.R
+++ b/R/collocationScoreQuery.R
@@ -180,3 +180,57 @@
   else
     w
 }
+
+#' Merge duplicate collocate rows and re-calculate association scores and urls
+#'
+#' @param ... tibbles with collocate rows returned from [collocationAnalysis()]
+#' @return tibble with unique collocate rows
+#'
+#' @importFrom dplyr bind_rows group_by summarise ungroup mutate across first everything
+#' @importFrom httr parse_url build_url
+#' @export
+mergeDuplicateCollocates <- function(...) {
+  combined_df <- bind_rows(...)
+
+  # No rows: there is no request URL to parse and nothing to merge.
+  if (nrow(combined_df) == 0) {
+    return(combined_df)
+  }
+
+  # Take the first row's web UI request URL and blank its query part; the
+  # result is handed to buildWebUIRequestUrlFromString() for every merged row.
+  korapUrl <- parse_url(combined_df$webUIRequestUrl[1])
+  korapUrl$query <- ""
+  korapUrl <- build_url(korapUrl)
+
+  # Collapse rows sharing collocate, O2 and N into one row. summarise()
+  # evaluates its arguments in order, so once O, O1, w, E and query have been
+  # assigned, later arguments see the merged per-group values.
+  combined_df %>%
+    group_by(collocate, O2, N) %>%
+    summarise(
+      O = sum(O),
+      O1 = sum(O1),
+      leftContextSize = sum(leftContextSize),
+      rightContextSize = sum(rightContextSize),
+      w = sum(w),
+      # as.double() guards against integer overflow (NA) in the product
+      # should the count columns be integer vectors.
+      E = as.double(w) * O1 * first(O2) / first(N),
+      logDice = logDice(O1, first(O2), O, first(N), E = E, w),
+      pmi = pmi(O1, first(O2), O, first(N), E = E, w),
+      mi2 = mi2(O1, first(O2), O, first(N), E = E, w),
+      mi3 = mi3(O1, first(O2), O, first(N), E = E, w),
+      ll = RKorAPClient::ll(O1, first(O2), O, first(N), E = E, w),
+      query = paste(query, collapse = " | "),
+      webUIRequestUrl = buildWebUIRequestUrlFromString(
+        korapUrl,
+        query = query,
+        vc = first(vc)
+      ),
+      # Any column not aggregated above keeps the value from the group's first row.
+      across(everything(), first)
+    ) %>%
+    ungroup()
+}
+
diff --git a/man/mergeDuplicateCollocates.Rd b/man/mergeDuplicateCollocates.Rd
new file mode 100644
index 0000000..ba69bf4
--- /dev/null
+++ b/man/mergeDuplicateCollocates.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/collocationScoreQuery.R
+\name{mergeDuplicateCollocates}
+\alias{mergeDuplicateCollocates}
+\title{Merge duplicate collocate rows and re-calculate association scores and urls}
+\usage{
+mergeDuplicateCollocates(...)
+}
+\arguments{
+\item{...}{tibbles with collocate rows returned from \code{\link[=collocationAnalysis]{collocationAnalysis()}}}
+}
+\value{
+tibble with unique collocate rows
+}
+\description{
+Merge duplicate collocate rows and re-calculate association scores and urls
+}