In corpusQuery Use fields API result as metadata

Resolves #19

Change-Id: I83e4f66efc06d62a2aec10a7719faafaadab15bb
diff --git a/NAMESPACE b/NAMESPACE
index 53ab148..9255cb8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -95,6 +95,7 @@
 importFrom(lubridate,year)
 importFrom(magrittr,"%>%")
 importFrom(magrittr,debug_pipe)
+importFrom(purrr,map)
 importFrom(purrr,pmap)
 importFrom(stats,prop.test)
 importFrom(stringr,str_detect)
@@ -102,6 +103,7 @@
 importFrom(stringr,str_split)
 importFrom(tibble,add_column)
 importFrom(tibble,as_tibble)
+importFrom(tibble,enframe)
 importFrom(tibble,remove_rownames)
 importFrom(tibble,rownames_to_column)
 importFrom(tibble,tibble)
@@ -109,4 +111,6 @@
 importFrom(tidyr,expand_grid)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
+importFrom(tidyr,unchop)
+importFrom(tidyr,unnest)
 importFrom(urltools,url_encode)
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 5ead599..c1c29d9 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -263,7 +263,10 @@
 #'
 #' @aliases fetchNext
 #' @rdname KorAPQuery-class
-#' @importFrom dplyr rowwise bind_rows select summarise n
+#' @importFrom dplyr rowwise mutate bind_rows select summarise n select
+#' @importFrom tibble enframe
+#' @importFrom tidyr unnest unchop pivot_wider
+#' @importFrom purrr map
 #' @export
 setMethod("fetchNext", "KorAPQuery", function(kqo,
                                               offset = kqo@nextStartIndex,
@@ -273,7 +276,7 @@
   if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
     return(kqo)
   }
-
+  use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
   page <- kqo@nextStartIndex / maxResultsPerPage + 1
   results <- 0
   pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
@@ -283,23 +286,40 @@
     pages <- head(sample.int(ceiling(kqo@totalResults / maxResultsPerPage)), maxFetch) - 1
   }
 
+  if(is.null(collectedMatches)) {
+    collectedMatches <- data.frame()
+  }
   repeat {
-    page = length(collectedMatches[,1]) %/% maxResultsPerPage + 1
+    page = nrow(collectedMatches) %/% maxResultsPerPage + 1
     currentOffset = ifelse(randomizePageOrder, pages[page],  page - 1) * maxResultsPerPage
     query <- paste0(kqo@requestUrl, '&count=', min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage) ,'&offset=', currentOffset, '&cutoff=true')
     res <- apiCall(kqo@korapConnection, query)
+    rawRes <<- res
     if (length(res$matches) == 0) {
       break
     }
 
+    if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) || as.numeric(use_korap_api) >= 1.0)) {
+      if (verbose) cat("Using fields API: ")
+      currentMatches <-  tibble::enframe(res$matches$fields) %>%
+        tidyr::unnest(cols = value) %>%
+        tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
+        dplyr::mutate(across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = " ")))) %>%
+        tidyr::unchop(where(is.list)) %>%
+        dplyr::select(-name)
+      if("snippet" %in% colnames(res$matches)) {
+        currentMatches$snippet <- res$matches$snippet
+      }
+    } else {
+      currentMatches <- res$matches
+    }
+
     for (field in kqo@fields) {
-      if (!field %in% colnames(res$matches)) {
-        res$matches[, field] <- NA
+      if (!field %in% colnames(currentMatches)) {
+        currentMatches[, field] <- NA
       }
     }
-    currentMatches <-
-      res$matches %>%
-      dplyr::select(kqo@fields)
+    currentMatches <- currentMatches %>% select(kqo@fields)
     if (!is.list(collectedMatches)) {
       collectedMatches <- currentMatches
     } else {
@@ -308,7 +328,7 @@
     if (verbose) {
       cat(paste0(
         "Retrieved page ",
-        ceiling(length(collectedMatches[, 1]) / res$meta$itemsPerPage),
+        ceiling(nrow(collectedMatches) / res$meta$itemsPerPage),
         "/",
         if (!is.na(maxFetch) && maxFetch < kqo@totalResults)
           sprintf("%d (%d)", ceiling(maxFetch / res$meta$itemsPerPage), ceiling(kqo@totalResults / res$meta$itemsPerPage))
@@ -321,7 +341,7 @@
     }
     page <- page + 1
     results <- results + res$meta$itemsPerPage
-    if (length(collectedMatches[,1]) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
+    if (nrow(collectedMatches) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
       break
     }
   }
diff --git a/tests/testthat/test-corpusQuery.R b/tests/testthat/test-corpusQuery.R
index 0483745..f080345 100644
--- a/tests/testthat/test-corpusQuery.R
+++ b/tests/testthat/test-corpusQuery.R
@@ -67,6 +67,17 @@
   expect_equal(nrow(matches), expectedResults)
 })
 
+test_that("fetchAll fetches textClass metadatum", {
+  skip_if_offline()
+  q <- new("KorAPConnection", verbose = TRUE) %>%
+    corpusQuery("Ameisenplage", vc = "pubDate since 2014")
+  expectedResults <- q@totalResults
+  matches <- fetchAll(q)@collectedMatches
+  expect_true(any(grepl("wissenschaft ", matches$textClass)))
+  expect_true(any(grepl(" populaerwissenschaft", matches$textClass)))
+  expect_true(any(grepl("kultur literatur", matches$textClass)))
+})
+
 test_that("Uncached query for non-matching search string return 0 results", {
   skip_if_offline()
   q <- new("KorAPConnection", cache = FALSE) %>% corpusQuery("Xmeisenplagx")