In corpusQuery, use the fields API result as metadata

Resolves #19

Change-Id: I83e4f66efc06d62a2aec10a7719faafaadab15bb
diff --git a/R/KorAPQuery.R b/R/KorAPQuery.R
index 5ead599..c1c29d9 100644
--- a/R/KorAPQuery.R
+++ b/R/KorAPQuery.R
@@ -263,7 +263,10 @@
 #'
 #' @aliases fetchNext
 #' @rdname KorAPQuery-class
-#' @importFrom dplyr rowwise bind_rows select summarise n
+#' @importFrom dplyr rowwise mutate bind_rows select summarise n
+#' @importFrom tibble enframe
+#' @importFrom tidyr unnest unchop pivot_wider
+#' @importFrom purrr map
 #' @export
 setMethod("fetchNext", "KorAPQuery", function(kqo,
                                               offset = kqo@nextStartIndex,
@@ -273,7 +276,7 @@
   if (kqo@totalResults == 0 || offset >= kqo@totalResults) {
     return(kqo)
   }
-
+  use_korap_api <- Sys.getenv("USE_KORAP_API", unset = NA)
   page <- kqo@nextStartIndex / maxResultsPerPage + 1
   results <- 0
   pubDate <- NULL # https://stackoverflow.com/questions/8096313/no-visible-binding-for-global-variable-note-in-r-cmd-check
@@ -283,23 +286,40 @@
     pages <- head(sample.int(ceiling(kqo@totalResults / maxResultsPerPage)), maxFetch) - 1
   }
 
+  if(is.null(collectedMatches)) {
+    collectedMatches <- data.frame()
+  }
   repeat {
-    page = length(collectedMatches[,1]) %/% maxResultsPerPage + 1
+    page = nrow(collectedMatches) %/% maxResultsPerPage + 1
     currentOffset = ifelse(randomizePageOrder, pages[page],  page - 1) * maxResultsPerPage
     query <- paste0(kqo@requestUrl, '&count=', min(if (!is.na(maxFetch)) maxFetch - results else maxResultsPerPage, maxResultsPerPage) ,'&offset=', currentOffset, '&cutoff=true')
     res <- apiCall(kqo@korapConnection, query)
+    rawRes <<- res
     if (length(res$matches) == 0) {
       break
     }
 
+    if ("fields" %in% colnames(res$matches) && (is.na(use_korap_api) || as.numeric(use_korap_api) >= 1.0)) {
+      if (verbose) cat("Using fields API: ")
+      currentMatches <-  tibble::enframe(res$matches$fields) %>%
+        tidyr::unnest(cols = value) %>%
+        tidyr::pivot_wider(names_from = key, id_cols = name, names_repair = "unique") %>%
+        dplyr::mutate(across(where(is.list), ~ purrr::map(.x, ~ if (length(.x) < 2) unlist(.x) else paste(.x, collapse = " ")))) %>%
+        tidyr::unchop(where(is.list)) %>%
+        dplyr::select(-name)
+      if("snippet" %in% colnames(res$matches)) {
+        currentMatches$snippet <- res$matches$snippet
+      }
+    } else {
+      currentMatches <- res$matches
+    }
+
     for (field in kqo@fields) {
-      if (!field %in% colnames(res$matches)) {
-        res$matches[, field] <- NA
+      if (!field %in% colnames(currentMatches)) {
+        currentMatches[, field] <- NA
       }
     }
-    currentMatches <-
-      res$matches %>%
-      dplyr::select(kqo@fields)
+    currentMatches <- currentMatches %>% select(kqo@fields)
     if (!is.list(collectedMatches)) {
       collectedMatches <- currentMatches
     } else {
@@ -308,7 +328,7 @@
     if (verbose) {
       cat(paste0(
         "Retrieved page ",
-        ceiling(length(collectedMatches[, 1]) / res$meta$itemsPerPage),
+        ceiling(nrow(collectedMatches) / res$meta$itemsPerPage),
         "/",
         if (!is.na(maxFetch) && maxFetch < kqo@totalResults)
           sprintf("%d (%d)", ceiling(maxFetch / res$meta$itemsPerPage), ceiling(kqo@totalResults / res$meta$itemsPerPage))
@@ -321,7 +341,7 @@
     }
     page <- page + 1
     results <- results + res$meta$itemsPerPage
-    if (length(collectedMatches[,1]) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
+    if (nrow(collectedMatches) >= kqo@totalResults || (!is.na(maxFetch) && results >= maxFetch)) {
       break
     }
   }