Improve test coverage

Change-Id: Ife111b053e20391dcffdc2335de52c41b24649be
diff --git a/tests/testthat/test-collocations.R b/tests/testthat/test-collocations.R
index b0c7a7a..a93ca57 100644
--- a/tests/testthat/test-collocations.R
+++ b/tests/testthat/test-collocations.R
@@ -1,7 +1,7 @@
 test_that("collocationScoreQuery works", {
   skip_if_offline()
   kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = TRUE)
-  df <- collocationScoreQuery(kco, "Ameisenplage", "heimgesucht", leftContextSize=0, rightContextSize=1)
+  df <- collocationScoreQuery(kco, "Ameisenplage", "heimgesucht", leftContextSize = 0, rightContextSize = 1)
   expect_gt(df$logDice, 1)
   expect_equal(df$ll, ll(df$O1, df$O2, df$O, df$N, df$E, df$w))
   expect_equal(df$pmi, pmi(df$O1, df$O2, df$O, df$N, df$E, df$w))
@@ -14,18 +14,19 @@
 test_that("collocationAnalysis works and warns about missing token", {
   skip_if_offline()
   kco <- KorAPConnection(
-               accessToken = NULL,
-               verbose = TRUE)
-    expect_warning(
-      df <-
-        collocationAnalysis(
-          kco,
-          "focus([tt/p=ADJA] {Newstickeritis})",
-          leftContextSize = 1,
-          rightContextSize = 0,
-        ),
-      "access token"
-    )
+    accessToken = NULL,
+    verbose = TRUE
+  )
+  expect_warning(
+    df <-
+      collocationAnalysis(
+        kco,
+        "focus([tt/p=ADJA] {Newstickeritis})",
+        leftContextSize = 1,
+        rightContextSize = 0,
+      ),
+    "access token"
+  )
   expect_gt(df$O, df$E)
   expect_gt(df$logDice, -1)
 })
@@ -34,17 +35,17 @@
   skip_if_offline()
   kco <- KorAPConnection(accessToken = NULL, verbose = TRUE)
   expect_warning(
-    df <- collocationAnalysis(kco, "XXXXXXXXAmeisenplage", vc=c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/"), maxRecurse = 2),
+    df <- collocationAnalysis(kco, "XXXXXXXXAmeisenplage", vc = c("corpusSigle=/WDD17/", "corpusSigle=/WUD17/"), maxRecurse = 2),
     "access token"
   )
   testthat::expect_equal(nrow(df), 0)
 })
 
-#test_that("removeWithinSpanWorks", {
+# test_that("removeWithinSpanWorks", {
 #  expect_equal(
 #    removeWithinSpan("contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))", "base/s=s"),
 #    "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)")
-#})
+# })
 
 
 test_that("mergeDuplicateCollocatesWorksAsExpected", {
@@ -100,3 +101,217 @@
   expect_equal(merged$O2, 1296870.5)
   expect_equal(merged$query, "Anspruch focus(in [tt/p=NN] {[tt/l=nehmen]}) | focus({[tt/l=nehmen] in} [tt/p=NN]) Anspruch")
 })
+
+# New tests for improved coverage of collocationAnalysis.R helper functions
+
+test_that("synsemanticStopwords returns German stopwords", {
+  stopwords <- synsemanticStopwords()
+  expect_true(is.character(stopwords))
+  expect_true(length(stopwords) > 50)
+  expect_true("der" %in% stopwords)
+  expect_true("die" %in% stopwords)
+  expect_true("und" %in% stopwords)
+  expect_true("mit" %in% stopwords)
+})
+
+test_that("removeWithinSpan removes span constraints correctly", {
+  # Test basic span removal
+  query1 <- "contains(<base/s=s>, (machen []{0,1} aufmerksam | aufmerksam []{0,1} machen))"
+  result1 <- RKorAPClient:::removeWithinSpan(query1, "base/s=s")
+  expect_equal(result1, "(machen []{0,1} aufmerksam | aufmerksam []{0,1} machen)")
+
+  # Test with different span
+  query2 <- "contains(<p/s=s>, (test query))"
+  result2 <- RKorAPClient:::removeWithinSpan(query2, "p/s=s")
+  expect_equal(result2, "(test query)")
+
+  # Test with empty span - should return original query
+  query3 <- "simple query"
+  result3 <- RKorAPClient:::removeWithinSpan(query3, "")
+  expect_equal(result3, query3)
+
+  # Test with non-matching span
+  query4 <- "contains(<base/s=s>, test)"
+  result4 <- RKorAPClient:::removeWithinSpan(query4, "other/span")
+  expect_equal(result4, query4)
+})
+
+test_that("matches2FreqTable handles empty matches", {
+  empty_matches <- data.frame()
+  result <- RKorAPClient:::matches2FreqTable(empty_matches, index = 0)
+
+  expect_true(is.data.frame(result))
+  expect_equal(nrow(result), 0)
+})
+
+test_that("matches2FreqTable processes single match correctly", {
+  # Create mock matches data
+  mock_matches <- data.frame(
+    tokens = I(list(list(
+      left = c("der", "große"),
+      match = "Test",
+      right = c("ist", "wichtig")
+    ))),
+    stringsAsFactors = FALSE
+  )
+
+  result <- RKorAPClient:::matches2FreqTable(
+    mock_matches,
+    index = 1,
+    leftContextSize = 2,
+    rightContextSize = 2,
+    stopwords = c("der", "ist") # Provide stopwords to avoid empty join
+  )
+
+  expect_true(is.data.frame(result))
+})
+
+test_that("snippet2FreqTable handles empty snippet", {
+  result <- RKorAPClient:::snippet2FreqTable(character(0))
+
+  expect_true(is.data.frame(result))
+  expect_equal(nrow(result), 0)
+})
+
+test_that("snippet2FreqTable processes single snippet correctly", {
+  snippet <- '<span class="context-left">der große </span><span class="match"><mark>Test</mark></span><span class="context-right"> ist wichtig</span>'
+
+  result <- RKorAPClient:::snippet2FreqTable(
+    snippet,
+    leftContextSize = 2,
+    rightContextSize = 2,
+    stopwords = c("der"), # Provide stopwords to avoid empty join
+    verbose = FALSE
+  )
+
+  expect_true(is.data.frame(result))
+})
+
+# Removed hanging findExample tests as they cause infinite wait
+# These tests make API calls that don't complete properly
+
+# Removed hanging collocatesQuery tests as they cause infinite wait
+# These tests were causing the test suite to hang and not terminate
+
+test_that("collocationAnalysis handles exactFrequencies parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "Test",
+      exactFrequencies = TRUE,
+      searchHitsSampleLimit = 5,
+      topCollocatesLimit = 5
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})
+
+test_that("collocationAnalysis handles withinSpan parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "Test",
+      withinSpan = "base/s=s",
+      exactFrequencies = TRUE,
+      searchHitsSampleLimit = 5,
+      topCollocatesLimit = 5
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})
+
+test_that("collocationAnalysis handles expand parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      c("Test", "der"),
+      expand = TRUE,
+      searchHitsSampleLimit = 2,
+      topCollocatesLimit = 2
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})
+
+test_that("collocationAnalysis handles stopwords parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "Test",
+      stopwords = c("der", "die", "und"),
+      searchHitsSampleLimit = 5,
+      topCollocatesLimit = 5
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})
+
+test_that("collocationAnalysis handles lemmatizeNodeQuery parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "laufen",
+      lemmatizeNodeQuery = TRUE,
+      searchHitsSampleLimit = 5,
+      topCollocatesLimit = 5
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})
+
+test_that("collocationAnalysis handles addExamples parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "Test",
+      addExamples = TRUE,
+      searchHitsSampleLimit = 3,
+      topCollocatesLimit = 3
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+  if (nrow(result) > 0) {
+    expect_true("example" %in% colnames(result))
+  }
+})
+
+test_that("collocationAnalysis handles maxRecurse parameter", {
+  skip_if_offline()
+  kco <- KorAPConnection(accessToken = NULL, cache = TRUE, verbose = FALSE)
+
+  expect_warning(
+    result <- collocationAnalysis(
+      kco,
+      "Test",
+      maxRecurse = 1,
+      searchHitsSampleLimit = 2,
+      topCollocatesLimit = 2
+    ),
+    "access token"
+  )
+  expect_true(is.data.frame(result))
+})