"""
Tests for the pyderekovecs package.
"""
| 4 | import unittest |
| 5 | import pandas as pd |
| 6 | from unittest import mock |
| 7 | |
| 8 | from pyderekovecs import ( |
| 9 | syntagmatic_neighbours, |
| 10 | countbased_collocates, |
| 11 | word_frequency, |
| 12 | corpus_size, |
| 13 | paradigmatic_neighbours, |
| 14 | word_embedding, |
| 15 | frequency_rank, |
| 16 | cosine_similarity, |
| 17 | collocation_scores, |
| 18 | derekovecs_api_call |
| 19 | ) |
| 20 | from pyderekovecs.utils import is_word |
| 21 | |
| 22 | |
class MockResponse:
    """Lightweight stand-in for the object returned by ``requests.get``.

    Provides the ``json()`` and ``raise_for_status()`` methods plus the
    ``status_code`` attribute, so tests can hand it to a patched
    ``requests.get`` without any network access.

    Args:
        json_data: Payload returned verbatim by ``json()``.
        status_code: HTTP status code to simulate. Defaults to 200.
    """

    def __init__(self, json_data, status_code=200):
        self.json_data = json_data
        self.status_code = status_code

    def json(self):
        """Return the payload supplied at construction time, unchanged."""
        return self.json_data

    def raise_for_status(self):
        """Raise if the simulated status code is a client or server error.

        Only 4xx and 5xx codes count as failures, matching the behaviour of
        ``requests.Response.raise_for_status``; other codes (e.g. 201, 204)
        pass silently.

        Raises:
            Exception: With the message ``"HTTP Error: <code>"`` when the
                status code is in the range 400-599.
        """
        if 400 <= self.status_code < 600:
            raise Exception(f"HTTP Error: {self.status_code}")
| 36 | |
| 37 | |
class TestDerekovecs(unittest.TestCase):
    """Test cases for the pyderekovecs package.

    Every test patches ``pyderekovecs.derekovecs.requests.get`` so that it
    returns a canned ``MockResponse``; no network access takes place.
    """

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_paradigmatic_neighbours(self, mock_get):
        """First row of the paradigmatic neighbours result is the query word."""
        mock_response = {
            "list": [
                [{"word": "Test", "similarity": 1.0}, {"word": "Experiment", "similarity": 0.8}]
            ]
        }
        mock_get.return_value = MockResponse(mock_response)

        result = paradigmatic_neighbours("Test")
        self.assertEqual(result.iloc[0]['word'], "Test")

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_syntagmatic_neighbours(self, mock_get):
        """First syntagmatic neighbour passes the ``is_word`` check."""
        mock_response = {
            "collocators": [
                {"word": "durchführen", "rank": 1, "average": 0.8}
            ]
        }
        mock_get.return_value = MockResponse(mock_response)

        result = syntagmatic_neighbours("Test")
        self.assertTrue(is_word(result.iloc[0]['word']))

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_countbased_collocates(self, mock_get):
        """First count-based collocate passes the ``is_word`` check."""
        mock_response = {
            "collocates": [
                {"word": "durchführen", "f": 100, "pmi": 0.8}
            ]
        }
        mock_get.return_value = MockResponse(mock_response)

        result = countbased_collocates("Test")
        self.assertTrue(is_word(result.iloc[0]['word']))

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_collocation_scores(self, mock_get):
        """The ``f2`` value of the first collocation score row is positive."""
        mock_response = {
            "collocates": [
                {"word": "putzen", "f2": 500, "pmi": 0.8}
            ]
        }
        mock_get.return_value = MockResponse(mock_response)

        result = collocation_scores("Zähne", "putzen")
        # assertGreater reports both operands on failure, unlike assertTrue.
        self.assertGreater(result.iloc[0]['f2'], 0)

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_cosine_similarity(self, mock_get):
        """Cosine similarity is 1.0 for identical words and within [0, 1] otherwise."""
        # Same word: the mocked service answers with exactly 1.0.
        mock_get.return_value = MockResponse(1.0)

        result = cosine_similarity("Test", "Test")
        self.assertEqual(result, 1.0)

        # Different words: the mocked service answers with 0.7.
        mock_get.return_value = MockResponse(0.7)

        result = cosine_similarity("Test", "testen")
        # Separate bound checks so a failure names the violated bound.
        self.assertGreaterEqual(result, 0)
        self.assertLessEqual(result, 1.0)

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_word_embedding(self, mock_get):
        """The returned embedding has as many entries as the mocked vector."""
        # Create a mock vector of length 200
        mock_vector = [0.1] * 200
        mock_response = {
            "list": [
                [{"vector": mock_vector}]
            ]
        }
        mock_get.return_value = MockResponse(mock_response)

        result = word_embedding("Test")
        self.assertEqual(len(result), 200)

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_frequency_rank(self, mock_get):
        """The frequency rank is a positive integer."""
        mock_get.return_value = MockResponse({"frequencyRank": 500})

        result = frequency_rank("Test")
        # Two dedicated assertions instead of one compound assertTrue, so a
        # failure shows whether the type or the value was wrong.
        self.assertIsInstance(result, int)
        self.assertGreater(result, 0)

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_word_frequency(self, mock_get):
        """The word frequency is a positive integer."""
        mock_get.return_value = MockResponse({"f1": 1000})

        result = word_frequency("Test")
        self.assertIsInstance(result, int)
        self.assertGreater(result, 0)

    @mock.patch('pyderekovecs.derekovecs.requests.get')
    def test_corpus_size(self, mock_get):
        """The corpus size is an integer greater than 1000."""
        mock_get.return_value = MockResponse({"N": 1000000})

        result = corpus_size()
        self.assertIsInstance(result, int)
        self.assertGreater(result, 1000)
| 146 | |
| 147 | |
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()