blob: 8e5a22f469e17d25202d949cc1ae990cd5d6a9b7 [file] [log] [blame]
Marc Kupietz55fc3162022-12-04 16:25:49 +01001const chai = require('chai');
2const assert = chai.assert;
Marc Kupietz5e45a2f2022-12-03 15:32:40 +01003
/**
 * Small Puppeteer-driven remote control for a KorAP web front end.
 *
 * Every method takes the Puppeteer `page` to act on; the instance itself only
 * stores the base URL of the KorAP instance.
 */
class KorAPRC {
  /** Base URL of the KorAP instance, as passed to the constructor. */
  korap_url = "";

  /**
   * @param {string} korap_url - Base URL of the KorAP instance.
   */
  constructor(korap_url) {
    this.korap_url = korap_url;
  }

  /**
   * Factory equivalent of `new KorAPRC(korap_url)`.
   *
   * @param {string} korap_url - Base URL of the KorAP instance.
   * @returns {KorAPRC} A new remote control instance.
   */
  static new(korap_url) {
    return new KorAPRC(korap_url);
  }

  /**
   * Opens the KorAP start page and logs in through the login form.
   *
   * Never throws: any failure is logged with `console.error` and reported as
   * `false`.
   *
   * @param {object} page - Puppeteer page.
   * @param {string} username - Login handle or e-mail address.
   * @param {string} password - Password.
   * @returns {Promise<boolean>} `true` if a `.logout` element is present after
   *   submitting the form, `false` otherwise (including empty credentials).
   */
  async login(page, username, password) {
    try {
      await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });
      // Without credentials there is nothing to submit; the page stays open.
      if (!username || !password) return false;

      await page.waitForSelector('.dropdown-btn', { visible: true });
      await page.click('.dropdown-btn');
      await page.waitForSelector('input[name=handle_or_email]', { visible: true });

      const username_field = await page.$("input[name=handle_or_email]");
      if (username_field == null) return false;
      await username_field.focus();
      await username_field.type(username);

      const password_field = await page.$("input[name=pwd]");
      if (password_field == null) return false;
      await password_field.focus();
      await page.keyboard.type(password);

      // Register the navigation wait BEFORE submitting: if the navigation
      // finishes before waitForNavigation() is called, the wait would only
      // end with a timeout.
      await Promise.all([
        page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
        page.keyboard.press("Enter"),
      ]);

      // The query field being visible confirms the post-login page is ready.
      await page.waitForSelector("#q-field", { visible: true });
      const logout = await page.$(".logout");
      return logout != null;
    } catch (error) {
      console.error(`Login failed: ${error.message}`);
      return false;
    }
  }

  /**
   * Types `query` into the query field, submits it and reads the number of
   * hits from the result page.
   *
   * The count is taken from the first of several candidate elements that
   * contains a number, then from the page title, then from the number of
   * `ol li` items. Thousands separators ("1,234" / "1.234") are merged so
   * that the full number is reported rather than its largest digit group.
   *
   * @param {object} page - Puppeteer page showing a KorAP page with `#q-field`.
   * @param {string} query - Query text to type.
   * @returns {Promise<number>} The largest number found in the chosen source.
   * @throws {Error} "Failed to perform search: …" when any step fails or no
   *   count can be found; the original error is attached as `cause`.
   */
  async search(page, query) {
    try {
      await page.waitForSelector("#q-field", { visible: true });
      const query_field = await page.$("#q-field");
      assert.notEqual(query_field, null, "Query field not found");

      // Triple click selects the existing text so typing replaces it.
      await query_field.click({ clickCount: 3 });
      await page.keyboard.type(query);

      // Register the navigation wait before submitting (see login()).
      await Promise.all([
        page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
        page.keyboard.press("Enter"),
      ]);

      // Wait for search results to be fully loaded.
      try {
        await page.waitForSelector('ol li, #resultinfo, .result-item', {
          visible: true,
          timeout: 15000,
        });
        // Give additional time for the results count to be populated.
        await new Promise((resolve) => setTimeout(resolve, 2000));
      } catch (error) {
        // Deliberate best effort: on timeout the extraction below still runs
        // and throws if nothing can be found.
      }

      // NOTE: this callback runs inside the browser page, so it must be
      // self-contained and cannot reference anything from this module.
      const resultsInfo = await page.evaluate(() => {
        // Returns all numbers in `text` as digit-only strings; well-formed
        // thousands groups such as "1,234,567" or "1.234" count as ONE number.
        const extractNumbers = (text) => {
          const found = text.match(/\d{1,3}(?:[.,]\d{3})+(?!\d)|\d+/g);
          return found ? found.map((item) => item.replace(/[.,]/g, '')) : [];
        };

        // Check common selectors for result counts.
        const selectors = [
          '#total-results',
          '#resultinfo',
          '.result-count',
          '.total-results',
          '[data-results]',
          '.found',
        ];

        for (const selector of selectors) {
          const element = document.querySelector(selector);
          if (element) {
            const numbers = extractNumbers(element.textContent || element.innerText || '');
            if (numbers.length > 0) {
              return { selector, numbers };
            }
          }
        }

        // Look in the page title for a results count.
        if (document.title) {
          const numbers = extractNumbers(document.title);
          if (numbers.length > 0) {
            return { selector: 'title', numbers };
          }
        }

        // Count the actual result items as fallback.
        const resultItems = document.querySelectorAll('ol li');
        if (resultItems.length > 0) {
          return {
            selector: 'counted-items',
            numbers: [resultItems.length.toString()],
          };
        }

        return null;
      });

      if (!resultsInfo || !resultsInfo.numbers || resultsInfo.numbers.length === 0) {
        // Final fallback: count list items once more, in case they were
        // rendered after the first evaluation.
        const itemCount = await page.evaluate(() => document.querySelectorAll('ol li').length);
        if (itemCount > 0) {
          return itemCount;
        }
        throw new Error("Cannot find any results count on the page");
      }

      // The largest number found is taken to be the total number of results.
      return Math.max(...resultsInfo.numbers.map((n) => parseInt(n, 10)));
    } catch (error) {
      throw new Error(`Failed to perform search: ${error.message}`, { cause: error });
    }
  }

  /**
   * Logs out by navigating to `<current page>/logout`, then returns to the
   * start page so that subsequent steps begin from a clean state.
   *
   * Never throws: failures are logged with `console.error` and reported as
   * `false`.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<boolean>} `true` if both navigations succeeded.
   */
  async logout(page) {
    try {
      const currentUrl = await page.url();
      // Drop query string and fragment first: after a search the URL looks
      // like ".../?q=foo", and appending "/logout" to that would only alter
      // the last query parameter instead of addressing the logout route.
      const baseUrl = currentUrl.replace(/[?#].*$/, '').replace(/\/$/, '');
      const logoutUrl = `${baseUrl}/logout`;

      await page.goto(logoutUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });

      // Navigate back to main page to ensure clean state for subsequent tests.
      await page.goto(this.korap_url, { waitUntil: 'domcontentloaded', timeout: 10000 });

      return true;
    } catch (error) {
      console.error(`Logout failed: ${error.message}`);
      return false;
    }
  }

  /**
   * Clicks `#glimpse` if the `input[name=cutoff]` checkbox is checked.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<void>}
   * @throws {Error} If `input[name=cutoff]` is not present on the page.
   */
  async assure_glimpse_off(page) {
    const glimpse = await page.$("input[name=cutoff]");
    if (glimpse == null) {
      throw new Error("Glimpse checkbox (input[name=cutoff]) not found");
    }
    const checkedProperty = await glimpse.getProperty('checked');
    const glimpse_value = await checkedProperty.jsonValue();
    if (glimpse_value) {
      await page.click("#glimpse");
    }
  }

  /**
   * Opens the corpus statistics view from the start page and reads the token
   * count.
   *
   * The count is read from the first `dd` that follows an element whose
   * `title` contains "token"; failing that, from the first `dd` on the page
   * whose number exceeds `minTokenThreshold`.
   *
   * @param {object} page - Puppeteer page.
   * @param {number} [minTokenThreshold=1000] - Lower bound (exclusive) a `dd`
   *   number must exceed to be accepted by the fallback lookup.
   * @returns {Promise<number>} The token count.
   * @throws {Error} "Failed to check corpus statistics: …" on any failure; the
   *   original error is attached as `cause`.
   */
  async check_corpus_statistics(page, minTokenThreshold = 1000) {
    try {
      // Always start from the main page.
      await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });

      // Click the vc-choose element to open corpus selection.
      await page.waitForSelector('#vc-choose', { visible: true, timeout: 90000 });
      await page.click('#vc-choose');

      // Wait a moment for the UI to respond.
      await new Promise((resolve) => setTimeout(resolve, 1000));

      // Click the statistic element.
      await page.waitForSelector('.statistic', { visible: true, timeout: 90000 });
      try {
        await page.click('.statistic');
      } catch (error) {
        throw new Error(`Failed to click statistic element: ${error.message}`, { cause: error });
      }

      // Wait until some <dd> shows a number. Any <dd> that follows a
      // "token"-titled element is itself a <dd> on the page, so checking all
      // <dd> elements covers both lookups performed below.
      await page.waitForFunction(
        () => Array.from(document.querySelectorAll('dd')).some(
          (dd) => /\d/.test(dd.textContent || dd.innerText || '')
        ),
        { timeout: 60000 }
      );

      // NOTE: this callback runs inside the browser page; its console.log
      // output goes to the page console, not to the Node process.
      const tokenCount = await page.evaluate((minThreshold) => {
        console.log("Attempting to find token count within page.evaluate...");

        // First run of digits in the element's text, with "," and "." removed
        // beforehand, or null if the text contains no digit.
        const firstDigits = (element) => {
          const text = element.textContent || element.innerText || '';
          const found = text.replace(/[,.]/g, '').match(/\d+/);
          return found ? found[0] : null;
        };

        // Preferred: a <dd> among the following siblings of a "token"-titled element.
        const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
        for (const element of tokenTitleElements) {
          for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
            if (sibling.tagName.toLowerCase() !== 'dd') continue;
            const digits = firstDigits(sibling);
            if (digits !== null) {
              console.log(`Found token count from title element: ${digits}`);
              return parseInt(digits, 10);
            }
          }
        }

        // Alternative: the first <dd> anywhere whose number exceeds the threshold.
        for (const dd of document.querySelectorAll('dd')) {
          const digits = firstDigits(dd);
          if (digits === null) continue;
          const num = parseInt(digits, 10);
          if (num > minThreshold) {
            console.log(`Found token count from dd element: ${num}`);
            return num;
          }
        }

        console.log("Could not find token count using any method.");
        return null;
      }, minTokenThreshold);

      if (tokenCount === null) {
        throw new Error("Could not find token count in corpus statistics");
      }

      return tokenCount;
    } catch (error) {
      throw new Error(`Failed to check corpus statistics: ${error.message}`, { cause: error });
    }
  }
}
274
// Export the class itself as the module's sole export.
module.exports = KorAPRC;