// blob: 8e5a22f469e17d25202d949cc1ae990cd5d6a9b7 [file] [log] [blame]
const chai = require('chai');
const assert = chai.assert;
/**
 * Helper that drives a KorAP web front end through a Puppeteer-style `page`
 * object: logging in and out, running a search, toggling the "glimpse"
 * option and reading the token count from the corpus statistics view.
 *
 * Every method receives the `page` to operate on; the instance itself only
 * stores the base URL.
 */
class KorAPRC {
    /** Base URL of the instance under test. */
    korap_url = "";

    /**
     * @param {string} korap_url - Base URL that `login`, `logout` and
     *   `check_corpus_statistics` navigate to.
     */
    constructor(korap_url) {
        this.korap_url = korap_url;
    }

    /**
     * Factory equivalent to `new KorAPRC(korap_url)`.
     *
     * @param {string} korap_url - Base URL of the instance under test.
     * @returns {KorAPRC}
     */
    static new(korap_url) {
        return new KorAPRC(korap_url);
    }

    /**
     * Opens the base URL and submits the login form.
     *
     * @param {object} page - Puppeteer-style page.
     * @param {string} username - Value typed into `input[name=handle_or_email]`.
     * @param {string} password - Value typed into `input[name=pwd]`.
     * @returns {Promise<boolean>} `true` when a `.logout` element exists after
     *   the form was submitted; `false` when credentials are missing, the
     *   login field is absent, or any step throws (the error is logged).
     */
    async login(page, username, password) {
        try {
            // The page is opened even for empty credentials so the caller is
            // left on the start page either way.
            await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });
            if (!username || !password) {
                return false;
            }

            await page.waitForSelector('.dropdown-btn', { visible: true });
            await page.click('.dropdown-btn');
            await page.waitForSelector('input[name=handle_or_email]', { visible: true });

            const username_field = await page.$("input[name=handle_or_email]");
            if (username_field == null) {
                return false;
            }
            await username_field.focus();
            await username_field.type(username);

            const password_field = await page.$("input[name=pwd]");
            await password_field.focus();
            await page.keyboard.type(password);

            // Arm the navigation wait BEFORE submitting: if the navigation
            // completes before waitForNavigation() is called, the wait would
            // otherwise hang until it times out.
            await Promise.all([
                page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
                page.keyboard.press("Enter"),
            ]);

            // The query field being visible means the page after login is ready.
            await page.waitForSelector("#q-field", { visible: true });
            const logout = await page.$(".logout");
            return logout != null;
        } catch (error) {
            console.error(`Login failed: ${error.message}`);
            return false;
        }
    }

    /**
     * Types `query` into `#q-field`, submits it and returns the hit count
     * read from the result page.
     *
     * The count is taken from the first of several candidate elements that
     * contains a number, then from the document title, and finally from the
     * number of `ol li` elements. When several numbers are found, the
     * largest one is returned.
     *
     * @param {object} page - Puppeteer-style page.
     * @param {string} query - Query text to type.
     * @param {object} [options]
     * @param {number} [options.resultTimeout=15000] - Milliseconds to wait for
     *   a result element to become visible.
     * @param {number} [options.settleDelay=2000] - Extra milliseconds to wait
     *   once a result element is visible, so the count can be populated.
     * @returns {Promise<number>} The detected hit count.
     * @throws {Error} "Failed to perform search: …" when any step fails or no
     *   count can be found; the original error is attached as `cause`.
     */
    async search(page, query, { resultTimeout = 15000, settleDelay = 2000 } = {}) {
        try {
            await page.waitForSelector("#q-field", { visible: true });
            const query_field = await page.$("#q-field");
            assert.notEqual(query_field, null, "Query field not found");

            // Triple click selects the existing text so typing replaces it.
            await query_field.click({ clickCount: 3 });
            await page.keyboard.type(query);

            // Arm the navigation wait before submitting (see login()).
            await Promise.all([
                page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
                page.keyboard.press("Enter"),
            ]);

            try {
                await page.waitForSelector('ol li, #resultinfo, .result-item', {
                    visible: true,
                    timeout: resultTimeout,
                });
                await new Promise((resolve) => setTimeout(resolve, settleDelay));
            } catch {
                // Deliberately best-effort: if nothing became visible in time,
                // the extraction below still tries every fallback and throws
                // when no count can be determined.
            }

            const resultsInfo = await page.evaluate(() => {
                // This callback is serialized and run in the browser, so it
                // must not reference anything from the surrounding scope.

                // Returns the digit groups of `text` as plain digit strings.
                // Groups joined by thousands separators ("1,234,567" or
                // "1.234.567") count as ONE number; otherwise the largest
                // fragment (567) would be mistaken for the total.
                const extractNumbers = (text) => {
                    const found = text.match(/\d{1,3}(?:[.,]\d{3})+(?!\d)|\d+/g);
                    return found ? found.map((item) => item.replace(/[.,]/g, '')) : [];
                };

                const selectors = [
                    '#total-results',
                    '#resultinfo',
                    '.result-count',
                    '.total-results',
                    '[data-results]',
                    '.found',
                ];
                for (const selector of selectors) {
                    const element = document.querySelector(selector);
                    if (element) {
                        const text = element.textContent || element.innerText || '';
                        const numbers = extractNumbers(text);
                        if (numbers.length > 0) {
                            return { selector, numbers };
                        }
                    }
                }

                // Next candidate: numbers in the page title.
                const title = document.title;
                if (title) {
                    const numbers = extractNumbers(title);
                    if (numbers.length > 0) {
                        return { selector: 'title', numbers };
                    }
                }

                // Last candidate: the number of rendered list items.
                const resultItems = document.querySelectorAll('ol li');
                if (resultItems.length > 0) {
                    return {
                        selector: 'counted-items',
                        numbers: [resultItems.length.toString()],
                    };
                }
                return null;
            });

            if (!resultsInfo || !resultsInfo.numbers || resultsInfo.numbers.length === 0) {
                // Count list items once more in case they were rendered after
                // the first pass.
                const itemCount = await page.evaluate(() => {
                    return document.querySelectorAll('ol li').length;
                });
                if (itemCount > 0) {
                    return itemCount;
                }
                throw new Error("Cannot find any results count on the page");
            }

            // The largest number found is taken to be the total.
            return Math.max(...resultsInfo.numbers.map((n) => Number.parseInt(n, 10)));
        } catch (error) {
            throw new Error(`Failed to perform search: ${error.message}`, { cause: error });
        }
    }

    /**
     * Logs out by visiting `<current location>/logout` and then returns to
     * the base URL.
     *
     * @param {object} page - Puppeteer-style page.
     * @returns {Promise<boolean>} `true` when both navigations succeed,
     *   `false` otherwise (the error is logged).
     */
    async logout(page) {
        try {
            const currentUrl = await page.url();
            // Drop query string and fragment first. After a search the URL
            // looks like ".../?q=...&ql=...", and appending "/logout" to that
            // would only extend the last query parameter.
            const base = currentUrl.split(/[?#]/)[0].replace(/\/$/, '');
            const logoutUrl = `${base}/logout`;
            await page.goto(logoutUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });

            // Return to the start page so later steps begin from a known state.
            await page.goto(this.korap_url, { waitUntil: 'domcontentloaded', timeout: 10000 });
            return true;
        } catch (error) {
            console.error(`Logout failed: ${error.message}`);
            return false;
        }
    }

    /**
     * Makes sure the `input[name=cutoff]` checkbox is unchecked, clicking
     * `#glimpse` when it is currently checked.
     *
     * @param {object} page - Puppeteer-style page.
     * @returns {Promise<void>}
     */
    async assure_glimpse_off(page) {
        const glimpse = await page.$("input[name=cutoff]");
        if (glimpse == null) {
            // No checkbox on the page, so there is nothing to switch off.
            return;
        }
        const checkedHandle = await glimpse.getProperty('checked');
        const glimpse_value = await checkedHandle.jsonValue();
        if (glimpse_value) {
            await page.click("#glimpse");
        }
    }

    /**
     * Opens the corpus statistics view and returns the token count shown there.
     *
     * The count is read from the first `dd` that follows an element whose
     * `title` contains "token"; failing that, from the first `dd` whose
     * number exceeds `minTokenThreshold`.
     *
     * @param {object} page - Puppeteer-style page.
     * @param {number} [minTokenThreshold=1000] - Lower bound a `dd` value must
     *   exceed to be accepted by the fallback lookup.
     * @returns {Promise<number>} The token count.
     * @throws {Error} "Failed to check corpus statistics: …" when any step
     *   fails or no count is found; the original error is attached as `cause`.
     */
    async check_corpus_statistics(page, minTokenThreshold = 1000) {
        try {
            await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });

            // Open the corpus selection.
            await page.waitForSelector('#vc-choose', { visible: true, timeout: 90000 });
            await page.click('#vc-choose');

            // Give the UI a moment to react before looking for the button.
            await new Promise((resolve) => setTimeout(resolve, 1000));

            await page.waitForSelector('.statistic', { visible: true, timeout: 90000 });
            try {
                await page.click('.statistic');
            } catch (error) {
                throw new Error(`Failed to click statistic element: ${error.message}`);
            }

            // Wait until some number is shown. The callback runs in the
            // browser and therefore cannot share code with the extraction
            // callback further down.
            await page.waitForFunction(() => {
                const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
                for (const element of tokenTitleElements) {
                    let nextElement = element.nextElementSibling;
                    while (nextElement) {
                        if (nextElement.tagName.toLowerCase() === 'dd') {
                            const text = nextElement.textContent || nextElement.innerText || '';
                            const cleanedText = text.replace(/[,\.]/g, '');
                            const numbers = cleanedText.match(/\d+/g);
                            if (numbers && numbers.length > 0) {
                                return true;
                            }
                        }
                        nextElement = nextElement.nextElementSibling;
                    }
                }
                const ddElements = document.querySelectorAll('dd');
                for (const dd of ddElements) {
                    const text = dd.textContent || dd.innerText || '';
                    const cleanedText = text.replace(/[,\.]/g, '');
                    const numbers = cleanedText.match(/\d+/g);
                    if (numbers && numbers.length > 0) {
                        return true;
                    }
                }
                return false;
            }, { timeout: 60000 });

            const tokenCount = await page.evaluate((minThreshold) => {
                console.log("Attempting to find token count within page.evaluate...");

                // Preferred: the dd following an element titled "tokens".
                const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
                for (const element of tokenTitleElements) {
                    let nextElement = element.nextElementSibling;
                    while (nextElement) {
                        if (nextElement.tagName.toLowerCase() === 'dd') {
                            const text = nextElement.textContent || nextElement.innerText || '';
                            // Strip separators (commas and periods) before parsing.
                            const cleanedText = text.replace(/[,\.]/g, '');
                            const numbers = cleanedText.match(/\d+/g);
                            if (numbers && numbers.length > 0) {
                                console.log(`Found token count from title element: ${numbers[0]}`);
                                return parseInt(numbers[0], 10);
                            }
                        }
                        nextElement = nextElement.nextElementSibling;
                    }
                }

                // Fallback: the first dd whose number exceeds the threshold.
                const ddElements = document.querySelectorAll('dd');
                for (const dd of ddElements) {
                    const text = dd.textContent || dd.innerText || '';
                    const cleanedText = text.replace(/[,\.]/g, '');
                    const numbers = cleanedText.match(/\d+/g);
                    if (numbers && numbers.length > 0) {
                        const num = parseInt(numbers[0], 10);
                        if (num > minThreshold) {
                            console.log(`Found token count from dd element: ${num}`);
                            return num;
                        }
                    }
                }
                console.log("Could not find token count using any method.");
                return null;
            }, minTokenThreshold);

            if (tokenCount === null) {
                throw new Error("Could not find token count in corpus statistics");
            }
            return tokenCount;
        } catch (error) {
            throw new Error(`Failed to check corpus statistics: ${error.message}`, { cause: error });
        }
    }
}
// CommonJS export: the module's value is the KorAPRC class itself.
module.exports = KorAPRC