First test whether the server is reachable and served via HTTPS
Change-Id: I02bdb64e0300d2d7913e3929505eb2e5e29f79ea
diff --git a/lib/korap_rc.js b/lib/korap_rc.js
index 21c1eb8..8e5a22f 100644
--- a/lib/korap_rc.js
+++ b/lib/korap_rc.js
@@ -13,123 +13,132 @@
}
async login(page, username, password) {
- page.goto(this.korap_url);
- await page.waitForNavigation({ waitUntil: 'networkidle2' });
- if (this.login == "") return false;
- if (username == "") return false;
- if (password == "") return false;
+ try {
+ await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });
+ if (username == "") return false;
+ if (password == "") return false;
- await page.click('.dropdown-btn');
- await page.waitForSelector('input[name=handle_or_email]', { visible: true });
- const username_field = await page.$("input[name=handle_or_email]")
- if (username_field != null) {
- await username_field.focus();
- await username_field.type(username);
- const password_field = await page.$("input[name=pwd]")
- await password_field.focus()
- await page.keyboard.type(password)
- await page.keyboard.press("Enter")
- } else {
- return false
+ await page.waitForSelector('.dropdown-btn', { visible: true });
+ await page.click('.dropdown-btn');
+ await page.waitForSelector('input[name=handle_or_email]', { visible: true });
+ const username_field = await page.$("input[name=handle_or_email]")
+ if (username_field != null) {
+ await username_field.focus();
+ await username_field.type(username);
+ const password_field = await page.$("input[name=pwd]")
+ await password_field.focus()
+ await page.keyboard.type(password)
+ await page.keyboard.press("Enter")
+ } else {
+ return false
+ }
+
+ await page.waitForNavigation({ waitUntil: 'domcontentloaded' }); // Wait for navigation after login
+ await page.waitForSelector("#q-field", { visible: true }); // Wait for query field to confirm login
+ const logout = await page.$(".logout")
+ if (logout == null) {
+ return false
+ }
+
+ return true
+ } catch (error) {
+ console.error(`Login failed: ${error.message}`);
+ return false;
}
-
- await page.waitForNavigation({ waitUntil: 'networkidle2' });
- const logout = await page.$(".logout")
- if (logout == null) {
- return false
- }
-
- return true
}
async search(page, query) {
- const query_field = await page.$("#q-field");
- assert.notEqual(query_field, null, "Query field not found");
-
- await page.waitForSelector("#q-field", { visible: true });
- await query_field.click({ clickCount: 3 });
- await page.keyboard.type(query);
- await page.keyboard.press("Enter");
-
- await page.waitForNavigation({ waitUntil: 'networkidle2' });
-
- // Wait for search results to be fully loaded
try {
- await page.waitForSelector('ol li, #resultinfo, .result-item', {
- visible: true,
- timeout: 15000
- });
- // Give additional time for the results count to be populated
- await new Promise(resolve => setTimeout(resolve, 2000));
- } catch (error) {
- // Continue if timeout, fallback methods will handle it
- }
+ await page.waitForSelector("#q-field", { visible: true });
+ const query_field = await page.$("#q-field");
+ assert.notEqual(query_field, null, "Query field not found");
- const resultsInfo = await page.evaluate(() => {
- // Check common selectors for result counts
- const selectors = [
- '#total-results',
- '#resultinfo',
- '.result-count',
- '.total-results',
- '[data-results]',
- '.found'
- ];
+ await query_field.click({ clickCount: 3 });
+ await page.keyboard.type(query);
+ await page.keyboard.press("Enter");
- for (const selector of selectors) {
- const element = document.querySelector(selector);
- if (element) {
- const text = element.textContent || element.innerText || '';
- const numbers = text.match(/\d+/g);
+ await page.waitForNavigation({ waitUntil: 'domcontentloaded' });
+
+ // Wait for search results to be fully loaded
+ try {
+ await page.waitForSelector('ol li, #resultinfo, .result-item', {
+ visible: true,
+ timeout: 15000
+ });
+ // Give additional time for the results count to be populated
+ await new Promise(resolve => setTimeout(resolve, 2000));
+ } catch (error) {
+ // Continue if timeout, fallback methods will handle it
+ }
+
+ const resultsInfo = await page.evaluate(() => {
+ // Check common selectors for result counts
+ const selectors = [
+ '#total-results',
+ '#resultinfo',
+ '.result-count',
+ '.total-results',
+ '[data-results]',
+ '.found'
+ ];
+
+ for (const selector of selectors) {
+ const element = document.querySelector(selector);
+ if (element) {
+ const text = element.textContent || element.innerText || '';
+ const numbers = text.match(/\d+/g);
+ if (numbers && numbers.length > 0) {
+ return {
+ selector: selector,
+ numbers: numbers
+ };
+ }
+ }
+ }
+
+ // Look in the page title for results count
+ const title = document.title;
+ if (title) {
+ const numbers = title.match(/\d+/g);
if (numbers && numbers.length > 0) {
return {
- selector: selector,
+ selector: 'title',
numbers: numbers
};
}
}
- }
- // Look in the page title for results count
- const title = document.title;
- if (title) {
- const numbers = title.match(/\d+/g);
- if (numbers && numbers.length > 0) {
+ // Count the actual result items as fallback
+ const resultItems = document.querySelectorAll('ol li');
+ if (resultItems.length > 0) {
return {
- selector: 'title',
- numbers: numbers
+ selector: 'counted-items',
+ numbers: [resultItems.length.toString()]
};
}
- }
- // Count the actual result items as fallback
- const resultItems = document.querySelectorAll('ol li');
- if (resultItems.length > 0) {
- return {
- selector: 'counted-items',
- numbers: [resultItems.length.toString()]
- };
- }
-
- return null;
- });
-
- if (!resultsInfo || !resultsInfo.numbers || resultsInfo.numbers.length === 0) {
- // Final fallback: just count visible list items
- const itemCount = await page.evaluate(() => {
- return document.querySelectorAll('ol li').length;
+ return null;
});
- if (itemCount > 0) {
- return itemCount;
+ if (!resultsInfo || !resultsInfo.numbers || resultsInfo.numbers.length === 0) {
+ // Final fallback: just count visible list items
+ const itemCount = await page.evaluate(() => {
+ return document.querySelectorAll('ol li').length;
+ });
+
+ if (itemCount > 0) {
+ return itemCount;
+ }
+
+ throw new Error("Cannot find any results count on the page");
}
- throw new Error("Cannot find any results count on the page");
+ // Extract the largest number found (likely the total results)
+ const hits = Math.max(...resultsInfo.numbers.map(n => parseInt(n, 10)));
+ return hits;
+ } catch (error) {
+ throw new Error(`Failed to perform search: ${error.message}`);
}
-
- // Extract the largest number found (likely the total results)
- const hits = Math.max(...resultsInfo.numbers.map(n => parseInt(n, 10)));
- return hits;
}
async logout(page) {
@@ -160,24 +169,55 @@
async check_corpus_statistics(page, minTokenThreshold = 1000) {
try {
// Navigate to the corpus view if not already there
- await page.goto(this.korap_url, { waitUntil: 'networkidle2' });
+ await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });
// Click the vc-choose element to open corpus selection
- await page.waitForSelector('#vc-choose', { visible: true, timeout: 10000 });
+ await page.waitForSelector('#vc-choose', { visible: true, timeout: 90000 });
await page.click('#vc-choose');
// Wait a moment for the UI to respond
await new Promise(resolve => setTimeout(resolve, 1000));
// Click the statistic element
- await page.waitForSelector('.statistic', { visible: true, timeout: 10000 });
- await page.click('.statistic');
+ await page.waitForSelector('.statistic', { visible: true, timeout: 90000 });
+ try {
+ await page.click('.statistic');
+ } catch (error) {
+ throw new Error(`Failed to click statistic element: ${error.message}`);
+ }
- // Wait for statistics to load
- await new Promise(resolve => setTimeout(resolve, 3000));
-
+ // Wait for statistics to load and token count to appear
+ await page.waitForFunction(() => {
+ const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
+ for (const element of tokenTitleElements) {
+ let nextElement = element.nextElementSibling;
+ while (nextElement) {
+ if (nextElement.tagName.toLowerCase() === 'dd') {
+ const text = nextElement.textContent || nextElement.innerText || '';
+ const cleanedText = text.replace(/[,\.]/g, '');
+ const numbers = cleanedText.match(/\d+/g);
+ if (numbers && numbers.length > 0) {
+ return true;
+ }
+ }
+ nextElement = nextElement.nextElementSibling;
+ }
+ }
+ const ddElements = document.querySelectorAll('dd');
+ for (const dd of ddElements) {
+ const text = dd.textContent || dd.innerText || '';
+ const cleanedText = text.replace(/[,\.]/g, '');
+ const numbers = cleanedText.match(/\d+/g);
+ if (numbers && numbers.length > 0) {
+ return true;
+ }
+ }
+ return false;
+ }, { timeout: 60000 });
+
// Look for the tokens count in a dd element that follows an element with title "tokens"
const tokenCount = await page.evaluate((minThreshold) => {
+ console.log("Attempting to find token count within page.evaluate...");
// Find the element with title "tokens"
const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
@@ -191,6 +231,7 @@
const cleanedText = text.replace(/[,\.]/g, '');
const numbers = cleanedText.match(/\d+/g);
if (numbers && numbers.length > 0) {
+ console.log(`Found token count from title element: ${numbers[0]}`);
return parseInt(numbers[0], 10);
}
}
@@ -209,11 +250,13 @@
const num = parseInt(numbers[0], 10);
// Use the provided threshold instead of hardcoded value
if (num > minThreshold) {
+ console.log(`Found token count from dd element: ${num}`);
return num;
}
}
}
+ console.log("Could not find token count using any method.");
return null;
}, minTokenThreshold);
@@ -229,4 +272,4 @@
}
}
-module.exports = KorAPRC
+module.exports = KorAPRC
\ No newline at end of file