Add debug logging for token count test

Change-Id: I7702d270880ee61f4a0deb01a9b7589bb1017c44
diff --git a/lib/korap_rc.js b/lib/korap_rc.js
index 8e5a22f..a628dbf 100644
--- a/lib/korap_rc.js
+++ b/lib/korap_rc.js
@@ -168,105 +168,133 @@
 
     async check_corpus_statistics(page, minTokenThreshold = 1000) {
         try {
+            console.log(`Starting corpus statistics check with minTokenThreshold: ${minTokenThreshold}`);
+
             // Navigate to the corpus view if not already there
+            console.log(`Navigating to: ${this.korap_url}`);
             await page.goto(this.korap_url, { waitUntil: 'domcontentloaded' });
-            
+            console.log("Navigation completed");
+
             // Click the vc-choose element to open corpus selection
+            console.log("Waiting for #vc-choose selector...");
             await page.waitForSelector('#vc-choose', { visible: true, timeout: 90000 });
+            console.log("Found #vc-choose, clicking...");
             await page.click('#vc-choose');
-            
+            console.log("Clicked #vc-choose");
+
             // Wait a moment for the UI to respond
+            console.log("Waiting 1 second for UI to respond...");
             await new Promise(resolve => setTimeout(resolve, 1000));
-            
+
             // Click the statistic element
+            console.log("Waiting for .statistic selector...");
             await page.waitForSelector('.statistic', { visible: true, timeout: 90000 });
+            console.log("Found .statistic element, attempting to click...");
             try {
                 await page.click('.statistic');
+                console.log("Successfully clicked .statistic element");
             } catch (error) {
+                console.error(`Failed to click statistic element: ${error.message}`);
                 throw new Error(`Failed to click statistic element: ${error.message}`);
             }
+
+            // Wait for statistics to load with a more efficient approach
+            console.log("Waiting for token statistics to load...");
             
-            // Wait for statistics to load and token count to appear
+            // First, wait for any dd elements to appear (basic structure)
+            await page.waitForSelector('dd', { visible: true, timeout: 30000 });
+            
+            // Then wait for the specific token statistics with a simplified check
             await page.waitForFunction(() => {
-                const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
-                for (const element of tokenTitleElements) {
-                    let nextElement = element.nextElementSibling;
-                    while (nextElement) {
-                        if (nextElement.tagName.toLowerCase() === 'dd') {
-                            const text = nextElement.textContent || nextElement.innerText || '';
-                            const cleanedText = text.replace(/[,\.]/g, '');
-                            const numbers = cleanedText.match(/\d+/g);
-                            if (numbers && numbers.length > 0) {
-                                return true;
-                            }
-                        }
-                        nextElement = nextElement.nextElementSibling;
-                    }
-                }
+                // Simplified check - look for any dd element with a large number
                 const ddElements = document.querySelectorAll('dd');
-                for (const dd of ddElements) {
-                    const text = dd.textContent || dd.innerText || '';
+                for (let i = 0; i < ddElements.length; i++) {
+                    const text = ddElements[i].textContent || ddElements[i].innerText || '';
                     const cleanedText = text.replace(/[,\.]/g, '');
                     const numbers = cleanedText.match(/\d+/g);
                     if (numbers && numbers.length > 0) {
-                        return true;
+                        const num = parseInt(numbers[0], 10);
+                        if (num > 1000) { // Found a substantial number, likely loaded
+                            return true;
+                        }
                     }
                 }
                 return false;
-            }, { timeout: 60000 });
+            }, { timeout: 90000, polling: 1000 }); // Poll every second instead of continuously
 
             // Look for the tokens count in a dd element that follows an element with title "tokens"
+            console.log(`Starting token count extraction with minThreshold: ${minTokenThreshold}`);
             const tokenCount = await page.evaluate((minThreshold) => {
-                console.log("Attempting to find token count within page.evaluate...");
-                // Find the element with title "tokens"
+                // Find the element with title "tokens" first
                 const tokenTitleElements = document.querySelectorAll('[title="tokens"], [title*="token"]');
                 
-                for (const element of tokenTitleElements) {
+                for (let i = 0; i < tokenTitleElements.length; i++) {
+                    const element = tokenTitleElements[i];
+                    
                     // Look for the next dd element
                     let nextElement = element.nextElementSibling;
-                    while (nextElement) {
+                    let siblingCount = 0;
+                    while (nextElement && siblingCount < 10) {
+                        siblingCount++;
+                        
                         if (nextElement.tagName.toLowerCase() === 'dd') {
                             const text = nextElement.textContent || nextElement.innerText || '';
                             // Remove number separators (commas and periods) and extract number
                             const cleanedText = text.replace(/[,\.]/g, '');
                             const numbers = cleanedText.match(/\d+/g);
                             if (numbers && numbers.length > 0) {
-                                console.log(`Found token count from title element: ${numbers[0]}`);
-                                return parseInt(numbers[0], 10);
+                                const tokenValue = parseInt(numbers[0], 10);
+                                return tokenValue;
                             }
                         }
                         nextElement = nextElement.nextElementSibling;
                     }
                 }
-                
+
                 // Alternative approach: look for dd elements that contain large numbers
                 const ddElements = document.querySelectorAll('dd');
-                for (const dd of ddElements) {
+                const candidateTokenCounts = [];
+
+                for (let i = 0; i < ddElements.length; i++) {
+                    const dd = ddElements[i];
                     const text = dd.textContent || dd.innerText || '';
                     // Remove separators and check if it's a large number (likely token count)
                     const cleanedText = text.replace(/[,\.]/g, '');
                     const numbers = cleanedText.match(/\d+/g);
                     if (numbers && numbers.length > 0) {
                         const num = parseInt(numbers[0], 10);
-                        // Use the provided threshold instead of hardcoded value
+                        
+                        // Use the provided threshold to filter candidates
                         if (num > minThreshold) {
-                            console.log(`Found token count from dd element: ${num}`);
-                            return num;
+                            candidateTokenCounts.push({ value: num, text: text, index: i });
                         }
                     }
                 }
-                
-                console.log("Could not find token count using any method.");
+
+                if (candidateTokenCounts.length > 0) {
+                    // Return the largest candidate (most likely to be the total token count)
+                    const bestCandidate = candidateTokenCounts.reduce((max, current) =>
+                        current.value > max.value ? current : max
+                    );
+                    return bestCandidate.value;
+                }
+
                 return null;
             }, minTokenThreshold);
-            
+
+            console.log(`Token count extraction completed. Result: ${tokenCount}`);
+
             if (tokenCount === null) {
+                console.error("ERROR: Token count extraction returned null");
                 throw new Error("Could not find token count in corpus statistics");
             }
-            
+
+            console.log(`SUCCESS: Found token count: ${tokenCount}, threshold was: ${minTokenThreshold}`);
             return tokenCount;
-            
+
         } catch (error) {
+            console.error(`ERROR in check_corpus_statistics: ${error.message}`);
+            console.error("Full error stack:", error.stack);
             throw new Error(`Failed to check corpus statistics: ${error.message}`);
         }
     }