Marc Kupietz | c07712b | 2025-04-13 14:22:38 +0200 | [diff] [blame^] | 1 | from KorAPClient import KorAPConnection |
| 2 | from rpy2.robjects import r |
| 3 | |
# Query source: the fiction corpus DeLiKo@DNB (see <https://doi.org/10.5281/zenodo.14943116>)
kcon = KorAPConnection(KorAPUrl="https://korap.dnb.de/", verbose=True)
kcon = kcon.auth()

# Fix the R-side random seed for reproducibility (presumably of the randomized
# page order requested below); KorAPClient will export this itself in future.
r['set.seed'](42)

# Search for the lemma "Wange" with full match data (not metadata only) and
# fetch up to 1000 matches, with result pages requested in random order.
q = kcon.corpusQuery("[tt/l=Wange]", metadataOnly=False).fetchNext(
    maxFetch=1000, randomizePageOrder=True
)
| 10 | |
# Print the collected matches as a numbered keyword-in-context (KWIC) listing:
# right-aligned left context, the match in bold, left-aligned right context.

# ANSI escapes for bold text (loop-invariant, so defined once up front)
bold_start = "\033[1m"
bold_end = "\033[0m"

# Walk the match table once and keep (left, match, right) per row, with tabs
# replaced by single spaces so the padded columns line up on output.
# NOTE(review): assumes q.slots['collectedMatches'] is a pandas DataFrame even
# when nothing was found - confirm against KorAPClient.
kwic_columns = ['tokens.left', 'tokens.match', 'tokens.right']
kwic_rows = [
    tuple(row[col].replace("\t", " ") for col in kwic_columns)
    for _, row in q.slots['collectedMatches'].iterrows()
]

# Calculate the maximum width for the left and right columns.
# Replacing a tab with a space keeps the length, so these equal the widths of
# the raw columns; default=0 avoids a ValueError when there are no rows.
max_left_width = max((len(left) for left, _, _ in kwic_rows), default=0)
max_right_width = max((len(right) for _, _, right in kwic_rows), default=0)

# Emit one line per match, prefixed with its zero-based running number
for i, (left_context, match, right_context) in enumerate(kwic_rows):
    print(f"{i:>5} {left_context:>{max_left_width}} {bold_start}{match}{bold_end} {right_context:<{max_right_width}}")