# blob: 7466783ed288261b3d40a9d53bfdf2af9973f47f [file] [log] [blame]
# Example script: run the query "[tt/l=Wange]" against a KorAP instance, fetch
# up to 1000 matches in randomized page order, and print every match with its
# left and right context aligned in columns (the match itself in bold).
from KorAPClient import KorAPConnection
from rpy2.robjects import r

# As base, use the fiction corpus DeLiKo@DNB (see <https://doi.org/10.5281/zenodo.14943116>)
kcon = KorAPConnection(KorAPUrl="https://korap.dnb.de/", verbose=True).auth()

r['set.seed'](42)  # Set the seed for reproducibility, will in future be exported by KorAPClient
q = kcon.corpusQuery("[tt/l=Wange]", metadataOnly=False)
q = q.fetchNext(maxFetch=1000, randomizePageOrder=True)

# Columns of the collected matches that make up one concordance line.
context_columns = ['tokens.left', 'tokens.match', 'tokens.right']

# Walk the collected matches once and keep the cleaned (left, match, right)
# triples. Tabs are replaced by single spaces, which keeps every string's
# length unchanged, so the widths below are the same as for the raw columns.
# NOTE(review): assumes 'collectedMatches' is a pandas DataFrame even when the
# query has no hits - confirm against KorAPClient.
concordance = [
    tuple(row[col].replace("\t", " ") for col in context_columns)
    for _, row in q.slots['collectedMatches'].iterrows()
]

# Calculate the maximum width for the left and right columns.
# default=0 keeps the script from raising ValueError on an empty result set.
max_left_width = max((len(left) for left, _, _ in concordance), default=0)
max_right_width = max((len(right) for _, _, right in concordance), default=0)

# ANSI escapes for bold text
bold_start = "\033[1m"
bold_end = "\033[0m"

# Print all collected matches, numbered from 0, with the left context
# right-aligned and the right context left-aligned around the bold match.
for i, (left_context, match, right_context) in enumerate(concordance):
    print(f"{i:>5} {left_context:>{max_left_width}} {bold_start}{match}{bold_end} {right_context:<{max_right_width}}")