blob: 168d53b98d3eaaf97811f9a985ddfce6a94d4589 [file] [log] [blame]
Marc Kupietz240ff892021-05-04 08:19:08 +02001#!/usr/bin/env python3
2
3from KorAPClient import KorAPConnection
4import plotly.express as px
5import pandas as pd
6
# --- Study configuration ---------------------------------------------------
# First and last publication year covered by the study (both shown in TITLE).
startYear = 1991
endYear = 2020
# Width, in years, of each period that gets its own query and x-axis label.
span = 5

# Node word and the collocate variants whose association with it is compared.
NODE = "Ei"
COLLOCATES = ["pellen", "schälen"]
# Publication-place keys used to restrict the virtual corpora.
COUNTRIES = ["DE", "AT", "CH"]

# Plot title; it contains HTML-style <i>...</i> markup around the word pair.
TITLE = f"Collocation strength of <i>{NODE} + {' / '.join(COLLOCATES)} </i> in {', '.join(COUNTRIES)} {startYear}-{endYear}"

# First year of every period. range() excludes its stop value, so the stop is
# endYear + 1: otherwise a period starting exactly in endYear would be dropped
# silently even though TITLE announces endYear as part of the covered range.
YEARS = list(range(startYear, endYear + 1, span))
18
# Build the full condition grid: one row for every combination of period
# start year, country and collocate (successive cross joins).
df = (
    pd.DataFrame(YEARS, columns=["year"])
    .merge(pd.DataFrame(COUNTRIES, columns=["Country"]), how="cross")
    .merge(pd.DataFrame(COLLOCATES, columns=["Collocate"]), how="cross")
)

# Last year of each row's period. It is capped at endYear so that a final,
# shorter period never reaches past the year range announced in the title.
period_ends = [min(int(year) + span - 1, endYear) for year in df["year"]]

# Virtual corpus specification per row, derived from Country and the period
# bounds. Iterating the columns with zip avoids per-row label lookups
# (df[col][i]), which only work while the index is the default 0..n-1 range.
df["vc"] = [
    f"textType=/Zeit.*/ & pubPlaceKey={country} & pubDate since {year} & pubDate until {last} "
    for country, year, last in zip(df["Country"], df["year"], period_ends)
]

# Label for the x axis, e.g. "1991-1995".
df["Period"] = [f"{year}-{last}" for year, last in zip(df["year"], period_ends)]
31
# Open a connection to the KorAP API server.
kcon = KorAPConnection(verbose=True)

# Run the collocation score query: NODE against each row's collocate within
# that row's virtual corpus, with node and collocate queries both lemmatized.
results = kcon.collocationScoreQuery(
    NODE,
    df['Collocate'],
    df['vc'],
    lemmatizeNodeQuery=True,
    lemmatizeCollocateQuery=True,
)

# Put the query result columns next to the condition columns (axis=1).
# pd.concat aligns rows by index label, so both frames are first given a
# fresh 0..n-1 index to pair them by position instead.
# NOTE(review): presumably `results` comes back with a different index than
# `df` and holds one row per row of `df`, in the same order -- confirm.
conditions = df.reset_index(drop=True)
scores = results.reset_index(drop=True)
df = pd.concat([conditions, scores], axis=1)

# One line per Country (colour) and Collocate (dash pattern), logDice score
# over the periods.
fig = px.line(
    df,
    title=TITLE,
    x="Period",
    y="logDice",
    color="Country",
    line_dash="Collocate",
)
fig.show()
# fig.write_image(f"{NODE}_collocates_{startYear}-{endYear}_in_{'_'.join(COUNTRIES)}.png")