Provide Python wrappers for RKorAPClient R-S4 classes and class methods
Change-Id: I216a6894ed1246ebb2300cac1e52972a517150a2
diff --git a/KorAPClient/__init__.py b/KorAPClient/__init__.py
index b74adab..9571401 100644
--- a/KorAPClient/__init__.py
+++ b/KorAPClient/__init__.py
@@ -1,4 +1,36 @@
-import rpy2.robjects.pandas2ri as pandas2ri
import rpy2.robjects.packages as packages
+import rpy2.robjects.pandas2ri as pandas2ri
+from rpy2.robjects.methods import RS4
+
KorAPClient = packages.importr('RKorAPClient')
pandas2ri.activate()
+
+
+class KorAPConnection(RS4):
+ def __init__(self, *args, **kwargs):
+ kco = KorAPClient.KorAPConnection(*args, **kwargs)
+ super().__init__(kco)
+
+ def corpusStats(self, *args, **kwargs):
+ return KorAPClient.corpusStats(self, *args, **kwargs)
+
+ def frequencyQuery(self, *args, **kwargs):
+ return KorAPClient.frequencyQuery(self, *args, **kwargs)
+
+ def corpusQuery(self, *args, **kwargs):
+ return KorAPQuery(self, *args, **kwargs)
+
+
+class KorAPQuery(RS4):
+ def __init__(self, *args, **kwargs):
+ kco = KorAPClient.corpusQuery(*args, **kwargs)
+ super().__init__(kco)
+
+ def fetchNext(self, *args, **kwargs):
+ return KorAPClient.fetchNext(self, *args, **kwargs)
+
+ def fetchRest(self, *args, **kwargs):
+ return KorAPClient.fetchRest(self, *args, **kwargs)
+
+ def fetchAll(self, *args, **kwargs):
+ return KorAPClient.fetchAll(self, *args, **kwargs)
diff --git a/KorAPClient/tests/test_korapclient.py b/KorAPClient/tests/test_korapclient.py
index 2b0b7d6..34a52fa 100644
--- a/KorAPClient/tests/test_korapclient.py
+++ b/KorAPClient/tests/test_korapclient.py
@@ -1,17 +1,29 @@
import unittest
-from KorAPClient import KorAPClient
+from KorAPClient import KorAPConnection
+
class TestKorAPClient(unittest.TestCase):
+ def setUp(self):
+ self.kcon = KorAPConnection(verbose=True)
+
def test_query(self):
- kcon = KorAPClient.KorAPConnection()
- q = KorAPClient.KorAPQuery(kcon, "Test")
+ q = self.kcon.corpusQuery("Test")
self.assertEqual(q.slots['class'], 'KorAPQuery')
def test_frequency_query(self):
- kcon = KorAPClient.KorAPConnection()
- df = KorAPClient.frequencyQuery(kcon, "Ameisenplage")
+ df = self.kcon.frequencyQuery("Ameisenplage")
self.assertGreater(df['totalResults'][0], 10)
self.assertGreater(10000, df['totalResults'][0])
+ def test_corpus_stats(self):
+ df = self.kcon.corpusStats(**{"as.df": True})
+ self.assertGreater(df['tokens'][0], 10**10)
+
+ def test_corpus_stats_with_vc(self):
+ de_tokens = self.kcon.corpusStats(vc='pubPlaceKey="DE"', **{"as.df": True})['tokens'][0]
+ ch_tokens = self.kcon.corpusStats(vc='pubPlaceKey="CH"', **{"as.df": True})['tokens'][0]
+ self.assertGreater(de_tokens, ch_tokens)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/Readme.md b/Readme.md
index 01da70a..f2d6aae 100644
--- a/Readme.md
+++ b/Readme.md
@@ -43,21 +43,27 @@
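-Currently, there is no dedicated documentation for the Python variant of the library. Please refer to the [Refernce Manual of RKorAPClient](https://cran.r-project.org/web/packages/RKorAPClient/RKorAPClient.pdf) for now.
-For translating the R syntax to Python and vice versa, pleas refer to the [rpy2 Documentation](https://rpy2.github.io/doc/latest/html/index.html).
+Currently, there is no dedicated documentation for the Python variant of the library. Please refer to the [Reference Manual of RKorAPClient](https://cran.r-project.org/web/packages/RKorAPClient/RKorAPClient.pdf) for now.
+For translating the R syntax to Python and vice versa, please refer to the [rpy2 Documentation](https://rpy2.github.io/doc/latest/html/index.html).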
+Please note that some arguments of the original RKorAPClient functions have names containing characters that are not allowed in Python keyword argument names (for example, the dot in `as.df`).
+In these cases, you can pass the arguments by unpacking a dictionary with Python's `**` syntax instead.
+For example, given a `KorAPConnection` object `kcon`, you can get the result of `corpusStats` as a `pandas.DataFrame` and print the size of the whole corpus in tokens as follows:
+```python
+print(kcon.corpusStats(**{"as.df": True})['tokens'])
+```
## Examples
#### Frequencies over years and countries
```python
-from KorAPClient import KorAPClient
+from KorAPClient import KorAPClient, KorAPConnection
import plotly.express as px
QUERY = "Hello World"
YEARS = range(2010, 2019)
COUNTRIES = ["DE", "CH"]
-kcon = KorAPClient.KorAPConnection(verbose=True)
+kcon = KorAPConnection(verbose=True)
vcs = ["textType=/Zeit.*/ & pubPlaceKey=" + c + " & pubDate in " + str(y) for c in COUNTRIES for y in YEARS]
-df = KorAPClient.ipm(KorAPClient.frequencyQuery(kcon, QUERY, vcs))
+df = KorAPClient.ipm(kcon.frequencyQuery(QUERY, vcs))
df['Year'] = [y for c in COUNTRIES for y in YEARS]
df['Country'] = [c for c in COUNTRIES for y in YEARS]
diff --git a/examples/hello_world.py b/examples/hello_world.py
index 126b3bf..6ac91cc 100755
--- a/examples/hello_world.py
+++ b/examples/hello_world.py
@@ -1,15 +1,17 @@
#!/usr/bin/env python3
-from KorAPClient import KorAPClient
+from KorAPClient import KorAPClient, KorAPConnection
import plotly.express as px
QUERY = "Hello World"
YEARS = range(2010, 2019)
COUNTRIES = ["DE", "CH"]
-kcon = KorAPClient.KorAPConnection(verbose=True)
+kcon = KorAPConnection(verbose=True)
vcs = ["textType=/Zeit.*/ & pubPlaceKey=" + c + " & pubDate in " + str(y) for c in COUNTRIES for y in YEARS]
-df = KorAPClient.ipm(KorAPClient.frequencyQuery(kcon, QUERY, vcs))
+df = KorAPClient.ipm(kcon.frequencyQuery(QUERY, vcs))
+print(df)
+
df['Year'] = [y for c in COUNTRIES for y in YEARS]
df['Country'] = [c for c in COUNTRIES for y in YEARS]
diff --git a/setup.py b/setup.py
index 2f7c9c1..53f5cb8 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
setup(
name="KorAPClient",
- version="0.0.2",
+ version="0.1.0",
author="Marc Kupietz",
author_email="kupietz@ids-mannheim.de",
description="Client package to access KorAP's web service API",