Group DeReKo .conllu.gz files by filename prefix and print per-group file counts
diff --git a/DeReKo/explore_dereko.py b/DeReKo/explore_dereko.py
index fa14a6c..ee56ee9 100644
--- a/DeReKo/explore_dereko.py
+++ b/DeReKo/explore_dereko.py
@@ -1,8 +1,20 @@
 import glob
+from collections import defaultdict
 
-DEREKO_DIR = "/export/netapp/kupietz/N-GRAMM-STUDIE/conllu"
+DEREKO_DIR = "/export/netapp/kupietz/N-GRAMM-STUDIE/conllu/"
 
+def get_filenames(data_dir):
+    filenames = []
+    for filepath in glob.iglob(f'{data_dir}/*.conllu.gz', recursive=False):
+        fname = filepath.split("/")[-1]
+        filenames.append(fname)
+    return sorted(filenames)
 
 if __name__ == "__main__":
-    for filepath in glob.iglob(f'{DEREKO_DIR}/*.conllu.gz', recursive=False):
-        print(filepath)
\ No newline at end of file
+    file_groups = defaultdict(list)
+    filenames = get_filenames(DEREKO_DIR)
+    for fn in filenames:
+        prefix = fn.split(".")[0]
+        file_groups[prefix].append(fn)
+    for group,files in file_groups.items():
+        print(group, len(files))
\ No newline at end of file