upgrading repo to latest version
diff --git a/DeReKo/explore_dereko.py b/DeReKo/explore_dereko.py
new file mode 100644
index 0000000..c5ddc49
--- /dev/null
+++ b/DeReKo/explore_dereko.py
@@ -0,0 +1,23 @@
+import glob,re
+from collections import defaultdict
+
+DEREKO_DIR = "/export/netapp/kupietz/N-GRAMM-STUDIE/conllu/"
+
+def get_filenames(data_dir):
+    """Return the sorted file names of the ``*.conllu.gz`` entries directly inside *data_dir*."""
+    # Non-recursive glob; only the text after the last "/" of each match is kept.
+    matches = glob.iglob(f'{data_dir}/*.conllu.gz', recursive=False)
+    names = [match.split("/")[-1] for match in matches]
+    return sorted(names)
+
+if __name__ == "__main__":
+    file_groups = defaultdict(list)  # group prefix -> file names sharing that prefix
+    for fn in get_filenames(DEREKO_DIR):
+        stem = fn.split(".")[0]
+        # First non-digit run names the group; a stem without one is grouped under itself (no IndexError).
+        run = re.search(r"\D+", stem)
+        file_groups[run.group() if run else stem].append(fn)
+        print(stem)
+    # Summary of group sizes, largest first (currently disabled):
+    #for group,files in sorted(file_groups.items(), key=lambda x: len(x[1]), reverse=True):
+    #    print(group, len(files))
\ No newline at end of file