diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..18831b2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.w2v
diff --git a/20cbt.corpus_files.lst b/20cbt.corpus_files.lst
new file mode 100644
index 0000000..ee1b1c7
--- /dev/null
+++ b/20cbt.corpus_files.lst
@@ -0,0 +1,166 @@
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/h85.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/h86.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/h87.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/h88.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/bzk.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/umb45.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/umb68.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/wkb.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/wkd.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s00.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s01.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s02.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s03.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s04.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s05.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s06.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s07.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s08.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s09.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s10.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s11.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s12.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s13.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s14.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s15.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s16.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s17.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s18.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s19.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s20.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s21.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s47.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s48.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s49.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s50.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s51.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s52.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s53.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s54.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s55.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s56.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s57.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s58.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s59.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s60.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s61.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s62.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s63.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s64.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s65.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s66.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s67.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s68.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s69.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s70.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s71.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s72.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s73.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s74.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s75.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s76.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s77.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s78.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s79.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s80.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s81.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s82.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s83.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s84.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s85.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s86.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s87.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s88.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s89.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s90.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s91.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s92.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s93.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s94.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s95.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s96.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s97.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s98.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/s99.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t86.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t87.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t88.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t89.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t90.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t91.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t92.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t93.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t94.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t95.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t96.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t97.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t98.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/t99.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z00.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z01.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z02.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z03.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z04.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z05.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z06.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z07.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z08.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z09.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z10.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z11.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z12.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z13.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z14.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z15.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z16.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z17.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z18.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z19.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z20.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z53.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z54.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z55.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z56.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z57.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z58.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z59.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z60.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z61.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z62.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z63.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z64.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z65.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z66.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z67.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z68.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z69.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z70.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z71.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z72.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z73.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z74.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z75.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z76.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z77.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z78.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z79.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z80.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z81.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z82.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z83.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z84.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z85.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z86.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z87.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z88.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z89.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z90.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z91.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z92.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z93.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z94.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z95.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z96.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z97.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z98.zip
+/vol/corpora/DeReKo/DeReKo-2022-I/KorAP/zip/z99.zip
diff --git a/extract-shuffled-sentences.sh b/extract-shuffled-sentences.sh
new file mode 100755
index 0000000..3a71484
--- /dev/null
+++ b/extract-shuffled-sentences.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Build a shuffled, bzip2-compressed sentence table from the corpus archives
+# listed in 20cbt.corpus_files.lst.
+#
+# Step 1: for every archive path in the list, emit a korapxml2conllu command
+#         that writes <archive basename>.w2v into the current directory;
+#         archives whose .w2v file already exists are skipped. The emitted
+#         commands are piped to parallel for execution.
+# Step 2: if $DESTNAME is missing or older than any ./*.w2v file, filter,
+#         shuffle and compress all ./*.w2v files into $DESTNAME, and write
+#         per-ID line counts to ${DESTNAME%%.tsv.bz2}.contents.txt.
+#
+# External tools: korapxml2conllu, parallel, pv, shuf, pbzip2, pbzcat, perl,
+# corpussigle2title.
+
+DESTNAME=20CBT.tsv.bz2
+CORPUS_LIST=20cbt.corpus_files.lst
+
+# Progress messages go to stderr because the loop's stdout is the command
+# stream consumed by parallel.
+while IFS= read -r line; do
+  [[ -n $line ]] || continue # ignore empty lines in the list
+  dest=$(basename -- "$line")
+  dest=${dest%%.zip}.w2v
+  if [[ -e $dest ]]; then
+    echo "Skipping $dest" >&2
+  else
+    echo "reading: ${line}" >&2
+    # %q shell-quotes both paths so the generated command stays intact even
+    # if a path contains whitespace or shell metacharacters.
+    printf "korapxml2conllu -m '<textSigle>([^<.]+)' -m '<creatDate>([^<]{4,7})' --word2vec %q > %q\n" "$line" "$dest"
+  fi
+done < "$CORPUS_LIST" | parallel
+
+# Rebuild when the output does not exist yet or when at least one .w2v file is
+# newer than it. The decision is based on find's output, not its exit status:
+# find exits 0 even when nothing matches, and -newer fails when the reference
+# file is missing, so an exit-status test would never trigger the first build.
+if [[ ! -e $DESTNAME ]] ||
+  [[ -n $(find . -maxdepth 1 -name '*.w2v' -type f -newer "$DESTNAME" -print -quit) ]]; then
+  # grep keeps only lines shaped like ID<TAB>digits/dots<TAB>text. tee stores
+  # the compressed stream in $DESTNAME while pbzcat feeds a decompressed copy
+  # to perl, which counts lines per ID prefix (the text before the first '/').
+  # NOTE(review): perl runs with -l, so the explicit \n in the header print
+  # yields an empty line after the header -- confirm whether that is intended.
+  pv ./*.w2v |
+    grep -P '^[A-Z/0-9]+\t[0-9.]+\t[^\t]+' |
+    shuf |
+    pbzip2 -m1000 -c |
+    tee "$DESTNAME" |
+    pbzcat |
+    perl -wlne 'if(/^([^\/]+)/) { $a{$1}++ }; END { print "corpus ID\tcorpus title\tsentence count\n"; foreach $sigle (sort keys %a) {$t=`corpussigle2title $sigle`; chomp $t; print "$t\t$a{$sigle}"}}' > "${DESTNAME%%.tsv.bz2}.contents.txt"
+fi
