blob: 13ca55707209662b063d32b03cb271a474290bae [file] [log] [blame]
#!/bin/bash
#
# Parse DeReKo CoNLL-U files with systems/parse_spacy.py in parallel batches.
#
# The file list FILES_TO_PROCESS holds one corpus file name per line (without
# extension). For every batch of BATCH_SIZE names the script:
#   1. starts one background parse_spacy.py job per name, reading
#      ${ROOT_DATAPATH}/<name>.conllu.gz and writing
#      ${OUT_DIR}/<name>.spacy.gl.conllu, then waits for the whole batch;
#   2. removes ${ROOT_DATAPATH}/<name>.conllu (the uncompressed original);
#   3. gzips the generated outputs in parallel and waits again.
#
# The script uses relative paths (systems/, DeReKo/), so it has to be started
# from the directory that contains them.
#
# Exit status: 0 if every parse and gzip job succeeded, 1 otherwise.

# NOTE(review): presumably needed to set up the Python environment for
# non-interactive runs -- confirm. Strict mode (set -eu) is deliberately not
# enabled because a user's ~/.bashrc commonly trips over it.
source ~/.bashrc

readonly ROOT_DATAPATH=/export/netapp/kupietz/N-GRAMM-STUDIE/conllu
readonly FILES_TO_PROCESS=DeReKo/all_dereko_filenames.txt
readonly OUT_DIR="${ROOT_DATAPATH}/0_SpaCyParsed"
# Number of files handled concurrently.
readonly BATCH_SIZE=10

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Wait for every background job recorded in the global array `pids`.
# Globals:   pids (read), labels (read; labels[i] names the file of pids[i]),
#            failures (incremented once per failed job)
# Arguments: $1 - name of the stage, used in the warning message
# Outputs:   one warning per failed job on stderr
#######################################
reap_jobs() {
  local stage=$1
  local idx
  for idx in "${!pids[@]}"; do
    if ! wait "${pids[idx]}"; then
      printf 'warning: %s failed for %s\n' "${stage}" "${labels[idx]}" >&2
      failures=$((failures + 1))
    fi
  done
}

[[ -r "${FILES_TO_PROCESS}" ]] || die "cannot read file list: ${FILES_TO_PROCESS}"

# Load the file names. `read` with the default IFS trims surrounding
# whitespace; the `|| [[ -n ... ]]` keeps a last line without trailing newline.
my_files=()
while read -r line || [[ -n "${line}" ]]; do
  if [[ -n "${line}" ]]; then
    my_files+=("${line}")
  fi
done < "${FILES_TO_PROCESS}"

total=${#my_files[@]}
if (( total == 0 )); then
  printf 'nothing to do: %s lists no files\n' "${FILES_TO_PROCESS}" >&2
  exit 0
fi

mkdir -p -- "${OUT_DIR}" || die "cannot create output directory: ${OUT_DIR}"

failures=0
# Step through the list one batch at a time. The slice below is clipped to the
# end of the array, so the final batch may be shorter than BATCH_SIZE and no
# job is ever started for an empty file name.
for (( start = 0; start < total; start += BATCH_SIZE )); do
  batch=("${my_files[@]:start:BATCH_SIZE}")

  # Parse all files of the batch concurrently.
  pids=()
  labels=()
  for name in "${batch[@]}"; do
    python systems/parse_spacy.py --corpus_name "DeReKo_${name}" --comment_str "#" \
      -i "${ROOT_DATAPATH}/${name}.conllu.gz" \
      -o "${OUT_DIR}/${name}.spacy.gl.conllu" &
    pids+=("$!")
    labels+=("${name}")
  done
  reap_jobs "parsing"

  # Remove the original uncompressed files (the .conllu.gz inputs are kept).
  # -f: it is not an error if no uncompressed copy exists.
  for name in "${batch[@]}"; do
    rm -f -- "${ROOT_DATAPATH}/${name}.conllu"
  done

  # Compress the generated parser outputs concurrently.
  pids=()
  labels=()
  for name in "${batch[@]}"; do
    parsed="${OUT_DIR}/${name}.spacy.gl.conllu"
    if [[ -f "${parsed}" ]]; then
      gzip -- "${parsed}" &
      pids+=("$!")
      labels+=("${name}")
    else
      printf 'warning: no parser output to compress for %s\n' "${name}" >&2
    fi
  done
  reap_jobs "gzip"
done

(( failures == 0 )) || die "${failures} job(s) failed, see warnings above"