#!/bin/bash
#
# Parse DeReKo CoNLL-U files with systems/parse_spacy.py in parallel batches.
#
# For every name listed in FILES_TO_PROCESS the script:
#   1. runs parse_spacy.py on ${ROOT_DATAPATH}/<name>.conllu.gz, writing
#      ${ROOT_DATAPATH}/0_SpaCyParsed/<name>.spacy.gl.conllu
#   2. removes ${ROOT_DATAPATH}/<name>.conllu (the uncompressed original)
#   3. gzips the parsed output
#
# Up to BATCH_SIZE files are handled concurrently; the next batch starts only
# after the current one has been parsed, cleaned up and compressed.
#
# Paths to systems/ and DeReKo/ are relative, so run this from the directory
# that contains them.

# NOTE(review): presumably needed to set up the Python environment — confirm.
# shellcheck source=/dev/null
source ~/.bashrc

ROOT_DATAPATH=/export/netapp/kupietz/N-GRAMM-STUDIE/conllu
FILES_TO_PROCESS=DeReKo/all_dereko_filenames.txt
PARSED_DIR=${ROOT_DATAPATH}/0_SpaCyParsed
BATCH_SIZE=10

# Without the list there is nothing meaningful to do; bail out instead of
# launching jobs with empty file names.
if [[ ! -r "${FILES_TO_PROCESS}" ]]; then
  printf 'error: cannot read file list: %s\n' "${FILES_TO_PROCESS}" >&2
  exit 1
fi

# Collect the file names. Each line is split on whitespace (blank lines
# contribute nothing) but, unlike an unquoted expansion, is not glob-expanded.
# The `|| (( ... ))` part keeps a final line that lacks a trailing newline.
my_files=()
while read -r -a names || (( ${#names[@]} > 0 )); do
  my_files+=("${names[@]}")
done < "${FILES_TO_PROCESS}"

# Walk the list in slices of BATCH_SIZE; the last slice may be shorter, so no
# empty names are ever processed.
for (( i = 0; i < ${#my_files[@]}; i += BATCH_SIZE )); do
  batch=("${my_files[@]:i:BATCH_SIZE}")

  # Parse every file of the batch in parallel.
  for name in "${batch[@]}"; do
    python systems/parse_spacy.py --corpus_name "DeReKo_${name}" --comment_str "#" \
      -i "${ROOT_DATAPATH}/${name}.conllu.gz" \
      -o "${PARSED_DIR}/${name}.spacy.gl.conllu" &
  done
  wait

  # Remove original uncompressed files.
  for name in "${batch[@]}"; do
    rm -- "${ROOT_DATAPATH}/${name}.conllu"
  done

  # Compress the generated parsed outputs in parallel.
  for name in "${batch[@]}"; do
    gzip -- "${PARSED_DIR}/${name}.spacy.gl.conllu" &
  done
  wait
done